blob: d25ab51b1e62fbbb96064c22c00a9e39423fabdf [file] [log] [blame]
bellardfc01f7e2003-06-30 10:03:06 +00001/*
2 * QEMU System Emulator block driver
ths5fafdf22007-09-16 21:08:06 +00003 *
bellardfc01f7e2003-06-30 10:03:06 +00004 * Copyright (c) 2003 Fabrice Bellard
ths5fafdf22007-09-16 21:08:06 +00005 *
bellardfc01f7e2003-06-30 10:03:06 +00006 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
blueswir13990d092008-12-05 17:53:21 +000024#include "config-host.h"
pbrookfaf07962007-11-11 02:51:17 +000025#include "qemu-common.h"
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +010026#include "trace.h"
Paolo Bonzini737e1502012-12-17 18:19:44 +010027#include "block/block_int.h"
28#include "block/blockjob.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010029#include "qemu/module.h"
Paolo Bonzini7b1b5d12012-12-17 18:19:43 +010030#include "qapi/qmp/qjson.h"
Markus Armbrusterbfb197e2014-10-07 13:59:11 +020031#include "sysemu/block-backend.h"
Paolo Bonzini9c17d612012-12-17 18:20:04 +010032#include "sysemu/sysemu.h"
Fam Zhengde50a202015-03-25 15:27:26 +080033#include "sysemu/qtest.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010034#include "qemu/notify.h"
Paolo Bonzini737e1502012-12-17 18:19:44 +010035#include "block/coroutine.h"
Benoît Canetc13163f2014-01-23 21:31:34 +010036#include "block/qapi.h"
Luiz Capitulinob2023812011-09-21 17:16:47 -030037#include "qmp-commands.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010038#include "qemu/timer.h"
Wenchao Xiaa5ee7bd2014-06-18 08:43:44 +020039#include "qapi-event.h"
bellardfc01f7e2003-06-30 10:03:06 +000040
Juan Quintela71e72a12009-07-27 16:12:56 +020041#ifdef CONFIG_BSD
bellard7674e7b2005-04-26 21:59:26 +000042#include <sys/types.h>
43#include <sys/stat.h>
44#include <sys/ioctl.h>
Blue Swirl72cf2d42009-09-12 07:36:22 +000045#include <sys/queue.h>
blueswir1c5e97232009-03-07 20:06:23 +000046#ifndef __DragonFly__
bellard7674e7b2005-04-26 21:59:26 +000047#include <sys/disk.h>
48#endif
blueswir1c5e97232009-03-07 20:06:23 +000049#endif
bellard7674e7b2005-04-26 21:59:26 +000050
aliguori49dc7682009-03-08 16:26:59 +000051#ifdef _WIN32
52#include <windows.h>
53#endif
54
Fam Zhenge4654d22013-11-13 18:29:43 +080055struct BdrvDirtyBitmap {
56 HBitmap *bitmap;
Fam Zheng0db6e542015-04-17 19:49:50 -040057 char *name;
Fam Zhenge4654d22013-11-13 18:29:43 +080058 QLIST_ENTRY(BdrvDirtyBitmap) list;
59};
60
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +010061#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
62
Markus Armbruster7c84b1b2014-10-07 13:59:14 +020063static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +000064 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +020065 BlockCompletionFunc *cb, void *opaque);
Markus Armbruster7c84b1b2014-10-07 13:59:14 +020066static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +000067 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +020068 BlockCompletionFunc *cb, void *opaque);
Kevin Wolff9f05dc2011-07-15 13:50:26 +020069static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
70 int64_t sector_num, int nb_sectors,
71 QEMUIOVector *iov);
72static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
73 int64_t sector_num, int nb_sectors,
74 QEMUIOVector *iov);
Kevin Wolf775aa8b2013-12-05 12:09:38 +010075static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
76 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +000077 BdrvRequestFlags flags);
Kevin Wolf775aa8b2013-12-05 12:09:38 +010078static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
79 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +000080 BdrvRequestFlags flags);
Markus Armbruster7c84b1b2014-10-07 13:59:14 +020081static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
82 int64_t sector_num,
83 QEMUIOVector *qiov,
84 int nb_sectors,
85 BdrvRequestFlags flags,
Markus Armbruster097310b2014-10-07 13:59:15 +020086 BlockCompletionFunc *cb,
Markus Armbruster7c84b1b2014-10-07 13:59:14 +020087 void *opaque,
88 bool is_write);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010089static void coroutine_fn bdrv_co_do_rw(void *opaque);
Kevin Wolf621f0582012-03-20 15:12:58 +010090static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +020091 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
bellardec530c82006-04-25 22:36:06 +000092
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +010093static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
94 QTAILQ_HEAD_INITIALIZER(bdrv_states);
blueswir17ee930d2008-09-17 19:04:14 +000095
Benoît Canetdc364f42014-01-23 21:31:32 +010096static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
97 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
98
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +010099static QLIST_HEAD(, BlockDriver) bdrv_drivers =
100 QLIST_HEAD_INITIALIZER(bdrv_drivers);
bellardea2384d2004-08-01 21:59:26 +0000101
Vladimir Sementsov-Ogievskiyc4237df2014-11-27 12:40:46 +0300102static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
103 int nr_sectors);
104static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
105 int nr_sectors);
Markus Armbrustereb852012009-10-27 18:41:44 +0100106/* If non-zero, use only whitelisted block drivers */
107static int use_bdrv_whitelist;
108
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000109#ifdef _WIN32
/* Return non-zero if @filename begins with a drive letter and ':' (e.g. "c:") */
static int is_windows_drive_prefix(const char *filename)
{
    const char c = filename[0];

    return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) &&
           filename[1] == ':';
}
116
117int is_windows_drive(const char *filename)
118{
119 if (is_windows_drive_prefix(filename) &&
120 filename[2] == '\0')
121 return 1;
122 if (strstart(filename, "\\\\.\\", NULL) ||
123 strstart(filename, "//./", NULL))
124 return 1;
125 return 0;
126}
127#endif
128
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800129/* throttling disk I/O limits */
Benoît Canetcc0681c2013-09-02 14:14:39 +0200130void bdrv_set_io_limits(BlockDriverState *bs,
131 ThrottleConfig *cfg)
132{
133 int i;
134
135 throttle_config(&bs->throttle_state, cfg);
136
137 for (i = 0; i < 2; i++) {
138 qemu_co_enter_next(&bs->throttled_reqs[i]);
139 }
140}
141
142/* this function drain all the throttled IOs */
143static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
144{
145 bool drained = false;
146 bool enabled = bs->io_limits_enabled;
147 int i;
148
149 bs->io_limits_enabled = false;
150
151 for (i = 0; i < 2; i++) {
152 while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
153 drained = true;
154 }
155 }
156
157 bs->io_limits_enabled = enabled;
158
159 return drained;
160}
161
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800162void bdrv_io_limits_disable(BlockDriverState *bs)
163{
164 bs->io_limits_enabled = false;
165
Benoît Canetcc0681c2013-09-02 14:14:39 +0200166 bdrv_start_throttled_reqs(bs);
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800167
Benoît Canetcc0681c2013-09-02 14:14:39 +0200168 throttle_destroy(&bs->throttle_state);
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800169}
170
Benoît Canetcc0681c2013-09-02 14:14:39 +0200171static void bdrv_throttle_read_timer_cb(void *opaque)
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800172{
173 BlockDriverState *bs = opaque;
Benoît Canetcc0681c2013-09-02 14:14:39 +0200174 qemu_co_enter_next(&bs->throttled_reqs[0]);
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800175}
176
Benoît Canetcc0681c2013-09-02 14:14:39 +0200177static void bdrv_throttle_write_timer_cb(void *opaque)
178{
179 BlockDriverState *bs = opaque;
180 qemu_co_enter_next(&bs->throttled_reqs[1]);
181}
182
183/* should be called before bdrv_set_io_limits if a limit is set */
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800184void bdrv_io_limits_enable(BlockDriverState *bs)
185{
Fam Zhengde50a202015-03-25 15:27:26 +0800186 int clock_type = QEMU_CLOCK_REALTIME;
187
188 if (qtest_enabled()) {
189 /* For testing block IO throttling only */
190 clock_type = QEMU_CLOCK_VIRTUAL;
191 }
Benoît Canetcc0681c2013-09-02 14:14:39 +0200192 assert(!bs->io_limits_enabled);
193 throttle_init(&bs->throttle_state,
Stefan Hajnoczi13af91e2014-05-14 16:22:45 +0200194 bdrv_get_aio_context(bs),
Fam Zhengde50a202015-03-25 15:27:26 +0800195 clock_type,
Benoît Canetcc0681c2013-09-02 14:14:39 +0200196 bdrv_throttle_read_timer_cb,
197 bdrv_throttle_write_timer_cb,
198 bs);
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800199 bs->io_limits_enabled = true;
200}
201
Benoît Canetcc0681c2013-09-02 14:14:39 +0200202/* This function makes an IO wait if needed
203 *
204 * @nb_sectors: the number of sectors of the IO
205 * @is_write: is the IO a write
206 */
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800207static void bdrv_io_limits_intercept(BlockDriverState *bs,
Kevin Wolfd5103582014-01-16 13:29:10 +0100208 unsigned int bytes,
Benoît Canetcc0681c2013-09-02 14:14:39 +0200209 bool is_write)
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800210{
Benoît Canetcc0681c2013-09-02 14:14:39 +0200211 /* does this io must wait */
212 bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800213
Benoît Canetcc0681c2013-09-02 14:14:39 +0200214 /* if must wait or any request of this type throttled queue the IO */
215 if (must_wait ||
216 !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
217 qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800218 }
219
Benoît Canetcc0681c2013-09-02 14:14:39 +0200220 /* the IO will be executed, do the accounting */
Kevin Wolfd5103582014-01-16 13:29:10 +0100221 throttle_account(&bs->throttle_state, is_write, bytes);
222
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800223
Benoît Canetcc0681c2013-09-02 14:14:39 +0200224 /* if the next request must wait -> do nothing */
225 if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
226 return;
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800227 }
228
Benoît Canetcc0681c2013-09-02 14:14:39 +0200229 /* else queue next request for execution */
230 qemu_co_queue_next(&bs->throttled_reqs[is_write]);
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800231}
232
Kevin Wolf339064d2013-11-28 10:23:32 +0100233size_t bdrv_opt_mem_align(BlockDriverState *bs)
234{
235 if (!bs || !bs->drv) {
236 /* 4k should be on the safe side */
237 return 4096;
238 }
239
240 return bs->bl.opt_mem_alignment;
241}
242
/* check if the path starts with "<protocol>:" */
int path_has_protocol(const char *path)
{
    const char *p;

#ifdef _WIN32
    /* "c:" and friends look like protocols but are drive specs */
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    p = path + strcspn(path, ":/\\");
#else
    p = path + strcspn(path, ":/");
#endif

    /* a ':' before any path separator marks a protocol prefix */
    return *p == ':';
}
260
/* Return 1 if @path is absolute for the host platform */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return *path == '/' || *path == '\\';
#else
    return *path == '/';
#endif
}
273
/* if filename is absolute, just copy it to dest. Otherwise, build a
 * path to it by considering it is relative to base_path. URLs are
 * supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        /* skip a "protocol:" prefix in base_path, if any */
        p = strchr(base_path, ':');
        if (p) {
            p++;
        } else {
            p = base_path;
        }
        /* find the last path separator in base_path */
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1) {
                p1 = p2;
            }
        }
#endif
        if (p1) {
            p1++;
        } else {
            p1 = base_path;
        }
        if (p1 > p) {
            p = p1;
        }
        /* copy the directory part of base_path, then append filename */
        len = p - base_path;
        if (len > dest_size - 1) {
            len = dest_size - 1;
        }
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}
317
Max Reitz0a828552014-11-26 17:20:25 +0100318void bdrv_get_full_backing_filename_from_filename(const char *backed,
319 const char *backing,
Max Reitz9f074292014-11-26 17:20:26 +0100320 char *dest, size_t sz,
321 Error **errp)
Max Reitz0a828552014-11-26 17:20:25 +0100322{
Max Reitz9f074292014-11-26 17:20:26 +0100323 if (backing[0] == '\0' || path_has_protocol(backing) ||
324 path_is_absolute(backing))
325 {
Max Reitz0a828552014-11-26 17:20:25 +0100326 pstrcpy(dest, sz, backing);
Max Reitz9f074292014-11-26 17:20:26 +0100327 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
328 error_setg(errp, "Cannot use relative backing file names for '%s'",
329 backed);
Max Reitz0a828552014-11-26 17:20:25 +0100330 } else {
331 path_combine(dest, sz, backed, backing);
332 }
333}
334
Max Reitz9f074292014-11-26 17:20:26 +0100335void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
336 Error **errp)
Paolo Bonzinidc5a1372012-05-08 16:51:50 +0200337{
Max Reitz9f074292014-11-26 17:20:26 +0100338 char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
339
340 bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
341 dest, sz, errp);
Paolo Bonzinidc5a1372012-05-08 16:51:50 +0200342}
343
Anthony Liguori5efa9d52009-05-09 17:03:42 -0500344void bdrv_register(BlockDriver *bdrv)
bellardea2384d2004-08-01 21:59:26 +0000345{
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +0100346 /* Block drivers without coroutine functions need emulation */
347 if (!bdrv->bdrv_co_readv) {
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200348 bdrv->bdrv_co_readv = bdrv_co_readv_em;
349 bdrv->bdrv_co_writev = bdrv_co_writev_em;
350
Stefan Hajnoczif8c35c12011-10-13 21:09:31 +0100351 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
352 * the block driver lacks aio we need to emulate that too.
353 */
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200354 if (!bdrv->bdrv_aio_readv) {
355 /* add AIO emulation layer */
356 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
357 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200358 }
bellard83f64092006-08-01 16:21:11 +0000359 }
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +0200360
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100361 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
bellardea2384d2004-08-01 21:59:26 +0000362}
bellardb3380822004-03-14 21:38:54 +0000363
Markus Armbruster7f06d472014-10-07 13:59:12 +0200364BlockDriverState *bdrv_new_root(void)
bellardfc01f7e2003-06-30 10:03:06 +0000365{
Markus Armbruster7f06d472014-10-07 13:59:12 +0200366 BlockDriverState *bs = bdrv_new();
Markus Armbrustere4e99862014-10-07 13:59:03 +0200367
Markus Armbrustere4e99862014-10-07 13:59:03 +0200368 QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
Markus Armbrustere4e99862014-10-07 13:59:03 +0200369 return bs;
370}
371
372BlockDriverState *bdrv_new(void)
373{
374 BlockDriverState *bs;
375 int i;
376
Markus Armbruster5839e532014-08-19 10:31:08 +0200377 bs = g_new0(BlockDriverState, 1);
Fam Zhenge4654d22013-11-13 18:29:43 +0800378 QLIST_INIT(&bs->dirty_bitmaps);
Fam Zhengfbe40ff2014-05-23 21:29:42 +0800379 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
380 QLIST_INIT(&bs->op_blockers[i]);
381 }
Luiz Capitulino28a72822011-09-26 17:43:50 -0300382 bdrv_iostatus_disable(bs);
Paolo Bonzinid7d512f2012-08-23 11:20:36 +0200383 notifier_list_init(&bs->close_notifiers);
Stefan Hajnoczid616b222013-06-24 17:13:10 +0200384 notifier_with_return_list_init(&bs->before_write_notifiers);
Benoît Canetcc0681c2013-09-02 14:14:39 +0200385 qemu_co_queue_init(&bs->throttled_reqs[0]);
386 qemu_co_queue_init(&bs->throttled_reqs[1]);
Fam Zheng9fcb0252013-08-23 09:14:46 +0800387 bs->refcnt = 1;
Stefan Hajnoczidcd04222014-05-08 16:34:37 +0200388 bs->aio_context = qemu_get_aio_context();
Paolo Bonzinid7d512f2012-08-23 11:20:36 +0200389
bellardb3380822004-03-14 21:38:54 +0000390 return bs;
391}
392
Paolo Bonzinid7d512f2012-08-23 11:20:36 +0200393void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
394{
395 notifier_list_add(&bs->close_notifiers, notify);
396}
397
bellardea2384d2004-08-01 21:59:26 +0000398BlockDriver *bdrv_find_format(const char *format_name)
399{
400 BlockDriver *drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100401 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
402 if (!strcmp(drv1->format_name, format_name)) {
bellardea2384d2004-08-01 21:59:26 +0000403 return drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100404 }
bellardea2384d2004-08-01 21:59:26 +0000405 }
406 return NULL;
407}
408
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800409static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
Markus Armbrustereb852012009-10-27 18:41:44 +0100410{
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800411 static const char *whitelist_rw[] = {
412 CONFIG_BDRV_RW_WHITELIST
413 };
414 static const char *whitelist_ro[] = {
415 CONFIG_BDRV_RO_WHITELIST
Markus Armbrustereb852012009-10-27 18:41:44 +0100416 };
417 const char **p;
418
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800419 if (!whitelist_rw[0] && !whitelist_ro[0]) {
Markus Armbrustereb852012009-10-27 18:41:44 +0100420 return 1; /* no whitelist, anything goes */
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800421 }
Markus Armbrustereb852012009-10-27 18:41:44 +0100422
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800423 for (p = whitelist_rw; *p; p++) {
Markus Armbrustereb852012009-10-27 18:41:44 +0100424 if (!strcmp(drv->format_name, *p)) {
425 return 1;
426 }
427 }
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800428 if (read_only) {
429 for (p = whitelist_ro; *p; p++) {
430 if (!strcmp(drv->format_name, *p)) {
431 return 1;
432 }
433 }
434 }
Markus Armbrustereb852012009-10-27 18:41:44 +0100435 return 0;
436}
437
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800438BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
439 bool read_only)
Markus Armbrustereb852012009-10-27 18:41:44 +0100440{
441 BlockDriver *drv = bdrv_find_format(format_name);
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800442 return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
Markus Armbrustereb852012009-10-27 18:41:44 +0100443}
444
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800445typedef struct CreateCo {
446 BlockDriver *drv;
447 char *filename;
Chunyan Liu83d05212014-06-05 17:20:51 +0800448 QemuOpts *opts;
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800449 int ret;
Max Reitzcc84d902013-09-06 17:14:26 +0200450 Error *err;
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800451} CreateCo;
452
453static void coroutine_fn bdrv_create_co_entry(void *opaque)
454{
Max Reitzcc84d902013-09-06 17:14:26 +0200455 Error *local_err = NULL;
456 int ret;
457
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800458 CreateCo *cco = opaque;
459 assert(cco->drv);
460
Chunyan Liuc282e1f2014-06-05 17:21:11 +0800461 ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
Markus Armbruster84d18f02014-01-30 15:07:28 +0100462 if (local_err) {
Max Reitzcc84d902013-09-06 17:14:26 +0200463 error_propagate(&cco->err, local_err);
464 }
465 cco->ret = ret;
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800466}
467
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200468int bdrv_create(BlockDriver *drv, const char* filename,
Chunyan Liu83d05212014-06-05 17:20:51 +0800469 QemuOpts *opts, Error **errp)
bellardea2384d2004-08-01 21:59:26 +0000470{
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800471 int ret;
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200472
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800473 Coroutine *co;
474 CreateCo cco = {
475 .drv = drv,
476 .filename = g_strdup(filename),
Chunyan Liu83d05212014-06-05 17:20:51 +0800477 .opts = opts,
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800478 .ret = NOT_DONE,
Max Reitzcc84d902013-09-06 17:14:26 +0200479 .err = NULL,
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800480 };
481
Chunyan Liuc282e1f2014-06-05 17:21:11 +0800482 if (!drv->bdrv_create) {
Max Reitzcc84d902013-09-06 17:14:26 +0200483 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
Luiz Capitulino80168bf2012-10-17 16:45:25 -0300484 ret = -ENOTSUP;
485 goto out;
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800486 }
487
488 if (qemu_in_coroutine()) {
489 /* Fast-path if already in coroutine context */
490 bdrv_create_co_entry(&cco);
491 } else {
492 co = qemu_coroutine_create(bdrv_create_co_entry);
493 qemu_coroutine_enter(co, &cco);
494 while (cco.ret == NOT_DONE) {
Paolo Bonzinib47ec2c2014-07-07 15:18:01 +0200495 aio_poll(qemu_get_aio_context(), true);
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800496 }
497 }
498
499 ret = cco.ret;
Max Reitzcc84d902013-09-06 17:14:26 +0200500 if (ret < 0) {
Markus Armbruster84d18f02014-01-30 15:07:28 +0100501 if (cco.err) {
Max Reitzcc84d902013-09-06 17:14:26 +0200502 error_propagate(errp, cco.err);
503 } else {
504 error_setg_errno(errp, -ret, "Could not create image");
505 }
506 }
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800507
Luiz Capitulino80168bf2012-10-17 16:45:25 -0300508out:
509 g_free(cco.filename);
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800510 return ret;
bellardea2384d2004-08-01 21:59:26 +0000511}
512
Chunyan Liuc282e1f2014-06-05 17:21:11 +0800513int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200514{
515 BlockDriver *drv;
Max Reitzcc84d902013-09-06 17:14:26 +0200516 Error *local_err = NULL;
517 int ret;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200518
Max Reitzb65a5e12015-02-05 13:58:12 -0500519 drv = bdrv_find_protocol(filename, true, errp);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200520 if (drv == NULL) {
Stefan Hajnoczi16905d72010-11-30 15:14:14 +0000521 return -ENOENT;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200522 }
523
Chunyan Liuc282e1f2014-06-05 17:21:11 +0800524 ret = bdrv_create(drv, filename, opts, &local_err);
Markus Armbruster84d18f02014-01-30 15:07:28 +0100525 if (local_err) {
Max Reitzcc84d902013-09-06 17:14:26 +0200526 error_propagate(errp, local_err);
527 }
528 return ret;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200529}
530
Kevin Wolf3baca892014-07-16 17:48:16 +0200531void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
Kevin Wolfd34682c2013-12-11 19:26:16 +0100532{
533 BlockDriver *drv = bs->drv;
Kevin Wolf3baca892014-07-16 17:48:16 +0200534 Error *local_err = NULL;
Kevin Wolfd34682c2013-12-11 19:26:16 +0100535
536 memset(&bs->bl, 0, sizeof(bs->bl));
537
Kevin Wolf466ad822013-12-11 19:50:32 +0100538 if (!drv) {
Kevin Wolf3baca892014-07-16 17:48:16 +0200539 return;
Kevin Wolf466ad822013-12-11 19:50:32 +0100540 }
541
542 /* Take some limits from the children as a default */
543 if (bs->file) {
Kevin Wolf3baca892014-07-16 17:48:16 +0200544 bdrv_refresh_limits(bs->file, &local_err);
545 if (local_err) {
546 error_propagate(errp, local_err);
547 return;
548 }
Kevin Wolf466ad822013-12-11 19:50:32 +0100549 bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
Peter Lieven2647fab2014-10-27 10:18:44 +0100550 bs->bl.max_transfer_length = bs->file->bl.max_transfer_length;
Kevin Wolf339064d2013-11-28 10:23:32 +0100551 bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
552 } else {
553 bs->bl.opt_mem_alignment = 512;
Kevin Wolf466ad822013-12-11 19:50:32 +0100554 }
555
556 if (bs->backing_hd) {
Kevin Wolf3baca892014-07-16 17:48:16 +0200557 bdrv_refresh_limits(bs->backing_hd, &local_err);
558 if (local_err) {
559 error_propagate(errp, local_err);
560 return;
561 }
Kevin Wolf466ad822013-12-11 19:50:32 +0100562 bs->bl.opt_transfer_length =
563 MAX(bs->bl.opt_transfer_length,
564 bs->backing_hd->bl.opt_transfer_length);
Peter Lieven2647fab2014-10-27 10:18:44 +0100565 bs->bl.max_transfer_length =
566 MIN_NON_ZERO(bs->bl.max_transfer_length,
567 bs->backing_hd->bl.max_transfer_length);
Kevin Wolf339064d2013-11-28 10:23:32 +0100568 bs->bl.opt_mem_alignment =
569 MAX(bs->bl.opt_mem_alignment,
570 bs->backing_hd->bl.opt_mem_alignment);
Kevin Wolf466ad822013-12-11 19:50:32 +0100571 }
572
573 /* Then let the driver override it */
574 if (drv->bdrv_refresh_limits) {
Kevin Wolf3baca892014-07-16 17:48:16 +0200575 drv->bdrv_refresh_limits(bs, errp);
Kevin Wolfd34682c2013-12-11 19:26:16 +0100576 }
Kevin Wolfd34682c2013-12-11 19:26:16 +0100577}
578
Ekaterina Tumanova892b7de2015-02-16 12:47:54 +0100579/**
580 * Try to get @bs's logical and physical block size.
581 * On success, store them in @bsz struct and return 0.
582 * On failure return -errno.
583 * @bs must not be empty.
584 */
585int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
586{
587 BlockDriver *drv = bs->drv;
588
589 if (drv && drv->bdrv_probe_blocksizes) {
590 return drv->bdrv_probe_blocksizes(bs, bsz);
591 }
592
593 return -ENOTSUP;
594}
595
596/**
597 * Try to get @bs's geometry (cyls, heads, sectors).
598 * On success, store them in @geo struct and return 0.
599 * On failure return -errno.
600 * @bs must not be empty.
601 */
602int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
603{
604 BlockDriver *drv = bs->drv;
605
606 if (drv && drv->bdrv_probe_geometry) {
607 return drv->bdrv_probe_geometry(bs, geo);
608 }
609
610 return -ENOTSUP;
611}
612
/*
 * Create a uniquely-named empty temporary file.
 * Return 0 upon success, otherwise a negative errno value.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater. */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    /* close() can report delayed write errors; don't leave a stale file */
    if (close(fd) != 0) {
        unlink(filename);
        return -errno;
    }
    return 0;
#endif
}
bellardea2384d2004-08-01 21:59:26 +0000648
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200649/*
650 * Detect host devices. By convention, /dev/cdrom[N] is always
651 * recognized as a host CDROM.
652 */
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200653static BlockDriver *find_hdev_driver(const char *filename)
654{
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200655 int score_max = 0, score;
656 BlockDriver *drv = NULL, *d;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200657
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100658 QLIST_FOREACH(d, &bdrv_drivers, list) {
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200659 if (d->bdrv_probe_device) {
660 score = d->bdrv_probe_device(filename);
661 if (score > score_max) {
662 score_max = score;
663 drv = d;
664 }
665 }
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200666 }
667
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200668 return drv;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200669}
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200670
Kevin Wolf98289622013-07-10 15:47:39 +0200671BlockDriver *bdrv_find_protocol(const char *filename,
Max Reitzb65a5e12015-02-05 13:58:12 -0500672 bool allow_protocol_prefix,
673 Error **errp)
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200674{
675 BlockDriver *drv1;
676 char protocol[128];
677 int len;
678 const char *p;
679
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200680 /* TODO Drivers without bdrv_file_open must be specified explicitly */
681
Christoph Hellwig39508e72010-06-23 12:25:17 +0200682 /*
683 * XXX(hch): we really should not let host device detection
684 * override an explicit protocol specification, but moving this
685 * later breaks access to device names with colons in them.
686 * Thanks to the brain-dead persistent naming schemes on udev-
687 * based Linux systems those actually are quite common.
688 */
689 drv1 = find_hdev_driver(filename);
690 if (drv1) {
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200691 return drv1;
692 }
Christoph Hellwig39508e72010-06-23 12:25:17 +0200693
Kevin Wolf98289622013-07-10 15:47:39 +0200694 if (!path_has_protocol(filename) || !allow_protocol_prefix) {
Max Reitzef810432014-12-02 18:32:42 +0100695 return &bdrv_file;
Christoph Hellwig39508e72010-06-23 12:25:17 +0200696 }
Kevin Wolf98289622013-07-10 15:47:39 +0200697
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000698 p = strchr(filename, ':');
699 assert(p != NULL);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200700 len = p - filename;
701 if (len > sizeof(protocol) - 1)
702 len = sizeof(protocol) - 1;
703 memcpy(protocol, filename, len);
704 protocol[len] = '\0';
705 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
706 if (drv1->protocol_name &&
707 !strcmp(drv1->protocol_name, protocol)) {
708 return drv1;
709 }
710 }
Max Reitzb65a5e12015-02-05 13:58:12 -0500711
712 error_setg(errp, "Unknown protocol '%s'", protocol);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200713 return NULL;
714}
715
Markus Armbrusterc6684242014-11-20 16:27:10 +0100716/*
717 * Guess image format by probing its contents.
718 * This is not a good idea when your image is raw (CVE-2008-2004), but
719 * we do it anyway for backward compatibility.
720 *
721 * @buf contains the image's first @buf_size bytes.
Kevin Wolf7cddd372014-11-20 16:27:11 +0100722 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
723 * but can be smaller if the image file is smaller)
Markus Armbrusterc6684242014-11-20 16:27:10 +0100724 * @filename is its filename.
725 *
726 * For all block drivers, call the bdrv_probe() method to get its
727 * probing score.
728 * Return the first block driver with the highest probing score.
729 */
Kevin Wolf38f3ef52014-11-20 16:27:12 +0100730BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
731 const char *filename)
Markus Armbrusterc6684242014-11-20 16:27:10 +0100732{
733 int score_max = 0, score;
734 BlockDriver *drv = NULL, *d;
735
736 QLIST_FOREACH(d, &bdrv_drivers, list) {
737 if (d->bdrv_probe) {
738 score = d->bdrv_probe(buf, buf_size, filename);
739 if (score > score_max) {
740 score_max = score;
741 drv = d;
742 }
743 }
744 }
745
746 return drv;
747}
748
Kevin Wolff500a6d2012-11-12 17:35:27 +0100749static int find_image_format(BlockDriverState *bs, const char *filename,
Max Reitz34b5d2c2013-09-05 14:45:29 +0200750 BlockDriver **pdrv, Error **errp)
bellardea2384d2004-08-01 21:59:26 +0000751{
Markus Armbrusterc6684242014-11-20 16:27:10 +0100752 BlockDriver *drv;
Kevin Wolf7cddd372014-11-20 16:27:11 +0100753 uint8_t buf[BLOCK_PROBE_BUF_SIZE];
Kevin Wolff500a6d2012-11-12 17:35:27 +0100754 int ret = 0;
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700755
Kevin Wolf08a00552010-06-01 18:37:31 +0200756 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
Paolo Bonzini8e895592013-01-10 15:39:27 +0100757 if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
Max Reitzef810432014-12-02 18:32:42 +0100758 *pdrv = &bdrv_raw;
Stefan Weilc98ac352010-07-21 21:51:51 +0200759 return ret;
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700760 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700761
bellard83f64092006-08-01 16:21:11 +0000762 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
bellard83f64092006-08-01 16:21:11 +0000763 if (ret < 0) {
Max Reitz34b5d2c2013-09-05 14:45:29 +0200764 error_setg_errno(errp, -ret, "Could not read image for determining its "
765 "format");
Stefan Weilc98ac352010-07-21 21:51:51 +0200766 *pdrv = NULL;
767 return ret;
bellard83f64092006-08-01 16:21:11 +0000768 }
769
Markus Armbrusterc6684242014-11-20 16:27:10 +0100770 drv = bdrv_probe_all(buf, ret, filename);
Stefan Weilc98ac352010-07-21 21:51:51 +0200771 if (!drv) {
Max Reitz34b5d2c2013-09-05 14:45:29 +0200772 error_setg(errp, "Could not determine image format: No compatible "
773 "driver found");
Stefan Weilc98ac352010-07-21 21:51:51 +0200774 ret = -ENOENT;
775 }
776 *pdrv = drv;
777 return ret;
bellardea2384d2004-08-01 21:59:26 +0000778}
779
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100780/**
781 * Set the current 'total_sectors' value
Markus Armbruster65a9bb22014-06-26 13:23:17 +0200782 * Return 0 on success, -errno on error.
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100783 */
784static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
785{
786 BlockDriver *drv = bs->drv;
787
Nicholas Bellinger396759a2010-05-17 09:46:04 -0700788 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
789 if (bs->sg)
790 return 0;
791
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100792 /* query actual device if possible, otherwise just trust the hint */
793 if (drv->bdrv_getlength) {
794 int64_t length = drv->bdrv_getlength(bs);
795 if (length < 0) {
796 return length;
797 }
Fam Zheng7e382002013-11-06 19:48:06 +0800798 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100799 }
800
801 bs->total_sectors = hint;
802 return 0;
803}
804
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100805/**
Paolo Bonzini9e8f1832013-02-08 14:06:11 +0100806 * Set open flags for a given discard mode
807 *
808 * Return 0 on success, -1 if the discard mode was invalid.
809 */
810int bdrv_parse_discard_flags(const char *mode, int *flags)
811{
812 *flags &= ~BDRV_O_UNMAP;
813
814 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
815 /* do nothing */
816 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
817 *flags |= BDRV_O_UNMAP;
818 } else {
819 return -1;
820 }
821
822 return 0;
823}
824
825/**
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100826 * Set open flags for a given cache mode
827 *
828 * Return 0 on success, -1 if the cache mode was invalid.
829 */
830int bdrv_parse_cache_flags(const char *mode, int *flags)
831{
832 *flags &= ~BDRV_O_CACHE_MASK;
833
834 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
835 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
Stefan Hajnoczi92196b22011-08-04 12:26:52 +0100836 } else if (!strcmp(mode, "directsync")) {
837 *flags |= BDRV_O_NOCACHE;
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100838 } else if (!strcmp(mode, "writeback")) {
839 *flags |= BDRV_O_CACHE_WB;
840 } else if (!strcmp(mode, "unsafe")) {
841 *flags |= BDRV_O_CACHE_WB;
842 *flags |= BDRV_O_NO_FLUSH;
843 } else if (!strcmp(mode, "writethrough")) {
844 /* this is the default */
845 } else {
846 return -1;
847 }
848
849 return 0;
850}
851
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000852/**
853 * The copy-on-read flag is actually a reference count so multiple users may
854 * use the feature without worrying about clobbering its previous state.
855 * Copy-on-read stays enabled until all users have called to disable it.
856 */
857void bdrv_enable_copy_on_read(BlockDriverState *bs)
858{
859 bs->copy_on_read++;
860}
861
862void bdrv_disable_copy_on_read(BlockDriverState *bs)
863{
864 assert(bs->copy_on_read > 0);
865 bs->copy_on_read--;
866}
867
Kevin Wolf0b50cc82014-04-11 21:29:52 +0200868/*
Kevin Wolfb1e6fc02014-05-06 12:11:42 +0200869 * Returns the flags that a temporary snapshot should get, based on the
870 * originally requested flags (the originally requested image will have flags
871 * like a backing file)
872 */
873static int bdrv_temp_snapshot_flags(int flags)
874{
875 return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
876}
877
878/*
Kevin Wolf0b50cc82014-04-11 21:29:52 +0200879 * Returns the flags that bs->file should get, based on the given flags for
880 * the parent BDS
881 */
882static int bdrv_inherited_flags(int flags)
883{
884 /* Enable protocol handling, disable format probing for bs->file */
885 flags |= BDRV_O_PROTOCOL;
886
887 /* Our block drivers take care to send flushes and respect unmap policy,
888 * so we can enable both unconditionally on lower layers. */
889 flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
890
Kevin Wolf0b50cc82014-04-11 21:29:52 +0200891 /* Clear flags that only apply to the top layer */
Kevin Wolf5669b442014-04-11 21:36:45 +0200892 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
Kevin Wolf0b50cc82014-04-11 21:29:52 +0200893
894 return flags;
895}
896
Kevin Wolf317fc442014-04-25 13:27:34 +0200897/*
898 * Returns the flags that bs->backing_hd should get, based on the given flags
899 * for the parent BDS
900 */
901static int bdrv_backing_flags(int flags)
902{
903 /* backing files always opened read-only */
904 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
905
906 /* snapshot=on is handled on the top layer */
Kevin Wolf8bfea152014-04-11 19:16:36 +0200907 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
Kevin Wolf317fc442014-04-25 13:27:34 +0200908
909 return flags;
910}
911
Kevin Wolf7b272452012-11-12 17:05:39 +0100912static int bdrv_open_flags(BlockDriverState *bs, int flags)
913{
914 int open_flags = flags | BDRV_O_CACHE_WB;
915
916 /*
917 * Clear flags that are internal to the block layer before opening the
918 * image.
919 */
Kevin Wolf20cca272014-06-04 14:33:27 +0200920 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
Kevin Wolf7b272452012-11-12 17:05:39 +0100921
922 /*
923 * Snapshots should be writable.
924 */
Kevin Wolf8bfea152014-04-11 19:16:36 +0200925 if (flags & BDRV_O_TEMPORARY) {
Kevin Wolf7b272452012-11-12 17:05:39 +0100926 open_flags |= BDRV_O_RDWR;
927 }
928
929 return open_flags;
930}
931
Kevin Wolf636ea372014-01-24 14:11:52 +0100932static void bdrv_assign_node_name(BlockDriverState *bs,
933 const char *node_name,
934 Error **errp)
Benoît Canet6913c0c2014-01-23 21:31:33 +0100935{
936 if (!node_name) {
Kevin Wolf636ea372014-01-24 14:11:52 +0100937 return;
Benoît Canet6913c0c2014-01-23 21:31:33 +0100938 }
939
Kevin Wolf9aebf3b2014-09-25 09:54:02 +0200940 /* Check for empty string or invalid characters */
Markus Armbrusterf5bebbb2014-09-30 13:59:30 +0200941 if (!id_wellformed(node_name)) {
Kevin Wolf9aebf3b2014-09-25 09:54:02 +0200942 error_setg(errp, "Invalid node name");
Kevin Wolf636ea372014-01-24 14:11:52 +0100943 return;
Benoît Canet6913c0c2014-01-23 21:31:33 +0100944 }
945
Benoît Canet0c5e94e2014-02-12 17:15:07 +0100946 /* takes care of avoiding namespaces collisions */
Markus Armbruster7f06d472014-10-07 13:59:12 +0200947 if (blk_by_name(node_name)) {
Benoît Canet0c5e94e2014-02-12 17:15:07 +0100948 error_setg(errp, "node-name=%s is conflicting with a device id",
949 node_name);
Kevin Wolf636ea372014-01-24 14:11:52 +0100950 return;
Benoît Canet0c5e94e2014-02-12 17:15:07 +0100951 }
952
Benoît Canet6913c0c2014-01-23 21:31:33 +0100953 /* takes care of avoiding duplicates node names */
954 if (bdrv_find_node(node_name)) {
955 error_setg(errp, "Duplicate node name");
Kevin Wolf636ea372014-01-24 14:11:52 +0100956 return;
Benoît Canet6913c0c2014-01-23 21:31:33 +0100957 }
958
959 /* copy node name into the bs and insert it into the graph list */
960 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
961 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
Benoît Canet6913c0c2014-01-23 21:31:33 +0100962}
963
/*
 * Common part for opening disk images and files
 *
 * Removes all processed options from *options.
 *
 * @bs:      the BlockDriverState being opened; must not have a file yet
 * @file:    already-opened protocol layer for @bs, or NULL
 * @options: driver options; consumed entries are deleted from the QDict
 * @flags:   BDRV_O_* open flags requested for @bs
 * @drv:     the driver to open @bs with (must not be NULL)
 * @errp:    error destination
 *
 * Returns 0 on success, -errno on failure. On failure bs->drv, bs->file
 * and bs->opaque are reset, leaving the BDS in a closed state.
 */
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
    QDict *options, int flags, BlockDriver *drv, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *node_name = NULL;
    Error *local_err = NULL;

    assert(drv != NULL);
    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    /* Prefer the filename of the already-opened protocol layer, if any */
    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        return -EINVAL;
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);

    node_name = qdict_get_try_str(options, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return -EINVAL;
    }
    qdict_del(options, "node-name");

    /* bdrv_open() with directly using a protocol as drv. This layer is already
     * opened, so assign it to bs (while file becomes a closed BlockDriverState)
     * and return immediately. */
    if (file != NULL && drv->bdrv_file_open) {
        bdrv_swap(file, bs);
        return 0;
    }

    /* Defaults; drivers may refine these via bdrv_refresh_limits() below */
    bs->open_flags = flags;
    bs->guest_block_size = 512;
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        /* Distinguish "read-only only" drivers from fully unlisted ones */
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                        ? "Driver '%s' can only be used for read-only devices"
                        : "Driver '%s' is not whitelisted",
                   drv->format_name);
        return -ENOTSUP;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            return -EINVAL;
        }
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        /* Prefer the driver's own error message over a generic one */
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    if (bs->encrypted) {
        error_report("Encrypted images are deprecated");
        error_printf("Support for them will be removed in a future release.\n"
                     "You can use 'qemu-img convert' to convert your image"
                     " to an unencrypted one.\n");
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

    assert(bdrv_opt_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bs->sg);
    return 0;

free_and_fail:
    /* Undo partial initialization so the BDS is left in a closed state */
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}
1107
Kevin Wolf5e5c4f62014-05-26 11:45:08 +02001108static QDict *parse_json_filename(const char *filename, Error **errp)
1109{
1110 QObject *options_obj;
1111 QDict *options;
1112 int ret;
1113
1114 ret = strstart(filename, "json:", &filename);
1115 assert(ret);
1116
1117 options_obj = qobject_from_json(filename);
1118 if (!options_obj) {
1119 error_setg(errp, "Could not parse the JSON options");
1120 return NULL;
1121 }
1122
1123 if (qobject_type(options_obj) != QTYPE_QDICT) {
1124 qobject_decref(options_obj);
1125 error_setg(errp, "Invalid JSON object given");
1126 return NULL;
1127 }
1128
1129 options = qobject_to_qdict(options_obj);
1130 qdict_flatten(options);
1131
1132 return options;
1133}
1134
/*
 * Fills in default options for opening images and converts the legacy
 * filename/flags pair to option QDict entries.
 *
 * @options:   in/out option QDict; "filename" and "driver" entries may be
 *             added, and json: options are merged in
 * @pfilename: in/out filename; reset to NULL if consumed by the json:
 *             pseudo-protocol
 * @flags:     BDRV_O_* flags; only BDRV_O_PROTOCOL is inspected here
 * @drv:       explicitly requested driver, or NULL to auto-detect
 * @errp:      error destination
 *
 * Returns 0 on success, a negative errno on failure.
 */
static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
                             BlockDriver *drv, Error **errp)
{
    const char *filename = *pfilename;
    const char *drvname;
    bool protocol = flags & BDRV_O_PROTOCOL;
    bool parse_filename = false;
    Error *local_err = NULL;

    /* Parse json: pseudo-protocol */
    if (filename && g_str_has_prefix(filename, "json:")) {
        QDict *json_options = parse_json_filename(filename, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        /* Options given in the filename have lower priority than options
         * specified directly */
        qdict_join(*options, json_options, false);
        QDECREF(json_options);
        /* The filename is fully consumed by the json: parsing */
        *pfilename = filename = NULL;
    }

    /* Fetch the file name from the options QDict if necessary */
    if (protocol && filename) {
        if (!qdict_haskey(*options, "filename")) {
            qdict_put(*options, "filename", qstring_from_str(filename));
            /* Remember to let the driver post-process the raw filename */
            parse_filename = true;
        } else {
            error_setg(errp, "Can't specify 'file' and 'filename' options at "
                       "the same time");
            return -EINVAL;
        }
    }

    /* Find the right block driver */
    filename = qdict_get_try_str(*options, "filename");
    drvname = qdict_get_try_str(*options, "driver");

    if (drv) {
        /* An explicit driver argument and a "driver" option conflict */
        if (drvname) {
            error_setg(errp, "Driver specified twice");
            return -EINVAL;
        }
        drvname = drv->format_name;
        qdict_put(*options, "driver", qstring_from_str(drvname));
    } else {
        if (!drvname && protocol) {
            if (filename) {
                /* Guess the protocol driver from the filename prefix */
                drv = bdrv_find_protocol(filename, parse_filename, errp);
                if (!drv) {
                    return -EINVAL;
                }

                drvname = drv->format_name;
                qdict_put(*options, "driver", qstring_from_str(drvname));
            } else {
                error_setg(errp, "Must specify either driver or file");
                return -EINVAL;
            }
        } else if (drvname) {
            drv = bdrv_find_format(drvname);
            if (!drv) {
                error_setg(errp, "Unknown driver '%s'", drvname);
                return -ENOENT;
            }
        }
    }

    /* At the protocol level, some driver must have been determined by now;
     * at the format level, drv may stay NULL for probing later on. */
    assert(drv || !protocol);

    /* Driver-specific filename parsing */
    if (drv && drv->bdrv_parse_filename && parse_filename) {
        drv->bdrv_parse_filename(filename, *options, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        if (!drv->bdrv_needs_filename) {
            qdict_del(*options, "filename");
        }
    }

    return 0;
}
1226
/*
 * Change the backing file of @bs to @backing_hd; pass NULL to detach the
 * current backing file.
 *
 * The old backing file's op blocker (if any) is released first. For a new
 * backing file a blocker is installed that blocks all operations except
 * BLOCK_OP_TYPE_COMMIT_TARGET, and bs->backing_file/backing_format are
 * updated to match. Limits are refreshed in every case.
 *
 * NOTE(review): callers such as bdrv_open_backing_file() do not unref
 * @backing_hd afterwards, so this appears to take over the caller's
 * reference — confirm against bdrv_close().
 */
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
{

    if (bs->backing_hd) {
        /* Release the blocker installed for the previous backing file */
        assert(bs->backing_blocker);
        bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
    } else if (backing_hd) {
        /* First backing file: create the blocker error used as the reason */
        error_setg(&bs->backing_blocker,
                   "node is used as backing hd of '%s'",
                   bdrv_get_device_or_node_name(bs));
    }

    bs->backing_hd = backing_hd;
    if (!backing_hd) {
        /* Detaching: drop the blocker and just refresh the limits */
        error_free(bs->backing_blocker);
        bs->backing_blocker = NULL;
        goto out;
    }
    bs->open_flags &= ~BDRV_O_NO_BACKING;
    pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
    pstrcpy(bs->backing_format, sizeof(bs->backing_format),
            backing_hd->drv ? backing_hd->drv->format_name : "");

    bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
    /* Otherwise we won't be able to commit due to check in bdrv_commit */
    bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
                    bs->backing_blocker);
out:
    bdrv_refresh_limits(bs, NULL);
}
1257
/*
 * Opens the backing file for a BlockDriverState if not yet open
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict is transferred to this
 * function (even on failure), so if the caller intends to reuse the dictionary,
 * it needs to use QINCREF() before calling bdrv_file_open.
 *
 * Returns 0 on success (including the no-backing-file no-op cases),
 * a negative errno with *errp set on failure.
 */
int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
{
    char *backing_filename = g_malloc0(PATH_MAX);
    int ret = 0;
    BlockDriverState *backing_hd;
    Error *local_err = NULL;

    /* Already open: nothing to do, but the options ref is still consumed */
    if (bs->backing_hd != NULL) {
        QDECREF(options);
        goto free_exit;
    }

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    bs->open_flags &= ~BDRV_O_NO_BACKING;
    if (qdict_haskey(options, "file.filename")) {
        /* The options fully specify the file; no path resolution needed */
        backing_filename[0] = '\0';
    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
        /* Neither a backing filename nor any options: no backing file */
        QDECREF(options);
        goto free_exit;
    } else {
        /* Resolve the backing filename relative to the image's location */
        bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
                                       &local_err);
        if (local_err) {
            ret = -EINVAL;
            error_propagate(errp, local_err);
            QDECREF(options);
            goto free_exit;
        }
    }

    if (!bs->drv || !bs->drv->supports_backing) {
        ret = -EINVAL;
        error_setg(errp, "Driver doesn't support backing files");
        QDECREF(options);
        goto free_exit;
    }

    backing_hd = bdrv_new();

    /* Let an explicit "driver" option override the stored backing format */
    if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
        qdict_put(options, "driver", qstring_from_str(bs->backing_format));
    }

    assert(bs->backing_hd == NULL);
    /* bdrv_open() consumes the reference to options */
    ret = bdrv_open(&backing_hd,
                    *backing_filename ? backing_filename : NULL, NULL, options,
                    bdrv_backing_flags(bs->open_flags), NULL, &local_err);
    if (ret < 0) {
        bdrv_unref(backing_hd);
        backing_hd = NULL;
        bs->open_flags |= BDRV_O_NO_BACKING;
        error_setg(errp, "Could not open backing file: %s",
                   error_get_pretty(local_err));
        error_free(local_err);
        goto free_exit;
    }
    bdrv_set_backing_hd(bs, backing_hd);

free_exit:
    g_free(backing_filename);
    return ret;
}
1332
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001333/*
Max Reitzda557aa2013-12-20 19:28:11 +01001334 * Opens a disk image whose options are given as BlockdevRef in another block
1335 * device's options.
1336 *
Max Reitzda557aa2013-12-20 19:28:11 +01001337 * If allow_none is true, no image will be opened if filename is false and no
1338 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1339 *
1340 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1341 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1342 * itself, all options starting with "${bdref_key}." are considered part of the
1343 * BlockdevRef.
1344 *
1345 * The BlockdevRef will be removed from the options QDict.
Max Reitzf67503e2014-02-18 18:33:05 +01001346 *
1347 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
Max Reitzda557aa2013-12-20 19:28:11 +01001348 */
1349int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1350 QDict *options, const char *bdref_key, int flags,
Max Reitzf7d9fd82014-02-18 18:33:12 +01001351 bool allow_none, Error **errp)
Max Reitzda557aa2013-12-20 19:28:11 +01001352{
1353 QDict *image_options;
1354 int ret;
1355 char *bdref_key_dot;
1356 const char *reference;
1357
Max Reitzf67503e2014-02-18 18:33:05 +01001358 assert(pbs);
1359 assert(*pbs == NULL);
1360
Max Reitzda557aa2013-12-20 19:28:11 +01001361 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1362 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1363 g_free(bdref_key_dot);
1364
1365 reference = qdict_get_try_str(options, bdref_key);
1366 if (!filename && !reference && !qdict_size(image_options)) {
1367 if (allow_none) {
1368 ret = 0;
1369 } else {
1370 error_setg(errp, "A block device must be specified for \"%s\"",
1371 bdref_key);
1372 ret = -EINVAL;
1373 }
Markus Armbrusterb20e61e2014-05-28 11:16:57 +02001374 QDECREF(image_options);
Max Reitzda557aa2013-12-20 19:28:11 +01001375 goto done;
1376 }
1377
Max Reitzf7d9fd82014-02-18 18:33:12 +01001378 ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);
Max Reitzda557aa2013-12-20 19:28:11 +01001379
1380done:
1381 qdict_del(options, bdref_key);
1382 return ret;
1383}
1384
Chen Gang6b8aeca2014-06-23 23:28:23 +08001385int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
Kevin Wolfb9988752014-04-03 12:09:34 +02001386{
1387 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001388 char *tmp_filename = g_malloc0(PATH_MAX + 1);
Kevin Wolfb9988752014-04-03 12:09:34 +02001389 int64_t total_size;
Chunyan Liu83d05212014-06-05 17:20:51 +08001390 QemuOpts *opts = NULL;
Kevin Wolfb9988752014-04-03 12:09:34 +02001391 QDict *snapshot_options;
1392 BlockDriverState *bs_snapshot;
1393 Error *local_err;
1394 int ret;
1395
1396 /* if snapshot, we create a temporary backing file and open it
1397 instead of opening 'filename' directly */
1398
1399 /* Get the required size from the image */
Kevin Wolff1877432014-04-04 17:07:19 +02001400 total_size = bdrv_getlength(bs);
1401 if (total_size < 0) {
Chen Gang6b8aeca2014-06-23 23:28:23 +08001402 ret = total_size;
Kevin Wolff1877432014-04-04 17:07:19 +02001403 error_setg_errno(errp, -total_size, "Could not get image size");
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001404 goto out;
Kevin Wolff1877432014-04-04 17:07:19 +02001405 }
Kevin Wolfb9988752014-04-03 12:09:34 +02001406
1407 /* Create the temporary image */
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001408 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
Kevin Wolfb9988752014-04-03 12:09:34 +02001409 if (ret < 0) {
1410 error_setg_errno(errp, -ret, "Could not get temporary filename");
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001411 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001412 }
1413
Max Reitzef810432014-12-02 18:32:42 +01001414 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
Chunyan Liuc282e1f2014-06-05 17:21:11 +08001415 &error_abort);
Markus Armbruster39101f22015-02-12 16:46:36 +01001416 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
Max Reitzef810432014-12-02 18:32:42 +01001417 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
Chunyan Liu83d05212014-06-05 17:20:51 +08001418 qemu_opts_del(opts);
Kevin Wolfb9988752014-04-03 12:09:34 +02001419 if (ret < 0) {
1420 error_setg_errno(errp, -ret, "Could not create temporary overlay "
1421 "'%s': %s", tmp_filename,
1422 error_get_pretty(local_err));
1423 error_free(local_err);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001424 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001425 }
1426
1427 /* Prepare a new options QDict for the temporary file */
1428 snapshot_options = qdict_new();
1429 qdict_put(snapshot_options, "file.driver",
1430 qstring_from_str("file"));
1431 qdict_put(snapshot_options, "file.filename",
1432 qstring_from_str(tmp_filename));
1433
Markus Armbrustere4e99862014-10-07 13:59:03 +02001434 bs_snapshot = bdrv_new();
Kevin Wolfb9988752014-04-03 12:09:34 +02001435
1436 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
Max Reitzef810432014-12-02 18:32:42 +01001437 flags, &bdrv_qcow2, &local_err);
Kevin Wolfb9988752014-04-03 12:09:34 +02001438 if (ret < 0) {
1439 error_propagate(errp, local_err);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001440 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001441 }
1442
1443 bdrv_append(bs_snapshot, bs);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001444
1445out:
1446 g_free(tmp_filename);
Chen Gang6b8aeca2014-06-23 23:28:23 +08001447 return ret;
Kevin Wolfb9988752014-04-03 12:09:34 +02001448}
1449
/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict belongs to the block layer
 * after the call (even on failure), so if the caller intends to reuse the
 * dictionary, it needs to use QINCREF() before calling bdrv_open.
 *
 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
 * If it is not NULL, the referenced BDS will be reused.
 *
 * The reference parameter may be used to specify an existing block device which
 * should be opened. If specified, neither options nor a filename may be given,
 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
 *
 * Returns 0 on success; on failure a negative value is returned and errp is
 * set.  On failure the options QDict has still been consumed.
 */
int bdrv_open(BlockDriverState **pbs, const char *filename,
              const char *reference, QDict *options, int flags,
              BlockDriver *drv, Error **errp)
{
    int ret;
    BlockDriverState *file = NULL, *bs;
    const char *drvname;
    Error *local_err = NULL;
    int snapshot_flags = 0;

    assert(pbs);

    /* Opening by reference: look up an existing BDS and take a new
     * reference on it; filename/options must not be supplied. */
    if (reference) {
        bool options_non_empty = options ? qdict_size(options) : false;
        QDECREF(options);

        if (*pbs) {
            error_setg(errp, "Cannot reuse an existing BDS when referencing "
                       "another block device");
            return -EINVAL;
        }

        if (filename || options_non_empty) {
            error_setg(errp, "Cannot reference an existing block device with "
                       "additional options or a new filename");
            return -EINVAL;
        }

        bs = bdrv_lookup_bs(reference, reference, errp);
        if (!bs) {
            return -ENODEV;
        }
        bdrv_ref(bs);
        *pbs = bs;
        return 0;
    }

    if (*pbs) {
        bs = *pbs;
    } else {
        bs = bdrv_new();
    }

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
    if (local_err) {
        goto fail;
    }

    /* Find the right image format driver */
    drv = NULL;
    drvname = qdict_get_try_str(options, "driver");
    if (drvname) {
        drv = bdrv_find_format(drvname);
        qdict_del(options, "driver");
        if (!drv) {
            error_setg(errp, "Unknown driver: '%s'", drvname);
            ret = -EINVAL;
            goto fail;
        }
    }

    assert(drvname || !(flags & BDRV_O_PROTOCOL));
    if (drv && !drv->bdrv_file_open) {
        /* If the user explicitly wants a format driver here, we'll need to add
         * another layer for the protocol in bs->file */
        flags &= ~BDRV_O_PROTOCOL;
    }

    /* bs->options keeps the full set for later reopen; the working copy
     * below is consumed (keys deleted) as the layers parse their options. */
    bs->options = options;
    options = qdict_clone_shallow(options);

    /* Open image file without format layer */
    if ((flags & BDRV_O_PROTOCOL) == 0) {
        if (flags & BDRV_O_RDWR) {
            flags |= BDRV_O_ALLOW_RDWR;
        }
        if (flags & BDRV_O_SNAPSHOT) {
            /* Remember the snapshot flags for the overlay created below;
             * the image itself is opened with backing-file semantics. */
            snapshot_flags = bdrv_temp_snapshot_flags(flags);
            flags = bdrv_backing_flags(flags);
        }

        assert(file == NULL);
        ret = bdrv_open_image(&file, filename, options, "file",
                              bdrv_inherited_flags(flags),
                              true, &local_err);
        if (ret < 0) {
            goto fail;
        }
    }

    /* Image format probing */
    bs->probed = !drv;
    if (!drv && file) {
        ret = find_image_format(file, filename, &drv, &local_err);
        if (ret < 0) {
            goto fail;
        }
    } else if (!drv) {
        error_setg(errp, "Must specify either driver or file");
        ret = -EINVAL;
        goto fail;
    }

    /* Open the image */
    ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
    if (ret < 0) {
        goto fail;
    }

    /* bdrv_open_common() may have taken its own reference to 'file'
     * (as bs->file); drop ours if it chose a different BDS. */
    if (file && (bs->file != file)) {
        bdrv_unref(file);
        file = NULL;
    }

    /* If there is a backing file, use it */
    if ((flags & BDRV_O_NO_BACKING) == 0) {
        QDict *backing_options;

        qdict_extract_subqdict(options, &backing_options, "backing.");
        ret = bdrv_open_backing_file(bs, backing_options, &local_err);
        if (ret < 0) {
            /* The image is open from here on: use close_and_fail */
            goto close_and_fail;
        }
    }

    bdrv_refresh_filename(bs);

    /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
     * temporary snapshot afterwards. */
    if (snapshot_flags) {
        ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
        if (local_err) {
            goto close_and_fail;
        }
    }

    /* Check if any unknown options were used */
    if (options && (qdict_size(options) != 0)) {
        const QDictEntry *entry = qdict_first(options);
        if (flags & BDRV_O_PROTOCOL) {
            error_setg(errp, "Block protocol '%s' doesn't support the option "
                       "'%s'", drv->format_name, entry->key);
        } else {
            error_setg(errp, "Block format '%s' used by device '%s' doesn't "
                       "support the option '%s'", drv->format_name,
                       bdrv_get_device_name(bs), entry->key);
        }

        ret = -EINVAL;
        goto close_and_fail;
    }

    if (!bdrv_key_required(bs)) {
        if (bs->blk) {
            blk_dev_change_media_cb(bs->blk, true);
        }
    } else if (!runstate_check(RUN_STATE_PRELAUNCH)
               && !runstate_check(RUN_STATE_INMIGRATE)
               && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
        error_setg(errp,
                   "Guest must be stopped for opening of encrypted image");
        ret = -EBUSY;
        goto close_and_fail;
    }

    QDECREF(options);
    *pbs = bs;
    return 0;

fail:
    /* Failure before the image was fully opened: undo references only */
    if (file != NULL) {
        bdrv_unref(file);
    }
    QDECREF(bs->options);
    QDECREF(options);
    bs->options = NULL;
    if (!*pbs) {
        /* If *pbs is NULL, a new BDS has been created in this function and
           needs to be freed now. Otherwise, it does not need to be closed,
           since it has not really been opened yet. */
        bdrv_unref(bs);
    }
    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;

close_and_fail:
    /* See fail path, but now the BDS has to be always closed */
    if (*pbs) {
        bdrv_close(bs);
    } else {
        bdrv_unref(bs);
    }
    QDECREF(options);
    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;
}
1670
/* One device staged for an atomic, transactional reopen; queued by
 * bdrv_reopen_queue() and processed by bdrv_reopen_multiple(). */
typedef struct BlockReopenQueueEntry {
    bool prepared;    /* true once bdrv_reopen_prepare() succeeded for state */
    BDRVReopenState state;    /* staged flags and driver-private reopen data */
    QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
} BlockReopenQueueEntry;
1676
1677/*
1678 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1679 * reopen of multiple devices.
1680 *
1681 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1682 * already performed, or alternatively may be NULL a new BlockReopenQueue will
1683 * be created and initialized. This newly created BlockReopenQueue should be
1684 * passed back in for subsequent calls that are intended to be of the same
1685 * atomic 'set'.
1686 *
1687 * bs is the BlockDriverState to add to the reopen queue.
1688 *
1689 * flags contains the open flags for the associated bs
1690 *
1691 * returns a pointer to bs_queue, which is either the newly allocated
1692 * bs_queue, or the existing bs_queue being used.
1693 *
1694 */
1695BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1696 BlockDriverState *bs, int flags)
1697{
1698 assert(bs != NULL);
1699
1700 BlockReopenQueueEntry *bs_entry;
1701 if (bs_queue == NULL) {
1702 bs_queue = g_new0(BlockReopenQueue, 1);
1703 QSIMPLEQ_INIT(bs_queue);
1704 }
1705
Kevin Wolff1f25a22014-04-25 19:04:55 +02001706 /* bdrv_open() masks this flag out */
1707 flags &= ~BDRV_O_PROTOCOL;
1708
Jeff Codye971aa12012-09-20 15:13:19 -04001709 if (bs->file) {
Kevin Wolff1f25a22014-04-25 19:04:55 +02001710 bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
Jeff Codye971aa12012-09-20 15:13:19 -04001711 }
1712
1713 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1714 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1715
1716 bs_entry->state.bs = bs;
1717 bs_entry->state.flags = flags;
1718
1719 return bs_queue;
1720}
1721
1722/*
1723 * Reopen multiple BlockDriverStates atomically & transactionally.
1724 *
1725 * The queue passed in (bs_queue) must have been built up previous
1726 * via bdrv_reopen_queue().
1727 *
1728 * Reopens all BDS specified in the queue, with the appropriate
1729 * flags. All devices are prepared for reopen, and failure of any
1730 * device will cause all device changes to be abandonded, and intermediate
1731 * data cleaned up.
1732 *
1733 * If all devices prepare successfully, then the changes are committed
1734 * to all devices.
1735 *
1736 */
1737int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1738{
1739 int ret = -1;
1740 BlockReopenQueueEntry *bs_entry, *next;
1741 Error *local_err = NULL;
1742
1743 assert(bs_queue != NULL);
1744
1745 bdrv_drain_all();
1746
1747 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1748 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1749 error_propagate(errp, local_err);
1750 goto cleanup;
1751 }
1752 bs_entry->prepared = true;
1753 }
1754
1755 /* If we reach this point, we have success and just need to apply the
1756 * changes
1757 */
1758 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1759 bdrv_reopen_commit(&bs_entry->state);
1760 }
1761
1762 ret = 0;
1763
1764cleanup:
1765 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1766 if (ret && bs_entry->prepared) {
1767 bdrv_reopen_abort(&bs_entry->state);
1768 }
1769 g_free(bs_entry);
1770 }
1771 g_free(bs_queue);
1772 return ret;
1773}
1774
1775
1776/* Reopen a single BlockDriverState with the specified flags. */
1777int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1778{
1779 int ret = -1;
1780 Error *local_err = NULL;
1781 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1782
1783 ret = bdrv_reopen_multiple(queue, &local_err);
1784 if (local_err != NULL) {
1785 error_propagate(errp, local_err);
1786 }
1787 return ret;
1788}
1789
1790
1791/*
1792 * Prepares a BlockDriverState for reopen. All changes are staged in the
1793 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1794 * the block driver layer .bdrv_reopen_prepare()
1795 *
1796 * bs is the BlockDriverState to reopen
1797 * flags are the new open flags
1798 * queue is the reopen queue
1799 *
1800 * Returns 0 on success, non-zero on error. On error errp will be set
1801 * as well.
1802 *
1803 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1804 * It is the responsibility of the caller to then call the abort() or
1805 * commit() for any other BDS that have been left in a prepare() state
1806 *
1807 */
1808int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1809 Error **errp)
1810{
1811 int ret = -1;
1812 Error *local_err = NULL;
1813 BlockDriver *drv;
1814
1815 assert(reopen_state != NULL);
1816 assert(reopen_state->bs->drv != NULL);
1817 drv = reopen_state->bs->drv;
1818
1819 /* if we are to stay read-only, do not allow permission change
1820 * to r/w */
1821 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1822 reopen_state->flags & BDRV_O_RDWR) {
Alberto Garcia81e5f782015-04-08 12:29:19 +03001823 error_setg(errp, "Node '%s' is read only",
1824 bdrv_get_device_or_node_name(reopen_state->bs));
Jeff Codye971aa12012-09-20 15:13:19 -04001825 goto error;
1826 }
1827
1828
1829 ret = bdrv_flush(reopen_state->bs);
1830 if (ret) {
1831 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1832 strerror(-ret));
1833 goto error;
1834 }
1835
1836 if (drv->bdrv_reopen_prepare) {
1837 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1838 if (ret) {
1839 if (local_err != NULL) {
1840 error_propagate(errp, local_err);
1841 } else {
Luiz Capitulinod8b68952013-06-10 11:29:27 -04001842 error_setg(errp, "failed while preparing to reopen image '%s'",
1843 reopen_state->bs->filename);
Jeff Codye971aa12012-09-20 15:13:19 -04001844 }
1845 goto error;
1846 }
1847 } else {
1848 /* It is currently mandatory to have a bdrv_reopen_prepare()
1849 * handler for each supported drv. */
Alberto Garcia81e5f782015-04-08 12:29:19 +03001850 error_setg(errp, "Block format '%s' used by node '%s' "
1851 "does not support reopening files", drv->format_name,
1852 bdrv_get_device_or_node_name(reopen_state->bs));
Jeff Codye971aa12012-09-20 15:13:19 -04001853 ret = -1;
1854 goto error;
1855 }
1856
1857 ret = 0;
1858
1859error:
1860 return ret;
1861}
1862
1863/*
1864 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1865 * makes them final by swapping the staging BlockDriverState contents into
1866 * the active BlockDriverState contents.
1867 */
1868void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1869{
1870 BlockDriver *drv;
1871
1872 assert(reopen_state != NULL);
1873 drv = reopen_state->bs->drv;
1874 assert(drv != NULL);
1875
1876 /* If there are any driver level actions to take */
1877 if (drv->bdrv_reopen_commit) {
1878 drv->bdrv_reopen_commit(reopen_state);
1879 }
1880
1881 /* set BDS specific flags now */
1882 reopen_state->bs->open_flags = reopen_state->flags;
1883 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1884 BDRV_O_CACHE_WB);
1885 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
Kevin Wolf355ef4a2013-12-11 20:14:09 +01001886
Kevin Wolf3baca892014-07-16 17:48:16 +02001887 bdrv_refresh_limits(reopen_state->bs, NULL);
Jeff Codye971aa12012-09-20 15:13:19 -04001888}
1889
1890/*
1891 * Abort the reopen, and delete and free the staged changes in
1892 * reopen_state
1893 */
1894void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1895{
1896 BlockDriver *drv;
1897
1898 assert(reopen_state != NULL);
1899 drv = reopen_state->bs->drv;
1900 assert(drv != NULL);
1901
1902 if (drv->bdrv_reopen_abort) {
1903 drv->bdrv_reopen_abort(reopen_state);
1904 }
1905}
1906
1907
/* Close a BlockDriverState in place: cancel its job, quiesce all I/O,
 * detach backing and protocol layers, and reset the BDS fields so the
 * struct can be reused or freed.  The order below is significant. */
void bdrv_close(BlockDriverState *bs)
{
    BdrvAioNotifier *ban, *ban_next;

    if (bs->job) {
        block_job_cancel_sync(bs->job);
    }
    bdrv_drain_all(); /* complete I/O */
    bdrv_flush(bs);
    bdrv_drain_all(); /* in case flush left pending I/O */
    notifier_list_notify(&bs->close_notifiers, bs);

    if (bs->drv) {
        /* Detach and release the backing file first */
        if (bs->backing_hd) {
            BlockDriverState *backing_hd = bs->backing_hd;
            bdrv_set_backing_hd(bs, NULL);
            bdrv_unref(backing_hd);
        }
        /* Driver-specific close, then clear all per-image state */
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
        bs->opaque = NULL;
        bs->drv = NULL;
        bs->copy_on_read = 0;
        bs->backing_file[0] = '\0';
        bs->backing_format[0] = '\0';
        bs->total_sectors = 0;
        bs->encrypted = 0;
        bs->valid_key = 0;
        bs->sg = 0;
        bs->zero_beyond_eof = false;
        QDECREF(bs->options);
        bs->options = NULL;
        QDECREF(bs->full_open_options);
        bs->full_open_options = NULL;

        /* Release the protocol layer underneath */
        if (bs->file != NULL) {
            bdrv_unref(bs->file);
            bs->file = NULL;
        }
    }

    if (bs->blk) {
        blk_dev_change_media_cb(bs->blk, false);
    }

    /*throttling disk I/O limits*/
    if (bs->io_limits_enabled) {
        bdrv_io_limits_disable(bs);
    }

    /* Free any registered AioContext change notifiers */
    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
        g_free(ban);
    }
    QLIST_INIT(&bs->aio_notifiers);
}
1963
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001964void bdrv_close_all(void)
1965{
1966 BlockDriverState *bs;
1967
Benoît Canetdc364f42014-01-23 21:31:32 +01001968 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02001969 AioContext *aio_context = bdrv_get_aio_context(bs);
1970
1971 aio_context_acquire(aio_context);
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001972 bdrv_close(bs);
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02001973 aio_context_release(aio_context);
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001974 }
1975}
1976
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001977/* Check if any requests are in-flight (including throttled requests) */
1978static bool bdrv_requests_pending(BlockDriverState *bs)
1979{
1980 if (!QLIST_EMPTY(&bs->tracked_requests)) {
1981 return true;
1982 }
Benoît Canetcc0681c2013-09-02 14:14:39 +02001983 if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
1984 return true;
1985 }
1986 if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001987 return true;
1988 }
1989 if (bs->file && bdrv_requests_pending(bs->file)) {
1990 return true;
1991 }
1992 if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
1993 return true;
1994 }
1995 return false;
1996}
1997
Stefan Hajnoczi5b98db02014-10-21 12:03:55 +01001998static bool bdrv_drain_one(BlockDriverState *bs)
1999{
2000 bool bs_busy;
2001
2002 bdrv_flush_io_queue(bs);
2003 bdrv_start_throttled_reqs(bs);
2004 bs_busy = bdrv_requests_pending(bs);
2005 bs_busy |= aio_poll(bdrv_get_aio_context(bs), bs_busy);
2006 return bs_busy;
2007}
2008
2009/*
2010 * Wait for pending requests to complete on a single BlockDriverState subtree
2011 *
2012 * See the warning in bdrv_drain_all(). This function can only be called if
2013 * you are sure nothing can generate I/O because you have op blockers
2014 * installed.
2015 *
2016 * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
2017 * AioContext.
2018 */
2019void bdrv_drain(BlockDriverState *bs)
2020{
2021 while (bdrv_drain_one(bs)) {
2022 /* Keep iterating */
2023 }
2024}
2025
/*
 * Wait for pending requests to complete across all BlockDriverStates
 *
 * This function does not flush data to disk, use bdrv_flush_all() for that
 * after calling this function.
 *
 * Note that completion of an asynchronous I/O operation can trigger any
 * number of other I/O operations on other devices---for example a coroutine
 * can be arbitrarily complex and a constant flow of I/O can come until the
 * coroutine is complete. Because of this, it is not possible to have a
 * function to drain a single device's I/O queue.
 */
void bdrv_drain_all(void)
{
    /* Always run first iteration so any pending completion BHs run */
    bool busy = true;
    BlockDriverState *bs;

    /* Phase 1: pause all block jobs so they stop issuing new requests */
    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        if (bs->job) {
            block_job_pause(bs->job);
        }
        aio_context_release(aio_context);
    }

    /* Phase 2: iterate draining every BDS until a full pass finds all idle;
     * each BDS is drained under its own AioContext lock */
    while (busy) {
        busy = false;

        QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
            AioContext *aio_context = bdrv_get_aio_context(bs);

            aio_context_acquire(aio_context);
            busy |= bdrv_drain_one(bs);
            aio_context_release(aio_context);
        }
    }

    /* Phase 3: resume the jobs paused in phase 1 */
    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        if (bs->job) {
            block_job_resume(bs->job);
        }
        aio_context_release(aio_context);
    }
}
2076
/* Make a BlockDriverState anonymous by removing it from the bdrv_states
 * and graph_bdrv_states lists.  The node_name is also cleared to prevent
 * a double remove. */
void bdrv_make_anon(BlockDriverState *bs)
{
    /*
     * Take care to remove bs from bdrv_states only when it's actually
     * in it. Note that bs->device_list.tqe_prev is initially null,
     * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish
     * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
     * resetting it to null on remove.
     */
    if (bs->device_list.tqe_prev) {
        QTAILQ_REMOVE(&bdrv_states, bs, device_list);
        bs->device_list.tqe_prev = NULL;
    }
    /* A non-empty node_name means the node is on graph_bdrv_states */
    if (bs->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
    }
    bs->node_name[0] = '\0';
}
2098
Paolo Bonzinie023b2e2012-05-08 16:51:41 +02002099static void bdrv_rebind(BlockDriverState *bs)
2100{
2101 if (bs->drv && bs->drv->bdrv_rebind) {
2102 bs->drv->bdrv_rebind(bs);
2103 }
2104}
2105
/* Copy from bs_src to bs_dest the fields that must stay attached to the
 * device rather than follow the image contents.  Used by bdrv_swap() to
 * move these fields back after the wholesale struct swap. */
static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
                                     BlockDriverState *bs_src)
{
    /* move some fields that need to stay attached to the device */

    /* dev info */
    bs_dest->guest_block_size = bs_src->guest_block_size;
    bs_dest->copy_on_read = bs_src->copy_on_read;

    bs_dest->enable_write_cache = bs_src->enable_write_cache;

    /* i/o throttled req */
    memcpy(&bs_dest->throttle_state,
           &bs_src->throttle_state,
           sizeof(ThrottleState));
    bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
    bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
    bs_dest->io_limits_enabled = bs_src->io_limits_enabled;

    /* r/w error */
    bs_dest->on_read_error = bs_src->on_read_error;
    bs_dest->on_write_error = bs_src->on_write_error;

    /* i/o status */
    bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
    bs_dest->iostatus = bs_src->iostatus;

    /* dirty bitmap */
    bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;

    /* reference count */
    bs_dest->refcnt = bs_src->refcnt;

    /* job */
    bs_dest->job = bs_src->job;

    /* keep the same entry in bdrv_states */
    bs_dest->device_list = bs_src->device_list;
    bs_dest->blk = bs_src->blk;

    memcpy(bs_dest->op_blockers, bs_src->op_blockers,
           sizeof(bs_dest->op_blockers));
}
2149
/*
 * Swap bs contents for two image chains while they are live,
 * while keeping required fields on the BlockDriverState that is
 * actually attached to a device.
 *
 * This will modify the BlockDriverState fields, and swap contents
 * between bs_new and bs_old. Both bs_new and bs_old are modified.
 *
 * bs_new must not be attached to a BlockBackend.
 *
 * This function does not create any image files.
 */
void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
{
    BlockDriverState tmp;

    /* The code needs to swap the node_name but simply swapping node_list won't
     * work so first remove the nodes from the graph list, do the swap then
     * insert them back if needed.
     */
    if (bs_new->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
    }
    if (bs_old->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
    }

    /* bs_new must be unattached and shouldn't have anything fancy enabled */
    assert(!bs_new->blk);
    assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
    assert(bs_new->job == NULL);
    assert(bs_new->io_limits_enabled == false);
    assert(!throttle_have_timer(&bs_new->throttle_state));

    /* Wholesale struct swap ... */
    tmp = *bs_new;
    *bs_new = *bs_old;
    *bs_old = tmp;

    /* there are some fields that should not be swapped, move them back */
    bdrv_move_feature_fields(&tmp, bs_old);
    bdrv_move_feature_fields(bs_old, bs_new);
    bdrv_move_feature_fields(bs_new, &tmp);

    /* bs_new must remain unattached */
    assert(!bs_new->blk);

    /* Check a few fields that should remain attached to the device */
    assert(bs_new->job == NULL);
    assert(bs_new->io_limits_enabled == false);
    assert(!throttle_have_timer(&bs_new->throttle_state));

    /* insert the nodes back into the graph node list if needed */
    if (bs_new->node_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
    }
    if (bs_old->node_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
    }

    /* Let the drivers fix up any pointers into the swapped structs */
    bdrv_rebind(bs_new);
    bdrv_rebind(bs_old);
}
2212
/*
 * Add new bs contents at the top of an image chain while the chain is
 * live, while keeping required fields on the top layer.
 *
 * This will modify the BlockDriverState fields, and swap contents
 * between bs_new and bs_top. Both bs_new and bs_top are modified.
 *
 * bs_new must not be attached to a BlockBackend.
 *
 * This function does not create any image files.
 */
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
{
    /* Swap the contents first so that device-visible state (name, attached
     * backend, feature fields) stays with bs_top; see bdrv_swap() above. */
    bdrv_swap(bs_new, bs_top);

    /* The contents of 'tmp' will become bs_top, as we are
     * swapping bs_new and bs_top contents. */
    bdrv_set_backing_hd(bs_top, bs_new);
}
2232
/* Free a BlockDriverState whose last reference has been dropped.
 * Preconditions (asserted): no block job, no op blockers, refcount zero,
 * and no dirty bitmaps still attached. */
static void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->job);
    assert(bdrv_op_blocker_is_empty(bs));
    assert(!bs->refcnt);
    assert(QLIST_EMPTY(&bs->dirty_bitmaps));

    /* Close before unlinking: bdrv_close() may still need graph state. */
    bdrv_close(bs);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    g_free(bs);
}
2247
aliguorie97fc192009-04-21 23:11:50 +00002248/*
2249 * Run consistency checks on an image
2250 *
Kevin Wolfe076f332010-06-29 11:43:13 +02002251 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +02002252 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +02002253 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +00002254 */
Kevin Wolf4534ff52012-05-11 16:07:02 +02002255int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
aliguorie97fc192009-04-21 23:11:50 +00002256{
Max Reitz908bcd52014-08-07 22:47:55 +02002257 if (bs->drv == NULL) {
2258 return -ENOMEDIUM;
2259 }
aliguorie97fc192009-04-21 23:11:50 +00002260 if (bs->drv->bdrv_check == NULL) {
2261 return -ENOTSUP;
2262 }
2263
Kevin Wolfe076f332010-06-29 11:43:13 +02002264 memset(res, 0, sizeof(*res));
Kevin Wolf4534ff52012-05-11 16:07:02 +02002265 return bs->drv->bdrv_check(bs, res, fix);
aliguorie97fc192009-04-21 23:11:50 +00002266}
2267
/* Number of sectors copied per iteration of the commit loop below. */
#define COMMIT_BUF_SECTORS 2048

/* commit COW file into the raw image */
int bdrv_commit(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    int64_t sector, total_sectors, length, backing_length;
    int n, ro, open_flags;
    int ret = 0;
    uint8_t *buf = NULL;

    if (!drv)
        return -ENOMEDIUM;

    /* Nothing to commit into without a backing file. */
    if (!bs->backing_hd) {
        return -ENOTSUP;
    }

    /* Refuse if another user (e.g. a block job) has blocked commit on
     * either end of the chain. */
    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
        bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
        return -EBUSY;
    }

    /* Remember the backing file's current access mode so it can be
     * restored in ro_cleanup after a temporary r/w reopen. */
    ro = bs->backing_hd->read_only;
    open_flags = bs->backing_hd->open_flags;

    if (ro) {
        if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
            return -EACCES;
        }
    }

    length = bdrv_getlength(bs);
    if (length < 0) {
        ret = length;
        goto ro_cleanup;
    }

    backing_length = bdrv_getlength(bs->backing_hd);
    if (backing_length < 0) {
        ret = backing_length;
        goto ro_cleanup;
    }

    /* If our top snapshot is larger than the backing file image,
     * grow the backing file image if possible. If not possible,
     * we must return an error */
    if (length > backing_length) {
        ret = bdrv_truncate(bs->backing_hd, length);
        if (ret < 0) {
            goto ro_cleanup;
        }
    }

    total_sectors = length >> BDRV_SECTOR_BITS;

    /* qemu_try_blockalign() for bs will choose an alignment that works for
     * bs->backing_hd as well, so no need to compare the alignment manually. */
    buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
    if (buf == NULL) {
        ret = -ENOMEM;
        goto ro_cleanup;
    }

    /* Copy only clusters that are allocated in the top image; unallocated
     * ranges already read through to the backing file. */
    for (sector = 0; sector < total_sectors; sector += n) {
        ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
        if (ret < 0) {
            goto ro_cleanup;
        }
        if (ret) {
            ret = bdrv_read(bs, sector, buf, n);
            if (ret < 0) {
                goto ro_cleanup;
            }

            ret = bdrv_write(bs->backing_hd, sector, buf, n);
            if (ret < 0) {
                goto ro_cleanup;
            }
        }
    }

    /* Drop the now-redundant data from the top image, if the format
     * driver supports it. */
    if (drv->bdrv_make_empty) {
        ret = drv->bdrv_make_empty(bs);
        if (ret < 0) {
            goto ro_cleanup;
        }
        bdrv_flush(bs);
    }

    /*
     * Make sure all data we wrote to the backing device is actually
     * stable on disk.
     */
    if (bs->backing_hd) {
        bdrv_flush(bs->backing_hd);
    }

    ret = 0;
ro_cleanup:
    qemu_vfree(buf);

    if (ro) {
        /* ignoring error return here */
        bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
    }

    return ret;
}
2377
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002378int bdrv_commit_all(void)
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002379{
2380 BlockDriverState *bs;
2381
Benoît Canetdc364f42014-01-23 21:31:32 +01002382 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002383 AioContext *aio_context = bdrv_get_aio_context(bs);
2384
2385 aio_context_acquire(aio_context);
Jeff Cody272d2d82013-02-26 09:55:48 -05002386 if (bs->drv && bs->backing_hd) {
2387 int ret = bdrv_commit(bs);
2388 if (ret < 0) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002389 aio_context_release(aio_context);
Jeff Cody272d2d82013-02-26 09:55:48 -05002390 return ret;
2391 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002392 }
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002393 aio_context_release(aio_context);
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002394 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002395 return 0;
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002396}
2397
/**
 * Remove an active request from the tracked requests list
 *
 * This function should be called when a tracked request is completing.
 */
static void tracked_request_end(BdrvTrackedRequest *req)
{
    /* A serialising request counts towards serialising_in_flight; give
     * that slot back before waking up waiters. */
    if (req->serialising) {
        req->bs->serialising_in_flight--;
    }

    QLIST_REMOVE(req, list);
    /* Wake every coroutine that blocked on an overlap with this request. */
    qemu_co_queue_restart_all(&req->wait_queue);
}
2412
/**
 * Add an active request to the tracked requests list
 */
static void tracked_request_begin(BdrvTrackedRequest *req,
                                  BlockDriverState *bs,
                                  int64_t offset,
                                  unsigned int bytes, bool is_write)
{
    /* The compound literal zero-initializes all fields not listed here
     * (e.g. waiting_for), so do not convert this into individual
     * assignments. The overlap range starts equal to the request itself
     * and may be widened later by mark_request_serialising(). */
    *req = (BdrvTrackedRequest){
        .bs = bs,
        .offset = offset,
        .bytes = bytes,
        .is_write = is_write,
        .co = qemu_coroutine_self(),
        .serialising = false,
        .overlap_offset = offset,
        .overlap_bytes = bytes,
    };

    qemu_co_queue_init(&req->wait_queue);

    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
}
2436
Kevin Wolfe96126f2014-02-08 10:42:18 +01002437static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002438{
Kevin Wolf73271452013-12-04 17:08:50 +01002439 int64_t overlap_offset = req->offset & ~(align - 1);
Kevin Wolfe96126f2014-02-08 10:42:18 +01002440 unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
2441 - overlap_offset;
Kevin Wolf73271452013-12-04 17:08:50 +01002442
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002443 if (!req->serialising) {
2444 req->bs->serialising_in_flight++;
2445 req->serialising = true;
2446 }
Kevin Wolf73271452013-12-04 17:08:50 +01002447
2448 req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
2449 req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002450}
2451
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002452/**
2453 * Round a region to cluster boundaries
2454 */
Paolo Bonzini343bded2013-01-21 17:09:42 +01002455void bdrv_round_to_clusters(BlockDriverState *bs,
2456 int64_t sector_num, int nb_sectors,
2457 int64_t *cluster_sector_num,
2458 int *cluster_nb_sectors)
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002459{
2460 BlockDriverInfo bdi;
2461
2462 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
2463 *cluster_sector_num = sector_num;
2464 *cluster_nb_sectors = nb_sectors;
2465 } else {
2466 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
2467 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
2468 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
2469 nb_sectors, c);
2470 }
2471}
2472
Kevin Wolf73271452013-12-04 17:08:50 +01002473static int bdrv_get_cluster_size(BlockDriverState *bs)
Kevin Wolf793ed472013-12-03 15:31:25 +01002474{
2475 BlockDriverInfo bdi;
Kevin Wolf73271452013-12-04 17:08:50 +01002476 int ret;
Kevin Wolf793ed472013-12-03 15:31:25 +01002477
Kevin Wolf73271452013-12-04 17:08:50 +01002478 ret = bdrv_get_info(bs, &bdi);
2479 if (ret < 0 || bdi.cluster_size == 0) {
2480 return bs->request_alignment;
Kevin Wolf793ed472013-12-03 15:31:25 +01002481 } else {
Kevin Wolf73271452013-12-04 17:08:50 +01002482 return bdi.cluster_size;
Kevin Wolf793ed472013-12-03 15:31:25 +01002483 }
2484}
2485
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002486static bool tracked_request_overlaps(BdrvTrackedRequest *req,
Kevin Wolf793ed472013-12-03 15:31:25 +01002487 int64_t offset, unsigned int bytes)
2488{
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002489 /* aaaa bbbb */
Kevin Wolf73271452013-12-04 17:08:50 +01002490 if (offset >= req->overlap_offset + req->overlap_bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002491 return false;
2492 }
2493 /* bbbb aaaa */
Kevin Wolf73271452013-12-04 17:08:50 +01002494 if (req->overlap_offset >= offset + bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002495 return false;
2496 }
2497 return true;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002498}
2499
/* Block the calling coroutine until no tracked request overlapping 'self'
 * requires serialisation against it.  Returns true if we actually had to
 * wait at least once (the caller may need to re-check state in that case). */
static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
{
    BlockDriverState *bs = self->bs;
    BdrvTrackedRequest *req;
    bool retry;
    bool waited = false;

    /* Fast path: nothing serialising in flight, nothing to wait for. */
    if (!bs->serialising_in_flight) {
        return false;
    }

    /* Each wakeup may reveal new overlapping requests, so rescan the
     * whole list until a full pass finds nothing to wait on. */
    do {
        retry = false;
        QLIST_FOREACH(req, &bs->tracked_requests, list) {
            /* Only wait if at least one of the two requests serialises. */
            if (req == self || (!req->serialising && !self->serialising)) {
                continue;
            }
            if (tracked_request_overlaps(req, self->overlap_offset,
                                         self->overlap_bytes))
            {
                /* Hitting this means there was a reentrant request, for
                 * example, a block driver issuing nested requests. This must
                 * never happen since it means deadlock.
                 */
                assert(qemu_coroutine_self() != req->co);

                /* If the request is already (indirectly) waiting for us, or
                 * will wait for us as soon as it wakes up, then just go on
                 * (instead of producing a deadlock in the former case). */
                if (!req->waiting_for) {
                    self->waiting_for = req;
                    qemu_co_queue_wait(&req->wait_queue);
                    self->waiting_for = NULL;
                    retry = true;
                    waited = true;
                    break;
                }
            }
        }
    } while (retry);

    return waited;
}
2543
Kevin Wolf756e6732010-01-12 12:55:17 +01002544/*
2545 * Return values:
2546 * 0 - success
2547 * -EINVAL - backing format specified, but no file
2548 * -ENOSPC - can't update the backing file because no space is left in the
2549 * image file header
2550 * -ENOTSUP - format driver doesn't support changing the backing file
2551 */
2552int bdrv_change_backing_file(BlockDriverState *bs,
2553 const char *backing_file, const char *backing_fmt)
2554{
2555 BlockDriver *drv = bs->drv;
Paolo Bonzini469ef352012-04-12 14:01:02 +02002556 int ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002557
Paolo Bonzini5f377792012-04-12 14:01:01 +02002558 /* Backing file format doesn't make sense without a backing file */
2559 if (backing_fmt && !backing_file) {
2560 return -EINVAL;
2561 }
2562
Kevin Wolf756e6732010-01-12 12:55:17 +01002563 if (drv->bdrv_change_backing_file != NULL) {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002564 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
Kevin Wolf756e6732010-01-12 12:55:17 +01002565 } else {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002566 ret = -ENOTSUP;
Kevin Wolf756e6732010-01-12 12:55:17 +01002567 }
Paolo Bonzini469ef352012-04-12 14:01:02 +02002568
2569 if (ret == 0) {
2570 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2571 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2572 }
2573 return ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002574}
2575
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002576/*
2577 * Finds the image layer in the chain that has 'bs' as its backing file.
2578 *
2579 * active is the current topmost image.
2580 *
2581 * Returns NULL if bs is not found in active's image chain,
2582 * or if active == bs.
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002583 *
2584 * Returns the bottommost base image if bs == NULL.
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002585 */
2586BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2587 BlockDriverState *bs)
2588{
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002589 while (active && bs != active->backing_hd) {
2590 active = active->backing_hd;
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002591 }
2592
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002593 return active;
2594}
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002595
/* Given a BDS, searches for the base layer. */
BlockDriverState *bdrv_find_base(BlockDriverState *bs)
{
    /* With bs == NULL, bdrv_find_overlay() returns the bottommost image
     * of the chain (see its doc comment). */
    return bdrv_find_overlay(bs, NULL);
}
2601
/* Queue entry used by bdrv_drop_intermediate() to remember the BDSes that
 * must be unlinked and freed once the chain has been relinked. */
typedef struct BlkIntermediateStates {
    BlockDriverState *bs;                          /* layer scheduled for deletion */
    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;   /* link in states_to_delete */
} BlkIntermediateStates;
2606
2607
/*
 * Drops images above 'base' up to and including 'top', and sets the image
 * above 'top' to have base as its backing file.
 *
 * Requires that the overlay to 'top' is opened r/w, so that the backing file
 * information in 'bs' can be properly updated.
 *
 * E.g., this will convert the following chain:
 * bottom <- base <- intermediate <- top <- active
 *
 * to
 *
 * bottom <- base <- active
 *
 * It is allowed for bottom==base, in which case it converts:
 *
 * base <- intermediate <- top <- active
 *
 * to
 *
 * base <- active
 *
 * If backing_file_str is non-NULL, it will be used when modifying top's
 * overlay image metadata.
 *
 * Error conditions:
 *  if active == top, that is considered an error
 *
 */
int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
                           BlockDriverState *base, const char *backing_file_str)
{
    BlockDriverState *intermediate;
    BlockDriverState *base_bs = NULL;
    BlockDriverState *new_top_bs = NULL;
    BlkIntermediateStates *intermediate_state, *next;
    int ret = -EIO;

    QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
    QSIMPLEQ_INIT(&states_to_delete);

    if (!top->drv || !base->drv) {
        goto exit;
    }

    new_top_bs = bdrv_find_overlay(active, top);

    if (new_top_bs == NULL) {
        /* we could not find the image above 'top', this is an error */
        goto exit;
    }

    /* special case of new_top_bs->backing_hd already pointing to base - nothing
     * to do, no intermediate images */
    if (new_top_bs->backing_hd == base) {
        ret = 0;
        goto exit;
    }

    intermediate = top;

    /* now we will go down through the list, and add each BDS we find
     * into our deletion queue, until we hit the 'base'
     */
    while (intermediate) {
        intermediate_state = g_new0(BlkIntermediateStates, 1);
        intermediate_state->bs = intermediate;
        QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);

        if (intermediate->backing_hd == base) {
            base_bs = intermediate->backing_hd;
            break;
        }
        intermediate = intermediate->backing_hd;
    }
    if (base_bs == NULL) {
        /* something went wrong, we did not end at the base. safely
         * unravel everything, and exit with error */
        goto exit;
    }

    /* success - we can delete the intermediate states, and link top->base */
    /* Update the on-disk backing file reference first; only relink the
     * in-memory graph once that has succeeded. */
    backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
    ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
                                   base_bs->drv ? base_bs->drv->format_name : "");
    if (ret) {
        goto exit;
    }
    bdrv_set_backing_hd(new_top_bs, base_bs);

    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
        /* so that bdrv_close() does not recursively close the chain */
        bdrv_set_backing_hd(intermediate_state->bs, NULL);
        bdrv_unref(intermediate_state->bs);
    }
    ret = 0;

exit:
    /* Free the bookkeeping entries on both success and error paths. */
    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
        g_free(intermediate_state);
    }
    return ret;
}
2711
2712
/* Basic sanity checks shared by all byte-granularity requests.
 * The check order decides which error code wins when several conditions
 * hold at once, so do not reorder the tests. */
static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
                                   size_t size)
{
    /* Reject requests larger than the block layer's per-request maximum. */
    if (size > BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) {
        return -EIO;
    }

    if (!bdrv_is_inserted(bs)) {
        return -ENOMEDIUM;
    }

    if (offset < 0) {
        return -EIO;
    }

    return 0;
}
2730
/* Sector-granularity variant of bdrv_check_byte_request(): validate the
 * sector count, then delegate the byte-level checks. */
static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors)
{
    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
        return -EIO;
    }

    return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
                                   nb_sectors * BDRV_SECTOR_SIZE);
}
2741
/* Argument/result bundle for a synchronous I/O request executed inside a
 * coroutine; see bdrv_rw_co_entry() and bdrv_prwv_co(). */
typedef struct RwCo {
    BlockDriverState *bs;
    int64_t offset;          /* byte offset of the request */
    QEMUIOVector *qiov;      /* buffers; qiov->size is the byte count */
    bool is_write;           /* selects pwritev vs preadv path */
    int ret;                 /* result; NOT_DONE while still in flight */
    BdrvRequestFlags flags;  /* passed through to the coroutine request */
} RwCo;
2750
2751static void coroutine_fn bdrv_rw_co_entry(void *opaque)
2752{
2753 RwCo *rwco = opaque;
2754
2755 if (!rwco->is_write) {
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002756 rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
2757 rwco->qiov->size, rwco->qiov,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002758 rwco->flags);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002759 } else {
2760 rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
2761 rwco->qiov->size, rwco->qiov,
2762 rwco->flags);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002763 }
2764}
2765
/*
 * Process a vectored synchronous request using coroutines
 *
 * Runs bdrv_rw_co_entry() either directly (when already in coroutine
 * context) or in a new coroutine, polling the BDS's AioContext until the
 * request completes.  Returns the request's result code.
 */
static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
                        QEMUIOVector *qiov, bool is_write,
                        BdrvRequestFlags flags)
{
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .offset = offset,
        .qiov = qiov,
        .is_write = is_write,
        .ret = NOT_DONE,
        .flags = flags,
    };

    /**
     * In sync call context, when the vcpu is blocked, this throttling timer
     * will not fire; so the I/O throttling function has to be disabled here
     * if it has been enabled.
     */
    if (bs->io_limits_enabled) {
        fprintf(stderr, "Disabling I/O throttling on '%s' due "
                        "to synchronous I/O.\n", bdrv_get_device_name(bs));
        bdrv_io_limits_disable(bs);
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_rw_co_entry(&rwco);
    } else {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        co = qemu_coroutine_create(bdrv_rw_co_entry);
        qemu_coroutine_enter(co, &rwco);
        /* Drive the event loop until the coroutine has stored a result. */
        while (rwco.ret == NOT_DONE) {
            aio_poll(aio_context, true);
        }
    }
    return rwco.ret;
}
2808
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002809/*
2810 * Process a synchronous request using coroutines
2811 */
2812static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002813 int nb_sectors, bool is_write, BdrvRequestFlags flags)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002814{
2815 QEMUIOVector qiov;
2816 struct iovec iov = {
2817 .iov_base = (void *)buf,
2818 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
2819 };
2820
Peter Lieven75af1f32015-02-06 11:54:11 +01002821 if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
Kevin Wolfda15ee52014-04-14 15:39:36 +02002822 return -EINVAL;
2823 }
2824
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002825 qemu_iovec_init_external(&qiov, &iov, 1);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002826 return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
2827 &qiov, is_write, flags);
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002828}
2829
/* return < 0 if error. See bdrv_write() for the return codes */
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
              uint8_t *buf, int nb_sectors)
{
    /* Synchronous read, implemented on top of the coroutine path. */
    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
}
2836
Markus Armbruster07d27a42012-06-29 17:34:29 +02002837/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
2838int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
2839 uint8_t *buf, int nb_sectors)
2840{
2841 bool enabled;
2842 int ret;
2843
2844 enabled = bs->io_limits_enabled;
2845 bs->io_limits_enabled = false;
Peter Lieven4e7395e2013-07-18 10:37:32 +02002846 ret = bdrv_read(bs, sector_num, buf, nb_sectors);
Markus Armbruster07d27a42012-06-29 17:34:29 +02002847 bs->io_limits_enabled = enabled;
2848 return ret;
2849}
2850
/* Return < 0 if error. Important errors are:
  -EIO         generic I/O error (may happen for all errors)
  -ENOMEDIUM   No media inserted.
  -EINVAL      Invalid sector number or nb_sectors
  -EACCES      Trying to write a read-only device
*/
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
               const uint8_t *buf, int nb_sectors)
{
    /* Synchronous write, implemented on top of the coroutine path. */
    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
}
2862
/* Synchronously write zeroes over nb_sectors starting at sector_num.
 * 'flags' is ORed into BDRV_REQ_ZERO_WRITE (e.g. BDRV_REQ_MAY_UNMAP). */
int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
                      int nb_sectors, BdrvRequestFlags flags)
{
    return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
                      BDRV_REQ_ZERO_WRITE | flags);
}
2869
Peter Lievend75cbb52013-10-24 12:07:03 +02002870/*
2871 * Completely zero out a block device with the help of bdrv_write_zeroes.
2872 * The operation is sped up by checking the block status and only writing
2873 * zeroes to the device if they currently do not return zeroes. Optional
2874 * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
2875 *
2876 * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
2877 */
2878int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
2879{
Markus Armbrusterd32f7c12014-06-26 13:23:18 +02002880 int64_t target_sectors, ret, nb_sectors, sector_num = 0;
Peter Lievend75cbb52013-10-24 12:07:03 +02002881 int n;
2882
Markus Armbrusterd32f7c12014-06-26 13:23:18 +02002883 target_sectors = bdrv_nb_sectors(bs);
2884 if (target_sectors < 0) {
2885 return target_sectors;
Kevin Wolf9ce10c02014-04-14 17:03:34 +02002886 }
Kevin Wolf9ce10c02014-04-14 17:03:34 +02002887
Peter Lievend75cbb52013-10-24 12:07:03 +02002888 for (;;) {
Peter Lieven75af1f32015-02-06 11:54:11 +01002889 nb_sectors = MIN(target_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
Peter Lievend75cbb52013-10-24 12:07:03 +02002890 if (nb_sectors <= 0) {
2891 return 0;
2892 }
Peter Lievend75cbb52013-10-24 12:07:03 +02002893 ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
Peter Lieven3d94ce62013-12-12 13:57:05 +01002894 if (ret < 0) {
2895 error_report("error getting block status at sector %" PRId64 ": %s",
2896 sector_num, strerror(-ret));
2897 return ret;
2898 }
Peter Lievend75cbb52013-10-24 12:07:03 +02002899 if (ret & BDRV_BLOCK_ZERO) {
2900 sector_num += n;
2901 continue;
2902 }
2903 ret = bdrv_write_zeroes(bs, sector_num, n, flags);
2904 if (ret < 0) {
2905 error_report("error writing zeroes at sector %" PRId64 ": %s",
2906 sector_num, strerror(-ret));
2907 return ret;
2908 }
2909 sector_num += n;
2910 }
2911}
2912
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002913int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
bellard83f64092006-08-01 16:21:11 +00002914{
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002915 QEMUIOVector qiov;
2916 struct iovec iov = {
2917 .iov_base = (void *)buf,
2918 .iov_len = bytes,
2919 };
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002920 int ret;
bellard83f64092006-08-01 16:21:11 +00002921
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002922 if (bytes < 0) {
2923 return -EINVAL;
bellard83f64092006-08-01 16:21:11 +00002924 }
2925
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002926 qemu_iovec_init_external(&qiov, &iov, 1);
2927 ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
2928 if (ret < 0) {
2929 return ret;
bellard83f64092006-08-01 16:21:11 +00002930 }
2931
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002932 return bytes;
bellard83f64092006-08-01 16:21:11 +00002933}
2934
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002935int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
bellard83f64092006-08-01 16:21:11 +00002936{
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002937 int ret;
bellard83f64092006-08-01 16:21:11 +00002938
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002939 ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
2940 if (ret < 0) {
2941 return ret;
bellard83f64092006-08-01 16:21:11 +00002942 }
2943
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002944 return qiov->size;
2945}
2946
2947int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002948 const void *buf, int bytes)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002949{
2950 QEMUIOVector qiov;
2951 struct iovec iov = {
2952 .iov_base = (void *) buf,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002953 .iov_len = bytes,
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002954 };
2955
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002956 if (bytes < 0) {
2957 return -EINVAL;
2958 }
2959
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002960 qemu_iovec_init_external(&qiov, &iov, 1);
2961 return bdrv_pwritev(bs, offset, &qiov);
bellard83f64092006-08-01 16:21:11 +00002962}
bellard83f64092006-08-01 16:21:11 +00002963
Kevin Wolff08145f2010-06-16 16:38:15 +02002964/*
2965 * Writes to the file and ensures that no writes are reordered across this
2966 * request (acts as a barrier)
2967 *
2968 * Returns 0 on success, -errno in error cases.
2969 */
2970int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
2971 const void *buf, int count)
2972{
2973 int ret;
2974
2975 ret = bdrv_pwrite(bs, offset, buf, count);
2976 if (ret < 0) {
2977 return ret;
2978 }
2979
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02002980 /* No flush needed for cache modes that already do it */
2981 if (bs->enable_write_cache) {
Kevin Wolff08145f2010-06-16 16:38:15 +02002982 bdrv_flush(bs);
2983 }
2984
2985 return 0;
2986}
2987
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002988static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
Stefan Hajnocziab185922011-11-17 13:40:31 +00002989 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
2990{
2991 /* Perform I/O through a temporary buffer so that users who scribble over
2992 * their read buffer while the operation is in progress do not end up
2993 * modifying the image file. This is critical for zero-copy guest I/O
2994 * where anything might happen inside guest memory.
2995 */
2996 void *bounce_buffer;
2997
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002998 BlockDriver *drv = bs->drv;
Stefan Hajnocziab185922011-11-17 13:40:31 +00002999 struct iovec iov;
3000 QEMUIOVector bounce_qiov;
3001 int64_t cluster_sector_num;
3002 int cluster_nb_sectors;
3003 size_t skip_bytes;
3004 int ret;
3005
3006 /* Cover entire cluster so no additional backing file I/O is required when
3007 * allocating cluster in the image file.
3008 */
Paolo Bonzini343bded2013-01-21 17:09:42 +01003009 bdrv_round_to_clusters(bs, sector_num, nb_sectors,
3010 &cluster_sector_num, &cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003011
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003012 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
3013 cluster_sector_num, cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003014
3015 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
Kevin Wolf857d4f42014-05-20 13:16:51 +02003016 iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
3017 if (bounce_buffer == NULL) {
3018 ret = -ENOMEM;
3019 goto err;
3020 }
3021
Stefan Hajnocziab185922011-11-17 13:40:31 +00003022 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
3023
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003024 ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
3025 &bounce_qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003026 if (ret < 0) {
3027 goto err;
3028 }
3029
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003030 if (drv->bdrv_co_write_zeroes &&
3031 buffer_is_zero(bounce_buffer, iov.iov_len)) {
Kevin Wolf621f0582012-03-20 15:12:58 +01003032 ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003033 cluster_nb_sectors, 0);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003034 } else {
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02003035 /* This does not change the data on the disk, it is not necessary
3036 * to flush even in cache=writethrough mode.
3037 */
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003038 ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
Stefan Hajnocziab185922011-11-17 13:40:31 +00003039 &bounce_qiov);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003040 }
3041
Stefan Hajnocziab185922011-11-17 13:40:31 +00003042 if (ret < 0) {
3043 /* It might be okay to ignore write errors for guest requests. If this
3044 * is a deliberate copy-on-read then we don't want to ignore the error.
3045 * Simply report it in all cases.
3046 */
3047 goto err;
3048 }
3049
3050 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
Michael Tokarev03396142012-06-07 20:17:55 +04003051 qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
3052 nb_sectors * BDRV_SECTOR_SIZE);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003053
3054err:
3055 qemu_vfree(bounce_buffer);
3056 return ret;
3057}
3058
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003059/*
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003060 * Forwards an already correctly aligned request to the BlockDriver. This
3061 * handles copy on read and zeroing after EOF; any other features must be
3062 * implemented by the caller.
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003063 */
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003064static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
Kevin Wolf65afd212013-12-03 14:55:55 +01003065 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
Kevin Wolfec746e12013-12-04 12:13:10 +01003066 int64_t align, QEMUIOVector *qiov, int flags)
Kevin Wolfda1fa912011-07-14 17:27:13 +02003067{
3068 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00003069 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003070
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003071 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3072 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003073
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003074 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3075 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
Kevin Wolf8eb029c2014-07-01 16:09:54 +02003076 assert(!qiov || bytes == qiov->size);
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003077
3078 /* Handle Copy on Read and associated serialisation */
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003079 if (flags & BDRV_REQ_COPY_ON_READ) {
Kevin Wolf73271452013-12-04 17:08:50 +01003080 /* If we touch the same cluster it counts as an overlap. This
3081 * guarantees that allocating writes will be serialized and not race
3082 * with each other for the same cluster. For example, in copy-on-read
3083 * it ensures that the CoR read and write operations are atomic and
3084 * guest writes cannot interleave between them. */
3085 mark_request_serialising(req, bdrv_get_cluster_size(bs));
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003086 }
3087
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01003088 wait_serialising_requests(req);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00003089
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003090 if (flags & BDRV_REQ_COPY_ON_READ) {
Stefan Hajnocziab185922011-11-17 13:40:31 +00003091 int pnum;
3092
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02003093 ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003094 if (ret < 0) {
3095 goto out;
3096 }
3097
3098 if (!ret || pnum != nb_sectors) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003099 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003100 goto out;
3101 }
3102 }
3103
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003104 /* Forward the request to the BlockDriver */
Max Reitzc0191e72015-02-05 13:58:24 -05003105 if (!bs->zero_beyond_eof) {
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003106 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
3107 } else {
Max Reitzc0191e72015-02-05 13:58:24 -05003108 /* Read zeros after EOF */
Markus Armbruster40490822014-06-26 13:23:19 +02003109 int64_t total_sectors, max_nb_sectors;
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003110
Markus Armbruster40490822014-06-26 13:23:19 +02003111 total_sectors = bdrv_nb_sectors(bs);
3112 if (total_sectors < 0) {
3113 ret = total_sectors;
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003114 goto out;
3115 }
3116
Kevin Wolf5f5bcd82014-02-07 16:00:09 +01003117 max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
3118 align >> BDRV_SECTOR_BITS);
Paolo Bonzinie012b782014-12-17 16:09:59 +01003119 if (nb_sectors < max_nb_sectors) {
3120 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
3121 } else if (max_nb_sectors > 0) {
Kevin Wolf33f461e2014-07-03 13:21:24 +02003122 QEMUIOVector local_qiov;
Kevin Wolf33f461e2014-07-03 13:21:24 +02003123
3124 qemu_iovec_init(&local_qiov, qiov->niov);
3125 qemu_iovec_concat(&local_qiov, qiov, 0,
Paolo Bonzinie012b782014-12-17 16:09:59 +01003126 max_nb_sectors * BDRV_SECTOR_SIZE);
Kevin Wolf33f461e2014-07-03 13:21:24 +02003127
Paolo Bonzinie012b782014-12-17 16:09:59 +01003128 ret = drv->bdrv_co_readv(bs, sector_num, max_nb_sectors,
Kevin Wolf33f461e2014-07-03 13:21:24 +02003129 &local_qiov);
3130
3131 qemu_iovec_destroy(&local_qiov);
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003132 } else {
3133 ret = 0;
3134 }
3135
3136 /* Reading beyond end of file is supposed to produce zeroes */
3137 if (ret == 0 && total_sectors < sector_num + nb_sectors) {
3138 uint64_t offset = MAX(0, total_sectors - sector_num);
3139 uint64_t bytes = (sector_num + nb_sectors - offset) *
3140 BDRV_SECTOR_SIZE;
3141 qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
3142 }
3143 }
Stefan Hajnocziab185922011-11-17 13:40:31 +00003144
3145out:
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00003146 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003147}
3148
Fam Zhengfc3959e2015-03-24 09:23:49 +08003149static inline uint64_t bdrv_get_align(BlockDriverState *bs)
3150{
3151 /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3152 return MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
3153}
3154
3155static inline bool bdrv_req_is_aligned(BlockDriverState *bs,
3156 int64_t offset, size_t bytes)
3157{
3158 int64_t align = bdrv_get_align(bs);
3159 return !(offset & (align - 1) || (bytes & (align - 1)));
3160}
3161
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003162/*
3163 * Handle a read request in coroutine context
3164 */
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003165static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
3166 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003167 BdrvRequestFlags flags)
3168{
3169 BlockDriver *drv = bs->drv;
Kevin Wolf65afd212013-12-03 14:55:55 +01003170 BdrvTrackedRequest req;
3171
Fam Zhengfc3959e2015-03-24 09:23:49 +08003172 uint64_t align = bdrv_get_align(bs);
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003173 uint8_t *head_buf = NULL;
3174 uint8_t *tail_buf = NULL;
3175 QEMUIOVector local_qiov;
3176 bool use_local_qiov = false;
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003177 int ret;
3178
3179 if (!drv) {
3180 return -ENOMEDIUM;
3181 }
Max Reitzb9c64942015-02-05 13:58:25 -05003182
3183 ret = bdrv_check_byte_request(bs, offset, bytes);
3184 if (ret < 0) {
3185 return ret;
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003186 }
3187
3188 if (bs->copy_on_read) {
3189 flags |= BDRV_REQ_COPY_ON_READ;
3190 }
3191
3192 /* throttling disk I/O */
3193 if (bs->io_limits_enabled) {
Kevin Wolfd5103582014-01-16 13:29:10 +01003194 bdrv_io_limits_intercept(bs, bytes, false);
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003195 }
3196
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003197 /* Align read if necessary by padding qiov */
3198 if (offset & (align - 1)) {
3199 head_buf = qemu_blockalign(bs, align);
3200 qemu_iovec_init(&local_qiov, qiov->niov + 2);
3201 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
3202 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3203 use_local_qiov = true;
3204
3205 bytes += offset & (align - 1);
3206 offset = offset & ~(align - 1);
3207 }
3208
3209 if ((offset + bytes) & (align - 1)) {
3210 if (!use_local_qiov) {
3211 qemu_iovec_init(&local_qiov, qiov->niov + 1);
3212 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3213 use_local_qiov = true;
3214 }
3215 tail_buf = qemu_blockalign(bs, align);
3216 qemu_iovec_add(&local_qiov, tail_buf,
3217 align - ((offset + bytes) & (align - 1)));
3218
3219 bytes = ROUND_UP(bytes, align);
3220 }
3221
Kevin Wolf65afd212013-12-03 14:55:55 +01003222 tracked_request_begin(&req, bs, offset, bytes, false);
Kevin Wolfec746e12013-12-04 12:13:10 +01003223 ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003224 use_local_qiov ? &local_qiov : qiov,
3225 flags);
Kevin Wolf65afd212013-12-03 14:55:55 +01003226 tracked_request_end(&req);
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003227
3228 if (use_local_qiov) {
3229 qemu_iovec_destroy(&local_qiov);
3230 qemu_vfree(head_buf);
3231 qemu_vfree(tail_buf);
3232 }
3233
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003234 return ret;
3235}
3236
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003237static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
3238 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3239 BdrvRequestFlags flags)
3240{
Peter Lieven75af1f32015-02-06 11:54:11 +01003241 if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003242 return -EINVAL;
3243 }
3244
3245 return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
3246 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3247}
3248
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003249int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02003250 int nb_sectors, QEMUIOVector *qiov)
3251{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003252 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02003253
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003254 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
3255}
3256
3257int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
3258 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
3259{
3260 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
3261
3262 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
3263 BDRV_REQ_COPY_ON_READ);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003264}
3265
Peter Lieven98764152015-02-02 15:48:34 +01003266#define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768
Peter Lievenc31cb702013-10-24 12:06:58 +02003267
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003268static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003269 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003270{
3271 BlockDriver *drv = bs->drv;
3272 QEMUIOVector qiov;
Peter Lievenc31cb702013-10-24 12:06:58 +02003273 struct iovec iov = {0};
3274 int ret = 0;
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003275
Peter Lieven75af1f32015-02-06 11:54:11 +01003276 int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_write_zeroes,
3277 BDRV_REQUEST_MAX_SECTORS);
Kevin Wolf621f0582012-03-20 15:12:58 +01003278
Peter Lievenc31cb702013-10-24 12:06:58 +02003279 while (nb_sectors > 0 && !ret) {
3280 int num = nb_sectors;
3281
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003282 /* Align request. Block drivers can expect the "bulk" of the request
3283 * to be aligned.
3284 */
3285 if (bs->bl.write_zeroes_alignment
3286 && num > bs->bl.write_zeroes_alignment) {
3287 if (sector_num % bs->bl.write_zeroes_alignment != 0) {
3288 /* Make a small request up to the first aligned sector. */
Peter Lievenc31cb702013-10-24 12:06:58 +02003289 num = bs->bl.write_zeroes_alignment;
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003290 num -= sector_num % bs->bl.write_zeroes_alignment;
3291 } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
3292 /* Shorten the request to the last aligned sector. num cannot
3293 * underflow because num > bs->bl.write_zeroes_alignment.
3294 */
3295 num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
Peter Lievenc31cb702013-10-24 12:06:58 +02003296 }
Kevin Wolf621f0582012-03-20 15:12:58 +01003297 }
Peter Lievenc31cb702013-10-24 12:06:58 +02003298
3299 /* limit request size */
3300 if (num > max_write_zeroes) {
3301 num = max_write_zeroes;
3302 }
3303
3304 ret = -ENOTSUP;
3305 /* First try the efficient write zeroes operation */
3306 if (drv->bdrv_co_write_zeroes) {
3307 ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
3308 }
3309
3310 if (ret == -ENOTSUP) {
3311 /* Fall back to bounce buffer if write zeroes is unsupported */
Peter Lieven095e4fa2015-01-05 12:29:49 +01003312 int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length,
Peter Lieven98764152015-02-02 15:48:34 +01003313 MAX_WRITE_ZEROES_BOUNCE_BUFFER);
Peter Lieven095e4fa2015-01-05 12:29:49 +01003314 num = MIN(num, max_xfer_len);
Peter Lievenc31cb702013-10-24 12:06:58 +02003315 iov.iov_len = num * BDRV_SECTOR_SIZE;
3316 if (iov.iov_base == NULL) {
Kevin Wolf857d4f42014-05-20 13:16:51 +02003317 iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
3318 if (iov.iov_base == NULL) {
3319 ret = -ENOMEM;
3320 goto fail;
3321 }
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003322 memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
Peter Lievenc31cb702013-10-24 12:06:58 +02003323 }
3324 qemu_iovec_init_external(&qiov, &iov, 1);
3325
3326 ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003327
3328 /* Keep bounce buffer around if it is big enough for all
3329 * all future requests.
3330 */
Peter Lieven095e4fa2015-01-05 12:29:49 +01003331 if (num < max_xfer_len) {
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003332 qemu_vfree(iov.iov_base);
3333 iov.iov_base = NULL;
3334 }
Peter Lievenc31cb702013-10-24 12:06:58 +02003335 }
3336
3337 sector_num += num;
3338 nb_sectors -= num;
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003339 }
3340
Kevin Wolf857d4f42014-05-20 13:16:51 +02003341fail:
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003342 qemu_vfree(iov.iov_base);
3343 return ret;
3344}
3345
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003346/*
Kevin Wolfb404f722013-12-03 14:02:23 +01003347 * Forwards an already correctly aligned write request to the BlockDriver.
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003348 */
Kevin Wolfb404f722013-12-03 14:02:23 +01003349static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
Kevin Wolf65afd212013-12-03 14:55:55 +01003350 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
3351 QEMUIOVector *qiov, int flags)
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003352{
3353 BlockDriver *drv = bs->drv;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003354 bool waited;
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003355 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003356
Kevin Wolfb404f722013-12-03 14:02:23 +01003357 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3358 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003359
Kevin Wolfb404f722013-12-03 14:02:23 +01003360 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3361 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
Kevin Wolf8eb029c2014-07-01 16:09:54 +02003362 assert(!qiov || bytes == qiov->size);
Benoît Canetcc0681c2013-09-02 14:14:39 +02003363
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003364 waited = wait_serialising_requests(req);
3365 assert(!waited || !req->serialising);
Kevin Wolfaf91f9a2014-02-07 15:35:56 +01003366 assert(req->overlap_offset <= offset);
3367 assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
Kevin Wolf244eade2013-12-03 14:30:44 +01003368
Kevin Wolf65afd212013-12-03 14:55:55 +01003369 ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
Stefan Hajnoczid616b222013-06-24 17:13:10 +02003370
Peter Lieven465bee12014-05-18 00:58:19 +02003371 if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
3372 !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
3373 qemu_iovec_is_zero(qiov)) {
3374 flags |= BDRV_REQ_ZERO_WRITE;
3375 if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
3376 flags |= BDRV_REQ_MAY_UNMAP;
3377 }
3378 }
3379
Stefan Hajnoczid616b222013-06-24 17:13:10 +02003380 if (ret < 0) {
3381 /* Do nothing, write notifier decided to fail this request */
3382 } else if (flags & BDRV_REQ_ZERO_WRITE) {
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003383 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003384 ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003385 } else {
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003386 BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003387 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
3388 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003389 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003390
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02003391 if (ret == 0 && !bs->enable_write_cache) {
3392 ret = bdrv_co_flush(bs);
3393 }
3394
Fam Zhenge4654d22013-11-13 18:29:43 +08003395 bdrv_set_dirty(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02003396
Benoît Canet5366d0c2014-09-05 15:46:18 +02003397 block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);
Benoît Canet5e5a94b2014-09-05 15:46:16 +02003398
Max Reitzc0191e72015-02-05 13:58:24 -05003399 if (ret >= 0) {
Paolo Bonzinidf2a6f22013-09-04 19:00:21 +02003400 bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
3401 }
Kevin Wolfda1fa912011-07-14 17:27:13 +02003402
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003403 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003404}
3405
Kevin Wolfb404f722013-12-03 14:02:23 +01003406/*
3407 * Handle a write request in coroutine context
3408 */
Kevin Wolf66015532013-12-03 14:40:18 +01003409static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
3410 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Kevin Wolfb404f722013-12-03 14:02:23 +01003411 BdrvRequestFlags flags)
3412{
Kevin Wolf65afd212013-12-03 14:55:55 +01003413 BdrvTrackedRequest req;
Fam Zhengfc3959e2015-03-24 09:23:49 +08003414 uint64_t align = bdrv_get_align(bs);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003415 uint8_t *head_buf = NULL;
3416 uint8_t *tail_buf = NULL;
3417 QEMUIOVector local_qiov;
3418 bool use_local_qiov = false;
Kevin Wolfb404f722013-12-03 14:02:23 +01003419 int ret;
3420
3421 if (!bs->drv) {
3422 return -ENOMEDIUM;
3423 }
3424 if (bs->read_only) {
3425 return -EACCES;
3426 }
Max Reitzb9c64942015-02-05 13:58:25 -05003427
3428 ret = bdrv_check_byte_request(bs, offset, bytes);
3429 if (ret < 0) {
3430 return ret;
Kevin Wolfb404f722013-12-03 14:02:23 +01003431 }
3432
Kevin Wolfb404f722013-12-03 14:02:23 +01003433 /* throttling disk I/O */
3434 if (bs->io_limits_enabled) {
Kevin Wolfd5103582014-01-16 13:29:10 +01003435 bdrv_io_limits_intercept(bs, bytes, true);
Kevin Wolfb404f722013-12-03 14:02:23 +01003436 }
3437
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003438 /*
3439 * Align write if necessary by performing a read-modify-write cycle.
3440 * Pad qiov with the read parts and be sure to have a tracked request not
3441 * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
3442 */
Kevin Wolf65afd212013-12-03 14:55:55 +01003443 tracked_request_begin(&req, bs, offset, bytes, true);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003444
3445 if (offset & (align - 1)) {
3446 QEMUIOVector head_qiov;
3447 struct iovec head_iov;
3448
3449 mark_request_serialising(&req, align);
3450 wait_serialising_requests(&req);
3451
3452 head_buf = qemu_blockalign(bs, align);
3453 head_iov = (struct iovec) {
3454 .iov_base = head_buf,
3455 .iov_len = align,
3456 };
3457 qemu_iovec_init_external(&head_qiov, &head_iov, 1);
3458
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003459 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003460 ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
3461 align, &head_qiov, 0);
3462 if (ret < 0) {
3463 goto fail;
3464 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003465 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003466
3467 qemu_iovec_init(&local_qiov, qiov->niov + 2);
3468 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
3469 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3470 use_local_qiov = true;
3471
3472 bytes += offset & (align - 1);
3473 offset = offset & ~(align - 1);
3474 }
3475
3476 if ((offset + bytes) & (align - 1)) {
3477 QEMUIOVector tail_qiov;
3478 struct iovec tail_iov;
3479 size_t tail_bytes;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003480 bool waited;
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003481
3482 mark_request_serialising(&req, align);
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003483 waited = wait_serialising_requests(&req);
3484 assert(!waited || !use_local_qiov);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003485
3486 tail_buf = qemu_blockalign(bs, align);
3487 tail_iov = (struct iovec) {
3488 .iov_base = tail_buf,
3489 .iov_len = align,
3490 };
3491 qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
3492
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003493 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003494 ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
3495 align, &tail_qiov, 0);
3496 if (ret < 0) {
3497 goto fail;
3498 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003499 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003500
3501 if (!use_local_qiov) {
3502 qemu_iovec_init(&local_qiov, qiov->niov + 1);
3503 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3504 use_local_qiov = true;
3505 }
3506
3507 tail_bytes = (offset + bytes) & (align - 1);
3508 qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
3509
3510 bytes = ROUND_UP(bytes, align);
3511 }
3512
Fam Zhengfc3959e2015-03-24 09:23:49 +08003513 if (use_local_qiov) {
3514 /* Local buffer may have non-zero data. */
3515 flags &= ~BDRV_REQ_ZERO_WRITE;
3516 }
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003517 ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
3518 use_local_qiov ? &local_qiov : qiov,
3519 flags);
3520
3521fail:
Kevin Wolf65afd212013-12-03 14:55:55 +01003522 tracked_request_end(&req);
Kevin Wolfb404f722013-12-03 14:02:23 +01003523
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003524 if (use_local_qiov) {
3525 qemu_iovec_destroy(&local_qiov);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003526 }
Kevin Wolf99c4a852014-02-07 15:29:00 +01003527 qemu_vfree(head_buf);
3528 qemu_vfree(tail_buf);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003529
Kevin Wolfb404f722013-12-03 14:02:23 +01003530 return ret;
3531}
3532
Kevin Wolf66015532013-12-03 14:40:18 +01003533static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
3534 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3535 BdrvRequestFlags flags)
3536{
Peter Lieven75af1f32015-02-06 11:54:11 +01003537 if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
Kevin Wolf66015532013-12-03 14:40:18 +01003538 return -EINVAL;
3539 }
3540
3541 return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
3542 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3543}
3544
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003545int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
3546 int nb_sectors, QEMUIOVector *qiov)
3547{
3548 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
3549
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003550 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
3551}
3552
3553int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003554 int64_t sector_num, int nb_sectors,
3555 BdrvRequestFlags flags)
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003556{
Fam Zhengfc3959e2015-03-24 09:23:49 +08003557 int ret;
3558
Paolo Bonzini94d6ff22013-11-22 13:39:45 +01003559 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003560
Peter Lievend32f35c2013-10-24 12:06:52 +02003561 if (!(bs->open_flags & BDRV_O_UNMAP)) {
3562 flags &= ~BDRV_REQ_MAY_UNMAP;
3563 }
Fam Zhengfc3959e2015-03-24 09:23:49 +08003564 if (bdrv_req_is_aligned(bs, sector_num << BDRV_SECTOR_BITS,
3565 nb_sectors << BDRV_SECTOR_BITS)) {
3566 ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
3567 BDRV_REQ_ZERO_WRITE | flags);
3568 } else {
3569 uint8_t *buf;
3570 QEMUIOVector local_qiov;
3571 size_t bytes = nb_sectors << BDRV_SECTOR_BITS;
Peter Lievend32f35c2013-10-24 12:06:52 +02003572
Fam Zhengfc3959e2015-03-24 09:23:49 +08003573 buf = qemu_memalign(bdrv_opt_mem_align(bs), bytes);
3574 memset(buf, 0, bytes);
3575 qemu_iovec_init(&local_qiov, 1);
3576 qemu_iovec_add(&local_qiov, buf, bytes);
3577
3578 ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, &local_qiov,
3579 BDRV_REQ_ZERO_WRITE | flags);
3580 qemu_vfree(buf);
3581 }
3582 return ret;
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003583}
3584
bellard83f64092006-08-01 16:21:11 +00003585/**
bellard83f64092006-08-01 16:21:11 +00003586 * Truncate file to 'offset' bytes (needed only for file protocols)
3587 */
3588int bdrv_truncate(BlockDriverState *bs, int64_t offset)
3589{
3590 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003591 int ret;
bellard83f64092006-08-01 16:21:11 +00003592 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003593 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00003594 if (!drv->bdrv_truncate)
3595 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02003596 if (bs->read_only)
3597 return -EACCES;
Jeff Cody9c75e162014-06-25 16:55:30 -04003598
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003599 ret = drv->bdrv_truncate(bs, offset);
3600 if (ret == 0) {
3601 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbrustera7f53e22014-10-07 13:59:25 +02003602 if (bs->blk) {
3603 blk_dev_resize_cb(bs->blk);
3604 }
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003605 }
3606 return ret;
bellard83f64092006-08-01 16:21:11 +00003607}
3608
3609/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08003610 * Length of a allocated file in bytes. Sparse files are counted by actual
3611 * allocated space. Return < 0 if error or unknown.
3612 */
3613int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
3614{
3615 BlockDriver *drv = bs->drv;
3616 if (!drv) {
3617 return -ENOMEDIUM;
3618 }
3619 if (drv->bdrv_get_allocated_file_size) {
3620 return drv->bdrv_get_allocated_file_size(bs);
3621 }
3622 if (bs->file) {
3623 return bdrv_get_allocated_file_size(bs->file);
3624 }
3625 return -ENOTSUP;
3626}
3627
3628/**
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003629 * Return number of sectors on success, -errno on error.
bellard83f64092006-08-01 16:21:11 +00003630 */
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003631int64_t bdrv_nb_sectors(BlockDriverState *bs)
bellard83f64092006-08-01 16:21:11 +00003632{
3633 BlockDriver *drv = bs->drv;
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003634
bellard83f64092006-08-01 16:21:11 +00003635 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003636 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003637
Kevin Wolfb94a2612013-10-29 12:18:58 +01003638 if (drv->has_variable_length) {
3639 int ret = refresh_total_sectors(bs, bs->total_sectors);
3640 if (ret < 0) {
3641 return ret;
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01003642 }
bellard83f64092006-08-01 16:21:11 +00003643 }
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003644 return bs->total_sectors;
3645}
3646
3647/**
3648 * Return length in bytes on success, -errno on error.
3649 * The length is always a multiple of BDRV_SECTOR_SIZE.
3650 */
3651int64_t bdrv_getlength(BlockDriverState *bs)
3652{
3653 int64_t ret = bdrv_nb_sectors(bs);
3654
3655 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00003656}
3657
bellard19cb3732006-08-19 11:45:59 +00003658/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00003659void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00003660{
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003661 int64_t nb_sectors = bdrv_nb_sectors(bs);
3662
3663 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
bellardfc01f7e2003-06-30 10:03:06 +00003664}
bellardcf989512004-02-16 21:56:36 +00003665
Paolo Bonziniff06f5f2012-09-28 17:22:54 +02003666void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
3667 BlockdevOnError on_write_error)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003668{
3669 bs->on_read_error = on_read_error;
3670 bs->on_write_error = on_write_error;
3671}
3672
Paolo Bonzini1ceee0d2012-09-28 17:22:56 +02003673BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003674{
3675 return is_read ? bs->on_read_error : bs->on_write_error;
3676}
3677
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003678BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
3679{
3680 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
3681
3682 switch (on_err) {
3683 case BLOCKDEV_ON_ERROR_ENOSPC:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003684 return (error == ENOSPC) ?
3685 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003686 case BLOCKDEV_ON_ERROR_STOP:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003687 return BLOCK_ERROR_ACTION_STOP;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003688 case BLOCKDEV_ON_ERROR_REPORT:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003689 return BLOCK_ERROR_ACTION_REPORT;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003690 case BLOCKDEV_ON_ERROR_IGNORE:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003691 return BLOCK_ERROR_ACTION_IGNORE;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003692 default:
3693 abort();
3694 }
3695}
3696
/* Emit a QMP BLOCK_IO_ERROR event for @bs.  @action is the action the block
 * layer chose, @is_read selects the operation type, @error is the positive
 * errno value of the failure. */
static void send_qmp_error_event(BlockDriverState *bs,
                                 BlockErrorAction action,
                                 bool is_read, int error)
{
    IoOperationType optype;

    optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
    /* error == ENOSPC feeds the event's 'nospace' flag */
    qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
                                   bdrv_iostatus_is_enabled(bs),
                                   error == ENOSPC, strerror(error),
                                   &error_abort);
}
3709
/* This is done by device models because, while the block layer knows
 * about the error, it does not know whether an operation comes from
 * the device or the block layer (from a job, for example).
 *
 * @action must be the action previously returned by bdrv_get_error_action();
 * @error is the positive errno value of the failed operation.
 */
void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
                       bool is_read, int error)
{
    assert(error >= 0);

    if (action == BLOCK_ERROR_ACTION_STOP) {
        /* First set the iostatus, so that "info block" returns an iostatus
         * that matches the events raised so far (an additional error iostatus
         * is fine, but not a lost one).
         */
        bdrv_iostatus_set_err(bs, error);

        /* Then raise the request to stop the VM and the event.
         * qemu_system_vmstop_request_prepare has two effects.  First,
         * it ensures that the STOP event always comes after the
         * BLOCK_IO_ERROR event.  Second, it ensures that even if management
         * can observe the STOP event and do a "cont" before the STOP
         * event is issued, the VM will not stop.  In this case, vm_start()
         * also ensures that the STOP/RESUME pair of events is emitted.
         */
        qemu_system_vmstop_request_prepare();
        send_qmp_error_event(bs, action, is_read, error);
        qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
    } else {
        send_qmp_error_event(bs, action, is_read, error);
    }
}
3741
/* Return non-zero if @bs was opened without write access. */
int bdrv_is_read_only(BlockDriverState *bs)
{
    return bs->read_only;
}
3746
/* Return non-zero if @bs is a SCSI generic (sg) device. */
int bdrv_is_sg(BlockDriverState *bs)
{
    return bs->sg;
}
3751
/* Return non-zero if writeback caching is enabled for @bs. */
int bdrv_enable_write_cache(BlockDriverState *bs)
{
    return bs->enable_write_cache;
}
3756
Paolo Bonzini425b0142012-06-06 00:04:52 +02003757void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
3758{
3759 bs->enable_write_cache = wce;
Jeff Cody55b110f2012-09-20 15:13:18 -04003760
3761 /* so a reopen() will preserve wce */
3762 if (wce) {
3763 bs->open_flags |= BDRV_O_CACHE_WB;
3764 } else {
3765 bs->open_flags &= ~BDRV_O_CACHE_WB;
3766 }
Paolo Bonzini425b0142012-06-06 00:04:52 +02003767}
3768
bellardea2384d2004-08-01 21:59:26 +00003769int bdrv_is_encrypted(BlockDriverState *bs)
3770{
3771 if (bs->backing_hd && bs->backing_hd->encrypted)
3772 return 1;
3773 return bs->encrypted;
3774}
3775
aliguoric0f4ce72009-03-05 23:01:01 +00003776int bdrv_key_required(BlockDriverState *bs)
3777{
3778 BlockDriverState *backing_hd = bs->backing_hd;
3779
3780 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
3781 return 1;
3782 return (bs->encrypted && !bs->valid_key);
3783}
3784
/* Set the encryption key for @bs, first recursing into an encrypted
 * backing file if present.  Returns 0 on success, -errno on failure.
 * On the first successful key set, the media-change callback that was
 * skipped at open time is delivered to the attached BlockBackend. */
int bdrv_set_key(BlockDriverState *bs, const char *key)
{
    int ret;
    if (bs->backing_hd && bs->backing_hd->encrypted) {
        ret = bdrv_set_key(bs->backing_hd, key);
        if (ret < 0)
            return ret;
        if (!bs->encrypted)
            return 0;
    }
    if (!bs->encrypted) {
        return -EINVAL;
    } else if (!bs->drv || !bs->drv->bdrv_set_key) {
        return -ENOMEDIUM;
    }
    ret = bs->drv->bdrv_set_key(bs, key);
    if (ret < 0) {
        bs->valid_key = 0;
    } else if (!bs->valid_key) {
        bs->valid_key = 1;
        if (bs->blk) {
            /* call the change callback now, we skipped it on open */
            blk_dev_change_media_cb(bs->blk, true);
        }
    }
    return ret;
}
3812
Markus Armbruster4d2855a2015-01-29 10:37:00 +01003813/*
3814 * Provide an encryption key for @bs.
3815 * If @key is non-null:
3816 * If @bs is not encrypted, fail.
3817 * Else if the key is invalid, fail.
3818 * Else set @bs's key to @key, replacing the existing key, if any.
3819 * If @key is null:
3820 * If @bs is encrypted and still lacks a key, fail.
3821 * Else do nothing.
3822 * On failure, store an error object through @errp if non-null.
3823 */
3824void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
3825{
3826 if (key) {
3827 if (!bdrv_is_encrypted(bs)) {
Alberto Garcia81e5f782015-04-08 12:29:19 +03003828 error_setg(errp, "Node '%s' is not encrypted",
3829 bdrv_get_device_or_node_name(bs));
Markus Armbruster4d2855a2015-01-29 10:37:00 +01003830 } else if (bdrv_set_key(bs, key) < 0) {
3831 error_set(errp, QERR_INVALID_PASSWORD);
3832 }
3833 } else {
3834 if (bdrv_key_required(bs)) {
Markus Armbrusterb1ca6392015-01-29 10:37:01 +01003835 error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
3836 "'%s' (%s) is encrypted",
Alberto Garcia81e5f782015-04-08 12:29:19 +03003837 bdrv_get_device_or_node_name(bs),
Markus Armbruster4d2855a2015-01-29 10:37:00 +01003838 bdrv_get_encrypted_filename(bs));
3839 }
3840 }
3841}
3842
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003843const char *bdrv_get_format_name(BlockDriverState *bs)
bellardea2384d2004-08-01 21:59:26 +00003844{
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003845 return bs->drv ? bs->drv->format_name : NULL;
bellardea2384d2004-08-01 21:59:26 +00003846}
3847
/*
 * qsort() comparator for an array of strings (const char *).
 *
 * qsort() hands the comparator pointers to the array *elements*, i.e.
 * const char **, so the arguments must be dereferenced before comparing
 * the strings.  Passing them straight to strcmp() would compare the raw
 * pointer bytes instead, yielding an arbitrary (address-based) order.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    return strcmp(*(const char * const *)a, *(const char * const *)b);
}
3852
ths5fafdf22007-09-16 21:08:06 +00003853void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00003854 void *opaque)
3855{
3856 BlockDriver *drv;
Jeff Codye855e4f2014-04-28 18:29:54 -04003857 int count = 0;
Stefan Hajnocziada42402014-08-27 12:08:55 +01003858 int i;
Jeff Codye855e4f2014-04-28 18:29:54 -04003859 const char **formats = NULL;
bellardea2384d2004-08-01 21:59:26 +00003860
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01003861 QLIST_FOREACH(drv, &bdrv_drivers, list) {
Jeff Codye855e4f2014-04-28 18:29:54 -04003862 if (drv->format_name) {
3863 bool found = false;
3864 int i = count;
3865 while (formats && i && !found) {
3866 found = !strcmp(formats[--i], drv->format_name);
3867 }
3868
3869 if (!found) {
Markus Armbruster5839e532014-08-19 10:31:08 +02003870 formats = g_renew(const char *, formats, count + 1);
Jeff Codye855e4f2014-04-28 18:29:54 -04003871 formats[count++] = drv->format_name;
Jeff Codye855e4f2014-04-28 18:29:54 -04003872 }
3873 }
bellardea2384d2004-08-01 21:59:26 +00003874 }
Stefan Hajnocziada42402014-08-27 12:08:55 +01003875
3876 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
3877
3878 for (i = 0; i < count; i++) {
3879 it(opaque, formats[i]);
3880 }
3881
Jeff Codye855e4f2014-04-28 18:29:54 -04003882 g_free(formats);
bellardea2384d2004-08-01 21:59:26 +00003883}
3884
Benoît Canetdc364f42014-01-23 21:31:32 +01003885/* This function is to find a node in the bs graph */
3886BlockDriverState *bdrv_find_node(const char *node_name)
3887{
3888 BlockDriverState *bs;
3889
3890 assert(node_name);
3891
3892 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3893 if (!strcmp(node_name, bs->node_name)) {
3894 return bs;
3895 }
3896 }
3897 return NULL;
3898}
3899
Benoît Canetc13163f2014-01-23 21:31:34 +01003900/* Put this QMP function here so it can access the static graph_bdrv_states. */
Alberto Garciad5a8ee62015-04-17 14:52:43 +03003901BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
Benoît Canetc13163f2014-01-23 21:31:34 +01003902{
3903 BlockDeviceInfoList *list, *entry;
3904 BlockDriverState *bs;
3905
3906 list = NULL;
3907 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
Alberto Garciad5a8ee62015-04-17 14:52:43 +03003908 BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
3909 if (!info) {
3910 qapi_free_BlockDeviceInfoList(list);
3911 return NULL;
3912 }
Benoît Canetc13163f2014-01-23 21:31:34 +01003913 entry = g_malloc0(sizeof(*entry));
Alberto Garciad5a8ee62015-04-17 14:52:43 +03003914 entry->value = info;
Benoît Canetc13163f2014-01-23 21:31:34 +01003915 entry->next = list;
3916 list = entry;
3917 }
3918
3919 return list;
3920}
3921
Benoît Canet12d3ba82014-01-23 21:31:35 +01003922BlockDriverState *bdrv_lookup_bs(const char *device,
3923 const char *node_name,
3924 Error **errp)
3925{
Markus Armbruster7f06d472014-10-07 13:59:12 +02003926 BlockBackend *blk;
3927 BlockDriverState *bs;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003928
Benoît Canet12d3ba82014-01-23 21:31:35 +01003929 if (device) {
Markus Armbruster7f06d472014-10-07 13:59:12 +02003930 blk = blk_by_name(device);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003931
Markus Armbruster7f06d472014-10-07 13:59:12 +02003932 if (blk) {
3933 return blk_bs(blk);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003934 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003935 }
3936
Benoît Canetdd67fa52014-02-12 17:15:06 +01003937 if (node_name) {
3938 bs = bdrv_find_node(node_name);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003939
Benoît Canetdd67fa52014-02-12 17:15:06 +01003940 if (bs) {
3941 return bs;
3942 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003943 }
3944
Benoît Canetdd67fa52014-02-12 17:15:06 +01003945 error_setg(errp, "Cannot find device=%s nor node_name=%s",
3946 device ? device : "",
3947 node_name ? node_name : "");
3948 return NULL;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003949}
3950
Jeff Cody5a6684d2014-06-25 15:40:09 -04003951/* If 'base' is in the same chain as 'top', return true. Otherwise,
3952 * return false. If either argument is NULL, return false. */
3953bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
3954{
3955 while (top && top != base) {
3956 top = top->backing_hd;
3957 }
3958
3959 return top != NULL;
3960}
3961
Fam Zheng04df7652014-10-31 11:32:54 +08003962BlockDriverState *bdrv_next_node(BlockDriverState *bs)
3963{
3964 if (!bs) {
3965 return QTAILQ_FIRST(&graph_bdrv_states);
3966 }
3967 return QTAILQ_NEXT(bs, node_list);
3968}
3969
Markus Armbruster2f399b02010-06-02 18:55:20 +02003970BlockDriverState *bdrv_next(BlockDriverState *bs)
3971{
3972 if (!bs) {
3973 return QTAILQ_FIRST(&bdrv_states);
3974 }
Benoît Canetdc364f42014-01-23 21:31:32 +01003975 return QTAILQ_NEXT(bs, device_list);
Markus Armbruster2f399b02010-06-02 18:55:20 +02003976}
3977
/* Return the node name of @bs; may be the empty string if none was set. */
const char *bdrv_get_node_name(const BlockDriverState *bs)
{
    return bs->node_name;
}
3982
Markus Armbruster7f06d472014-10-07 13:59:12 +02003983/* TODO check what callers really want: bs->node_name or blk_name() */
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02003984const char *bdrv_get_device_name(const BlockDriverState *bs)
bellardea2384d2004-08-01 21:59:26 +00003985{
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02003986 return bs->blk ? blk_name(bs->blk) : "";
bellardea2384d2004-08-01 21:59:26 +00003987}
3988
Alberto Garcia9b2aa842015-04-08 12:29:18 +03003989/* This can be used to identify nodes that might not have a device
3990 * name associated. Since node and device names live in the same
3991 * namespace, the result is unambiguous. The exception is if both are
3992 * absent, then this returns an empty (non-null) string. */
3993const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
3994{
3995 return bs->blk ? blk_name(bs->blk) : bs->node_name;
3996}
3997
/* Return the BDRV_O_* flags @bs was opened with. */
int bdrv_get_flags(BlockDriverState *bs)
{
    return bs->open_flags;
}
4002
/* Flush every block device, taking each BDS's AioContext around its flush.
 * Returns 0 on success, or the first error encountered; later devices are
 * still flushed after a failure. */
int bdrv_flush_all(void)
{
    BlockDriverState *bs;
    int result = 0;

    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
        AioContext *aio_context = bdrv_get_aio_context(bs);
        int ret;

        aio_context_acquire(aio_context);
        ret = bdrv_flush(bs);
        /* remember only the first failure */
        if (ret < 0 && !result) {
            result = ret;
        }
        aio_context_release(aio_context);
    }

    return result;
}
4022
/* Helper for drivers whose images always read back as zeroes after
 * creation; usable directly as a .bdrv_has_zero_init implementation. */
int bdrv_has_zero_init_1(BlockDriverState *bs)
{
    return 1;
}
4027
/* Return non-zero if a freshly created image on @bs's driver reads back
 * as all zeroes.  Returns 0 whenever that cannot be guaranteed. */
int bdrv_has_zero_init(BlockDriverState *bs)
{
    assert(bs->drv);

    /* If BS is a copy on write image, it is initialized to
       the contents of the base image, which may not be zeroes.  */
    if (bs->backing_hd) {
        return 0;
    }
    if (bs->drv->bdrv_has_zero_init) {
        return bs->drv->bdrv_has_zero_init(bs);
    }

    /* safe default */
    return 0;
}
4044
Peter Lieven4ce78692013-10-24 12:06:54 +02004045bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
4046{
4047 BlockDriverInfo bdi;
4048
4049 if (bs->backing_hd) {
4050 return false;
4051 }
4052
4053 if (bdrv_get_info(bs, &bdi) == 0) {
4054 return bdi.unallocated_blocks_are_zero;
4055 }
4056
4057 return false;
4058}
4059
4060bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
4061{
4062 BlockDriverInfo bdi;
4063
4064 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
4065 return false;
4066 }
4067
4068 if (bdrv_get_info(bs, &bdi) == 0) {
4069 return bdi.can_write_zeroes_with_unmap;
4070 }
4071
4072 return false;
4073}
4074
/* Parameter/result bundle handed to the bdrv_get_block_status() coroutine. */
typedef struct BdrvCoGetBlockStatusData {
    BlockDriverState *bs;    /* node to query */
    BlockDriverState *base;  /* NOTE(review): not set by the visible wrapper — confirm use */
    int64_t sector_num;      /* first sector of the query */
    int nb_sectors;          /* maximum number of sectors to consider */
    int *pnum;               /* out: sectors known to share the same state */
    int64_t ret;             /* out: BDRV_BLOCK_* status bits or -errno */
    bool done;               /* set once the coroutine has finished */
} BdrvCoGetBlockStatusData;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00004084
/*
 * Returns the allocation status of the specified sectors.
 * Drivers not implementing the functionality are assumed to not support
 * backing files, hence all their sectors are reported as allocated.
 *
 * If 'sector_num' is beyond the end of the disk image the return value is 0
 * and 'pnum' is set to 0.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
 * beyond the end of the disk image it will be clamped.
 */
static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
                                                     int64_t sector_num,
                                                     int nb_sectors, int *pnum)
{
    int64_t total_sectors;
    int64_t n;
    int64_t ret, ret2;

    total_sectors = bdrv_nb_sectors(bs);
    if (total_sectors < 0) {
        return total_sectors;
    }

    if (sector_num >= total_sectors) {
        *pnum = 0;
        return 0;
    }

    /* Clamp the request to the end of the image */
    n = total_sectors - sector_num;
    if (n < nb_sectors) {
        nb_sectors = n;
    }

    if (!bs->drv->bdrv_co_get_block_status) {
        /* Driver cannot report holes: treat everything as allocated data.
         * Protocol drivers additionally know the exact host offset. */
        *pnum = nb_sectors;
        ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
        if (bs->drv->protocol_name) {
            ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
        }
        return ret;
    }

    ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
    if (ret < 0) {
        *pnum = 0;
        return ret;
    }

    if (ret & BDRV_BLOCK_RAW) {
        /* Raw pass-through: the answer comes from the protocol layer */
        assert(ret & BDRV_BLOCK_OFFSET_VALID);
        return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
                                     *pnum, pnum);
    }

    if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
        ret |= BDRV_BLOCK_ALLOCATED;
    }

    if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
        if (bdrv_unallocated_blocks_are_zero(bs)) {
            ret |= BDRV_BLOCK_ZERO;
        } else if (bs->backing_hd) {
            /* Reads past the end of the backing file return zeroes */
            BlockDriverState *bs2 = bs->backing_hd;
            int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
            if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
                ret |= BDRV_BLOCK_ZERO;
            }
        }
    }

    if (bs->file &&
        (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
        (ret & BDRV_BLOCK_OFFSET_VALID)) {
        int file_pnum;

        ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
                                        *pnum, &file_pnum);
        if (ret2 >= 0) {
            /* Ignore errors.  This is just providing extra information, it
             * is useful but not necessary.
             */
            if (!file_pnum) {
                /* !file_pnum indicates an offset at or beyond the EOF; it is
                 * perfectly valid for the format block driver to point to such
                 * offsets, so catch it and mark everything as zero */
                ret |= BDRV_BLOCK_ZERO;
            } else {
                /* Limit request to the range reported by the protocol driver */
                *pnum = file_pnum;
                ret |= (ret2 & BDRV_BLOCK_ZERO);
            }
        }
    }

    return ret;
}
4186
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004187/* Coroutine wrapper for bdrv_get_block_status() */
4188static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004189{
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004190 BdrvCoGetBlockStatusData *data = opaque;
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004191 BlockDriverState *bs = data->bs;
4192
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004193 data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
4194 data->pnum);
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004195 data->done = true;
4196}
4197
/*
 * Synchronous wrapper around bdrv_co_get_block_status().
 *
 * See bdrv_co_get_block_status() for details.
 */
int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors, int *pnum)
{
    Coroutine *co;
    BdrvCoGetBlockStatusData data = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .pnum = pnum,
        .done = false,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_get_block_status_co_entry(&data);
    } else {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
        qemu_coroutine_enter(co, &data);
        /* Drive the node's event loop until the coroutine completes */
        while (!data.done) {
            aio_poll(aio_context, true);
        }
    }
    return data.ret;
}
4229
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004230int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
4231 int nb_sectors, int *pnum)
4232{
Paolo Bonzini4333bb72013-09-04 19:00:29 +02004233 int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
4234 if (ret < 0) {
4235 return ret;
4236 }
Kevin Wolf01fb2702014-07-07 17:00:37 +02004237 return !!(ret & BDRV_BLOCK_ALLOCATED);
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004238}
4239
/*
 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
 *
 * Return true if the given sector is allocated in any image between
 * BASE and TOP (inclusive).  BASE can be NULL to check if the given
 * sector is allocated in any image of the chain.  Return false otherwise,
 * or a negative errno on failure.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 *  the specified sector) that are known to be in the same
 *  allocated/unallocated state.
 *
 */
int bdrv_is_allocated_above(BlockDriverState *top,
                            BlockDriverState *base,
                            int64_t sector_num,
                            int nb_sectors, int *pnum)
{
    BlockDriverState *intermediate;
    int ret, n = nb_sectors;

    intermediate = top;
    while (intermediate && intermediate != base) {
        int pnum_inter;
        ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
                                &pnum_inter);
        if (ret < 0) {
            return ret;
        } else if (ret) {
            *pnum = pnum_inter;
            return 1;
        }

        /*
         * [sector_num, nb_sectors] is unallocated on top but intermediate
         * might have
         *
         * [sector_num+x, nr_sectors] allocated.
         */
        /* Shrink the reported run so it stays unallocated across layers */
        if (n > pnum_inter &&
            (intermediate == top ||
             sector_num + pnum_inter < intermediate->total_sectors)) {
            n = pnum_inter;
        }

        intermediate = intermediate->backing_hd;
    }

    *pnum = n;
    return 0;
}
4290
aliguori045df332009-03-05 23:00:48 +00004291const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
4292{
4293 if (bs->backing_hd && bs->backing_hd->encrypted)
4294 return bs->backing_file;
4295 else if (bs->encrypted)
4296 return bs->filename;
4297 else
4298 return NULL;
4299}
4300
/* Copy @bs's backing file name into @filename, truncated to
 * @filename_size bytes (pstrcpy always NUL-terminates). */
void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size)
{
    pstrcpy(filename, filename_size, bs->backing_file);
}
4306
/* Write @nb_sectors at @sector_num through the driver's compressed-write
 * hook.  Returns -ENOTSUP if the driver has none; the request range is
 * validated first.  Asserts that no dirty bitmaps are attached. */
int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
                          const uint8_t *buf, int nb_sectors)
{
    BlockDriver *drv = bs->drv;
    int ret;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (!drv->bdrv_write_compressed) {
        return -ENOTSUP;
    }
    ret = bdrv_check_request(bs, sector_num, nb_sectors);
    if (ret < 0) {
        return ret;
    }

    assert(QLIST_EMPTY(&bs->dirty_bitmaps));

    return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
}
ths3b46e622007-09-17 08:09:54 +00004328
bellardfaea38e2006-08-05 21:31:00 +00004329int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
4330{
4331 BlockDriver *drv = bs->drv;
4332 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00004333 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00004334 if (!drv->bdrv_get_info)
4335 return -ENOTSUP;
4336 memset(bdi, 0, sizeof(*bdi));
4337 return drv->bdrv_get_info(bs, bdi);
4338}
4339
Max Reitzeae041f2013-10-09 10:46:16 +02004340ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
4341{
4342 BlockDriver *drv = bs->drv;
4343 if (drv && drv->bdrv_get_specific_info) {
4344 return drv->bdrv_get_specific_info(bs);
4345 }
4346 return NULL;
4347}
4348
Christoph Hellwig45566e92009-07-10 23:11:57 +02004349int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
4350 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00004351{
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004352 QEMUIOVector qiov;
4353 struct iovec iov = {
4354 .iov_base = (void *) buf,
4355 .iov_len = size,
4356 };
4357
4358 qemu_iovec_init_external(&qiov, &iov, 1);
4359 return bdrv_writev_vmstate(bs, &qiov, pos);
4360}
4361
4362int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
4363{
aliguori178e08a2009-04-05 19:10:55 +00004364 BlockDriver *drv = bs->drv;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004365
4366 if (!drv) {
aliguori178e08a2009-04-05 19:10:55 +00004367 return -ENOMEDIUM;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004368 } else if (drv->bdrv_save_vmstate) {
4369 return drv->bdrv_save_vmstate(bs, qiov, pos);
4370 } else if (bs->file) {
4371 return bdrv_writev_vmstate(bs->file, qiov, pos);
4372 }
4373
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004374 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004375}
4376
Christoph Hellwig45566e92009-07-10 23:11:57 +02004377int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
4378 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00004379{
4380 BlockDriver *drv = bs->drv;
4381 if (!drv)
4382 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004383 if (drv->bdrv_load_vmstate)
4384 return drv->bdrv_load_vmstate(bs, buf, pos, size);
4385 if (bs->file)
4386 return bdrv_load_vmstate(bs->file, buf, pos, size);
4387 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004388}
4389
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004390void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
4391{
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004392 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004393 return;
4394 }
4395
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004396 bs->drv->bdrv_debug_event(bs, event);
Kevin Wolf41c695c2012-12-06 14:32:58 +01004397}
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004398
Kevin Wolf41c695c2012-12-06 14:32:58 +01004399int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
4400 const char *tag)
4401{
4402 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
4403 bs = bs->file;
4404 }
4405
4406 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
4407 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
4408 }
4409
4410 return -ENOTSUP;
4411}
4412
Fam Zheng4cc70e92013-11-20 10:01:54 +08004413int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
4414{
4415 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
4416 bs = bs->file;
4417 }
4418
4419 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
4420 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
4421 }
4422
4423 return -ENOTSUP;
4424}
4425
Kevin Wolf41c695c2012-12-06 14:32:58 +01004426int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
4427{
Max Reitz938789e2014-03-10 23:44:08 +01004428 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
Kevin Wolf41c695c2012-12-06 14:32:58 +01004429 bs = bs->file;
4430 }
4431
4432 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
4433 return bs->drv->bdrv_debug_resume(bs, tag);
4434 }
4435
4436 return -ENOTSUP;
4437}
4438
4439bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
4440{
4441 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
4442 bs = bs->file;
4443 }
4444
4445 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
4446 return bs->drv->bdrv_debug_is_suspended(bs, tag);
4447 }
4448
4449 return false;
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004450}
4451
Blue Swirl199630b2010-07-25 20:49:34 +00004452int bdrv_is_snapshot(BlockDriverState *bs)
4453{
4454 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
4455}
4456
/* backing_file can either be relative, or absolute, or a protocol.  If it is
 * relative, it must be relative to the chain.  So, passing in bs->filename
 * from a BDS as backing_file should not be done, as that may be relative to
 * the CWD rather than the chain.
 *
 * Returns the node in the backing chain of @bs whose filename matches
 * @backing_file, or NULL if there is no match (or on bad arguments). */
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
        const char *backing_file)
{
    char *filename_full = NULL;
    char *backing_file_full = NULL;
    char *filename_tmp = NULL;
    int is_protocol = 0;
    BlockDriverState *curr_bs = NULL;
    BlockDriverState *retval = NULL;

    if (!bs || !bs->drv || !backing_file) {
        return NULL;
    }

    /* Scratch buffers for path normalization, freed before returning. */
    filename_full = g_malloc(PATH_MAX);
    backing_file_full = g_malloc(PATH_MAX);
    filename_tmp = g_malloc(PATH_MAX);

    is_protocol = path_has_protocol(backing_file);

    for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {

        /* If either of the filename paths is actually a protocol, then
         * compare unmodified paths; otherwise make paths relative */
        if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
            if (strcmp(backing_file, curr_bs->backing_file) == 0) {
                retval = curr_bs->backing_hd;
                break;
            }
        } else {
            /* If not an absolute filename path, make it relative to the current
             * image's filename path */
            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
                         backing_file);

            /* We are going to compare absolute pathnames */
            if (!realpath(filename_tmp, filename_full)) {
                /* Path cannot be resolved (e.g. does not exist): skip
                 * this node rather than failing the whole search. */
                continue;
            }

            /* We need to make sure the backing filename we are comparing against
             * is relative to the current image filename (or absolute) */
            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
                         curr_bs->backing_file);

            if (!realpath(filename_tmp, backing_file_full)) {
                continue;
            }

            if (strcmp(backing_file_full, filename_full) == 0) {
                retval = curr_bs->backing_hd;
                break;
            }
        }
    }

    g_free(filename_full);
    g_free(backing_file_full);
    g_free(filename_tmp);
    return retval;
}
4522
Benoît Canetf198fd12012-08-02 10:22:47 +02004523int bdrv_get_backing_file_depth(BlockDriverState *bs)
4524{
4525 if (!bs->drv) {
4526 return 0;
4527 }
4528
4529 if (!bs->backing_hd) {
4530 return 0;
4531 }
4532
4533 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
4534}
4535
bellard83f64092006-08-01 16:21:11 +00004536/**************************************************************/
4537/* async I/Os */
4538
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004539BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
4540 QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004541 BlockCompletionFunc *cb, void *opaque)
aliguori3b69e4b2009-01-22 16:59:24 +00004542{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01004543 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
4544
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004545 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004546 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00004547}
4548
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004549BlockAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
4550 QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004551 BlockCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004552{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01004553 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
4554
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004555 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004556 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00004557}
4558
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004559BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
Paolo Bonzinid5ef94d2013-11-22 13:39:46 +01004560 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
Markus Armbruster097310b2014-10-07 13:59:15 +02004561 BlockCompletionFunc *cb, void *opaque)
Paolo Bonzinid5ef94d2013-11-22 13:39:46 +01004562{
4563 trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
4564
4565 return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
4566 BDRV_REQ_ZERO_WRITE | flags,
4567 cb, opaque, true);
4568}
4569
Kevin Wolf40b4f532009-09-09 17:53:37 +02004570
/* Shared completion state for a batch of merged writes submitted via
 * bdrv_aio_multiwrite(). */
typedef struct MultiwriteCB {
    int error;          /* first error seen among the batch, 0 if none */
    int num_requests;   /* submitted requests still outstanding */
    int num_callbacks;  /* number of original caller requests */
    struct {
        BlockCompletionFunc *cb;    /* caller's completion callback */
        void *opaque;               /* caller's opaque argument */
        QEMUIOVector *free_qiov;    /* merged qiov to destroy/free, or NULL */
    } callbacks[];
} MultiwriteCB;
4581
4582static void multiwrite_user_cb(MultiwriteCB *mcb)
4583{
4584 int i;
4585
4586 for (i = 0; i < mcb->num_callbacks; i++) {
4587 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01004588 if (mcb->callbacks[i].free_qiov) {
4589 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
4590 }
Anthony Liguori7267c092011-08-20 22:09:37 -05004591 g_free(mcb->callbacks[i].free_qiov);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004592 }
4593}
4594
4595static void multiwrite_cb(void *opaque, int ret)
4596{
4597 MultiwriteCB *mcb = opaque;
4598
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01004599 trace_multiwrite_cb(mcb, ret);
4600
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02004601 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02004602 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004603 }
4604
4605 mcb->num_requests--;
4606 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02004607 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05004608 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004609 }
4610}
4611
4612static int multiwrite_req_compare(const void *a, const void *b)
4613{
Christoph Hellwig77be4362010-05-19 20:53:10 +02004614 const BlockRequest *req1 = a, *req2 = b;
4615
4616 /*
4617 * Note that we can't simply subtract req2->sector from req1->sector
4618 * here as that could overflow the return value.
4619 */
4620 if (req1->sector > req2->sector) {
4621 return 1;
4622 } else if (req1->sector < req2->sector) {
4623 return -1;
4624 } else {
4625 return 0;
4626 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02004627}
4628
/*
 * Takes a bunch of requests and tries to merge them. Returns the number of
 * requests that remain after merging.
 *
 * Requests are merged in place in @reqs; merged entries get a freshly
 * allocated QEMUIOVector whose ownership is recorded in
 * mcb->callbacks[i].free_qiov so multiwrite_user_cb() can release it.
 */
static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
                            int num_reqs, MultiwriteCB *mcb)
{
    int i, outidx;

    // Sort requests by start sector
    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);

    // Check if adjacent requests touch the same clusters. If so, combine them,
    // filling up gaps with zero sectors.
    outidx = 0;
    for (i = 1; i < num_reqs; i++) {
        int merge = 0;
        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;

        // Handle exactly sequential writes and overlapping writes.
        if (reqs[i].sector <= oldreq_last) {
            merge = 1;
        }

        // Don't merge if the combined iovec would exceed the system limit.
        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
            merge = 0;
        }

        // Don't merge past the device's advertised transfer-length limit.
        if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors +
            reqs[i].nb_sectors > bs->bl.max_transfer_length) {
            merge = 0;
        }

        if (merge) {
            size_t size;
            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
            qemu_iovec_init(qiov,
                    reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);

            // Add the first request to the merged one. If the requests are
            // overlapping, drop the last sectors of the first request.
            size = (reqs[i].sector - reqs[outidx].sector) << 9;
            qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);

            // We shouldn't need to add any zeros between the two requests:
            // the merge condition above guarantees they touch or overlap.
            assert (reqs[i].sector <= oldreq_last);

            // Add the second request
            qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);

            // Add tail of first request, if necessary
            if (qiov->size < reqs[outidx].qiov->size) {
                qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
                                  reqs[outidx].qiov->size - qiov->size);
            }

            reqs[outidx].nb_sectors = qiov->size >> 9;
            reqs[outidx].qiov = qiov;

            // Record the merged qiov for later cleanup.
            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
        } else {
            // No merge: keep request i as the next output entry.
            outidx++;
            reqs[outidx].sector = reqs[i].sector;
            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
            reqs[outidx].qiov = reqs[i].qiov;
        }
    }

    // Account the number of requests eliminated by merging.
    block_acct_merge_done(&bs->stats, BLOCK_ACCT_WRITE, num_reqs - outidx - 1);

    return outidx + 1;
}
4701
/*
 * Submit multiple AIO write requests at once.
 *
 * On success, the function returns 0 and all requests in the reqs array have
 * been submitted. In error case this function returns -1, and any of the
 * requests may or may not be submitted yet. In particular, this means that the
 * callback will be called for some of the requests, for others it won't. The
 * caller must check the error field of the BlockRequest to wait for the right
 * callbacks (if error != 0, no callback will be called).
 *
 * The implementation may modify the contents of the reqs array, e.g. to merge
 * requests. However, the fields opaque and error are left unmodified as they
 * are used to signal failure for a single request to the caller.
 */
int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
{
    MultiwriteCB *mcb;
    int i;

    /* don't submit writes if we don't have a medium */
    if (bs->drv == NULL) {
        for (i = 0; i < num_reqs; i++) {
            reqs[i].error = -ENOMEDIUM;
        }
        return -1;
    }

    if (num_reqs == 0) {
        return 0;
    }

    // Create MultiwriteCB structure (flexible array sized for all callbacks)
    mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
    mcb->num_requests = 0;
    mcb->num_callbacks = num_reqs;

    // Remember every caller's callback before merging rearranges reqs.
    for (i = 0; i < num_reqs; i++) {
        mcb->callbacks[i].cb = reqs[i].cb;
        mcb->callbacks[i].opaque = reqs[i].opaque;
    }

    // Check for mergable requests
    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);

    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);

    /* Run the aio requests.  num_requests is set to the full count before
     * submitting so multiwrite_cb cannot see a premature zero. */
    mcb->num_requests = num_reqs;
    for (i = 0; i < num_reqs; i++) {
        bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
                              reqs[i].nb_sectors, reqs[i].flags,
                              multiwrite_cb, mcb,
                              true);
    }

    return 0;
}
4759
/* Synchronously cancel an AIOCB: request async cancellation, then poll
 * the request's AioContext until the operation's own reference is
 * dropped (refcnt falls to 1, i.e. only our temporary ref remains).
 * The completion callback runs before this function returns. */
void bdrv_aio_cancel(BlockAIOCB *acb)
{
    /* Hold a reference so the acb stays valid while we poll. */
    qemu_aio_ref(acb);
    bdrv_aio_cancel_async(acb);
    while (acb->refcnt > 1) {
        if (acb->aiocb_info->get_aio_context) {
            aio_poll(acb->aiocb_info->get_aio_context(acb), true);
        } else if (acb->bs) {
            aio_poll(bdrv_get_aio_context(acb->bs), true);
        } else {
            /* No way to drive completion forward: fatal. */
            abort();
        }
    }
    qemu_aio_unref(acb);
}
4775
4776/* Async version of aio cancel. The caller is not blocked if the acb implements
4777 * cancel_async, otherwise we do nothing and let the request normally complete.
4778 * In either case the completion callback must be called. */
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004779void bdrv_aio_cancel_async(BlockAIOCB *acb)
Fam Zheng02c50ef2014-09-11 13:41:09 +08004780{
4781 if (acb->aiocb_info->cancel_async) {
4782 acb->aiocb_info->cancel_async(acb);
4783 }
bellard83f64092006-08-01 16:21:11 +00004784}
4785
4786/**************************************************************/
4787/* async block device emulation */
4788
/* AIOCB for the synchronous-driver emulation path: the request is done
 * synchronously into/out of a bounce buffer and completion is signalled
 * from a bottom half. */
typedef struct BlockAIOCBSync {
    BlockAIOCB common;
    QEMUBH *bh;         /* bottom half that delivers the completion */
    int ret;            /* result to report to the caller */
    /* vector translation state */
    QEMUIOVector *qiov; /* caller's scatter/gather list */
    uint8_t *bounce;    /* linear bounce buffer (NULL on alloc failure) */
    int is_write;       /* nonzero for writes */
} BlockAIOCBSync;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004798
/* AIOCB descriptor for the synchronous emulation path. */
static const AIOCBInfo bdrv_em_aiocb_info = {
    .aiocb_size = sizeof(BlockAIOCBSync),
};
4802
/* Bottom half for the synchronous emulation path: copy read data back
 * into the caller's qiov, release the bounce buffer, deliver the
 * completion callback, then tear down the BH and drop the acb ref. */
static void bdrv_aio_bh_cb(void *opaque)
{
    BlockAIOCBSync *acb = opaque;

    /* Only successful reads have data to copy back out of the bounce. */
    if (!acb->is_write && acb->ret >= 0) {
        qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
    }
    qemu_vfree(acb->bounce);
    acb->common.cb(acb->common.opaque, acb->ret);
    qemu_bh_delete(acb->bh);
    acb->bh = NULL;
    qemu_aio_unref(acb);
}
bellardbeac80c2006-06-26 20:08:57 +00004816
/* Emulate async vectored I/O on top of a driver's synchronous
 * bdrv_read/bdrv_write: perform the I/O through a bounce buffer
 * immediately, then deliver the result from a scheduled bottom half. */
static BlockAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
                                      int64_t sector_num,
                                      QEMUIOVector *qiov,
                                      int nb_sectors,
                                      BlockCompletionFunc *cb,
                                      void *opaque,
                                      int is_write)
{
    BlockAIOCBSync *acb;

    acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
    acb->is_write = is_write;
    acb->qiov = qiov;
    /* May fail: the NULL case is turned into -ENOMEM below rather than
     * failing the call, so completion still goes through the BH. */
    acb->bounce = qemu_try_blockalign(bs, qiov->size);
    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);

    if (acb->bounce == NULL) {
        acb->ret = -ENOMEM;
    } else if (is_write) {
        qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
        acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
    } else {
        acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
    }

    /* Completion (and bounce cleanup) happens in bdrv_aio_bh_cb. */
    qemu_bh_schedule(acb->bh);

    return &acb->common;
}
4847
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004848static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +00004849 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004850 BlockCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004851{
aliguorif141eaf2009-04-07 18:43:24 +00004852 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00004853}
4854
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004855static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +00004856 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004857 BlockCompletionFunc *cb, void *opaque)
aliguorif141eaf2009-04-07 18:43:24 +00004858{
4859 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
4860}
4861
Kevin Wolf68485422011-06-30 10:05:46 +02004862
/* AIOCB for requests driven by a coroutine (bdrv_co_do_rw & friends). */
typedef struct BlockAIOCBCoroutine {
    BlockAIOCB common;
    BlockRequest req;   /* request parameters; req.error carries the result */
    bool is_write;      /* direction of the request */
    bool need_bh;       /* completion must be deferred to a bottom half */
    bool *done;         /* NOTE(review): not set in the visible code paths */
    QEMUBH* bh;         /* bottom half delivering deferred completion */
} BlockAIOCBCoroutine;
Kevin Wolf68485422011-06-30 10:05:46 +02004871
/* AIOCB descriptor for the coroutine emulation path. */
static const AIOCBInfo bdrv_em_co_aiocb_info = {
    .aiocb_size = sizeof(BlockAIOCBCoroutine),
};
4875
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01004876static void bdrv_co_complete(BlockAIOCBCoroutine *acb)
4877{
4878 if (!acb->need_bh) {
4879 acb->common.cb(acb->common.opaque, acb->req.error);
4880 qemu_aio_unref(acb);
4881 }
4882}
4883
Paolo Bonzini35246a62011-10-14 10:41:29 +02004884static void bdrv_co_em_bh(void *opaque)
Kevin Wolf68485422011-06-30 10:05:46 +02004885{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004886 BlockAIOCBCoroutine *acb = opaque;
Kevin Wolf68485422011-06-30 10:05:46 +02004887
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01004888 assert(!acb->need_bh);
Kevin Wolf68485422011-06-30 10:05:46 +02004889 qemu_bh_delete(acb->bh);
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01004890 bdrv_co_complete(acb);
4891}
4892
/* Called by the submitter after entering the request coroutine.  Clears
 * need_bh; if the request already finished while we were inside the
 * coroutine (req.error left -EINPROGRESS otherwise), schedule a bottom
 * half so the completion callback never runs before submission returns. */
static void bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb)
{
    acb->need_bh = false;
    if (acb->req.error != -EINPROGRESS) {
        BlockDriverState *bs = acb->common.bs;

        acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
        qemu_bh_schedule(acb->bh);
    }
}
4903
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004904/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
4905static void coroutine_fn bdrv_co_do_rw(void *opaque)
4906{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004907 BlockAIOCBCoroutine *acb = opaque;
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004908 BlockDriverState *bs = acb->common.bs;
4909
4910 if (!acb->is_write) {
4911 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004912 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004913 } else {
4914 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004915 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004916 }
4917
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01004918 bdrv_co_complete(acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004919}
4920
/* Common submission path for the coroutine-based AIO emulation: fill in
 * the AIOCB, run the request in a new coroutine, and arrange for the
 * completion callback to be delivered (possibly via a bottom half if
 * the coroutine finished before returning here). */
static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                         int64_t sector_num,
                                         QEMUIOVector *qiov,
                                         int nb_sectors,
                                         BdrvRequestFlags flags,
                                         BlockCompletionFunc *cb,
                                         void *opaque,
                                         bool is_write)
{
    Coroutine *co;
    BlockAIOCBCoroutine *acb;

    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
    /* Block completion until bdrv_co_maybe_schedule_bh below runs. */
    acb->need_bh = true;
    acb->req.error = -EINPROGRESS;  /* sentinel: request not finished yet */
    acb->req.sector = sector_num;
    acb->req.nb_sectors = nb_sectors;
    acb->req.qiov = qiov;
    acb->req.flags = flags;
    acb->is_write = is_write;

    co = qemu_coroutine_create(bdrv_co_do_rw);
    qemu_coroutine_enter(co, acb);

    bdrv_co_maybe_schedule_bh(acb);
    return &acb->common;
}
4948
Paolo Bonzini07f07612011-10-17 12:32:12 +02004949static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004950{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004951 BlockAIOCBCoroutine *acb = opaque;
Paolo Bonzini07f07612011-10-17 12:32:12 +02004952 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004953
Paolo Bonzini07f07612011-10-17 12:32:12 +02004954 acb->req.error = bdrv_co_flush(bs);
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01004955 bdrv_co_complete(acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004956}
4957
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004958BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Markus Armbruster097310b2014-10-07 13:59:15 +02004959 BlockCompletionFunc *cb, void *opaque)
Alexander Graf016f5cf2010-05-26 17:51:49 +02004960{
Paolo Bonzini07f07612011-10-17 12:32:12 +02004961 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004962
Paolo Bonzini07f07612011-10-17 12:32:12 +02004963 Coroutine *co;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004964 BlockAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02004965
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004966 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01004967 acb->need_bh = true;
4968 acb->req.error = -EINPROGRESS;
Kevin Wolfd318aea2012-11-13 16:35:08 +01004969
Paolo Bonzini07f07612011-10-17 12:32:12 +02004970 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
4971 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004972
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01004973 bdrv_co_maybe_schedule_bh(acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004974 return &acb->common;
4975}
4976
Paolo Bonzini4265d622011-10-17 12:32:14 +02004977static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
4978{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004979 BlockAIOCBCoroutine *acb = opaque;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004980 BlockDriverState *bs = acb->common.bs;
4981
4982 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01004983 bdrv_co_complete(acb);
Paolo Bonzini4265d622011-10-17 12:32:14 +02004984}
4985
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004986BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs,
Paolo Bonzini4265d622011-10-17 12:32:14 +02004987 int64_t sector_num, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004988 BlockCompletionFunc *cb, void *opaque)
Paolo Bonzini4265d622011-10-17 12:32:14 +02004989{
4990 Coroutine *co;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004991 BlockAIOCBCoroutine *acb;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004992
4993 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
4994
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004995 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01004996 acb->need_bh = true;
4997 acb->req.error = -EINPROGRESS;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004998 acb->req.sector = sector_num;
4999 acb->req.nb_sectors = nb_sectors;
5000 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
5001 qemu_coroutine_enter(co, acb);
5002
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01005003 bdrv_co_maybe_schedule_bh(acb);
Paolo Bonzini4265d622011-10-17 12:32:14 +02005004 return &acb->common;
5005}
5006
/* Register all block drivers compiled into the binary. */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}
pbrookce1a14d2006-08-07 02:38:06 +00005011
/* Like bdrv_init(), but restrict usable formats to the configured
 * whitelist. */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
5017
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01005018void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
Markus Armbruster097310b2014-10-07 13:59:15 +02005019 BlockCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00005020{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005021 BlockAIOCB *acb;
pbrookce1a14d2006-08-07 02:38:06 +00005022
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01005023 acb = g_slice_alloc(aiocb_info->aiocb_size);
5024 acb->aiocb_info = aiocb_info;
pbrookce1a14d2006-08-07 02:38:06 +00005025 acb->bs = bs;
5026 acb->cb = cb;
5027 acb->opaque = opaque;
Fam Zhengf197fe22014-09-11 13:41:08 +08005028 acb->refcnt = 1;
pbrookce1a14d2006-08-07 02:38:06 +00005029 return acb;
5030}
5031
Fam Zhengf197fe22014-09-11 13:41:08 +08005032void qemu_aio_ref(void *p)
5033{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005034 BlockAIOCB *acb = p;
Fam Zhengf197fe22014-09-11 13:41:08 +08005035 acb->refcnt++;
5036}
5037
Fam Zheng80074292014-09-11 13:41:28 +08005038void qemu_aio_unref(void *p)
pbrookce1a14d2006-08-07 02:38:06 +00005039{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005040 BlockAIOCB *acb = p;
Fam Zhengf197fe22014-09-11 13:41:08 +08005041 assert(acb->refcnt > 0);
5042 if (--acb->refcnt == 0) {
5043 g_slice_free1(acb->aiocb_info->aiocb_size, acb);
5044 }
pbrookce1a14d2006-08-07 02:38:06 +00005045}
bellard19cb3732006-08-19 11:45:59 +00005046
5047/**************************************************************/
Kevin Wolff9f05dc2011-07-15 13:50:26 +02005048/* Coroutine block device emulation */
5049
5050typedef struct CoroutineIOCompletion {
5051 Coroutine *coroutine;
5052 int ret;
5053} CoroutineIOCompletion;
5054
5055static void bdrv_co_io_em_complete(void *opaque, int ret)
5056{
5057 CoroutineIOCompletion *co = opaque;
5058
5059 co->ret = ret;
5060 qemu_coroutine_enter(co->coroutine, NULL);
5061}
5062
/* Emulate coroutine I/O on top of a driver's AIO interface: submit the
 * request with a callback that re-enters this coroutine, then yield
 * until the callback fires.
 *
 * Returns the driver's result, or -EIO if submission itself failed. */
static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *iov,
                                      bool is_write)
{
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockAIOCB *acb;

    if (is_write) {
        acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
                                       bdrv_co_io_em_complete, &co);
    } else {
        acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
                                      bdrv_co_io_em_complete, &co);
    }

    trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
    if (!acb) {
        return -EIO;
    }
    /* Sleep until bdrv_co_io_em_complete() re-enters us with the result. */
    qemu_coroutine_yield();

    return co.ret;
}
5088
5089static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
5090 int64_t sector_num, int nb_sectors,
5091 QEMUIOVector *iov)
5092{
5093 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
5094}
5095
5096static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
5097 int64_t sector_num, int nb_sectors,
5098 QEMUIOVector *iov)
5099{
5100 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
5101}
5102
/* Coroutine entry point for bdrv_flush(); result is reported via rwco->ret */
static void coroutine_fn bdrv_flush_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    rwco->ret = bdrv_co_flush(rwco->bs);
}
5109
/* Flush cached data for @bs to stable storage (coroutine context).
 *
 * Order of operations:
 *  1. bdrv_co_flush_to_os: push data from internal caches to the OS;
 *     done even with cache=unsafe.
 *  2. Unless BDRV_O_NO_FLUSH: force data to disk via
 *     bdrv_co_flush_to_disk, or the AIO flush emulation; drivers
 *     providing neither are assumed to need no flush and succeed.
 *  3. Recurse into bs->file so the underlying protocol is flushed too.
 *
 * Returns 0 on success or if there is nothing to do, -errno on failure. */
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    int ret;

    /* Nothing to flush without a medium, and read-only devices have no
     * dirty data of their own. */
    if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
        return 0;
    }

    /* Write back cached data to the OS even with cache=unsafe */
    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            return ret;
        }
    }

    /* But don't actually force it to the disk with cache=unsafe */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        goto flush_parent;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
    if (bs->drv->bdrv_co_flush_to_disk) {
        ret = bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        /* Emulate the coroutine flush on top of the driver's AIO flush */
        BlockAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            ret = -EIO;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
        }
    } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work because
         * that would break guests even if the server operates in writethrough
         * mode.
         *
         * Let's hope the user knows what he's doing.
         */
        ret = 0;
    }
    if (ret < 0) {
        return ret;
    }

    /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
     * in the case of cache=unsafe, so there are no useless flushes.
     */
flush_parent:
    return bdrv_co_flush(bs->file);
}
5172
/* Re-read image metadata after incoming migration.
 *
 * Only acts on devices carrying BDRV_O_INCOMING (set while the image is
 * still owned by the migration source); clears that flag, asks the
 * driver (or the underlying file) to drop cached metadata, then
 * refreshes the total sector count. Errors are reported via @errp. */
void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
{
    Error *local_err = NULL;
    int ret;

    if (!bs->drv) {
        return;
    }

    if (!(bs->open_flags & BDRV_O_INCOMING)) {
        return;
    }
    bs->open_flags &= ~BDRV_O_INCOMING;

    if (bs->drv->bdrv_invalidate_cache) {
        bs->drv->bdrv_invalidate_cache(bs, &local_err);
    } else if (bs->file) {
        /* No driver hook: recurse into the protocol layer instead */
        bdrv_invalidate_cache(bs->file, &local_err);
    }
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    /* The image may have been resized while we weren't looking */
    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        return;
    }
}
5203
/* Invalidate the cache of every registered BlockDriverState, taking each
 * device's AioContext around the call. Stops at the first error, which
 * is propagated through @errp. */
void bdrv_invalidate_cache_all(Error **errp)
{
    BlockDriverState *bs;
    Error *local_err = NULL;

    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        bdrv_invalidate_cache(bs, &local_err);
        aio_context_release(aio_context);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }
}
5221
/* Synchronous wrapper around bdrv_co_flush().
 *
 * In coroutine context the flush runs directly; otherwise a coroutine
 * is spawned and the device's AioContext is polled until it finishes.
 * Returns the flush result (0 or -errno). */
int bdrv_flush(BlockDriverState *bs)
{
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .ret = NOT_DONE,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_flush_co_entry(&rwco);
    } else {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        co = qemu_coroutine_create(bdrv_flush_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            aio_poll(aio_context, true);
        }
    }

    return rwco.ret;
}
5245
/* Parameter bundle passed to bdrv_discard_co_entry() by bdrv_discard() */
typedef struct DiscardCo {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    int ret;
} DiscardCo;
/* Coroutine entry point for bdrv_discard(); result goes to rwco->ret */
static void coroutine_fn bdrv_discard_co_entry(void *opaque)
{
    DiscardCo *rwco = opaque;

    rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
}
5258
5259int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
5260 int nb_sectors)
5261{
Max Reitzb9c64942015-02-05 13:58:25 -05005262 int max_discard, ret;
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005263
Paolo Bonzini4265d622011-10-17 12:32:14 +02005264 if (!bs->drv) {
5265 return -ENOMEDIUM;
Max Reitzb9c64942015-02-05 13:58:25 -05005266 }
5267
5268 ret = bdrv_check_request(bs, sector_num, nb_sectors);
5269 if (ret < 0) {
5270 return ret;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005271 } else if (bs->read_only) {
5272 return -EROFS;
Paolo Bonzinidf702c92013-01-14 16:26:58 +01005273 }
5274
Fam Zhenge4654d22013-11-13 18:29:43 +08005275 bdrv_reset_dirty(bs, sector_num, nb_sectors);
Paolo Bonzinidf702c92013-01-14 16:26:58 +01005276
Paolo Bonzini9e8f1832013-02-08 14:06:11 +01005277 /* Do nothing if disabled. */
5278 if (!(bs->open_flags & BDRV_O_UNMAP)) {
5279 return 0;
5280 }
5281
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005282 if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
Paolo Bonzini4265d622011-10-17 12:32:14 +02005283 return 0;
5284 }
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005285
Peter Lieven75af1f32015-02-06 11:54:11 +01005286 max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS);
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005287 while (nb_sectors > 0) {
5288 int ret;
5289 int num = nb_sectors;
5290
5291 /* align request */
5292 if (bs->bl.discard_alignment &&
5293 num >= bs->bl.discard_alignment &&
5294 sector_num % bs->bl.discard_alignment) {
5295 if (num > bs->bl.discard_alignment) {
5296 num = bs->bl.discard_alignment;
5297 }
5298 num -= sector_num % bs->bl.discard_alignment;
5299 }
5300
5301 /* limit request size */
5302 if (num > max_discard) {
5303 num = max_discard;
5304 }
5305
5306 if (bs->drv->bdrv_co_discard) {
5307 ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
5308 } else {
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005309 BlockAIOCB *acb;
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005310 CoroutineIOCompletion co = {
5311 .coroutine = qemu_coroutine_self(),
5312 };
5313
5314 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
5315 bdrv_co_io_em_complete, &co);
5316 if (acb == NULL) {
5317 return -EIO;
5318 } else {
5319 qemu_coroutine_yield();
5320 ret = co.ret;
5321 }
5322 }
Paolo Bonzini7ce21012013-11-22 13:39:47 +01005323 if (ret && ret != -ENOTSUP) {
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005324 return ret;
5325 }
5326
5327 sector_num += num;
5328 nb_sectors -= num;
5329 }
5330 return 0;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005331}
5332
/* Synchronous wrapper around bdrv_co_discard().
 *
 * Runs the discard directly when already in coroutine context;
 * otherwise spawns a coroutine and polls the device's AioContext until
 * it completes. Returns the discard result (0 or -errno). */
int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
{
    Coroutine *co;
    DiscardCo rwco = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .ret = NOT_DONE,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_discard_co_entry(&rwco);
    } else {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        co = qemu_coroutine_create(bdrv_discard_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            aio_poll(aio_context, true);
        }
    }

    return rwco.ret;
}
5358
Kevin Wolff9f05dc2011-07-15 13:50:26 +02005359/**************************************************************/
bellard19cb3732006-08-19 11:45:59 +00005360/* removable device support */
5361
5362/**
5363 * Return TRUE if the media is present
5364 */
5365int bdrv_is_inserted(BlockDriverState *bs)
5366{
5367 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02005368
bellard19cb3732006-08-19 11:45:59 +00005369 if (!drv)
5370 return 0;
5371 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02005372 return 1;
5373 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00005374}
5375
5376/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02005377 * Return whether the media changed since the last call to this
5378 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00005379 */
5380int bdrv_media_changed(BlockDriverState *bs)
5381{
5382 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00005383
Markus Armbruster8e49ca42011-08-03 15:08:08 +02005384 if (drv && drv->bdrv_media_changed) {
5385 return drv->bdrv_media_changed(bs);
5386 }
5387 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00005388}
5389
/**
 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
 */
void bdrv_eject(BlockDriverState *bs, bool eject_flag)
{
    BlockDriver *drv = bs->drv;
    const char *device_name;

    /* Best-effort: drivers without an eject hook are simply skipped */
    if (drv && drv->bdrv_eject) {
        drv->bdrv_eject(bs, eject_flag);
    }

    /* Emit the DEVICE_TRAY_MOVED QMP event, but only for named devices */
    device_name = bdrv_get_device_name(bs);
    if (device_name[0] != '\0') {
        qapi_event_send_device_tray_moved(device_name,
                                          eject_flag, &error_abort);
    }
}
5408
bellard19cb3732006-08-19 11:45:59 +00005409/**
5410 * Lock or unlock the media (if it is locked, the user won't be able
5411 * to eject it manually).
5412 */
Markus Armbruster025e8492011-09-06 18:58:47 +02005413void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00005414{
5415 BlockDriver *drv = bs->drv;
5416
Markus Armbruster025e8492011-09-06 18:58:47 +02005417 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01005418
Markus Armbruster025e8492011-09-06 18:58:47 +02005419 if (drv && drv->bdrv_lock_medium) {
5420 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00005421 }
5422}
ths985a03b2007-12-24 16:10:43 +00005423
5424/* needed for generic scsi interface */
5425
5426int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
5427{
5428 BlockDriver *drv = bs->drv;
5429
5430 if (drv && drv->bdrv_ioctl)
5431 return drv->bdrv_ioctl(bs, req, buf);
5432 return -ENOTSUP;
5433}
aliguori7d780662009-03-12 19:57:08 +00005434
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005435BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
aliguori221f7152009-03-28 17:28:41 +00005436 unsigned long int req, void *buf,
Markus Armbruster097310b2014-10-07 13:59:15 +02005437 BlockCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00005438{
aliguori221f7152009-03-28 17:28:41 +00005439 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00005440
aliguori221f7152009-03-28 17:28:41 +00005441 if (drv && drv->bdrv_aio_ioctl)
5442 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
5443 return NULL;
aliguori7d780662009-03-12 19:57:08 +00005444}
aliguorie268ca52009-04-22 20:20:00 +00005445
/* Record the block size used by the guest device; only stores the value
 * here — consumers of guest_block_size live elsewhere in the block layer. */
void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
{
    bs->guest_block_size = align;
}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005450
/* Allocate a buffer aligned to the optimal memory alignment for @bs
 * (as reported by bdrv_opt_mem_align). */
void *qemu_blockalign(BlockDriverState *bs, size_t size)
{
    return qemu_memalign(bdrv_opt_mem_align(bs), size);
}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005455
Max Reitz9ebd8442014-10-22 14:09:27 +02005456void *qemu_blockalign0(BlockDriverState *bs, size_t size)
5457{
5458 return memset(qemu_blockalign(bs, size), 0, size);
5459}
5460
/* Allocate an aligned buffer, returning NULL on allocation failure
 * (qemu_try_memalign) instead of treating it as fatal. */
void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
{
    size_t align = bdrv_opt_mem_align(bs);

    /* Ensure that NULL is never returned on success */
    assert(align > 0);
    if (size == 0) {
        /* round a zero-sized request up to one alignment unit so that a
         * successful allocation is always distinguishable from failure */
        size = align;
    }

    return qemu_try_memalign(align, size);
}
5473
Max Reitz9ebd8442014-10-22 14:09:27 +02005474void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
5475{
5476 void *mem = qemu_try_blockalign(bs, size);
5477
5478 if (mem) {
5479 memset(mem, 0, size);
5480 }
5481
5482 return mem;
5483}
5484
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005485/*
5486 * Check if all memory in this vector is sector aligned.
5487 */
5488bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
5489{
5490 int i;
Kevin Wolf339064d2013-11-28 10:23:32 +01005491 size_t alignment = bdrv_opt_mem_align(bs);
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005492
5493 for (i = 0; i < qiov->niov; i++) {
Kevin Wolf339064d2013-11-28 10:23:32 +01005494 if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005495 return false;
5496 }
Kevin Wolf339064d2013-11-28 10:23:32 +01005497 if (qiov->iov[i].iov_len % alignment) {
Kevin Wolf1ff735b2013-12-05 13:01:46 +01005498 return false;
5499 }
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005500 }
5501
5502 return true;
5503}
5504
Fam Zheng0db6e542015-04-17 19:49:50 -04005505BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
5506{
5507 BdrvDirtyBitmap *bm;
5508
5509 assert(name);
5510 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
5511 if (bm->name && !strcmp(name, bm->name)) {
5512 return bm;
5513 }
5514 }
5515 return NULL;
5516}
5517
/* Strip @bitmap's name, turning it into an anonymous bitmap. */
void bdrv_dirty_bitmap_make_anon(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
{
    g_free(bitmap->name);
    bitmap->name = NULL;
}
5523
/* Create a dirty bitmap covering the whole device and attach it to @bs.
 *
 * @granularity: bytes tracked per bitmap bit; must be a power of two
 *               and at least BDRV_SECTOR_SIZE (enforced by the asserts).
 * @name:        optional unique name; NULL creates an anonymous bitmap.
 *
 * Returns the new bitmap, or NULL on error (reported through @errp). */
BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
                                          int granularity,
                                          const char *name,
                                          Error **errp)
{
    int64_t bitmap_size;
    BdrvDirtyBitmap *bitmap;

    assert((granularity & (granularity - 1)) == 0);

    if (name && bdrv_find_dirty_bitmap(bs, name)) {
        error_setg(errp, "Bitmap already exists: %s", name);
        return NULL;
    }
    /* convert byte granularity to sectors; one bit tracks 'granularity'
     * sectors in the hbitmap below */
    granularity >>= BDRV_SECTOR_BITS;
    assert(granularity);
    bitmap_size = bdrv_nb_sectors(bs);
    if (bitmap_size < 0) {
        error_setg_errno(errp, -bitmap_size, "could not get length of device");
        /* NOTE(review): errno is also set here as a side channel —
         * unusual; verify whether any caller still relies on it */
        errno = -bitmap_size;
        return NULL;
    }
    bitmap = g_new0(BdrvDirtyBitmap, 1);
    bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(granularity));
    bitmap->name = g_strdup(name);
    QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
    return bitmap;
}
5552
/* Detach @bitmap from @bs and free it. Silently does nothing when the
 * bitmap is not attached to this BDS. */
void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
{
    BdrvDirtyBitmap *bm, *next;
    QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
        if (bm == bitmap) {
            QLIST_REMOVE(bitmap, list);
            hbitmap_free(bitmap->bitmap);
            g_free(bitmap->name);
            g_free(bitmap);
            return;
        }
    }
}
5566
/* Build a BlockDirtyInfoList describing every dirty bitmap on @bs
 * (for QMP queries). The caller owns the returned list — presumably
 * freed with qapi_free_BlockDirtyInfoList; verify against callers. */
BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
{
    BdrvDirtyBitmap *bm;
    BlockDirtyInfoList *list = NULL;
    /* tail pointer so entries are appended in iteration order */
    BlockDirtyInfoList **plist = &list;

    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
        BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
        BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
        info->count = bdrv_get_dirty_count(bs, bm);
        /* granularity is reported in bytes: sectors per bit * sector size */
        info->granularity =
            ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bm->bitmap));
        info->has_name = !!bm->name;
        info->name = g_strdup(bm->name);
        entry->value = info;
        *plist = entry;
        plist = &entry->next;
    }

    return list;
}
5588
Fam Zhenge4654d22013-11-13 18:29:43 +08005589int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005590{
Fam Zhenge4654d22013-11-13 18:29:43 +08005591 if (bitmap) {
5592 return hbitmap_get(bitmap->bitmap, sector);
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005593 } else {
5594 return 0;
5595 }
5596}
5597
/* Initialize @hbi to iterate over @bitmap's dirty bits from sector 0. */
void bdrv_dirty_iter_init(BlockDriverState *bs,
                          BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
{
    hbitmap_iter_init(hbi, bitmap->bitmap, 0);
}
5603
/* Mark [cur_sector, cur_sector + nr_sectors) dirty in a single bitmap. */
void bdrv_set_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
                           int64_t cur_sector, int nr_sectors)
{
    hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
}
5609
/* Clear [cur_sector, cur_sector + nr_sectors) in a single bitmap. */
void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
                             int64_t cur_sector, int nr_sectors)
{
    hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
}
5615
/* Mark a sector range dirty in every dirty bitmap attached to @bs. */
static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
                           int nr_sectors)
{
    BdrvDirtyBitmap *bitmap;
    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
        hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
    }
}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005624
/* Clear a sector range in every dirty bitmap attached to @bs. */
static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                             int nr_sectors)
{
    BdrvDirtyBitmap *bitmap;
    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
        hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
    }
}
5633
/* Return the number of dirty sectors tracked by @bitmap (hbitmap bits
 * index sectors; see bdrv_create_dirty_bitmap). */
int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
{
    return hbitmap_count(bitmap->bitmap);
}
5638
/* Get a reference to bs */
void bdrv_ref(BlockDriverState *bs)
{
    bs->refcnt++;
}
5644
/* Release a previously grabbed reference to bs.
 * If after releasing, reference count is zero, the BlockDriverState is
 * deleted. Accepts NULL as a no-op. */
void bdrv_unref(BlockDriverState *bs)
{
    if (!bs) {
        return;
    }
    assert(bs->refcnt > 0);
    if (--bs->refcnt == 0) {
        bdrv_delete(bs);
    }
}
5658
/* One reason why a category of operations (BlockOpType) is currently
 * blocked on a BDS; each BDS keeps a per-type list of these. */
struct BdrvOpBlocker {
    Error *reason;
    QLIST_ENTRY(BdrvOpBlocker) list;
};
5663
/* Return true if operation type @op is blocked on @bs; when it is, the
 * first blocker's reason is formatted into @errp (if non-NULL). */
bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
{
    BdrvOpBlocker *blocker;
    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
    if (!QLIST_EMPTY(&bs->op_blockers[op])) {
        blocker = QLIST_FIRST(&bs->op_blockers[op]);
        if (errp) {
            error_setg(errp, "Node '%s' is busy: %s",
                       bdrv_get_device_or_node_name(bs),
                       error_get_pretty(blocker->reason));
        }
        return true;
    }
    return false;
}
5679
/* Add @reason as a blocker for operation type @op on @bs.
 * The Error object itself stays owned by the caller: bdrv_op_unblock()
 * frees only the blocker node, not the reason. */
void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
{
    BdrvOpBlocker *blocker;
    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);

    blocker = g_new0(BdrvOpBlocker, 1);
    blocker->reason = reason;
    QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
}
5689
/* Remove every blocker for @op whose reason pointer matches @reason.
 * Only the blocker nodes are freed; @reason remains caller-owned. */
void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
{
    BdrvOpBlocker *blocker, *next;
    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
    QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
        if (blocker->reason == reason) {
            QLIST_REMOVE(blocker, list);
            g_free(blocker);
        }
    }
}
5701
5702void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
5703{
5704 int i;
5705 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5706 bdrv_op_block(bs, i, reason);
5707 }
5708}
5709
5710void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
5711{
5712 int i;
5713 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5714 bdrv_op_unblock(bs, i, reason);
5715 }
5716}
5717
5718bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
5719{
5720 int i;
5721
5722 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5723 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
5724 return false;
5725 }
5726 }
5727 return true;
5728}
5729
/* Enable I/O status tracking for @bs and reset it to OK. */
void bdrv_iostatus_enable(BlockDriverState *bs)
{
    bs->iostatus_enabled = true;
    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}
5735
/* The I/O status is only enabled if the drive explicitly
 * enables it _and_ the VM is configured to stop on errors
 * (enospc/stop policies on either the read or write error action). */
bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
{
    return (bs->iostatus_enabled &&
           (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
            bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
            bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
}
5745
/* Turn off I/O status tracking for @bs. */
void bdrv_iostatus_disable(BlockDriverState *bs)
{
    bs->iostatus_enabled = false;
}
5750
/* Reset the I/O status to OK (and the status of any running block job)
 * if status tracking is enabled for @bs. */
void bdrv_iostatus_reset(BlockDriverState *bs)
{
    if (bdrv_iostatus_is_enabled(bs)) {
        bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
        if (bs->job) {
            block_job_iostatus_reset(bs->job);
        }
    }
}
5760
/* Latch an I/O error into the device's I/O status.
 * Only the first error is recorded: once the status leaves OK, later
 * errors do not overwrite it. ENOSPC gets its own status value. */
void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
{
    assert(bdrv_iostatus_is_enabled(bs));
    if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
        bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
                                         BLOCK_DEVICE_IO_STATUS_FAILED;
    }
}
5769
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005770void bdrv_img_create(const char *filename, const char *fmt,
5771 const char *base_filename, const char *base_fmt,
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005772 char *options, uint64_t img_size, int flags,
5773 Error **errp, bool quiet)
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005774{
Chunyan Liu83d05212014-06-05 17:20:51 +08005775 QemuOptsList *create_opts = NULL;
5776 QemuOpts *opts = NULL;
5777 const char *backing_fmt, *backing_file;
5778 int64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005779 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00005780 BlockDriver *backing_drv = NULL;
Max Reitzcc84d902013-09-06 17:14:26 +02005781 Error *local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005782 int ret = 0;
5783
5784 /* Find driver and parse its options */
5785 drv = bdrv_find_format(fmt);
5786 if (!drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005787 error_setg(errp, "Unknown file format '%s'", fmt);
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005788 return;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005789 }
5790
Max Reitzb65a5e12015-02-05 13:58:12 -05005791 proto_drv = bdrv_find_protocol(filename, true, errp);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005792 if (!proto_drv) {
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005793 return;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005794 }
5795
Max Reitzc6149722014-12-02 18:32:45 +01005796 if (!drv->create_opts) {
5797 error_setg(errp, "Format driver '%s' does not support image creation",
5798 drv->format_name);
5799 return;
5800 }
5801
5802 if (!proto_drv->create_opts) {
5803 error_setg(errp, "Protocol driver '%s' does not support image creation",
5804 proto_drv->format_name);
5805 return;
5806 }
5807
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005808 create_opts = qemu_opts_append(create_opts, drv->create_opts);
5809 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005810
5811 /* Create parameter list with default values */
Chunyan Liu83d05212014-06-05 17:20:51 +08005812 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
Markus Armbruster39101f22015-02-12 16:46:36 +01005813 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005814
5815 /* Parse -o options */
5816 if (options) {
Markus Armbrusterdc523cd2015-02-12 18:37:11 +01005817 qemu_opts_do_parse(opts, options, NULL, &local_err);
5818 if (local_err) {
5819 error_report_err(local_err);
5820 local_err = NULL;
Chunyan Liu83d05212014-06-05 17:20:51 +08005821 error_setg(errp, "Invalid options for file format '%s'", fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005822 goto out;
5823 }
5824 }
5825
5826 if (base_filename) {
Markus Armbrusterf43e47d2015-02-12 17:52:20 +01005827 qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
Markus Armbruster6be41942015-02-12 17:49:02 +01005828 if (local_err) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005829 error_setg(errp, "Backing file not supported for file format '%s'",
5830 fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005831 goto out;
5832 }
5833 }
5834
5835 if (base_fmt) {
Markus Armbrusterf43e47d2015-02-12 17:52:20 +01005836 qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
Markus Armbruster6be41942015-02-12 17:49:02 +01005837 if (local_err) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005838 error_setg(errp, "Backing file format not supported for file "
5839 "format '%s'", fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005840 goto out;
5841 }
5842 }
5843
Chunyan Liu83d05212014-06-05 17:20:51 +08005844 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
5845 if (backing_file) {
5846 if (!strcmp(filename, backing_file)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005847 error_setg(errp, "Error: Trying to create an image with the "
5848 "same filename as the backing file");
Jes Sorensen792da932010-12-16 13:52:17 +01005849 goto out;
5850 }
5851 }
5852
Chunyan Liu83d05212014-06-05 17:20:51 +08005853 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
5854 if (backing_fmt) {
5855 backing_drv = bdrv_find_format(backing_fmt);
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00005856 if (!backing_drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005857 error_setg(errp, "Unknown backing file format '%s'",
Chunyan Liu83d05212014-06-05 17:20:51 +08005858 backing_fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005859 goto out;
5860 }
5861 }
5862
5863 // The size for the image must always be specified, with one exception:
5864 // If we are using a backing file, we can obtain the size from there
Chunyan Liu83d05212014-06-05 17:20:51 +08005865 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
5866 if (size == -1) {
5867 if (backing_file) {
Max Reitz66f6b812013-12-03 14:57:52 +01005868 BlockDriverState *bs;
Max Reitz29168012014-11-26 17:20:27 +01005869 char *full_backing = g_new0(char, PATH_MAX);
Markus Armbruster52bf1e72014-06-26 13:23:25 +02005870 int64_t size;
Paolo Bonzini63090da2012-04-12 14:01:03 +02005871 int back_flags;
5872
Max Reitz29168012014-11-26 17:20:27 +01005873 bdrv_get_full_backing_filename_from_filename(filename, backing_file,
5874 full_backing, PATH_MAX,
5875 &local_err);
5876 if (local_err) {
5877 g_free(full_backing);
5878 goto out;
5879 }
5880
Paolo Bonzini63090da2012-04-12 14:01:03 +02005881 /* backing files always opened read-only */
5882 back_flags =
5883 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005884
Max Reitzf67503e2014-02-18 18:33:05 +01005885 bs = NULL;
Max Reitz29168012014-11-26 17:20:27 +01005886 ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags,
Max Reitzcc84d902013-09-06 17:14:26 +02005887 backing_drv, &local_err);
Max Reitz29168012014-11-26 17:20:27 +01005888 g_free(full_backing);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005889 if (ret < 0) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005890 goto out;
5891 }
Markus Armbruster52bf1e72014-06-26 13:23:25 +02005892 size = bdrv_getlength(bs);
5893 if (size < 0) {
5894 error_setg_errno(errp, -size, "Could not get size of '%s'",
5895 backing_file);
5896 bdrv_unref(bs);
5897 goto out;
5898 }
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005899
Markus Armbruster39101f22015-02-12 16:46:36 +01005900 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
Max Reitz66f6b812013-12-03 14:57:52 +01005901
5902 bdrv_unref(bs);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005903 } else {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005904 error_setg(errp, "Image creation needs a size parameter");
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005905 goto out;
5906 }
5907 }
5908
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005909 if (!quiet) {
Fam Zheng43c5d8f2014-12-09 15:38:04 +08005910 printf("Formatting '%s', fmt=%s", filename, fmt);
5911 qemu_opts_print(opts, " ");
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005912 puts("");
5913 }
Chunyan Liu83d05212014-06-05 17:20:51 +08005914
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005915 ret = bdrv_create(drv, filename, opts, &local_err);
Chunyan Liu83d05212014-06-05 17:20:51 +08005916
Max Reitzcc84d902013-09-06 17:14:26 +02005917 if (ret == -EFBIG) {
5918 /* This is generally a better message than whatever the driver would
5919 * deliver (especially because of the cluster_size_hint), since that
5920 * is most probably not much different from "image too large". */
5921 const char *cluster_size_hint = "";
Chunyan Liu83d05212014-06-05 17:20:51 +08005922 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
Max Reitzcc84d902013-09-06 17:14:26 +02005923 cluster_size_hint = " (try using a larger cluster size)";
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005924 }
Max Reitzcc84d902013-09-06 17:14:26 +02005925 error_setg(errp, "The image size is too large for file format '%s'"
5926 "%s", fmt, cluster_size_hint);
5927 error_free(local_err);
5928 local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005929 }
5930
5931out:
Chunyan Liu83d05212014-06-05 17:20:51 +08005932 qemu_opts_del(opts);
5933 qemu_opts_free(create_opts);
Markus Armbruster84d18f02014-01-30 15:07:28 +01005934 if (local_err) {
Max Reitzcc84d902013-09-06 17:14:26 +02005935 error_propagate(errp, local_err);
5936 }
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005937}
Stefan Hajnoczi85d126f2013-03-07 13:41:48 +01005938
/* Return the AioContext @bs is currently bound to.  NULL while the BDS is
 * detached (bdrv_detach_aio_context() clears it). */
AioContext *bdrv_get_aio_context(BlockDriverState *bs)
{
    return bs->aio_context;
}
5943
/* Unbind @bs and its whole chain (file, backing_hd) from the current
 * AioContext.
 *
 * Teardown order: registered notifiers first, then throttling, the driver's
 * own hook, and finally the child nodes recursively.  bs->aio_context is
 * left NULL afterwards.  Callers are expected to have drained in-flight
 * requests beforehand (see bdrv_set_aio_context()).
 */
void bdrv_detach_aio_context(BlockDriverState *bs)
{
    BdrvAioNotifier *baf;

    /* No medium inserted: nothing is bound, nothing to do */
    if (!bs->drv) {
        return;
    }

    /* Let interested parties (e.g. users of
     * bdrv_add_aio_context_notifier()) clean up before teardown */
    QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
        baf->detach_aio_context(baf->opaque);
    }

    if (bs->io_limits_enabled) {
        throttle_detach_aio_context(&bs->throttle_state);
    }
    if (bs->drv->bdrv_detach_aio_context) {
        bs->drv->bdrv_detach_aio_context(bs);
    }
    if (bs->file) {
        bdrv_detach_aio_context(bs->file);
    }
    if (bs->backing_hd) {
        bdrv_detach_aio_context(bs->backing_hd);
    }

    bs->aio_context = NULL;
}
5971
/* Bind @bs and its whole chain to @new_context.
 *
 * Mirror image of bdrv_detach_aio_context(): children (backing_hd, file)
 * are attached first, then the driver's hook, throttling, and finally the
 * registered notifiers are informed.
 */
void bdrv_attach_aio_context(BlockDriverState *bs,
                             AioContext *new_context)
{
    BdrvAioNotifier *ban;

    /* No medium inserted: nothing to bind */
    if (!bs->drv) {
        return;
    }

    bs->aio_context = new_context;

    if (bs->backing_hd) {
        bdrv_attach_aio_context(bs->backing_hd, new_context);
    }
    if (bs->file) {
        bdrv_attach_aio_context(bs->file, new_context);
    }
    if (bs->drv->bdrv_attach_aio_context) {
        bs->drv->bdrv_attach_aio_context(bs, new_context);
    }
    if (bs->io_limits_enabled) {
        throttle_attach_aio_context(&bs->throttle_state, new_context);
    }

    /* Notify last, once the node is fully functional in the new context */
    QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
        ban->attached_aio_context(new_context, ban->opaque);
    }
}
6000
/* Move @bs (and its chain) from its current AioContext to @new_context.
 *
 * All in-flight requests are drained first; the detach runs in the old
 * context's thread, and the new context is acquired around the attach in
 * case it belongs to a different thread.
 */
void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
{
    bdrv_drain_all(); /* ensure there are no in-flight requests */

    bdrv_detach_aio_context(bs);

    /* This function executes in the old AioContext so acquire the new one in
     * case it runs in a different thread.
     */
    aio_context_acquire(new_context);
    bdrv_attach_aio_context(bs, new_context);
    aio_context_release(new_context);
}
Stefan Hajnoczid616b222013-06-24 17:13:10 +02006014
Max Reitz33384422014-06-20 21:57:33 +02006015void bdrv_add_aio_context_notifier(BlockDriverState *bs,
6016 void (*attached_aio_context)(AioContext *new_context, void *opaque),
6017 void (*detach_aio_context)(void *opaque), void *opaque)
6018{
6019 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
6020 *ban = (BdrvAioNotifier){
6021 .attached_aio_context = attached_aio_context,
6022 .detach_aio_context = detach_aio_context,
6023 .opaque = opaque
6024 };
6025
6026 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
6027}
6028
6029void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
6030 void (*attached_aio_context)(AioContext *,
6031 void *),
6032 void (*detach_aio_context)(void *),
6033 void *opaque)
6034{
6035 BdrvAioNotifier *ban, *ban_next;
6036
6037 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
6038 if (ban->attached_aio_context == attached_aio_context &&
6039 ban->detach_aio_context == detach_aio_context &&
6040 ban->opaque == opaque)
6041 {
6042 QLIST_REMOVE(ban, list);
6043 g_free(ban);
6044
6045 return;
6046 }
6047 }
6048
6049 abort();
6050}
6051
/* Register @notifier on @bs's before-write notifier list.  NOTE(review):
 * presumably the block layer runs these (NotifierWithReturn, so they can
 * report failure) ahead of each write request -- confirm at the call site
 * of the notifier list. */
void bdrv_add_before_write_notifier(BlockDriverState *bs,
                                    NotifierWithReturn *notifier)
{
    notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
}
Max Reitz6f176b42013-09-03 10:09:50 +02006057
Max Reitz77485432014-10-27 11:12:50 +01006058int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
6059 BlockDriverAmendStatusCB *status_cb)
Max Reitz6f176b42013-09-03 10:09:50 +02006060{
Chunyan Liuc282e1f2014-06-05 17:21:11 +08006061 if (!bs->drv->bdrv_amend_options) {
Max Reitz6f176b42013-09-03 10:09:50 +02006062 return -ENOTSUP;
6063 }
Max Reitz77485432014-10-27 11:12:50 +01006064 return bs->drv->bdrv_amend_options(bs, opts, status_cb);
Max Reitz6f176b42013-09-03 10:09:50 +02006065}
Benoît Canetf6186f42013-10-02 14:33:48 +02006066
Benoît Canetb5042a32014-03-03 19:11:34 +01006067/* This function will be called by the bdrv_recurse_is_first_non_filter method
6068 * of block filter and by bdrv_is_first_non_filter.
6069 * It is used to test if the given bs is the candidate or recurse more in the
6070 * node graph.
Benoît Canet212a5a82014-01-23 21:31:36 +01006071 */
Benoît Canet212a5a82014-01-23 21:31:36 +01006072bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
6073 BlockDriverState *candidate)
Benoît Canetf6186f42013-10-02 14:33:48 +02006074{
Benoît Canetb5042a32014-03-03 19:11:34 +01006075 /* return false if basic checks fails */
6076 if (!bs || !bs->drv) {
6077 return false;
6078 }
6079
6080 /* the code reached a non block filter driver -> check if the bs is
6081 * the same as the candidate. It's the recursion termination condition.
6082 */
6083 if (!bs->drv->is_filter) {
6084 return bs == candidate;
6085 }
6086 /* Down this path the driver is a block filter driver */
6087
6088 /* If the block filter recursion method is defined use it to recurse down
6089 * the node graph.
6090 */
6091 if (bs->drv->bdrv_recurse_is_first_non_filter) {
Benoît Canet212a5a82014-01-23 21:31:36 +01006092 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
6093 }
6094
Benoît Canetb5042a32014-03-03 19:11:34 +01006095 /* the driver is a block filter but don't allow to recurse -> return false
6096 */
6097 return false;
Benoît Canet212a5a82014-01-23 21:31:36 +01006098}
6099
6100/* This function checks if the candidate is the first non filter bs down it's
6101 * bs chain. Since we don't have pointers to parents it explore all bs chains
6102 * from the top. Some filters can choose not to pass down the recursion.
6103 */
6104bool bdrv_is_first_non_filter(BlockDriverState *candidate)
6105{
6106 BlockDriverState *bs;
6107
6108 /* walk down the bs forest recursively */
6109 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
6110 bool perm;
6111
Benoît Canetb5042a32014-03-03 19:11:34 +01006112 /* try to recurse in this top level bs */
Kevin Wolfe6dc8a12014-02-04 11:45:31 +01006113 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
Benoît Canet212a5a82014-01-23 21:31:36 +01006114
6115 /* candidate is the first non filter */
6116 if (perm) {
6117 return true;
6118 }
6119 }
6120
6121 return false;
Benoît Canetf6186f42013-10-02 14:33:48 +02006122}
Benoît Canet09158f02014-06-27 18:25:25 +02006123
/* Look up the node named @node_name and check whether it may be replaced.
 *
 * Returns the node's BDS on success.  Returns NULL with @errp set when the
 * node does not exist, is blocked for BLOCK_OP_TYPE_REPLACE, or is not the
 * top-most non-filter node of its chain.  The node's AioContext is acquired
 * only for the duration of the checks.
 */
BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
{
    BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
    AioContext *aio_context;

    if (!to_replace_bs) {
        error_setg(errp, "Node name '%s' not found", node_name);
        return NULL;
    }

    aio_context = bdrv_get_aio_context(to_replace_bs);
    aio_context_acquire(aio_context);

    /* bdrv_op_is_blocked() fills @errp itself when blocked */
    if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
        to_replace_bs = NULL;
        goto out;
    }

    /* We don't want arbitrary node of the BDS chain to be replaced only the top
     * most non filter in order to prevent data corruption.
     * Another benefit is that this tests exclude backing files which are
     * blocked by the backing blockers.
     */
    if (!bdrv_is_first_non_filter(to_replace_bs)) {
        error_setg(errp, "Only top most non filter can be replaced");
        to_replace_bs = NULL;
        goto out;
    }

out:
    aio_context_release(aio_context);
    return to_replace_bs;
}
Ming Lei448ad912014-07-04 18:04:33 +08006157
6158void bdrv_io_plug(BlockDriverState *bs)
6159{
6160 BlockDriver *drv = bs->drv;
6161 if (drv && drv->bdrv_io_plug) {
6162 drv->bdrv_io_plug(bs);
6163 } else if (bs->file) {
6164 bdrv_io_plug(bs->file);
6165 }
6166}
6167
6168void bdrv_io_unplug(BlockDriverState *bs)
6169{
6170 BlockDriver *drv = bs->drv;
6171 if (drv && drv->bdrv_io_unplug) {
6172 drv->bdrv_io_unplug(bs);
6173 } else if (bs->file) {
6174 bdrv_io_unplug(bs->file);
6175 }
6176}
6177
6178void bdrv_flush_io_queue(BlockDriverState *bs)
6179{
6180 BlockDriver *drv = bs->drv;
6181 if (drv && drv->bdrv_flush_io_queue) {
6182 drv->bdrv_flush_io_queue(bs);
6183 } else if (bs->file) {
6184 bdrv_flush_io_queue(bs->file);
6185 }
6186}
Max Reitz91af7012014-07-18 20:24:56 +02006187
6188static bool append_open_options(QDict *d, BlockDriverState *bs)
6189{
6190 const QDictEntry *entry;
6191 bool found_any = false;
6192
6193 for (entry = qdict_first(bs->options); entry;
6194 entry = qdict_next(bs->options, entry))
6195 {
6196 /* Only take options for this level and exclude all non-driver-specific
6197 * options */
6198 if (!strchr(qdict_entry_key(entry), '.') &&
6199 strcmp(qdict_entry_key(entry), "node-name"))
6200 {
6201 qobject_incref(qdict_entry_value(entry));
6202 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
6203 found_any = true;
6204 }
6205 }
6206
6207 return found_any;
6208}
6209
/* Updates the following BDS fields:
 *  - exact_filename: A filename which may be used for opening a block device
 *                    which (mostly) equals the given BDS (even without any
 *                    other options; so reading and writing must return the same
 *                    results, but caching etc. may be different)
 *  - full_open_options: Options which, when given when opening a block device
 *                       (without a filename), result in a BDS (mostly)
 *                       equalling the given one
 *  - filename: If exact_filename is set, it is copied here. Otherwise,
 *              full_open_options is converted to a JSON object, prefixed with
 *              "json:" (for use through the JSON pseudo protocol) and put here.
 */
void bdrv_refresh_filename(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    QDict *opts;

    if (!drv) {
        return;
    }

    /* This BDS's file name will most probably depend on its file's name, so
     * refresh that first */
    if (bs->file) {
        bdrv_refresh_filename(bs->file);
    }

    if (drv->bdrv_refresh_filename) {
        /* Obsolete information is of no use here, so drop the old file name
         * information before refreshing it */
        bs->exact_filename[0] = '\0';
        if (bs->full_open_options) {
            QDECREF(bs->full_open_options);
            bs->full_open_options = NULL;
        }

        /* The driver knows best how to describe itself */
        drv->bdrv_refresh_filename(bs);
    } else if (bs->file) {
        /* Try to reconstruct valid information from the underlying file */
        bool has_open_options;

        bs->exact_filename[0] = '\0';
        if (bs->full_open_options) {
            QDECREF(bs->full_open_options);
            bs->full_open_options = NULL;
        }

        opts = qdict_new();
        has_open_options = append_open_options(opts, bs);

        /* If no specific options have been given for this BDS, the filename of
         * the underlying file should suffice for this one as well */
        if (bs->file->exact_filename[0] && !has_open_options) {
            strcpy(bs->exact_filename, bs->file->exact_filename);
        }
        /* Reconstructing the full options QDict is simple for most format block
         * drivers, as long as the full options are known for the underlying
         * file BDS. The full options QDict of that file BDS should somehow
         * contain a representation of the filename, therefore the following
         * suffices without querying the (exact_)filename of this BDS. */
        if (bs->file->full_open_options) {
            qdict_put_obj(opts, "driver",
                          QOBJECT(qstring_from_str(drv->format_name)));
            /* The child's options dict is shared, so take a reference */
            QINCREF(bs->file->full_open_options);
            qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));

            bs->full_open_options = opts;
        } else {
            QDECREF(opts);
        }
    } else if (!bs->full_open_options && qdict_size(bs->options)) {
        /* There is no underlying file BDS (at least referenced by BDS.file),
         * so the full options QDict should be equal to the options given
         * specifically for this block device when it was opened (plus the
         * driver specification).
         * Because those options don't change, there is no need to update
         * full_open_options when it's already set. */

        opts = qdict_new();
        append_open_options(opts, bs);
        qdict_put_obj(opts, "driver",
                      QOBJECT(qstring_from_str(drv->format_name)));

        if (bs->exact_filename[0]) {
            /* This may not work for all block protocol drivers (some may
             * require this filename to be parsed), but we have to find some
             * default solution here, so just include it. If some block driver
             * does not support pure options without any filename at all or
             * needs some special format of the options QDict, it needs to
             * implement the driver-specific bdrv_refresh_filename() function.
             */
            qdict_put_obj(opts, "filename",
                          QOBJECT(qstring_from_str(bs->exact_filename)));
        }

        bs->full_open_options = opts;
    }

    /* Finally derive bs->filename: prefer the plain filename, fall back to
     * the "json:{...}" pseudo-protocol form built from full_open_options */
    if (bs->exact_filename[0]) {
        pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
    } else if (bs->full_open_options) {
        QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
        snprintf(bs->filename, sizeof(bs->filename), "json:%s",
                 qstring_get_str(json));
        QDECREF(json);
    }
}
Benoît Canet5366d0c2014-09-05 15:46:18 +02006317
/* This accessor function purpose is to allow the device models to access the
 * BlockAcctStats structure embedded inside a BlockDriverState without being
 * aware of the BlockDriverState structure layout.
 * It will go away when the BlockAcctStats structure will be moved inside
 * the device models.
 */
BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
{
    /* Returns a pointer into @bs; lifetime is tied to the BDS itself */
    return &bs->stats;
}