]> git.proxmox.com Git - mirror_qemu.git/blame - block.c
block: Use bdrv_nb_sectors() in img_convert()
[mirror_qemu.git] / block.c
CommitLineData
fc01f7e7
FB
1/*
2 * QEMU System Emulator block driver
5fafdf24 3 *
fc01f7e7 4 * Copyright (c) 2003 Fabrice Bellard
5fafdf24 5 *
fc01f7e7
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
3990d09a 24#include "config-host.h"
faf07963 25#include "qemu-common.h"
6d519a5f 26#include "trace.h"
737e150e
PB
27#include "block/block_int.h"
28#include "block/blockjob.h"
1de7afc9 29#include "qemu/module.h"
7b1b5d19 30#include "qapi/qmp/qjson.h"
9c17d615 31#include "sysemu/sysemu.h"
1de7afc9 32#include "qemu/notify.h"
737e150e 33#include "block/coroutine.h"
c13163fb 34#include "block/qapi.h"
b2023818 35#include "qmp-commands.h"
1de7afc9 36#include "qemu/timer.h"
a5ee7bd4 37#include "qapi-event.h"
fc01f7e7 38
71e72a19 39#ifdef CONFIG_BSD
7674e7bf
FB
40#include <sys/types.h>
41#include <sys/stat.h>
42#include <sys/ioctl.h>
72cf2d4f 43#include <sys/queue.h>
c5e97233 44#ifndef __DragonFly__
7674e7bf
FB
45#include <sys/disk.h>
46#endif
c5e97233 47#endif
7674e7bf 48
49dc768d
AL
49#ifdef _WIN32
50#include <windows.h>
51#endif
52
e4654d2d
FZ
53struct BdrvDirtyBitmap {
54 HBitmap *bitmap;
55 QLIST_ENTRY(BdrvDirtyBitmap) list;
56};
57
1c9805a3
SH
58#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
59
7d4b4ba5 60static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
f141eafe
AL
61static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
62 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
c87c0672 63 BlockDriverCompletionFunc *cb, void *opaque);
f141eafe
AL
64static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
65 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 66 BlockDriverCompletionFunc *cb, void *opaque);
f9f05dc5
KW
67static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
68 int64_t sector_num, int nb_sectors,
69 QEMUIOVector *iov);
70static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
71 int64_t sector_num, int nb_sectors,
72 QEMUIOVector *iov);
775aa8b6
KW
73static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
74 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
470c0504 75 BdrvRequestFlags flags);
775aa8b6
KW
76static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
77 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
f08f2dda 78 BdrvRequestFlags flags);
b2a61371
SH
79static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
80 int64_t sector_num,
81 QEMUIOVector *qiov,
82 int nb_sectors,
d20d9b7c 83 BdrvRequestFlags flags,
b2a61371
SH
84 BlockDriverCompletionFunc *cb,
85 void *opaque,
8c5873d6 86 bool is_write);
b2a61371 87static void coroutine_fn bdrv_co_do_rw(void *opaque);
621f0589 88static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
aa7bfbff 89 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
ec530c81 90
1b7bdbc1
SH
91static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
92 QTAILQ_HEAD_INITIALIZER(bdrv_states);
7ee930d0 93
dc364f4c
BC
94static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
95 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
96
8a22f02a
SH
97static QLIST_HEAD(, BlockDriver) bdrv_drivers =
98 QLIST_HEAD_INITIALIZER(bdrv_drivers);
ea2384d3 99
eb852011
MA
100/* If non-zero, use only whitelisted block drivers */
101static int use_bdrv_whitelist;
102
9e0b22f4
SH
103#ifdef _WIN32
/*
 * Return non-zero if filename begins with an ASCII letter immediately
 * followed by ':' (for instance "c:"), zero otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_lower = (letter >= 'a' && letter <= 'z');
    const int is_upper = (letter >= 'A' && letter <= 'Z');

    if (!is_lower && !is_upper) {
        /* not a drive letter; the second character is never inspected */
        return 0;
    }
    return filename[1] == ':';
}
110
/*
 * Return 1 if filename is exactly a drive specification ("<letter>:") or
 * starts with one of the device prefixes "\\.\" / "//./"; 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    /* bare drive such as "d:" with nothing after the colon */
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    /* device namespace path, with either kind of slash */
    if (strstart(filename, "\\\\.\\", NULL)) {
        return 1;
    }
    if (strstart(filename, "//./", NULL)) {
        return 1;
    }

    return 0;
}
121#endif
122
0563e191 123/* throttling disk I/O limits */
cc0681c4
BC
124void bdrv_set_io_limits(BlockDriverState *bs,
125 ThrottleConfig *cfg)
98f90dba 126{
cc0681c4 127 int i;
98f90dba 128
cc0681c4 129 throttle_config(&bs->throttle_state, cfg);
98f90dba 130
cc0681c4
BC
131 for (i = 0; i < 2; i++) {
132 qemu_co_enter_next(&bs->throttled_reqs[i]);
98f90dba 133 }
cc0681c4
BC
134}
135
136/* this function drain all the throttled IOs */
137static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
138{
139 bool drained = false;
140 bool enabled = bs->io_limits_enabled;
141 int i;
142
143 bs->io_limits_enabled = false;
144
145 for (i = 0; i < 2; i++) {
146 while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
147 drained = true;
148 }
149 }
150
151 bs->io_limits_enabled = enabled;
98f90dba 152
cc0681c4 153 return drained;
98f90dba
ZYW
154}
155
cc0681c4 156void bdrv_io_limits_disable(BlockDriverState *bs)
0563e191 157{
cc0681c4 158 bs->io_limits_enabled = false;
0563e191 159
cc0681c4
BC
160 bdrv_start_throttled_reqs(bs);
161
162 throttle_destroy(&bs->throttle_state);
0563e191
ZYW
163}
164
cc0681c4 165static void bdrv_throttle_read_timer_cb(void *opaque)
0563e191 166{
cc0681c4
BC
167 BlockDriverState *bs = opaque;
168 qemu_co_enter_next(&bs->throttled_reqs[0]);
0563e191
ZYW
169}
170
cc0681c4 171static void bdrv_throttle_write_timer_cb(void *opaque)
0563e191 172{
cc0681c4
BC
173 BlockDriverState *bs = opaque;
174 qemu_co_enter_next(&bs->throttled_reqs[1]);
0563e191
ZYW
175}
176
cc0681c4
BC
177/* should be called before bdrv_set_io_limits if a limit is set */
178void bdrv_io_limits_enable(BlockDriverState *bs)
179{
180 assert(!bs->io_limits_enabled);
181 throttle_init(&bs->throttle_state,
13af91eb 182 bdrv_get_aio_context(bs),
cc0681c4
BC
183 QEMU_CLOCK_VIRTUAL,
184 bdrv_throttle_read_timer_cb,
185 bdrv_throttle_write_timer_cb,
186 bs);
187 bs->io_limits_enabled = true;
188}
189
190/* This function makes an IO wait if needed
191 *
192 * @nb_sectors: the number of sectors of the IO
193 * @is_write: is the IO a write
194 */
98f90dba 195static void bdrv_io_limits_intercept(BlockDriverState *bs,
d5103588 196 unsigned int bytes,
cc0681c4 197 bool is_write)
98f90dba 198{
cc0681c4
BC
199 /* does this io must wait */
200 bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);
98f90dba 201
cc0681c4
BC
202 /* if must wait or any request of this type throttled queue the IO */
203 if (must_wait ||
204 !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
205 qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
98f90dba
ZYW
206 }
207
cc0681c4 208 /* the IO will be executed, do the accounting */
d5103588
KW
209 throttle_account(&bs->throttle_state, is_write, bytes);
210
98f90dba 211
cc0681c4
BC
212 /* if the next request must wait -> do nothing */
213 if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
214 return;
98f90dba
ZYW
215 }
216
cc0681c4
BC
217 /* else queue next request for execution */
218 qemu_co_queue_next(&bs->throttled_reqs[is_write]);
98f90dba
ZYW
219}
220
339064d5
KW
221size_t bdrv_opt_mem_align(BlockDriverState *bs)
222{
223 if (!bs || !bs->drv) {
224 /* 4k should be on the safe side */
225 return 4096;
226 }
227
228 return bs->bl.opt_mem_alignment;
229}
230
9e0b22f4
SH
/*
 * Check whether path starts with "<protocol>:", i.e. whether the first
 * delimiter found in it is a colon rather than a path separator.
 * On Windows, drive specifications are never treated as protocols.
 */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    static const char delimiters[] = ":/\\";

    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    static const char delimiters[] = ":/";
#endif

    /* index of the first delimiter, or of the terminating NUL */
    return path[strcspn(path, delimiters)] == ':';
}
248
/*
 * Return non-zero if path is absolute.
 *
 * A leading '/' always counts.  On Windows a leading backslash and any
 * drive specification (including names like "\\.\d:") count as well.
 */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (path[0] == '\\') {
        return 1;
    }
#endif
    return path[0] == '/';
}
261
83f64091
FB
/*
 * Build in dest (of dest_size bytes) the path of filename.
 *
 * An absolute filename is copied unchanged.  Otherwise filename is taken
 * relative to base_path: the part of base_path up to and including its last
 * path separator (or its first ':' if that comes later) is kept and
 * filename is appended to it.  URLs are supported.  Nothing is written when
 * dest_size is not positive; the result is truncated to fit.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *prefix_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just past the first ':' of base_path, if there is one */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* position just past the last path separator, if there is one */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash;
        backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* keep whichever prefix reaches further into base_path */
    prefix_end = (after_sep > after_colon) ? after_sep : after_colon;

    prefix_len = prefix_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
305
dc5a1371
PB
306void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
307{
308 if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
309 pstrcpy(dest, sz, bs->backing_file);
310 } else {
311 path_combine(dest, sz, bs->filename, bs->backing_file);
312 }
313}
314
5efa9d5a 315void bdrv_register(BlockDriver *bdrv)
ea2384d3 316{
8c5873d6
SH
317 /* Block drivers without coroutine functions need emulation */
318 if (!bdrv->bdrv_co_readv) {
f9f05dc5
KW
319 bdrv->bdrv_co_readv = bdrv_co_readv_em;
320 bdrv->bdrv_co_writev = bdrv_co_writev_em;
321
f8c35c1d
SH
322 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
323 * the block driver lacks aio we need to emulate that too.
324 */
f9f05dc5
KW
325 if (!bdrv->bdrv_aio_readv) {
326 /* add AIO emulation layer */
327 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
328 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
f9f05dc5 329 }
83f64091 330 }
b2e12bc6 331
8a22f02a 332 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
ea2384d3 333}
b338082b
FB
334
335/* create a new block device (by default it is empty) */
98522f63 336BlockDriverState *bdrv_new(const char *device_name, Error **errp)
b338082b 337{
1b7bdbc1 338 BlockDriverState *bs;
fbe40ff7 339 int i;
b338082b 340
f2d953ec
KW
341 if (bdrv_find(device_name)) {
342 error_setg(errp, "Device with id '%s' already exists",
343 device_name);
344 return NULL;
345 }
346 if (bdrv_find_node(device_name)) {
347 error_setg(errp, "Device with node-name '%s' already exists",
348 device_name);
349 return NULL;
350 }
351
7267c094 352 bs = g_malloc0(sizeof(BlockDriverState));
e4654d2d 353 QLIST_INIT(&bs->dirty_bitmaps);
b338082b 354 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
ea2384d3 355 if (device_name[0] != '\0') {
dc364f4c 356 QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
ea2384d3 357 }
fbe40ff7
FZ
358 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
359 QLIST_INIT(&bs->op_blockers[i]);
360 }
28a7282a 361 bdrv_iostatus_disable(bs);
d7d512f6 362 notifier_list_init(&bs->close_notifiers);
d616b224 363 notifier_with_return_list_init(&bs->before_write_notifiers);
cc0681c4
BC
364 qemu_co_queue_init(&bs->throttled_reqs[0]);
365 qemu_co_queue_init(&bs->throttled_reqs[1]);
9fcb0251 366 bs->refcnt = 1;
dcd04228 367 bs->aio_context = qemu_get_aio_context();
d7d512f6 368
b338082b
FB
369 return bs;
370}
371
d7d512f6
PB
372void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
373{
374 notifier_list_add(&bs->close_notifiers, notify);
375}
376
ea2384d3
FB
377BlockDriver *bdrv_find_format(const char *format_name)
378{
379 BlockDriver *drv1;
8a22f02a
SH
380 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
381 if (!strcmp(drv1->format_name, format_name)) {
ea2384d3 382 return drv1;
8a22f02a 383 }
ea2384d3
FB
384 }
385 return NULL;
386}
387
b64ec4e4 388static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
eb852011 389{
b64ec4e4
FZ
390 static const char *whitelist_rw[] = {
391 CONFIG_BDRV_RW_WHITELIST
392 };
393 static const char *whitelist_ro[] = {
394 CONFIG_BDRV_RO_WHITELIST
eb852011
MA
395 };
396 const char **p;
397
b64ec4e4 398 if (!whitelist_rw[0] && !whitelist_ro[0]) {
eb852011 399 return 1; /* no whitelist, anything goes */
b64ec4e4 400 }
eb852011 401
b64ec4e4 402 for (p = whitelist_rw; *p; p++) {
eb852011
MA
403 if (!strcmp(drv->format_name, *p)) {
404 return 1;
405 }
406 }
b64ec4e4
FZ
407 if (read_only) {
408 for (p = whitelist_ro; *p; p++) {
409 if (!strcmp(drv->format_name, *p)) {
410 return 1;
411 }
412 }
413 }
eb852011
MA
414 return 0;
415}
416
b64ec4e4
FZ
417BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
418 bool read_only)
eb852011
MA
419{
420 BlockDriver *drv = bdrv_find_format(format_name);
b64ec4e4 421 return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
eb852011
MA
422}
423
5b7e1542
ZYW
424typedef struct CreateCo {
425 BlockDriver *drv;
426 char *filename;
83d0521a 427 QemuOpts *opts;
5b7e1542 428 int ret;
cc84d90f 429 Error *err;
5b7e1542
ZYW
430} CreateCo;
431
432static void coroutine_fn bdrv_create_co_entry(void *opaque)
433{
cc84d90f
HR
434 Error *local_err = NULL;
435 int ret;
436
5b7e1542
ZYW
437 CreateCo *cco = opaque;
438 assert(cco->drv);
439
c282e1fd 440 ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
84d18f06 441 if (local_err) {
cc84d90f
HR
442 error_propagate(&cco->err, local_err);
443 }
444 cco->ret = ret;
5b7e1542
ZYW
445}
446
0e7e1989 447int bdrv_create(BlockDriver *drv, const char* filename,
83d0521a 448 QemuOpts *opts, Error **errp)
ea2384d3 449{
5b7e1542
ZYW
450 int ret;
451
452 Coroutine *co;
453 CreateCo cco = {
454 .drv = drv,
455 .filename = g_strdup(filename),
83d0521a 456 .opts = opts,
5b7e1542 457 .ret = NOT_DONE,
cc84d90f 458 .err = NULL,
5b7e1542
ZYW
459 };
460
c282e1fd 461 if (!drv->bdrv_create) {
cc84d90f 462 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
80168bff
LC
463 ret = -ENOTSUP;
464 goto out;
5b7e1542
ZYW
465 }
466
467 if (qemu_in_coroutine()) {
468 /* Fast-path if already in coroutine context */
469 bdrv_create_co_entry(&cco);
470 } else {
471 co = qemu_coroutine_create(bdrv_create_co_entry);
472 qemu_coroutine_enter(co, &cco);
473 while (cco.ret == NOT_DONE) {
b47ec2c4 474 aio_poll(qemu_get_aio_context(), true);
5b7e1542
ZYW
475 }
476 }
477
478 ret = cco.ret;
cc84d90f 479 if (ret < 0) {
84d18f06 480 if (cco.err) {
cc84d90f
HR
481 error_propagate(errp, cco.err);
482 } else {
483 error_setg_errno(errp, -ret, "Could not create image");
484 }
485 }
0e7e1989 486
80168bff
LC
487out:
488 g_free(cco.filename);
5b7e1542 489 return ret;
ea2384d3
FB
490}
491
c282e1fd 492int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
84a12e66
CH
493{
494 BlockDriver *drv;
cc84d90f
HR
495 Error *local_err = NULL;
496 int ret;
84a12e66 497
98289620 498 drv = bdrv_find_protocol(filename, true);
84a12e66 499 if (drv == NULL) {
cc84d90f 500 error_setg(errp, "Could not find protocol for file '%s'", filename);
16905d71 501 return -ENOENT;
84a12e66
CH
502 }
503
c282e1fd 504 ret = bdrv_create(drv, filename, opts, &local_err);
84d18f06 505 if (local_err) {
cc84d90f
HR
506 error_propagate(errp, local_err);
507 }
508 return ret;
84a12e66
CH
509}
510
3baca891 511void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
d34682cd
KW
512{
513 BlockDriver *drv = bs->drv;
3baca891 514 Error *local_err = NULL;
d34682cd
KW
515
516 memset(&bs->bl, 0, sizeof(bs->bl));
517
466ad822 518 if (!drv) {
3baca891 519 return;
466ad822
KW
520 }
521
522 /* Take some limits from the children as a default */
523 if (bs->file) {
3baca891
KW
524 bdrv_refresh_limits(bs->file, &local_err);
525 if (local_err) {
526 error_propagate(errp, local_err);
527 return;
528 }
466ad822 529 bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
339064d5
KW
530 bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
531 } else {
532 bs->bl.opt_mem_alignment = 512;
466ad822
KW
533 }
534
535 if (bs->backing_hd) {
3baca891
KW
536 bdrv_refresh_limits(bs->backing_hd, &local_err);
537 if (local_err) {
538 error_propagate(errp, local_err);
539 return;
540 }
466ad822
KW
541 bs->bl.opt_transfer_length =
542 MAX(bs->bl.opt_transfer_length,
543 bs->backing_hd->bl.opt_transfer_length);
339064d5
KW
544 bs->bl.opt_mem_alignment =
545 MAX(bs->bl.opt_mem_alignment,
546 bs->backing_hd->bl.opt_mem_alignment);
466ad822
KW
547 }
548
549 /* Then let the driver override it */
550 if (drv->bdrv_refresh_limits) {
3baca891 551 drv->bdrv_refresh_limits(bs, errp);
d34682cd 552 }
d34682cd
KW
553}
554
eba25057
JM
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename: buffer receiving the name of the created file
 * @size:     size of that buffer in bytes
 *
 * Return 0 upon success, otherwise a negative errno value
 * (-EOVERFLOW if the name does not fit into the buffer).
 *
 * On non-Windows hosts the file is created in $TMPDIR, or in /var/tmp when
 * TMPDIR is not set.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    int len;
    const char *tmpdir;

    /* a non-positive size must never reach snprintf() as a huge size_t */
    if (size <= 0) {
        return -EOVERFLOW;
    }

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }

    len = snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    if (len < 0 || len >= size) {
        return -EOVERFLOW;
    }

    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }

    if (close(fd) != 0) {
        /* unlink() may overwrite errno, so remember why close() failed */
        int close_errno = errno;

        unlink(filename);
        return -close_errno;
    }

    return 0;
#endif
}
fc01f7e7 590
84a12e66
CH
591/*
592 * Detect host devices. By convention, /dev/cdrom[N] is always
593 * recognized as a host CDROM.
594 */
595static BlockDriver *find_hdev_driver(const char *filename)
596{
597 int score_max = 0, score;
598 BlockDriver *drv = NULL, *d;
599
600 QLIST_FOREACH(d, &bdrv_drivers, list) {
601 if (d->bdrv_probe_device) {
602 score = d->bdrv_probe_device(filename);
603 if (score > score_max) {
604 score_max = score;
605 drv = d;
606 }
607 }
608 }
609
610 return drv;
611}
612
98289620
KW
613BlockDriver *bdrv_find_protocol(const char *filename,
614 bool allow_protocol_prefix)
83f64091
FB
615{
616 BlockDriver *drv1;
617 char protocol[128];
1cec71e3 618 int len;
83f64091 619 const char *p;
19cb3738 620
66f82cee
KW
621 /* TODO Drivers without bdrv_file_open must be specified explicitly */
622
39508e7a
CH
623 /*
624 * XXX(hch): we really should not let host device detection
625 * override an explicit protocol specification, but moving this
626 * later breaks access to device names with colons in them.
627 * Thanks to the brain-dead persistent naming schemes on udev-
628 * based Linux systems those actually are quite common.
629 */
630 drv1 = find_hdev_driver(filename);
631 if (drv1) {
632 return drv1;
633 }
634
98289620 635 if (!path_has_protocol(filename) || !allow_protocol_prefix) {
39508e7a 636 return bdrv_find_format("file");
84a12e66 637 }
98289620 638
9e0b22f4
SH
639 p = strchr(filename, ':');
640 assert(p != NULL);
1cec71e3
AL
641 len = p - filename;
642 if (len > sizeof(protocol) - 1)
643 len = sizeof(protocol) - 1;
644 memcpy(protocol, filename, len);
645 protocol[len] = '\0';
8a22f02a 646 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
5fafdf24 647 if (drv1->protocol_name &&
8a22f02a 648 !strcmp(drv1->protocol_name, protocol)) {
83f64091 649 return drv1;
8a22f02a 650 }
83f64091
FB
651 }
652 return NULL;
653}
654
f500a6d3 655static int find_image_format(BlockDriverState *bs, const char *filename,
34b5d2c6 656 BlockDriver **pdrv, Error **errp)
f3a5d3f8 657{
f500a6d3 658 int score, score_max;
f3a5d3f8
CH
659 BlockDriver *drv1, *drv;
660 uint8_t buf[2048];
f500a6d3 661 int ret = 0;
f8ea0b00 662
08a00559 663 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
8e895599 664 if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
c98ac35d
SW
665 drv = bdrv_find_format("raw");
666 if (!drv) {
34b5d2c6 667 error_setg(errp, "Could not find raw image format");
c98ac35d
SW
668 ret = -ENOENT;
669 }
670 *pdrv = drv;
671 return ret;
1a396859 672 }
f8ea0b00 673
83f64091 674 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
83f64091 675 if (ret < 0) {
34b5d2c6
HR
676 error_setg_errno(errp, -ret, "Could not read image for determining its "
677 "format");
c98ac35d
SW
678 *pdrv = NULL;
679 return ret;
83f64091
FB
680 }
681
ea2384d3 682 score_max = 0;
84a12e66 683 drv = NULL;
8a22f02a 684 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
83f64091
FB
685 if (drv1->bdrv_probe) {
686 score = drv1->bdrv_probe(buf, ret, filename);
687 if (score > score_max) {
688 score_max = score;
689 drv = drv1;
690 }
0849bf08 691 }
fc01f7e7 692 }
c98ac35d 693 if (!drv) {
34b5d2c6
HR
694 error_setg(errp, "Could not determine image format: No compatible "
695 "driver found");
c98ac35d
SW
696 ret = -ENOENT;
697 }
698 *pdrv = drv;
699 return ret;
ea2384d3
FB
700}
701
51762288
SH
702/**
703 * Set the current 'total_sectors' value
65a9bb25 704 * Return 0 on success, -errno on error.
51762288
SH
705 */
706static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
707{
708 BlockDriver *drv = bs->drv;
709
396759ad
NB
710 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
711 if (bs->sg)
712 return 0;
713
51762288
SH
714 /* query actual device if possible, otherwise just trust the hint */
715 if (drv->bdrv_getlength) {
716 int64_t length = drv->bdrv_getlength(bs);
717 if (length < 0) {
718 return length;
719 }
7e382003 720 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
51762288
SH
721 }
722
723 bs->total_sectors = hint;
724 return 0;
725}
726
9e8f1835
PB
727/**
728 * Set open flags for a given discard mode
729 *
730 * Return 0 on success, -1 if the discard mode was invalid.
731 */
732int bdrv_parse_discard_flags(const char *mode, int *flags)
733{
734 *flags &= ~BDRV_O_UNMAP;
735
736 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
737 /* do nothing */
738 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
739 *flags |= BDRV_O_UNMAP;
740 } else {
741 return -1;
742 }
743
744 return 0;
745}
746
c3993cdc
SH
747/**
748 * Set open flags for a given cache mode
749 *
750 * Return 0 on success, -1 if the cache mode was invalid.
751 */
752int bdrv_parse_cache_flags(const char *mode, int *flags)
753{
754 *flags &= ~BDRV_O_CACHE_MASK;
755
756 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
757 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
92196b2f
SH
758 } else if (!strcmp(mode, "directsync")) {
759 *flags |= BDRV_O_NOCACHE;
c3993cdc
SH
760 } else if (!strcmp(mode, "writeback")) {
761 *flags |= BDRV_O_CACHE_WB;
762 } else if (!strcmp(mode, "unsafe")) {
763 *flags |= BDRV_O_CACHE_WB;
764 *flags |= BDRV_O_NO_FLUSH;
765 } else if (!strcmp(mode, "writethrough")) {
766 /* this is the default */
767 } else {
768 return -1;
769 }
770
771 return 0;
772}
773
53fec9d3
SH
774/**
775 * The copy-on-read flag is actually a reference count so multiple users may
776 * use the feature without worrying about clobbering its previous state.
777 * Copy-on-read stays enabled until all users have called to disable it.
778 */
779void bdrv_enable_copy_on_read(BlockDriverState *bs)
780{
781 bs->copy_on_read++;
782}
783
784void bdrv_disable_copy_on_read(BlockDriverState *bs)
785{
786 assert(bs->copy_on_read > 0);
787 bs->copy_on_read--;
788}
789
b1e6fc08
KW
790/*
791 * Returns the flags that a temporary snapshot should get, based on the
792 * originally requested flags (the originally requested image will have flags
793 * like a backing file)
794 */
795static int bdrv_temp_snapshot_flags(int flags)
796{
797 return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
798}
799
0b50cc88
KW
800/*
801 * Returns the flags that bs->file should get, based on the given flags for
802 * the parent BDS
803 */
804static int bdrv_inherited_flags(int flags)
805{
806 /* Enable protocol handling, disable format probing for bs->file */
807 flags |= BDRV_O_PROTOCOL;
808
809 /* Our block drivers take care to send flushes and respect unmap policy,
810 * so we can enable both unconditionally on lower layers. */
811 flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
812
0b50cc88 813 /* Clear flags that only apply to the top layer */
5669b44d 814 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
0b50cc88
KW
815
816 return flags;
817}
818
317fc44e
KW
819/*
820 * Returns the flags that bs->backing_hd should get, based on the given flags
821 * for the parent BDS
822 */
823static int bdrv_backing_flags(int flags)
824{
825 /* backing files always opened read-only */
826 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
827
828 /* snapshot=on is handled on the top layer */
8bfea15d 829 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
317fc44e
KW
830
831 return flags;
832}
833
7b272452
KW
834static int bdrv_open_flags(BlockDriverState *bs, int flags)
835{
836 int open_flags = flags | BDRV_O_CACHE_WB;
837
838 /*
839 * Clear flags that are internal to the block layer before opening the
840 * image.
841 */
20cca275 842 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
7b272452
KW
843
844 /*
845 * Snapshots should be writable.
846 */
8bfea15d 847 if (flags & BDRV_O_TEMPORARY) {
7b272452
KW
848 open_flags |= BDRV_O_RDWR;
849 }
850
851 return open_flags;
852}
853
636ea370
KW
854static void bdrv_assign_node_name(BlockDriverState *bs,
855 const char *node_name,
856 Error **errp)
6913c0c2
BC
857{
858 if (!node_name) {
636ea370 859 return;
6913c0c2
BC
860 }
861
862 /* empty string node name is invalid */
863 if (node_name[0] == '\0') {
864 error_setg(errp, "Empty node name");
636ea370 865 return;
6913c0c2
BC
866 }
867
0c5e94ee
BC
868 /* takes care of avoiding namespaces collisions */
869 if (bdrv_find(node_name)) {
870 error_setg(errp, "node-name=%s is conflicting with a device id",
871 node_name);
636ea370 872 return;
0c5e94ee
BC
873 }
874
6913c0c2
BC
875 /* takes care of avoiding duplicates node names */
876 if (bdrv_find_node(node_name)) {
877 error_setg(errp, "Duplicate node name");
636ea370 878 return;
6913c0c2
BC
879 }
880
881 /* copy node name into the bs and insert it into the graph list */
882 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
883 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
6913c0c2
BC
884}
885
57915332
KW
886/*
887 * Common part for opening disk images and files
b6ad491a
KW
888 *
889 * Removes all processed options from *options.
57915332 890 */
f500a6d3 891static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
34b5d2c6 892 QDict *options, int flags, BlockDriver *drv, Error **errp)
57915332
KW
893{
894 int ret, open_flags;
035fccdf 895 const char *filename;
6913c0c2 896 const char *node_name = NULL;
34b5d2c6 897 Error *local_err = NULL;
57915332
KW
898
899 assert(drv != NULL);
6405875c 900 assert(bs->file == NULL);
707ff828 901 assert(options != NULL && bs->options != options);
57915332 902
45673671
KW
903 if (file != NULL) {
904 filename = file->filename;
905 } else {
906 filename = qdict_get_try_str(options, "filename");
907 }
908
765003db
KW
909 if (drv->bdrv_needs_filename && !filename) {
910 error_setg(errp, "The '%s' block driver requires a file name",
911 drv->format_name);
912 return -EINVAL;
913 }
914
45673671 915 trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
28dcee10 916
6913c0c2 917 node_name = qdict_get_try_str(options, "node-name");
636ea370 918 bdrv_assign_node_name(bs, node_name, &local_err);
0fb6395c 919 if (local_err) {
636ea370
KW
920 error_propagate(errp, local_err);
921 return -EINVAL;
6913c0c2
BC
922 }
923 qdict_del(options, "node-name");
924
5d186eb0
KW
925 /* bdrv_open() with directly using a protocol as drv. This layer is already
926 * opened, so assign it to bs (while file becomes a closed BlockDriverState)
927 * and return immediately. */
928 if (file != NULL && drv->bdrv_file_open) {
929 bdrv_swap(file, bs);
930 return 0;
931 }
932
57915332 933 bs->open_flags = flags;
1b7fd729 934 bs->guest_block_size = 512;
c25f53b0 935 bs->request_alignment = 512;
0d51b4de 936 bs->zero_beyond_eof = true;
b64ec4e4
FZ
937 open_flags = bdrv_open_flags(bs, flags);
938 bs->read_only = !(open_flags & BDRV_O_RDWR);
20cca275 939 bs->growable = !!(flags & BDRV_O_PROTOCOL);
b64ec4e4
FZ
940
941 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
8f94a6e4
KW
942 error_setg(errp,
943 !bs->read_only && bdrv_is_whitelisted(drv, true)
944 ? "Driver '%s' can only be used for read-only devices"
945 : "Driver '%s' is not whitelisted",
946 drv->format_name);
b64ec4e4
FZ
947 return -ENOTSUP;
948 }
57915332 949
53fec9d3 950 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
0ebd24e0
KW
951 if (flags & BDRV_O_COPY_ON_READ) {
952 if (!bs->read_only) {
953 bdrv_enable_copy_on_read(bs);
954 } else {
955 error_setg(errp, "Can't use copy-on-read on read-only device");
956 return -EINVAL;
957 }
53fec9d3
SH
958 }
959
c2ad1b0c
KW
960 if (filename != NULL) {
961 pstrcpy(bs->filename, sizeof(bs->filename), filename);
962 } else {
963 bs->filename[0] = '\0';
964 }
57915332 965
57915332 966 bs->drv = drv;
7267c094 967 bs->opaque = g_malloc0(drv->instance_size);
57915332 968
03f541bd 969 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
e7c63796 970
66f82cee
KW
971 /* Open the image, either directly or using a protocol */
972 if (drv->bdrv_file_open) {
5d186eb0 973 assert(file == NULL);
030be321 974 assert(!drv->bdrv_needs_filename || filename != NULL);
34b5d2c6 975 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
f500a6d3 976 } else {
2af5ef70 977 if (file == NULL) {
34b5d2c6
HR
978 error_setg(errp, "Can't use '%s' as a block driver for the "
979 "protocol level", drv->format_name);
2af5ef70
KW
980 ret = -EINVAL;
981 goto free_and_fail;
982 }
f500a6d3 983 bs->file = file;
34b5d2c6 984 ret = drv->bdrv_open(bs, options, open_flags, &local_err);
66f82cee
KW
985 }
986
57915332 987 if (ret < 0) {
84d18f06 988 if (local_err) {
34b5d2c6 989 error_propagate(errp, local_err);
2fa9aa59
DH
990 } else if (bs->filename[0]) {
991 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
34b5d2c6
HR
992 } else {
993 error_setg_errno(errp, -ret, "Could not open image");
994 }
57915332
KW
995 goto free_and_fail;
996 }
997
51762288
SH
998 ret = refresh_total_sectors(bs, bs->total_sectors);
999 if (ret < 0) {
34b5d2c6 1000 error_setg_errno(errp, -ret, "Could not refresh total sector count");
51762288 1001 goto free_and_fail;
57915332 1002 }
51762288 1003
3baca891
KW
1004 bdrv_refresh_limits(bs, &local_err);
1005 if (local_err) {
1006 error_propagate(errp, local_err);
1007 ret = -EINVAL;
1008 goto free_and_fail;
1009 }
1010
c25f53b0 1011 assert(bdrv_opt_mem_align(bs) != 0);
47ea2de2 1012 assert((bs->request_alignment != 0) || bs->sg);
57915332
KW
1013 return 0;
1014
1015free_and_fail:
f500a6d3 1016 bs->file = NULL;
7267c094 1017 g_free(bs->opaque);
57915332
KW
1018 bs->opaque = NULL;
1019 bs->drv = NULL;
1020 return ret;
1021}
1022
5e5c4f63
KW
1023static QDict *parse_json_filename(const char *filename, Error **errp)
1024{
1025 QObject *options_obj;
1026 QDict *options;
1027 int ret;
1028
1029 ret = strstart(filename, "json:", &filename);
1030 assert(ret);
1031
1032 options_obj = qobject_from_json(filename);
1033 if (!options_obj) {
1034 error_setg(errp, "Could not parse the JSON options");
1035 return NULL;
1036 }
1037
1038 if (qobject_type(options_obj) != QTYPE_QDICT) {
1039 qobject_decref(options_obj);
1040 error_setg(errp, "Invalid JSON object given");
1041 return NULL;
1042 }
1043
1044 options = qobject_to_qdict(options_obj);
1045 qdict_flatten(options);
1046
1047 return options;
1048}
1049
b6ce07aa 1050/*
f54120ff
KW
1051 * Fills in default options for opening images and converts the legacy
1052 * filename/flags pair to option QDict entries.
b6ce07aa 1053 */
5e5c4f63 1054static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
17b005f1 1055 BlockDriver *drv, Error **errp)
ea2384d3 1056{
5e5c4f63 1057 const char *filename = *pfilename;
c2ad1b0c 1058 const char *drvname;
462f5bcf 1059 bool protocol = flags & BDRV_O_PROTOCOL;
e3fa4bfa 1060 bool parse_filename = false;
34b5d2c6 1061 Error *local_err = NULL;
83f64091 1062
5e5c4f63
KW
1063 /* Parse json: pseudo-protocol */
1064 if (filename && g_str_has_prefix(filename, "json:")) {
1065 QDict *json_options = parse_json_filename(filename, &local_err);
1066 if (local_err) {
1067 error_propagate(errp, local_err);
1068 return -EINVAL;
1069 }
1070
1071 /* Options given in the filename have lower priority than options
1072 * specified directly */
1073 qdict_join(*options, json_options, false);
1074 QDECREF(json_options);
1075 *pfilename = filename = NULL;
1076 }
1077
035fccdf 1078 /* Fetch the file name from the options QDict if necessary */
17b005f1 1079 if (protocol && filename) {
f54120ff
KW
1080 if (!qdict_haskey(*options, "filename")) {
1081 qdict_put(*options, "filename", qstring_from_str(filename));
1082 parse_filename = true;
1083 } else {
1084 error_setg(errp, "Can't specify 'file' and 'filename' options at "
1085 "the same time");
1086 return -EINVAL;
1087 }
035fccdf
KW
1088 }
1089
c2ad1b0c 1090 /* Find the right block driver */
f54120ff 1091 filename = qdict_get_try_str(*options, "filename");
5acd9d81 1092 drvname = qdict_get_try_str(*options, "driver");
f54120ff 1093
17b005f1
KW
1094 if (drv) {
1095 if (drvname) {
1096 error_setg(errp, "Driver specified twice");
1097 return -EINVAL;
1098 }
1099 drvname = drv->format_name;
1100 qdict_put(*options, "driver", qstring_from_str(drvname));
1101 } else {
1102 if (!drvname && protocol) {
1103 if (filename) {
1104 drv = bdrv_find_protocol(filename, parse_filename);
1105 if (!drv) {
1106 error_setg(errp, "Unknown protocol");
1107 return -EINVAL;
1108 }
1109
1110 drvname = drv->format_name;
1111 qdict_put(*options, "driver", qstring_from_str(drvname));
1112 } else {
1113 error_setg(errp, "Must specify either driver or file");
f54120ff
KW
1114 return -EINVAL;
1115 }
17b005f1
KW
1116 } else if (drvname) {
1117 drv = bdrv_find_format(drvname);
1118 if (!drv) {
1119 error_setg(errp, "Unknown driver '%s'", drvname);
1120 return -ENOENT;
1121 }
98289620 1122 }
c2ad1b0c
KW
1123 }
1124
17b005f1 1125 assert(drv || !protocol);
c2ad1b0c 1126
f54120ff 1127 /* Driver-specific filename parsing */
17b005f1 1128 if (drv && drv->bdrv_parse_filename && parse_filename) {
5acd9d81 1129 drv->bdrv_parse_filename(filename, *options, &local_err);
84d18f06 1130 if (local_err) {
34b5d2c6 1131 error_propagate(errp, local_err);
f54120ff 1132 return -EINVAL;
6963a30d 1133 }
cd5d031e
HR
1134
1135 if (!drv->bdrv_needs_filename) {
1136 qdict_del(*options, "filename");
cd5d031e 1137 }
6963a30d
KW
1138 }
1139
f54120ff
KW
1140 return 0;
1141}
1142
8d24cce1
FZ
1143void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1144{
1145
826b6ca0
FZ
1146 if (bs->backing_hd) {
1147 assert(bs->backing_blocker);
1148 bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
1149 } else if (backing_hd) {
1150 error_setg(&bs->backing_blocker,
1151 "device is used as backing hd of '%s'",
1152 bs->device_name);
1153 }
1154
8d24cce1
FZ
1155 bs->backing_hd = backing_hd;
1156 if (!backing_hd) {
826b6ca0
FZ
1157 error_free(bs->backing_blocker);
1158 bs->backing_blocker = NULL;
8d24cce1
FZ
1159 goto out;
1160 }
1161 bs->open_flags &= ~BDRV_O_NO_BACKING;
1162 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1163 pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1164 backing_hd->drv ? backing_hd->drv->format_name : "");
826b6ca0
FZ
1165
1166 bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
1167 /* Otherwise we won't be able to commit due to check in bdrv_commit */
1168 bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT,
1169 bs->backing_blocker);
8d24cce1 1170out:
3baca891 1171 bdrv_refresh_limits(bs, NULL);
8d24cce1
FZ
1172}
1173
31ca6d07
KW
1174/*
1175 * Opens the backing file for a BlockDriverState if not yet open
1176 *
1177 * options is a QDict of options to pass to the block drivers, or NULL for an
1178 * empty set of options. The reference to the QDict is transferred to this
1179 * function (even on failure), so if the caller intends to reuse the dictionary,
1180 * it needs to use QINCREF() before calling bdrv_file_open.
1181 */
34b5d2c6 1182int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
9156df12 1183{
1ba4b6a5 1184 char *backing_filename = g_malloc0(PATH_MAX);
317fc44e 1185 int ret = 0;
9156df12 1186 BlockDriver *back_drv = NULL;
8d24cce1 1187 BlockDriverState *backing_hd;
34b5d2c6 1188 Error *local_err = NULL;
9156df12
PB
1189
1190 if (bs->backing_hd != NULL) {
31ca6d07 1191 QDECREF(options);
1ba4b6a5 1192 goto free_exit;
9156df12
PB
1193 }
1194
31ca6d07
KW
1195 /* NULL means an empty set of options */
1196 if (options == NULL) {
1197 options = qdict_new();
1198 }
1199
9156df12 1200 bs->open_flags &= ~BDRV_O_NO_BACKING;
1cb6f506
KW
1201 if (qdict_haskey(options, "file.filename")) {
1202 backing_filename[0] = '\0';
1203 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
31ca6d07 1204 QDECREF(options);
1ba4b6a5 1205 goto free_exit;
dbecebdd 1206 } else {
1ba4b6a5 1207 bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX);
9156df12
PB
1208 }
1209
8ee79e70
KW
1210 if (!bs->drv || !bs->drv->supports_backing) {
1211 ret = -EINVAL;
1212 error_setg(errp, "Driver doesn't support backing files");
1213 QDECREF(options);
1214 goto free_exit;
1215 }
1216
8d24cce1
FZ
1217 backing_hd = bdrv_new("", errp);
1218
9156df12
PB
1219 if (bs->backing_format[0] != '\0') {
1220 back_drv = bdrv_find_format(bs->backing_format);
1221 }
1222
f67503e5 1223 assert(bs->backing_hd == NULL);
8d24cce1 1224 ret = bdrv_open(&backing_hd,
ddf5636d 1225 *backing_filename ? backing_filename : NULL, NULL, options,
317fc44e 1226 bdrv_backing_flags(bs->open_flags), back_drv, &local_err);
9156df12 1227 if (ret < 0) {
8d24cce1
FZ
1228 bdrv_unref(backing_hd);
1229 backing_hd = NULL;
9156df12 1230 bs->open_flags |= BDRV_O_NO_BACKING;
b04b6b6e
FZ
1231 error_setg(errp, "Could not open backing file: %s",
1232 error_get_pretty(local_err));
1233 error_free(local_err);
1ba4b6a5 1234 goto free_exit;
9156df12 1235 }
8d24cce1 1236 bdrv_set_backing_hd(bs, backing_hd);
d80ac658 1237
1ba4b6a5
BC
1238free_exit:
1239 g_free(backing_filename);
1240 return ret;
9156df12
PB
1241}
1242
da557aac
HR
1243/*
1244 * Opens a disk image whose options are given as BlockdevRef in another block
1245 * device's options.
1246 *
da557aac
HR
1247 * If allow_none is true, no image will be opened if filename is false and no
1248 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1249 *
1250 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1251 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1252 * itself, all options starting with "${bdref_key}." are considered part of the
1253 * BlockdevRef.
1254 *
1255 * The BlockdevRef will be removed from the options QDict.
f67503e5
HR
1256 *
1257 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
da557aac
HR
1258 */
1259int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1260 QDict *options, const char *bdref_key, int flags,
f7d9fd8c 1261 bool allow_none, Error **errp)
da557aac
HR
1262{
1263 QDict *image_options;
1264 int ret;
1265 char *bdref_key_dot;
1266 const char *reference;
1267
f67503e5
HR
1268 assert(pbs);
1269 assert(*pbs == NULL);
1270
da557aac
HR
1271 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1272 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1273 g_free(bdref_key_dot);
1274
1275 reference = qdict_get_try_str(options, bdref_key);
1276 if (!filename && !reference && !qdict_size(image_options)) {
1277 if (allow_none) {
1278 ret = 0;
1279 } else {
1280 error_setg(errp, "A block device must be specified for \"%s\"",
1281 bdref_key);
1282 ret = -EINVAL;
1283 }
b20e61e0 1284 QDECREF(image_options);
da557aac
HR
1285 goto done;
1286 }
1287
f7d9fd8c 1288 ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);
da557aac
HR
1289
1290done:
1291 qdict_del(options, bdref_key);
1292 return ret;
1293}
1294
6b8aeca5 1295int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
b998875d
KW
1296{
1297 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1ba4b6a5 1298 char *tmp_filename = g_malloc0(PATH_MAX + 1);
b998875d
KW
1299 int64_t total_size;
1300 BlockDriver *bdrv_qcow2;
83d0521a 1301 QemuOpts *opts = NULL;
b998875d
KW
1302 QDict *snapshot_options;
1303 BlockDriverState *bs_snapshot;
1304 Error *local_err;
1305 int ret;
1306
1307 /* if snapshot, we create a temporary backing file and open it
1308 instead of opening 'filename' directly */
1309
1310 /* Get the required size from the image */
f187743a
KW
1311 total_size = bdrv_getlength(bs);
1312 if (total_size < 0) {
6b8aeca5 1313 ret = total_size;
f187743a 1314 error_setg_errno(errp, -total_size, "Could not get image size");
1ba4b6a5 1315 goto out;
f187743a
KW
1316 }
1317 total_size &= BDRV_SECTOR_MASK;
b998875d
KW
1318
1319 /* Create the temporary image */
1ba4b6a5 1320 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
b998875d
KW
1321 if (ret < 0) {
1322 error_setg_errno(errp, -ret, "Could not get temporary filename");
1ba4b6a5 1323 goto out;
b998875d
KW
1324 }
1325
1326 bdrv_qcow2 = bdrv_find_format("qcow2");
c282e1fd
CL
1327 opts = qemu_opts_create(bdrv_qcow2->create_opts, NULL, 0,
1328 &error_abort);
83d0521a 1329 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size);
c282e1fd 1330 ret = bdrv_create(bdrv_qcow2, tmp_filename, opts, &local_err);
83d0521a 1331 qemu_opts_del(opts);
b998875d
KW
1332 if (ret < 0) {
1333 error_setg_errno(errp, -ret, "Could not create temporary overlay "
1334 "'%s': %s", tmp_filename,
1335 error_get_pretty(local_err));
1336 error_free(local_err);
1ba4b6a5 1337 goto out;
b998875d
KW
1338 }
1339
1340 /* Prepare a new options QDict for the temporary file */
1341 snapshot_options = qdict_new();
1342 qdict_put(snapshot_options, "file.driver",
1343 qstring_from_str("file"));
1344 qdict_put(snapshot_options, "file.filename",
1345 qstring_from_str(tmp_filename));
1346
98522f63 1347 bs_snapshot = bdrv_new("", &error_abort);
b998875d
KW
1348
1349 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
b1e6fc08 1350 flags, bdrv_qcow2, &local_err);
b998875d
KW
1351 if (ret < 0) {
1352 error_propagate(errp, local_err);
1ba4b6a5 1353 goto out;
b998875d
KW
1354 }
1355
1356 bdrv_append(bs_snapshot, bs);
1ba4b6a5
BC
1357
1358out:
1359 g_free(tmp_filename);
6b8aeca5 1360 return ret;
b998875d
KW
1361}
1362
b6ce07aa
KW
1363/*
1364 * Opens a disk image (raw, qcow2, vmdk, ...)
de9c0cec
KW
1365 *
1366 * options is a QDict of options to pass to the block drivers, or NULL for an
1367 * empty set of options. The reference to the QDict belongs to the block layer
1368 * after the call (even on failure), so if the caller intends to reuse the
1369 * dictionary, it needs to use QINCREF() before calling bdrv_open.
f67503e5
HR
1370 *
1371 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1372 * If it is not NULL, the referenced BDS will be reused.
ddf5636d
HR
1373 *
1374 * The reference parameter may be used to specify an existing block device which
1375 * should be opened. If specified, neither options nor a filename may be given,
1376 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
b6ce07aa 1377 */
ddf5636d
HR
1378int bdrv_open(BlockDriverState **pbs, const char *filename,
1379 const char *reference, QDict *options, int flags,
1380 BlockDriver *drv, Error **errp)
ea2384d3 1381{
b6ce07aa 1382 int ret;
f67503e5 1383 BlockDriverState *file = NULL, *bs;
74fe54f2 1384 const char *drvname;
34b5d2c6 1385 Error *local_err = NULL;
b1e6fc08 1386 int snapshot_flags = 0;
712e7874 1387
f67503e5
HR
1388 assert(pbs);
1389
ddf5636d
HR
1390 if (reference) {
1391 bool options_non_empty = options ? qdict_size(options) : false;
1392 QDECREF(options);
1393
1394 if (*pbs) {
1395 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1396 "another block device");
1397 return -EINVAL;
1398 }
1399
1400 if (filename || options_non_empty) {
1401 error_setg(errp, "Cannot reference an existing block device with "
1402 "additional options or a new filename");
1403 return -EINVAL;
1404 }
1405
1406 bs = bdrv_lookup_bs(reference, reference, errp);
1407 if (!bs) {
1408 return -ENODEV;
1409 }
1410 bdrv_ref(bs);
1411 *pbs = bs;
1412 return 0;
1413 }
1414
f67503e5
HR
1415 if (*pbs) {
1416 bs = *pbs;
1417 } else {
98522f63 1418 bs = bdrv_new("", &error_abort);
f67503e5
HR
1419 }
1420
de9c0cec
KW
1421 /* NULL means an empty set of options */
1422 if (options == NULL) {
1423 options = qdict_new();
1424 }
1425
17b005f1 1426 ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
462f5bcf
KW
1427 if (local_err) {
1428 goto fail;
1429 }
1430
76c591b0
KW
1431 /* Find the right image format driver */
1432 drv = NULL;
1433 drvname = qdict_get_try_str(options, "driver");
1434 if (drvname) {
1435 drv = bdrv_find_format(drvname);
1436 qdict_del(options, "driver");
1437 if (!drv) {
1438 error_setg(errp, "Unknown driver: '%s'", drvname);
1439 ret = -EINVAL;
1440 goto fail;
1441 }
1442 }
1443
1444 assert(drvname || !(flags & BDRV_O_PROTOCOL));
1445 if (drv && !drv->bdrv_file_open) {
1446 /* If the user explicitly wants a format driver here, we'll need to add
1447 * another layer for the protocol in bs->file */
1448 flags &= ~BDRV_O_PROTOCOL;
1449 }
1450
de9c0cec 1451 bs->options = options;
b6ad491a 1452 options = qdict_clone_shallow(options);
de9c0cec 1453
f500a6d3 1454 /* Open image file without format layer */
f4788adc
KW
1455 if ((flags & BDRV_O_PROTOCOL) == 0) {
1456 if (flags & BDRV_O_RDWR) {
1457 flags |= BDRV_O_ALLOW_RDWR;
1458 }
1459 if (flags & BDRV_O_SNAPSHOT) {
1460 snapshot_flags = bdrv_temp_snapshot_flags(flags);
1461 flags = bdrv_backing_flags(flags);
1462 }
f500a6d3 1463
f4788adc
KW
1464 assert(file == NULL);
1465 ret = bdrv_open_image(&file, filename, options, "file",
1466 bdrv_inherited_flags(flags),
1467 true, &local_err);
1468 if (ret < 0) {
1469 goto fail;
1470 }
f500a6d3
KW
1471 }
1472
76c591b0
KW
1473 /* Image format probing */
1474 if (!drv && file) {
17b005f1
KW
1475 ret = find_image_format(file, filename, &drv, &local_err);
1476 if (ret < 0) {
8bfea15d 1477 goto fail;
2a05cbe4 1478 }
76c591b0 1479 } else if (!drv) {
17b005f1
KW
1480 error_setg(errp, "Must specify either driver or file");
1481 ret = -EINVAL;
8bfea15d 1482 goto fail;
ea2384d3 1483 }
b6ce07aa
KW
1484
1485 /* Open the image */
34b5d2c6 1486 ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
b6ce07aa 1487 if (ret < 0) {
8bfea15d 1488 goto fail;
6987307c
CH
1489 }
1490
2a05cbe4 1491 if (file && (bs->file != file)) {
4f6fd349 1492 bdrv_unref(file);
f500a6d3
KW
1493 file = NULL;
1494 }
1495
b6ce07aa 1496 /* If there is a backing file, use it */
9156df12 1497 if ((flags & BDRV_O_NO_BACKING) == 0) {
31ca6d07
KW
1498 QDict *backing_options;
1499
5726d872 1500 qdict_extract_subqdict(options, &backing_options, "backing.");
34b5d2c6 1501 ret = bdrv_open_backing_file(bs, backing_options, &local_err);
b6ce07aa 1502 if (ret < 0) {
b6ad491a 1503 goto close_and_fail;
b6ce07aa 1504 }
b6ce07aa
KW
1505 }
1506
b998875d
KW
1507 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1508 * temporary snapshot afterwards. */
b1e6fc08 1509 if (snapshot_flags) {
6b8aeca5 1510 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
b998875d 1511 if (local_err) {
b998875d
KW
1512 goto close_and_fail;
1513 }
1514 }
1515
b6ad491a 1516 /* Check if any unknown options were used */
5acd9d81 1517 if (options && (qdict_size(options) != 0)) {
b6ad491a 1518 const QDictEntry *entry = qdict_first(options);
5acd9d81
HR
1519 if (flags & BDRV_O_PROTOCOL) {
1520 error_setg(errp, "Block protocol '%s' doesn't support the option "
1521 "'%s'", drv->format_name, entry->key);
1522 } else {
1523 error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1524 "support the option '%s'", drv->format_name,
1525 bs->device_name, entry->key);
1526 }
b6ad491a
KW
1527
1528 ret = -EINVAL;
1529 goto close_and_fail;
1530 }
b6ad491a 1531
b6ce07aa 1532 if (!bdrv_key_required(bs)) {
7d4b4ba5 1533 bdrv_dev_change_media_cb(bs, true);
c3adb58f
MA
1534 } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1535 && !runstate_check(RUN_STATE_INMIGRATE)
1536 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1537 error_setg(errp,
1538 "Guest must be stopped for opening of encrypted image");
1539 ret = -EBUSY;
1540 goto close_and_fail;
b6ce07aa
KW
1541 }
1542
c3adb58f 1543 QDECREF(options);
f67503e5 1544 *pbs = bs;
b6ce07aa
KW
1545 return 0;
1546
8bfea15d 1547fail:
f500a6d3 1548 if (file != NULL) {
4f6fd349 1549 bdrv_unref(file);
f500a6d3 1550 }
de9c0cec 1551 QDECREF(bs->options);
b6ad491a 1552 QDECREF(options);
de9c0cec 1553 bs->options = NULL;
f67503e5
HR
1554 if (!*pbs) {
1555 /* If *pbs is NULL, a new BDS has been created in this function and
1556 needs to be freed now. Otherwise, it does not need to be closed,
1557 since it has not really been opened yet. */
1558 bdrv_unref(bs);
1559 }
84d18f06 1560 if (local_err) {
34b5d2c6
HR
1561 error_propagate(errp, local_err);
1562 }
b6ad491a 1563 return ret;
de9c0cec 1564
b6ad491a 1565close_and_fail:
f67503e5
HR
1566 /* See fail path, but now the BDS has to be always closed */
1567 if (*pbs) {
1568 bdrv_close(bs);
1569 } else {
1570 bdrv_unref(bs);
1571 }
b6ad491a 1572 QDECREF(options);
84d18f06 1573 if (local_err) {
34b5d2c6
HR
1574 error_propagate(errp, local_err);
1575 }
b6ce07aa
KW
1576 return ret;
1577}
1578
e971aa12
JC
/* One element of a BlockReopenQueue: the pending reopen of a single BDS. */
typedef struct BlockReopenQueueEntry {
    /* Set by bdrv_reopen_multiple() once bdrv_reopen_prepare() succeeded
     * for this entry; only prepared entries are aborted on failure */
    bool prepared;
    /* Reopen state; bdrv_reopen_queue() fills in its bs and flags */
    BDRVReopenState state;
    /* Linkage within the queue */
    QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
} BlockReopenQueueEntry;
1584
1585/*
1586 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1587 * reopen of multiple devices.
1588 *
1589 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1590 * already performed, or alternatively may be NULL a new BlockReopenQueue will
1591 * be created and initialized. This newly created BlockReopenQueue should be
1592 * passed back in for subsequent calls that are intended to be of the same
1593 * atomic 'set'.
1594 *
1595 * bs is the BlockDriverState to add to the reopen queue.
1596 *
1597 * flags contains the open flags for the associated bs
1598 *
1599 * returns a pointer to bs_queue, which is either the newly allocated
1600 * bs_queue, or the existing bs_queue being used.
1601 *
1602 */
1603BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1604 BlockDriverState *bs, int flags)
1605{
1606 assert(bs != NULL);
1607
1608 BlockReopenQueueEntry *bs_entry;
1609 if (bs_queue == NULL) {
1610 bs_queue = g_new0(BlockReopenQueue, 1);
1611 QSIMPLEQ_INIT(bs_queue);
1612 }
1613
f1f25a2e
KW
1614 /* bdrv_open() masks this flag out */
1615 flags &= ~BDRV_O_PROTOCOL;
1616
e971aa12 1617 if (bs->file) {
f1f25a2e 1618 bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
e971aa12
JC
1619 }
1620
1621 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1622 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1623
1624 bs_entry->state.bs = bs;
1625 bs_entry->state.flags = flags;
1626
1627 return bs_queue;
1628}
1629
1630/*
1631 * Reopen multiple BlockDriverStates atomically & transactionally.
1632 *
1633 * The queue passed in (bs_queue) must have been built up previous
1634 * via bdrv_reopen_queue().
1635 *
1636 * Reopens all BDS specified in the queue, with the appropriate
1637 * flags. All devices are prepared for reopen, and failure of any
1638 * device will cause all device changes to be abandonded, and intermediate
1639 * data cleaned up.
1640 *
1641 * If all devices prepare successfully, then the changes are committed
1642 * to all devices.
1643 *
1644 */
1645int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1646{
1647 int ret = -1;
1648 BlockReopenQueueEntry *bs_entry, *next;
1649 Error *local_err = NULL;
1650
1651 assert(bs_queue != NULL);
1652
1653 bdrv_drain_all();
1654
1655 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1656 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1657 error_propagate(errp, local_err);
1658 goto cleanup;
1659 }
1660 bs_entry->prepared = true;
1661 }
1662
1663 /* If we reach this point, we have success and just need to apply the
1664 * changes
1665 */
1666 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1667 bdrv_reopen_commit(&bs_entry->state);
1668 }
1669
1670 ret = 0;
1671
1672cleanup:
1673 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1674 if (ret && bs_entry->prepared) {
1675 bdrv_reopen_abort(&bs_entry->state);
1676 }
1677 g_free(bs_entry);
1678 }
1679 g_free(bs_queue);
1680 return ret;
1681}
1682
1683
1684/* Reopen a single BlockDriverState with the specified flags. */
1685int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1686{
1687 int ret = -1;
1688 Error *local_err = NULL;
1689 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1690
1691 ret = bdrv_reopen_multiple(queue, &local_err);
1692 if (local_err != NULL) {
1693 error_propagate(errp, local_err);
1694 }
1695 return ret;
1696}
1697
1698
1699/*
1700 * Prepares a BlockDriverState for reopen. All changes are staged in the
1701 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1702 * the block driver layer .bdrv_reopen_prepare()
1703 *
1704 * bs is the BlockDriverState to reopen
1705 * flags are the new open flags
1706 * queue is the reopen queue
1707 *
1708 * Returns 0 on success, non-zero on error. On error errp will be set
1709 * as well.
1710 *
1711 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1712 * It is the responsibility of the caller to then call the abort() or
1713 * commit() for any other BDS that have been left in a prepare() state
1714 *
1715 */
1716int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1717 Error **errp)
1718{
1719 int ret = -1;
1720 Error *local_err = NULL;
1721 BlockDriver *drv;
1722
1723 assert(reopen_state != NULL);
1724 assert(reopen_state->bs->drv != NULL);
1725 drv = reopen_state->bs->drv;
1726
1727 /* if we are to stay read-only, do not allow permission change
1728 * to r/w */
1729 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1730 reopen_state->flags & BDRV_O_RDWR) {
1731 error_set(errp, QERR_DEVICE_IS_READ_ONLY,
1732 reopen_state->bs->device_name);
1733 goto error;
1734 }
1735
1736
1737 ret = bdrv_flush(reopen_state->bs);
1738 if (ret) {
1739 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1740 strerror(-ret));
1741 goto error;
1742 }
1743
1744 if (drv->bdrv_reopen_prepare) {
1745 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1746 if (ret) {
1747 if (local_err != NULL) {
1748 error_propagate(errp, local_err);
1749 } else {
d8b6895f
LC
1750 error_setg(errp, "failed while preparing to reopen image '%s'",
1751 reopen_state->bs->filename);
e971aa12
JC
1752 }
1753 goto error;
1754 }
1755 } else {
1756 /* It is currently mandatory to have a bdrv_reopen_prepare()
1757 * handler for each supported drv. */
1758 error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
1759 drv->format_name, reopen_state->bs->device_name,
1760 "reopening of file");
1761 ret = -1;
1762 goto error;
1763 }
1764
1765 ret = 0;
1766
1767error:
1768 return ret;
1769}
1770
1771/*
1772 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1773 * makes them final by swapping the staging BlockDriverState contents into
1774 * the active BlockDriverState contents.
1775 */
1776void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1777{
1778 BlockDriver *drv;
1779
1780 assert(reopen_state != NULL);
1781 drv = reopen_state->bs->drv;
1782 assert(drv != NULL);
1783
1784 /* If there are any driver level actions to take */
1785 if (drv->bdrv_reopen_commit) {
1786 drv->bdrv_reopen_commit(reopen_state);
1787 }
1788
1789 /* set BDS specific flags now */
1790 reopen_state->bs->open_flags = reopen_state->flags;
1791 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1792 BDRV_O_CACHE_WB);
1793 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
355ef4ac 1794
3baca891 1795 bdrv_refresh_limits(reopen_state->bs, NULL);
e971aa12
JC
1796}
1797
1798/*
1799 * Abort the reopen, and delete and free the staged changes in
1800 * reopen_state
1801 */
1802void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1803{
1804 BlockDriver *drv;
1805
1806 assert(reopen_state != NULL);
1807 drv = reopen_state->bs->drv;
1808 assert(drv != NULL);
1809
1810 if (drv->bdrv_reopen_abort) {
1811 drv->bdrv_reopen_abort(reopen_state);
1812 }
1813}
1814
1815
fc01f7e7
FB
1816void bdrv_close(BlockDriverState *bs)
1817{
3cbc002c
PB
1818 if (bs->job) {
1819 block_job_cancel_sync(bs->job);
1820 }
58fda173
SH
1821 bdrv_drain_all(); /* complete I/O */
1822 bdrv_flush(bs);
1823 bdrv_drain_all(); /* in case flush left pending I/O */
d7d512f6 1824 notifier_list_notify(&bs->close_notifiers, bs);
7094f12f 1825
3cbc002c 1826 if (bs->drv) {
557df6ac 1827 if (bs->backing_hd) {
826b6ca0
FZ
1828 BlockDriverState *backing_hd = bs->backing_hd;
1829 bdrv_set_backing_hd(bs, NULL);
1830 bdrv_unref(backing_hd);
557df6ac 1831 }
ea2384d3 1832 bs->drv->bdrv_close(bs);
7267c094 1833 g_free(bs->opaque);
ea2384d3
FB
1834 bs->opaque = NULL;
1835 bs->drv = NULL;
53fec9d3 1836 bs->copy_on_read = 0;
a275fa42
PB
1837 bs->backing_file[0] = '\0';
1838 bs->backing_format[0] = '\0';
6405875c
PB
1839 bs->total_sectors = 0;
1840 bs->encrypted = 0;
1841 bs->valid_key = 0;
1842 bs->sg = 0;
1843 bs->growable = 0;
0d51b4de 1844 bs->zero_beyond_eof = false;
de9c0cec
KW
1845 QDECREF(bs->options);
1846 bs->options = NULL;
b338082b 1847
66f82cee 1848 if (bs->file != NULL) {
4f6fd349 1849 bdrv_unref(bs->file);
0ac9377d 1850 bs->file = NULL;
66f82cee 1851 }
b338082b 1852 }
98f90dba 1853
9ca11154
PH
1854 bdrv_dev_change_media_cb(bs, false);
1855
98f90dba
ZYW
1856 /*throttling disk I/O limits*/
1857 if (bs->io_limits_enabled) {
1858 bdrv_io_limits_disable(bs);
1859 }
b338082b
FB
1860}
1861
2bc93fed
MK
1862void bdrv_close_all(void)
1863{
1864 BlockDriverState *bs;
1865
dc364f4c 1866 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
ed78cda3
SH
1867 AioContext *aio_context = bdrv_get_aio_context(bs);
1868
1869 aio_context_acquire(aio_context);
2bc93fed 1870 bdrv_close(bs);
ed78cda3 1871 aio_context_release(aio_context);
2bc93fed
MK
1872 }
1873}
1874
88266f5a
SH
1875/* Check if any requests are in-flight (including throttled requests) */
1876static bool bdrv_requests_pending(BlockDriverState *bs)
1877{
1878 if (!QLIST_EMPTY(&bs->tracked_requests)) {
1879 return true;
1880 }
cc0681c4
BC
1881 if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
1882 return true;
1883 }
1884 if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
88266f5a
SH
1885 return true;
1886 }
1887 if (bs->file && bdrv_requests_pending(bs->file)) {
1888 return true;
1889 }
1890 if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
1891 return true;
1892 }
1893 return false;
1894}
1895
922453bc
SH
1896/*
1897 * Wait for pending requests to complete across all BlockDriverStates
1898 *
1899 * This function does not flush data to disk, use bdrv_flush_all() for that
1900 * after calling this function.
4c355d53
ZYW
1901 *
1902 * Note that completion of an asynchronous I/O operation can trigger any
1903 * number of other I/O operations on other devices---for example a coroutine
1904 * can be arbitrarily complex and a constant flow of I/O can come until the
1905 * coroutine is complete. Because of this, it is not possible to have a
1906 * function to drain a single device's I/O queue.
922453bc
SH
1907 */
1908void bdrv_drain_all(void)
1909{
88266f5a
SH
1910 /* Always run first iteration so any pending completion BHs run */
1911 bool busy = true;
922453bc
SH
1912 BlockDriverState *bs;
1913
88266f5a 1914 while (busy) {
9b536adc
SH
1915 busy = false;
1916
dc364f4c 1917 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
9b536adc
SH
1918 AioContext *aio_context = bdrv_get_aio_context(bs);
1919 bool bs_busy;
1920
1921 aio_context_acquire(aio_context);
448ad91d 1922 bdrv_flush_io_queue(bs);
0b06ef3b 1923 bdrv_start_throttled_reqs(bs);
9b536adc
SH
1924 bs_busy = bdrv_requests_pending(bs);
1925 bs_busy |= aio_poll(aio_context, bs_busy);
1926 aio_context_release(aio_context);
922453bc 1927
9b536adc
SH
1928 busy |= bs_busy;
1929 }
922453bc
SH
1930 }
1931}
1932
dc364f4c
BC
1933/* make a BlockDriverState anonymous by removing from bdrv_state and
1934 * graph_bdrv_state list.
d22b2f41
RH
1935 Also, NULL terminate the device_name to prevent double remove */
1936void bdrv_make_anon(BlockDriverState *bs)
1937{
1938 if (bs->device_name[0] != '\0') {
dc364f4c 1939 QTAILQ_REMOVE(&bdrv_states, bs, device_list);
d22b2f41
RH
1940 }
1941 bs->device_name[0] = '\0';
dc364f4c
BC
1942 if (bs->node_name[0] != '\0') {
1943 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1944 }
1945 bs->node_name[0] = '\0';
d22b2f41
RH
1946}
1947
e023b2e2
PB
1948static void bdrv_rebind(BlockDriverState *bs)
1949{
1950 if (bs->drv && bs->drv->bdrv_rebind) {
1951 bs->drv->bdrv_rebind(bs);
1952 }
1953}
1954
4ddc07ca
PB
1955static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
1956 BlockDriverState *bs_src)
8802d1fd 1957{
4ddc07ca 1958 /* move some fields that need to stay attached to the device */
8802d1fd
JC
1959
1960 /* dev info */
4ddc07ca
PB
1961 bs_dest->dev_ops = bs_src->dev_ops;
1962 bs_dest->dev_opaque = bs_src->dev_opaque;
1963 bs_dest->dev = bs_src->dev;
1b7fd729 1964 bs_dest->guest_block_size = bs_src->guest_block_size;
4ddc07ca 1965 bs_dest->copy_on_read = bs_src->copy_on_read;
8802d1fd 1966
4ddc07ca 1967 bs_dest->enable_write_cache = bs_src->enable_write_cache;
c4a248a1 1968
cc0681c4
BC
1969 /* i/o throttled req */
1970 memcpy(&bs_dest->throttle_state,
1971 &bs_src->throttle_state,
1972 sizeof(ThrottleState));
1973 bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
1974 bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
4ddc07ca 1975 bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
8802d1fd 1976
8802d1fd 1977 /* r/w error */
4ddc07ca
PB
1978 bs_dest->on_read_error = bs_src->on_read_error;
1979 bs_dest->on_write_error = bs_src->on_write_error;
8802d1fd
JC
1980
1981 /* i/o status */
4ddc07ca
PB
1982 bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
1983 bs_dest->iostatus = bs_src->iostatus;
8802d1fd 1984
a9fc4408 1985 /* dirty bitmap */
e4654d2d 1986 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
a9fc4408 1987
9fcb0251
FZ
1988 /* reference count */
1989 bs_dest->refcnt = bs_src->refcnt;
1990
a9fc4408 1991 /* job */
4ddc07ca 1992 bs_dest->job = bs_src->job;
a9fc4408 1993
8802d1fd 1994 /* keep the same entry in bdrv_states */
4ddc07ca
PB
1995 pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name),
1996 bs_src->device_name);
dc364f4c 1997 bs_dest->device_list = bs_src->device_list;
fbe40ff7
FZ
1998 memcpy(bs_dest->op_blockers, bs_src->op_blockers,
1999 sizeof(bs_dest->op_blockers));
4ddc07ca 2000}
8802d1fd 2001
4ddc07ca
PB
2002/*
2003 * Swap bs contents for two image chains while they are live,
2004 * while keeping required fields on the BlockDriverState that is
2005 * actually attached to a device.
2006 *
2007 * This will modify the BlockDriverState fields, and swap contents
2008 * between bs_new and bs_old. Both bs_new and bs_old are modified.
2009 *
2010 * bs_new is required to be anonymous.
2011 *
2012 * This function does not create any image files.
2013 */
2014void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
2015{
2016 BlockDriverState tmp;
f6801b83 2017
90ce8a06
BC
2018 /* The code needs to swap the node_name but simply swapping node_list won't
2019 * work so first remove the nodes from the graph list, do the swap then
2020 * insert them back if needed.
2021 */
2022 if (bs_new->node_name[0] != '\0') {
2023 QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
2024 }
2025 if (bs_old->node_name[0] != '\0') {
2026 QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
2027 }
2028
4ddc07ca
PB
2029 /* bs_new must be anonymous and shouldn't have anything fancy enabled */
2030 assert(bs_new->device_name[0] == '\0');
e4654d2d 2031 assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
4ddc07ca
PB
2032 assert(bs_new->job == NULL);
2033 assert(bs_new->dev == NULL);
4ddc07ca 2034 assert(bs_new->io_limits_enabled == false);
cc0681c4 2035 assert(!throttle_have_timer(&bs_new->throttle_state));
8802d1fd 2036
4ddc07ca
PB
2037 tmp = *bs_new;
2038 *bs_new = *bs_old;
2039 *bs_old = tmp;
a9fc4408 2040
4ddc07ca
PB
2041 /* there are some fields that should not be swapped, move them back */
2042 bdrv_move_feature_fields(&tmp, bs_old);
2043 bdrv_move_feature_fields(bs_old, bs_new);
2044 bdrv_move_feature_fields(bs_new, &tmp);
8802d1fd 2045
4ddc07ca
PB
2046 /* bs_new shouldn't be in bdrv_states even after the swap! */
2047 assert(bs_new->device_name[0] == '\0');
2048
2049 /* Check a few fields that should remain attached to the device */
2050 assert(bs_new->dev == NULL);
2051 assert(bs_new->job == NULL);
4ddc07ca 2052 assert(bs_new->io_limits_enabled == false);
cc0681c4 2053 assert(!throttle_have_timer(&bs_new->throttle_state));
e023b2e2 2054
90ce8a06
BC
2055 /* insert the nodes back into the graph node list if needed */
2056 if (bs_new->node_name[0] != '\0') {
2057 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
2058 }
2059 if (bs_old->node_name[0] != '\0') {
2060 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
2061 }
2062
e023b2e2 2063 bdrv_rebind(bs_new);
4ddc07ca
PB
2064 bdrv_rebind(bs_old);
2065}
2066
2067/*
2068 * Add new bs contents at the top of an image chain while the chain is
2069 * live, while keeping required fields on the top layer.
2070 *
2071 * This will modify the BlockDriverState fields, and swap contents
2072 * between bs_new and bs_top. Both bs_new and bs_top are modified.
2073 *
2074 * bs_new is required to be anonymous.
2075 *
2076 * This function does not create any image files.
2077 */
2078void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2079{
2080 bdrv_swap(bs_new, bs_top);
2081
2082 /* The contents of 'tmp' will become bs_top, as we are
2083 * swapping bs_new and bs_top contents. */
8d24cce1 2084 bdrv_set_backing_hd(bs_top, bs_new);
8802d1fd
JC
2085}
2086
4f6fd349 2087static void bdrv_delete(BlockDriverState *bs)
b338082b 2088{
fa879d62 2089 assert(!bs->dev);
3e914655 2090 assert(!bs->job);
3718d8ab 2091 assert(bdrv_op_blocker_is_empty(bs));
4f6fd349 2092 assert(!bs->refcnt);
e4654d2d 2093 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
18846dee 2094
e1b5c52e
SH
2095 bdrv_close(bs);
2096
1b7bdbc1 2097 /* remove from list, if necessary */
d22b2f41 2098 bdrv_make_anon(bs);
34c6f050 2099
7267c094 2100 g_free(bs);
fc01f7e7
FB
2101}
2102
fa879d62
MA
2103int bdrv_attach_dev(BlockDriverState *bs, void *dev)
2104/* TODO change to DeviceState *dev when all users are qdevified */
18846dee 2105{
fa879d62 2106 if (bs->dev) {
18846dee
MA
2107 return -EBUSY;
2108 }
fa879d62 2109 bs->dev = dev;
28a7282a 2110 bdrv_iostatus_reset(bs);
18846dee
MA
2111 return 0;
2112}
2113
fa879d62
MA
2114/* TODO qdevified devices don't use this, remove when devices are qdevified */
2115void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
18846dee 2116{
fa879d62
MA
2117 if (bdrv_attach_dev(bs, dev) < 0) {
2118 abort();
2119 }
2120}
2121
2122void bdrv_detach_dev(BlockDriverState *bs, void *dev)
2123/* TODO change to DeviceState *dev when all users are qdevified */
2124{
2125 assert(bs->dev == dev);
2126 bs->dev = NULL;
0e49de52
MA
2127 bs->dev_ops = NULL;
2128 bs->dev_opaque = NULL;
1b7fd729 2129 bs->guest_block_size = 512;
18846dee
MA
2130}
2131
fa879d62
MA
2132/* TODO change to return DeviceState * when all users are qdevified */
2133void *bdrv_get_attached_dev(BlockDriverState *bs)
18846dee 2134{
fa879d62 2135 return bs->dev;
18846dee
MA
2136}
2137
0e49de52
MA
2138void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
2139 void *opaque)
2140{
2141 bs->dev_ops = ops;
2142 bs->dev_opaque = opaque;
2143}
2144
7d4b4ba5 2145static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
0e49de52 2146{
145feb17 2147 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
6f382ed2 2148 bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
7d4b4ba5 2149 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
6f382ed2
LC
2150 if (tray_was_closed) {
2151 /* tray open */
a5ee7bd4
WX
2152 qapi_event_send_device_tray_moved(bdrv_get_device_name(bs),
2153 true, &error_abort);
6f382ed2
LC
2154 }
2155 if (load) {
2156 /* tray close */
a5ee7bd4
WX
2157 qapi_event_send_device_tray_moved(bdrv_get_device_name(bs),
2158 false, &error_abort);
6f382ed2 2159 }
145feb17
MA
2160 }
2161}
2162
2c6942fa
MA
2163bool bdrv_dev_has_removable_media(BlockDriverState *bs)
2164{
2165 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
2166}
2167
025ccaa7
PB
2168void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
2169{
2170 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
2171 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
2172 }
2173}
2174
e4def80b
MA
2175bool bdrv_dev_is_tray_open(BlockDriverState *bs)
2176{
2177 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
2178 return bs->dev_ops->is_tray_open(bs->dev_opaque);
2179 }
2180 return false;
2181}
2182
145feb17
MA
2183static void bdrv_dev_resize_cb(BlockDriverState *bs)
2184{
2185 if (bs->dev_ops && bs->dev_ops->resize_cb) {
2186 bs->dev_ops->resize_cb(bs->dev_opaque);
0e49de52
MA
2187 }
2188}
2189
f107639a
MA
2190bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
2191{
2192 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
2193 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
2194 }
2195 return false;
2196}
2197
e97fc193
AL
2198/*
2199 * Run consistency checks on an image
2200 *
e076f338 2201 * Returns 0 if the check could be completed (it doesn't mean that the image is
a1c7273b 2202 * free of errors) or -errno when an internal error occurred. The results of the
e076f338 2203 * check are stored in res.
e97fc193 2204 */
4534ff54 2205int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
e97fc193
AL
2206{
2207 if (bs->drv->bdrv_check == NULL) {
2208 return -ENOTSUP;
2209 }
2210
e076f338 2211 memset(res, 0, sizeof(*res));
4534ff54 2212 return bs->drv->bdrv_check(bs, res, fix);
e97fc193
AL
2213}
2214
8a426614
KW
2215#define COMMIT_BUF_SECTORS 2048
2216
33e3963e
FB
2217/* commit COW file into the raw image */
2218int bdrv_commit(BlockDriverState *bs)
2219{
19cb3738 2220 BlockDriver *drv = bs->drv;
72706ea4 2221 int64_t sector, total_sectors, length, backing_length;
8a426614 2222 int n, ro, open_flags;
0bce597d 2223 int ret = 0;
72706ea4 2224 uint8_t *buf = NULL;
c2cba3d9 2225 char filename[PATH_MAX];
33e3963e 2226
19cb3738
FB
2227 if (!drv)
2228 return -ENOMEDIUM;
4dca4b63
NS
2229
2230 if (!bs->backing_hd) {
2231 return -ENOTSUP;
33e3963e
FB
2232 }
2233
3718d8ab
FZ
2234 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, NULL) ||
2235 bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, NULL)) {
2d3735d3
SH
2236 return -EBUSY;
2237 }
2238
4dca4b63 2239 ro = bs->backing_hd->read_only;
c2cba3d9
JM
2240 /* Use pstrcpy (not strncpy): filename must be NUL-terminated. */
2241 pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
4dca4b63
NS
2242 open_flags = bs->backing_hd->open_flags;
2243
2244 if (ro) {
0bce597d
JC
2245 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2246 return -EACCES;
4dca4b63 2247 }
ea2384d3 2248 }
33e3963e 2249
72706ea4
JC
2250 length = bdrv_getlength(bs);
2251 if (length < 0) {
2252 ret = length;
2253 goto ro_cleanup;
2254 }
2255
2256 backing_length = bdrv_getlength(bs->backing_hd);
2257 if (backing_length < 0) {
2258 ret = backing_length;
2259 goto ro_cleanup;
2260 }
2261
2262 /* If our top snapshot is larger than the backing file image,
2263 * grow the backing file image if possible. If not possible,
2264 * we must return an error */
2265 if (length > backing_length) {
2266 ret = bdrv_truncate(bs->backing_hd, length);
2267 if (ret < 0) {
2268 goto ro_cleanup;
2269 }
2270 }
2271
2272 total_sectors = length >> BDRV_SECTOR_BITS;
7267c094 2273 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
8a426614
KW
2274
2275 for (sector = 0; sector < total_sectors; sector += n) {
d663640c
PB
2276 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2277 if (ret < 0) {
2278 goto ro_cleanup;
2279 }
2280 if (ret) {
dabfa6cc
KW
2281 ret = bdrv_read(bs, sector, buf, n);
2282 if (ret < 0) {
8a426614
KW
2283 goto ro_cleanup;
2284 }
2285
dabfa6cc
KW
2286 ret = bdrv_write(bs->backing_hd, sector, buf, n);
2287 if (ret < 0) {
8a426614
KW
2288 goto ro_cleanup;
2289 }
ea2384d3 2290 }
33e3963e 2291 }
95389c86 2292
1d44952f
CH
2293 if (drv->bdrv_make_empty) {
2294 ret = drv->bdrv_make_empty(bs);
dabfa6cc
KW
2295 if (ret < 0) {
2296 goto ro_cleanup;
2297 }
1d44952f
CH
2298 bdrv_flush(bs);
2299 }
95389c86 2300
3f5075ae
CH
2301 /*
2302 * Make sure all data we wrote to the backing device is actually
2303 * stable on disk.
2304 */
dabfa6cc 2305 if (bs->backing_hd) {
3f5075ae 2306 bdrv_flush(bs->backing_hd);
dabfa6cc 2307 }
4dca4b63 2308
dabfa6cc 2309 ret = 0;
4dca4b63 2310ro_cleanup:
7267c094 2311 g_free(buf);
4dca4b63
NS
2312
2313 if (ro) {
0bce597d
JC
2314 /* ignoring error return here */
2315 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
4dca4b63
NS
2316 }
2317
1d44952f 2318 return ret;
33e3963e
FB
2319}
2320
e8877497 2321int bdrv_commit_all(void)
6ab4b5ab
MA
2322{
2323 BlockDriverState *bs;
2324
dc364f4c 2325 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
ed78cda3
SH
2326 AioContext *aio_context = bdrv_get_aio_context(bs);
2327
2328 aio_context_acquire(aio_context);
272d2d8e
JC
2329 if (bs->drv && bs->backing_hd) {
2330 int ret = bdrv_commit(bs);
2331 if (ret < 0) {
ed78cda3 2332 aio_context_release(aio_context);
272d2d8e
JC
2333 return ret;
2334 }
e8877497 2335 }
ed78cda3 2336 aio_context_release(aio_context);
6ab4b5ab 2337 }
e8877497 2338 return 0;
6ab4b5ab
MA
2339}
2340
dbffbdcf
SH
2341/**
2342 * Remove an active request from the tracked requests list
2343 *
2344 * This function should be called when a tracked request is completing.
2345 */
2346static void tracked_request_end(BdrvTrackedRequest *req)
2347{
2dbafdc0
KW
2348 if (req->serialising) {
2349 req->bs->serialising_in_flight--;
2350 }
2351
dbffbdcf 2352 QLIST_REMOVE(req, list);
f4658285 2353 qemu_co_queue_restart_all(&req->wait_queue);
dbffbdcf
SH
2354}
2355
2356/**
2357 * Add an active request to the tracked requests list
2358 */
2359static void tracked_request_begin(BdrvTrackedRequest *req,
2360 BlockDriverState *bs,
793ed47a
KW
2361 int64_t offset,
2362 unsigned int bytes, bool is_write)
dbffbdcf
SH
2363{
2364 *req = (BdrvTrackedRequest){
2365 .bs = bs,
2dbafdc0
KW
2366 .offset = offset,
2367 .bytes = bytes,
2368 .is_write = is_write,
2369 .co = qemu_coroutine_self(),
2370 .serialising = false,
7327145f
KW
2371 .overlap_offset = offset,
2372 .overlap_bytes = bytes,
dbffbdcf
SH
2373 };
2374
f4658285
SH
2375 qemu_co_queue_init(&req->wait_queue);
2376
dbffbdcf
SH
2377 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
2378}
2379
e96126ff 2380static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
2dbafdc0 2381{
7327145f 2382 int64_t overlap_offset = req->offset & ~(align - 1);
e96126ff
KW
2383 unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
2384 - overlap_offset;
7327145f 2385
2dbafdc0
KW
2386 if (!req->serialising) {
2387 req->bs->serialising_in_flight++;
2388 req->serialising = true;
2389 }
7327145f
KW
2390
2391 req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
2392 req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
2dbafdc0
KW
2393}
2394
d83947ac
SH
2395/**
2396 * Round a region to cluster boundaries
2397 */
343bded4
PB
2398void bdrv_round_to_clusters(BlockDriverState *bs,
2399 int64_t sector_num, int nb_sectors,
2400 int64_t *cluster_sector_num,
2401 int *cluster_nb_sectors)
d83947ac
SH
2402{
2403 BlockDriverInfo bdi;
2404
2405 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
2406 *cluster_sector_num = sector_num;
2407 *cluster_nb_sectors = nb_sectors;
2408 } else {
2409 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
2410 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
2411 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
2412 nb_sectors, c);
2413 }
2414}
2415
7327145f 2416static int bdrv_get_cluster_size(BlockDriverState *bs)
793ed47a
KW
2417{
2418 BlockDriverInfo bdi;
7327145f 2419 int ret;
793ed47a 2420
7327145f
KW
2421 ret = bdrv_get_info(bs, &bdi);
2422 if (ret < 0 || bdi.cluster_size == 0) {
2423 return bs->request_alignment;
793ed47a 2424 } else {
7327145f 2425 return bdi.cluster_size;
793ed47a
KW
2426 }
2427}
2428
f4658285 2429static bool tracked_request_overlaps(BdrvTrackedRequest *req,
793ed47a
KW
2430 int64_t offset, unsigned int bytes)
2431{
d83947ac 2432 /* aaaa bbbb */
7327145f 2433 if (offset >= req->overlap_offset + req->overlap_bytes) {
d83947ac
SH
2434 return false;
2435 }
2436 /* bbbb aaaa */
7327145f 2437 if (req->overlap_offset >= offset + bytes) {
d83947ac
SH
2438 return false;
2439 }
2440 return true;
f4658285
SH
2441}
2442
28de2dcd 2443static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
f4658285 2444{
2dbafdc0 2445 BlockDriverState *bs = self->bs;
f4658285
SH
2446 BdrvTrackedRequest *req;
2447 bool retry;
28de2dcd 2448 bool waited = false;
f4658285 2449
2dbafdc0 2450 if (!bs->serialising_in_flight) {
28de2dcd 2451 return false;
2dbafdc0
KW
2452 }
2453
f4658285
SH
2454 do {
2455 retry = false;
2456 QLIST_FOREACH(req, &bs->tracked_requests, list) {
2dbafdc0 2457 if (req == self || (!req->serialising && !self->serialising)) {
65afd211
KW
2458 continue;
2459 }
7327145f
KW
2460 if (tracked_request_overlaps(req, self->overlap_offset,
2461 self->overlap_bytes))
2462 {
5f8b6491
SH
2463 /* Hitting this means there was a reentrant request, for
2464 * example, a block driver issuing nested requests. This must
2465 * never happen since it means deadlock.
2466 */
2467 assert(qemu_coroutine_self() != req->co);
2468
6460440f
KW
2469 /* If the request is already (indirectly) waiting for us, or
2470 * will wait for us as soon as it wakes up, then just go on
2471 * (instead of producing a deadlock in the former case). */
2472 if (!req->waiting_for) {
2473 self->waiting_for = req;
2474 qemu_co_queue_wait(&req->wait_queue);
2475 self->waiting_for = NULL;
2476 retry = true;
28de2dcd 2477 waited = true;
6460440f
KW
2478 break;
2479 }
f4658285
SH
2480 }
2481 }
2482 } while (retry);
28de2dcd
KW
2483
2484 return waited;
f4658285
SH
2485}
2486
756e6736
KW
2487/*
2488 * Return values:
2489 * 0 - success
2490 * -EINVAL - backing format specified, but no file
2491 * -ENOSPC - can't update the backing file because no space is left in the
2492 * image file header
2493 * -ENOTSUP - format driver doesn't support changing the backing file
2494 */
2495int bdrv_change_backing_file(BlockDriverState *bs,
2496 const char *backing_file, const char *backing_fmt)
2497{
2498 BlockDriver *drv = bs->drv;
469ef350 2499 int ret;
756e6736 2500
5f377794
PB
2501 /* Backing file format doesn't make sense without a backing file */
2502 if (backing_fmt && !backing_file) {
2503 return -EINVAL;
2504 }
2505
756e6736 2506 if (drv->bdrv_change_backing_file != NULL) {
469ef350 2507 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
756e6736 2508 } else {
469ef350 2509 ret = -ENOTSUP;
756e6736 2510 }
469ef350
PB
2511
2512 if (ret == 0) {
2513 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2514 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2515 }
2516 return ret;
756e6736
KW
2517}
2518
6ebdcee2
JC
2519/*
2520 * Finds the image layer in the chain that has 'bs' as its backing file.
2521 *
2522 * active is the current topmost image.
2523 *
2524 * Returns NULL if bs is not found in active's image chain,
2525 * or if active == bs.
4caf0fcd
JC
2526 *
2527 * Returns the bottommost base image if bs == NULL.
6ebdcee2
JC
2528 */
2529BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2530 BlockDriverState *bs)
2531{
4caf0fcd
JC
2532 while (active && bs != active->backing_hd) {
2533 active = active->backing_hd;
6ebdcee2
JC
2534 }
2535
4caf0fcd
JC
2536 return active;
2537}
6ebdcee2 2538
4caf0fcd
JC
2539/* Given a BDS, searches for the base layer. */
2540BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2541{
2542 return bdrv_find_overlay(bs, NULL);
6ebdcee2
JC
2543}
2544
2545typedef struct BlkIntermediateStates {
2546 BlockDriverState *bs;
2547 QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
2548} BlkIntermediateStates;
2549
2550
2551/*
2552 * Drops images above 'base' up to and including 'top', and sets the image
2553 * above 'top' to have base as its backing file.
2554 *
2555 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2556 * information in 'bs' can be properly updated.
2557 *
2558 * E.g., this will convert the following chain:
2559 * bottom <- base <- intermediate <- top <- active
2560 *
2561 * to
2562 *
2563 * bottom <- base <- active
2564 *
2565 * It is allowed for bottom==base, in which case it converts:
2566 *
2567 * base <- intermediate <- top <- active
2568 *
2569 * to
2570 *
2571 * base <- active
2572 *
54e26900
JC
2573 * If backing_file_str is non-NULL, it will be used when modifying top's
2574 * overlay image metadata.
2575 *
6ebdcee2
JC
2576 * Error conditions:
2577 * if active == top, that is considered an error
2578 *
2579 */
2580int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
54e26900 2581 BlockDriverState *base, const char *backing_file_str)
6ebdcee2
JC
2582{
2583 BlockDriverState *intermediate;
2584 BlockDriverState *base_bs = NULL;
2585 BlockDriverState *new_top_bs = NULL;
2586 BlkIntermediateStates *intermediate_state, *next;
2587 int ret = -EIO;
2588
2589 QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2590 QSIMPLEQ_INIT(&states_to_delete);
2591
2592 if (!top->drv || !base->drv) {
2593 goto exit;
2594 }
2595
2596 new_top_bs = bdrv_find_overlay(active, top);
2597
2598 if (new_top_bs == NULL) {
2599 /* we could not find the image above 'top', this is an error */
2600 goto exit;
2601 }
2602
2603 /* special case of new_top_bs->backing_hd already pointing to base - nothing
2604 * to do, no intermediate images */
2605 if (new_top_bs->backing_hd == base) {
2606 ret = 0;
2607 goto exit;
2608 }
2609
2610 intermediate = top;
2611
2612 /* now we will go down through the list, and add each BDS we find
2613 * into our deletion queue, until we hit the 'base'
2614 */
2615 while (intermediate) {
2616 intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
2617 intermediate_state->bs = intermediate;
2618 QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2619
2620 if (intermediate->backing_hd == base) {
2621 base_bs = intermediate->backing_hd;
2622 break;
2623 }
2624 intermediate = intermediate->backing_hd;
2625 }
2626 if (base_bs == NULL) {
2627 /* something went wrong, we did not end at the base. safely
2628 * unravel everything, and exit with error */
2629 goto exit;
2630 }
2631
2632 /* success - we can delete the intermediate states, and link top->base */
54e26900
JC
2633 backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
2634 ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
6ebdcee2
JC
2635 base_bs->drv ? base_bs->drv->format_name : "");
2636 if (ret) {
2637 goto exit;
2638 }
920beae1 2639 bdrv_set_backing_hd(new_top_bs, base_bs);
6ebdcee2
JC
2640
2641 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2642 /* so that bdrv_close() does not recursively close the chain */
920beae1 2643 bdrv_set_backing_hd(intermediate_state->bs, NULL);
4f6fd349 2644 bdrv_unref(intermediate_state->bs);
6ebdcee2
JC
2645 }
2646 ret = 0;
2647
2648exit:
2649 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2650 g_free(intermediate_state);
2651 }
2652 return ret;
2653}
2654
2655
71d0770c
AL
2656static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
2657 size_t size)
2658{
2659 int64_t len;
2660
1dd3a447
KW
2661 if (size > INT_MAX) {
2662 return -EIO;
2663 }
2664
71d0770c
AL
2665 if (!bdrv_is_inserted(bs))
2666 return -ENOMEDIUM;
2667
2668 if (bs->growable)
2669 return 0;
2670
2671 len = bdrv_getlength(bs);
2672
fbb7b4e0
KW
2673 if (offset < 0)
2674 return -EIO;
2675
2676 if ((offset > len) || (len - offset < size))
71d0770c
AL
2677 return -EIO;
2678
2679 return 0;
2680}
2681
2682static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
2683 int nb_sectors)
2684{
54db38a4 2685 if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
8f4754ed
KW
2686 return -EIO;
2687 }
2688
eb5a3165
JS
2689 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
2690 nb_sectors * BDRV_SECTOR_SIZE);
71d0770c
AL
2691}
2692
1c9805a3
SH
2693typedef struct RwCo {
2694 BlockDriverState *bs;
775aa8b6 2695 int64_t offset;
1c9805a3
SH
2696 QEMUIOVector *qiov;
2697 bool is_write;
2698 int ret;
4105eaaa 2699 BdrvRequestFlags flags;
1c9805a3
SH
2700} RwCo;
2701
2702static void coroutine_fn bdrv_rw_co_entry(void *opaque)
fc01f7e7 2703{
1c9805a3 2704 RwCo *rwco = opaque;
ea2384d3 2705
1c9805a3 2706 if (!rwco->is_write) {
775aa8b6
KW
2707 rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
2708 rwco->qiov->size, rwco->qiov,
4105eaaa 2709 rwco->flags);
775aa8b6
KW
2710 } else {
2711 rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
2712 rwco->qiov->size, rwco->qiov,
2713 rwco->flags);
1c9805a3
SH
2714 }
2715}
e7a8a783 2716
1c9805a3 2717/*
8d3b1a2d 2718 * Process a vectored synchronous request using coroutines
1c9805a3 2719 */
775aa8b6
KW
2720static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
2721 QEMUIOVector *qiov, bool is_write,
2722 BdrvRequestFlags flags)
1c9805a3 2723{
1c9805a3
SH
2724 Coroutine *co;
2725 RwCo rwco = {
2726 .bs = bs,
775aa8b6 2727 .offset = offset,
8d3b1a2d 2728 .qiov = qiov,
1c9805a3
SH
2729 .is_write = is_write,
2730 .ret = NOT_DONE,
4105eaaa 2731 .flags = flags,
1c9805a3 2732 };
e7a8a783 2733
498e386c
ZYW
2734 /**
2735 * In sync call context, when the vcpu is blocked, this throttling timer
2736 * will not fire; so the I/O throttling function has to be disabled here
2737 * if it has been enabled.
2738 */
2739 if (bs->io_limits_enabled) {
2740 fprintf(stderr, "Disabling I/O throttling on '%s' due "
2741 "to synchronous I/O.\n", bdrv_get_device_name(bs));
2742 bdrv_io_limits_disable(bs);
2743 }
2744
1c9805a3
SH
2745 if (qemu_in_coroutine()) {
2746 /* Fast-path if already in coroutine context */
2747 bdrv_rw_co_entry(&rwco);
2748 } else {
2572b37a
SH
2749 AioContext *aio_context = bdrv_get_aio_context(bs);
2750
1c9805a3
SH
2751 co = qemu_coroutine_create(bdrv_rw_co_entry);
2752 qemu_coroutine_enter(co, &rwco);
2753 while (rwco.ret == NOT_DONE) {
2572b37a 2754 aio_poll(aio_context, true);
1c9805a3
SH
2755 }
2756 }
2757 return rwco.ret;
2758}
b338082b 2759
8d3b1a2d
KW
2760/*
2761 * Process a synchronous request using coroutines
2762 */
2763static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
4105eaaa 2764 int nb_sectors, bool is_write, BdrvRequestFlags flags)
8d3b1a2d
KW
2765{
2766 QEMUIOVector qiov;
2767 struct iovec iov = {
2768 .iov_base = (void *)buf,
2769 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
2770 };
2771
da15ee51
KW
2772 if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
2773 return -EINVAL;
2774 }
2775
8d3b1a2d 2776 qemu_iovec_init_external(&qiov, &iov, 1);
775aa8b6
KW
2777 return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
2778 &qiov, is_write, flags);
8d3b1a2d
KW
2779}
2780
1c9805a3
SH
2781/* return < 0 if error. See bdrv_write() for the return codes */
2782int bdrv_read(BlockDriverState *bs, int64_t sector_num,
2783 uint8_t *buf, int nb_sectors)
2784{
4105eaaa 2785 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
fc01f7e7
FB
2786}
2787
07d27a44
MA
2788/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
2789int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
2790 uint8_t *buf, int nb_sectors)
2791{
2792 bool enabled;
2793 int ret;
2794
2795 enabled = bs->io_limits_enabled;
2796 bs->io_limits_enabled = false;
4e7395e8 2797 ret = bdrv_read(bs, sector_num, buf, nb_sectors);
07d27a44
MA
2798 bs->io_limits_enabled = enabled;
2799 return ret;
2800}
2801
5fafdf24 2802/* Return < 0 if error. Important errors are:
19cb3738
FB
2803 -EIO generic I/O error (may happen for all errors)
2804 -ENOMEDIUM No media inserted.
2805 -EINVAL Invalid sector number or nb_sectors
2806 -EACCES Trying to write a read-only device
2807*/
5fafdf24 2808int bdrv_write(BlockDriverState *bs, int64_t sector_num,
fc01f7e7
FB
2809 const uint8_t *buf, int nb_sectors)
2810{
4105eaaa 2811 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
83f64091
FB
2812}
2813
aa7bfbff
PL
2814int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
2815 int nb_sectors, BdrvRequestFlags flags)
4105eaaa
PL
2816{
2817 return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
aa7bfbff 2818 BDRV_REQ_ZERO_WRITE | flags);
8d3b1a2d
KW
2819}
2820
d75cbb5e
PL
2821/*
2822 * Completely zero out a block device with the help of bdrv_write_zeroes.
2823 * The operation is sped up by checking the block status and only writing
2824 * zeroes to the device if they currently do not return zeroes. Optional
2825 * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
2826 *
2827 * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
2828 */
2829int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
2830{
d32f7c10 2831 int64_t target_sectors, ret, nb_sectors, sector_num = 0;
d75cbb5e
PL
2832 int n;
2833
d32f7c10
MA
2834 target_sectors = bdrv_nb_sectors(bs);
2835 if (target_sectors < 0) {
2836 return target_sectors;
9ce10c0b 2837 }
9ce10c0b 2838
d75cbb5e 2839 for (;;) {
d32f7c10 2840 nb_sectors = target_sectors - sector_num;
d75cbb5e
PL
2841 if (nb_sectors <= 0) {
2842 return 0;
2843 }
2844 if (nb_sectors > INT_MAX) {
2845 nb_sectors = INT_MAX;
2846 }
2847 ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
3d94ce60
PL
2848 if (ret < 0) {
2849 error_report("error getting block status at sector %" PRId64 ": %s",
2850 sector_num, strerror(-ret));
2851 return ret;
2852 }
d75cbb5e
PL
2853 if (ret & BDRV_BLOCK_ZERO) {
2854 sector_num += n;
2855 continue;
2856 }
2857 ret = bdrv_write_zeroes(bs, sector_num, n, flags);
2858 if (ret < 0) {
2859 error_report("error writing zeroes at sector %" PRId64 ": %s",
2860 sector_num, strerror(-ret));
2861 return ret;
2862 }
2863 sector_num += n;
2864 }
2865}
2866
a3ef6571 2867int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
83f64091 2868{
a3ef6571
KW
2869 QEMUIOVector qiov;
2870 struct iovec iov = {
2871 .iov_base = (void *)buf,
2872 .iov_len = bytes,
2873 };
9a8c4cce 2874 int ret;
83f64091 2875
a3ef6571
KW
2876 if (bytes < 0) {
2877 return -EINVAL;
83f64091
FB
2878 }
2879
a3ef6571
KW
2880 qemu_iovec_init_external(&qiov, &iov, 1);
2881 ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
2882 if (ret < 0) {
2883 return ret;
83f64091 2884 }
a3ef6571
KW
2885
2886 return bytes;
83f64091
FB
2887}
2888
8d3b1a2d 2889int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
83f64091 2890{
9a8c4cce 2891 int ret;
83f64091 2892
8407d5d7
KW
2893 ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
2894 if (ret < 0) {
2895 return ret;
83f64091
FB
2896 }
2897
8d3b1a2d
KW
2898 return qiov->size;
2899}
2900
2901int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
8407d5d7 2902 const void *buf, int bytes)
8d3b1a2d
KW
2903{
2904 QEMUIOVector qiov;
2905 struct iovec iov = {
2906 .iov_base = (void *) buf,
8407d5d7 2907 .iov_len = bytes,
8d3b1a2d
KW
2908 };
2909
8407d5d7
KW
2910 if (bytes < 0) {
2911 return -EINVAL;
2912 }
2913
8d3b1a2d
KW
2914 qemu_iovec_init_external(&qiov, &iov, 1);
2915 return bdrv_pwritev(bs, offset, &qiov);
83f64091 2916}
83f64091 2917
f08145fe
KW
2918/*
2919 * Writes to the file and ensures that no writes are reordered across this
2920 * request (acts as a barrier)
2921 *
2922 * Returns 0 on success, -errno in error cases.
2923 */
2924int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
2925 const void *buf, int count)
2926{
2927 int ret;
2928
2929 ret = bdrv_pwrite(bs, offset, buf, count);
2930 if (ret < 0) {
2931 return ret;
2932 }
2933
f05fa4ad
PB
2934 /* No flush needed for cache modes that already do it */
2935 if (bs->enable_write_cache) {
f08145fe
KW
2936 bdrv_flush(bs);
2937 }
2938
2939 return 0;
2940}
2941
470c0504 2942static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
ab185921
SH
2943 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
2944{
2945 /* Perform I/O through a temporary buffer so that users who scribble over
2946 * their read buffer while the operation is in progress do not end up
2947 * modifying the image file. This is critical for zero-copy guest I/O
2948 * where anything might happen inside guest memory.
2949 */
2950 void *bounce_buffer;
2951
79c053bd 2952 BlockDriver *drv = bs->drv;
ab185921
SH
2953 struct iovec iov;
2954 QEMUIOVector bounce_qiov;
2955 int64_t cluster_sector_num;
2956 int cluster_nb_sectors;
2957 size_t skip_bytes;
2958 int ret;
2959
2960 /* Cover entire cluster so no additional backing file I/O is required when
2961 * allocating cluster in the image file.
2962 */
343bded4
PB
2963 bdrv_round_to_clusters(bs, sector_num, nb_sectors,
2964 &cluster_sector_num, &cluster_nb_sectors);
ab185921 2965
470c0504
SH
2966 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
2967 cluster_sector_num, cluster_nb_sectors);
ab185921
SH
2968
2969 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
2970 iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
2971 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
2972
79c053bd
SH
2973 ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
2974 &bounce_qiov);
ab185921
SH
2975 if (ret < 0) {
2976 goto err;
2977 }
2978
79c053bd
SH
2979 if (drv->bdrv_co_write_zeroes &&
2980 buffer_is_zero(bounce_buffer, iov.iov_len)) {
621f0589 2981 ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
aa7bfbff 2982 cluster_nb_sectors, 0);
79c053bd 2983 } else {
f05fa4ad
PB
2984 /* This does not change the data on the disk, it is not necessary
2985 * to flush even in cache=writethrough mode.
2986 */
79c053bd 2987 ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
ab185921 2988 &bounce_qiov);
79c053bd
SH
2989 }
2990
ab185921
SH
2991 if (ret < 0) {
2992 /* It might be okay to ignore write errors for guest requests. If this
2993 * is a deliberate copy-on-read then we don't want to ignore the error.
2994 * Simply report it in all cases.
2995 */
2996 goto err;
2997 }
2998
2999 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
03396148
MT
3000 qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
3001 nb_sectors * BDRV_SECTOR_SIZE);
ab185921
SH
3002
3003err:
3004 qemu_vfree(bounce_buffer);
3005 return ret;
3006}
3007
c5fbe571 3008/*
d0c7f642
KW
3009 * Forwards an already correctly aligned request to the BlockDriver. This
3010 * handles copy on read and zeroing after EOF; any other features must be
3011 * implemented by the caller.
c5fbe571 3012 */
d0c7f642 3013static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
65afd211 3014 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
ec746e10 3015 int64_t align, QEMUIOVector *qiov, int flags)
da1fa91d
KW
3016{
3017 BlockDriver *drv = bs->drv;
dbffbdcf 3018 int ret;
da1fa91d 3019
d0c7f642
KW
3020 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3021 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
da1fa91d 3022
d0c7f642
KW
3023 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3024 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
8eb029c2 3025 assert(!qiov || bytes == qiov->size);
d0c7f642
KW
3026
3027 /* Handle Copy on Read and associated serialisation */
470c0504 3028 if (flags & BDRV_REQ_COPY_ON_READ) {
7327145f
KW
3029 /* If we touch the same cluster it counts as an overlap. This
3030 * guarantees that allocating writes will be serialized and not race
3031 * with each other for the same cluster. For example, in copy-on-read
3032 * it ensures that the CoR read and write operations are atomic and
3033 * guest writes cannot interleave between them. */
3034 mark_request_serialising(req, bdrv_get_cluster_size(bs));
470c0504
SH
3035 }
3036
2dbafdc0 3037 wait_serialising_requests(req);
f4658285 3038
470c0504 3039 if (flags & BDRV_REQ_COPY_ON_READ) {
ab185921
SH
3040 int pnum;
3041
bdad13b9 3042 ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
ab185921
SH
3043 if (ret < 0) {
3044 goto out;
3045 }
3046
3047 if (!ret || pnum != nb_sectors) {
470c0504 3048 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
ab185921
SH
3049 goto out;
3050 }
3051 }
3052
d0c7f642 3053 /* Forward the request to the BlockDriver */
893a8f62
MK
3054 if (!(bs->zero_beyond_eof && bs->growable)) {
3055 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
3056 } else {
3057 /* Read zeros after EOF of growable BDSes */
4049082c 3058 int64_t total_sectors, max_nb_sectors;
893a8f62 3059
4049082c
MA
3060 total_sectors = bdrv_nb_sectors(bs);
3061 if (total_sectors < 0) {
3062 ret = total_sectors;
893a8f62
MK
3063 goto out;
3064 }
3065
5f5bcd80
KW
3066 max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
3067 align >> BDRV_SECTOR_BITS);
893a8f62 3068 if (max_nb_sectors > 0) {
33f461e0
KW
3069 QEMUIOVector local_qiov;
3070 size_t local_sectors;
3071
3072 max_nb_sectors = MIN(max_nb_sectors, SIZE_MAX / BDRV_SECTOR_BITS);
3073 local_sectors = MIN(max_nb_sectors, nb_sectors);
3074
3075 qemu_iovec_init(&local_qiov, qiov->niov);
3076 qemu_iovec_concat(&local_qiov, qiov, 0,
3077 local_sectors * BDRV_SECTOR_SIZE);
3078
3079 ret = drv->bdrv_co_readv(bs, sector_num, local_sectors,
3080 &local_qiov);
3081
3082 qemu_iovec_destroy(&local_qiov);
893a8f62
MK
3083 } else {
3084 ret = 0;
3085 }
3086
3087 /* Reading beyond end of file is supposed to produce zeroes */
3088 if (ret == 0 && total_sectors < sector_num + nb_sectors) {
3089 uint64_t offset = MAX(0, total_sectors - sector_num);
3090 uint64_t bytes = (sector_num + nb_sectors - offset) *
3091 BDRV_SECTOR_SIZE;
3092 qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
3093 }
3094 }
ab185921
SH
3095
3096out:
dbffbdcf 3097 return ret;
da1fa91d
KW
3098}
3099
d0c7f642
KW
3100/*
3101 * Handle a read request in coroutine context
3102 */
1b0288ae
KW
3103static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
3104 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
d0c7f642
KW
3105 BdrvRequestFlags flags)
3106{
3107 BlockDriver *drv = bs->drv;
65afd211
KW
3108 BdrvTrackedRequest req;
3109
1b0288ae
KW
3110 /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3111 uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
3112 uint8_t *head_buf = NULL;
3113 uint8_t *tail_buf = NULL;
3114 QEMUIOVector local_qiov;
3115 bool use_local_qiov = false;
d0c7f642
KW
3116 int ret;
3117
3118 if (!drv) {
3119 return -ENOMEDIUM;
3120 }
1b0288ae 3121 if (bdrv_check_byte_request(bs, offset, bytes)) {
d0c7f642
KW
3122 return -EIO;
3123 }
3124
3125 if (bs->copy_on_read) {
3126 flags |= BDRV_REQ_COPY_ON_READ;
3127 }
3128
3129 /* throttling disk I/O */
3130 if (bs->io_limits_enabled) {
d5103588 3131 bdrv_io_limits_intercept(bs, bytes, false);
1b0288ae
KW
3132 }
3133
3134 /* Align read if necessary by padding qiov */
3135 if (offset & (align - 1)) {
3136 head_buf = qemu_blockalign(bs, align);
3137 qemu_iovec_init(&local_qiov, qiov->niov + 2);
3138 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
3139 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3140 use_local_qiov = true;
3141
3142 bytes += offset & (align - 1);
3143 offset = offset & ~(align - 1);
3144 }
3145
3146 if ((offset + bytes) & (align - 1)) {
3147 if (!use_local_qiov) {
3148 qemu_iovec_init(&local_qiov, qiov->niov + 1);
3149 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3150 use_local_qiov = true;
3151 }
3152 tail_buf = qemu_blockalign(bs, align);
3153 qemu_iovec_add(&local_qiov, tail_buf,
3154 align - ((offset + bytes) & (align - 1)));
3155
3156 bytes = ROUND_UP(bytes, align);
3157 }
3158
65afd211 3159 tracked_request_begin(&req, bs, offset, bytes, false);
ec746e10 3160 ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
1b0288ae
KW
3161 use_local_qiov ? &local_qiov : qiov,
3162 flags);
65afd211 3163 tracked_request_end(&req);
1b0288ae
KW
3164
3165 if (use_local_qiov) {
3166 qemu_iovec_destroy(&local_qiov);
3167 qemu_vfree(head_buf);
3168 qemu_vfree(tail_buf);
d0c7f642
KW
3169 }
3170
d0c7f642
KW
3171 return ret;
3172}
3173
1b0288ae
KW
3174static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
3175 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3176 BdrvRequestFlags flags)
3177{
3178 if (nb_sectors < 0 || nb_sectors > (UINT_MAX >> BDRV_SECTOR_BITS)) {
3179 return -EINVAL;
3180 }
3181
3182 return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
3183 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3184}
3185
c5fbe571 3186int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
da1fa91d
KW
3187 int nb_sectors, QEMUIOVector *qiov)
3188{
c5fbe571 3189 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
da1fa91d 3190
470c0504
SH
3191 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
3192}
3193
3194int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
3195 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
3196{
3197 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
3198
3199 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
3200 BDRV_REQ_COPY_ON_READ);
c5fbe571
SH
3201}
3202
c31cb707
PL
3203/* if no limit is specified in the BlockLimits use a default
3204 * of 32768 512-byte sectors (16 MiB) per request.
3205 */
3206#define MAX_WRITE_ZEROES_DEFAULT 32768
3207
f08f2dda 3208static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
aa7bfbff 3209 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
f08f2dda
SH
3210{
3211 BlockDriver *drv = bs->drv;
3212 QEMUIOVector qiov;
c31cb707
PL
3213 struct iovec iov = {0};
3214 int ret = 0;
f08f2dda 3215
c31cb707
PL
3216 int max_write_zeroes = bs->bl.max_write_zeroes ?
3217 bs->bl.max_write_zeroes : MAX_WRITE_ZEROES_DEFAULT;
621f0589 3218
c31cb707
PL
3219 while (nb_sectors > 0 && !ret) {
3220 int num = nb_sectors;
3221
b8d71c09
PB
3222 /* Align request. Block drivers can expect the "bulk" of the request
3223 * to be aligned.
3224 */
3225 if (bs->bl.write_zeroes_alignment
3226 && num > bs->bl.write_zeroes_alignment) {
3227 if (sector_num % bs->bl.write_zeroes_alignment != 0) {
3228 /* Make a small request up to the first aligned sector. */
c31cb707 3229 num = bs->bl.write_zeroes_alignment;
b8d71c09
PB
3230 num -= sector_num % bs->bl.write_zeroes_alignment;
3231 } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
3232 /* Shorten the request to the last aligned sector. num cannot
3233 * underflow because num > bs->bl.write_zeroes_alignment.
3234 */
3235 num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
c31cb707 3236 }
621f0589 3237 }
f08f2dda 3238
c31cb707
PL
3239 /* limit request size */
3240 if (num > max_write_zeroes) {
3241 num = max_write_zeroes;
3242 }
3243
3244 ret = -ENOTSUP;
3245 /* First try the efficient write zeroes operation */
3246 if (drv->bdrv_co_write_zeroes) {
3247 ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
3248 }
3249
3250 if (ret == -ENOTSUP) {
3251 /* Fall back to bounce buffer if write zeroes is unsupported */
3252 iov.iov_len = num * BDRV_SECTOR_SIZE;
3253 if (iov.iov_base == NULL) {
b8d71c09
PB
3254 iov.iov_base = qemu_blockalign(bs, num * BDRV_SECTOR_SIZE);
3255 memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
c31cb707
PL
3256 }
3257 qemu_iovec_init_external(&qiov, &iov, 1);
f08f2dda 3258
c31cb707 3259 ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
b8d71c09
PB
3260
3261 /* Keep bounce buffer around if it is big enough for all
3262 * all future requests.
3263 */
3264 if (num < max_write_zeroes) {
3265 qemu_vfree(iov.iov_base);
3266 iov.iov_base = NULL;
3267 }
c31cb707
PL
3268 }
3269
3270 sector_num += num;
3271 nb_sectors -= num;
3272 }
f08f2dda
SH
3273
3274 qemu_vfree(iov.iov_base);
3275 return ret;
3276}
3277
c5fbe571 3278/*
b404f720 3279 * Forwards an already correctly aligned write request to the BlockDriver.
c5fbe571 3280 */
b404f720 3281static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
65afd211
KW
3282 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
3283 QEMUIOVector *qiov, int flags)
c5fbe571
SH
3284{
3285 BlockDriver *drv = bs->drv;
28de2dcd 3286 bool waited;
6b7cb247 3287 int ret;
da1fa91d 3288
b404f720
KW
3289 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3290 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
f4658285 3291
b404f720
KW
3292 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3293 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
8eb029c2 3294 assert(!qiov || bytes == qiov->size);
cc0681c4 3295
28de2dcd
KW
3296 waited = wait_serialising_requests(req);
3297 assert(!waited || !req->serialising);
af91f9a7
KW
3298 assert(req->overlap_offset <= offset);
3299 assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
244eadef 3300
65afd211 3301 ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
d616b224 3302
465bee1d
PL
3303 if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
3304 !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
3305 qemu_iovec_is_zero(qiov)) {
3306 flags |= BDRV_REQ_ZERO_WRITE;
3307 if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
3308 flags |= BDRV_REQ_MAY_UNMAP;
3309 }
3310 }
3311
d616b224
SH
3312 if (ret < 0) {
3313 /* Do nothing, write notifier decided to fail this request */
3314 } else if (flags & BDRV_REQ_ZERO_WRITE) {
9e1cb96d 3315 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
aa7bfbff 3316 ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
f08f2dda 3317 } else {
9e1cb96d 3318 BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
f08f2dda
SH
3319 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
3320 }
9e1cb96d 3321 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
6b7cb247 3322
f05fa4ad
PB
3323 if (ret == 0 && !bs->enable_write_cache) {
3324 ret = bdrv_co_flush(bs);
3325 }
3326
e4654d2d 3327 bdrv_set_dirty(bs, sector_num, nb_sectors);
da1fa91d
KW
3328
3329 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
3330 bs->wr_highest_sector = sector_num + nb_sectors - 1;
3331 }
df2a6f29
PB
3332 if (bs->growable && ret >= 0) {
3333 bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
3334 }
da1fa91d 3335
6b7cb247 3336 return ret;
da1fa91d
KW
3337}
3338
b404f720
KW
3339/*
3340 * Handle a write request in coroutine context
3341 */
6601553e
KW
3342static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
3343 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
b404f720
KW
3344 BdrvRequestFlags flags)
3345{
65afd211 3346 BdrvTrackedRequest req;
3b8242e0
KW
3347 /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3348 uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
3349 uint8_t *head_buf = NULL;
3350 uint8_t *tail_buf = NULL;
3351 QEMUIOVector local_qiov;
3352 bool use_local_qiov = false;
b404f720
KW
3353 int ret;
3354
3355 if (!bs->drv) {
3356 return -ENOMEDIUM;
3357 }
3358 if (bs->read_only) {
3359 return -EACCES;
3360 }
6601553e 3361 if (bdrv_check_byte_request(bs, offset, bytes)) {
b404f720
KW
3362 return -EIO;
3363 }
3364
b404f720
KW
3365 /* throttling disk I/O */
3366 if (bs->io_limits_enabled) {
d5103588 3367 bdrv_io_limits_intercept(bs, bytes, true);
b404f720
KW
3368 }
3369
3b8242e0
KW
3370 /*
3371 * Align write if necessary by performing a read-modify-write cycle.
3372 * Pad qiov with the read parts and be sure to have a tracked request not
3373 * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
3374 */
65afd211 3375 tracked_request_begin(&req, bs, offset, bytes, true);
3b8242e0
KW
3376
3377 if (offset & (align - 1)) {
3378 QEMUIOVector head_qiov;
3379 struct iovec head_iov;
3380
3381 mark_request_serialising(&req, align);
3382 wait_serialising_requests(&req);
3383
3384 head_buf = qemu_blockalign(bs, align);
3385 head_iov = (struct iovec) {
3386 .iov_base = head_buf,
3387 .iov_len = align,
3388 };
3389 qemu_iovec_init_external(&head_qiov, &head_iov, 1);
3390
9e1cb96d 3391 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
3b8242e0
KW
3392 ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
3393 align, &head_qiov, 0);
3394 if (ret < 0) {
3395 goto fail;
3396 }
9e1cb96d 3397 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
3b8242e0
KW
3398
3399 qemu_iovec_init(&local_qiov, qiov->niov + 2);
3400 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
3401 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3402 use_local_qiov = true;
3403
3404 bytes += offset & (align - 1);
3405 offset = offset & ~(align - 1);
3406 }
3407
3408 if ((offset + bytes) & (align - 1)) {
3409 QEMUIOVector tail_qiov;
3410 struct iovec tail_iov;
3411 size_t tail_bytes;
28de2dcd 3412 bool waited;
3b8242e0
KW
3413
3414 mark_request_serialising(&req, align);
28de2dcd
KW
3415 waited = wait_serialising_requests(&req);
3416 assert(!waited || !use_local_qiov);
3b8242e0
KW
3417
3418 tail_buf = qemu_blockalign(bs, align);
3419 tail_iov = (struct iovec) {
3420 .iov_base = tail_buf,
3421 .iov_len = align,
3422 };
3423 qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
3424
9e1cb96d 3425 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
3b8242e0
KW
3426 ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
3427 align, &tail_qiov, 0);
3428 if (ret < 0) {
3429 goto fail;
3430 }
9e1cb96d 3431 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
3b8242e0
KW
3432
3433 if (!use_local_qiov) {
3434 qemu_iovec_init(&local_qiov, qiov->niov + 1);
3435 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3436 use_local_qiov = true;
3437 }
3438
3439 tail_bytes = (offset + bytes) & (align - 1);
3440 qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
3441
3442 bytes = ROUND_UP(bytes, align);
3443 }
3444
3445 ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
3446 use_local_qiov ? &local_qiov : qiov,
3447 flags);
3448
3449fail:
65afd211 3450 tracked_request_end(&req);
b404f720 3451
3b8242e0
KW
3452 if (use_local_qiov) {
3453 qemu_iovec_destroy(&local_qiov);
3b8242e0 3454 }
99c4a85c
KW
3455 qemu_vfree(head_buf);
3456 qemu_vfree(tail_buf);
3b8242e0 3457
b404f720
KW
3458 return ret;
3459}
3460
6601553e
KW
3461static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
3462 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3463 BdrvRequestFlags flags)
3464{
3465 if (nb_sectors < 0 || nb_sectors > (INT_MAX >> BDRV_SECTOR_BITS)) {
3466 return -EINVAL;
3467 }
3468
3469 return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
3470 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3471}
3472
c5fbe571
SH
3473int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
3474 int nb_sectors, QEMUIOVector *qiov)
3475{
3476 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
3477
f08f2dda
SH
3478 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
3479}
3480
3481int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
aa7bfbff
PL
3482 int64_t sector_num, int nb_sectors,
3483 BdrvRequestFlags flags)
f08f2dda 3484{
94d6ff21 3485 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
f08f2dda 3486
d32f35cb
PL
3487 if (!(bs->open_flags & BDRV_O_UNMAP)) {
3488 flags &= ~BDRV_REQ_MAY_UNMAP;
3489 }
3490
f08f2dda 3491 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
aa7bfbff 3492 BDRV_REQ_ZERO_WRITE | flags);
c5fbe571
SH
3493}
3494
83f64091
FB
3495/**
3496 * Truncate file to 'offset' bytes (needed only for file protocols)
3497 */
3498int bdrv_truncate(BlockDriverState *bs, int64_t offset)
3499{
3500 BlockDriver *drv = bs->drv;
51762288 3501 int ret;
83f64091 3502 if (!drv)
19cb3738 3503 return -ENOMEDIUM;
83f64091
FB
3504 if (!drv->bdrv_truncate)
3505 return -ENOTSUP;
59f2689d
NS
3506 if (bs->read_only)
3507 return -EACCES;
9c75e168 3508
51762288
SH
3509 ret = drv->bdrv_truncate(bs, offset);
3510 if (ret == 0) {
3511 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
145feb17 3512 bdrv_dev_resize_cb(bs);
51762288
SH
3513 }
3514 return ret;
83f64091
FB
3515}
3516
4a1d5e1f
FZ
3517/**
3518 * Length of a allocated file in bytes. Sparse files are counted by actual
3519 * allocated space. Return < 0 if error or unknown.
3520 */
3521int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
3522{
3523 BlockDriver *drv = bs->drv;
3524 if (!drv) {
3525 return -ENOMEDIUM;
3526 }
3527 if (drv->bdrv_get_allocated_file_size) {
3528 return drv->bdrv_get_allocated_file_size(bs);
3529 }
3530 if (bs->file) {
3531 return bdrv_get_allocated_file_size(bs->file);
3532 }
3533 return -ENOTSUP;
3534}
3535
83f64091 3536/**
65a9bb25 3537 * Return number of sectors on success, -errno on error.
83f64091 3538 */
65a9bb25 3539int64_t bdrv_nb_sectors(BlockDriverState *bs)
83f64091
FB
3540{
3541 BlockDriver *drv = bs->drv;
65a9bb25 3542
83f64091 3543 if (!drv)
19cb3738 3544 return -ENOMEDIUM;
51762288 3545
b94a2610
KW
3546 if (drv->has_variable_length) {
3547 int ret = refresh_total_sectors(bs, bs->total_sectors);
3548 if (ret < 0) {
3549 return ret;
46a4e4e6 3550 }
83f64091 3551 }
65a9bb25
MA
3552 return bs->total_sectors;
3553}
3554
3555/**
3556 * Return length in bytes on success, -errno on error.
3557 * The length is always a multiple of BDRV_SECTOR_SIZE.
3558 */
3559int64_t bdrv_getlength(BlockDriverState *bs)
3560{
3561 int64_t ret = bdrv_nb_sectors(bs);
3562
3563 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
fc01f7e7
FB
3564}
3565
19cb3738 3566/* return 0 as number of sectors if no device present or error */
96b8f136 3567void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
fc01f7e7 3568{
65a9bb25
MA
3569 int64_t nb_sectors = bdrv_nb_sectors(bs);
3570
3571 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
fc01f7e7 3572}
cf98951b 3573
ff06f5f3
PB
3574void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
3575 BlockdevOnError on_write_error)
abd7f68d
MA
3576{
3577 bs->on_read_error = on_read_error;
3578 bs->on_write_error = on_write_error;
3579}
3580
1ceee0d5 3581BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
abd7f68d
MA
3582{
3583 return is_read ? bs->on_read_error : bs->on_write_error;
3584}
3585
3e1caa5f
PB
3586BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
3587{
3588 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
3589
3590 switch (on_err) {
3591 case BLOCKDEV_ON_ERROR_ENOSPC:
a589569f
WX
3592 return (error == ENOSPC) ?
3593 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
3e1caa5f 3594 case BLOCKDEV_ON_ERROR_STOP:
a589569f 3595 return BLOCK_ERROR_ACTION_STOP;
3e1caa5f 3596 case BLOCKDEV_ON_ERROR_REPORT:
a589569f 3597 return BLOCK_ERROR_ACTION_REPORT;
3e1caa5f 3598 case BLOCKDEV_ON_ERROR_IGNORE:
a589569f 3599 return BLOCK_ERROR_ACTION_IGNORE;
3e1caa5f
PB
3600 default:
3601 abort();
3602 }
3603}
3604
3605/* This is done by device models because, while the block layer knows
3606 * about the error, it does not know whether an operation comes from
3607 * the device or the block layer (from a job, for example).
3608 */
3609void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
3610 bool is_read, int error)
3611{
3612 assert(error >= 0);
2bd3bce8 3613
a589569f 3614 if (action == BLOCK_ERROR_ACTION_STOP) {
2bd3bce8
PB
3615 /* First set the iostatus, so that "info block" returns an iostatus
3616 * that matches the events raised so far (an additional error iostatus
3617 * is fine, but not a lost one).
3618 */
3e1caa5f 3619 bdrv_iostatus_set_err(bs, error);
2bd3bce8
PB
3620
3621 /* Then raise the request to stop the VM and the event.
3622 * qemu_system_vmstop_request_prepare has two effects. First,
3623 * it ensures that the STOP event always comes after the
3624 * BLOCK_IO_ERROR event. Second, it ensures that even if management
3625 * can observe the STOP event and do a "cont" before the STOP
3626 * event is issued, the VM will not stop. In this case, vm_start()
3627 * also ensures that the STOP/RESUME pair of events is emitted.
3628 */
3629 qemu_system_vmstop_request_prepare();
5a2d2cbd
WX
3630 qapi_event_send_block_io_error(bdrv_get_device_name(bs),
3631 is_read ? IO_OPERATION_TYPE_READ :
3632 IO_OPERATION_TYPE_WRITE,
3633 action, &error_abort);
2bd3bce8
PB
3634 qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
3635 } else {
5a2d2cbd
WX
3636 qapi_event_send_block_io_error(bdrv_get_device_name(bs),
3637 is_read ? IO_OPERATION_TYPE_READ :
3638 IO_OPERATION_TYPE_WRITE,
3639 action, &error_abort);
3e1caa5f
PB
3640 }
3641}
3642
b338082b
FB
3643int bdrv_is_read_only(BlockDriverState *bs)
3644{
3645 return bs->read_only;
3646}
3647
985a03b0
TS
3648int bdrv_is_sg(BlockDriverState *bs)
3649{
3650 return bs->sg;
3651}
3652
e900a7b7
CH
3653int bdrv_enable_write_cache(BlockDriverState *bs)
3654{
3655 return bs->enable_write_cache;
3656}
3657
425b0148
PB
3658void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
3659{
3660 bs->enable_write_cache = wce;
55b110f2
JC
3661
3662 /* so a reopen() will preserve wce */
3663 if (wce) {
3664 bs->open_flags |= BDRV_O_CACHE_WB;
3665 } else {
3666 bs->open_flags &= ~BDRV_O_CACHE_WB;
3667 }
425b0148
PB
3668}
3669
ea2384d3
FB
3670int bdrv_is_encrypted(BlockDriverState *bs)
3671{
3672 if (bs->backing_hd && bs->backing_hd->encrypted)
3673 return 1;
3674 return bs->encrypted;
3675}
3676
c0f4ce77
AL
3677int bdrv_key_required(BlockDriverState *bs)
3678{
3679 BlockDriverState *backing_hd = bs->backing_hd;
3680
3681 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
3682 return 1;
3683 return (bs->encrypted && !bs->valid_key);
3684}
3685
ea2384d3
FB
3686int bdrv_set_key(BlockDriverState *bs, const char *key)
3687{
3688 int ret;
3689 if (bs->backing_hd && bs->backing_hd->encrypted) {
3690 ret = bdrv_set_key(bs->backing_hd, key);
3691 if (ret < 0)
3692 return ret;
3693 if (!bs->encrypted)
3694 return 0;
3695 }
fd04a2ae
SH
3696 if (!bs->encrypted) {
3697 return -EINVAL;
3698 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
3699 return -ENOMEDIUM;
3700 }
c0f4ce77 3701 ret = bs->drv->bdrv_set_key(bs, key);
bb5fc20f
AL
3702 if (ret < 0) {
3703 bs->valid_key = 0;
3704 } else if (!bs->valid_key) {
3705 bs->valid_key = 1;
3706 /* call the change callback now, we skipped it on open */
7d4b4ba5 3707 bdrv_dev_change_media_cb(bs, true);
bb5fc20f 3708 }
c0f4ce77 3709 return ret;
ea2384d3
FB
3710}
3711
f8d6bba1 3712const char *bdrv_get_format_name(BlockDriverState *bs)
ea2384d3 3713{
f8d6bba1 3714 return bs->drv ? bs->drv->format_name : NULL;
ea2384d3
FB
3715}
3716
5fafdf24 3717void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
ea2384d3
FB
3718 void *opaque)
3719{
3720 BlockDriver *drv;
e855e4fb
JC
3721 int count = 0;
3722 const char **formats = NULL;
ea2384d3 3723
8a22f02a 3724 QLIST_FOREACH(drv, &bdrv_drivers, list) {
e855e4fb
JC
3725 if (drv->format_name) {
3726 bool found = false;
3727 int i = count;
3728 while (formats && i && !found) {
3729 found = !strcmp(formats[--i], drv->format_name);
3730 }
3731
3732 if (!found) {
3733 formats = g_realloc(formats, (count + 1) * sizeof(char *));
3734 formats[count++] = drv->format_name;
3735 it(opaque, drv->format_name);
3736 }
3737 }
ea2384d3 3738 }
e855e4fb 3739 g_free(formats);
ea2384d3
FB
3740}
3741
dc364f4c 3742/* This function is to find block backend bs */
b338082b
FB
3743BlockDriverState *bdrv_find(const char *name)
3744{
3745 BlockDriverState *bs;
3746
dc364f4c 3747 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1b7bdbc1 3748 if (!strcmp(name, bs->device_name)) {
b338082b 3749 return bs;
1b7bdbc1 3750 }
b338082b
FB
3751 }
3752 return NULL;
3753}
3754
dc364f4c
BC
3755/* This function is to find a node in the bs graph */
3756BlockDriverState *bdrv_find_node(const char *node_name)
3757{
3758 BlockDriverState *bs;
3759
3760 assert(node_name);
3761
3762 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3763 if (!strcmp(node_name, bs->node_name)) {
3764 return bs;
3765 }
3766 }
3767 return NULL;
3768}
3769
c13163fb
BC
3770/* Put this QMP function here so it can access the static graph_bdrv_states. */
3771BlockDeviceInfoList *bdrv_named_nodes_list(void)
3772{
3773 BlockDeviceInfoList *list, *entry;
3774 BlockDriverState *bs;
3775
3776 list = NULL;
3777 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3778 entry = g_malloc0(sizeof(*entry));
3779 entry->value = bdrv_block_device_info(bs);
3780 entry->next = list;
3781 list = entry;
3782 }
3783
3784 return list;
3785}
3786
12d3ba82
BC
3787BlockDriverState *bdrv_lookup_bs(const char *device,
3788 const char *node_name,
3789 Error **errp)
3790{
3791 BlockDriverState *bs = NULL;
3792
12d3ba82
BC
3793 if (device) {
3794 bs = bdrv_find(device);
3795
dd67fa50
BC
3796 if (bs) {
3797 return bs;
12d3ba82 3798 }
12d3ba82
BC
3799 }
3800
dd67fa50
BC
3801 if (node_name) {
3802 bs = bdrv_find_node(node_name);
12d3ba82 3803
dd67fa50
BC
3804 if (bs) {
3805 return bs;
3806 }
12d3ba82
BC
3807 }
3808
dd67fa50
BC
3809 error_setg(errp, "Cannot find device=%s nor node_name=%s",
3810 device ? device : "",
3811 node_name ? node_name : "");
3812 return NULL;
12d3ba82
BC
3813}
3814
5a6684d2
JC
3815/* If 'base' is in the same chain as 'top', return true. Otherwise,
3816 * return false. If either argument is NULL, return false. */
3817bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
3818{
3819 while (top && top != base) {
3820 top = top->backing_hd;
3821 }
3822
3823 return top != NULL;
3824}
3825
2f399b0a
MA
3826BlockDriverState *bdrv_next(BlockDriverState *bs)
3827{
3828 if (!bs) {
3829 return QTAILQ_FIRST(&bdrv_states);
3830 }
dc364f4c 3831 return QTAILQ_NEXT(bs, device_list);
2f399b0a
MA
3832}
3833
51de9760 3834void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
81d0912d
FB
3835{
3836 BlockDriverState *bs;
3837
dc364f4c 3838 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
51de9760 3839 it(opaque, bs);
81d0912d
FB
3840 }
3841}
3842
ea2384d3
FB
3843const char *bdrv_get_device_name(BlockDriverState *bs)
3844{
3845 return bs->device_name;
3846}
3847
c8433287
MA
3848int bdrv_get_flags(BlockDriverState *bs)
3849{
3850 return bs->open_flags;
3851}
3852
f0f0fdfe 3853int bdrv_flush_all(void)
c6ca28d6
AL
3854{
3855 BlockDriverState *bs;
f0f0fdfe 3856 int result = 0;
c6ca28d6 3857
dc364f4c 3858 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
ed78cda3
SH
3859 AioContext *aio_context = bdrv_get_aio_context(bs);
3860 int ret;
3861
3862 aio_context_acquire(aio_context);
3863 ret = bdrv_flush(bs);
f0f0fdfe
KW
3864 if (ret < 0 && !result) {
3865 result = ret;
3866 }
ed78cda3 3867 aio_context_release(aio_context);
1b7bdbc1 3868 }
f0f0fdfe
KW
3869
3870 return result;
c6ca28d6
AL
3871}
3872
3ac21627
PL
3873int bdrv_has_zero_init_1(BlockDriverState *bs)
3874{
3875 return 1;
3876}
3877
f2feebbd
KW
3878int bdrv_has_zero_init(BlockDriverState *bs)
3879{
3880 assert(bs->drv);
3881
11212d8f
PB
3882 /* If BS is a copy on write image, it is initialized to
3883 the contents of the base image, which may not be zeroes. */
3884 if (bs->backing_hd) {
3885 return 0;
3886 }
336c1c12
KW
3887 if (bs->drv->bdrv_has_zero_init) {
3888 return bs->drv->bdrv_has_zero_init(bs);
f2feebbd
KW
3889 }
3890
3ac21627
PL
3891 /* safe default */
3892 return 0;
f2feebbd
KW
3893}
3894
4ce78691
PL
3895bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
3896{
3897 BlockDriverInfo bdi;
3898
3899 if (bs->backing_hd) {
3900 return false;
3901 }
3902
3903 if (bdrv_get_info(bs, &bdi) == 0) {
3904 return bdi.unallocated_blocks_are_zero;
3905 }
3906
3907 return false;
3908}
3909
3910bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
3911{
3912 BlockDriverInfo bdi;
3913
3914 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
3915 return false;
3916 }
3917
3918 if (bdrv_get_info(bs, &bdi) == 0) {
3919 return bdi.can_write_zeroes_with_unmap;
3920 }
3921
3922 return false;
3923}
3924
b6b8a333 3925typedef struct BdrvCoGetBlockStatusData {
376ae3f1 3926 BlockDriverState *bs;
b35b2bba 3927 BlockDriverState *base;
376ae3f1
SH
3928 int64_t sector_num;
3929 int nb_sectors;
3930 int *pnum;
b6b8a333 3931 int64_t ret;
376ae3f1 3932 bool done;
b6b8a333 3933} BdrvCoGetBlockStatusData;
376ae3f1 3934
f58c7b35
TS
3935/*
3936 * Returns true iff the specified sector is present in the disk image. Drivers
3937 * not implementing the functionality are assumed to not support backing files,
3938 * hence all their sectors are reported as allocated.
3939 *
bd9533e3
SH
3940 * If 'sector_num' is beyond the end of the disk image the return value is 0
3941 * and 'pnum' is set to 0.
3942 *
f58c7b35
TS
3943 * 'pnum' is set to the number of sectors (including and immediately following
3944 * the specified sector) that are known to be in the same
3945 * allocated/unallocated state.
3946 *
bd9533e3
SH
3947 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
3948 * beyond the end of the disk image it will be clamped.
f58c7b35 3949 */
b6b8a333
PB
3950static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
3951 int64_t sector_num,
3952 int nb_sectors, int *pnum)
f58c7b35 3953{
30a7f2fc 3954 int64_t total_sectors;
bd9533e3 3955 int64_t n;
5daa74a6 3956 int64_t ret, ret2;
bd9533e3 3957
30a7f2fc
MA
3958 total_sectors = bdrv_nb_sectors(bs);
3959 if (total_sectors < 0) {
3960 return total_sectors;
617ccb46
PB
3961 }
3962
30a7f2fc 3963 if (sector_num >= total_sectors) {
bd9533e3
SH
3964 *pnum = 0;
3965 return 0;
3966 }
3967
30a7f2fc 3968 n = total_sectors - sector_num;
bd9533e3
SH
3969 if (n < nb_sectors) {
3970 nb_sectors = n;
3971 }
3972
b6b8a333 3973 if (!bs->drv->bdrv_co_get_block_status) {
bd9533e3 3974 *pnum = nb_sectors;
e88ae226 3975 ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
918e92d7
PB
3976 if (bs->drv->protocol_name) {
3977 ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
3978 }
3979 return ret;
f58c7b35 3980 }
6aebab14 3981
415b5b01
PB
3982 ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
3983 if (ret < 0) {
3e0a233d 3984 *pnum = 0;
415b5b01
PB
3985 return ret;
3986 }
3987
92bc50a5
PL
3988 if (ret & BDRV_BLOCK_RAW) {
3989 assert(ret & BDRV_BLOCK_OFFSET_VALID);
3990 return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
3991 *pnum, pnum);
3992 }
3993
e88ae226
KW
3994 if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
3995 ret |= BDRV_BLOCK_ALLOCATED;
3996 }
3997
c3d86884
PL
3998 if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
3999 if (bdrv_unallocated_blocks_are_zero(bs)) {
f0ad5712 4000 ret |= BDRV_BLOCK_ZERO;
1f9db224 4001 } else if (bs->backing_hd) {
f0ad5712 4002 BlockDriverState *bs2 = bs->backing_hd;
30a7f2fc
MA
4003 int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
4004 if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
f0ad5712
PB
4005 ret |= BDRV_BLOCK_ZERO;
4006 }
4007 }
415b5b01 4008 }
5daa74a6
PB
4009
4010 if (bs->file &&
4011 (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
4012 (ret & BDRV_BLOCK_OFFSET_VALID)) {
4013 ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
4014 *pnum, pnum);
4015 if (ret2 >= 0) {
4016 /* Ignore errors. This is just providing extra information, it
4017 * is useful but not necessary.
4018 */
4019 ret |= (ret2 & BDRV_BLOCK_ZERO);
4020 }
4021 }
4022
415b5b01 4023 return ret;
060f51c9
SH
4024}
4025
b6b8a333
PB
4026/* Coroutine wrapper for bdrv_get_block_status() */
4027static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
060f51c9 4028{
b6b8a333 4029 BdrvCoGetBlockStatusData *data = opaque;
060f51c9
SH
4030 BlockDriverState *bs = data->bs;
4031
b6b8a333
PB
4032 data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
4033 data->pnum);
060f51c9
SH
4034 data->done = true;
4035}
4036
4037/*
b6b8a333 4038 * Synchronous wrapper around bdrv_co_get_block_status().
060f51c9 4039 *
b6b8a333 4040 * See bdrv_co_get_block_status() for details.
060f51c9 4041 */
b6b8a333
PB
4042int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
4043 int nb_sectors, int *pnum)
060f51c9 4044{
6aebab14 4045 Coroutine *co;
b6b8a333 4046 BdrvCoGetBlockStatusData data = {
6aebab14
SH
4047 .bs = bs,
4048 .sector_num = sector_num,
4049 .nb_sectors = nb_sectors,
4050 .pnum = pnum,
4051 .done = false,
4052 };
4053
bdad13b9
PB
4054 if (qemu_in_coroutine()) {
4055 /* Fast-path if already in coroutine context */
b6b8a333 4056 bdrv_get_block_status_co_entry(&data);
bdad13b9 4057 } else {
2572b37a
SH
4058 AioContext *aio_context = bdrv_get_aio_context(bs);
4059
b6b8a333 4060 co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
bdad13b9
PB
4061 qemu_coroutine_enter(co, &data);
4062 while (!data.done) {
2572b37a 4063 aio_poll(aio_context, true);
bdad13b9 4064 }
6aebab14
SH
4065 }
4066 return data.ret;
f58c7b35
TS
4067}
4068
b6b8a333
PB
4069int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
4070 int nb_sectors, int *pnum)
4071{
4333bb71
PB
4072 int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
4073 if (ret < 0) {
4074 return ret;
4075 }
01fb2705 4076 return !!(ret & BDRV_BLOCK_ALLOCATED);
b6b8a333
PB
4077}
4078
188a7bbf
PB
4079/*
4080 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
4081 *
4082 * Return true if the given sector is allocated in any image between
4083 * BASE and TOP (inclusive). BASE can be NULL to check if the given
4084 * sector is allocated in any image of the chain. Return false otherwise.
4085 *
4086 * 'pnum' is set to the number of sectors (including and immediately following
4087 * the specified sector) that are known to be in the same
4088 * allocated/unallocated state.
4089 *
4090 */
4f578637
PB
4091int bdrv_is_allocated_above(BlockDriverState *top,
4092 BlockDriverState *base,
4093 int64_t sector_num,
4094 int nb_sectors, int *pnum)
188a7bbf
PB
4095{
4096 BlockDriverState *intermediate;
4097 int ret, n = nb_sectors;
4098
4099 intermediate = top;
4100 while (intermediate && intermediate != base) {
4101 int pnum_inter;
bdad13b9
PB
4102 ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
4103 &pnum_inter);
188a7bbf
PB
4104 if (ret < 0) {
4105 return ret;
4106 } else if (ret) {
4107 *pnum = pnum_inter;
4108 return 1;
4109 }
4110
4111 /*
4112 * [sector_num, nb_sectors] is unallocated on top but intermediate
4113 * might have
4114 *
4115 * [sector_num+x, nr_sectors] allocated.
4116 */
63ba17d3
VI
4117 if (n > pnum_inter &&
4118 (intermediate == top ||
4119 sector_num + pnum_inter < intermediate->total_sectors)) {
188a7bbf
PB
4120 n = pnum_inter;
4121 }
4122
4123 intermediate = intermediate->backing_hd;
4124 }
4125
4126 *pnum = n;
4127 return 0;
4128}
4129
045df330
AL
4130const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
4131{
4132 if (bs->backing_hd && bs->backing_hd->encrypted)
4133 return bs->backing_file;
4134 else if (bs->encrypted)
4135 return bs->filename;
4136 else
4137 return NULL;
4138}
4139
5fafdf24 4140void bdrv_get_backing_filename(BlockDriverState *bs,
83f64091
FB
4141 char *filename, int filename_size)
4142{
3574c608 4143 pstrcpy(filename, filename_size, bs->backing_file);
83f64091
FB
4144}
4145
5fafdf24 4146int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
faea38e7
FB
4147 const uint8_t *buf, int nb_sectors)
4148{
4149 BlockDriver *drv = bs->drv;
4150 if (!drv)
19cb3738 4151 return -ENOMEDIUM;
faea38e7
FB
4152 if (!drv->bdrv_write_compressed)
4153 return -ENOTSUP;
fbb7b4e0
KW
4154 if (bdrv_check_request(bs, sector_num, nb_sectors))
4155 return -EIO;
a55eb92c 4156
e4654d2d 4157 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
a55eb92c 4158
faea38e7
FB
4159 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
4160}
3b46e624 4161
faea38e7
FB
4162int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
4163{
4164 BlockDriver *drv = bs->drv;
4165 if (!drv)
19cb3738 4166 return -ENOMEDIUM;
faea38e7
FB
4167 if (!drv->bdrv_get_info)
4168 return -ENOTSUP;
4169 memset(bdi, 0, sizeof(*bdi));
4170 return drv->bdrv_get_info(bs, bdi);
4171}
4172
eae041fe
HR
4173ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
4174{
4175 BlockDriver *drv = bs->drv;
4176 if (drv && drv->bdrv_get_specific_info) {
4177 return drv->bdrv_get_specific_info(bs);
4178 }
4179 return NULL;
4180}
4181
45566e9c
CH
4182int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
4183 int64_t pos, int size)
cf8074b3
KW
4184{
4185 QEMUIOVector qiov;
4186 struct iovec iov = {
4187 .iov_base = (void *) buf,
4188 .iov_len = size,
4189 };
4190
4191 qemu_iovec_init_external(&qiov, &iov, 1);
4192 return bdrv_writev_vmstate(bs, &qiov, pos);
4193}
4194
4195int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
178e08a5
AL
4196{
4197 BlockDriver *drv = bs->drv;
cf8074b3
KW
4198
4199 if (!drv) {
178e08a5 4200 return -ENOMEDIUM;
cf8074b3
KW
4201 } else if (drv->bdrv_save_vmstate) {
4202 return drv->bdrv_save_vmstate(bs, qiov, pos);
4203 } else if (bs->file) {
4204 return bdrv_writev_vmstate(bs->file, qiov, pos);
4205 }
4206
7cdb1f6d 4207 return -ENOTSUP;
178e08a5
AL
4208}
4209
45566e9c
CH
4210int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
4211 int64_t pos, int size)
178e08a5
AL
4212{
4213 BlockDriver *drv = bs->drv;
4214 if (!drv)
4215 return -ENOMEDIUM;
7cdb1f6d
MK
4216 if (drv->bdrv_load_vmstate)
4217 return drv->bdrv_load_vmstate(bs, buf, pos, size);
4218 if (bs->file)
4219 return bdrv_load_vmstate(bs->file, buf, pos, size);
4220 return -ENOTSUP;
178e08a5
AL
4221}
4222
8b9b0cc2
KW
4223void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
4224{
bf736fe3 4225 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
8b9b0cc2
KW
4226 return;
4227 }
4228
bf736fe3 4229 bs->drv->bdrv_debug_event(bs, event);
41c695c7
KW
4230}
4231
4232int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
4233 const char *tag)
4234{
4235 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
4236 bs = bs->file;
4237 }
4238
4239 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
4240 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
4241 }
4242
4243 return -ENOTSUP;
4244}
4245
4cc70e93
FZ
4246int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
4247{
4248 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
4249 bs = bs->file;
4250 }
4251
4252 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
4253 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
4254 }
4255
4256 return -ENOTSUP;
4257}
4258
41c695c7
KW
4259int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
4260{
938789ea 4261 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
41c695c7
KW
4262 bs = bs->file;
4263 }
8b9b0cc2 4264
41c695c7
KW
4265 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
4266 return bs->drv->bdrv_debug_resume(bs, tag);
4267 }
4268
4269 return -ENOTSUP;
4270}
4271
4272bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
4273{
4274 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
4275 bs = bs->file;
4276 }
4277
4278 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
4279 return bs->drv->bdrv_debug_is_suspended(bs, tag);
4280 }
4281
4282 return false;
8b9b0cc2
KW
4283}
4284
199630b6
BS
4285int bdrv_is_snapshot(BlockDriverState *bs)
4286{
4287 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
4288}
4289
b1b1d783
JC
4290/* backing_file can either be relative, or absolute, or a protocol. If it is
4291 * relative, it must be relative to the chain. So, passing in bs->filename
4292 * from a BDS as backing_file should not be done, as that may be relative to
4293 * the CWD rather than the chain. */
e8a6bb9c
MT
4294BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
4295 const char *backing_file)
4296{
b1b1d783
JC
4297 char *filename_full = NULL;
4298 char *backing_file_full = NULL;
4299 char *filename_tmp = NULL;
4300 int is_protocol = 0;
4301 BlockDriverState *curr_bs = NULL;
4302 BlockDriverState *retval = NULL;
4303
4304 if (!bs || !bs->drv || !backing_file) {
e8a6bb9c
MT
4305 return NULL;
4306 }
4307
b1b1d783
JC
4308 filename_full = g_malloc(PATH_MAX);
4309 backing_file_full = g_malloc(PATH_MAX);
4310 filename_tmp = g_malloc(PATH_MAX);
4311
4312 is_protocol = path_has_protocol(backing_file);
4313
4314 for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
4315
4316 /* If either of the filename paths is actually a protocol, then
4317 * compare unmodified paths; otherwise make paths relative */
4318 if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
4319 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
4320 retval = curr_bs->backing_hd;
4321 break;
4322 }
e8a6bb9c 4323 } else {
b1b1d783
JC
4324 /* If not an absolute filename path, make it relative to the current
4325 * image's filename path */
4326 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4327 backing_file);
4328
4329 /* We are going to compare absolute pathnames */
4330 if (!realpath(filename_tmp, filename_full)) {
4331 continue;
4332 }
4333
4334 /* We need to make sure the backing filename we are comparing against
4335 * is relative to the current image filename (or absolute) */
4336 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4337 curr_bs->backing_file);
4338
4339 if (!realpath(filename_tmp, backing_file_full)) {
4340 continue;
4341 }
4342
4343 if (strcmp(backing_file_full, filename_full) == 0) {
4344 retval = curr_bs->backing_hd;
4345 break;
4346 }
e8a6bb9c
MT
4347 }
4348 }
4349
b1b1d783
JC
4350 g_free(filename_full);
4351 g_free(backing_file_full);
4352 g_free(filename_tmp);
4353 return retval;
e8a6bb9c
MT
4354}
4355
f198fd1c
BC
4356int bdrv_get_backing_file_depth(BlockDriverState *bs)
4357{
4358 if (!bs->drv) {
4359 return 0;
4360 }
4361
4362 if (!bs->backing_hd) {
4363 return 0;
4364 }
4365
4366 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
4367}
4368
ea2384d3 4369/**************************************************************/
83f64091 4370/* async I/Os */
ea2384d3 4371
3b69e4b9 4372BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
f141eafe 4373 QEMUIOVector *qiov, int nb_sectors,
3b69e4b9 4374 BlockDriverCompletionFunc *cb, void *opaque)
83f64091 4375{
bbf0a440
SH
4376 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
4377
d20d9b7c 4378 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
8c5873d6 4379 cb, opaque, false);
ea2384d3
FB
4380}
4381
f141eafe
AL
4382BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
4383 QEMUIOVector *qiov, int nb_sectors,
4384 BlockDriverCompletionFunc *cb, void *opaque)
ea2384d3 4385{
bbf0a440
SH
4386 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
4387
d20d9b7c 4388 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
8c5873d6 4389 cb, opaque, true);
83f64091
FB
4390}
4391
d5ef94d4
PB
4392BlockDriverAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
4393 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
4394 BlockDriverCompletionFunc *cb, void *opaque)
4395{
4396 trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
4397
4398 return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
4399 BDRV_REQ_ZERO_WRITE | flags,
4400 cb, opaque, true);
4401}
4402
40b4f539
KW
4403
4404typedef struct MultiwriteCB {
4405 int error;
4406 int num_requests;
4407 int num_callbacks;
4408 struct {
4409 BlockDriverCompletionFunc *cb;
4410 void *opaque;
4411 QEMUIOVector *free_qiov;
40b4f539
KW
4412 } callbacks[];
4413} MultiwriteCB;
4414
4415static void multiwrite_user_cb(MultiwriteCB *mcb)
4416{
4417 int i;
4418
4419 for (i = 0; i < mcb->num_callbacks; i++) {
4420 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1e1ea48d
SH
4421 if (mcb->callbacks[i].free_qiov) {
4422 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
4423 }
7267c094 4424 g_free(mcb->callbacks[i].free_qiov);
40b4f539
KW
4425 }
4426}
4427
4428static void multiwrite_cb(void *opaque, int ret)
4429{
4430 MultiwriteCB *mcb = opaque;
4431
6d519a5f
SH
4432 trace_multiwrite_cb(mcb, ret);
4433
cb6d3ca0 4434 if (ret < 0 && !mcb->error) {
40b4f539 4435 mcb->error = ret;
40b4f539
KW
4436 }
4437
4438 mcb->num_requests--;
4439 if (mcb->num_requests == 0) {
de189a1b 4440 multiwrite_user_cb(mcb);
7267c094 4441 g_free(mcb);
40b4f539
KW
4442 }
4443}
4444
4445static int multiwrite_req_compare(const void *a, const void *b)
4446{
77be4366
CH
4447 const BlockRequest *req1 = a, *req2 = b;
4448
4449 /*
4450 * Note that we can't simply subtract req2->sector from req1->sector
4451 * here as that could overflow the return value.
4452 */
4453 if (req1->sector > req2->sector) {
4454 return 1;
4455 } else if (req1->sector < req2->sector) {
4456 return -1;
4457 } else {
4458 return 0;
4459 }
40b4f539
KW
4460}
4461
4462/*
4463 * Takes a bunch of requests and tries to merge them. Returns the number of
4464 * requests that remain after merging.
4465 */
4466static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
4467 int num_reqs, MultiwriteCB *mcb)
4468{
4469 int i, outidx;
4470
4471 // Sort requests by start sector
4472 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
4473
4474 // Check if adjacent requests touch the same clusters. If so, combine them,
4475 // filling up gaps with zero sectors.
4476 outidx = 0;
4477 for (i = 1; i < num_reqs; i++) {
4478 int merge = 0;
4479 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
4480
b6a127a1 4481 // Handle exactly sequential writes and overlapping writes.
40b4f539
KW
4482 if (reqs[i].sector <= oldreq_last) {
4483 merge = 1;
4484 }
4485
e2a305fb
CH
4486 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
4487 merge = 0;
4488 }
4489
40b4f539
KW
4490 if (merge) {
4491 size_t size;
7267c094 4492 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
40b4f539
KW
4493 qemu_iovec_init(qiov,
4494 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
4495
4496 // Add the first request to the merged one. If the requests are
4497 // overlapping, drop the last sectors of the first request.
4498 size = (reqs[i].sector - reqs[outidx].sector) << 9;
1b093c48 4499 qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
40b4f539 4500
b6a127a1
PB
4501 // We should need to add any zeros between the two requests
4502 assert (reqs[i].sector <= oldreq_last);
40b4f539
KW
4503
4504 // Add the second request
1b093c48 4505 qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
40b4f539 4506
cbf1dff2 4507 reqs[outidx].nb_sectors = qiov->size >> 9;
40b4f539
KW
4508 reqs[outidx].qiov = qiov;
4509
4510 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
4511 } else {
4512 outidx++;
4513 reqs[outidx].sector = reqs[i].sector;
4514 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
4515 reqs[outidx].qiov = reqs[i].qiov;
4516 }
4517 }
4518
4519 return outidx + 1;
4520}
4521
4522/*
4523 * Submit multiple AIO write requests at once.
4524 *
4525 * On success, the function returns 0 and all requests in the reqs array have
4526 * been submitted. In error case this function returns -1, and any of the
4527 * requests may or may not be submitted yet. In particular, this means that the
4528 * callback will be called for some of the requests, for others it won't. The
4529 * caller must check the error field of the BlockRequest to wait for the right
4530 * callbacks (if error != 0, no callback will be called).
4531 *
4532 * The implementation may modify the contents of the reqs array, e.g. to merge
4533 * requests. However, the fields opaque and error are left unmodified as they
4534 * are used to signal failure for a single request to the caller.
4535 */
4536int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
4537{
40b4f539
KW
4538 MultiwriteCB *mcb;
4539 int i;
4540
301db7c2
RH
4541 /* don't submit writes if we don't have a medium */
4542 if (bs->drv == NULL) {
4543 for (i = 0; i < num_reqs; i++) {
4544 reqs[i].error = -ENOMEDIUM;
4545 }
4546 return -1;
4547 }
4548
40b4f539
KW
4549 if (num_reqs == 0) {
4550 return 0;
4551 }
4552
4553 // Create MultiwriteCB structure
7267c094 4554 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
40b4f539
KW
4555 mcb->num_requests = 0;
4556 mcb->num_callbacks = num_reqs;
4557
4558 for (i = 0; i < num_reqs; i++) {
4559 mcb->callbacks[i].cb = reqs[i].cb;
4560 mcb->callbacks[i].opaque = reqs[i].opaque;
4561 }
4562
4563 // Check for mergable requests
4564 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
4565
6d519a5f
SH
4566 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
4567
df9309fb
PB
4568 /* Run the aio requests. */
4569 mcb->num_requests = num_reqs;
40b4f539 4570 for (i = 0; i < num_reqs; i++) {
d20d9b7c
PB
4571 bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
4572 reqs[i].nb_sectors, reqs[i].flags,
4573 multiwrite_cb, mcb,
4574 true);
40b4f539
KW
4575 }
4576
4577 return 0;
40b4f539
KW
4578}
4579
83f64091 4580void bdrv_aio_cancel(BlockDriverAIOCB *acb)
83f64091 4581{
d7331bed 4582 acb->aiocb_info->cancel(acb);
83f64091
FB
4583}
4584
4585/**************************************************************/
4586/* async block device emulation */
4587
c16b5a2c
CH
4588typedef struct BlockDriverAIOCBSync {
4589 BlockDriverAIOCB common;
4590 QEMUBH *bh;
4591 int ret;
4592 /* vector translation state */
4593 QEMUIOVector *qiov;
4594 uint8_t *bounce;
4595 int is_write;
4596} BlockDriverAIOCBSync;
4597
4598static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
4599{
b666d239
KW
4600 BlockDriverAIOCBSync *acb =
4601 container_of(blockacb, BlockDriverAIOCBSync, common);
6a7ad299 4602 qemu_bh_delete(acb->bh);
36afc451 4603 acb->bh = NULL;
c16b5a2c
CH
4604 qemu_aio_release(acb);
4605}
4606
d7331bed 4607static const AIOCBInfo bdrv_em_aiocb_info = {
c16b5a2c
CH
4608 .aiocb_size = sizeof(BlockDriverAIOCBSync),
4609 .cancel = bdrv_aio_cancel_em,
4610};
4611
ce1a14dc 4612static void bdrv_aio_bh_cb(void *opaque)
83f64091 4613{
ce1a14dc 4614 BlockDriverAIOCBSync *acb = opaque;
f141eafe 4615
f141eafe 4616 if (!acb->is_write)
03396148 4617 qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
ceb42de8 4618 qemu_vfree(acb->bounce);
ce1a14dc 4619 acb->common.cb(acb->common.opaque, acb->ret);
6a7ad299 4620 qemu_bh_delete(acb->bh);
36afc451 4621 acb->bh = NULL;
ce1a14dc 4622 qemu_aio_release(acb);
83f64091 4623}
beac80cd 4624
f141eafe
AL
4625static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
4626 int64_t sector_num,
4627 QEMUIOVector *qiov,
4628 int nb_sectors,
4629 BlockDriverCompletionFunc *cb,
4630 void *opaque,
4631 int is_write)
4632
83f64091 4633{
ce1a14dc 4634 BlockDriverAIOCBSync *acb;
ce1a14dc 4635
d7331bed 4636 acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
f141eafe
AL
4637 acb->is_write = is_write;
4638 acb->qiov = qiov;
e268ca52 4639 acb->bounce = qemu_blockalign(bs, qiov->size);
2572b37a 4640 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);
f141eafe
AL
4641
4642 if (is_write) {
d5e6b161 4643 qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
1ed20acf 4644 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
f141eafe 4645 } else {
1ed20acf 4646 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
f141eafe
AL
4647 }
4648
ce1a14dc 4649 qemu_bh_schedule(acb->bh);
f141eafe 4650
ce1a14dc 4651 return &acb->common;
beac80cd
FB
4652}
4653
f141eafe
AL
4654static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
4655 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 4656 BlockDriverCompletionFunc *cb, void *opaque)
beac80cd 4657{
f141eafe
AL
4658 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
4659}
83f64091 4660
f141eafe
AL
4661static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
4662 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
4663 BlockDriverCompletionFunc *cb, void *opaque)
4664{
4665 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
beac80cd 4666}
beac80cd 4667
68485420
KW
4668
4669typedef struct BlockDriverAIOCBCoroutine {
4670 BlockDriverAIOCB common;
4671 BlockRequest req;
4672 bool is_write;
d318aea9 4673 bool *done;
68485420
KW
4674 QEMUBH* bh;
4675} BlockDriverAIOCBCoroutine;
4676
4677static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
4678{
2572b37a 4679 AioContext *aio_context = bdrv_get_aio_context(blockacb->bs);
d318aea9
KW
4680 BlockDriverAIOCBCoroutine *acb =
4681 container_of(blockacb, BlockDriverAIOCBCoroutine, common);
4682 bool done = false;
4683
4684 acb->done = &done;
4685 while (!done) {
2572b37a 4686 aio_poll(aio_context, true);
d318aea9 4687 }
68485420
KW
4688}
4689
d7331bed 4690static const AIOCBInfo bdrv_em_co_aiocb_info = {
68485420
KW
4691 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
4692 .cancel = bdrv_aio_co_cancel_em,
4693};
4694
35246a68 4695static void bdrv_co_em_bh(void *opaque)
68485420
KW
4696{
4697 BlockDriverAIOCBCoroutine *acb = opaque;
4698
4699 acb->common.cb(acb->common.opaque, acb->req.error);
d318aea9
KW
4700
4701 if (acb->done) {
4702 *acb->done = true;
4703 }
4704
68485420
KW
4705 qemu_bh_delete(acb->bh);
4706 qemu_aio_release(acb);
4707}
4708
b2a61371
SH
4709/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
4710static void coroutine_fn bdrv_co_do_rw(void *opaque)
4711{
4712 BlockDriverAIOCBCoroutine *acb = opaque;
4713 BlockDriverState *bs = acb->common.bs;
4714
4715 if (!acb->is_write) {
4716 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
d20d9b7c 4717 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
b2a61371
SH
4718 } else {
4719 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
d20d9b7c 4720 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
b2a61371
SH
4721 }
4722
2572b37a 4723 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
b2a61371
SH
4724 qemu_bh_schedule(acb->bh);
4725}
4726
68485420
KW
4727static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
4728 int64_t sector_num,
4729 QEMUIOVector *qiov,
4730 int nb_sectors,
d20d9b7c 4731 BdrvRequestFlags flags,
68485420
KW
4732 BlockDriverCompletionFunc *cb,
4733 void *opaque,
8c5873d6 4734 bool is_write)
68485420
KW
4735{
4736 Coroutine *co;
4737 BlockDriverAIOCBCoroutine *acb;
4738
d7331bed 4739 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
68485420
KW
4740 acb->req.sector = sector_num;
4741 acb->req.nb_sectors = nb_sectors;
4742 acb->req.qiov = qiov;
d20d9b7c 4743 acb->req.flags = flags;
68485420 4744 acb->is_write = is_write;
d318aea9 4745 acb->done = NULL;
68485420 4746
8c5873d6 4747 co = qemu_coroutine_create(bdrv_co_do_rw);
68485420
KW
4748 qemu_coroutine_enter(co, acb);
4749
4750 return &acb->common;
4751}
4752
07f07615 4753static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
b2e12bc6 4754{
07f07615
PB
4755 BlockDriverAIOCBCoroutine *acb = opaque;
4756 BlockDriverState *bs = acb->common.bs;
b2e12bc6 4757
07f07615 4758 acb->req.error = bdrv_co_flush(bs);
2572b37a 4759 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
b2e12bc6 4760 qemu_bh_schedule(acb->bh);
b2e12bc6
CH
4761}
4762
07f07615 4763BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
016f5cf6
AG
4764 BlockDriverCompletionFunc *cb, void *opaque)
4765{
07f07615 4766 trace_bdrv_aio_flush(bs, opaque);
016f5cf6 4767
07f07615
PB
4768 Coroutine *co;
4769 BlockDriverAIOCBCoroutine *acb;
016f5cf6 4770
d7331bed 4771 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
d318aea9
KW
4772 acb->done = NULL;
4773
07f07615
PB
4774 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
4775 qemu_coroutine_enter(co, acb);
016f5cf6 4776
016f5cf6
AG
4777 return &acb->common;
4778}
4779
4265d620
PB
4780static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
4781{
4782 BlockDriverAIOCBCoroutine *acb = opaque;
4783 BlockDriverState *bs = acb->common.bs;
4784
4785 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
2572b37a 4786 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
4265d620
PB
4787 qemu_bh_schedule(acb->bh);
4788}
4789
4790BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
4791 int64_t sector_num, int nb_sectors,
4792 BlockDriverCompletionFunc *cb, void *opaque)
4793{
4794 Coroutine *co;
4795 BlockDriverAIOCBCoroutine *acb;
4796
4797 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
4798
d7331bed 4799 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
4265d620
PB
4800 acb->req.sector = sector_num;
4801 acb->req.nb_sectors = nb_sectors;
d318aea9 4802 acb->done = NULL;
4265d620
PB
4803 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
4804 qemu_coroutine_enter(co, acb);
4805
4806 return &acb->common;
4807}
4808
ea2384d3
FB
4809void bdrv_init(void)
4810{
5efa9d5a 4811 module_call_init(MODULE_INIT_BLOCK);
ea2384d3 4812}
ce1a14dc 4813
eb852011
MA
4814void bdrv_init_with_whitelist(void)
4815{
4816 use_bdrv_whitelist = 1;
4817 bdrv_init();
4818}
4819
d7331bed 4820void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
c16b5a2c 4821 BlockDriverCompletionFunc *cb, void *opaque)
ce1a14dc 4822{
ce1a14dc
PB
4823 BlockDriverAIOCB *acb;
4824
d7331bed
SH
4825 acb = g_slice_alloc(aiocb_info->aiocb_size);
4826 acb->aiocb_info = aiocb_info;
ce1a14dc
PB
4827 acb->bs = bs;
4828 acb->cb = cb;
4829 acb->opaque = opaque;
4830 return acb;
4831}
4832
4833void qemu_aio_release(void *p)
4834{
d37c975f 4835 BlockDriverAIOCB *acb = p;
d7331bed 4836 g_slice_free1(acb->aiocb_info->aiocb_size, acb);
ce1a14dc 4837}
19cb3738 4838
f9f05dc5
KW
4839/**************************************************************/
4840/* Coroutine block device emulation */
4841
4842typedef struct CoroutineIOCompletion {
4843 Coroutine *coroutine;
4844 int ret;
4845} CoroutineIOCompletion;
4846
4847static void bdrv_co_io_em_complete(void *opaque, int ret)
4848{
4849 CoroutineIOCompletion *co = opaque;
4850
4851 co->ret = ret;
4852 qemu_coroutine_enter(co->coroutine, NULL);
4853}
4854
4855static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
4856 int nb_sectors, QEMUIOVector *iov,
4857 bool is_write)
4858{
4859 CoroutineIOCompletion co = {
4860 .coroutine = qemu_coroutine_self(),
4861 };
4862 BlockDriverAIOCB *acb;
4863
4864 if (is_write) {
a652d160
SH
4865 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
4866 bdrv_co_io_em_complete, &co);
f9f05dc5 4867 } else {
a652d160
SH
4868 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
4869 bdrv_co_io_em_complete, &co);
f9f05dc5
KW
4870 }
4871
59370aaa 4872 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
f9f05dc5
KW
4873 if (!acb) {
4874 return -EIO;
4875 }
4876 qemu_coroutine_yield();
4877
4878 return co.ret;
4879}
4880
4881static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
4882 int64_t sector_num, int nb_sectors,
4883 QEMUIOVector *iov)
4884{
4885 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
4886}
4887
4888static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
4889 int64_t sector_num, int nb_sectors,
4890 QEMUIOVector *iov)
4891{
4892 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
4893}
4894
07f07615 4895static void coroutine_fn bdrv_flush_co_entry(void *opaque)
e7a8a783 4896{
07f07615
PB
4897 RwCo *rwco = opaque;
4898
4899 rwco->ret = bdrv_co_flush(rwco->bs);
4900}
4901
4902int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
4903{
eb489bb1
KW
4904 int ret;
4905
29cdb251 4906 if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
07f07615 4907 return 0;
eb489bb1
KW
4908 }
4909
ca716364 4910 /* Write back cached data to the OS even with cache=unsafe */
bf736fe3 4911 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
eb489bb1
KW
4912 if (bs->drv->bdrv_co_flush_to_os) {
4913 ret = bs->drv->bdrv_co_flush_to_os(bs);
4914 if (ret < 0) {
4915 return ret;
4916 }
4917 }
4918
ca716364
KW
4919 /* But don't actually force it to the disk with cache=unsafe */
4920 if (bs->open_flags & BDRV_O_NO_FLUSH) {
d4c82329 4921 goto flush_parent;
ca716364
KW
4922 }
4923
bf736fe3 4924 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
eb489bb1 4925 if (bs->drv->bdrv_co_flush_to_disk) {
29cdb251 4926 ret = bs->drv->bdrv_co_flush_to_disk(bs);
07f07615
PB
4927 } else if (bs->drv->bdrv_aio_flush) {
4928 BlockDriverAIOCB *acb;
4929 CoroutineIOCompletion co = {
4930 .coroutine = qemu_coroutine_self(),
4931 };
4932
4933 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
4934 if (acb == NULL) {
29cdb251 4935 ret = -EIO;
07f07615
PB
4936 } else {
4937 qemu_coroutine_yield();
29cdb251 4938 ret = co.ret;
07f07615 4939 }
07f07615
PB
4940 } else {
4941 /*
4942 * Some block drivers always operate in either writethrough or unsafe
4943 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
4944 * know how the server works (because the behaviour is hardcoded or
4945 * depends on server-side configuration), so we can't ensure that
4946 * everything is safe on disk. Returning an error doesn't work because
4947 * that would break guests even if the server operates in writethrough
4948 * mode.
4949 *
4950 * Let's hope the user knows what he's doing.
4951 */
29cdb251 4952 ret = 0;
07f07615 4953 }
29cdb251
PB
4954 if (ret < 0) {
4955 return ret;
4956 }
4957
4958 /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
4959 * in the case of cache=unsafe, so there are no useless flushes.
4960 */
d4c82329 4961flush_parent:
29cdb251 4962 return bdrv_co_flush(bs->file);
07f07615
PB
4963}
4964
5a8a30db 4965void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
0f15423c 4966{
5a8a30db
KW
4967 Error *local_err = NULL;
4968 int ret;
4969
3456a8d1
KW
4970 if (!bs->drv) {
4971 return;
4972 }
4973
4974 if (bs->drv->bdrv_invalidate_cache) {
5a8a30db 4975 bs->drv->bdrv_invalidate_cache(bs, &local_err);
3456a8d1 4976 } else if (bs->file) {
5a8a30db
KW
4977 bdrv_invalidate_cache(bs->file, &local_err);
4978 }
4979 if (local_err) {
4980 error_propagate(errp, local_err);
4981 return;
0f15423c 4982 }
3456a8d1 4983
5a8a30db
KW
4984 ret = refresh_total_sectors(bs, bs->total_sectors);
4985 if (ret < 0) {
4986 error_setg_errno(errp, -ret, "Could not refresh total sector count");
4987 return;
4988 }
0f15423c
AL
4989}
4990
5a8a30db 4991void bdrv_invalidate_cache_all(Error **errp)
0f15423c
AL
4992{
4993 BlockDriverState *bs;
5a8a30db 4994 Error *local_err = NULL;
0f15423c 4995
dc364f4c 4996 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
ed78cda3
SH
4997 AioContext *aio_context = bdrv_get_aio_context(bs);
4998
4999 aio_context_acquire(aio_context);
5a8a30db 5000 bdrv_invalidate_cache(bs, &local_err);
ed78cda3 5001 aio_context_release(aio_context);
5a8a30db
KW
5002 if (local_err) {
5003 error_propagate(errp, local_err);
5004 return;
5005 }
0f15423c
AL
5006 }
5007}
5008
07789269
BC
5009void bdrv_clear_incoming_migration_all(void)
5010{
5011 BlockDriverState *bs;
5012
dc364f4c 5013 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
ed78cda3
SH
5014 AioContext *aio_context = bdrv_get_aio_context(bs);
5015
5016 aio_context_acquire(aio_context);
07789269 5017 bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
ed78cda3 5018 aio_context_release(aio_context);
07789269
BC
5019 }
5020}
5021
07f07615
PB
5022int bdrv_flush(BlockDriverState *bs)
5023{
5024 Coroutine *co;
5025 RwCo rwco = {
5026 .bs = bs,
5027 .ret = NOT_DONE,
e7a8a783 5028 };
e7a8a783 5029
07f07615
PB
5030 if (qemu_in_coroutine()) {
5031 /* Fast-path if already in coroutine context */
5032 bdrv_flush_co_entry(&rwco);
5033 } else {
2572b37a
SH
5034 AioContext *aio_context = bdrv_get_aio_context(bs);
5035
07f07615
PB
5036 co = qemu_coroutine_create(bdrv_flush_co_entry);
5037 qemu_coroutine_enter(co, &rwco);
5038 while (rwco.ret == NOT_DONE) {
2572b37a 5039 aio_poll(aio_context, true);
07f07615 5040 }
e7a8a783 5041 }
07f07615
PB
5042
5043 return rwco.ret;
e7a8a783
KW
5044}
5045
775aa8b6
KW
5046typedef struct DiscardCo {
5047 BlockDriverState *bs;
5048 int64_t sector_num;
5049 int nb_sectors;
5050 int ret;
5051} DiscardCo;
4265d620
PB
5052static void coroutine_fn bdrv_discard_co_entry(void *opaque)
5053{
775aa8b6 5054 DiscardCo *rwco = opaque;
4265d620
PB
5055
5056 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
5057}
5058
6f14da52
PL
5059/* if no limit is specified in the BlockLimits use a default
5060 * of 32768 512-byte sectors (16 MiB) per request.
5061 */
5062#define MAX_DISCARD_DEFAULT 32768
5063
4265d620
PB
5064int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
5065 int nb_sectors)
5066{
d51e9fe5
PB
5067 int max_discard;
5068
4265d620
PB
5069 if (!bs->drv) {
5070 return -ENOMEDIUM;
5071 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
5072 return -EIO;
5073 } else if (bs->read_only) {
5074 return -EROFS;
df702c9b
PB
5075 }
5076
e4654d2d 5077 bdrv_reset_dirty(bs, sector_num, nb_sectors);
df702c9b 5078
9e8f1835
PB
5079 /* Do nothing if disabled. */
5080 if (!(bs->open_flags & BDRV_O_UNMAP)) {
5081 return 0;
5082 }
5083
d51e9fe5
PB
5084 if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
5085 return 0;
5086 }
6f14da52 5087
d51e9fe5
PB
5088 max_discard = bs->bl.max_discard ? bs->bl.max_discard : MAX_DISCARD_DEFAULT;
5089 while (nb_sectors > 0) {
5090 int ret;
5091 int num = nb_sectors;
6f14da52 5092
d51e9fe5
PB
5093 /* align request */
5094 if (bs->bl.discard_alignment &&
5095 num >= bs->bl.discard_alignment &&
5096 sector_num % bs->bl.discard_alignment) {
5097 if (num > bs->bl.discard_alignment) {
5098 num = bs->bl.discard_alignment;
6f14da52 5099 }
d51e9fe5
PB
5100 num -= sector_num % bs->bl.discard_alignment;
5101 }
6f14da52 5102
d51e9fe5
PB
5103 /* limit request size */
5104 if (num > max_discard) {
5105 num = max_discard;
5106 }
6f14da52 5107
d51e9fe5 5108 if (bs->drv->bdrv_co_discard) {
6f14da52 5109 ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
d51e9fe5
PB
5110 } else {
5111 BlockDriverAIOCB *acb;
5112 CoroutineIOCompletion co = {
5113 .coroutine = qemu_coroutine_self(),
5114 };
5115
5116 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
5117 bdrv_co_io_em_complete, &co);
5118 if (acb == NULL) {
5119 return -EIO;
5120 } else {
5121 qemu_coroutine_yield();
5122 ret = co.ret;
6f14da52 5123 }
6f14da52 5124 }
7ce21016 5125 if (ret && ret != -ENOTSUP) {
d51e9fe5 5126 return ret;
4265d620 5127 }
d51e9fe5
PB
5128
5129 sector_num += num;
5130 nb_sectors -= num;
4265d620 5131 }
d51e9fe5 5132 return 0;
4265d620
PB
5133}
5134
5135int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
5136{
5137 Coroutine *co;
775aa8b6 5138 DiscardCo rwco = {
4265d620
PB
5139 .bs = bs,
5140 .sector_num = sector_num,
5141 .nb_sectors = nb_sectors,
5142 .ret = NOT_DONE,
5143 };
5144
5145 if (qemu_in_coroutine()) {
5146 /* Fast-path if already in coroutine context */
5147 bdrv_discard_co_entry(&rwco);
5148 } else {
2572b37a
SH
5149 AioContext *aio_context = bdrv_get_aio_context(bs);
5150
4265d620
PB
5151 co = qemu_coroutine_create(bdrv_discard_co_entry);
5152 qemu_coroutine_enter(co, &rwco);
5153 while (rwco.ret == NOT_DONE) {
2572b37a 5154 aio_poll(aio_context, true);
4265d620
PB
5155 }
5156 }
5157
5158 return rwco.ret;
5159}
5160
19cb3738
FB
5161/**************************************************************/
5162/* removable device support */
5163
5164/**
5165 * Return TRUE if the media is present
5166 */
5167int bdrv_is_inserted(BlockDriverState *bs)
5168{
5169 BlockDriver *drv = bs->drv;
a1aff5bf 5170
19cb3738
FB
5171 if (!drv)
5172 return 0;
5173 if (!drv->bdrv_is_inserted)
a1aff5bf
MA
5174 return 1;
5175 return drv->bdrv_is_inserted(bs);
19cb3738
FB
5176}
5177
5178/**
8e49ca46
MA
5179 * Return whether the media changed since the last call to this
5180 * function, or -ENOTSUP if we don't know. Most drivers don't know.
19cb3738
FB
5181 */
5182int bdrv_media_changed(BlockDriverState *bs)
5183{
5184 BlockDriver *drv = bs->drv;
19cb3738 5185
8e49ca46
MA
5186 if (drv && drv->bdrv_media_changed) {
5187 return drv->bdrv_media_changed(bs);
5188 }
5189 return -ENOTSUP;
19cb3738
FB
5190}
5191
5192/**
5193 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
5194 */
f36f3949 5195void bdrv_eject(BlockDriverState *bs, bool eject_flag)
19cb3738
FB
5196{
5197 BlockDriver *drv = bs->drv;
19cb3738 5198
822e1cd1
MA
5199 if (drv && drv->bdrv_eject) {
5200 drv->bdrv_eject(bs, eject_flag);
19cb3738 5201 }
6f382ed2
LC
5202
5203 if (bs->device_name[0] != '\0') {
a5ee7bd4
WX
5204 qapi_event_send_device_tray_moved(bdrv_get_device_name(bs),
5205 eject_flag, &error_abort);
6f382ed2 5206 }
19cb3738
FB
5207}
5208
19cb3738
FB
5209/**
5210 * Lock or unlock the media (if it is locked, the user won't be able
5211 * to eject it manually).
5212 */
025e849a 5213void bdrv_lock_medium(BlockDriverState *bs, bool locked)
19cb3738
FB
5214{
5215 BlockDriver *drv = bs->drv;
5216
025e849a 5217 trace_bdrv_lock_medium(bs, locked);
b8c6d095 5218
025e849a
MA
5219 if (drv && drv->bdrv_lock_medium) {
5220 drv->bdrv_lock_medium(bs, locked);
19cb3738
FB
5221 }
5222}
985a03b0
TS
5223
5224/* needed for generic scsi interface */
5225
5226int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
5227{
5228 BlockDriver *drv = bs->drv;
5229
5230 if (drv && drv->bdrv_ioctl)
5231 return drv->bdrv_ioctl(bs, req, buf);
5232 return -ENOTSUP;
5233}
7d780669 5234
221f715d
AL
5235BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
5236 unsigned long int req, void *buf,
5237 BlockDriverCompletionFunc *cb, void *opaque)
7d780669 5238{
221f715d 5239 BlockDriver *drv = bs->drv;
7d780669 5240
221f715d
AL
5241 if (drv && drv->bdrv_aio_ioctl)
5242 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
5243 return NULL;
7d780669 5244}
e268ca52 5245
1b7fd729 5246void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
7b6f9300 5247{
1b7fd729 5248 bs->guest_block_size = align;
7b6f9300 5249}
7cd1e32a 5250
e268ca52
AL
5251void *qemu_blockalign(BlockDriverState *bs, size_t size)
5252{
339064d5 5253 return qemu_memalign(bdrv_opt_mem_align(bs), size);
e268ca52 5254}
7cd1e32a 5255
c53b1c51
SH
5256/*
5257 * Check if all memory in this vector is sector aligned.
5258 */
5259bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
5260{
5261 int i;
339064d5 5262 size_t alignment = bdrv_opt_mem_align(bs);
c53b1c51
SH
5263
5264 for (i = 0; i < qiov->niov; i++) {
339064d5 5265 if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
c53b1c51 5266 return false;
1ff735bd 5267 }
339064d5 5268 if (qiov->iov[i].iov_len % alignment) {
1ff735bd 5269 return false;
c53b1c51
SH
5270 }
5271 }
5272
5273 return true;
5274}
5275
b8afb520
FZ
5276BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity,
5277 Error **errp)
7cd1e32a
LS
5278{
5279 int64_t bitmap_size;
e4654d2d 5280 BdrvDirtyBitmap *bitmap;
a55eb92c 5281
50717e94
PB
5282 assert((granularity & (granularity - 1)) == 0);
5283
e4654d2d
FZ
5284 granularity >>= BDRV_SECTOR_BITS;
5285 assert(granularity);
b8afb520
FZ
5286 bitmap_size = bdrv_getlength(bs);
5287 if (bitmap_size < 0) {
5288 error_setg_errno(errp, -bitmap_size, "could not get length of device");
5289 errno = -bitmap_size;
5290 return NULL;
5291 }
5292 bitmap_size >>= BDRV_SECTOR_BITS;
e4654d2d
FZ
5293 bitmap = g_malloc0(sizeof(BdrvDirtyBitmap));
5294 bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
5295 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
5296 return bitmap;
5297}
5298
5299void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5300{
5301 BdrvDirtyBitmap *bm, *next;
5302 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
5303 if (bm == bitmap) {
5304 QLIST_REMOVE(bitmap, list);
5305 hbitmap_free(bitmap->bitmap);
5306 g_free(bitmap);
5307 return;
a55eb92c 5308 }
7cd1e32a
LS
5309 }
5310}
5311
21b56835
FZ
5312BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
5313{
5314 BdrvDirtyBitmap *bm;
5315 BlockDirtyInfoList *list = NULL;
5316 BlockDirtyInfoList **plist = &list;
5317
5318 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
5319 BlockDirtyInfo *info = g_malloc0(sizeof(BlockDirtyInfo));
5320 BlockDirtyInfoList *entry = g_malloc0(sizeof(BlockDirtyInfoList));
5321 info->count = bdrv_get_dirty_count(bs, bm);
5322 info->granularity =
5323 ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bm->bitmap));
5324 entry->value = info;
5325 *plist = entry;
5326 plist = &entry->next;
5327 }
5328
5329 return list;
5330}
5331
e4654d2d 5332int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
7cd1e32a 5333{
e4654d2d
FZ
5334 if (bitmap) {
5335 return hbitmap_get(bitmap->bitmap, sector);
7cd1e32a
LS
5336 } else {
5337 return 0;
5338 }
5339}
5340
e4654d2d
FZ
5341void bdrv_dirty_iter_init(BlockDriverState *bs,
5342 BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
1755da16 5343{
e4654d2d 5344 hbitmap_iter_init(hbi, bitmap->bitmap, 0);
1755da16
PB
5345}
5346
5347void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
5348 int nr_sectors)
5349{
e4654d2d
FZ
5350 BdrvDirtyBitmap *bitmap;
5351 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5352 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
5353 }
1755da16
PB
5354}
5355
e4654d2d 5356void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors)
7cd1e32a 5357{
e4654d2d
FZ
5358 BdrvDirtyBitmap *bitmap;
5359 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5360 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
5361 }
7cd1e32a 5362}
aaa0eb75 5363
e4654d2d 5364int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
aaa0eb75 5365{
e4654d2d 5366 return hbitmap_count(bitmap->bitmap);
aaa0eb75 5367}
f88e1a42 5368
9fcb0251
FZ
5369/* Get a reference to bs */
5370void bdrv_ref(BlockDriverState *bs)
5371{
5372 bs->refcnt++;
5373}
5374
5375/* Release a previously grabbed reference to bs.
5376 * If after releasing, reference count is zero, the BlockDriverState is
5377 * deleted. */
5378void bdrv_unref(BlockDriverState *bs)
5379{
5380 assert(bs->refcnt > 0);
5381 if (--bs->refcnt == 0) {
5382 bdrv_delete(bs);
5383 }
5384}
5385
fbe40ff7
FZ
5386struct BdrvOpBlocker {
5387 Error *reason;
5388 QLIST_ENTRY(BdrvOpBlocker) list;
5389};
5390
5391bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
5392{
5393 BdrvOpBlocker *blocker;
5394 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5395 if (!QLIST_EMPTY(&bs->op_blockers[op])) {
5396 blocker = QLIST_FIRST(&bs->op_blockers[op]);
5397 if (errp) {
5398 error_setg(errp, "Device '%s' is busy: %s",
5399 bs->device_name, error_get_pretty(blocker->reason));
5400 }
5401 return true;
5402 }
5403 return false;
5404}
5405
5406void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
5407{
5408 BdrvOpBlocker *blocker;
5409 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5410
5411 blocker = g_malloc0(sizeof(BdrvOpBlocker));
5412 blocker->reason = reason;
5413 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
5414}
5415
5416void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
5417{
5418 BdrvOpBlocker *blocker, *next;
5419 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5420 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
5421 if (blocker->reason == reason) {
5422 QLIST_REMOVE(blocker, list);
5423 g_free(blocker);
5424 }
5425 }
5426}
5427
5428void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
5429{
5430 int i;
5431 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5432 bdrv_op_block(bs, i, reason);
5433 }
5434}
5435
5436void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
5437{
5438 int i;
5439 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5440 bdrv_op_unblock(bs, i, reason);
5441 }
5442}
5443
5444bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
5445{
5446 int i;
5447
5448 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5449 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
5450 return false;
5451 }
5452 }
5453 return true;
5454}
5455
28a7282a
LC
5456void bdrv_iostatus_enable(BlockDriverState *bs)
5457{
d6bf279e 5458 bs->iostatus_enabled = true;
58e21ef5 5459 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
28a7282a
LC
5460}
5461
5462/* The I/O status is only enabled if the drive explicitly
5463 * enables it _and_ the VM is configured to stop on errors */
5464bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
5465{
d6bf279e 5466 return (bs->iostatus_enabled &&
92aa5c6d
PB
5467 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
5468 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
5469 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
28a7282a
LC
5470}
5471
5472void bdrv_iostatus_disable(BlockDriverState *bs)
5473{
d6bf279e 5474 bs->iostatus_enabled = false;
28a7282a
LC
5475}
5476
5477void bdrv_iostatus_reset(BlockDriverState *bs)
5478{
5479 if (bdrv_iostatus_is_enabled(bs)) {
58e21ef5 5480 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3bd293c3
PB
5481 if (bs->job) {
5482 block_job_iostatus_reset(bs->job);
5483 }
28a7282a
LC
5484 }
5485}
5486
28a7282a
LC
5487void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
5488{
3e1caa5f
PB
5489 assert(bdrv_iostatus_is_enabled(bs));
5490 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
58e21ef5
LC
5491 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
5492 BLOCK_DEVICE_IO_STATUS_FAILED;
28a7282a
LC
5493 }
5494}
5495
a597e79c
CH
5496void
5497bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
5498 enum BlockAcctType type)
5499{
5500 assert(type < BDRV_MAX_IOTYPE);
5501
5502 cookie->bytes = bytes;
c488c7f6 5503 cookie->start_time_ns = get_clock();
a597e79c
CH
5504 cookie->type = type;
5505}
5506
5507void
5508bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
5509{
5510 assert(cookie->type < BDRV_MAX_IOTYPE);
5511
5512 bs->nr_bytes[cookie->type] += cookie->bytes;
5513 bs->nr_ops[cookie->type]++;
c488c7f6 5514 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
a597e79c
CH
5515}
5516
d92ada22
LC
5517void bdrv_img_create(const char *filename, const char *fmt,
5518 const char *base_filename, const char *base_fmt,
f382d43a
MR
5519 char *options, uint64_t img_size, int flags,
5520 Error **errp, bool quiet)
f88e1a42 5521{
83d0521a
CL
5522 QemuOptsList *create_opts = NULL;
5523 QemuOpts *opts = NULL;
5524 const char *backing_fmt, *backing_file;
5525 int64_t size;
f88e1a42 5526 BlockDriver *drv, *proto_drv;
96df67d1 5527 BlockDriver *backing_drv = NULL;
cc84d90f 5528 Error *local_err = NULL;
f88e1a42
JS
5529 int ret = 0;
5530
5531 /* Find driver and parse its options */
5532 drv = bdrv_find_format(fmt);
5533 if (!drv) {
71c79813 5534 error_setg(errp, "Unknown file format '%s'", fmt);
d92ada22 5535 return;
f88e1a42
JS
5536 }
5537
98289620 5538 proto_drv = bdrv_find_protocol(filename, true);
f88e1a42 5539 if (!proto_drv) {
71c79813 5540 error_setg(errp, "Unknown protocol '%s'", filename);
d92ada22 5541 return;
f88e1a42
JS
5542 }
5543
c282e1fd
CL
5544 create_opts = qemu_opts_append(create_opts, drv->create_opts);
5545 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
f88e1a42
JS
5546
5547 /* Create parameter list with default values */
83d0521a
CL
5548 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
5549 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size);
f88e1a42
JS
5550
5551 /* Parse -o options */
5552 if (options) {
83d0521a
CL
5553 if (qemu_opts_do_parse(opts, options, NULL) != 0) {
5554 error_setg(errp, "Invalid options for file format '%s'", fmt);
f88e1a42
JS
5555 goto out;
5556 }
5557 }
5558
5559 if (base_filename) {
83d0521a 5560 if (qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename)) {
71c79813
LC
5561 error_setg(errp, "Backing file not supported for file format '%s'",
5562 fmt);
f88e1a42
JS
5563 goto out;
5564 }
5565 }
5566
5567 if (base_fmt) {
83d0521a 5568 if (qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt)) {
71c79813
LC
5569 error_setg(errp, "Backing file format not supported for file "
5570 "format '%s'", fmt);
f88e1a42
JS
5571 goto out;
5572 }
5573 }
5574
83d0521a
CL
5575 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
5576 if (backing_file) {
5577 if (!strcmp(filename, backing_file)) {
71c79813
LC
5578 error_setg(errp, "Error: Trying to create an image with the "
5579 "same filename as the backing file");
792da93a
JS
5580 goto out;
5581 }
5582 }
5583
83d0521a
CL
5584 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
5585 if (backing_fmt) {
5586 backing_drv = bdrv_find_format(backing_fmt);
96df67d1 5587 if (!backing_drv) {
71c79813 5588 error_setg(errp, "Unknown backing file format '%s'",
83d0521a 5589 backing_fmt);
f88e1a42
JS
5590 goto out;
5591 }
5592 }
5593
5594 // The size for the image must always be specified, with one exception:
5595 // If we are using a backing file, we can obtain the size from there
83d0521a
CL
5596 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
5597 if (size == -1) {
5598 if (backing_file) {
66f6b814 5599 BlockDriverState *bs;
f88e1a42 5600 uint64_t size;
63090dac
PB
5601 int back_flags;
5602
5603 /* backing files always opened read-only */
5604 back_flags =
5605 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
f88e1a42 5606
f67503e5 5607 bs = NULL;
83d0521a 5608 ret = bdrv_open(&bs, backing_file, NULL, NULL, back_flags,
cc84d90f 5609 backing_drv, &local_err);
f88e1a42 5610 if (ret < 0) {
cc84d90f 5611 error_setg_errno(errp, -ret, "Could not open '%s': %s",
83d0521a 5612 backing_file,
cc84d90f
HR
5613 error_get_pretty(local_err));
5614 error_free(local_err);
5615 local_err = NULL;
f88e1a42
JS
5616 goto out;
5617 }
5618 bdrv_get_geometry(bs, &size);
5619 size *= 512;
5620
83d0521a 5621 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size);
66f6b814
HR
5622
5623 bdrv_unref(bs);
f88e1a42 5624 } else {
71c79813 5625 error_setg(errp, "Image creation needs a size parameter");
f88e1a42
JS
5626 goto out;
5627 }
5628 }
5629
f382d43a
MR
5630 if (!quiet) {
5631 printf("Formatting '%s', fmt=%s ", filename, fmt);
83d0521a 5632 qemu_opts_print(opts);
f382d43a
MR
5633 puts("");
5634 }
83d0521a 5635
c282e1fd 5636 ret = bdrv_create(drv, filename, opts, &local_err);
83d0521a 5637
cc84d90f
HR
5638 if (ret == -EFBIG) {
5639 /* This is generally a better message than whatever the driver would
5640 * deliver (especially because of the cluster_size_hint), since that
5641 * is most probably not much different from "image too large". */
5642 const char *cluster_size_hint = "";
83d0521a 5643 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
cc84d90f 5644 cluster_size_hint = " (try using a larger cluster size)";
f88e1a42 5645 }
cc84d90f
HR
5646 error_setg(errp, "The image size is too large for file format '%s'"
5647 "%s", fmt, cluster_size_hint);
5648 error_free(local_err);
5649 local_err = NULL;
f88e1a42
JS
5650 }
5651
5652out:
83d0521a
CL
5653 qemu_opts_del(opts);
5654 qemu_opts_free(create_opts);
84d18f06 5655 if (local_err) {
cc84d90f
HR
5656 error_propagate(errp, local_err);
5657 }
f88e1a42 5658}
85d126f3
SH
5659
5660AioContext *bdrv_get_aio_context(BlockDriverState *bs)
5661{
dcd04228
SH
5662 return bs->aio_context;
5663}
5664
5665void bdrv_detach_aio_context(BlockDriverState *bs)
5666{
5667 if (!bs->drv) {
5668 return;
5669 }
5670
13af91eb
SH
5671 if (bs->io_limits_enabled) {
5672 throttle_detach_aio_context(&bs->throttle_state);
5673 }
dcd04228
SH
5674 if (bs->drv->bdrv_detach_aio_context) {
5675 bs->drv->bdrv_detach_aio_context(bs);
5676 }
5677 if (bs->file) {
5678 bdrv_detach_aio_context(bs->file);
5679 }
5680 if (bs->backing_hd) {
5681 bdrv_detach_aio_context(bs->backing_hd);
5682 }
5683
5684 bs->aio_context = NULL;
5685}
5686
5687void bdrv_attach_aio_context(BlockDriverState *bs,
5688 AioContext *new_context)
5689{
5690 if (!bs->drv) {
5691 return;
5692 }
5693
5694 bs->aio_context = new_context;
5695
5696 if (bs->backing_hd) {
5697 bdrv_attach_aio_context(bs->backing_hd, new_context);
5698 }
5699 if (bs->file) {
5700 bdrv_attach_aio_context(bs->file, new_context);
5701 }
5702 if (bs->drv->bdrv_attach_aio_context) {
5703 bs->drv->bdrv_attach_aio_context(bs, new_context);
5704 }
13af91eb
SH
5705 if (bs->io_limits_enabled) {
5706 throttle_attach_aio_context(&bs->throttle_state, new_context);
5707 }
dcd04228
SH
5708}
5709
5710void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
5711{
5712 bdrv_drain_all(); /* ensure there are no in-flight requests */
5713
5714 bdrv_detach_aio_context(bs);
5715
5716 /* This function executes in the old AioContext so acquire the new one in
5717 * case it runs in a different thread.
5718 */
5719 aio_context_acquire(new_context);
5720 bdrv_attach_aio_context(bs, new_context);
5721 aio_context_release(new_context);
85d126f3 5722}
d616b224
SH
5723
5724void bdrv_add_before_write_notifier(BlockDriverState *bs,
5725 NotifierWithReturn *notifier)
5726{
5727 notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
5728}
6f176b48 5729
c282e1fd 5730int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts)
6f176b48 5731{
c282e1fd 5732 if (!bs->drv->bdrv_amend_options) {
6f176b48
HR
5733 return -ENOTSUP;
5734 }
c282e1fd 5735 return bs->drv->bdrv_amend_options(bs, opts);
6f176b48 5736}
f6186f49 5737
b5042a36
BC
5738/* This function will be called by the bdrv_recurse_is_first_non_filter method
5739 * of block filter and by bdrv_is_first_non_filter.
5740 * It is used to test if the given bs is the candidate or recurse more in the
5741 * node graph.
212a5a8f 5742 */
b5042a36 5743bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
212a5a8f 5744 BlockDriverState *candidate)
f6186f49 5745{
b5042a36
BC
5746 /* return false if basic checks fails */
5747 if (!bs || !bs->drv) {
212a5a8f 5748 return false;
f6186f49
BC
5749 }
5750
b5042a36
BC
5751 /* the code reached a non block filter driver -> check if the bs is
5752 * the same as the candidate. It's the recursion termination condition.
5753 */
5754 if (!bs->drv->is_filter) {
5755 return bs == candidate;
212a5a8f 5756 }
b5042a36 5757 /* Down this path the driver is a block filter driver */
212a5a8f 5758
b5042a36
BC
5759 /* If the block filter recursion method is defined use it to recurse down
5760 * the node graph.
5761 */
5762 if (bs->drv->bdrv_recurse_is_first_non_filter) {
212a5a8f 5763 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
f6186f49
BC
5764 }
5765
b5042a36
BC
5766 /* the driver is a block filter but don't allow to recurse -> return false
5767 */
5768 return false;
f6186f49
BC
5769}
5770
212a5a8f
BC
5771/* This function checks if the candidate is the first non filter bs down it's
5772 * bs chain. Since we don't have pointers to parents it explore all bs chains
5773 * from the top. Some filters can choose not to pass down the recursion.
5774 */
5775bool bdrv_is_first_non_filter(BlockDriverState *candidate)
f6186f49 5776{
212a5a8f
BC
5777 BlockDriverState *bs;
5778
5779 /* walk down the bs forest recursively */
5780 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
5781 bool perm;
5782
b5042a36 5783 /* try to recurse in this top level bs */
e6dc8a1f 5784 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
212a5a8f
BC
5785
5786 /* candidate is the first non filter */
5787 if (perm) {
5788 return true;
5789 }
5790 }
5791
5792 return false;
f6186f49 5793}
09158f00
BC
5794
5795BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
5796{
5797 BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
5798 if (!to_replace_bs) {
5799 error_setg(errp, "Node name '%s' not found", node_name);
5800 return NULL;
5801 }
5802
5803 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
5804 return NULL;
5805 }
5806
5807 /* We don't want arbitrary node of the BDS chain to be replaced only the top
5808 * most non filter in order to prevent data corruption.
5809 * Another benefit is that this tests exclude backing files which are
5810 * blocked by the backing blockers.
5811 */
5812 if (!bdrv_is_first_non_filter(to_replace_bs)) {
5813 error_setg(errp, "Only top most non filter can be replaced");
5814 return NULL;
5815 }
5816
5817 return to_replace_bs;
5818}
448ad91d
ML
5819
5820void bdrv_io_plug(BlockDriverState *bs)
5821{
5822 BlockDriver *drv = bs->drv;
5823 if (drv && drv->bdrv_io_plug) {
5824 drv->bdrv_io_plug(bs);
5825 } else if (bs->file) {
5826 bdrv_io_plug(bs->file);
5827 }
5828}
5829
5830void bdrv_io_unplug(BlockDriverState *bs)
5831{
5832 BlockDriver *drv = bs->drv;
5833 if (drv && drv->bdrv_io_unplug) {
5834 drv->bdrv_io_unplug(bs);
5835 } else if (bs->file) {
5836 bdrv_io_unplug(bs->file);
5837 }
5838}
5839
5840void bdrv_flush_io_queue(BlockDriverState *bs)
5841{
5842 BlockDriver *drv = bs->drv;
5843 if (drv && drv->bdrv_flush_io_queue) {
5844 drv->bdrv_flush_io_queue(bs);
5845 } else if (bs->file) {
5846 bdrv_flush_io_queue(bs->file);
5847 }
5848}