]> git.proxmox.com Git - mirror_qemu.git/blame - block.c
block: Add list of children to BlockDriverState
[mirror_qemu.git] / block.c
CommitLineData
fc01f7e7
FB
1/*
2 * QEMU System Emulator block driver
5fafdf24 3 *
fc01f7e7 4 * Copyright (c) 2003 Fabrice Bellard
5fafdf24 5 *
fc01f7e7
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
3990d09a 24#include "config-host.h"
faf07963 25#include "qemu-common.h"
6d519a5f 26#include "trace.h"
737e150e
PB
27#include "block/block_int.h"
28#include "block/blockjob.h"
1de7afc9 29#include "qemu/module.h"
7b1b5d19 30#include "qapi/qmp/qjson.h"
bfb197e0 31#include "sysemu/block-backend.h"
9c17d615 32#include "sysemu/sysemu.h"
1de7afc9 33#include "qemu/notify.h"
737e150e 34#include "block/coroutine.h"
c13163fb 35#include "block/qapi.h"
b2023818 36#include "qmp-commands.h"
1de7afc9 37#include "qemu/timer.h"
a5ee7bd4 38#include "qapi-event.h"
fc01f7e7 39
71e72a19 40#ifdef CONFIG_BSD
7674e7bf
FB
41#include <sys/types.h>
42#include <sys/stat.h>
43#include <sys/ioctl.h>
72cf2d4f 44#include <sys/queue.h>
c5e97233 45#ifndef __DragonFly__
7674e7bf
FB
46#include <sys/disk.h>
47#endif
c5e97233 48#endif
7674e7bf 49
49dc768d
AL
50#ifdef _WIN32
51#include <windows.h>
52#endif
53
9bd2b08f
JS
54/**
55 * A BdrvDirtyBitmap can be in three possible states:
56 * (1) successor is NULL and disabled is false: full r/w mode
57 * (2) successor is NULL and disabled is true: read only mode ("disabled")
58 * (3) successor is set: frozen mode.
59 * A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
60 * or enabled. A frozen bitmap can only abdicate() or reclaim().
61 */
e4654d2d 62struct BdrvDirtyBitmap {
aa0c7ca5
JS
63 HBitmap *bitmap; /* Dirty sector bitmap implementation */
64 BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
65 char *name; /* Optional non-empty unique ID */
66 int64_t size; /* Size of the bitmap (Number of sectors) */
67 bool disabled; /* Bitmap is read-only */
e4654d2d
FZ
68 QLIST_ENTRY(BdrvDirtyBitmap) list;
69};
70
1c9805a3
SH
71#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
72
1b7bdbc1
SH
73static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
74 QTAILQ_HEAD_INITIALIZER(bdrv_states);
7ee930d0 75
dc364f4c
BC
76static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
77 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
78
8a22f02a
SH
79static QLIST_HEAD(, BlockDriver) bdrv_drivers =
80 QLIST_HEAD_INITIALIZER(bdrv_drivers);
ea2384d3 81
f3930ed0
KW
82static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
83 const char *reference, QDict *options, int flags,
84 BlockDriverState *parent,
85 const BdrvChildRole *child_role,
86 BlockDriver *drv, Error **errp);
87
ce1ffea8 88static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
eb852011
MA
89/* If non-zero, use only whitelisted block drivers */
90static int use_bdrv_whitelist;
91
9e0b22f4
SH
92#ifdef _WIN32
/* Return 1 if @filename starts with an ASCII letter followed by ':',
 * 0 otherwise. */
static int is_windows_drive_prefix(const char *filename)
{
    const char first = filename[0];
    const int is_letter = (first >= 'a' && first <= 'z') ||
                          (first >= 'A' && first <= 'Z');

    /* filename[1] is only inspected once the first character is known to
     * be a letter (and therefore not the terminating NUL). */
    return is_letter && filename[1] == ':';
}
99
/* Return 1 if @filename is exactly "<letter>:" or starts with one of the
 * prefixes "\\.\" or "//./"; return 0 otherwise. */
int is_windows_drive(const char *filename)
{
    int bare_drive = is_windows_drive_prefix(filename) &&
                     filename[2] == '\0';

    if (bare_drive) {
        return 1;
    }

    return strstart(filename, "\\\\.\\", NULL) ||
           strstart(filename, "//./", NULL);
}
110#endif
111
339064d5
KW
112size_t bdrv_opt_mem_align(BlockDriverState *bs)
113{
114 if (!bs || !bs->drv) {
459b4e66
DL
115 /* page size or 4k (hdd sector size) should be on the safe side */
116 return MAX(4096, getpagesize());
339064d5
KW
117 }
118
119 return bs->bl.opt_mem_alignment;
120}
121
4196d2f0
DL
122size_t bdrv_min_mem_align(BlockDriverState *bs)
123{
124 if (!bs || !bs->drv) {
459b4e66
DL
125 /* page size or 4k (hdd sector size) should be on the safe side */
126 return MAX(4096, getpagesize());
4196d2f0
DL
127 }
128
129 return bs->bl.min_mem_alignment;
130}
131
/*
 * Return 1 if @path starts with "<protocol>:", i.e. a ':' is found before
 * any directory separator, and 0 otherwise.
 *
 * On Windows, drive names and drive-letter prefixes are never treated as
 * protocols, and '\' counts as a directory separator as well.
 */
int path_has_protocol(const char *path)
{
    size_t prefix_len;

#ifdef _WIN32
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
    prefix_len = strcspn(path, ":/\\");
#else
    prefix_len = strcspn(path, ":/");
#endif

    /* path[prefix_len] is ':', a separator, or the terminating NUL */
    return path[prefix_len] == ':';
}
149
/* Return 1 if @path is absolute, 0 otherwise.  Outside Windows this only
 * means that the first character is '/'. */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return path[0] == '/' || path[0] == '\\';
#else
    return path[0] == '/';
#endif
}
162
83f64091
FB
/*
 * Store in @dest (a buffer of @dest_size bytes) the path of @filename
 * interpreted relative to @base_path.
 *
 * If @filename is absolute it is copied unchanged.  Otherwise the directory
 * part of @base_path (everything up to and including the last directory
 * separator) is kept and @filename is appended to it.  URLs are supported:
 * when @base_path starts with "<protocol>:", the kept part never ends before
 * that prefix.
 *
 * A ':' in @base_path is only treated as a protocol separator when
 * path_has_protocol() says so; a colon inside a plain relative name such as
 * "dir/a:b" does not cause the "a:" fragment to be kept.
 *
 * Nothing is written when @dest_size <= 0.  The kept part is truncated to
 * @dest_size - 1 bytes if necessary.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *prefix_end; /* first byte after protocol/drive prefix */
    const char *dir_end;    /* first byte after the last separator */
    int len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    prefix_end = base_path;
    if (path_has_protocol(base_path)) {
        /* path_has_protocol() only succeeds if a ':' was found */
        prefix_end = strchr(base_path, ':') + 1;
    }
#ifdef _WIN32
    if (prefix_end == base_path && is_windows_drive_prefix(base_path)) {
        /* keep the "<letter>:" of drive-relative names such as "c:file" */
        prefix_end = base_path + 2;
    }
#endif

    dir_end = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!dir_end || backslash > dir_end) {
            dir_end = backslash;
        }
    }
#endif
    dir_end = dir_end ? dir_end + 1 : base_path;

    /* never cut into the protocol/drive prefix */
    if (dir_end < prefix_end) {
        dir_end = prefix_end;
    }

    len = dir_end - base_path;
    if (len > dest_size - 1) {
        len = dest_size - 1;
    }
    memcpy(dest, base_path, len);
    dest[len] = '\0';
    pstrcat(dest, dest_size, filename);
}
206
0a82855a
HR
207void bdrv_get_full_backing_filename_from_filename(const char *backed,
208 const char *backing,
9f07429e
HR
209 char *dest, size_t sz,
210 Error **errp)
dc5a1371 211{
9f07429e
HR
212 if (backing[0] == '\0' || path_has_protocol(backing) ||
213 path_is_absolute(backing))
214 {
0a82855a 215 pstrcpy(dest, sz, backing);
9f07429e
HR
216 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
217 error_setg(errp, "Cannot use relative backing file names for '%s'",
218 backed);
dc5a1371 219 } else {
0a82855a 220 path_combine(dest, sz, backed, backing);
dc5a1371
PB
221 }
222}
223
9f07429e
HR
224void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
225 Error **errp)
0a82855a 226{
9f07429e
HR
227 char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
228
229 bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
230 dest, sz, errp);
0a82855a
HR
231}
232
0eb7217e
SH
233void bdrv_register(BlockDriver *bdrv)
234{
235 bdrv_setup_io_funcs(bdrv);
b2e12bc6 236
8a22f02a 237 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
ea2384d3 238}
b338082b 239
7f06d47e 240BlockDriverState *bdrv_new_root(void)
b338082b 241{
7f06d47e 242 BlockDriverState *bs = bdrv_new();
e4e9986b 243
e4e9986b 244 QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
e4e9986b
MA
245 return bs;
246}
247
248BlockDriverState *bdrv_new(void)
249{
250 BlockDriverState *bs;
251 int i;
252
5839e53b 253 bs = g_new0(BlockDriverState, 1);
e4654d2d 254 QLIST_INIT(&bs->dirty_bitmaps);
fbe40ff7
FZ
255 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
256 QLIST_INIT(&bs->op_blockers[i]);
257 }
28a7282a 258 bdrv_iostatus_disable(bs);
d7d512f6 259 notifier_list_init(&bs->close_notifiers);
d616b224 260 notifier_with_return_list_init(&bs->before_write_notifiers);
cc0681c4
BC
261 qemu_co_queue_init(&bs->throttled_reqs[0]);
262 qemu_co_queue_init(&bs->throttled_reqs[1]);
9fcb0251 263 bs->refcnt = 1;
dcd04228 264 bs->aio_context = qemu_get_aio_context();
d7d512f6 265
b338082b
FB
266 return bs;
267}
268
d7d512f6
PB
269void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
270{
271 notifier_list_add(&bs->close_notifiers, notify);
272}
273
ea2384d3
FB
274BlockDriver *bdrv_find_format(const char *format_name)
275{
276 BlockDriver *drv1;
8a22f02a
SH
277 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
278 if (!strcmp(drv1->format_name, format_name)) {
ea2384d3 279 return drv1;
8a22f02a 280 }
ea2384d3
FB
281 }
282 return NULL;
283}
284
b64ec4e4 285static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
eb852011 286{
b64ec4e4
FZ
287 static const char *whitelist_rw[] = {
288 CONFIG_BDRV_RW_WHITELIST
289 };
290 static const char *whitelist_ro[] = {
291 CONFIG_BDRV_RO_WHITELIST
eb852011
MA
292 };
293 const char **p;
294
b64ec4e4 295 if (!whitelist_rw[0] && !whitelist_ro[0]) {
eb852011 296 return 1; /* no whitelist, anything goes */
b64ec4e4 297 }
eb852011 298
b64ec4e4 299 for (p = whitelist_rw; *p; p++) {
eb852011
MA
300 if (!strcmp(drv->format_name, *p)) {
301 return 1;
302 }
303 }
b64ec4e4
FZ
304 if (read_only) {
305 for (p = whitelist_ro; *p; p++) {
306 if (!strcmp(drv->format_name, *p)) {
307 return 1;
308 }
309 }
310 }
eb852011
MA
311 return 0;
312}
313
b64ec4e4
FZ
314BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
315 bool read_only)
eb852011
MA
316{
317 BlockDriver *drv = bdrv_find_format(format_name);
b64ec4e4 318 return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
eb852011
MA
319}
320
5b7e1542
ZYW
321typedef struct CreateCo {
322 BlockDriver *drv;
323 char *filename;
83d0521a 324 QemuOpts *opts;
5b7e1542 325 int ret;
cc84d90f 326 Error *err;
5b7e1542
ZYW
327} CreateCo;
328
329static void coroutine_fn bdrv_create_co_entry(void *opaque)
330{
cc84d90f
HR
331 Error *local_err = NULL;
332 int ret;
333
5b7e1542
ZYW
334 CreateCo *cco = opaque;
335 assert(cco->drv);
336
c282e1fd 337 ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
84d18f06 338 if (local_err) {
cc84d90f
HR
339 error_propagate(&cco->err, local_err);
340 }
341 cco->ret = ret;
5b7e1542
ZYW
342}
343
0e7e1989 344int bdrv_create(BlockDriver *drv, const char* filename,
83d0521a 345 QemuOpts *opts, Error **errp)
ea2384d3 346{
5b7e1542
ZYW
347 int ret;
348
349 Coroutine *co;
350 CreateCo cco = {
351 .drv = drv,
352 .filename = g_strdup(filename),
83d0521a 353 .opts = opts,
5b7e1542 354 .ret = NOT_DONE,
cc84d90f 355 .err = NULL,
5b7e1542
ZYW
356 };
357
c282e1fd 358 if (!drv->bdrv_create) {
cc84d90f 359 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
80168bff
LC
360 ret = -ENOTSUP;
361 goto out;
5b7e1542
ZYW
362 }
363
364 if (qemu_in_coroutine()) {
365 /* Fast-path if already in coroutine context */
366 bdrv_create_co_entry(&cco);
367 } else {
368 co = qemu_coroutine_create(bdrv_create_co_entry);
369 qemu_coroutine_enter(co, &cco);
370 while (cco.ret == NOT_DONE) {
b47ec2c4 371 aio_poll(qemu_get_aio_context(), true);
5b7e1542
ZYW
372 }
373 }
374
375 ret = cco.ret;
cc84d90f 376 if (ret < 0) {
84d18f06 377 if (cco.err) {
cc84d90f
HR
378 error_propagate(errp, cco.err);
379 } else {
380 error_setg_errno(errp, -ret, "Could not create image");
381 }
382 }
0e7e1989 383
80168bff
LC
384out:
385 g_free(cco.filename);
5b7e1542 386 return ret;
ea2384d3
FB
387}
388
c282e1fd 389int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
84a12e66
CH
390{
391 BlockDriver *drv;
cc84d90f
HR
392 Error *local_err = NULL;
393 int ret;
84a12e66 394
b65a5e12 395 drv = bdrv_find_protocol(filename, true, errp);
84a12e66 396 if (drv == NULL) {
16905d71 397 return -ENOENT;
84a12e66
CH
398 }
399
c282e1fd 400 ret = bdrv_create(drv, filename, opts, &local_err);
84d18f06 401 if (local_err) {
cc84d90f
HR
402 error_propagate(errp, local_err);
403 }
404 return ret;
84a12e66
CH
405}
406
892b7de8
ET
407/**
408 * Try to get @bs's logical and physical block size.
409 * On success, store them in @bsz struct and return 0.
410 * On failure return -errno.
411 * @bs must not be empty.
412 */
413int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
414{
415 BlockDriver *drv = bs->drv;
416
417 if (drv && drv->bdrv_probe_blocksizes) {
418 return drv->bdrv_probe_blocksizes(bs, bsz);
419 }
420
421 return -ENOTSUP;
422}
423
424/**
425 * Try to get @bs's geometry (cyls, heads, sectors).
426 * On success, store them in @geo struct and return 0.
427 * On failure return -errno.
428 * @bs must not be empty.
429 */
430int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
431{
432 BlockDriver *drv = bs->drv;
433
434 if (drv && drv->bdrv_probe_geometry) {
435 return drv->bdrv_probe_geometry(bs, geo);
436 }
437
438 return -ENOTSUP;
439}
440
eba25057
JM
/*
 * Create a uniquely-named empty temporary file and store its name in
 * @filename, a buffer of @size bytes.
 *
 * Outside Windows the file is created in $TMPDIR, or in /var/tmp when
 * TMPDIR is unset.
 *
 * Returns 0 upon success, otherwise a negative errno value:
 * -EOVERFLOW if the name does not fit into @size bytes, or the error
 * reported by mkstemp() or close().  (On Windows the negated
 * GetLastError() code is returned instead.)
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    const char *tmpdir;
    int fd;
    int len;

    /* A non-positive size would be converted to a huge size_t below */
    if (size <= 0) {
        return -EOVERFLOW;
    }

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }

    len = snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    if (len < 0 || len >= size) {
        return -EOVERFLOW;
    }

    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* unlink() may overwrite errno, so remember close()'s error first */
        int close_errno = errno;

        unlink(filename);
        return -close_errno;
    }
    return 0;
#endif
}
fc01f7e7 476
84a12e66
CH
477/*
478 * Detect host devices. By convention, /dev/cdrom[N] is always
479 * recognized as a host CDROM.
480 */
481static BlockDriver *find_hdev_driver(const char *filename)
482{
483 int score_max = 0, score;
484 BlockDriver *drv = NULL, *d;
485
486 QLIST_FOREACH(d, &bdrv_drivers, list) {
487 if (d->bdrv_probe_device) {
488 score = d->bdrv_probe_device(filename);
489 if (score > score_max) {
490 score_max = score;
491 drv = d;
492 }
493 }
494 }
495
496 return drv;
497}
498
98289620 499BlockDriver *bdrv_find_protocol(const char *filename,
b65a5e12
HR
500 bool allow_protocol_prefix,
501 Error **errp)
83f64091
FB
502{
503 BlockDriver *drv1;
504 char protocol[128];
1cec71e3 505 int len;
83f64091 506 const char *p;
19cb3738 507
66f82cee
KW
508 /* TODO Drivers without bdrv_file_open must be specified explicitly */
509
39508e7a
CH
510 /*
511 * XXX(hch): we really should not let host device detection
512 * override an explicit protocol specification, but moving this
513 * later breaks access to device names with colons in them.
514 * Thanks to the brain-dead persistent naming schemes on udev-
515 * based Linux systems those actually are quite common.
516 */
517 drv1 = find_hdev_driver(filename);
518 if (drv1) {
519 return drv1;
520 }
521
98289620 522 if (!path_has_protocol(filename) || !allow_protocol_prefix) {
ef810437 523 return &bdrv_file;
84a12e66 524 }
98289620 525
9e0b22f4
SH
526 p = strchr(filename, ':');
527 assert(p != NULL);
1cec71e3
AL
528 len = p - filename;
529 if (len > sizeof(protocol) - 1)
530 len = sizeof(protocol) - 1;
531 memcpy(protocol, filename, len);
532 protocol[len] = '\0';
8a22f02a 533 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
5fafdf24 534 if (drv1->protocol_name &&
8a22f02a 535 !strcmp(drv1->protocol_name, protocol)) {
83f64091 536 return drv1;
8a22f02a 537 }
83f64091 538 }
b65a5e12
HR
539
540 error_setg(errp, "Unknown protocol '%s'", protocol);
83f64091
FB
541 return NULL;
542}
543
c6684249
MA
544/*
545 * Guess image format by probing its contents.
546 * This is not a good idea when your image is raw (CVE-2008-2004), but
547 * we do it anyway for backward compatibility.
548 *
549 * @buf contains the image's first @buf_size bytes.
7cddd372
KW
550 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
551 * but can be smaller if the image file is smaller)
c6684249
MA
552 * @filename is its filename.
553 *
554 * For all block drivers, call the bdrv_probe() method to get its
555 * probing score.
556 * Return the first block driver with the highest probing score.
557 */
38f3ef57
KW
558BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
559 const char *filename)
c6684249
MA
560{
561 int score_max = 0, score;
562 BlockDriver *drv = NULL, *d;
563
564 QLIST_FOREACH(d, &bdrv_drivers, list) {
565 if (d->bdrv_probe) {
566 score = d->bdrv_probe(buf, buf_size, filename);
567 if (score > score_max) {
568 score_max = score;
569 drv = d;
570 }
571 }
572 }
573
574 return drv;
575}
576
f500a6d3 577static int find_image_format(BlockDriverState *bs, const char *filename,
34b5d2c6 578 BlockDriver **pdrv, Error **errp)
f3a5d3f8 579{
c6684249 580 BlockDriver *drv;
7cddd372 581 uint8_t buf[BLOCK_PROBE_BUF_SIZE];
f500a6d3 582 int ret = 0;
f8ea0b00 583
08a00559 584 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
8e895599 585 if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
ef810437 586 *pdrv = &bdrv_raw;
c98ac35d 587 return ret;
1a396859 588 }
f8ea0b00 589
83f64091 590 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
83f64091 591 if (ret < 0) {
34b5d2c6
HR
592 error_setg_errno(errp, -ret, "Could not read image for determining its "
593 "format");
c98ac35d
SW
594 *pdrv = NULL;
595 return ret;
83f64091
FB
596 }
597
c6684249 598 drv = bdrv_probe_all(buf, ret, filename);
c98ac35d 599 if (!drv) {
34b5d2c6
HR
600 error_setg(errp, "Could not determine image format: No compatible "
601 "driver found");
c98ac35d
SW
602 ret = -ENOENT;
603 }
604 *pdrv = drv;
605 return ret;
ea2384d3
FB
606}
607
51762288
SH
608/**
609 * Set the current 'total_sectors' value
65a9bb25 610 * Return 0 on success, -errno on error.
51762288
SH
611 */
612static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
613{
614 BlockDriver *drv = bs->drv;
615
396759ad
NB
616 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
617 if (bs->sg)
618 return 0;
619
51762288
SH
620 /* query actual device if possible, otherwise just trust the hint */
621 if (drv->bdrv_getlength) {
622 int64_t length = drv->bdrv_getlength(bs);
623 if (length < 0) {
624 return length;
625 }
7e382003 626 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
51762288
SH
627 }
628
629 bs->total_sectors = hint;
630 return 0;
631}
632
9e8f1835
PB
633/**
634 * Set open flags for a given discard mode
635 *
636 * Return 0 on success, -1 if the discard mode was invalid.
637 */
638int bdrv_parse_discard_flags(const char *mode, int *flags)
639{
640 *flags &= ~BDRV_O_UNMAP;
641
642 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
643 /* do nothing */
644 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
645 *flags |= BDRV_O_UNMAP;
646 } else {
647 return -1;
648 }
649
650 return 0;
651}
652
c3993cdc
SH
653/**
654 * Set open flags for a given cache mode
655 *
656 * Return 0 on success, -1 if the cache mode was invalid.
657 */
658int bdrv_parse_cache_flags(const char *mode, int *flags)
659{
660 *flags &= ~BDRV_O_CACHE_MASK;
661
662 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
663 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
92196b2f
SH
664 } else if (!strcmp(mode, "directsync")) {
665 *flags |= BDRV_O_NOCACHE;
c3993cdc
SH
666 } else if (!strcmp(mode, "writeback")) {
667 *flags |= BDRV_O_CACHE_WB;
668 } else if (!strcmp(mode, "unsafe")) {
669 *flags |= BDRV_O_CACHE_WB;
670 *flags |= BDRV_O_NO_FLUSH;
671 } else if (!strcmp(mode, "writethrough")) {
672 /* this is the default */
673 } else {
674 return -1;
675 }
676
677 return 0;
678}
679
b1e6fc08
KW
680/*
681 * Returns the flags that a temporary snapshot should get, based on the
682 * originally requested flags (the originally requested image will have flags
683 * like a backing file)
684 */
685static int bdrv_temp_snapshot_flags(int flags)
686{
687 return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
688}
689
0b50cc88 690/*
f3930ed0
KW
691 * Returns the flags that bs->file should get if a protocol driver is expected,
692 * based on the given flags for the parent BDS
0b50cc88
KW
693 */
694static int bdrv_inherited_flags(int flags)
695{
696 /* Enable protocol handling, disable format probing for bs->file */
697 flags |= BDRV_O_PROTOCOL;
698
699 /* Our block drivers take care to send flushes and respect unmap policy,
700 * so we can enable both unconditionally on lower layers. */
701 flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
702
0b50cc88 703 /* Clear flags that only apply to the top layer */
5669b44d 704 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
0b50cc88
KW
705
706 return flags;
707}
708
f3930ed0
KW
709const BdrvChildRole child_file = {
710 .inherit_flags = bdrv_inherited_flags,
711};
712
713/*
714 * Returns the flags that bs->file should get if the use of formats (and not
715 * only protocols) is permitted for it, based on the given flags for the parent
716 * BDS
717 */
718static int bdrv_inherited_fmt_flags(int parent_flags)
719{
720 int flags = child_file.inherit_flags(parent_flags);
721 return flags & ~BDRV_O_PROTOCOL;
722}
723
724const BdrvChildRole child_format = {
725 .inherit_flags = bdrv_inherited_fmt_flags,
726};
727
317fc44e
KW
728/*
729 * Returns the flags that bs->backing_hd should get, based on the given flags
730 * for the parent BDS
731 */
732static int bdrv_backing_flags(int flags)
733{
734 /* backing files always opened read-only */
735 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
736
737 /* snapshot=on is handled on the top layer */
8bfea15d 738 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
317fc44e
KW
739
740 return flags;
741}
742
f3930ed0
KW
743static const BdrvChildRole child_backing = {
744 .inherit_flags = bdrv_backing_flags,
745};
746
7b272452
KW
747static int bdrv_open_flags(BlockDriverState *bs, int flags)
748{
749 int open_flags = flags | BDRV_O_CACHE_WB;
750
751 /*
752 * Clear flags that are internal to the block layer before opening the
753 * image.
754 */
20cca275 755 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
7b272452
KW
756
757 /*
758 * Snapshots should be writable.
759 */
8bfea15d 760 if (flags & BDRV_O_TEMPORARY) {
7b272452
KW
761 open_flags |= BDRV_O_RDWR;
762 }
763
764 return open_flags;
765}
766
636ea370
KW
767static void bdrv_assign_node_name(BlockDriverState *bs,
768 const char *node_name,
769 Error **errp)
6913c0c2
BC
770{
771 if (!node_name) {
636ea370 772 return;
6913c0c2
BC
773 }
774
9aebf3b8 775 /* Check for empty string or invalid characters */
f5bebbbb 776 if (!id_wellformed(node_name)) {
9aebf3b8 777 error_setg(errp, "Invalid node name");
636ea370 778 return;
6913c0c2
BC
779 }
780
0c5e94ee 781 /* takes care of avoiding namespaces collisions */
7f06d47e 782 if (blk_by_name(node_name)) {
0c5e94ee
BC
783 error_setg(errp, "node-name=%s is conflicting with a device id",
784 node_name);
636ea370 785 return;
0c5e94ee
BC
786 }
787
6913c0c2
BC
788 /* takes care of avoiding duplicates node names */
789 if (bdrv_find_node(node_name)) {
790 error_setg(errp, "Duplicate node name");
636ea370 791 return;
6913c0c2
BC
792 }
793
794 /* copy node name into the bs and insert it into the graph list */
795 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
796 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
6913c0c2
BC
797}
798
18edf289
KW
799static QemuOptsList bdrv_runtime_opts = {
800 .name = "bdrv_common",
801 .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
802 .desc = {
803 {
804 .name = "node-name",
805 .type = QEMU_OPT_STRING,
806 .help = "Node name of the block device node",
807 },
808 { /* end of list */ }
809 },
810};
811
57915332
KW
812/*
813 * Common part for opening disk images and files
b6ad491a
KW
814 *
815 * Removes all processed options from *options.
57915332 816 */
f500a6d3 817static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
34b5d2c6 818 QDict *options, int flags, BlockDriver *drv, Error **errp)
57915332
KW
819{
820 int ret, open_flags;
035fccdf 821 const char *filename;
6913c0c2 822 const char *node_name = NULL;
18edf289 823 QemuOpts *opts;
34b5d2c6 824 Error *local_err = NULL;
57915332
KW
825
826 assert(drv != NULL);
6405875c 827 assert(bs->file == NULL);
707ff828 828 assert(options != NULL && bs->options != options);
57915332 829
45673671
KW
830 if (file != NULL) {
831 filename = file->filename;
832 } else {
833 filename = qdict_get_try_str(options, "filename");
834 }
835
765003db
KW
836 if (drv->bdrv_needs_filename && !filename) {
837 error_setg(errp, "The '%s' block driver requires a file name",
838 drv->format_name);
839 return -EINVAL;
840 }
841
45673671 842 trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
28dcee10 843
18edf289
KW
844 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
845 qemu_opts_absorb_qdict(opts, options, &local_err);
846 if (local_err) {
847 error_propagate(errp, local_err);
848 ret = -EINVAL;
849 goto fail_opts;
850 }
851
852 node_name = qemu_opt_get(opts, "node-name");
636ea370 853 bdrv_assign_node_name(bs, node_name, &local_err);
0fb6395c 854 if (local_err) {
636ea370 855 error_propagate(errp, local_err);
18edf289
KW
856 ret = -EINVAL;
857 goto fail_opts;
6913c0c2 858 }
6913c0c2 859
1b7fd729 860 bs->guest_block_size = 512;
c25f53b0 861 bs->request_alignment = 512;
0d51b4de 862 bs->zero_beyond_eof = true;
b64ec4e4
FZ
863 open_flags = bdrv_open_flags(bs, flags);
864 bs->read_only = !(open_flags & BDRV_O_RDWR);
865
866 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
8f94a6e4
KW
867 error_setg(errp,
868 !bs->read_only && bdrv_is_whitelisted(drv, true)
869 ? "Driver '%s' can only be used for read-only devices"
870 : "Driver '%s' is not whitelisted",
871 drv->format_name);
18edf289
KW
872 ret = -ENOTSUP;
873 goto fail_opts;
b64ec4e4 874 }
57915332 875
53fec9d3 876 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
0ebd24e0
KW
877 if (flags & BDRV_O_COPY_ON_READ) {
878 if (!bs->read_only) {
879 bdrv_enable_copy_on_read(bs);
880 } else {
881 error_setg(errp, "Can't use copy-on-read on read-only device");
18edf289
KW
882 ret = -EINVAL;
883 goto fail_opts;
0ebd24e0 884 }
53fec9d3
SH
885 }
886
c2ad1b0c
KW
887 if (filename != NULL) {
888 pstrcpy(bs->filename, sizeof(bs->filename), filename);
889 } else {
890 bs->filename[0] = '\0';
891 }
91af7014 892 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
57915332 893
57915332 894 bs->drv = drv;
7267c094 895 bs->opaque = g_malloc0(drv->instance_size);
57915332 896
03f541bd 897 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
e7c63796 898
66f82cee
KW
899 /* Open the image, either directly or using a protocol */
900 if (drv->bdrv_file_open) {
5d186eb0 901 assert(file == NULL);
030be321 902 assert(!drv->bdrv_needs_filename || filename != NULL);
34b5d2c6 903 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
f500a6d3 904 } else {
2af5ef70 905 if (file == NULL) {
34b5d2c6
HR
906 error_setg(errp, "Can't use '%s' as a block driver for the "
907 "protocol level", drv->format_name);
2af5ef70
KW
908 ret = -EINVAL;
909 goto free_and_fail;
910 }
f500a6d3 911 bs->file = file;
34b5d2c6 912 ret = drv->bdrv_open(bs, options, open_flags, &local_err);
66f82cee
KW
913 }
914
57915332 915 if (ret < 0) {
84d18f06 916 if (local_err) {
34b5d2c6 917 error_propagate(errp, local_err);
2fa9aa59
DH
918 } else if (bs->filename[0]) {
919 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
34b5d2c6
HR
920 } else {
921 error_setg_errno(errp, -ret, "Could not open image");
922 }
57915332
KW
923 goto free_and_fail;
924 }
925
a1f688f4
MA
926 if (bs->encrypted) {
927 error_report("Encrypted images are deprecated");
928 error_printf("Support for them will be removed in a future release.\n"
929 "You can use 'qemu-img convert' to convert your image"
930 " to an unencrypted one.\n");
931 }
932
51762288
SH
933 ret = refresh_total_sectors(bs, bs->total_sectors);
934 if (ret < 0) {
34b5d2c6 935 error_setg_errno(errp, -ret, "Could not refresh total sector count");
51762288 936 goto free_and_fail;
57915332 937 }
51762288 938
3baca891
KW
939 bdrv_refresh_limits(bs, &local_err);
940 if (local_err) {
941 error_propagate(errp, local_err);
942 ret = -EINVAL;
943 goto free_and_fail;
944 }
945
c25f53b0 946 assert(bdrv_opt_mem_align(bs) != 0);
4196d2f0 947 assert(bdrv_min_mem_align(bs) != 0);
47ea2de2 948 assert((bs->request_alignment != 0) || bs->sg);
18edf289
KW
949
950 qemu_opts_del(opts);
57915332
KW
951 return 0;
952
953free_and_fail:
f500a6d3 954 bs->file = NULL;
7267c094 955 g_free(bs->opaque);
57915332
KW
956 bs->opaque = NULL;
957 bs->drv = NULL;
18edf289
KW
958fail_opts:
959 qemu_opts_del(opts);
57915332
KW
960 return ret;
961}
962
5e5c4f63
KW
963static QDict *parse_json_filename(const char *filename, Error **errp)
964{
965 QObject *options_obj;
966 QDict *options;
967 int ret;
968
969 ret = strstart(filename, "json:", &filename);
970 assert(ret);
971
972 options_obj = qobject_from_json(filename);
973 if (!options_obj) {
974 error_setg(errp, "Could not parse the JSON options");
975 return NULL;
976 }
977
978 if (qobject_type(options_obj) != QTYPE_QDICT) {
979 qobject_decref(options_obj);
980 error_setg(errp, "Invalid JSON object given");
981 return NULL;
982 }
983
984 options = qobject_to_qdict(options_obj);
985 qdict_flatten(options);
986
987 return options;
988}
989
b6ce07aa 990/*
f54120ff
KW
991 * Fills in default options for opening images and converts the legacy
992 * filename/flags pair to option QDict entries.
53a29513
HR
993 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
994 * block driver has been specified explicitly.
b6ce07aa 995 */
53a29513
HR
996static int bdrv_fill_options(QDict **options, const char **pfilename,
997 int *flags, BlockDriver *drv, Error **errp)
ea2384d3 998{
5e5c4f63 999 const char *filename = *pfilename;
c2ad1b0c 1000 const char *drvname;
53a29513 1001 bool protocol = *flags & BDRV_O_PROTOCOL;
e3fa4bfa 1002 bool parse_filename = false;
53a29513 1003 BlockDriver *tmp_drv;
34b5d2c6 1004 Error *local_err = NULL;
83f64091 1005
5e5c4f63
KW
1006 /* Parse json: pseudo-protocol */
1007 if (filename && g_str_has_prefix(filename, "json:")) {
1008 QDict *json_options = parse_json_filename(filename, &local_err);
1009 if (local_err) {
1010 error_propagate(errp, local_err);
1011 return -EINVAL;
1012 }
1013
1014 /* Options given in the filename have lower priority than options
1015 * specified directly */
1016 qdict_join(*options, json_options, false);
1017 QDECREF(json_options);
1018 *pfilename = filename = NULL;
1019 }
1020
53a29513
HR
1021 drvname = qdict_get_try_str(*options, "driver");
1022
1023 /* If the user has explicitly specified the driver, this choice should
1024 * override the BDRV_O_PROTOCOL flag */
1025 tmp_drv = drv;
1026 if (!tmp_drv && drvname) {
1027 tmp_drv = bdrv_find_format(drvname);
1028 }
1029 if (tmp_drv) {
1030 protocol = tmp_drv->bdrv_file_open;
1031 }
1032
1033 if (protocol) {
1034 *flags |= BDRV_O_PROTOCOL;
1035 } else {
1036 *flags &= ~BDRV_O_PROTOCOL;
1037 }
1038
035fccdf 1039 /* Fetch the file name from the options QDict if necessary */
17b005f1 1040 if (protocol && filename) {
f54120ff
KW
1041 if (!qdict_haskey(*options, "filename")) {
1042 qdict_put(*options, "filename", qstring_from_str(filename));
1043 parse_filename = true;
1044 } else {
1045 error_setg(errp, "Can't specify 'file' and 'filename' options at "
1046 "the same time");
1047 return -EINVAL;
1048 }
035fccdf
KW
1049 }
1050
c2ad1b0c 1051 /* Find the right block driver */
f54120ff 1052 filename = qdict_get_try_str(*options, "filename");
f54120ff 1053
17b005f1
KW
1054 if (drv) {
1055 if (drvname) {
1056 error_setg(errp, "Driver specified twice");
1057 return -EINVAL;
1058 }
1059 drvname = drv->format_name;
1060 qdict_put(*options, "driver", qstring_from_str(drvname));
1061 } else {
1062 if (!drvname && protocol) {
1063 if (filename) {
b65a5e12 1064 drv = bdrv_find_protocol(filename, parse_filename, errp);
17b005f1 1065 if (!drv) {
17b005f1
KW
1066 return -EINVAL;
1067 }
1068
1069 drvname = drv->format_name;
1070 qdict_put(*options, "driver", qstring_from_str(drvname));
1071 } else {
1072 error_setg(errp, "Must specify either driver or file");
f54120ff
KW
1073 return -EINVAL;
1074 }
17b005f1
KW
1075 } else if (drvname) {
1076 drv = bdrv_find_format(drvname);
1077 if (!drv) {
1078 error_setg(errp, "Unknown driver '%s'", drvname);
1079 return -ENOENT;
1080 }
98289620 1081 }
c2ad1b0c
KW
1082 }
1083
17b005f1 1084 assert(drv || !protocol);
c2ad1b0c 1085
f54120ff 1086 /* Driver-specific filename parsing */
17b005f1 1087 if (drv && drv->bdrv_parse_filename && parse_filename) {
5acd9d81 1088 drv->bdrv_parse_filename(filename, *options, &local_err);
84d18f06 1089 if (local_err) {
34b5d2c6 1090 error_propagate(errp, local_err);
f54120ff 1091 return -EINVAL;
6963a30d 1092 }
cd5d031e
HR
1093
1094 if (!drv->bdrv_needs_filename) {
1095 qdict_del(*options, "filename");
cd5d031e 1096 }
6963a30d
KW
1097 }
1098
f54120ff
KW
1099 return 0;
1100}
1101
8d24cce1
FZ
1102void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1103{
1104
826b6ca0
FZ
1105 if (bs->backing_hd) {
1106 assert(bs->backing_blocker);
1107 bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
1108 } else if (backing_hd) {
1109 error_setg(&bs->backing_blocker,
81e5f78a
AG
1110 "node is used as backing hd of '%s'",
1111 bdrv_get_device_or_node_name(bs));
826b6ca0
FZ
1112 }
1113
8d24cce1
FZ
1114 bs->backing_hd = backing_hd;
1115 if (!backing_hd) {
826b6ca0
FZ
1116 error_free(bs->backing_blocker);
1117 bs->backing_blocker = NULL;
8d24cce1
FZ
1118 goto out;
1119 }
1120 bs->open_flags &= ~BDRV_O_NO_BACKING;
1121 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1122 pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1123 backing_hd->drv ? backing_hd->drv->format_name : "");
826b6ca0
FZ
1124
1125 bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
1126 /* Otherwise we won't be able to commit due to check in bdrv_commit */
bb00021d 1127 bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
826b6ca0 1128 bs->backing_blocker);
8d24cce1 1129out:
3baca891 1130 bdrv_refresh_limits(bs, NULL);
8d24cce1
FZ
1131}
1132
31ca6d07
KW
1133/*
1134 * Opens the backing file for a BlockDriverState if not yet open
1135 *
1136 * options is a QDict of options to pass to the block drivers, or NULL for an
1137 * empty set of options. The reference to the QDict is transferred to this
1138 * function (even on failure), so if the caller intends to reuse the dictionary,
1139 * it needs to use QINCREF() before calling bdrv_file_open.
1140 */
34b5d2c6 1141int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
9156df12 1142{
1ba4b6a5 1143 char *backing_filename = g_malloc0(PATH_MAX);
317fc44e 1144 int ret = 0;
8d24cce1 1145 BlockDriverState *backing_hd;
34b5d2c6 1146 Error *local_err = NULL;
9156df12
PB
1147
1148 if (bs->backing_hd != NULL) {
31ca6d07 1149 QDECREF(options);
1ba4b6a5 1150 goto free_exit;
9156df12
PB
1151 }
1152
31ca6d07
KW
1153 /* NULL means an empty set of options */
1154 if (options == NULL) {
1155 options = qdict_new();
1156 }
1157
9156df12 1158 bs->open_flags &= ~BDRV_O_NO_BACKING;
1cb6f506
KW
1159 if (qdict_haskey(options, "file.filename")) {
1160 backing_filename[0] = '\0';
1161 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
31ca6d07 1162 QDECREF(options);
1ba4b6a5 1163 goto free_exit;
dbecebdd 1164 } else {
9f07429e
HR
1165 bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1166 &local_err);
1167 if (local_err) {
1168 ret = -EINVAL;
1169 error_propagate(errp, local_err);
1170 QDECREF(options);
1171 goto free_exit;
1172 }
9156df12
PB
1173 }
1174
8ee79e70
KW
1175 if (!bs->drv || !bs->drv->supports_backing) {
1176 ret = -EINVAL;
1177 error_setg(errp, "Driver doesn't support backing files");
1178 QDECREF(options);
1179 goto free_exit;
1180 }
1181
e4e9986b 1182 backing_hd = bdrv_new();
8d24cce1 1183
c5f6e493
KW
1184 if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1185 qdict_put(options, "driver", qstring_from_str(bs->backing_format));
9156df12
PB
1186 }
1187
f67503e5 1188 assert(bs->backing_hd == NULL);
f3930ed0
KW
1189 ret = bdrv_open_inherit(&backing_hd,
1190 *backing_filename ? backing_filename : NULL,
1191 NULL, options, 0, bs, &child_backing,
1192 NULL, &local_err);
9156df12 1193 if (ret < 0) {
8d24cce1
FZ
1194 bdrv_unref(backing_hd);
1195 backing_hd = NULL;
9156df12 1196 bs->open_flags |= BDRV_O_NO_BACKING;
b04b6b6e
FZ
1197 error_setg(errp, "Could not open backing file: %s",
1198 error_get_pretty(local_err));
1199 error_free(local_err);
1ba4b6a5 1200 goto free_exit;
9156df12 1201 }
8d24cce1 1202 bdrv_set_backing_hd(bs, backing_hd);
d80ac658 1203
1ba4b6a5
BC
1204free_exit:
1205 g_free(backing_filename);
1206 return ret;
9156df12
PB
1207}
1208
da557aac
HR
1209/*
1210 * Opens a disk image whose options are given as BlockdevRef in another block
1211 * device's options.
1212 *
da557aac
HR
1213 * If allow_none is true, no image will be opened if filename is false and no
1214 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1215 *
1216 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1217 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1218 * itself, all options starting with "${bdref_key}." are considered part of the
1219 * BlockdevRef.
1220 *
1221 * The BlockdevRef will be removed from the options QDict.
f67503e5
HR
1222 *
1223 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
da557aac
HR
1224 */
1225int bdrv_open_image(BlockDriverState **pbs, const char *filename,
f3930ed0
KW
1226 QDict *options, const char *bdref_key,
1227 BlockDriverState* parent, const BdrvChildRole *child_role,
f7d9fd8c 1228 bool allow_none, Error **errp)
da557aac
HR
1229{
1230 QDict *image_options;
1231 int ret;
1232 char *bdref_key_dot;
1233 const char *reference;
1234
f67503e5
HR
1235 assert(pbs);
1236 assert(*pbs == NULL);
1237
da557aac
HR
1238 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1239 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1240 g_free(bdref_key_dot);
1241
1242 reference = qdict_get_try_str(options, bdref_key);
1243 if (!filename && !reference && !qdict_size(image_options)) {
1244 if (allow_none) {
1245 ret = 0;
1246 } else {
1247 error_setg(errp, "A block device must be specified for \"%s\"",
1248 bdref_key);
1249 ret = -EINVAL;
1250 }
b20e61e0 1251 QDECREF(image_options);
da557aac
HR
1252 goto done;
1253 }
1254
f3930ed0
KW
1255 ret = bdrv_open_inherit(pbs, filename, reference, image_options, 0,
1256 parent, child_role, NULL, errp);
da557aac
HR
1257
1258done:
1259 qdict_del(options, bdref_key);
1260 return ret;
1261}
1262
6b8aeca5 1263int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
b998875d
KW
1264{
1265 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1ba4b6a5 1266 char *tmp_filename = g_malloc0(PATH_MAX + 1);
b998875d 1267 int64_t total_size;
83d0521a 1268 QemuOpts *opts = NULL;
b998875d
KW
1269 QDict *snapshot_options;
1270 BlockDriverState *bs_snapshot;
1271 Error *local_err;
1272 int ret;
1273
1274 /* if snapshot, we create a temporary backing file and open it
1275 instead of opening 'filename' directly */
1276
1277 /* Get the required size from the image */
f187743a
KW
1278 total_size = bdrv_getlength(bs);
1279 if (total_size < 0) {
6b8aeca5 1280 ret = total_size;
f187743a 1281 error_setg_errno(errp, -total_size, "Could not get image size");
1ba4b6a5 1282 goto out;
f187743a 1283 }
b998875d
KW
1284
1285 /* Create the temporary image */
1ba4b6a5 1286 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
b998875d
KW
1287 if (ret < 0) {
1288 error_setg_errno(errp, -ret, "Could not get temporary filename");
1ba4b6a5 1289 goto out;
b998875d
KW
1290 }
1291
ef810437 1292 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
c282e1fd 1293 &error_abort);
39101f25 1294 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
ef810437 1295 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
83d0521a 1296 qemu_opts_del(opts);
b998875d
KW
1297 if (ret < 0) {
1298 error_setg_errno(errp, -ret, "Could not create temporary overlay "
1299 "'%s': %s", tmp_filename,
1300 error_get_pretty(local_err));
1301 error_free(local_err);
1ba4b6a5 1302 goto out;
b998875d
KW
1303 }
1304
1305 /* Prepare a new options QDict for the temporary file */
1306 snapshot_options = qdict_new();
1307 qdict_put(snapshot_options, "file.driver",
1308 qstring_from_str("file"));
1309 qdict_put(snapshot_options, "file.filename",
1310 qstring_from_str(tmp_filename));
1311
e4e9986b 1312 bs_snapshot = bdrv_new();
b998875d
KW
1313
1314 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
ef810437 1315 flags, &bdrv_qcow2, &local_err);
b998875d
KW
1316 if (ret < 0) {
1317 error_propagate(errp, local_err);
1ba4b6a5 1318 goto out;
b998875d
KW
1319 }
1320
1321 bdrv_append(bs_snapshot, bs);
1ba4b6a5
BC
1322
1323out:
1324 g_free(tmp_filename);
6b8aeca5 1325 return ret;
b998875d
KW
1326}
1327
6e93e7c4
KW
1328static void bdrv_attach_child(BlockDriverState *parent_bs,
1329 BlockDriverState *child_bs,
1330 const BdrvChildRole *child_role)
1331{
1332 BdrvChild *child = g_new(BdrvChild, 1);
1333 *child = (BdrvChild) {
1334 .bs = child_bs,
1335 .role = child_role,
1336 };
1337
1338 QLIST_INSERT_HEAD(&parent_bs->children, child, next);
1339}
1340
b6ce07aa
KW
1341/*
1342 * Opens a disk image (raw, qcow2, vmdk, ...)
de9c0cec
KW
1343 *
1344 * options is a QDict of options to pass to the block drivers, or NULL for an
1345 * empty set of options. The reference to the QDict belongs to the block layer
1346 * after the call (even on failure), so if the caller intends to reuse the
1347 * dictionary, it needs to use QINCREF() before calling bdrv_open.
f67503e5
HR
1348 *
1349 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1350 * If it is not NULL, the referenced BDS will be reused.
ddf5636d
HR
1351 *
1352 * The reference parameter may be used to specify an existing block device which
1353 * should be opened. If specified, neither options nor a filename may be given,
1354 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
b6ce07aa 1355 */
f3930ed0
KW
1356static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
1357 const char *reference, QDict *options, int flags,
1358 BlockDriverState *parent,
1359 const BdrvChildRole *child_role,
1360 BlockDriver *drv, Error **errp)
ea2384d3 1361{
b6ce07aa 1362 int ret;
f67503e5 1363 BlockDriverState *file = NULL, *bs;
74fe54f2 1364 const char *drvname;
34b5d2c6 1365 Error *local_err = NULL;
b1e6fc08 1366 int snapshot_flags = 0;
712e7874 1367
f67503e5 1368 assert(pbs);
f3930ed0
KW
1369 assert(!child_role || !flags);
1370 assert(!child_role == !parent);
f67503e5 1371
ddf5636d
HR
1372 if (reference) {
1373 bool options_non_empty = options ? qdict_size(options) : false;
1374 QDECREF(options);
1375
1376 if (*pbs) {
1377 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1378 "another block device");
1379 return -EINVAL;
1380 }
1381
1382 if (filename || options_non_empty) {
1383 error_setg(errp, "Cannot reference an existing block device with "
1384 "additional options or a new filename");
1385 return -EINVAL;
1386 }
1387
1388 bs = bdrv_lookup_bs(reference, reference, errp);
1389 if (!bs) {
1390 return -ENODEV;
1391 }
1392 bdrv_ref(bs);
6e93e7c4
KW
1393 if (child_role) {
1394 bdrv_attach_child(parent, bs, child_role);
1395 }
ddf5636d
HR
1396 *pbs = bs;
1397 return 0;
1398 }
1399
f67503e5
HR
1400 if (*pbs) {
1401 bs = *pbs;
1402 } else {
e4e9986b 1403 bs = bdrv_new();
f67503e5
HR
1404 }
1405
de9c0cec
KW
1406 /* NULL means an empty set of options */
1407 if (options == NULL) {
1408 options = qdict_new();
1409 }
1410
f3930ed0
KW
1411 if (child_role) {
1412 flags = child_role->inherit_flags(parent->open_flags);
1413 }
1414
53a29513 1415 ret = bdrv_fill_options(&options, &filename, &flags, drv, &local_err);
462f5bcf
KW
1416 if (local_err) {
1417 goto fail;
1418 }
1419
76c591b0
KW
1420 /* Find the right image format driver */
1421 drv = NULL;
1422 drvname = qdict_get_try_str(options, "driver");
1423 if (drvname) {
1424 drv = bdrv_find_format(drvname);
1425 qdict_del(options, "driver");
1426 if (!drv) {
1427 error_setg(errp, "Unknown driver: '%s'", drvname);
1428 ret = -EINVAL;
1429 goto fail;
1430 }
1431 }
1432
1433 assert(drvname || !(flags & BDRV_O_PROTOCOL));
76c591b0 1434
f3930ed0 1435 bs->open_flags = flags;
de9c0cec 1436 bs->options = options;
b6ad491a 1437 options = qdict_clone_shallow(options);
de9c0cec 1438
f500a6d3 1439 /* Open image file without format layer */
f4788adc
KW
1440 if ((flags & BDRV_O_PROTOCOL) == 0) {
1441 if (flags & BDRV_O_RDWR) {
1442 flags |= BDRV_O_ALLOW_RDWR;
1443 }
1444 if (flags & BDRV_O_SNAPSHOT) {
1445 snapshot_flags = bdrv_temp_snapshot_flags(flags);
1446 flags = bdrv_backing_flags(flags);
1447 }
f500a6d3 1448
f4788adc 1449 assert(file == NULL);
f3930ed0 1450 bs->open_flags = flags;
f4788adc 1451 ret = bdrv_open_image(&file, filename, options, "file",
f3930ed0 1452 bs, &child_file, true, &local_err);
f4788adc
KW
1453 if (ret < 0) {
1454 goto fail;
1455 }
f500a6d3
KW
1456 }
1457
76c591b0 1458 /* Image format probing */
38f3ef57 1459 bs->probed = !drv;
76c591b0 1460 if (!drv && file) {
17b005f1
KW
1461 ret = find_image_format(file, filename, &drv, &local_err);
1462 if (ret < 0) {
8bfea15d 1463 goto fail;
2a05cbe4 1464 }
76c591b0 1465 } else if (!drv) {
17b005f1
KW
1466 error_setg(errp, "Must specify either driver or file");
1467 ret = -EINVAL;
8bfea15d 1468 goto fail;
ea2384d3 1469 }
b6ce07aa 1470
53a29513
HR
1471 /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
1472 assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
1473 /* file must be NULL if a protocol BDS is about to be created
1474 * (the inverse results in an error message from bdrv_open_common()) */
1475 assert(!(flags & BDRV_O_PROTOCOL) || !file);
1476
b6ce07aa 1477 /* Open the image */
34b5d2c6 1478 ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
b6ce07aa 1479 if (ret < 0) {
8bfea15d 1480 goto fail;
6987307c
CH
1481 }
1482
2a05cbe4 1483 if (file && (bs->file != file)) {
4f6fd349 1484 bdrv_unref(file);
f500a6d3
KW
1485 file = NULL;
1486 }
1487
b6ce07aa 1488 /* If there is a backing file, use it */
9156df12 1489 if ((flags & BDRV_O_NO_BACKING) == 0) {
31ca6d07
KW
1490 QDict *backing_options;
1491
5726d872 1492 qdict_extract_subqdict(options, &backing_options, "backing.");
34b5d2c6 1493 ret = bdrv_open_backing_file(bs, backing_options, &local_err);
b6ce07aa 1494 if (ret < 0) {
b6ad491a 1495 goto close_and_fail;
b6ce07aa 1496 }
b6ce07aa
KW
1497 }
1498
91af7014
HR
1499 bdrv_refresh_filename(bs);
1500
b998875d
KW
1501 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1502 * temporary snapshot afterwards. */
b1e6fc08 1503 if (snapshot_flags) {
6b8aeca5 1504 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
b998875d 1505 if (local_err) {
b998875d
KW
1506 goto close_and_fail;
1507 }
1508 }
1509
b6ad491a 1510 /* Check if any unknown options were used */
5acd9d81 1511 if (options && (qdict_size(options) != 0)) {
b6ad491a 1512 const QDictEntry *entry = qdict_first(options);
5acd9d81
HR
1513 if (flags & BDRV_O_PROTOCOL) {
1514 error_setg(errp, "Block protocol '%s' doesn't support the option "
1515 "'%s'", drv->format_name, entry->key);
1516 } else {
1517 error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1518 "support the option '%s'", drv->format_name,
bfb197e0 1519 bdrv_get_device_name(bs), entry->key);
5acd9d81 1520 }
b6ad491a
KW
1521
1522 ret = -EINVAL;
1523 goto close_and_fail;
1524 }
b6ad491a 1525
b6ce07aa 1526 if (!bdrv_key_required(bs)) {
a7f53e26
MA
1527 if (bs->blk) {
1528 blk_dev_change_media_cb(bs->blk, true);
1529 }
c3adb58f
MA
1530 } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1531 && !runstate_check(RUN_STATE_INMIGRATE)
1532 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1533 error_setg(errp,
1534 "Guest must be stopped for opening of encrypted image");
1535 ret = -EBUSY;
1536 goto close_and_fail;
b6ce07aa
KW
1537 }
1538
6e93e7c4
KW
1539 if (child_role) {
1540 bdrv_attach_child(parent, bs, child_role);
1541 }
1542
c3adb58f 1543 QDECREF(options);
f67503e5 1544 *pbs = bs;
b6ce07aa
KW
1545 return 0;
1546
8bfea15d 1547fail:
f500a6d3 1548 if (file != NULL) {
4f6fd349 1549 bdrv_unref(file);
f500a6d3 1550 }
de9c0cec 1551 QDECREF(bs->options);
b6ad491a 1552 QDECREF(options);
de9c0cec 1553 bs->options = NULL;
f67503e5
HR
1554 if (!*pbs) {
1555 /* If *pbs is NULL, a new BDS has been created in this function and
1556 needs to be freed now. Otherwise, it does not need to be closed,
1557 since it has not really been opened yet. */
1558 bdrv_unref(bs);
1559 }
84d18f06 1560 if (local_err) {
34b5d2c6
HR
1561 error_propagate(errp, local_err);
1562 }
b6ad491a 1563 return ret;
de9c0cec 1564
b6ad491a 1565close_and_fail:
f67503e5
HR
1566 /* See fail path, but now the BDS has to be always closed */
1567 if (*pbs) {
1568 bdrv_close(bs);
1569 } else {
1570 bdrv_unref(bs);
1571 }
b6ad491a 1572 QDECREF(options);
84d18f06 1573 if (local_err) {
34b5d2c6
HR
1574 error_propagate(errp, local_err);
1575 }
b6ce07aa
KW
1576 return ret;
1577}
1578
f3930ed0
KW
1579int bdrv_open(BlockDriverState **pbs, const char *filename,
1580 const char *reference, QDict *options, int flags,
1581 BlockDriver *drv, Error **errp)
1582{
1583 return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
1584 NULL, drv, errp);
1585}
1586
e971aa12
JC
1587typedef struct BlockReopenQueueEntry {
1588 bool prepared;
1589 BDRVReopenState state;
1590 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1591} BlockReopenQueueEntry;
1592
1593/*
1594 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1595 * reopen of multiple devices.
1596 *
1597 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1598 * already performed, or alternatively may be NULL a new BlockReopenQueue will
1599 * be created and initialized. This newly created BlockReopenQueue should be
1600 * passed back in for subsequent calls that are intended to be of the same
1601 * atomic 'set'.
1602 *
1603 * bs is the BlockDriverState to add to the reopen queue.
1604 *
1605 * flags contains the open flags for the associated bs
1606 *
1607 * returns a pointer to bs_queue, which is either the newly allocated
1608 * bs_queue, or the existing bs_queue being used.
1609 *
1610 */
1611BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1612 BlockDriverState *bs, int flags)
1613{
1614 assert(bs != NULL);
1615
1616 BlockReopenQueueEntry *bs_entry;
1617 if (bs_queue == NULL) {
1618 bs_queue = g_new0(BlockReopenQueue, 1);
1619 QSIMPLEQ_INIT(bs_queue);
1620 }
1621
f1f25a2e
KW
1622 /* bdrv_open() masks this flag out */
1623 flags &= ~BDRV_O_PROTOCOL;
1624
e971aa12 1625 if (bs->file) {
f1f25a2e 1626 bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
e971aa12
JC
1627 }
1628
1629 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1630 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1631
1632 bs_entry->state.bs = bs;
1633 bs_entry->state.flags = flags;
1634
1635 return bs_queue;
1636}
1637
1638/*
1639 * Reopen multiple BlockDriverStates atomically & transactionally.
1640 *
1641 * The queue passed in (bs_queue) must have been built up previous
1642 * via bdrv_reopen_queue().
1643 *
1644 * Reopens all BDS specified in the queue, with the appropriate
1645 * flags. All devices are prepared for reopen, and failure of any
1646 * device will cause all device changes to be abandonded, and intermediate
1647 * data cleaned up.
1648 *
1649 * If all devices prepare successfully, then the changes are committed
1650 * to all devices.
1651 *
1652 */
1653int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1654{
1655 int ret = -1;
1656 BlockReopenQueueEntry *bs_entry, *next;
1657 Error *local_err = NULL;
1658
1659 assert(bs_queue != NULL);
1660
1661 bdrv_drain_all();
1662
1663 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1664 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1665 error_propagate(errp, local_err);
1666 goto cleanup;
1667 }
1668 bs_entry->prepared = true;
1669 }
1670
1671 /* If we reach this point, we have success and just need to apply the
1672 * changes
1673 */
1674 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1675 bdrv_reopen_commit(&bs_entry->state);
1676 }
1677
1678 ret = 0;
1679
1680cleanup:
1681 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1682 if (ret && bs_entry->prepared) {
1683 bdrv_reopen_abort(&bs_entry->state);
1684 }
1685 g_free(bs_entry);
1686 }
1687 g_free(bs_queue);
1688 return ret;
1689}
1690
1691
1692/* Reopen a single BlockDriverState with the specified flags. */
1693int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1694{
1695 int ret = -1;
1696 Error *local_err = NULL;
1697 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1698
1699 ret = bdrv_reopen_multiple(queue, &local_err);
1700 if (local_err != NULL) {
1701 error_propagate(errp, local_err);
1702 }
1703 return ret;
1704}
1705
1706
1707/*
1708 * Prepares a BlockDriverState for reopen. All changes are staged in the
1709 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1710 * the block driver layer .bdrv_reopen_prepare()
1711 *
1712 * bs is the BlockDriverState to reopen
1713 * flags are the new open flags
1714 * queue is the reopen queue
1715 *
1716 * Returns 0 on success, non-zero on error. On error errp will be set
1717 * as well.
1718 *
1719 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1720 * It is the responsibility of the caller to then call the abort() or
1721 * commit() for any other BDS that have been left in a prepare() state
1722 *
1723 */
1724int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1725 Error **errp)
1726{
1727 int ret = -1;
1728 Error *local_err = NULL;
1729 BlockDriver *drv;
1730
1731 assert(reopen_state != NULL);
1732 assert(reopen_state->bs->drv != NULL);
1733 drv = reopen_state->bs->drv;
1734
1735 /* if we are to stay read-only, do not allow permission change
1736 * to r/w */
1737 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1738 reopen_state->flags & BDRV_O_RDWR) {
81e5f78a
AG
1739 error_setg(errp, "Node '%s' is read only",
1740 bdrv_get_device_or_node_name(reopen_state->bs));
e971aa12
JC
1741 goto error;
1742 }
1743
1744
1745 ret = bdrv_flush(reopen_state->bs);
1746 if (ret) {
1747 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1748 strerror(-ret));
1749 goto error;
1750 }
1751
1752 if (drv->bdrv_reopen_prepare) {
1753 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1754 if (ret) {
1755 if (local_err != NULL) {
1756 error_propagate(errp, local_err);
1757 } else {
d8b6895f
LC
1758 error_setg(errp, "failed while preparing to reopen image '%s'",
1759 reopen_state->bs->filename);
e971aa12
JC
1760 }
1761 goto error;
1762 }
1763 } else {
1764 /* It is currently mandatory to have a bdrv_reopen_prepare()
1765 * handler for each supported drv. */
81e5f78a
AG
1766 error_setg(errp, "Block format '%s' used by node '%s' "
1767 "does not support reopening files", drv->format_name,
1768 bdrv_get_device_or_node_name(reopen_state->bs));
e971aa12
JC
1769 ret = -1;
1770 goto error;
1771 }
1772
1773 ret = 0;
1774
1775error:
1776 return ret;
1777}
1778
1779/*
1780 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1781 * makes them final by swapping the staging BlockDriverState contents into
1782 * the active BlockDriverState contents.
1783 */
1784void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1785{
1786 BlockDriver *drv;
1787
1788 assert(reopen_state != NULL);
1789 drv = reopen_state->bs->drv;
1790 assert(drv != NULL);
1791
1792 /* If there are any driver level actions to take */
1793 if (drv->bdrv_reopen_commit) {
1794 drv->bdrv_reopen_commit(reopen_state);
1795 }
1796
1797 /* set BDS specific flags now */
1798 reopen_state->bs->open_flags = reopen_state->flags;
1799 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1800 BDRV_O_CACHE_WB);
1801 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
355ef4ac 1802
3baca891 1803 bdrv_refresh_limits(reopen_state->bs, NULL);
e971aa12
JC
1804}
1805
1806/*
1807 * Abort the reopen, and delete and free the staged changes in
1808 * reopen_state
1809 */
1810void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1811{
1812 BlockDriver *drv;
1813
1814 assert(reopen_state != NULL);
1815 drv = reopen_state->bs->drv;
1816 assert(drv != NULL);
1817
1818 if (drv->bdrv_reopen_abort) {
1819 drv->bdrv_reopen_abort(reopen_state);
1820 }
1821}
1822
1823
fc01f7e7
FB
1824void bdrv_close(BlockDriverState *bs)
1825{
33384421
HR
1826 BdrvAioNotifier *ban, *ban_next;
1827
3cbc002c
PB
1828 if (bs->job) {
1829 block_job_cancel_sync(bs->job);
1830 }
58fda173
SH
1831 bdrv_drain_all(); /* complete I/O */
1832 bdrv_flush(bs);
1833 bdrv_drain_all(); /* in case flush left pending I/O */
d7d512f6 1834 notifier_list_notify(&bs->close_notifiers, bs);
7094f12f 1835
3cbc002c 1836 if (bs->drv) {
6e93e7c4
KW
1837 BdrvChild *child, *next;
1838
1839 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
1840 QLIST_REMOVE(child, next);
1841 g_free(child);
1842 }
1843
557df6ac 1844 if (bs->backing_hd) {
826b6ca0
FZ
1845 BlockDriverState *backing_hd = bs->backing_hd;
1846 bdrv_set_backing_hd(bs, NULL);
1847 bdrv_unref(backing_hd);
557df6ac 1848 }
ea2384d3 1849 bs->drv->bdrv_close(bs);
7267c094 1850 g_free(bs->opaque);
ea2384d3
FB
1851 bs->opaque = NULL;
1852 bs->drv = NULL;
53fec9d3 1853 bs->copy_on_read = 0;
a275fa42
PB
1854 bs->backing_file[0] = '\0';
1855 bs->backing_format[0] = '\0';
6405875c
PB
1856 bs->total_sectors = 0;
1857 bs->encrypted = 0;
1858 bs->valid_key = 0;
1859 bs->sg = 0;
0d51b4de 1860 bs->zero_beyond_eof = false;
de9c0cec
KW
1861 QDECREF(bs->options);
1862 bs->options = NULL;
91af7014
HR
1863 QDECREF(bs->full_open_options);
1864 bs->full_open_options = NULL;
b338082b 1865
66f82cee 1866 if (bs->file != NULL) {
4f6fd349 1867 bdrv_unref(bs->file);
0ac9377d 1868 bs->file = NULL;
66f82cee 1869 }
b338082b 1870 }
98f90dba 1871
a7f53e26
MA
1872 if (bs->blk) {
1873 blk_dev_change_media_cb(bs->blk, false);
1874 }
9ca11154 1875
98f90dba
ZYW
1876 /*throttling disk I/O limits*/
1877 if (bs->io_limits_enabled) {
1878 bdrv_io_limits_disable(bs);
1879 }
33384421
HR
1880
1881 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
1882 g_free(ban);
1883 }
1884 QLIST_INIT(&bs->aio_notifiers);
b338082b
FB
1885}
1886
2bc93fed
MK
1887void bdrv_close_all(void)
1888{
1889 BlockDriverState *bs;
1890
dc364f4c 1891 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
ed78cda3
SH
1892 AioContext *aio_context = bdrv_get_aio_context(bs);
1893
1894 aio_context_acquire(aio_context);
2bc93fed 1895 bdrv_close(bs);
ed78cda3 1896 aio_context_release(aio_context);
2bc93fed
MK
1897 }
1898}
1899
dc364f4c
BC
1900/* make a BlockDriverState anonymous by removing from bdrv_state and
1901 * graph_bdrv_state list.
d22b2f41
RH
1902 Also, NULL terminate the device_name to prevent double remove */
1903void bdrv_make_anon(BlockDriverState *bs)
1904{
bfb197e0
MA
1905 /*
1906 * Take care to remove bs from bdrv_states only when it's actually
1907 * in it. Note that bs->device_list.tqe_prev is initially null,
1908 * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish
1909 * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
1910 * resetting it to null on remove.
1911 */
1912 if (bs->device_list.tqe_prev) {
dc364f4c 1913 QTAILQ_REMOVE(&bdrv_states, bs, device_list);
bfb197e0 1914 bs->device_list.tqe_prev = NULL;
d22b2f41 1915 }
dc364f4c
BC
1916 if (bs->node_name[0] != '\0') {
1917 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1918 }
1919 bs->node_name[0] = '\0';
d22b2f41
RH
1920}
1921
e023b2e2
PB
1922static void bdrv_rebind(BlockDriverState *bs)
1923{
1924 if (bs->drv && bs->drv->bdrv_rebind) {
1925 bs->drv->bdrv_rebind(bs);
1926 }
1927}
1928
4ddc07ca
PB
1929static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
1930 BlockDriverState *bs_src)
8802d1fd 1931{
4ddc07ca 1932 /* move some fields that need to stay attached to the device */
8802d1fd
JC
1933
1934 /* dev info */
1b7fd729 1935 bs_dest->guest_block_size = bs_src->guest_block_size;
4ddc07ca 1936 bs_dest->copy_on_read = bs_src->copy_on_read;
8802d1fd 1937
4ddc07ca 1938 bs_dest->enable_write_cache = bs_src->enable_write_cache;
c4a248a1 1939
cc0681c4
BC
1940 /* i/o throttled req */
1941 memcpy(&bs_dest->throttle_state,
1942 &bs_src->throttle_state,
1943 sizeof(ThrottleState));
1944 bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
1945 bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
4ddc07ca 1946 bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
8802d1fd 1947
8802d1fd 1948 /* r/w error */
4ddc07ca
PB
1949 bs_dest->on_read_error = bs_src->on_read_error;
1950 bs_dest->on_write_error = bs_src->on_write_error;
8802d1fd
JC
1951
1952 /* i/o status */
4ddc07ca
PB
1953 bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
1954 bs_dest->iostatus = bs_src->iostatus;
8802d1fd 1955
a9fc4408 1956 /* dirty bitmap */
e4654d2d 1957 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
a9fc4408 1958
9fcb0251
FZ
1959 /* reference count */
1960 bs_dest->refcnt = bs_src->refcnt;
1961
a9fc4408 1962 /* job */
4ddc07ca 1963 bs_dest->job = bs_src->job;
a9fc4408 1964
8802d1fd 1965 /* keep the same entry in bdrv_states */
dc364f4c 1966 bs_dest->device_list = bs_src->device_list;
7e7d56d9
MA
1967 bs_dest->blk = bs_src->blk;
1968
fbe40ff7
FZ
1969 memcpy(bs_dest->op_blockers, bs_src->op_blockers,
1970 sizeof(bs_dest->op_blockers));
4ddc07ca 1971}
8802d1fd 1972
4ddc07ca
PB
1973/*
1974 * Swap bs contents for two image chains while they are live,
1975 * while keeping required fields on the BlockDriverState that is
1976 * actually attached to a device.
1977 *
1978 * This will modify the BlockDriverState fields, and swap contents
1979 * between bs_new and bs_old. Both bs_new and bs_old are modified.
1980 *
bfb197e0 1981 * bs_new must not be attached to a BlockBackend.
4ddc07ca
PB
1982 *
1983 * This function does not create any image files.
1984 */
1985void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
1986{
1987 BlockDriverState tmp;
f6801b83 1988
6ee4ce1e
KW
1989 bdrv_drain(bs_new);
1990 bdrv_drain(bs_old);
1991
90ce8a06
BC
1992 /* The code needs to swap the node_name but simply swapping node_list won't
1993 * work so first remove the nodes from the graph list, do the swap then
1994 * insert them back if needed.
1995 */
1996 if (bs_new->node_name[0] != '\0') {
1997 QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
1998 }
1999 if (bs_old->node_name[0] != '\0') {
2000 QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
2001 }
2002
bfb197e0 2003 /* bs_new must be unattached and shouldn't have anything fancy enabled */
7e7d56d9 2004 assert(!bs_new->blk);
e4654d2d 2005 assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
4ddc07ca 2006 assert(bs_new->job == NULL);
4ddc07ca 2007 assert(bs_new->io_limits_enabled == false);
cc0681c4 2008 assert(!throttle_have_timer(&bs_new->throttle_state));
8802d1fd 2009
4ddc07ca
PB
2010 tmp = *bs_new;
2011 *bs_new = *bs_old;
2012 *bs_old = tmp;
a9fc4408 2013
4ddc07ca
PB
2014 /* there are some fields that should not be swapped, move them back */
2015 bdrv_move_feature_fields(&tmp, bs_old);
2016 bdrv_move_feature_fields(bs_old, bs_new);
2017 bdrv_move_feature_fields(bs_new, &tmp);
8802d1fd 2018
bfb197e0 2019 /* bs_new must remain unattached */
7e7d56d9 2020 assert(!bs_new->blk);
4ddc07ca
PB
2021
2022 /* Check a few fields that should remain attached to the device */
4ddc07ca 2023 assert(bs_new->job == NULL);
4ddc07ca 2024 assert(bs_new->io_limits_enabled == false);
cc0681c4 2025 assert(!throttle_have_timer(&bs_new->throttle_state));
e023b2e2 2026
90ce8a06
BC
2027 /* insert the nodes back into the graph node list if needed */
2028 if (bs_new->node_name[0] != '\0') {
2029 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
2030 }
2031 if (bs_old->node_name[0] != '\0') {
2032 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
2033 }
2034
6e93e7c4
KW
2035 /*
2036 * Update lh_first.le_prev for non-empty lists.
2037 *
2038 * The head of the op blocker list doesn't change because it is moved back
2039 * in bdrv_move_feature_fields().
2040 */
6ee4ce1e
KW
2041 assert(QLIST_EMPTY(&bs_old->tracked_requests));
2042 assert(QLIST_EMPTY(&bs_new->tracked_requests));
2043
6e93e7c4
KW
2044 QLIST_FIX_HEAD_PTR(&bs_new->children, next);
2045 QLIST_FIX_HEAD_PTR(&bs_old->children, next);
2046
e023b2e2 2047 bdrv_rebind(bs_new);
4ddc07ca
PB
2048 bdrv_rebind(bs_old);
2049}
2050
2051/*
2052 * Add new bs contents at the top of an image chain while the chain is
2053 * live, while keeping required fields on the top layer.
2054 *
2055 * This will modify the BlockDriverState fields, and swap contents
2056 * between bs_new and bs_top. Both bs_new and bs_top are modified.
2057 *
bfb197e0 2058 * bs_new must not be attached to a BlockBackend.
4ddc07ca
PB
2059 *
2060 * This function does not create any image files.
2061 */
2062void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2063{
2064 bdrv_swap(bs_new, bs_top);
2065
2066 /* The contents of 'tmp' will become bs_top, as we are
2067 * swapping bs_new and bs_top contents. */
8d24cce1 2068 bdrv_set_backing_hd(bs_top, bs_new);
6e93e7c4 2069 bdrv_attach_child(bs_top, bs_new, &child_backing);
8802d1fd
JC
2070}
2071
4f6fd349 2072static void bdrv_delete(BlockDriverState *bs)
b338082b 2073{
3e914655 2074 assert(!bs->job);
3718d8ab 2075 assert(bdrv_op_blocker_is_empty(bs));
4f6fd349 2076 assert(!bs->refcnt);
e4654d2d 2077 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
18846dee 2078
e1b5c52e
SH
2079 bdrv_close(bs);
2080
1b7bdbc1 2081 /* remove from list, if necessary */
d22b2f41 2082 bdrv_make_anon(bs);
34c6f050 2083
7267c094 2084 g_free(bs);
fc01f7e7
FB
2085}
2086
e97fc193
AL
2087/*
2088 * Run consistency checks on an image
2089 *
e076f338 2090 * Returns 0 if the check could be completed (it doesn't mean that the image is
a1c7273b 2091 * free of errors) or -errno when an internal error occurred. The results of the
e076f338 2092 * check are stored in res.
e97fc193 2093 */
4534ff54 2094int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
e97fc193 2095{
908bcd54
HR
2096 if (bs->drv == NULL) {
2097 return -ENOMEDIUM;
2098 }
e97fc193
AL
2099 if (bs->drv->bdrv_check == NULL) {
2100 return -ENOTSUP;
2101 }
2102
e076f338 2103 memset(res, 0, sizeof(*res));
4534ff54 2104 return bs->drv->bdrv_check(bs, res, fix);
e97fc193
AL
2105}
2106
8a426614
KW
2107#define COMMIT_BUF_SECTORS 2048
2108
33e3963e
FB
2109/* commit COW file into the raw image */
2110int bdrv_commit(BlockDriverState *bs)
2111{
19cb3738 2112 BlockDriver *drv = bs->drv;
72706ea4 2113 int64_t sector, total_sectors, length, backing_length;
8a426614 2114 int n, ro, open_flags;
0bce597d 2115 int ret = 0;
72706ea4 2116 uint8_t *buf = NULL;
33e3963e 2117
19cb3738
FB
2118 if (!drv)
2119 return -ENOMEDIUM;
6bb45158 2120
4dca4b63
NS
2121 if (!bs->backing_hd) {
2122 return -ENOTSUP;
33e3963e
FB
2123 }
2124
bb00021d
FZ
2125 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2126 bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
2d3735d3
SH
2127 return -EBUSY;
2128 }
2129
4dca4b63 2130 ro = bs->backing_hd->read_only;
4dca4b63
NS
2131 open_flags = bs->backing_hd->open_flags;
2132
2133 if (ro) {
0bce597d
JC
2134 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2135 return -EACCES;
4dca4b63 2136 }
ea2384d3 2137 }
33e3963e 2138
72706ea4
JC
2139 length = bdrv_getlength(bs);
2140 if (length < 0) {
2141 ret = length;
2142 goto ro_cleanup;
2143 }
2144
2145 backing_length = bdrv_getlength(bs->backing_hd);
2146 if (backing_length < 0) {
2147 ret = backing_length;
2148 goto ro_cleanup;
2149 }
2150
2151 /* If our top snapshot is larger than the backing file image,
2152 * grow the backing file image if possible. If not possible,
2153 * we must return an error */
2154 if (length > backing_length) {
2155 ret = bdrv_truncate(bs->backing_hd, length);
2156 if (ret < 0) {
2157 goto ro_cleanup;
2158 }
2159 }
2160
2161 total_sectors = length >> BDRV_SECTOR_BITS;
857d4f46
KW
2162
2163 /* qemu_try_blockalign() for bs will choose an alignment that works for
2164 * bs->backing_hd as well, so no need to compare the alignment manually. */
2165 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2166 if (buf == NULL) {
2167 ret = -ENOMEM;
2168 goto ro_cleanup;
2169 }
8a426614
KW
2170
2171 for (sector = 0; sector < total_sectors; sector += n) {
d663640c
PB
2172 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2173 if (ret < 0) {
2174 goto ro_cleanup;
2175 }
2176 if (ret) {
dabfa6cc
KW
2177 ret = bdrv_read(bs, sector, buf, n);
2178 if (ret < 0) {
8a426614
KW
2179 goto ro_cleanup;
2180 }
2181
dabfa6cc
KW
2182 ret = bdrv_write(bs->backing_hd, sector, buf, n);
2183 if (ret < 0) {
8a426614
KW
2184 goto ro_cleanup;
2185 }
ea2384d3 2186 }
33e3963e 2187 }
95389c86 2188
1d44952f
CH
2189 if (drv->bdrv_make_empty) {
2190 ret = drv->bdrv_make_empty(bs);
dabfa6cc
KW
2191 if (ret < 0) {
2192 goto ro_cleanup;
2193 }
1d44952f
CH
2194 bdrv_flush(bs);
2195 }
95389c86 2196
3f5075ae
CH
2197 /*
2198 * Make sure all data we wrote to the backing device is actually
2199 * stable on disk.
2200 */
dabfa6cc 2201 if (bs->backing_hd) {
3f5075ae 2202 bdrv_flush(bs->backing_hd);
dabfa6cc 2203 }
4dca4b63 2204
dabfa6cc 2205 ret = 0;
4dca4b63 2206ro_cleanup:
857d4f46 2207 qemu_vfree(buf);
4dca4b63
NS
2208
2209 if (ro) {
0bce597d
JC
2210 /* ignoring error return here */
2211 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
4dca4b63
NS
2212 }
2213
1d44952f 2214 return ret;
33e3963e
FB
2215}
2216
e8877497 2217int bdrv_commit_all(void)
6ab4b5ab
MA
2218{
2219 BlockDriverState *bs;
2220
dc364f4c 2221 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
ed78cda3
SH
2222 AioContext *aio_context = bdrv_get_aio_context(bs);
2223
2224 aio_context_acquire(aio_context);
272d2d8e
JC
2225 if (bs->drv && bs->backing_hd) {
2226 int ret = bdrv_commit(bs);
2227 if (ret < 0) {
ed78cda3 2228 aio_context_release(aio_context);
272d2d8e
JC
2229 return ret;
2230 }
e8877497 2231 }
ed78cda3 2232 aio_context_release(aio_context);
6ab4b5ab 2233 }
e8877497 2234 return 0;
6ab4b5ab
MA
2235}
2236
756e6736
KW
2237/*
2238 * Return values:
2239 * 0 - success
2240 * -EINVAL - backing format specified, but no file
2241 * -ENOSPC - can't update the backing file because no space is left in the
2242 * image file header
2243 * -ENOTSUP - format driver doesn't support changing the backing file
2244 */
2245int bdrv_change_backing_file(BlockDriverState *bs,
2246 const char *backing_file, const char *backing_fmt)
2247{
2248 BlockDriver *drv = bs->drv;
469ef350 2249 int ret;
756e6736 2250
5f377794
PB
2251 /* Backing file format doesn't make sense without a backing file */
2252 if (backing_fmt && !backing_file) {
2253 return -EINVAL;
2254 }
2255
756e6736 2256 if (drv->bdrv_change_backing_file != NULL) {
469ef350 2257 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
756e6736 2258 } else {
469ef350 2259 ret = -ENOTSUP;
756e6736 2260 }
469ef350
PB
2261
2262 if (ret == 0) {
2263 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2264 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2265 }
2266 return ret;
756e6736
KW
2267}
2268
6ebdcee2
JC
2269/*
2270 * Finds the image layer in the chain that has 'bs' as its backing file.
2271 *
2272 * active is the current topmost image.
2273 *
2274 * Returns NULL if bs is not found in active's image chain,
2275 * or if active == bs.
4caf0fcd
JC
2276 *
2277 * Returns the bottommost base image if bs == NULL.
6ebdcee2
JC
2278 */
2279BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2280 BlockDriverState *bs)
2281{
4caf0fcd
JC
2282 while (active && bs != active->backing_hd) {
2283 active = active->backing_hd;
6ebdcee2
JC
2284 }
2285
4caf0fcd
JC
2286 return active;
2287}
6ebdcee2 2288
4caf0fcd
JC
2289/* Given a BDS, searches for the base layer. */
2290BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2291{
2292 return bdrv_find_overlay(bs, NULL);
6ebdcee2
JC
2293}
2294
2295typedef struct BlkIntermediateStates {
2296 BlockDriverState *bs;
2297 QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
2298} BlkIntermediateStates;
2299
2300
2301/*
2302 * Drops images above 'base' up to and including 'top', and sets the image
2303 * above 'top' to have base as its backing file.
2304 *
2305 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2306 * information in 'bs' can be properly updated.
2307 *
2308 * E.g., this will convert the following chain:
2309 * bottom <- base <- intermediate <- top <- active
2310 *
2311 * to
2312 *
2313 * bottom <- base <- active
2314 *
2315 * It is allowed for bottom==base, in which case it converts:
2316 *
2317 * base <- intermediate <- top <- active
2318 *
2319 * to
2320 *
2321 * base <- active
2322 *
54e26900
JC
2323 * If backing_file_str is non-NULL, it will be used when modifying top's
2324 * overlay image metadata.
2325 *
6ebdcee2
JC
2326 * Error conditions:
2327 * if active == top, that is considered an error
2328 *
2329 */
2330int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
54e26900 2331 BlockDriverState *base, const char *backing_file_str)
6ebdcee2
JC
2332{
2333 BlockDriverState *intermediate;
2334 BlockDriverState *base_bs = NULL;
2335 BlockDriverState *new_top_bs = NULL;
2336 BlkIntermediateStates *intermediate_state, *next;
2337 int ret = -EIO;
2338
2339 QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2340 QSIMPLEQ_INIT(&states_to_delete);
2341
2342 if (!top->drv || !base->drv) {
2343 goto exit;
2344 }
2345
2346 new_top_bs = bdrv_find_overlay(active, top);
2347
2348 if (new_top_bs == NULL) {
2349 /* we could not find the image above 'top', this is an error */
2350 goto exit;
2351 }
2352
2353 /* special case of new_top_bs->backing_hd already pointing to base - nothing
2354 * to do, no intermediate images */
2355 if (new_top_bs->backing_hd == base) {
2356 ret = 0;
2357 goto exit;
2358 }
2359
2360 intermediate = top;
2361
2362 /* now we will go down through the list, and add each BDS we find
2363 * into our deletion queue, until we hit the 'base'
2364 */
2365 while (intermediate) {
5839e53b 2366 intermediate_state = g_new0(BlkIntermediateStates, 1);
6ebdcee2
JC
2367 intermediate_state->bs = intermediate;
2368 QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2369
2370 if (intermediate->backing_hd == base) {
2371 base_bs = intermediate->backing_hd;
2372 break;
2373 }
2374 intermediate = intermediate->backing_hd;
2375 }
2376 if (base_bs == NULL) {
2377 /* something went wrong, we did not end at the base. safely
2378 * unravel everything, and exit with error */
2379 goto exit;
2380 }
2381
2382 /* success - we can delete the intermediate states, and link top->base */
54e26900
JC
2383 backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
2384 ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
6ebdcee2
JC
2385 base_bs->drv ? base_bs->drv->format_name : "");
2386 if (ret) {
2387 goto exit;
2388 }
920beae1 2389 bdrv_set_backing_hd(new_top_bs, base_bs);
6ebdcee2
JC
2390
2391 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2392 /* so that bdrv_close() does not recursively close the chain */
920beae1 2393 bdrv_set_backing_hd(intermediate_state->bs, NULL);
4f6fd349 2394 bdrv_unref(intermediate_state->bs);
6ebdcee2
JC
2395 }
2396 ret = 0;
2397
2398exit:
2399 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2400 g_free(intermediate_state);
2401 }
2402 return ret;
2403}
2404
61007b31
SH
2405/**
2406 * Truncate file to 'offset' bytes (needed only for file protocols)
2407 */
2408int bdrv_truncate(BlockDriverState *bs, int64_t offset)
71d0770c 2409{
61007b31
SH
2410 BlockDriver *drv = bs->drv;
2411 int ret;
2412 if (!drv)
71d0770c 2413 return -ENOMEDIUM;
61007b31
SH
2414 if (!drv->bdrv_truncate)
2415 return -ENOTSUP;
2416 if (bs->read_only)
2417 return -EACCES;
71d0770c 2418
61007b31
SH
2419 ret = drv->bdrv_truncate(bs, offset);
2420 if (ret == 0) {
2421 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2422 bdrv_dirty_bitmap_truncate(bs);
2423 if (bs->blk) {
2424 blk_dev_resize_cb(bs->blk);
2425 }
c0191e76 2426 }
61007b31 2427 return ret;
71d0770c
AL
2428}
2429
61007b31
SH
2430/**
2431 * Length of a allocated file in bytes. Sparse files are counted by actual
2432 * allocated space. Return < 0 if error or unknown.
2433 */
2434int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
71d0770c 2435{
61007b31
SH
2436 BlockDriver *drv = bs->drv;
2437 if (!drv) {
2438 return -ENOMEDIUM;
8f4754ed 2439 }
61007b31
SH
2440 if (drv->bdrv_get_allocated_file_size) {
2441 return drv->bdrv_get_allocated_file_size(bs);
2442 }
2443 if (bs->file) {
2444 return bdrv_get_allocated_file_size(bs->file);
1c9805a3 2445 }
61007b31 2446 return -ENOTSUP;
1c9805a3 2447}
e7a8a783 2448
61007b31
SH
2449/**
2450 * Return number of sectors on success, -errno on error.
1c9805a3 2451 */
61007b31 2452int64_t bdrv_nb_sectors(BlockDriverState *bs)
1c9805a3 2453{
61007b31 2454 BlockDriver *drv = bs->drv;
498e386c 2455
61007b31
SH
2456 if (!drv)
2457 return -ENOMEDIUM;
2572b37a 2458
61007b31
SH
2459 if (drv->has_variable_length) {
2460 int ret = refresh_total_sectors(bs, bs->total_sectors);
2461 if (ret < 0) {
2462 return ret;
1c9805a3
SH
2463 }
2464 }
61007b31 2465 return bs->total_sectors;
1c9805a3 2466}
b338082b 2467
61007b31
SH
2468/**
2469 * Return length in bytes on success, -errno on error.
2470 * The length is always a multiple of BDRV_SECTOR_SIZE.
8d3b1a2d 2471 */
61007b31 2472int64_t bdrv_getlength(BlockDriverState *bs)
8d3b1a2d 2473{
61007b31 2474 int64_t ret = bdrv_nb_sectors(bs);
8d3b1a2d 2475
4a9c9ea0 2476 ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
61007b31 2477 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
fc01f7e7
FB
2478}
2479
61007b31
SH
2480/* return 0 as number of sectors if no device present or error */
2481void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
07d27a44 2482{
61007b31 2483 int64_t nb_sectors = bdrv_nb_sectors(bs);
07d27a44 2484
61007b31 2485 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
07d27a44
MA
2486}
2487
61007b31
SH
2488void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
2489 BlockdevOnError on_write_error)
fc01f7e7 2490{
61007b31
SH
2491 bs->on_read_error = on_read_error;
2492 bs->on_write_error = on_write_error;
83f64091
FB
2493}
2494
61007b31 2495BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
4105eaaa 2496{
61007b31 2497 return is_read ? bs->on_read_error : bs->on_write_error;
8d3b1a2d
KW
2498}
2499
61007b31 2500BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
d75cbb5e 2501{
61007b31 2502 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
9ce10c0b 2503
61007b31
SH
2504 switch (on_err) {
2505 case BLOCKDEV_ON_ERROR_ENOSPC:
2506 return (error == ENOSPC) ?
2507 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
2508 case BLOCKDEV_ON_ERROR_STOP:
2509 return BLOCK_ERROR_ACTION_STOP;
2510 case BLOCKDEV_ON_ERROR_REPORT:
2511 return BLOCK_ERROR_ACTION_REPORT;
2512 case BLOCKDEV_ON_ERROR_IGNORE:
2513 return BLOCK_ERROR_ACTION_IGNORE;
2514 default:
2515 abort();
d75cbb5e
PL
2516 }
2517}
2518
61007b31
SH
2519static void send_qmp_error_event(BlockDriverState *bs,
2520 BlockErrorAction action,
2521 bool is_read, int error)
83f64091 2522{
61007b31 2523 IoOperationType optype;
a3ef6571 2524
61007b31
SH
2525 optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
2526 qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
2527 bdrv_iostatus_is_enabled(bs),
2528 error == ENOSPC, strerror(error),
2529 &error_abort);
83f64091
FB
2530}
2531
61007b31
SH
2532/* This is done by device models because, while the block layer knows
2533 * about the error, it does not know whether an operation comes from
2534 * the device or the block layer (from a job, for example).
2535 */
2536void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
2537 bool is_read, int error)
83f64091 2538{
61007b31 2539 assert(error >= 0);
83f64091 2540
61007b31
SH
2541 if (action == BLOCK_ERROR_ACTION_STOP) {
2542 /* First set the iostatus, so that "info block" returns an iostatus
2543 * that matches the events raised so far (an additional error iostatus
2544 * is fine, but not a lost one).
2545 */
2546 bdrv_iostatus_set_err(bs, error);
83f64091 2547
61007b31
SH
2548 /* Then raise the request to stop the VM and the event.
2549 * qemu_system_vmstop_request_prepare has two effects. First,
2550 * it ensures that the STOP event always comes after the
2551 * BLOCK_IO_ERROR event. Second, it ensures that even if management
2552 * can observe the STOP event and do a "cont" before the STOP
2553 * event is issued, the VM will not stop. In this case, vm_start()
2554 * also ensures that the STOP/RESUME pair of events is emitted.
2555 */
2556 qemu_system_vmstop_request_prepare();
2557 send_qmp_error_event(bs, action, is_read, error);
2558 qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
2559 } else {
2560 send_qmp_error_event(bs, action, is_read, error);
2561 }
8d3b1a2d
KW
2562}
2563
61007b31 2564int bdrv_is_read_only(BlockDriverState *bs)
8d3b1a2d 2565{
61007b31 2566 return bs->read_only;
83f64091 2567}
83f64091 2568
61007b31 2569int bdrv_is_sg(BlockDriverState *bs)
f08145fe 2570{
61007b31 2571 return bs->sg;
f08145fe
KW
2572}
2573
61007b31 2574int bdrv_enable_write_cache(BlockDriverState *bs)
ab185921 2575{
61007b31 2576 return bs->enable_write_cache;
ab185921
SH
2577}
2578
61007b31 2579void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
da1fa91d 2580{
61007b31 2581 bs->enable_write_cache = wce;
ab185921 2582
61007b31
SH
2583 /* so a reopen() will preserve wce */
2584 if (wce) {
2585 bs->open_flags |= BDRV_O_CACHE_WB;
893a8f62 2586 } else {
61007b31 2587 bs->open_flags &= ~BDRV_O_CACHE_WB;
893a8f62 2588 }
da1fa91d
KW
2589}
2590
61007b31 2591int bdrv_is_encrypted(BlockDriverState *bs)
fc3959e4 2592{
61007b31
SH
2593 if (bs->backing_hd && bs->backing_hd->encrypted)
2594 return 1;
2595 return bs->encrypted;
fc3959e4
FZ
2596}
2597
61007b31 2598int bdrv_key_required(BlockDriverState *bs)
fc3959e4 2599{
61007b31
SH
2600 BlockDriverState *backing_hd = bs->backing_hd;
2601
2602 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2603 return 1;
2604 return (bs->encrypted && !bs->valid_key);
fc3959e4
FZ
2605}
2606
61007b31 2607int bdrv_set_key(BlockDriverState *bs, const char *key)
d0c7f642 2608{
d0c7f642 2609 int ret;
61007b31
SH
2610 if (bs->backing_hd && bs->backing_hd->encrypted) {
2611 ret = bdrv_set_key(bs->backing_hd, key);
2612 if (ret < 0)
2613 return ret;
2614 if (!bs->encrypted)
2615 return 0;
2616 }
2617 if (!bs->encrypted) {
2618 return -EINVAL;
2619 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
d0c7f642
KW
2620 return -ENOMEDIUM;
2621 }
61007b31 2622 ret = bs->drv->bdrv_set_key(bs, key);
b9c64947 2623 if (ret < 0) {
61007b31
SH
2624 bs->valid_key = 0;
2625 } else if (!bs->valid_key) {
2626 bs->valid_key = 1;
2627 if (bs->blk) {
2628 /* call the change callback now, we skipped it on open */
2629 blk_dev_change_media_cb(bs->blk, true);
2630 }
1b0288ae 2631 }
61007b31
SH
2632 return ret;
2633}
f08f2dda 2634
c5fbe571 2635/*
61007b31
SH
2636 * Provide an encryption key for @bs.
2637 * If @key is non-null:
2638 * If @bs is not encrypted, fail.
2639 * Else if the key is invalid, fail.
2640 * Else set @bs's key to @key, replacing the existing key, if any.
2641 * If @key is null:
2642 * If @bs is encrypted and still lacks a key, fail.
2643 * Else do nothing.
2644 * On failure, store an error object through @errp if non-null.
c5fbe571 2645 */
61007b31 2646void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
c5fbe571 2647{
61007b31
SH
2648 if (key) {
2649 if (!bdrv_is_encrypted(bs)) {
2650 error_setg(errp, "Node '%s' is not encrypted",
2651 bdrv_get_device_or_node_name(bs));
2652 } else if (bdrv_set_key(bs, key) < 0) {
2653 error_set(errp, QERR_INVALID_PASSWORD);
4d2855a3
MA
2654 }
2655 } else {
2656 if (bdrv_key_required(bs)) {
b1ca6391
MA
2657 error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2658 "'%s' (%s) is encrypted",
81e5f78a 2659 bdrv_get_device_or_node_name(bs),
4d2855a3
MA
2660 bdrv_get_encrypted_filename(bs));
2661 }
2662 }
2663}
2664
61007b31 2665const char *bdrv_get_format_name(BlockDriverState *bs)
40b4f539 2666{
61007b31 2667 return bs->drv ? bs->drv->format_name : NULL;
40b4f539
KW
2668}
2669
/*
 * qsort() comparator for an array whose elements are C strings
 * (const char *).
 *
 * qsort() passes pointers to the array elements, not the elements
 * themselves, so each argument must be dereferenced once to reach the
 * string that is to be compared.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
2674
61007b31
SH
2675void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2676 void *opaque)
40b4f539 2677{
61007b31
SH
2678 BlockDriver *drv;
2679 int count = 0;
2680 int i;
2681 const char **formats = NULL;
40b4f539 2682
61007b31
SH
2683 QLIST_FOREACH(drv, &bdrv_drivers, list) {
2684 if (drv->format_name) {
2685 bool found = false;
2686 int i = count;
2687 while (formats && i && !found) {
2688 found = !strcmp(formats[--i], drv->format_name);
2689 }
e2a305fb 2690
61007b31
SH
2691 if (!found) {
2692 formats = g_renew(const char *, formats, count + 1);
2693 formats[count++] = drv->format_name;
2694 }
6c5a42ac 2695 }
61007b31 2696 }
6c5a42ac 2697
61007b31 2698 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
40b4f539 2699
61007b31
SH
2700 for (i = 0; i < count; i++) {
2701 it(opaque, formats[i]);
2702 }
40b4f539 2703
61007b31
SH
2704 g_free(formats);
2705}
40b4f539 2706
61007b31
SH
2707/* This function is to find a node in the bs graph */
2708BlockDriverState *bdrv_find_node(const char *node_name)
2709{
2710 BlockDriverState *bs;
391827eb 2711
61007b31 2712 assert(node_name);
40b4f539 2713
61007b31
SH
2714 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2715 if (!strcmp(node_name, bs->node_name)) {
2716 return bs;
40b4f539
KW
2717 }
2718 }
61007b31 2719 return NULL;
40b4f539
KW
2720}
2721
61007b31
SH
2722/* Put this QMP function here so it can access the static graph_bdrv_states. */
2723BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
40b4f539 2724{
61007b31
SH
2725 BlockDeviceInfoList *list, *entry;
2726 BlockDriverState *bs;
40b4f539 2727
61007b31
SH
2728 list = NULL;
2729 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2730 BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2731 if (!info) {
2732 qapi_free_BlockDeviceInfoList(list);
2733 return NULL;
301db7c2 2734 }
61007b31
SH
2735 entry = g_malloc0(sizeof(*entry));
2736 entry->value = info;
2737 entry->next = list;
2738 list = entry;
301db7c2
RH
2739 }
2740
61007b31
SH
2741 return list;
2742}
40b4f539 2743
61007b31
SH
2744BlockDriverState *bdrv_lookup_bs(const char *device,
2745 const char *node_name,
2746 Error **errp)
2747{
2748 BlockBackend *blk;
2749 BlockDriverState *bs;
40b4f539 2750
61007b31
SH
2751 if (device) {
2752 blk = blk_by_name(device);
40b4f539 2753
61007b31
SH
2754 if (blk) {
2755 return blk_bs(blk);
2756 }
2757 }
40b4f539 2758
61007b31
SH
2759 if (node_name) {
2760 bs = bdrv_find_node(node_name);
6d519a5f 2761
61007b31
SH
2762 if (bs) {
2763 return bs;
2764 }
40b4f539
KW
2765 }
2766
61007b31
SH
2767 error_setg(errp, "Cannot find device=%s nor node_name=%s",
2768 device ? device : "",
2769 node_name ? node_name : "");
2770 return NULL;
40b4f539
KW
2771}
2772
61007b31
SH
2773/* If 'base' is in the same chain as 'top', return true. Otherwise,
2774 * return false. If either argument is NULL, return false. */
2775bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
83f64091 2776{
61007b31
SH
2777 while (top && top != base) {
2778 top = top->backing_hd;
02c50efe 2779 }
61007b31
SH
2780
2781 return top != NULL;
02c50efe
FZ
2782}
2783
61007b31 2784BlockDriverState *bdrv_next_node(BlockDriverState *bs)
02c50efe 2785{
61007b31
SH
2786 if (!bs) {
2787 return QTAILQ_FIRST(&graph_bdrv_states);
02c50efe 2788 }
61007b31 2789 return QTAILQ_NEXT(bs, node_list);
83f64091
FB
2790}
2791
61007b31 2792BlockDriverState *bdrv_next(BlockDriverState *bs)
83f64091 2793{
61007b31
SH
2794 if (!bs) {
2795 return QTAILQ_FIRST(&bdrv_states);
857d4f46 2796 }
61007b31 2797 return QTAILQ_NEXT(bs, device_list);
83f64091 2798}
beac80cd 2799
61007b31 2800const char *bdrv_get_node_name(const BlockDriverState *bs)
83f64091 2801{
61007b31 2802 return bs->node_name;
beac80cd
FB
2803}
2804
61007b31
SH
2805/* TODO check what callers really want: bs->node_name or blk_name() */
2806const char *bdrv_get_device_name(const BlockDriverState *bs)
beac80cd 2807{
61007b31 2808 return bs->blk ? blk_name(bs->blk) : "";
f141eafe 2809}
83f64091 2810
61007b31
SH
2811/* This can be used to identify nodes that might not have a device
2812 * name associated. Since node and device names live in the same
2813 * namespace, the result is unambiguous. The exception is if both are
2814 * absent, then this returns an empty (non-null) string. */
2815const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
f141eafe 2816{
61007b31 2817 return bs->blk ? blk_name(bs->blk) : bs->node_name;
beac80cd 2818}
beac80cd 2819
61007b31 2820int bdrv_get_flags(BlockDriverState *bs)
0b5a2445 2821{
61007b31 2822 return bs->open_flags;
0b5a2445
PB
2823}
2824
61007b31 2825int bdrv_has_zero_init_1(BlockDriverState *bs)
68485420 2826{
61007b31 2827 return 1;
0b5a2445
PB
2828}
2829
61007b31 2830int bdrv_has_zero_init(BlockDriverState *bs)
0b5a2445 2831{
61007b31 2832 assert(bs->drv);
0b5a2445 2833
61007b31
SH
2834 /* If BS is a copy on write image, it is initialized to
2835 the contents of the base image, which may not be zeroes. */
2836 if (bs->backing_hd) {
2837 return 0;
2838 }
2839 if (bs->drv->bdrv_has_zero_init) {
2840 return bs->drv->bdrv_has_zero_init(bs);
0b5a2445 2841 }
61007b31
SH
2842
2843 /* safe default */
2844 return 0;
68485420
KW
2845}
2846
61007b31 2847bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
b2a61371 2848{
61007b31 2849 BlockDriverInfo bdi;
b2a61371 2850
61007b31
SH
2851 if (bs->backing_hd) {
2852 return false;
2853 }
2854
2855 if (bdrv_get_info(bs, &bdi) == 0) {
2856 return bdi.unallocated_blocks_are_zero;
b2a61371
SH
2857 }
2858
61007b31 2859 return false;
b2a61371
SH
2860}
2861
61007b31 2862bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
68485420 2863{
61007b31 2864 BlockDriverInfo bdi;
68485420 2865
61007b31
SH
2866 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
2867 return false;
2868 }
68485420 2869
61007b31
SH
2870 if (bdrv_get_info(bs, &bdi) == 0) {
2871 return bdi.can_write_zeroes_with_unmap;
2872 }
68485420 2873
61007b31 2874 return false;
68485420
KW
2875}
2876
61007b31 2877const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
b2e12bc6 2878{
61007b31
SH
2879 if (bs->backing_hd && bs->backing_hd->encrypted)
2880 return bs->backing_file;
2881 else if (bs->encrypted)
2882 return bs->filename;
2883 else
2884 return NULL;
b2e12bc6
CH
2885}
2886
61007b31
SH
2887void bdrv_get_backing_filename(BlockDriverState *bs,
2888 char *filename, int filename_size)
016f5cf6 2889{
61007b31
SH
2890 pstrcpy(filename, filename_size, bs->backing_file);
2891}
d318aea9 2892
61007b31
SH
2893int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2894{
2895 BlockDriver *drv = bs->drv;
2896 if (!drv)
2897 return -ENOMEDIUM;
2898 if (!drv->bdrv_get_info)
2899 return -ENOTSUP;
2900 memset(bdi, 0, sizeof(*bdi));
2901 return drv->bdrv_get_info(bs, bdi);
2902}
016f5cf6 2903
61007b31
SH
2904ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
2905{
2906 BlockDriver *drv = bs->drv;
2907 if (drv && drv->bdrv_get_specific_info) {
2908 return drv->bdrv_get_specific_info(bs);
2909 }
2910 return NULL;
016f5cf6
AG
2911}
2912
61007b31 2913void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
4265d620 2914{
61007b31
SH
2915 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
2916 return;
2917 }
4265d620 2918
61007b31 2919 bs->drv->bdrv_debug_event(bs, event);
4265d620
PB
2920}
2921
61007b31
SH
2922int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
2923 const char *tag)
4265d620 2924{
61007b31
SH
2925 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
2926 bs = bs->file;
2927 }
4265d620 2928
61007b31
SH
2929 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
2930 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
2931 }
4265d620 2932
61007b31 2933 return -ENOTSUP;
4265d620
PB
2934}
2935
61007b31 2936int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
ea2384d3 2937{
61007b31
SH
2938 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
2939 bs = bs->file;
2940 }
ce1a14dc 2941
61007b31
SH
2942 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
2943 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
2944 }
2945
2946 return -ENOTSUP;
eb852011
MA
2947}
2948
61007b31 2949int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
ce1a14dc 2950{
61007b31
SH
2951 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
2952 bs = bs->file;
2953 }
ce1a14dc 2954
61007b31
SH
2955 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
2956 return bs->drv->bdrv_debug_resume(bs, tag);
2957 }
ce1a14dc 2958
61007b31 2959 return -ENOTSUP;
f197fe2b
FZ
2960}
2961
61007b31 2962bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
ce1a14dc 2963{
61007b31
SH
2964 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
2965 bs = bs->file;
f197fe2b 2966 }
19cb3738 2967
61007b31
SH
2968 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
2969 return bs->drv->bdrv_debug_is_suspended(bs, tag);
2970 }
f9f05dc5 2971
61007b31
SH
2972 return false;
2973}
f9f05dc5 2974
61007b31 2975int bdrv_is_snapshot(BlockDriverState *bs)
f9f05dc5 2976{
61007b31 2977 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
f9f05dc5
KW
2978}
2979
61007b31
SH
2980/* backing_file can either be relative, or absolute, or a protocol. If it is
2981 * relative, it must be relative to the chain. So, passing in bs->filename
2982 * from a BDS as backing_file should not be done, as that may be relative to
2983 * the CWD rather than the chain. */
2984BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2985 const char *backing_file)
f9f05dc5 2986{
61007b31
SH
2987 char *filename_full = NULL;
2988 char *backing_file_full = NULL;
2989 char *filename_tmp = NULL;
2990 int is_protocol = 0;
2991 BlockDriverState *curr_bs = NULL;
2992 BlockDriverState *retval = NULL;
f9f05dc5 2993
61007b31
SH
2994 if (!bs || !bs->drv || !backing_file) {
2995 return NULL;
f9f05dc5
KW
2996 }
2997
61007b31
SH
2998 filename_full = g_malloc(PATH_MAX);
2999 backing_file_full = g_malloc(PATH_MAX);
3000 filename_tmp = g_malloc(PATH_MAX);
f9f05dc5 3001
61007b31 3002 is_protocol = path_has_protocol(backing_file);
f9f05dc5 3003
61007b31 3004 for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
f9f05dc5 3005
61007b31
SH
3006 /* If either of the filename paths is actually a protocol, then
3007 * compare unmodified paths; otherwise make paths relative */
3008 if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
3009 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
3010 retval = curr_bs->backing_hd;
3011 break;
3012 }
3013 } else {
3014 /* If not an absolute filename path, make it relative to the current
3015 * image's filename path */
3016 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3017 backing_file);
f9f05dc5 3018
61007b31
SH
3019 /* We are going to compare absolute pathnames */
3020 if (!realpath(filename_tmp, filename_full)) {
3021 continue;
3022 }
07f07615 3023
61007b31
SH
3024 /* We need to make sure the backing filename we are comparing against
3025 * is relative to the current image filename (or absolute) */
3026 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3027 curr_bs->backing_file);
07f07615 3028
61007b31
SH
3029 if (!realpath(filename_tmp, backing_file_full)) {
3030 continue;
3031 }
eb489bb1 3032
61007b31
SH
3033 if (strcmp(backing_file_full, filename_full) == 0) {
3034 retval = curr_bs->backing_hd;
3035 break;
3036 }
3037 }
eb489bb1
KW
3038 }
3039
61007b31
SH
3040 g_free(filename_full);
3041 g_free(backing_file_full);
3042 g_free(filename_tmp);
3043 return retval;
3044}
3045
3046int bdrv_get_backing_file_depth(BlockDriverState *bs)
3047{
3048 if (!bs->drv) {
3049 return 0;
eb489bb1
KW
3050 }
3051
61007b31
SH
3052 if (!bs->backing_hd) {
3053 return 0;
ca716364
KW
3054 }
3055
61007b31
SH
3056 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
3057}
07f07615 3058
61007b31
SH
3059void bdrv_init(void)
3060{
3061 module_call_init(MODULE_INIT_BLOCK);
3062}
29cdb251 3063
61007b31
SH
3064void bdrv_init_with_whitelist(void)
3065{
3066 use_bdrv_whitelist = 1;
3067 bdrv_init();
07f07615
PB
3068}
3069
5a8a30db 3070void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
0f15423c 3071{
5a8a30db
KW
3072 Error *local_err = NULL;
3073 int ret;
3074
3456a8d1
KW
3075 if (!bs->drv) {
3076 return;
3077 }
3078
7ea2d269
AK
3079 if (!(bs->open_flags & BDRV_O_INCOMING)) {
3080 return;
3081 }
3082 bs->open_flags &= ~BDRV_O_INCOMING;
3083
3456a8d1 3084 if (bs->drv->bdrv_invalidate_cache) {
5a8a30db 3085 bs->drv->bdrv_invalidate_cache(bs, &local_err);
3456a8d1 3086 } else if (bs->file) {
5a8a30db
KW
3087 bdrv_invalidate_cache(bs->file, &local_err);
3088 }
3089 if (local_err) {
3090 error_propagate(errp, local_err);
3091 return;
0f15423c 3092 }
3456a8d1 3093
5a8a30db
KW
3094 ret = refresh_total_sectors(bs, bs->total_sectors);
3095 if (ret < 0) {
3096 error_setg_errno(errp, -ret, "Could not refresh total sector count");
3097 return;
3098 }
0f15423c
AL
3099}
3100
5a8a30db 3101void bdrv_invalidate_cache_all(Error **errp)
0f15423c
AL
3102{
3103 BlockDriverState *bs;
5a8a30db 3104 Error *local_err = NULL;
0f15423c 3105
dc364f4c 3106 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
ed78cda3
SH
3107 AioContext *aio_context = bdrv_get_aio_context(bs);
3108
3109 aio_context_acquire(aio_context);
5a8a30db 3110 bdrv_invalidate_cache(bs, &local_err);
ed78cda3 3111 aio_context_release(aio_context);
5a8a30db
KW
3112 if (local_err) {
3113 error_propagate(errp, local_err);
3114 return;
3115 }
0f15423c
AL
3116 }
3117}
3118
19cb3738
FB
3119/**************************************************************/
3120/* removable device support */
3121
3122/**
3123 * Return TRUE if the media is present
3124 */
3125int bdrv_is_inserted(BlockDriverState *bs)
3126{
3127 BlockDriver *drv = bs->drv;
a1aff5bf 3128
19cb3738
FB
3129 if (!drv)
3130 return 0;
3131 if (!drv->bdrv_is_inserted)
a1aff5bf
MA
3132 return 1;
3133 return drv->bdrv_is_inserted(bs);
19cb3738
FB
3134}
3135
3136/**
8e49ca46
MA
3137 * Return whether the media changed since the last call to this
3138 * function, or -ENOTSUP if we don't know. Most drivers don't know.
19cb3738
FB
3139 */
3140int bdrv_media_changed(BlockDriverState *bs)
3141{
3142 BlockDriver *drv = bs->drv;
19cb3738 3143
8e49ca46
MA
3144 if (drv && drv->bdrv_media_changed) {
3145 return drv->bdrv_media_changed(bs);
3146 }
3147 return -ENOTSUP;
19cb3738
FB
3148}
3149
3150/**
3151 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3152 */
f36f3949 3153void bdrv_eject(BlockDriverState *bs, bool eject_flag)
19cb3738
FB
3154{
3155 BlockDriver *drv = bs->drv;
bfb197e0 3156 const char *device_name;
19cb3738 3157
822e1cd1
MA
3158 if (drv && drv->bdrv_eject) {
3159 drv->bdrv_eject(bs, eject_flag);
19cb3738 3160 }
6f382ed2 3161
bfb197e0
MA
3162 device_name = bdrv_get_device_name(bs);
3163 if (device_name[0] != '\0') {
3164 qapi_event_send_device_tray_moved(device_name,
a5ee7bd4 3165 eject_flag, &error_abort);
6f382ed2 3166 }
19cb3738
FB
3167}
3168
19cb3738
FB
3169/**
3170 * Lock or unlock the media (if it is locked, the user won't be able
3171 * to eject it manually).
3172 */
025e849a 3173void bdrv_lock_medium(BlockDriverState *bs, bool locked)
19cb3738
FB
3174{
3175 BlockDriver *drv = bs->drv;
3176
025e849a 3177 trace_bdrv_lock_medium(bs, locked);
b8c6d095 3178
025e849a
MA
3179 if (drv && drv->bdrv_lock_medium) {
3180 drv->bdrv_lock_medium(bs, locked);
19cb3738
FB
3181 }
3182}
985a03b0 3183
1b7fd729 3184void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
7b6f9300 3185{
1b7fd729 3186 bs->guest_block_size = align;
7b6f9300 3187}
7cd1e32a 3188
0db6e54a
FZ
3189BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
3190{
3191 BdrvDirtyBitmap *bm;
3192
3193 assert(name);
3194 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3195 if (bm->name && !strcmp(name, bm->name)) {
3196 return bm;
3197 }
3198 }
3199 return NULL;
3200}
3201
20dca810 3202void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
0db6e54a 3203{
9bd2b08f 3204 assert(!bdrv_dirty_bitmap_frozen(bitmap));
0db6e54a
FZ
3205 g_free(bitmap->name);
3206 bitmap->name = NULL;
3207}
3208
3209BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
5fba6c0e 3210 uint32_t granularity,
0db6e54a 3211 const char *name,
b8afb520 3212 Error **errp)
7cd1e32a
LS
3213{
3214 int64_t bitmap_size;
e4654d2d 3215 BdrvDirtyBitmap *bitmap;
5fba6c0e 3216 uint32_t sector_granularity;
a55eb92c 3217
50717e94
PB
3218 assert((granularity & (granularity - 1)) == 0);
3219
0db6e54a
FZ
3220 if (name && bdrv_find_dirty_bitmap(bs, name)) {
3221 error_setg(errp, "Bitmap already exists: %s", name);
3222 return NULL;
3223 }
5fba6c0e
JS
3224 sector_granularity = granularity >> BDRV_SECTOR_BITS;
3225 assert(sector_granularity);
57322b78 3226 bitmap_size = bdrv_nb_sectors(bs);
b8afb520
FZ
3227 if (bitmap_size < 0) {
3228 error_setg_errno(errp, -bitmap_size, "could not get length of device");
3229 errno = -bitmap_size;
3230 return NULL;
3231 }
5839e53b 3232 bitmap = g_new0(BdrvDirtyBitmap, 1);
5fba6c0e 3233 bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
e74e6b78 3234 bitmap->size = bitmap_size;
0db6e54a 3235 bitmap->name = g_strdup(name);
b8e6fb75 3236 bitmap->disabled = false;
e4654d2d
FZ
3237 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
3238 return bitmap;
3239}
3240
9bd2b08f
JS
3241bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
3242{
3243 return bitmap->successor;
3244}
3245
b8e6fb75
JS
3246bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
3247{
9bd2b08f
JS
3248 return !(bitmap->disabled || bitmap->successor);
3249}
3250
9abe3bdc
JS
3251DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
3252{
3253 if (bdrv_dirty_bitmap_frozen(bitmap)) {
3254 return DIRTY_BITMAP_STATUS_FROZEN;
3255 } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3256 return DIRTY_BITMAP_STATUS_DISABLED;
3257 } else {
3258 return DIRTY_BITMAP_STATUS_ACTIVE;
3259 }
3260}
3261
9bd2b08f
JS
3262/**
3263 * Create a successor bitmap destined to replace this bitmap after an operation.
3264 * Requires that the bitmap is not frozen and has no successor.
3265 */
3266int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
3267 BdrvDirtyBitmap *bitmap, Error **errp)
3268{
3269 uint64_t granularity;
3270 BdrvDirtyBitmap *child;
3271
3272 if (bdrv_dirty_bitmap_frozen(bitmap)) {
3273 error_setg(errp, "Cannot create a successor for a bitmap that is "
3274 "currently frozen");
3275 return -1;
3276 }
3277 assert(!bitmap->successor);
3278
3279 /* Create an anonymous successor */
3280 granularity = bdrv_dirty_bitmap_granularity(bitmap);
3281 child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
3282 if (!child) {
3283 return -1;
3284 }
3285
3286 /* Successor will be on or off based on our current state. */
3287 child->disabled = bitmap->disabled;
3288
3289 /* Install the successor and freeze the parent */
3290 bitmap->successor = child;
3291 return 0;
3292}
3293
3294/**
3295 * For a bitmap with a successor, yield our name to the successor,
3296 * delete the old bitmap, and return a handle to the new bitmap.
3297 */
3298BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
3299 BdrvDirtyBitmap *bitmap,
3300 Error **errp)
3301{
3302 char *name;
3303 BdrvDirtyBitmap *successor = bitmap->successor;
3304
3305 if (successor == NULL) {
3306 error_setg(errp, "Cannot relinquish control if "
3307 "there's no successor present");
3308 return NULL;
3309 }
3310
3311 name = bitmap->name;
3312 bitmap->name = NULL;
3313 successor->name = name;
3314 bitmap->successor = NULL;
3315 bdrv_release_dirty_bitmap(bs, bitmap);
3316
3317 return successor;
3318}
3319
3320/**
3321 * In cases of failure where we can no longer safely delete the parent,
3322 * we may wish to re-join the parent and child/successor.
3323 * The merged parent will be un-frozen, but not explicitly re-enabled.
3324 */
3325BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
3326 BdrvDirtyBitmap *parent,
3327 Error **errp)
3328{
3329 BdrvDirtyBitmap *successor = parent->successor;
3330
3331 if (!successor) {
3332 error_setg(errp, "Cannot reclaim a successor when none is present");
3333 return NULL;
3334 }
3335
3336 if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
3337 error_setg(errp, "Merging of parent and successor bitmap failed");
3338 return NULL;
3339 }
3340 bdrv_release_dirty_bitmap(bs, successor);
3341 parent->successor = NULL;
3342
3343 return parent;
b8e6fb75
JS
3344}
3345
ce1ffea8
JS
3346/**
3347 * Truncates _all_ bitmaps attached to a BDS.
3348 */
3349static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
3350{
3351 BdrvDirtyBitmap *bitmap;
3352 uint64_t size = bdrv_nb_sectors(bs);
3353
3354 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
06207b0f 3355 assert(!bdrv_dirty_bitmap_frozen(bitmap));
ce1ffea8 3356 hbitmap_truncate(bitmap->bitmap, size);
5270b6a0 3357 bitmap->size = size;
ce1ffea8
JS
3358 }
3359}
3360
e4654d2d
FZ
3361void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
3362{
3363 BdrvDirtyBitmap *bm, *next;
3364 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
3365 if (bm == bitmap) {
9bd2b08f 3366 assert(!bdrv_dirty_bitmap_frozen(bm));
e4654d2d
FZ
3367 QLIST_REMOVE(bitmap, list);
3368 hbitmap_free(bitmap->bitmap);
0db6e54a 3369 g_free(bitmap->name);
e4654d2d
FZ
3370 g_free(bitmap);
3371 return;
a55eb92c 3372 }
7cd1e32a
LS
3373 }
3374}
3375
b8e6fb75
JS
3376void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3377{
9bd2b08f 3378 assert(!bdrv_dirty_bitmap_frozen(bitmap));
b8e6fb75
JS
3379 bitmap->disabled = true;
3380}
3381
3382void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3383{
9bd2b08f 3384 assert(!bdrv_dirty_bitmap_frozen(bitmap));
b8e6fb75
JS
3385 bitmap->disabled = false;
3386}
3387
21b56835
FZ
3388BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
3389{
3390 BdrvDirtyBitmap *bm;
3391 BlockDirtyInfoList *list = NULL;
3392 BlockDirtyInfoList **plist = &list;
3393
3394 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
5839e53b
MA
3395 BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
3396 BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
20dca810 3397 info->count = bdrv_get_dirty_count(bm);
592fdd02 3398 info->granularity = bdrv_dirty_bitmap_granularity(bm);
0db6e54a
FZ
3399 info->has_name = !!bm->name;
3400 info->name = g_strdup(bm->name);
9abe3bdc 3401 info->status = bdrv_dirty_bitmap_status(bm);
21b56835
FZ
3402 entry->value = info;
3403 *plist = entry;
3404 plist = &entry->next;
3405 }
3406
3407 return list;
3408}
3409
e4654d2d 3410int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
7cd1e32a 3411{
e4654d2d
FZ
3412 if (bitmap) {
3413 return hbitmap_get(bitmap->bitmap, sector);
7cd1e32a
LS
3414 } else {
3415 return 0;
3416 }
3417}
3418
341ebc2f
JS
3419/**
3420 * Chooses a default granularity based on the existing cluster size,
3421 * but clamped between [4K, 64K]. Defaults to 64K in the case that there
3422 * is no cluster size information available.
3423 */
3424uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
3425{
3426 BlockDriverInfo bdi;
3427 uint32_t granularity;
3428
3429 if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
3430 granularity = MAX(4096, bdi.cluster_size);
3431 granularity = MIN(65536, granularity);
3432 } else {
3433 granularity = 65536;
3434 }
3435
3436 return granularity;
3437}
3438
592fdd02
JS
3439uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
3440{
3441 return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
3442}
3443
20dca810 3444void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
1755da16 3445{
e4654d2d 3446 hbitmap_iter_init(hbi, bitmap->bitmap, 0);
1755da16
PB
3447}
3448
20dca810 3449void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
c4237dfa
VSO
3450 int64_t cur_sector, int nr_sectors)
3451{
b8e6fb75 3452 assert(bdrv_dirty_bitmap_enabled(bitmap));
c4237dfa
VSO
3453 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3454}
3455
20dca810 3456void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
c4237dfa
VSO
3457 int64_t cur_sector, int nr_sectors)
3458{
b8e6fb75 3459 assert(bdrv_dirty_bitmap_enabled(bitmap));
c4237dfa
VSO
3460 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3461}
3462
e74e6b78
JS
3463void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3464{
3465 assert(bdrv_dirty_bitmap_enabled(bitmap));
3466 hbitmap_reset(bitmap->bitmap, 0, bitmap->size);
3467}
3468
e0c47b6c
SH
3469void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
3470 int nr_sectors)
1755da16 3471{
e4654d2d
FZ
3472 BdrvDirtyBitmap *bitmap;
3473 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
b8e6fb75
JS
3474 if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3475 continue;
3476 }
e4654d2d
FZ
3477 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3478 }
1755da16
PB
3479}
3480
e0c47b6c
SH
3481void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3482 int nr_sectors)
7cd1e32a 3483{
e4654d2d
FZ
3484 BdrvDirtyBitmap *bitmap;
3485 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
b8e6fb75
JS
3486 if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3487 continue;
3488 }
e4654d2d
FZ
3489 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3490 }
7cd1e32a 3491}
aaa0eb75 3492
d58d8453
JS
3493/**
3494 * Advance an HBitmapIter to an arbitrary offset.
3495 */
3496void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
3497{
3498 assert(hbi->hb);
3499 hbitmap_iter_init(hbi, hbi->hb, offset);
3500}
3501
20dca810 3502int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
aaa0eb75 3503{
e4654d2d 3504 return hbitmap_count(bitmap->bitmap);
aaa0eb75 3505}
f88e1a42 3506
9fcb0251
FZ
3507/* Get a reference to bs */
3508void bdrv_ref(BlockDriverState *bs)
3509{
3510 bs->refcnt++;
3511}
3512
3513/* Release a previously grabbed reference to bs.
3514 * If after releasing, reference count is zero, the BlockDriverState is
3515 * deleted. */
3516void bdrv_unref(BlockDriverState *bs)
3517{
9a4d5ca6
JC
3518 if (!bs) {
3519 return;
3520 }
9fcb0251
FZ
3521 assert(bs->refcnt > 0);
3522 if (--bs->refcnt == 0) {
3523 bdrv_delete(bs);
3524 }
3525}
3526
fbe40ff7
FZ
3527struct BdrvOpBlocker {
3528 Error *reason;
3529 QLIST_ENTRY(BdrvOpBlocker) list;
3530};
3531
3532bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3533{
3534 BdrvOpBlocker *blocker;
3535 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3536 if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3537 blocker = QLIST_FIRST(&bs->op_blockers[op]);
3538 if (errp) {
81e5f78a
AG
3539 error_setg(errp, "Node '%s' is busy: %s",
3540 bdrv_get_device_or_node_name(bs),
bfb197e0 3541 error_get_pretty(blocker->reason));
fbe40ff7
FZ
3542 }
3543 return true;
3544 }
3545 return false;
3546}
3547
3548void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3549{
3550 BdrvOpBlocker *blocker;
3551 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3552
5839e53b 3553 blocker = g_new0(BdrvOpBlocker, 1);
fbe40ff7
FZ
3554 blocker->reason = reason;
3555 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3556}
3557
3558void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3559{
3560 BdrvOpBlocker *blocker, *next;
3561 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3562 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3563 if (blocker->reason == reason) {
3564 QLIST_REMOVE(blocker, list);
3565 g_free(blocker);
3566 }
3567 }
3568}
3569
3570void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3571{
3572 int i;
3573 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3574 bdrv_op_block(bs, i, reason);
3575 }
3576}
3577
3578void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3579{
3580 int i;
3581 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3582 bdrv_op_unblock(bs, i, reason);
3583 }
3584}
3585
3586bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3587{
3588 int i;
3589
3590 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3591 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3592 return false;
3593 }
3594 }
3595 return true;
3596}
3597
28a7282a
LC
3598void bdrv_iostatus_enable(BlockDriverState *bs)
3599{
d6bf279e 3600 bs->iostatus_enabled = true;
58e21ef5 3601 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
28a7282a
LC
3602}
3603
3604/* The I/O status is only enabled if the drive explicitly
3605 * enables it _and_ the VM is configured to stop on errors */
3606bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3607{
d6bf279e 3608 return (bs->iostatus_enabled &&
92aa5c6d
PB
3609 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
3610 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
3611 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
28a7282a
LC
3612}
3613
3614void bdrv_iostatus_disable(BlockDriverState *bs)
3615{
d6bf279e 3616 bs->iostatus_enabled = false;
28a7282a
LC
3617}
3618
3619void bdrv_iostatus_reset(BlockDriverState *bs)
3620{
3621 if (bdrv_iostatus_is_enabled(bs)) {
58e21ef5 3622 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3bd293c3
PB
3623 if (bs->job) {
3624 block_job_iostatus_reset(bs->job);
3625 }
28a7282a
LC
3626 }
3627}
3628
28a7282a
LC
3629void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3630{
3e1caa5f
PB
3631 assert(bdrv_iostatus_is_enabled(bs));
3632 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
58e21ef5
LC
3633 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3634 BLOCK_DEVICE_IO_STATUS_FAILED;
28a7282a
LC
3635 }
3636}
3637
d92ada22
LC
3638void bdrv_img_create(const char *filename, const char *fmt,
3639 const char *base_filename, const char *base_fmt,
f382d43a
MR
3640 char *options, uint64_t img_size, int flags,
3641 Error **errp, bool quiet)
f88e1a42 3642{
83d0521a
CL
3643 QemuOptsList *create_opts = NULL;
3644 QemuOpts *opts = NULL;
3645 const char *backing_fmt, *backing_file;
3646 int64_t size;
f88e1a42 3647 BlockDriver *drv, *proto_drv;
96df67d1 3648 BlockDriver *backing_drv = NULL;
cc84d90f 3649 Error *local_err = NULL;
f88e1a42
JS
3650 int ret = 0;
3651
3652 /* Find driver and parse its options */
3653 drv = bdrv_find_format(fmt);
3654 if (!drv) {
71c79813 3655 error_setg(errp, "Unknown file format '%s'", fmt);
d92ada22 3656 return;
f88e1a42
JS
3657 }
3658
b65a5e12 3659 proto_drv = bdrv_find_protocol(filename, true, errp);
f88e1a42 3660 if (!proto_drv) {
d92ada22 3661 return;
f88e1a42
JS
3662 }
3663
c6149724
HR
3664 if (!drv->create_opts) {
3665 error_setg(errp, "Format driver '%s' does not support image creation",
3666 drv->format_name);
3667 return;
3668 }
3669
3670 if (!proto_drv->create_opts) {
3671 error_setg(errp, "Protocol driver '%s' does not support image creation",
3672 proto_drv->format_name);
3673 return;
3674 }
3675
c282e1fd
CL
3676 create_opts = qemu_opts_append(create_opts, drv->create_opts);
3677 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
f88e1a42
JS
3678
3679 /* Create parameter list with default values */
83d0521a 3680 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
39101f25 3681 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
f88e1a42
JS
3682
3683 /* Parse -o options */
3684 if (options) {
dc523cd3
MA
3685 qemu_opts_do_parse(opts, options, NULL, &local_err);
3686 if (local_err) {
3687 error_report_err(local_err);
3688 local_err = NULL;
83d0521a 3689 error_setg(errp, "Invalid options for file format '%s'", fmt);
f88e1a42
JS
3690 goto out;
3691 }
3692 }
3693
3694 if (base_filename) {
f43e47db 3695 qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
6be4194b 3696 if (local_err) {
71c79813
LC
3697 error_setg(errp, "Backing file not supported for file format '%s'",
3698 fmt);
f88e1a42
JS
3699 goto out;
3700 }
3701 }
3702
3703 if (base_fmt) {
f43e47db 3704 qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
6be4194b 3705 if (local_err) {
71c79813
LC
3706 error_setg(errp, "Backing file format not supported for file "
3707 "format '%s'", fmt);
f88e1a42
JS
3708 goto out;
3709 }
3710 }
3711
83d0521a
CL
3712 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
3713 if (backing_file) {
3714 if (!strcmp(filename, backing_file)) {
71c79813
LC
3715 error_setg(errp, "Error: Trying to create an image with the "
3716 "same filename as the backing file");
792da93a
JS
3717 goto out;
3718 }
3719 }
3720
83d0521a
CL
3721 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
3722 if (backing_fmt) {
3723 backing_drv = bdrv_find_format(backing_fmt);
96df67d1 3724 if (!backing_drv) {
71c79813 3725 error_setg(errp, "Unknown backing file format '%s'",
83d0521a 3726 backing_fmt);
f88e1a42
JS
3727 goto out;
3728 }
3729 }
3730
3731 // The size for the image must always be specified, with one exception:
3732 // If we are using a backing file, we can obtain the size from there
83d0521a
CL
3733 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
3734 if (size == -1) {
3735 if (backing_file) {
66f6b814 3736 BlockDriverState *bs;
29168018 3737 char *full_backing = g_new0(char, PATH_MAX);
52bf1e72 3738 int64_t size;
63090dac
PB
3739 int back_flags;
3740
29168018
HR
3741 bdrv_get_full_backing_filename_from_filename(filename, backing_file,
3742 full_backing, PATH_MAX,
3743 &local_err);
3744 if (local_err) {
3745 g_free(full_backing);
3746 goto out;
3747 }
3748
63090dac
PB
3749 /* backing files always opened read-only */
3750 back_flags =
3751 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
f88e1a42 3752
f67503e5 3753 bs = NULL;
29168018 3754 ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags,
cc84d90f 3755 backing_drv, &local_err);
29168018 3756 g_free(full_backing);
f88e1a42 3757 if (ret < 0) {
f88e1a42
JS
3758 goto out;
3759 }
52bf1e72
MA
3760 size = bdrv_getlength(bs);
3761 if (size < 0) {
3762 error_setg_errno(errp, -size, "Could not get size of '%s'",
3763 backing_file);
3764 bdrv_unref(bs);
3765 goto out;
3766 }
f88e1a42 3767
39101f25 3768 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
66f6b814
HR
3769
3770 bdrv_unref(bs);
f88e1a42 3771 } else {
71c79813 3772 error_setg(errp, "Image creation needs a size parameter");
f88e1a42
JS
3773 goto out;
3774 }
3775 }
3776
f382d43a 3777 if (!quiet) {
43c5d8f8
FZ
3778 printf("Formatting '%s', fmt=%s", filename, fmt);
3779 qemu_opts_print(opts, " ");
f382d43a
MR
3780 puts("");
3781 }
83d0521a 3782
c282e1fd 3783 ret = bdrv_create(drv, filename, opts, &local_err);
83d0521a 3784
cc84d90f
HR
3785 if (ret == -EFBIG) {
3786 /* This is generally a better message than whatever the driver would
3787 * deliver (especially because of the cluster_size_hint), since that
3788 * is most probably not much different from "image too large". */
3789 const char *cluster_size_hint = "";
83d0521a 3790 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
cc84d90f 3791 cluster_size_hint = " (try using a larger cluster size)";
f88e1a42 3792 }
cc84d90f
HR
3793 error_setg(errp, "The image size is too large for file format '%s'"
3794 "%s", fmt, cluster_size_hint);
3795 error_free(local_err);
3796 local_err = NULL;
f88e1a42
JS
3797 }
3798
3799out:
83d0521a
CL
3800 qemu_opts_del(opts);
3801 qemu_opts_free(create_opts);
84d18f06 3802 if (local_err) {
cc84d90f
HR
3803 error_propagate(errp, local_err);
3804 }
f88e1a42 3805}
85d126f3
SH
3806
3807AioContext *bdrv_get_aio_context(BlockDriverState *bs)
3808{
dcd04228
SH
3809 return bs->aio_context;
3810}
3811
3812void bdrv_detach_aio_context(BlockDriverState *bs)
3813{
33384421
HR
3814 BdrvAioNotifier *baf;
3815
dcd04228
SH
3816 if (!bs->drv) {
3817 return;
3818 }
3819
33384421
HR
3820 QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
3821 baf->detach_aio_context(baf->opaque);
3822 }
3823
13af91eb
SH
3824 if (bs->io_limits_enabled) {
3825 throttle_detach_aio_context(&bs->throttle_state);
3826 }
dcd04228
SH
3827 if (bs->drv->bdrv_detach_aio_context) {
3828 bs->drv->bdrv_detach_aio_context(bs);
3829 }
3830 if (bs->file) {
3831 bdrv_detach_aio_context(bs->file);
3832 }
3833 if (bs->backing_hd) {
3834 bdrv_detach_aio_context(bs->backing_hd);
3835 }
3836
3837 bs->aio_context = NULL;
3838}
3839
3840void bdrv_attach_aio_context(BlockDriverState *bs,
3841 AioContext *new_context)
3842{
33384421
HR
3843 BdrvAioNotifier *ban;
3844
dcd04228
SH
3845 if (!bs->drv) {
3846 return;
3847 }
3848
3849 bs->aio_context = new_context;
3850
3851 if (bs->backing_hd) {
3852 bdrv_attach_aio_context(bs->backing_hd, new_context);
3853 }
3854 if (bs->file) {
3855 bdrv_attach_aio_context(bs->file, new_context);
3856 }
3857 if (bs->drv->bdrv_attach_aio_context) {
3858 bs->drv->bdrv_attach_aio_context(bs, new_context);
3859 }
13af91eb
SH
3860 if (bs->io_limits_enabled) {
3861 throttle_attach_aio_context(&bs->throttle_state, new_context);
3862 }
33384421
HR
3863
3864 QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
3865 ban->attached_aio_context(new_context, ban->opaque);
3866 }
dcd04228
SH
3867}
3868
3869void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
3870{
3871 bdrv_drain_all(); /* ensure there are no in-flight requests */
3872
3873 bdrv_detach_aio_context(bs);
3874
3875 /* This function executes in the old AioContext so acquire the new one in
3876 * case it runs in a different thread.
3877 */
3878 aio_context_acquire(new_context);
3879 bdrv_attach_aio_context(bs, new_context);
3880 aio_context_release(new_context);
85d126f3 3881}
d616b224 3882
33384421
HR
3883void bdrv_add_aio_context_notifier(BlockDriverState *bs,
3884 void (*attached_aio_context)(AioContext *new_context, void *opaque),
3885 void (*detach_aio_context)(void *opaque), void *opaque)
3886{
3887 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
3888 *ban = (BdrvAioNotifier){
3889 .attached_aio_context = attached_aio_context,
3890 .detach_aio_context = detach_aio_context,
3891 .opaque = opaque
3892 };
3893
3894 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
3895}
3896
3897void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3898 void (*attached_aio_context)(AioContext *,
3899 void *),
3900 void (*detach_aio_context)(void *),
3901 void *opaque)
3902{
3903 BdrvAioNotifier *ban, *ban_next;
3904
3905 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3906 if (ban->attached_aio_context == attached_aio_context &&
3907 ban->detach_aio_context == detach_aio_context &&
3908 ban->opaque == opaque)
3909 {
3910 QLIST_REMOVE(ban, list);
3911 g_free(ban);
3912
3913 return;
3914 }
3915 }
3916
3917 abort();
3918}
3919
77485434
HR
3920int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
3921 BlockDriverAmendStatusCB *status_cb)
6f176b48 3922{
c282e1fd 3923 if (!bs->drv->bdrv_amend_options) {
6f176b48
HR
3924 return -ENOTSUP;
3925 }
77485434 3926 return bs->drv->bdrv_amend_options(bs, opts, status_cb);
6f176b48 3927}
f6186f49 3928
b5042a36
BC
3929/* This function will be called by the bdrv_recurse_is_first_non_filter method
3930 * of block filter and by bdrv_is_first_non_filter.
3931 * It is used to test if the given bs is the candidate or recurse more in the
3932 * node graph.
212a5a8f 3933 */
b5042a36 3934bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
212a5a8f 3935 BlockDriverState *candidate)
f6186f49 3936{
b5042a36
BC
3937 /* return false if basic checks fails */
3938 if (!bs || !bs->drv) {
212a5a8f 3939 return false;
f6186f49
BC
3940 }
3941
b5042a36
BC
3942 /* the code reached a non block filter driver -> check if the bs is
3943 * the same as the candidate. It's the recursion termination condition.
3944 */
3945 if (!bs->drv->is_filter) {
3946 return bs == candidate;
212a5a8f 3947 }
b5042a36 3948 /* Down this path the driver is a block filter driver */
212a5a8f 3949
b5042a36
BC
3950 /* If the block filter recursion method is defined use it to recurse down
3951 * the node graph.
3952 */
3953 if (bs->drv->bdrv_recurse_is_first_non_filter) {
212a5a8f 3954 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
f6186f49
BC
3955 }
3956
b5042a36
BC
3957 /* the driver is a block filter but don't allow to recurse -> return false
3958 */
3959 return false;
f6186f49
BC
3960}
3961
212a5a8f
BC
3962/* This function checks if the candidate is the first non filter bs down it's
3963 * bs chain. Since we don't have pointers to parents it explore all bs chains
3964 * from the top. Some filters can choose not to pass down the recursion.
3965 */
3966bool bdrv_is_first_non_filter(BlockDriverState *candidate)
f6186f49 3967{
212a5a8f
BC
3968 BlockDriverState *bs;
3969
3970 /* walk down the bs forest recursively */
3971 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3972 bool perm;
3973
b5042a36 3974 /* try to recurse in this top level bs */
e6dc8a1f 3975 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
212a5a8f
BC
3976
3977 /* candidate is the first non filter */
3978 if (perm) {
3979 return true;
3980 }
3981 }
3982
3983 return false;
f6186f49 3984}
09158f00
BC
3985
3986BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
3987{
3988 BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
5a7e7a0b
SH
3989 AioContext *aio_context;
3990
09158f00
BC
3991 if (!to_replace_bs) {
3992 error_setg(errp, "Node name '%s' not found", node_name);
3993 return NULL;
3994 }
3995
5a7e7a0b
SH
3996 aio_context = bdrv_get_aio_context(to_replace_bs);
3997 aio_context_acquire(aio_context);
3998
09158f00 3999 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
5a7e7a0b
SH
4000 to_replace_bs = NULL;
4001 goto out;
09158f00
BC
4002 }
4003
4004 /* We don't want arbitrary node of the BDS chain to be replaced only the top
4005 * most non filter in order to prevent data corruption.
4006 * Another benefit is that this tests exclude backing files which are
4007 * blocked by the backing blockers.
4008 */
4009 if (!bdrv_is_first_non_filter(to_replace_bs)) {
4010 error_setg(errp, "Only top most non filter can be replaced");
5a7e7a0b
SH
4011 to_replace_bs = NULL;
4012 goto out;
09158f00
BC
4013 }
4014
5a7e7a0b
SH
4015out:
4016 aio_context_release(aio_context);
09158f00
BC
4017 return to_replace_bs;
4018}
448ad91d 4019
91af7014
HR
4020static bool append_open_options(QDict *d, BlockDriverState *bs)
4021{
4022 const QDictEntry *entry;
4023 bool found_any = false;
4024
4025 for (entry = qdict_first(bs->options); entry;
4026 entry = qdict_next(bs->options, entry))
4027 {
4028 /* Only take options for this level and exclude all non-driver-specific
4029 * options */
4030 if (!strchr(qdict_entry_key(entry), '.') &&
4031 strcmp(qdict_entry_key(entry), "node-name"))
4032 {
4033 qobject_incref(qdict_entry_value(entry));
4034 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
4035 found_any = true;
4036 }
4037 }
4038
4039 return found_any;
4040}
4041
4042/* Updates the following BDS fields:
4043 * - exact_filename: A filename which may be used for opening a block device
4044 * which (mostly) equals the given BDS (even without any
4045 * other options; so reading and writing must return the same
4046 * results, but caching etc. may be different)
4047 * - full_open_options: Options which, when given when opening a block device
4048 * (without a filename), result in a BDS (mostly)
4049 * equalling the given one
4050 * - filename: If exact_filename is set, it is copied here. Otherwise,
4051 * full_open_options is converted to a JSON object, prefixed with
4052 * "json:" (for use through the JSON pseudo protocol) and put here.
4053 */
4054void bdrv_refresh_filename(BlockDriverState *bs)
4055{
4056 BlockDriver *drv = bs->drv;
4057 QDict *opts;
4058
4059 if (!drv) {
4060 return;
4061 }
4062
4063 /* This BDS's file name will most probably depend on its file's name, so
4064 * refresh that first */
4065 if (bs->file) {
4066 bdrv_refresh_filename(bs->file);
4067 }
4068
4069 if (drv->bdrv_refresh_filename) {
4070 /* Obsolete information is of no use here, so drop the old file name
4071 * information before refreshing it */
4072 bs->exact_filename[0] = '\0';
4073 if (bs->full_open_options) {
4074 QDECREF(bs->full_open_options);
4075 bs->full_open_options = NULL;
4076 }
4077
4078 drv->bdrv_refresh_filename(bs);
4079 } else if (bs->file) {
4080 /* Try to reconstruct valid information from the underlying file */
4081 bool has_open_options;
4082
4083 bs->exact_filename[0] = '\0';
4084 if (bs->full_open_options) {
4085 QDECREF(bs->full_open_options);
4086 bs->full_open_options = NULL;
4087 }
4088
4089 opts = qdict_new();
4090 has_open_options = append_open_options(opts, bs);
4091
4092 /* If no specific options have been given for this BDS, the filename of
4093 * the underlying file should suffice for this one as well */
4094 if (bs->file->exact_filename[0] && !has_open_options) {
4095 strcpy(bs->exact_filename, bs->file->exact_filename);
4096 }
4097 /* Reconstructing the full options QDict is simple for most format block
4098 * drivers, as long as the full options are known for the underlying
4099 * file BDS. The full options QDict of that file BDS should somehow
4100 * contain a representation of the filename, therefore the following
4101 * suffices without querying the (exact_)filename of this BDS. */
4102 if (bs->file->full_open_options) {
4103 qdict_put_obj(opts, "driver",
4104 QOBJECT(qstring_from_str(drv->format_name)));
4105 QINCREF(bs->file->full_open_options);
4106 qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));
4107
4108 bs->full_open_options = opts;
4109 } else {
4110 QDECREF(opts);
4111 }
4112 } else if (!bs->full_open_options && qdict_size(bs->options)) {
4113 /* There is no underlying file BDS (at least referenced by BDS.file),
4114 * so the full options QDict should be equal to the options given
4115 * specifically for this block device when it was opened (plus the
4116 * driver specification).
4117 * Because those options don't change, there is no need to update
4118 * full_open_options when it's already set. */
4119
4120 opts = qdict_new();
4121 append_open_options(opts, bs);
4122 qdict_put_obj(opts, "driver",
4123 QOBJECT(qstring_from_str(drv->format_name)));
4124
4125 if (bs->exact_filename[0]) {
4126 /* This may not work for all block protocol drivers (some may
4127 * require this filename to be parsed), but we have to find some
4128 * default solution here, so just include it. If some block driver
4129 * does not support pure options without any filename at all or
4130 * needs some special format of the options QDict, it needs to
4131 * implement the driver-specific bdrv_refresh_filename() function.
4132 */
4133 qdict_put_obj(opts, "filename",
4134 QOBJECT(qstring_from_str(bs->exact_filename)));
4135 }
4136
4137 bs->full_open_options = opts;
4138 }
4139
4140 if (bs->exact_filename[0]) {
4141 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4142 } else if (bs->full_open_options) {
4143 QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4144 snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4145 qstring_get_str(json));
4146 QDECREF(json);
4147 }
4148}
5366d0c8
BC
4149
4150/* This accessor function purpose is to allow the device models to access the
4151 * BlockAcctStats structure embedded inside a BlockDriverState without being
4152 * aware of the BlockDriverState structure layout.
4153 * It will go away when the BlockAcctStats structure will be moved inside
4154 * the device models.
4155 */
4156BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
4157{
4158 return &bs->stats;
4159}