]> git.proxmox.com Git - mirror_qemu.git/blob - block.c
block: Make backing files always writeback
[mirror_qemu.git] / block.c
1 /*
2 * QEMU System Emulator block driver
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24 #include "qemu/osdep.h"
25 #include "trace.h"
26 #include "block/block_int.h"
27 #include "block/blockjob.h"
28 #include "qemu/error-report.h"
29 #include "qemu/module.h"
30 #include "qapi/qmp/qerror.h"
31 #include "qapi/qmp/qbool.h"
32 #include "qapi/qmp/qjson.h"
33 #include "sysemu/block-backend.h"
34 #include "sysemu/sysemu.h"
35 #include "qemu/notify.h"
36 #include "qemu/coroutine.h"
37 #include "block/qapi.h"
38 #include "qmp-commands.h"
39 #include "qemu/timer.h"
40 #include "qapi-event.h"
41 #include "block/throttle-groups.h"
42 #include "qemu/cutils.h"
43 #include "qemu/id.h"
44
45 #ifdef CONFIG_BSD
46 #include <sys/ioctl.h>
47 #include <sys/queue.h>
48 #ifndef __DragonFly__
49 #include <sys/disk.h>
50 #endif
51 #endif
52
53 #ifdef _WIN32
54 #include <windows.h>
55 #endif
56
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

/* Nodes that have been given a node name; bdrv_assign_node_name() appends
 * to this list */
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

/* Every BlockDriverState allocated by bdrv_new() is appended to this list */
static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(all_bdrv_states);

/* Block drivers registered through bdrv_register() */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* Forward declaration; the definition is not part of this section */
static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
                             const char *reference, QDict *options, int flags,
                             BlockDriverState *parent,
                             const BdrvChildRole *child_role, Error **errp);

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

/* Forward declaration; the definition is not part of this section */
static void bdrv_close(BlockDriverState *bs);
77
78 #ifdef _WIN32
/*
 * Return 1 if @filename begins with an ASCII letter immediately followed by
 * a colon (for example "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_lower = letter >= 'a' && letter <= 'z';
    const int is_upper = letter >= 'A' && letter <= 'Z';

    if (!is_lower && !is_upper) {
        return 0;
    }
    return filename[1] == ':';
}
85
86 int is_windows_drive(const char *filename)
87 {
88 if (is_windows_drive_prefix(filename) &&
89 filename[2] == '\0')
90 return 1;
91 if (strstart(filename, "\\\\.\\", NULL) ||
92 strstart(filename, "//./", NULL))
93 return 1;
94 return 0;
95 }
96 #endif
97
98 size_t bdrv_opt_mem_align(BlockDriverState *bs)
99 {
100 if (!bs || !bs->drv) {
101 /* page size or 4k (hdd sector size) should be on the safe side */
102 return MAX(4096, getpagesize());
103 }
104
105 return bs->bl.opt_mem_alignment;
106 }
107
108 size_t bdrv_min_mem_align(BlockDriverState *bs)
109 {
110 if (!bs || !bs->drv) {
111 /* page size or 4k (hdd sector size) should be on the safe side */
112 return MAX(4096, getpagesize());
113 }
114
115 return bs->bl.min_mem_alignment;
116 }
117
/*
 * Check whether @path starts with "<protocol>:", i.e. whether a colon shows
 * up before any path separator.  On Windows, drive specifications are never
 * considered protocols.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    const char *stop_set = ":/\\";

    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    const char *stop_set = ":/";
#endif

    /* strcspn() stops at the first separator or at the terminating NUL */
    return path[strcspn(path, stop_set)] == ':';
}
135
/*
 * Return 1 if @path is absolute, 0 otherwise.  On Windows, drive
 * specifications and a leading backslash count as absolute as well.
 */
int path_is_absolute(const char *path)
{
    const char first = path[0];

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (first == '\\') {
        return 1;
    }
#endif

    return first == '/';
}
148
/*
 * Return a pointer just past the "<protocol>:" prefix of @base_path, or
 * @base_path itself if there is no such prefix.
 *
 * A colon only introduces a protocol when it comes before any path
 * separator; a colon inside a later path component (e.g. "dir/a:b") is part
 * of a file name and must not be mistaken for one.  On Windows the first
 * colon is always honoured, because drive specifications contain one too.
 */
static const char *path_skip_prefix(const char *base_path)
{
    const char *colon;

#ifdef _WIN32
    colon = strchr(base_path, ':');
#else
    colon = path_has_protocol(base_path) ? strchr(base_path, ':') : NULL;
#endif

    return colon ? colon + 1 : base_path;
}

/*
 * If @filename is absolute, just copy it to @dest.  Otherwise build a path
 * to it by considering it relative to @base_path: everything in @base_path
 * up to and including its last path separator (or its protocol prefix,
 * whichever ends later) is kept and @filename is appended.  URLs are
 * supported.
 *
 * @dest receives at most @dest_size bytes including the terminating NUL;
 * nothing is written if @dest_size is not positive.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *prefix_end, *dir_end;
    int len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    prefix_end = path_skip_prefix(base_path);

    dir_end = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (backslash && (!dir_end || backslash > dir_end)) {
            dir_end = backslash;
        }
    }
#endif
    dir_end = dir_end ? dir_end + 1 : base_path;

    /* keep whichever of protocol prefix and directory part is longer */
    if (dir_end > prefix_end) {
        prefix_end = dir_end;
    }

    len = prefix_end - base_path;
    if (len > dest_size - 1) {
        len = dest_size - 1;
    }
    memcpy(dest, base_path, len);
    dest[len] = '\0';
    pstrcat(dest, dest_size, filename);
}
192
193 void bdrv_get_full_backing_filename_from_filename(const char *backed,
194 const char *backing,
195 char *dest, size_t sz,
196 Error **errp)
197 {
198 if (backing[0] == '\0' || path_has_protocol(backing) ||
199 path_is_absolute(backing))
200 {
201 pstrcpy(dest, sz, backing);
202 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
203 error_setg(errp, "Cannot use relative backing file names for '%s'",
204 backed);
205 } else {
206 path_combine(dest, sz, backed, backing);
207 }
208 }
209
210 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
211 Error **errp)
212 {
213 char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
214
215 bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
216 dest, sz, errp);
217 }
218
219 void bdrv_register(BlockDriver *bdrv)
220 {
221 bdrv_setup_io_funcs(bdrv);
222
223 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
224 }
225
226 BlockDriverState *bdrv_new_root(void)
227 {
228 return bdrv_new();
229 }
230
231 BlockDriverState *bdrv_new(void)
232 {
233 BlockDriverState *bs;
234 int i;
235
236 bs = g_new0(BlockDriverState, 1);
237 QLIST_INIT(&bs->dirty_bitmaps);
238 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
239 QLIST_INIT(&bs->op_blockers[i]);
240 }
241 notifier_with_return_list_init(&bs->before_write_notifiers);
242 qemu_co_queue_init(&bs->throttled_reqs[0]);
243 qemu_co_queue_init(&bs->throttled_reqs[1]);
244 bs->refcnt = 1;
245 bs->aio_context = qemu_get_aio_context();
246
247 QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
248
249 return bs;
250 }
251
252 BlockDriver *bdrv_find_format(const char *format_name)
253 {
254 BlockDriver *drv1;
255 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
256 if (!strcmp(drv1->format_name, format_name)) {
257 return drv1;
258 }
259 }
260 return NULL;
261 }
262
263 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
264 {
265 static const char *whitelist_rw[] = {
266 CONFIG_BDRV_RW_WHITELIST
267 };
268 static const char *whitelist_ro[] = {
269 CONFIG_BDRV_RO_WHITELIST
270 };
271 const char **p;
272
273 if (!whitelist_rw[0] && !whitelist_ro[0]) {
274 return 1; /* no whitelist, anything goes */
275 }
276
277 for (p = whitelist_rw; *p; p++) {
278 if (!strcmp(drv->format_name, *p)) {
279 return 1;
280 }
281 }
282 if (read_only) {
283 for (p = whitelist_ro; *p; p++) {
284 if (!strcmp(drv->format_name, *p)) {
285 return 1;
286 }
287 }
288 }
289 return 0;
290 }
291
/* Arguments and results handed to bdrv_create_co_entry() by bdrv_create() */
typedef struct CreateCo {
    BlockDriver *drv;   /* driver whose bdrv_create callback is invoked */
    char *filename;     /* copy of the file name, freed by bdrv_create() */
    QemuOpts *opts;     /* creation options passed through to the driver */
    int ret;            /* NOT_DONE until the driver callback has returned */
    Error *err;         /* error reported by the driver callback, if any */
} CreateCo;
299
300 static void coroutine_fn bdrv_create_co_entry(void *opaque)
301 {
302 Error *local_err = NULL;
303 int ret;
304
305 CreateCo *cco = opaque;
306 assert(cco->drv);
307
308 ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
309 if (local_err) {
310 error_propagate(&cco->err, local_err);
311 }
312 cco->ret = ret;
313 }
314
315 int bdrv_create(BlockDriver *drv, const char* filename,
316 QemuOpts *opts, Error **errp)
317 {
318 int ret;
319
320 Coroutine *co;
321 CreateCo cco = {
322 .drv = drv,
323 .filename = g_strdup(filename),
324 .opts = opts,
325 .ret = NOT_DONE,
326 .err = NULL,
327 };
328
329 if (!drv->bdrv_create) {
330 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
331 ret = -ENOTSUP;
332 goto out;
333 }
334
335 if (qemu_in_coroutine()) {
336 /* Fast-path if already in coroutine context */
337 bdrv_create_co_entry(&cco);
338 } else {
339 co = qemu_coroutine_create(bdrv_create_co_entry);
340 qemu_coroutine_enter(co, &cco);
341 while (cco.ret == NOT_DONE) {
342 aio_poll(qemu_get_aio_context(), true);
343 }
344 }
345
346 ret = cco.ret;
347 if (ret < 0) {
348 if (cco.err) {
349 error_propagate(errp, cco.err);
350 } else {
351 error_setg_errno(errp, -ret, "Could not create image");
352 }
353 }
354
355 out:
356 g_free(cco.filename);
357 return ret;
358 }
359
360 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
361 {
362 BlockDriver *drv;
363 Error *local_err = NULL;
364 int ret;
365
366 drv = bdrv_find_protocol(filename, true, errp);
367 if (drv == NULL) {
368 return -ENOENT;
369 }
370
371 ret = bdrv_create(drv, filename, opts, &local_err);
372 if (local_err) {
373 error_propagate(errp, local_err);
374 }
375 return ret;
376 }
377
378 /**
379 * Try to get @bs's logical and physical block size.
380 * On success, store them in @bsz struct and return 0.
381 * On failure return -errno.
382 * @bs must not be empty.
383 */
384 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
385 {
386 BlockDriver *drv = bs->drv;
387
388 if (drv && drv->bdrv_probe_blocksizes) {
389 return drv->bdrv_probe_blocksizes(bs, bsz);
390 }
391
392 return -ENOTSUP;
393 }
394
395 /**
396 * Try to get @bs's geometry (cyls, heads, sectors).
397 * On success, store them in @geo struct and return 0.
398 * On failure return -errno.
399 * @bs must not be empty.
400 */
401 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
402 {
403 BlockDriver *drv = bs->drv;
404
405 if (drv && drv->bdrv_probe_geometry) {
406 return drv->bdrv_probe_geometry(bs, geo);
407 }
408
409 return -ENOTSUP;
410 }
411
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename receives the name of the new file; @size is the size of that
 * buffer in bytes.  On POSIX hosts the file is created in $TMPDIR, or in
 * /var/tmp if TMPDIR is not set.
 *
 * Return 0 upon success, otherwise a negative errno value (-EOVERFLOW if the
 * name does not fit into @filename).
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* unlink() may overwrite errno, so remember why close() failed */
        int close_errno = errno;

        unlink(filename);
        return -close_errno;
    }
    return 0;
#endif
}
447
448 /*
449 * Detect host devices. By convention, /dev/cdrom[N] is always
450 * recognized as a host CDROM.
451 */
452 static BlockDriver *find_hdev_driver(const char *filename)
453 {
454 int score_max = 0, score;
455 BlockDriver *drv = NULL, *d;
456
457 QLIST_FOREACH(d, &bdrv_drivers, list) {
458 if (d->bdrv_probe_device) {
459 score = d->bdrv_probe_device(filename);
460 if (score > score_max) {
461 score_max = score;
462 drv = d;
463 }
464 }
465 }
466
467 return drv;
468 }
469
470 BlockDriver *bdrv_find_protocol(const char *filename,
471 bool allow_protocol_prefix,
472 Error **errp)
473 {
474 BlockDriver *drv1;
475 char protocol[128];
476 int len;
477 const char *p;
478
479 /* TODO Drivers without bdrv_file_open must be specified explicitly */
480
481 /*
482 * XXX(hch): we really should not let host device detection
483 * override an explicit protocol specification, but moving this
484 * later breaks access to device names with colons in them.
485 * Thanks to the brain-dead persistent naming schemes on udev-
486 * based Linux systems those actually are quite common.
487 */
488 drv1 = find_hdev_driver(filename);
489 if (drv1) {
490 return drv1;
491 }
492
493 if (!path_has_protocol(filename) || !allow_protocol_prefix) {
494 return &bdrv_file;
495 }
496
497 p = strchr(filename, ':');
498 assert(p != NULL);
499 len = p - filename;
500 if (len > sizeof(protocol) - 1)
501 len = sizeof(protocol) - 1;
502 memcpy(protocol, filename, len);
503 protocol[len] = '\0';
504 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
505 if (drv1->protocol_name &&
506 !strcmp(drv1->protocol_name, protocol)) {
507 return drv1;
508 }
509 }
510
511 error_setg(errp, "Unknown protocol '%s'", protocol);
512 return NULL;
513 }
514
515 /*
516 * Guess image format by probing its contents.
517 * This is not a good idea when your image is raw (CVE-2008-2004), but
518 * we do it anyway for backward compatibility.
519 *
520 * @buf contains the image's first @buf_size bytes.
521 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
522 * but can be smaller if the image file is smaller)
523 * @filename is its filename.
524 *
525 * For all block drivers, call the bdrv_probe() method to get its
526 * probing score.
527 * Return the first block driver with the highest probing score.
528 */
529 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
530 const char *filename)
531 {
532 int score_max = 0, score;
533 BlockDriver *drv = NULL, *d;
534
535 QLIST_FOREACH(d, &bdrv_drivers, list) {
536 if (d->bdrv_probe) {
537 score = d->bdrv_probe(buf, buf_size, filename);
538 if (score > score_max) {
539 score_max = score;
540 drv = d;
541 }
542 }
543 }
544
545 return drv;
546 }
547
548 static int find_image_format(BlockDriverState *bs, const char *filename,
549 BlockDriver **pdrv, Error **errp)
550 {
551 BlockDriver *drv;
552 uint8_t buf[BLOCK_PROBE_BUF_SIZE];
553 int ret = 0;
554
555 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
556 if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
557 *pdrv = &bdrv_raw;
558 return ret;
559 }
560
561 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
562 if (ret < 0) {
563 error_setg_errno(errp, -ret, "Could not read image for determining its "
564 "format");
565 *pdrv = NULL;
566 return ret;
567 }
568
569 drv = bdrv_probe_all(buf, ret, filename);
570 if (!drv) {
571 error_setg(errp, "Could not determine image format: No compatible "
572 "driver found");
573 ret = -ENOENT;
574 }
575 *pdrv = drv;
576 return ret;
577 }
578
579 /**
580 * Set the current 'total_sectors' value
581 * Return 0 on success, -errno on error.
582 */
583 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
584 {
585 BlockDriver *drv = bs->drv;
586
587 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
588 if (bdrv_is_sg(bs))
589 return 0;
590
591 /* query actual device if possible, otherwise just trust the hint */
592 if (drv->bdrv_getlength) {
593 int64_t length = drv->bdrv_getlength(bs);
594 if (length < 0) {
595 return length;
596 }
597 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
598 }
599
600 bs->total_sectors = hint;
601 return 0;
602 }
603
604 /**
605 * Combines a QDict of new block driver @options with any missing options taken
606 * from @old_options, so that leaving out an option defaults to its old value.
607 */
608 static void bdrv_join_options(BlockDriverState *bs, QDict *options,
609 QDict *old_options)
610 {
611 if (bs->drv && bs->drv->bdrv_join_options) {
612 bs->drv->bdrv_join_options(options, old_options);
613 } else {
614 qdict_join(options, old_options, false);
615 }
616 }
617
618 /**
619 * Set open flags for a given discard mode
620 *
621 * Return 0 on success, -1 if the discard mode was invalid.
622 */
623 int bdrv_parse_discard_flags(const char *mode, int *flags)
624 {
625 *flags &= ~BDRV_O_UNMAP;
626
627 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
628 /* do nothing */
629 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
630 *flags |= BDRV_O_UNMAP;
631 } else {
632 return -1;
633 }
634
635 return 0;
636 }
637
638 /**
639 * Set open flags for a given cache mode
640 *
641 * Return 0 on success, -1 if the cache mode was invalid.
642 */
643 int bdrv_parse_cache_flags(const char *mode, int *flags)
644 {
645 *flags &= ~BDRV_O_CACHE_MASK;
646
647 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
648 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
649 } else if (!strcmp(mode, "directsync")) {
650 *flags |= BDRV_O_NOCACHE;
651 } else if (!strcmp(mode, "writeback")) {
652 *flags |= BDRV_O_CACHE_WB;
653 } else if (!strcmp(mode, "unsafe")) {
654 *flags |= BDRV_O_CACHE_WB;
655 *flags |= BDRV_O_NO_FLUSH;
656 } else if (!strcmp(mode, "writethrough")) {
657 /* this is the default */
658 } else {
659 return -1;
660 }
661
662 return 0;
663 }
664
665 /*
666 * Returns the options and flags that a temporary snapshot should get, based on
667 * the originally requested flags (the originally requested image will have
668 * flags like a backing file)
669 */
670 static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
671 int parent_flags, QDict *parent_options)
672 {
673 *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
674
675 /* For temporary files, unconditional cache=unsafe is fine */
676 qdict_set_default_str(child_options, BDRV_OPT_CACHE_WB, "on");
677 qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
678 qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
679 }
680
681 /*
682 * Returns the options and flags that bs->file should get if a protocol driver
683 * is expected, based on the given options and flags for the parent BDS
684 */
685 static void bdrv_inherited_options(int *child_flags, QDict *child_options,
686 int parent_flags, QDict *parent_options)
687 {
688 int flags = parent_flags;
689
690 /* Enable protocol handling, disable format probing for bs->file */
691 flags |= BDRV_O_PROTOCOL;
692
693 /* If the cache mode isn't explicitly set, inherit direct and no-flush from
694 * the parent. */
695 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
696 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
697
698 /* Our block drivers take care to send flushes and respect unmap policy,
699 * so we can default to enable both on lower layers regardless of the
700 * corresponding parent options. */
701 qdict_set_default_str(child_options, BDRV_OPT_CACHE_WB, "on");
702 flags |= BDRV_O_UNMAP;
703
704 /* Clear flags that only apply to the top layer */
705 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
706
707 *child_flags = flags;
708 }
709
/* Child role whose options are derived by bdrv_inherited_options(), i.e.
 * for a bs->file child that is expected to be a protocol driver */
const BdrvChildRole child_file = {
    .inherit_options = bdrv_inherited_options,
};
713
714 /*
715 * Returns the options and flags that bs->file should get if the use of formats
716 * (and not only protocols) is permitted for it, based on the given options and
717 * flags for the parent BDS
718 */
719 static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options,
720 int parent_flags, QDict *parent_options)
721 {
722 child_file.inherit_options(child_flags, child_options,
723 parent_flags, parent_options);
724
725 *child_flags &= ~BDRV_O_PROTOCOL;
726 }
727
/* Child role whose options are derived by bdrv_inherited_fmt_options(),
 * i.e. for a bs->file child that may be a format driver as well */
const BdrvChildRole child_format = {
    .inherit_options = bdrv_inherited_fmt_options,
};
731
732 /*
733 * Returns the options and flags that bs->backing should get, based on the
734 * given options and flags for the parent BDS
735 */
736 static void bdrv_backing_options(int *child_flags, QDict *child_options,
737 int parent_flags, QDict *parent_options)
738 {
739 int flags = parent_flags;
740
741 /* The cache mode is inherited unmodified for backing files; except WCE,
742 * which is only applied on the top level (BlockBackend) */
743 qdict_set_default_str(child_options, BDRV_OPT_CACHE_WB, "on");
744 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
745 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
746
747 /* backing files always opened read-only */
748 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
749
750 /* snapshot=on is handled on the top layer */
751 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
752
753 *child_flags = flags;
754 }
755
/* Child role whose options are derived by bdrv_backing_options(), i.e. for
 * bs->backing */
static const BdrvChildRole child_backing = {
    .inherit_options = bdrv_backing_options,
};
759
760 static int bdrv_open_flags(BlockDriverState *bs, int flags)
761 {
762 int open_flags = flags | BDRV_O_CACHE_WB;
763
764 /*
765 * Clear flags that are internal to the block layer before opening the
766 * image.
767 */
768 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
769
770 /*
771 * Snapshots should be writable.
772 */
773 if (flags & BDRV_O_TEMPORARY) {
774 open_flags |= BDRV_O_RDWR;
775 }
776
777 return open_flags;
778 }
779
780 static void update_flags_from_options(int *flags, QemuOpts *opts)
781 {
782 *flags &= ~BDRV_O_CACHE_MASK;
783
784 assert(qemu_opt_find(opts, BDRV_OPT_CACHE_WB));
785 if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_WB, false)) {
786 *flags |= BDRV_O_CACHE_WB;
787 }
788
789 assert(qemu_opt_find(opts, BDRV_OPT_CACHE_NO_FLUSH));
790 if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
791 *flags |= BDRV_O_NO_FLUSH;
792 }
793
794 assert(qemu_opt_find(opts, BDRV_OPT_CACHE_DIRECT));
795 if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) {
796 *flags |= BDRV_O_NOCACHE;
797 }
798 }
799
800 static void update_options_from_flags(QDict *options, int flags)
801 {
802 if (!qdict_haskey(options, BDRV_OPT_CACHE_WB)) {
803 qdict_put(options, BDRV_OPT_CACHE_WB,
804 qbool_from_bool(flags & BDRV_O_CACHE_WB));
805 }
806 if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
807 qdict_put(options, BDRV_OPT_CACHE_DIRECT,
808 qbool_from_bool(flags & BDRV_O_NOCACHE));
809 }
810 if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) {
811 qdict_put(options, BDRV_OPT_CACHE_NO_FLUSH,
812 qbool_from_bool(flags & BDRV_O_NO_FLUSH));
813 }
814 }
815
816 static void bdrv_assign_node_name(BlockDriverState *bs,
817 const char *node_name,
818 Error **errp)
819 {
820 char *gen_node_name = NULL;
821
822 if (!node_name) {
823 node_name = gen_node_name = id_generate(ID_BLOCK);
824 } else if (!id_wellformed(node_name)) {
825 /*
826 * Check for empty string or invalid characters, but not if it is
827 * generated (generated names use characters not available to the user)
828 */
829 error_setg(errp, "Invalid node name");
830 return;
831 }
832
833 /* takes care of avoiding namespaces collisions */
834 if (blk_by_name(node_name)) {
835 error_setg(errp, "node-name=%s is conflicting with a device id",
836 node_name);
837 goto out;
838 }
839
840 /* takes care of avoiding duplicates node names */
841 if (bdrv_find_node(node_name)) {
842 error_setg(errp, "Duplicate node name");
843 goto out;
844 }
845
846 /* copy node name into the bs and insert it into the graph list */
847 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
848 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
849 out:
850 g_free(gen_node_name);
851 }
852
/* Options common to all block drivers; bdrv_open_common() absorbs them from
 * the options QDict before the driver's open function is called */
static QemuOptsList bdrv_runtime_opts = {
    .name = "bdrv_common",
    .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
    .desc = {
        {
            .name = "node-name",
            .type = QEMU_OPT_STRING,
            .help = "Node name of the block device node",
        },
        {
            .name = "driver",
            .type = QEMU_OPT_STRING,
            .help = "Block driver to use for the node",
        },
        {
            .name = BDRV_OPT_CACHE_WB,
            .type = QEMU_OPT_BOOL,
            .help = "Enable writeback mode",
        },
        {
            .name = BDRV_OPT_CACHE_DIRECT,
            .type = QEMU_OPT_BOOL,
            .help = "Bypass software writeback cache on the host",
        },
        {
            .name = BDRV_OPT_CACHE_NO_FLUSH,
            .type = QEMU_OPT_BOOL,
            .help = "Ignore flush requests",
        },
        { /* end of list */ }
    },
};
885
/*
 * Common part for opening disk images and files
 *
 * Removes all processed options from *options.
 *
 * @bs must not have a file child yet and bs->open_flags must already be set.
 * @file is the already opened child to use as bs->file for format drivers;
 * it must be NULL for drivers that implement bdrv_file_open.
 * @options must contain a "driver" entry naming a registered driver, as well
 * as the three cache options.
 *
 * Returns 0 on success.  On failure a negative errno value is returned,
 * @errp is set and bs->drv, bs->opaque and bs->file are reset to NULL if
 * they had been set already.
 */
static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
                            QDict *options, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *driver_name = NULL;
    const char *node_name = NULL;
    QemuOpts *opts;
    BlockDriver *drv;
    Error *local_err = NULL;

    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    /* Move the options described by bdrv_runtime_opts out of the QDict */
    opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail_opts;
    }

    /* The caller is responsible for passing a valid driver name */
    driver_name = qemu_opt_get(opts, "driver");
    drv = bdrv_find_format(driver_name);
    assert(drv != NULL);

    /* With a file child its filename is used, else the "filename" option */
    if (file != NULL) {
        filename = file->bs->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        ret = -EINVAL;
        goto fail_opts;
    }

    trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
                           drv->format_name);

    /* A missing node-name option makes bdrv_assign_node_name() generate one */
    node_name = qemu_opt_get(opts, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail_opts;
    }

    /* Defaults that are in place before the driver's open function runs */
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    bs->read_only = !(bs->open_flags & BDRV_O_RDWR);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        /* Tell apart "only allowed read-only" from "not allowed at all" */
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                        ? "Driver '%s' can only be used for read-only devices"
                        : "Driver '%s' is not whitelisted",
                   drv->format_name);
        ret = -ENOTSUP;
        goto fail_opts;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (bs->open_flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            ret = -EINVAL;
            goto fail_opts;
        }
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    /* From here on, failures have to go through free_and_fail */
    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    /* Apply cache mode options */
    update_flags_from_options(&bs->open_flags, opts);
    bdrv_set_enable_write_cache(bs, bs->open_flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    open_flags = bdrv_open_flags(bs, bs->open_flags);
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        /* Prefer the driver's error; otherwise build one from the errno */
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    if (bs->encrypted) {
        error_report("Encrypted images are deprecated");
        error_printf("Support for them will be removed in a future release.\n"
                     "You can use 'qemu-img convert' to convert your image"
                     " to an unencrypted one.\n");
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

    assert(bdrv_opt_mem_align(bs) != 0);
    assert(bdrv_min_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bdrv_is_sg(bs));

    qemu_opts_del(opts);
    return 0;

free_and_fail:
    /* Only the pointer is cleared here; @file itself is not released */
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
fail_opts:
    qemu_opts_del(opts);
    return ret;
}
1044
1045 static QDict *parse_json_filename(const char *filename, Error **errp)
1046 {
1047 QObject *options_obj;
1048 QDict *options;
1049 int ret;
1050
1051 ret = strstart(filename, "json:", &filename);
1052 assert(ret);
1053
1054 options_obj = qobject_from_json(filename);
1055 if (!options_obj) {
1056 error_setg(errp, "Could not parse the JSON options");
1057 return NULL;
1058 }
1059
1060 if (qobject_type(options_obj) != QTYPE_QDICT) {
1061 qobject_decref(options_obj);
1062 error_setg(errp, "Invalid JSON object given");
1063 return NULL;
1064 }
1065
1066 options = qobject_to_qdict(options_obj);
1067 qdict_flatten(options);
1068
1069 return options;
1070 }
1071
1072 static void parse_json_protocol(QDict *options, const char **pfilename,
1073 Error **errp)
1074 {
1075 QDict *json_options;
1076 Error *local_err = NULL;
1077
1078 /* Parse json: pseudo-protocol */
1079 if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
1080 return;
1081 }
1082
1083 json_options = parse_json_filename(*pfilename, &local_err);
1084 if (local_err) {
1085 error_propagate(errp, local_err);
1086 return;
1087 }
1088
1089 /* Options given in the filename have lower priority than options
1090 * specified directly */
1091 qdict_join(options, json_options, false);
1092 QDECREF(json_options);
1093 *pfilename = NULL;
1094 }
1095
1096 /*
1097 * Fills in default options for opening images and converts the legacy
1098 * filename/flags pair to option QDict entries.
1099 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
1100 * block driver has been specified explicitly.
1101 */
1102 static int bdrv_fill_options(QDict **options, const char *filename,
1103 int *flags, Error **errp)
1104 {
1105 const char *drvname;
1106 bool protocol = *flags & BDRV_O_PROTOCOL;
1107 bool parse_filename = false;
1108 BlockDriver *drv = NULL;
1109 Error *local_err = NULL;
1110
1111 drvname = qdict_get_try_str(*options, "driver");
1112 if (drvname) {
1113 drv = bdrv_find_format(drvname);
1114 if (!drv) {
1115 error_setg(errp, "Unknown driver '%s'", drvname);
1116 return -ENOENT;
1117 }
1118 /* If the user has explicitly specified the driver, this choice should
1119 * override the BDRV_O_PROTOCOL flag */
1120 protocol = drv->bdrv_file_open;
1121 }
1122
1123 if (protocol) {
1124 *flags |= BDRV_O_PROTOCOL;
1125 } else {
1126 *flags &= ~BDRV_O_PROTOCOL;
1127 }
1128
1129 /* Translate cache options from flags into options */
1130 update_options_from_flags(*options, *flags);
1131
1132 /* Fetch the file name from the options QDict if necessary */
1133 if (protocol && filename) {
1134 if (!qdict_haskey(*options, "filename")) {
1135 qdict_put(*options, "filename", qstring_from_str(filename));
1136 parse_filename = true;
1137 } else {
1138 error_setg(errp, "Can't specify 'file' and 'filename' options at "
1139 "the same time");
1140 return -EINVAL;
1141 }
1142 }
1143
1144 /* Find the right block driver */
1145 filename = qdict_get_try_str(*options, "filename");
1146
1147 if (!drvname && protocol) {
1148 if (filename) {
1149 drv = bdrv_find_protocol(filename, parse_filename, errp);
1150 if (!drv) {
1151 return -EINVAL;
1152 }
1153
1154 drvname = drv->format_name;
1155 qdict_put(*options, "driver", qstring_from_str(drvname));
1156 } else {
1157 error_setg(errp, "Must specify either driver or file");
1158 return -EINVAL;
1159 }
1160 }
1161
1162 assert(drv || !protocol);
1163
1164 /* Driver-specific filename parsing */
1165 if (drv && drv->bdrv_parse_filename && parse_filename) {
1166 drv->bdrv_parse_filename(filename, *options, &local_err);
1167 if (local_err) {
1168 error_propagate(errp, local_err);
1169 return -EINVAL;
1170 }
1171
1172 if (!drv->bdrv_needs_filename) {
1173 qdict_del(*options, "filename");
1174 }
1175 }
1176
1177 return 0;
1178 }
1179
1180 BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
1181 const char *child_name,
1182 const BdrvChildRole *child_role)
1183 {
1184 BdrvChild *child = g_new(BdrvChild, 1);
1185 *child = (BdrvChild) {
1186 .bs = child_bs,
1187 .name = g_strdup(child_name),
1188 .role = child_role,
1189 };
1190
1191 QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent);
1192
1193 return child;
1194 }
1195
1196 static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
1197 BlockDriverState *child_bs,
1198 const char *child_name,
1199 const BdrvChildRole *child_role)
1200 {
1201 BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role);
1202 QLIST_INSERT_HEAD(&parent_bs->children, child, next);
1203 return child;
1204 }
1205
1206 static void bdrv_detach_child(BdrvChild *child)
1207 {
1208 if (child->next.le_prev) {
1209 QLIST_REMOVE(child, next);
1210 child->next.le_prev = NULL;
1211 }
1212 QLIST_REMOVE(child, next_parent);
1213 g_free(child->name);
1214 g_free(child);
1215 }
1216
1217 void bdrv_root_unref_child(BdrvChild *child)
1218 {
1219 BlockDriverState *child_bs;
1220
1221 child_bs = child->bs;
1222 bdrv_detach_child(child);
1223 bdrv_unref(child_bs);
1224 }
1225
1226 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
1227 {
1228 if (child == NULL) {
1229 return;
1230 }
1231
1232 if (child->bs->inherits_from == parent) {
1233 child->bs->inherits_from = NULL;
1234 }
1235
1236 bdrv_root_unref_child(child);
1237 }
1238
1239 /*
1240 * Sets the backing file link of a BDS. A new reference is created; callers
1241 * which don't need their own reference any more must call bdrv_unref().
1242 */
1243 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1244 {
1245 if (backing_hd) {
1246 bdrv_ref(backing_hd);
1247 }
1248
1249 if (bs->backing) {
1250 assert(bs->backing_blocker);
1251 bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
1252 bdrv_unref_child(bs, bs->backing);
1253 } else if (backing_hd) {
1254 error_setg(&bs->backing_blocker,
1255 "node is used as backing hd of '%s'",
1256 bdrv_get_device_or_node_name(bs));
1257 }
1258
1259 if (!backing_hd) {
1260 error_free(bs->backing_blocker);
1261 bs->backing_blocker = NULL;
1262 bs->backing = NULL;
1263 goto out;
1264 }
1265 bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing);
1266 bs->open_flags &= ~BDRV_O_NO_BACKING;
1267 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1268 pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1269 backing_hd->drv ? backing_hd->drv->format_name : "");
1270
1271 bdrv_op_block_all(backing_hd, bs->backing_blocker);
1272 /* Otherwise we won't be able to commit due to check in bdrv_commit */
1273 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1274 bs->backing_blocker);
1275 out:
1276 bdrv_refresh_limits(bs, NULL);
1277 }
1278
1279 /*
1280 * Opens the backing file for a BlockDriverState if not yet open
1281 *
1282 * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
1283 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1284 * itself, all options starting with "${bdref_key}." are considered part of the
1285 * BlockdevRef.
1286 *
1287 * TODO Can this be unified with bdrv_open_image()?
1288 */
1289 int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
1290 const char *bdref_key, Error **errp)
1291 {
1292 char *backing_filename = g_malloc0(PATH_MAX);
1293 char *bdref_key_dot;
1294 const char *reference = NULL;
1295 int ret = 0;
1296 BlockDriverState *backing_hd;
1297 QDict *options;
1298 QDict *tmp_parent_options = NULL;
1299 Error *local_err = NULL;
1300
1301 if (bs->backing != NULL) {
1302 goto free_exit;
1303 }
1304
1305 /* NULL means an empty set of options */
1306 if (parent_options == NULL) {
1307 tmp_parent_options = qdict_new();
1308 parent_options = tmp_parent_options;
1309 }
1310
1311 bs->open_flags &= ~BDRV_O_NO_BACKING;
1312
1313 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1314 qdict_extract_subqdict(parent_options, &options, bdref_key_dot);
1315 g_free(bdref_key_dot);
1316
1317 reference = qdict_get_try_str(parent_options, bdref_key);
1318 if (reference || qdict_haskey(options, "file.filename")) {
1319 backing_filename[0] = '\0';
1320 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1321 QDECREF(options);
1322 goto free_exit;
1323 } else {
1324 bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1325 &local_err);
1326 if (local_err) {
1327 ret = -EINVAL;
1328 error_propagate(errp, local_err);
1329 QDECREF(options);
1330 goto free_exit;
1331 }
1332 }
1333
1334 if (!bs->drv || !bs->drv->supports_backing) {
1335 ret = -EINVAL;
1336 error_setg(errp, "Driver doesn't support backing files");
1337 QDECREF(options);
1338 goto free_exit;
1339 }
1340
1341 if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1342 qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1343 }
1344
1345 backing_hd = NULL;
1346 ret = bdrv_open_inherit(&backing_hd,
1347 *backing_filename ? backing_filename : NULL,
1348 reference, options, 0, bs, &child_backing,
1349 errp);
1350 if (ret < 0) {
1351 bs->open_flags |= BDRV_O_NO_BACKING;
1352 error_prepend(errp, "Could not open backing file: ");
1353 goto free_exit;
1354 }
1355
1356 /* Hook up the backing file link; drop our reference, bs owns the
1357 * backing_hd reference now */
1358 bdrv_set_backing_hd(bs, backing_hd);
1359 bdrv_unref(backing_hd);
1360
1361 qdict_del(parent_options, bdref_key);
1362
1363 free_exit:
1364 g_free(backing_filename);
1365 QDECREF(tmp_parent_options);
1366 return ret;
1367 }
1368
1369 /*
1370 * Opens a disk image whose options are given as BlockdevRef in another block
1371 * device's options.
1372 *
1373 * If allow_none is true, no image will be opened if filename is false and no
1374 * BlockdevRef is given. NULL will be returned, but errp remains unset.
1375 *
1376 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1377 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1378 * itself, all options starting with "${bdref_key}." are considered part of the
1379 * BlockdevRef.
1380 *
1381 * The BlockdevRef will be removed from the options QDict.
1382 */
1383 BdrvChild *bdrv_open_child(const char *filename,
1384 QDict *options, const char *bdref_key,
1385 BlockDriverState* parent,
1386 const BdrvChildRole *child_role,
1387 bool allow_none, Error **errp)
1388 {
1389 BdrvChild *c = NULL;
1390 BlockDriverState *bs;
1391 QDict *image_options;
1392 int ret;
1393 char *bdref_key_dot;
1394 const char *reference;
1395
1396 assert(child_role != NULL);
1397
1398 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1399 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1400 g_free(bdref_key_dot);
1401
1402 reference = qdict_get_try_str(options, bdref_key);
1403 if (!filename && !reference && !qdict_size(image_options)) {
1404 if (!allow_none) {
1405 error_setg(errp, "A block device must be specified for \"%s\"",
1406 bdref_key);
1407 }
1408 QDECREF(image_options);
1409 goto done;
1410 }
1411
1412 bs = NULL;
1413 ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
1414 parent, child_role, errp);
1415 if (ret < 0) {
1416 goto done;
1417 }
1418
1419 c = bdrv_attach_child(parent, bs, bdref_key, child_role);
1420
1421 done:
1422 qdict_del(options, bdref_key);
1423 return c;
1424 }
1425
1426 static int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags,
1427 QDict *snapshot_options, Error **errp)
1428 {
1429 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1430 char *tmp_filename = g_malloc0(PATH_MAX + 1);
1431 int64_t total_size;
1432 QemuOpts *opts = NULL;
1433 BlockDriverState *bs_snapshot;
1434 Error *local_err = NULL;
1435 int ret;
1436
1437 /* if snapshot, we create a temporary backing file and open it
1438 instead of opening 'filename' directly */
1439
1440 /* Get the required size from the image */
1441 total_size = bdrv_getlength(bs);
1442 if (total_size < 0) {
1443 ret = total_size;
1444 error_setg_errno(errp, -total_size, "Could not get image size");
1445 goto out;
1446 }
1447
1448 /* Create the temporary image */
1449 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1450 if (ret < 0) {
1451 error_setg_errno(errp, -ret, "Could not get temporary filename");
1452 goto out;
1453 }
1454
1455 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1456 &error_abort);
1457 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1458 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp);
1459 qemu_opts_del(opts);
1460 if (ret < 0) {
1461 error_prepend(errp, "Could not create temporary overlay '%s': ",
1462 tmp_filename);
1463 goto out;
1464 }
1465
1466 /* Prepare options QDict for the temporary file */
1467 qdict_put(snapshot_options, "file.driver",
1468 qstring_from_str("file"));
1469 qdict_put(snapshot_options, "file.filename",
1470 qstring_from_str(tmp_filename));
1471 qdict_put(snapshot_options, "driver",
1472 qstring_from_str("qcow2"));
1473
1474 bs_snapshot = bdrv_new();
1475
1476 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1477 flags, &local_err);
1478 snapshot_options = NULL;
1479 if (ret < 0) {
1480 error_propagate(errp, local_err);
1481 goto out;
1482 }
1483
1484 bdrv_append(bs_snapshot, bs);
1485
1486 out:
1487 QDECREF(snapshot_options);
1488 g_free(tmp_filename);
1489 return ret;
1490 }
1491
1492 /*
1493 * Opens a disk image (raw, qcow2, vmdk, ...)
1494 *
1495 * options is a QDict of options to pass to the block drivers, or NULL for an
1496 * empty set of options. The reference to the QDict belongs to the block layer
1497 * after the call (even on failure), so if the caller intends to reuse the
1498 * dictionary, it needs to use QINCREF() before calling bdrv_open.
1499 *
1500 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1501 * If it is not NULL, the referenced BDS will be reused.
1502 *
1503 * The reference parameter may be used to specify an existing block device which
1504 * should be opened. If specified, neither options nor a filename may be given,
1505 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1506 */
1507 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
1508 const char *reference, QDict *options, int flags,
1509 BlockDriverState *parent,
1510 const BdrvChildRole *child_role, Error **errp)
1511 {
1512 int ret;
1513 BdrvChild *file = NULL;
1514 BlockDriverState *bs;
1515 BlockDriver *drv = NULL;
1516 const char *drvname;
1517 const char *backing;
1518 Error *local_err = NULL;
1519 QDict *snapshot_options = NULL;
1520 int snapshot_flags = 0;
1521
1522 assert(pbs);
1523 assert(!child_role || !flags);
1524 assert(!child_role == !parent);
1525
1526 if (reference) {
1527 bool options_non_empty = options ? qdict_size(options) : false;
1528 QDECREF(options);
1529
1530 if (*pbs) {
1531 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1532 "another block device");
1533 return -EINVAL;
1534 }
1535
1536 if (filename || options_non_empty) {
1537 error_setg(errp, "Cannot reference an existing block device with "
1538 "additional options or a new filename");
1539 return -EINVAL;
1540 }
1541
1542 bs = bdrv_lookup_bs(reference, reference, errp);
1543 if (!bs) {
1544 return -ENODEV;
1545 }
1546 bdrv_ref(bs);
1547 *pbs = bs;
1548 return 0;
1549 }
1550
1551 if (*pbs) {
1552 bs = *pbs;
1553 } else {
1554 bs = bdrv_new();
1555 }
1556
1557 /* NULL means an empty set of options */
1558 if (options == NULL) {
1559 options = qdict_new();
1560 }
1561
1562 /* json: syntax counts as explicit options, as if in the QDict */
1563 parse_json_protocol(options, &filename, &local_err);
1564 if (local_err) {
1565 ret = -EINVAL;
1566 goto fail;
1567 }
1568
1569 bs->explicit_options = qdict_clone_shallow(options);
1570
1571 if (child_role) {
1572 bs->inherits_from = parent;
1573 child_role->inherit_options(&flags, options,
1574 parent->open_flags, parent->options);
1575 }
1576
1577 ret = bdrv_fill_options(&options, filename, &flags, &local_err);
1578 if (local_err) {
1579 goto fail;
1580 }
1581
1582 bs->open_flags = flags;
1583 bs->options = options;
1584 options = qdict_clone_shallow(options);
1585
1586 /* Find the right image format driver */
1587 drvname = qdict_get_try_str(options, "driver");
1588 if (drvname) {
1589 drv = bdrv_find_format(drvname);
1590 if (!drv) {
1591 error_setg(errp, "Unknown driver: '%s'", drvname);
1592 ret = -EINVAL;
1593 goto fail;
1594 }
1595 }
1596
1597 assert(drvname || !(flags & BDRV_O_PROTOCOL));
1598
1599 backing = qdict_get_try_str(options, "backing");
1600 if (backing && *backing == '\0') {
1601 flags |= BDRV_O_NO_BACKING;
1602 qdict_del(options, "backing");
1603 }
1604
1605 /* Open image file without format layer */
1606 if ((flags & BDRV_O_PROTOCOL) == 0) {
1607 if (flags & BDRV_O_RDWR) {
1608 flags |= BDRV_O_ALLOW_RDWR;
1609 }
1610 if (flags & BDRV_O_SNAPSHOT) {
1611 snapshot_options = qdict_new();
1612 bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options,
1613 flags, options);
1614 bdrv_backing_options(&flags, options, flags, options);
1615 }
1616
1617 bs->open_flags = flags;
1618
1619 file = bdrv_open_child(filename, options, "file", bs,
1620 &child_file, true, &local_err);
1621 if (local_err) {
1622 ret = -EINVAL;
1623 goto fail;
1624 }
1625 }
1626
1627 /* Image format probing */
1628 bs->probed = !drv;
1629 if (!drv && file) {
1630 ret = find_image_format(file->bs, filename, &drv, &local_err);
1631 if (ret < 0) {
1632 goto fail;
1633 }
1634 /*
1635 * This option update would logically belong in bdrv_fill_options(),
1636 * but we first need to open bs->file for the probing to work, while
1637 * opening bs->file already requires the (mostly) final set of options
1638 * so that cache mode etc. can be inherited.
1639 *
1640 * Adding the driver later is somewhat ugly, but it's not an option
1641 * that would ever be inherited, so it's correct. We just need to make
1642 * sure to update both bs->options (which has the full effective
1643 * options for bs) and options (which has file.* already removed).
1644 */
1645 qdict_put(bs->options, "driver", qstring_from_str(drv->format_name));
1646 qdict_put(options, "driver", qstring_from_str(drv->format_name));
1647 } else if (!drv) {
1648 error_setg(errp, "Must specify either driver or file");
1649 ret = -EINVAL;
1650 goto fail;
1651 }
1652
1653 /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
1654 assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
1655 /* file must be NULL if a protocol BDS is about to be created
1656 * (the inverse results in an error message from bdrv_open_common()) */
1657 assert(!(flags & BDRV_O_PROTOCOL) || !file);
1658
1659 /* Open the image */
1660 ret = bdrv_open_common(bs, file, options, &local_err);
1661 if (ret < 0) {
1662 goto fail;
1663 }
1664
1665 if (file && (bs->file != file)) {
1666 bdrv_unref_child(bs, file);
1667 file = NULL;
1668 }
1669
1670 /* If there is a backing file, use it */
1671 if ((flags & BDRV_O_NO_BACKING) == 0) {
1672 ret = bdrv_open_backing_file(bs, options, "backing", &local_err);
1673 if (ret < 0) {
1674 goto close_and_fail;
1675 }
1676 }
1677
1678 bdrv_refresh_filename(bs);
1679
1680 /* Check if any unknown options were used */
1681 if (options && (qdict_size(options) != 0)) {
1682 const QDictEntry *entry = qdict_first(options);
1683 if (flags & BDRV_O_PROTOCOL) {
1684 error_setg(errp, "Block protocol '%s' doesn't support the option "
1685 "'%s'", drv->format_name, entry->key);
1686 } else {
1687 error_setg(errp,
1688 "Block format '%s' does not support the option '%s'",
1689 drv->format_name, entry->key);
1690 }
1691
1692 ret = -EINVAL;
1693 goto close_and_fail;
1694 }
1695
1696 if (!bdrv_key_required(bs)) {
1697 if (bs->blk) {
1698 blk_dev_change_media_cb(bs->blk, true);
1699 }
1700 } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1701 && !runstate_check(RUN_STATE_INMIGRATE)
1702 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1703 error_setg(errp,
1704 "Guest must be stopped for opening of encrypted image");
1705 ret = -EBUSY;
1706 goto close_and_fail;
1707 }
1708
1709 QDECREF(options);
1710 *pbs = bs;
1711
1712 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1713 * temporary snapshot afterwards. */
1714 if (snapshot_flags) {
1715 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, snapshot_options,
1716 &local_err);
1717 snapshot_options = NULL;
1718 if (local_err) {
1719 goto close_and_fail;
1720 }
1721 }
1722
1723 return 0;
1724
1725 fail:
1726 if (file != NULL) {
1727 bdrv_unref_child(bs, file);
1728 }
1729 QDECREF(snapshot_options);
1730 QDECREF(bs->explicit_options);
1731 QDECREF(bs->options);
1732 QDECREF(options);
1733 bs->options = NULL;
1734 if (!*pbs) {
1735 /* If *pbs is NULL, a new BDS has been created in this function and
1736 needs to be freed now. Otherwise, it does not need to be closed,
1737 since it has not really been opened yet. */
1738 bdrv_unref(bs);
1739 }
1740 if (local_err) {
1741 error_propagate(errp, local_err);
1742 }
1743 return ret;
1744
1745 close_and_fail:
1746 /* See fail path, but now the BDS has to be always closed */
1747 if (*pbs) {
1748 bdrv_close(bs);
1749 } else {
1750 bdrv_unref(bs);
1751 }
1752 QDECREF(snapshot_options);
1753 QDECREF(options);
1754 if (local_err) {
1755 error_propagate(errp, local_err);
1756 }
1757 return ret;
1758 }
1759
1760 int bdrv_open(BlockDriverState **pbs, const char *filename,
1761 const char *reference, QDict *options, int flags, Error **errp)
1762 {
1763 return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
1764 NULL, errp);
1765 }
1766
1767 typedef struct BlockReopenQueueEntry {
1768 bool prepared;
1769 BDRVReopenState state;
1770 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1771 } BlockReopenQueueEntry;
1772
1773 /*
1774 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1775 * reopen of multiple devices.
1776 *
1777 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1778 * already performed, or alternatively may be NULL a new BlockReopenQueue will
1779 * be created and initialized. This newly created BlockReopenQueue should be
1780 * passed back in for subsequent calls that are intended to be of the same
1781 * atomic 'set'.
1782 *
1783 * bs is the BlockDriverState to add to the reopen queue.
1784 *
1785 * options contains the changed options for the associated bs
1786 * (the BlockReopenQueue takes ownership)
1787 *
1788 * flags contains the open flags for the associated bs
1789 *
1790 * returns a pointer to bs_queue, which is either the newly allocated
1791 * bs_queue, or the existing bs_queue being used.
1792 *
1793 */
1794 static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
1795 BlockDriverState *bs,
1796 QDict *options,
1797 int flags,
1798 const BdrvChildRole *role,
1799 QDict *parent_options,
1800 int parent_flags)
1801 {
1802 assert(bs != NULL);
1803
1804 BlockReopenQueueEntry *bs_entry;
1805 BdrvChild *child;
1806 QDict *old_options, *explicit_options;
1807
1808 if (bs_queue == NULL) {
1809 bs_queue = g_new0(BlockReopenQueue, 1);
1810 QSIMPLEQ_INIT(bs_queue);
1811 }
1812
1813 if (!options) {
1814 options = qdict_new();
1815 }
1816
1817 /*
1818 * Precedence of options:
1819 * 1. Explicitly passed in options (highest)
1820 * 2. Set in flags (only for top level)
1821 * 3. Retained from explicitly set options of bs
1822 * 4. Inherited from parent node
1823 * 5. Retained from effective options of bs
1824 */
1825
1826 if (!parent_options) {
1827 /*
1828 * Any setting represented by flags is always updated. If the
1829 * corresponding QDict option is set, it takes precedence. Otherwise
1830 * the flag is translated into a QDict option. The old setting of bs is
1831 * not considered.
1832 */
1833 update_options_from_flags(options, flags);
1834 }
1835
1836 /* Old explicitly set values (don't overwrite by inherited value) */
1837 old_options = qdict_clone_shallow(bs->explicit_options);
1838 bdrv_join_options(bs, options, old_options);
1839 QDECREF(old_options);
1840
1841 explicit_options = qdict_clone_shallow(options);
1842
1843 /* Inherit from parent node */
1844 if (parent_options) {
1845 assert(!flags);
1846 role->inherit_options(&flags, options, parent_flags, parent_options);
1847 }
1848
1849 /* Old values are used for options that aren't set yet */
1850 old_options = qdict_clone_shallow(bs->options);
1851 bdrv_join_options(bs, options, old_options);
1852 QDECREF(old_options);
1853
1854 /* bdrv_open() masks this flag out */
1855 flags &= ~BDRV_O_PROTOCOL;
1856
1857 QLIST_FOREACH(child, &bs->children, next) {
1858 QDict *new_child_options;
1859 char *child_key_dot;
1860
1861 /* reopen can only change the options of block devices that were
1862 * implicitly created and inherited options. For other (referenced)
1863 * block devices, a syntax like "backing.foo" results in an error. */
1864 if (child->bs->inherits_from != bs) {
1865 continue;
1866 }
1867
1868 child_key_dot = g_strdup_printf("%s.", child->name);
1869 qdict_extract_subqdict(options, &new_child_options, child_key_dot);
1870 g_free(child_key_dot);
1871
1872 bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, 0,
1873 child->role, options, flags);
1874 }
1875
1876 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1877 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1878
1879 bs_entry->state.bs = bs;
1880 bs_entry->state.options = options;
1881 bs_entry->state.explicit_options = explicit_options;
1882 bs_entry->state.flags = flags;
1883
1884 return bs_queue;
1885 }
1886
1887 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1888 BlockDriverState *bs,
1889 QDict *options, int flags)
1890 {
1891 return bdrv_reopen_queue_child(bs_queue, bs, options, flags,
1892 NULL, NULL, 0);
1893 }
1894
1895 /*
1896 * Reopen multiple BlockDriverStates atomically & transactionally.
1897 *
1898 * The queue passed in (bs_queue) must have been built up previous
1899 * via bdrv_reopen_queue().
1900 *
1901 * Reopens all BDS specified in the queue, with the appropriate
1902 * flags. All devices are prepared for reopen, and failure of any
1903 * device will cause all device changes to be abandonded, and intermediate
1904 * data cleaned up.
1905 *
1906 * If all devices prepare successfully, then the changes are committed
1907 * to all devices.
1908 *
1909 */
1910 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1911 {
1912 int ret = -1;
1913 BlockReopenQueueEntry *bs_entry, *next;
1914 Error *local_err = NULL;
1915
1916 assert(bs_queue != NULL);
1917
1918 bdrv_drain_all();
1919
1920 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1921 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1922 error_propagate(errp, local_err);
1923 goto cleanup;
1924 }
1925 bs_entry->prepared = true;
1926 }
1927
1928 /* If we reach this point, we have success and just need to apply the
1929 * changes
1930 */
1931 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1932 bdrv_reopen_commit(&bs_entry->state);
1933 }
1934
1935 ret = 0;
1936
1937 cleanup:
1938 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1939 if (ret && bs_entry->prepared) {
1940 bdrv_reopen_abort(&bs_entry->state);
1941 } else if (ret) {
1942 QDECREF(bs_entry->state.explicit_options);
1943 }
1944 QDECREF(bs_entry->state.options);
1945 g_free(bs_entry);
1946 }
1947 g_free(bs_queue);
1948 return ret;
1949 }
1950
1951
1952 /* Reopen a single BlockDriverState with the specified flags. */
1953 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1954 {
1955 int ret = -1;
1956 Error *local_err = NULL;
1957 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
1958
1959 ret = bdrv_reopen_multiple(queue, &local_err);
1960 if (local_err != NULL) {
1961 error_propagate(errp, local_err);
1962 }
1963 return ret;
1964 }
1965
1966
1967 /*
1968 * Prepares a BlockDriverState for reopen. All changes are staged in the
1969 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1970 * the block driver layer .bdrv_reopen_prepare()
1971 *
1972 * bs is the BlockDriverState to reopen
1973 * flags are the new open flags
1974 * queue is the reopen queue
1975 *
1976 * Returns 0 on success, non-zero on error. On error errp will be set
1977 * as well.
1978 *
1979 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1980 * It is the responsibility of the caller to then call the abort() or
1981 * commit() for any other BDS that have been left in a prepare() state
1982 *
1983 */
1984 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1985 Error **errp)
1986 {
1987 int ret = -1;
1988 Error *local_err = NULL;
1989 BlockDriver *drv;
1990 QemuOpts *opts;
1991 const char *value;
1992
1993 assert(reopen_state != NULL);
1994 assert(reopen_state->bs->drv != NULL);
1995 drv = reopen_state->bs->drv;
1996
1997 /* Process generic block layer options */
1998 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
1999 qemu_opts_absorb_qdict(opts, reopen_state->options, &local_err);
2000 if (local_err) {
2001 error_propagate(errp, local_err);
2002 ret = -EINVAL;
2003 goto error;
2004 }
2005
2006 update_flags_from_options(&reopen_state->flags, opts);
2007
2008 /* If a guest device is attached, it owns WCE */
2009 if (reopen_state->bs->blk && blk_get_attached_dev(reopen_state->bs->blk)) {
2010 bool old_wce = bdrv_enable_write_cache(reopen_state->bs);
2011 bool new_wce = (reopen_state->flags & BDRV_O_CACHE_WB);
2012 if (old_wce != new_wce) {
2013 error_setg(errp, "Cannot change cache.writeback: Device attached");
2014 ret = -EINVAL;
2015 goto error;
2016 }
2017 }
2018
2019 /* node-name and driver must be unchanged. Put them back into the QDict, so
2020 * that they are checked at the end of this function. */
2021 value = qemu_opt_get(opts, "node-name");
2022 if (value) {
2023 qdict_put(reopen_state->options, "node-name", qstring_from_str(value));
2024 }
2025
2026 value = qemu_opt_get(opts, "driver");
2027 if (value) {
2028 qdict_put(reopen_state->options, "driver", qstring_from_str(value));
2029 }
2030
2031 /* if we are to stay read-only, do not allow permission change
2032 * to r/w */
2033 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
2034 reopen_state->flags & BDRV_O_RDWR) {
2035 error_setg(errp, "Node '%s' is read only",
2036 bdrv_get_device_or_node_name(reopen_state->bs));
2037 goto error;
2038 }
2039
2040
2041 ret = bdrv_flush(reopen_state->bs);
2042 if (ret) {
2043 error_setg_errno(errp, -ret, "Error flushing drive");
2044 goto error;
2045 }
2046
2047 if (drv->bdrv_reopen_prepare) {
2048 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
2049 if (ret) {
2050 if (local_err != NULL) {
2051 error_propagate(errp, local_err);
2052 } else {
2053 error_setg(errp, "failed while preparing to reopen image '%s'",
2054 reopen_state->bs->filename);
2055 }
2056 goto error;
2057 }
2058 } else {
2059 /* It is currently mandatory to have a bdrv_reopen_prepare()
2060 * handler for each supported drv. */
2061 error_setg(errp, "Block format '%s' used by node '%s' "
2062 "does not support reopening files", drv->format_name,
2063 bdrv_get_device_or_node_name(reopen_state->bs));
2064 ret = -1;
2065 goto error;
2066 }
2067
2068 /* Options that are not handled are only okay if they are unchanged
2069 * compared to the old state. It is expected that some options are only
2070 * used for the initial open, but not reopen (e.g. filename) */
2071 if (qdict_size(reopen_state->options)) {
2072 const QDictEntry *entry = qdict_first(reopen_state->options);
2073
2074 do {
2075 QString *new_obj = qobject_to_qstring(entry->value);
2076 const char *new = qstring_get_str(new_obj);
2077 const char *old = qdict_get_try_str(reopen_state->bs->options,
2078 entry->key);
2079
2080 if (!old || strcmp(new, old)) {
2081 error_setg(errp, "Cannot change the option '%s'", entry->key);
2082 ret = -EINVAL;
2083 goto error;
2084 }
2085 } while ((entry = qdict_next(reopen_state->options, entry)));
2086 }
2087
2088 ret = 0;
2089
2090 error:
2091 qemu_opts_del(opts);
2092 return ret;
2093 }
2094
2095 /*
2096 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
2097 * makes them final by swapping the staging BlockDriverState contents into
2098 * the active BlockDriverState contents.
2099 */
2100 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
2101 {
2102 BlockDriver *drv;
2103
2104 assert(reopen_state != NULL);
2105 drv = reopen_state->bs->drv;
2106 assert(drv != NULL);
2107
2108 /* If there are any driver level actions to take */
2109 if (drv->bdrv_reopen_commit) {
2110 drv->bdrv_reopen_commit(reopen_state);
2111 }
2112
2113 /* set BDS specific flags now */
2114 QDECREF(reopen_state->bs->explicit_options);
2115
2116 reopen_state->bs->explicit_options = reopen_state->explicit_options;
2117 reopen_state->bs->open_flags = reopen_state->flags;
2118 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
2119 BDRV_O_CACHE_WB);
2120 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
2121
2122 bdrv_refresh_limits(reopen_state->bs, NULL);
2123 }
2124
2125 /*
2126 * Abort the reopen, and delete and free the staged changes in
2127 * reopen_state
2128 */
2129 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
2130 {
2131 BlockDriver *drv;
2132
2133 assert(reopen_state != NULL);
2134 drv = reopen_state->bs->drv;
2135 assert(drv != NULL);
2136
2137 if (drv->bdrv_reopen_abort) {
2138 drv->bdrv_reopen_abort(reopen_state);
2139 }
2140
2141 QDECREF(reopen_state->explicit_options);
2142 }
2143
2144
2145 static void bdrv_close(BlockDriverState *bs)
2146 {
2147 BdrvAioNotifier *ban, *ban_next;
2148
2149 assert(!bs->job);
2150
2151 /* Disable I/O limits and drain all pending throttled requests */
2152 if (bs->throttle_state) {
2153 bdrv_io_limits_disable(bs);
2154 }
2155
2156 bdrv_drained_begin(bs); /* complete I/O */
2157 bdrv_flush(bs);
2158 bdrv_drain(bs); /* in case flush left pending I/O */
2159
2160 bdrv_release_named_dirty_bitmaps(bs);
2161 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
2162
2163 if (bs->blk) {
2164 blk_dev_change_media_cb(bs->blk, false);
2165 }
2166
2167 if (bs->drv) {
2168 BdrvChild *child, *next;
2169
2170 bs->drv->bdrv_close(bs);
2171 bs->drv = NULL;
2172
2173 bdrv_set_backing_hd(bs, NULL);
2174
2175 if (bs->file != NULL) {
2176 bdrv_unref_child(bs, bs->file);
2177 bs->file = NULL;
2178 }
2179
2180 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
2181 /* TODO Remove bdrv_unref() from drivers' close function and use
2182 * bdrv_unref_child() here */
2183 if (child->bs->inherits_from == bs) {
2184 child->bs->inherits_from = NULL;
2185 }
2186 bdrv_detach_child(child);
2187 }
2188
2189 g_free(bs->opaque);
2190 bs->opaque = NULL;
2191 bs->copy_on_read = 0;
2192 bs->backing_file[0] = '\0';
2193 bs->backing_format[0] = '\0';
2194 bs->total_sectors = 0;
2195 bs->encrypted = 0;
2196 bs->valid_key = 0;
2197 bs->sg = 0;
2198 bs->zero_beyond_eof = false;
2199 QDECREF(bs->options);
2200 QDECREF(bs->explicit_options);
2201 bs->options = NULL;
2202 QDECREF(bs->full_open_options);
2203 bs->full_open_options = NULL;
2204 }
2205
2206 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
2207 g_free(ban);
2208 }
2209 QLIST_INIT(&bs->aio_notifiers);
2210 bdrv_drained_end(bs);
2211 }
2212
2213 void bdrv_close_all(void)
2214 {
2215 BlockDriverState *bs;
2216 AioContext *aio_context;
2217
2218 /* Drop references from requests still in flight, such as canceled block
2219 * jobs whose AIO context has not been polled yet */
2220 bdrv_drain_all();
2221
2222 blk_remove_all_bs();
2223 blockdev_close_all_bdrv_states();
2224
2225 /* Cancel all block jobs */
2226 while (!QTAILQ_EMPTY(&all_bdrv_states)) {
2227 QTAILQ_FOREACH(bs, &all_bdrv_states, bs_list) {
2228 aio_context = bdrv_get_aio_context(bs);
2229
2230 aio_context_acquire(aio_context);
2231 if (bs->job) {
2232 block_job_cancel_sync(bs->job);
2233 aio_context_release(aio_context);
2234 break;
2235 }
2236 aio_context_release(aio_context);
2237 }
2238
2239 /* All the remaining BlockDriverStates are referenced directly or
2240 * indirectly from block jobs, so there needs to be at least one BDS
2241 * directly used by a block job */
2242 assert(bs);
2243 }
2244 }
2245
2246 /* Fields that need to stay with the top-level BDS */
2247 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
2248 BlockDriverState *bs_src)
2249 {
2250 /* move some fields that need to stay attached to the device */
2251
2252 /* dev info */
2253 bs_dest->enable_write_cache = bs_src->enable_write_cache;
2254 }
2255
2256 static void change_parent_backing_link(BlockDriverState *from,
2257 BlockDriverState *to)
2258 {
2259 BdrvChild *c, *next;
2260
2261 if (from->blk) {
2262 /* FIXME We bypass blk_set_bs(), so we need to make these updates
2263 * manually. The root problem is not in this change function, but the
2264 * existence of BlockDriverState.blk. */
2265 to->blk = from->blk;
2266 from->blk = NULL;
2267 }
2268
2269 QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
2270 assert(c->role != &child_backing);
2271 c->bs = to;
2272 QLIST_REMOVE(c, next_parent);
2273 QLIST_INSERT_HEAD(&to->parents, c, next_parent);
2274 bdrv_ref(to);
2275 bdrv_unref(from);
2276 }
2277 }
2278
2279 static void swap_feature_fields(BlockDriverState *bs_top,
2280 BlockDriverState *bs_new)
2281 {
2282 BlockDriverState tmp;
2283
2284 bdrv_move_feature_fields(&tmp, bs_top);
2285 bdrv_move_feature_fields(bs_top, bs_new);
2286 bdrv_move_feature_fields(bs_new, &tmp);
2287
2288 assert(!bs_new->throttle_state);
2289 if (bs_top->throttle_state) {
2290 assert(bs_top->io_limits_enabled);
2291 bdrv_io_limits_enable(bs_new, throttle_group_get_name(bs_top));
2292 bdrv_io_limits_disable(bs_top);
2293 }
2294 }
2295
2296 /*
2297 * Add new bs contents at the top of an image chain while the chain is
2298 * live, while keeping required fields on the top layer.
2299 *
2300 * This will modify the BlockDriverState fields, and swap contents
2301 * between bs_new and bs_top. Both bs_new and bs_top are modified.
2302 *
2303 * bs_new must not be attached to a BlockBackend.
2304 *
2305 * This function does not create any image files.
2306 *
2307 * bdrv_append() takes ownership of a bs_new reference and unrefs it because
2308 * that's what the callers commonly need. bs_new will be referenced by the old
2309 * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
2310 * reference of its own, it must call bdrv_ref().
2311 */
2312 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2313 {
2314 assert(!bdrv_requests_pending(bs_top));
2315 assert(!bdrv_requests_pending(bs_new));
2316
2317 bdrv_ref(bs_top);
2318 change_parent_backing_link(bs_top, bs_new);
2319
2320 /* Some fields always stay on top of the backing file chain */
2321 swap_feature_fields(bs_top, bs_new);
2322
2323 bdrv_set_backing_hd(bs_new, bs_top);
2324 bdrv_unref(bs_top);
2325
2326 /* bs_new is now referenced by its new parents, we don't need the
2327 * additional reference any more. */
2328 bdrv_unref(bs_new);
2329 }
2330
2331 void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
2332 {
2333 assert(!bdrv_requests_pending(old));
2334 assert(!bdrv_requests_pending(new));
2335
2336 bdrv_ref(old);
2337
2338 if (old->blk) {
2339 /* As long as these fields aren't in BlockBackend, but in the top-level
2340 * BlockDriverState, it's not possible for a BDS to have two BBs.
2341 *
2342 * We really want to copy the fields from old to new, but we go for a
2343 * swap instead so that pointers aren't duplicated and cause trouble.
2344 * (Also, bdrv_swap() used to do the same.) */
2345 assert(!new->blk);
2346 swap_feature_fields(old, new);
2347 }
2348 change_parent_backing_link(old, new);
2349
2350 /* Change backing files if a previously independent node is added to the
2351 * chain. For active commit, we replace top by its own (indirect) backing
2352 * file and don't do anything here so we don't build a loop. */
2353 if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
2354 bdrv_set_backing_hd(new, backing_bs(old));
2355 bdrv_set_backing_hd(old, NULL);
2356 }
2357
2358 bdrv_unref(old);
2359 }
2360
2361 static void bdrv_delete(BlockDriverState *bs)
2362 {
2363 assert(!bs->job);
2364 assert(bdrv_op_blocker_is_empty(bs));
2365 assert(!bs->refcnt);
2366
2367 bdrv_close(bs);
2368
2369 /* remove from list, if necessary */
2370 if (bs->node_name[0] != '\0') {
2371 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
2372 }
2373 QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list);
2374
2375 g_free(bs);
2376 }
2377
2378 /*
2379 * Run consistency checks on an image
2380 *
2381 * Returns 0 if the check could be completed (it doesn't mean that the image is
2382 * free of errors) or -errno when an internal error occurred. The results of the
2383 * check are stored in res.
2384 */
2385 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2386 {
2387 if (bs->drv == NULL) {
2388 return -ENOMEDIUM;
2389 }
2390 if (bs->drv->bdrv_check == NULL) {
2391 return -ENOTSUP;
2392 }
2393
2394 memset(res, 0, sizeof(*res));
2395 return bs->drv->bdrv_check(bs, res, fix);
2396 }
2397
2398 #define COMMIT_BUF_SECTORS 2048
2399
2400 /* commit COW file into the raw image */
2401 int bdrv_commit(BlockDriverState *bs)
2402 {
2403 BlockDriver *drv = bs->drv;
2404 int64_t sector, total_sectors, length, backing_length;
2405 int n, ro, open_flags;
2406 int ret = 0;
2407 uint8_t *buf = NULL;
2408
2409 if (!drv)
2410 return -ENOMEDIUM;
2411
2412 if (!bs->backing) {
2413 return -ENOTSUP;
2414 }
2415
2416 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2417 bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
2418 return -EBUSY;
2419 }
2420
2421 ro = bs->backing->bs->read_only;
2422 open_flags = bs->backing->bs->open_flags;
2423
2424 if (ro) {
2425 if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
2426 return -EACCES;
2427 }
2428 }
2429
2430 length = bdrv_getlength(bs);
2431 if (length < 0) {
2432 ret = length;
2433 goto ro_cleanup;
2434 }
2435
2436 backing_length = bdrv_getlength(bs->backing->bs);
2437 if (backing_length < 0) {
2438 ret = backing_length;
2439 goto ro_cleanup;
2440 }
2441
2442 /* If our top snapshot is larger than the backing file image,
2443 * grow the backing file image if possible. If not possible,
2444 * we must return an error */
2445 if (length > backing_length) {
2446 ret = bdrv_truncate(bs->backing->bs, length);
2447 if (ret < 0) {
2448 goto ro_cleanup;
2449 }
2450 }
2451
2452 total_sectors = length >> BDRV_SECTOR_BITS;
2453
2454 /* qemu_try_blockalign() for bs will choose an alignment that works for
2455 * bs->backing->bs as well, so no need to compare the alignment manually. */
2456 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2457 if (buf == NULL) {
2458 ret = -ENOMEM;
2459 goto ro_cleanup;
2460 }
2461
2462 for (sector = 0; sector < total_sectors; sector += n) {
2463 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2464 if (ret < 0) {
2465 goto ro_cleanup;
2466 }
2467 if (ret) {
2468 ret = bdrv_read(bs, sector, buf, n);
2469 if (ret < 0) {
2470 goto ro_cleanup;
2471 }
2472
2473 ret = bdrv_write(bs->backing->bs, sector, buf, n);
2474 if (ret < 0) {
2475 goto ro_cleanup;
2476 }
2477 }
2478 }
2479
2480 if (drv->bdrv_make_empty) {
2481 ret = drv->bdrv_make_empty(bs);
2482 if (ret < 0) {
2483 goto ro_cleanup;
2484 }
2485 bdrv_flush(bs);
2486 }
2487
2488 /*
2489 * Make sure all data we wrote to the backing device is actually
2490 * stable on disk.
2491 */
2492 if (bs->backing) {
2493 bdrv_flush(bs->backing->bs);
2494 }
2495
2496 ret = 0;
2497 ro_cleanup:
2498 qemu_vfree(buf);
2499
2500 if (ro) {
2501 /* ignoring error return here */
2502 bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
2503 }
2504
2505 return ret;
2506 }
2507
2508 /*
2509 * Return values:
2510 * 0 - success
2511 * -EINVAL - backing format specified, but no file
2512 * -ENOSPC - can't update the backing file because no space is left in the
2513 * image file header
2514 * -ENOTSUP - format driver doesn't support changing the backing file
2515 */
2516 int bdrv_change_backing_file(BlockDriverState *bs,
2517 const char *backing_file, const char *backing_fmt)
2518 {
2519 BlockDriver *drv = bs->drv;
2520 int ret;
2521
2522 /* Backing file format doesn't make sense without a backing file */
2523 if (backing_fmt && !backing_file) {
2524 return -EINVAL;
2525 }
2526
2527 if (drv->bdrv_change_backing_file != NULL) {
2528 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2529 } else {
2530 ret = -ENOTSUP;
2531 }
2532
2533 if (ret == 0) {
2534 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2535 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2536 }
2537 return ret;
2538 }
2539
2540 /*
2541 * Finds the image layer in the chain that has 'bs' as its backing file.
2542 *
2543 * active is the current topmost image.
2544 *
2545 * Returns NULL if bs is not found in active's image chain,
2546 * or if active == bs.
2547 *
2548 * Returns the bottommost base image if bs == NULL.
2549 */
2550 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2551 BlockDriverState *bs)
2552 {
2553 while (active && bs != backing_bs(active)) {
2554 active = backing_bs(active);
2555 }
2556
2557 return active;
2558 }
2559
2560 /* Given a BDS, searches for the base layer. */
2561 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2562 {
2563 return bdrv_find_overlay(bs, NULL);
2564 }
2565
2566 /*
2567 * Drops images above 'base' up to and including 'top', and sets the image
2568 * above 'top' to have base as its backing file.
2569 *
2570 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2571 * information in 'bs' can be properly updated.
2572 *
2573 * E.g., this will convert the following chain:
2574 * bottom <- base <- intermediate <- top <- active
2575 *
2576 * to
2577 *
2578 * bottom <- base <- active
2579 *
2580 * It is allowed for bottom==base, in which case it converts:
2581 *
2582 * base <- intermediate <- top <- active
2583 *
2584 * to
2585 *
2586 * base <- active
2587 *
2588 * If backing_file_str is non-NULL, it will be used when modifying top's
2589 * overlay image metadata.
2590 *
2591 * Error conditions:
2592 * if active == top, that is considered an error
2593 *
2594 */
2595 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2596 BlockDriverState *base, const char *backing_file_str)
2597 {
2598 BlockDriverState *new_top_bs = NULL;
2599 int ret = -EIO;
2600
2601 if (!top->drv || !base->drv) {
2602 goto exit;
2603 }
2604
2605 new_top_bs = bdrv_find_overlay(active, top);
2606
2607 if (new_top_bs == NULL) {
2608 /* we could not find the image above 'top', this is an error */
2609 goto exit;
2610 }
2611
2612 /* special case of new_top_bs->backing->bs already pointing to base - nothing
2613 * to do, no intermediate images */
2614 if (backing_bs(new_top_bs) == base) {
2615 ret = 0;
2616 goto exit;
2617 }
2618
2619 /* Make sure that base is in the backing chain of top */
2620 if (!bdrv_chain_contains(top, base)) {
2621 goto exit;
2622 }
2623
2624 /* success - we can delete the intermediate states, and link top->base */
2625 backing_file_str = backing_file_str ? backing_file_str : base->filename;
2626 ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2627 base->drv ? base->drv->format_name : "");
2628 if (ret) {
2629 goto exit;
2630 }
2631 bdrv_set_backing_hd(new_top_bs, base);
2632
2633 ret = 0;
2634 exit:
2635 return ret;
2636 }
2637
2638 /**
2639 * Truncate file to 'offset' bytes (needed only for file protocols)
2640 */
2641 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2642 {
2643 BlockDriver *drv = bs->drv;
2644 int ret;
2645 if (!drv)
2646 return -ENOMEDIUM;
2647 if (!drv->bdrv_truncate)
2648 return -ENOTSUP;
2649 if (bs->read_only)
2650 return -EACCES;
2651
2652 ret = drv->bdrv_truncate(bs, offset);
2653 if (ret == 0) {
2654 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2655 bdrv_dirty_bitmap_truncate(bs);
2656 if (bs->blk) {
2657 blk_dev_resize_cb(bs->blk);
2658 }
2659 }
2660 return ret;
2661 }
2662
2663 /**
2664 * Length of a allocated file in bytes. Sparse files are counted by actual
2665 * allocated space. Return < 0 if error or unknown.
2666 */
2667 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2668 {
2669 BlockDriver *drv = bs->drv;
2670 if (!drv) {
2671 return -ENOMEDIUM;
2672 }
2673 if (drv->bdrv_get_allocated_file_size) {
2674 return drv->bdrv_get_allocated_file_size(bs);
2675 }
2676 if (bs->file) {
2677 return bdrv_get_allocated_file_size(bs->file->bs);
2678 }
2679 return -ENOTSUP;
2680 }
2681
2682 /**
2683 * Return number of sectors on success, -errno on error.
2684 */
2685 int64_t bdrv_nb_sectors(BlockDriverState *bs)
2686 {
2687 BlockDriver *drv = bs->drv;
2688
2689 if (!drv)
2690 return -ENOMEDIUM;
2691
2692 if (drv->has_variable_length) {
2693 int ret = refresh_total_sectors(bs, bs->total_sectors);
2694 if (ret < 0) {
2695 return ret;
2696 }
2697 }
2698 return bs->total_sectors;
2699 }
2700
2701 /**
2702 * Return length in bytes on success, -errno on error.
2703 * The length is always a multiple of BDRV_SECTOR_SIZE.
2704 */
2705 int64_t bdrv_getlength(BlockDriverState *bs)
2706 {
2707 int64_t ret = bdrv_nb_sectors(bs);
2708
2709 ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2710 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2711 }
2712
2713 /* return 0 as number of sectors if no device present or error */
2714 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2715 {
2716 int64_t nb_sectors = bdrv_nb_sectors(bs);
2717
2718 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2719 }
2720
2721 int bdrv_is_read_only(BlockDriverState *bs)
2722 {
2723 return bs->read_only;
2724 }
2725
2726 int bdrv_is_sg(BlockDriverState *bs)
2727 {
2728 return bs->sg;
2729 }
2730
2731 int bdrv_enable_write_cache(BlockDriverState *bs)
2732 {
2733 return bs->enable_write_cache;
2734 }
2735
2736 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2737 {
2738 bs->enable_write_cache = wce;
2739
2740 /* so a reopen() will preserve wce */
2741 if (wce) {
2742 bs->open_flags |= BDRV_O_CACHE_WB;
2743 } else {
2744 bs->open_flags &= ~BDRV_O_CACHE_WB;
2745 }
2746 }
2747
2748 int bdrv_is_encrypted(BlockDriverState *bs)
2749 {
2750 if (bs->backing && bs->backing->bs->encrypted) {
2751 return 1;
2752 }
2753 return bs->encrypted;
2754 }
2755
2756 int bdrv_key_required(BlockDriverState *bs)
2757 {
2758 BdrvChild *backing = bs->backing;
2759
2760 if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
2761 return 1;
2762 }
2763 return (bs->encrypted && !bs->valid_key);
2764 }
2765
2766 int bdrv_set_key(BlockDriverState *bs, const char *key)
2767 {
2768 int ret;
2769 if (bs->backing && bs->backing->bs->encrypted) {
2770 ret = bdrv_set_key(bs->backing->bs, key);
2771 if (ret < 0)
2772 return ret;
2773 if (!bs->encrypted)
2774 return 0;
2775 }
2776 if (!bs->encrypted) {
2777 return -EINVAL;
2778 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2779 return -ENOMEDIUM;
2780 }
2781 ret = bs->drv->bdrv_set_key(bs, key);
2782 if (ret < 0) {
2783 bs->valid_key = 0;
2784 } else if (!bs->valid_key) {
2785 bs->valid_key = 1;
2786 if (bs->blk) {
2787 /* call the change callback now, we skipped it on open */
2788 blk_dev_change_media_cb(bs->blk, true);
2789 }
2790 }
2791 return ret;
2792 }
2793
2794 /*
2795 * Provide an encryption key for @bs.
2796 * If @key is non-null:
2797 * If @bs is not encrypted, fail.
2798 * Else if the key is invalid, fail.
2799 * Else set @bs's key to @key, replacing the existing key, if any.
2800 * If @key is null:
2801 * If @bs is encrypted and still lacks a key, fail.
2802 * Else do nothing.
2803 * On failure, store an error object through @errp if non-null.
2804 */
2805 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2806 {
2807 if (key) {
2808 if (!bdrv_is_encrypted(bs)) {
2809 error_setg(errp, "Node '%s' is not encrypted",
2810 bdrv_get_device_or_node_name(bs));
2811 } else if (bdrv_set_key(bs, key) < 0) {
2812 error_setg(errp, QERR_INVALID_PASSWORD);
2813 }
2814 } else {
2815 if (bdrv_key_required(bs)) {
2816 error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2817 "'%s' (%s) is encrypted",
2818 bdrv_get_device_or_node_name(bs),
2819 bdrv_get_encrypted_filename(bs));
2820 }
2821 }
2822 }
2823
2824 const char *bdrv_get_format_name(BlockDriverState *bs)
2825 {
2826 return bs->drv ? bs->drv->format_name : NULL;
2827 }
2828
/*
 * qsort() comparator for an array of C strings.
 *
 * qsort() passes pointers to the array elements, so @a and @b each point to
 * a 'const char *'; they must be dereferenced once before being handed to
 * strcmp(). Returns the strcmp() ordering of the two strings.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *lhs = *(const char *const *)a;
    const char *rhs = *(const char *const *)b;

    return strcmp(lhs, rhs);
}
2833
2834 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2835 void *opaque)
2836 {
2837 BlockDriver *drv;
2838 int count = 0;
2839 int i;
2840 const char **formats = NULL;
2841
2842 QLIST_FOREACH(drv, &bdrv_drivers, list) {
2843 if (drv->format_name) {
2844 bool found = false;
2845 int i = count;
2846 while (formats && i && !found) {
2847 found = !strcmp(formats[--i], drv->format_name);
2848 }
2849
2850 if (!found) {
2851 formats = g_renew(const char *, formats, count + 1);
2852 formats[count++] = drv->format_name;
2853 }
2854 }
2855 }
2856
2857 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2858
2859 for (i = 0; i < count; i++) {
2860 it(opaque, formats[i]);
2861 }
2862
2863 g_free(formats);
2864 }
2865
2866 /* This function is to find a node in the bs graph */
2867 BlockDriverState *bdrv_find_node(const char *node_name)
2868 {
2869 BlockDriverState *bs;
2870
2871 assert(node_name);
2872
2873 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2874 if (!strcmp(node_name, bs->node_name)) {
2875 return bs;
2876 }
2877 }
2878 return NULL;
2879 }
2880
2881 /* Put this QMP function here so it can access the static graph_bdrv_states. */
2882 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2883 {
2884 BlockDeviceInfoList *list, *entry;
2885 BlockDriverState *bs;
2886
2887 list = NULL;
2888 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2889 BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2890 if (!info) {
2891 qapi_free_BlockDeviceInfoList(list);
2892 return NULL;
2893 }
2894 entry = g_malloc0(sizeof(*entry));
2895 entry->value = info;
2896 entry->next = list;
2897 list = entry;
2898 }
2899
2900 return list;
2901 }
2902
2903 BlockDriverState *bdrv_lookup_bs(const char *device,
2904 const char *node_name,
2905 Error **errp)
2906 {
2907 BlockBackend *blk;
2908 BlockDriverState *bs;
2909
2910 if (device) {
2911 blk = blk_by_name(device);
2912
2913 if (blk) {
2914 bs = blk_bs(blk);
2915 if (!bs) {
2916 error_setg(errp, "Device '%s' has no medium", device);
2917 }
2918
2919 return bs;
2920 }
2921 }
2922
2923 if (node_name) {
2924 bs = bdrv_find_node(node_name);
2925
2926 if (bs) {
2927 return bs;
2928 }
2929 }
2930
2931 error_setg(errp, "Cannot find device=%s nor node_name=%s",
2932 device ? device : "",
2933 node_name ? node_name : "");
2934 return NULL;
2935 }
2936
2937 /* If 'base' is in the same chain as 'top', return true. Otherwise,
2938 * return false. If either argument is NULL, return false. */
2939 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2940 {
2941 while (top && top != base) {
2942 top = backing_bs(top);
2943 }
2944
2945 return top != NULL;
2946 }
2947
2948 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2949 {
2950 if (!bs) {
2951 return QTAILQ_FIRST(&graph_bdrv_states);
2952 }
2953 return QTAILQ_NEXT(bs, node_list);
2954 }
2955
2956 /* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
2957 * the monitor or attached to a BlockBackend */
2958 BlockDriverState *bdrv_next(BlockDriverState *bs)
2959 {
2960 if (!bs || bs->blk) {
2961 bs = blk_next_root_bs(bs);
2962 if (bs) {
2963 return bs;
2964 }
2965 }
2966
2967 /* Ignore all BDSs that are attached to a BlockBackend here; they have been
2968 * handled by the above block already */
2969 do {
2970 bs = bdrv_next_monitor_owned(bs);
2971 } while (bs && bs->blk);
2972 return bs;
2973 }
2974
2975 const char *bdrv_get_node_name(const BlockDriverState *bs)
2976 {
2977 return bs->node_name;
2978 }
2979
2980 /* TODO check what callers really want: bs->node_name or blk_name() */
2981 const char *bdrv_get_device_name(const BlockDriverState *bs)
2982 {
2983 return bs->blk ? blk_name(bs->blk) : "";
2984 }
2985
2986 /* This can be used to identify nodes that might not have a device
2987 * name associated. Since node and device names live in the same
2988 * namespace, the result is unambiguous. The exception is if both are
2989 * absent, then this returns an empty (non-null) string. */
2990 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
2991 {
2992 return bs->blk ? blk_name(bs->blk) : bs->node_name;
2993 }
2994
2995 int bdrv_get_flags(BlockDriverState *bs)
2996 {
2997 return bs->open_flags;
2998 }
2999
3000 int bdrv_has_zero_init_1(BlockDriverState *bs)
3001 {
3002 return 1;
3003 }
3004
3005 int bdrv_has_zero_init(BlockDriverState *bs)
3006 {
3007 assert(bs->drv);
3008
3009 /* If BS is a copy on write image, it is initialized to
3010 the contents of the base image, which may not be zeroes. */
3011 if (bs->backing) {
3012 return 0;
3013 }
3014 if (bs->drv->bdrv_has_zero_init) {
3015 return bs->drv->bdrv_has_zero_init(bs);
3016 }
3017
3018 /* safe default */
3019 return 0;
3020 }
3021
3022 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
3023 {
3024 BlockDriverInfo bdi;
3025
3026 if (bs->backing) {
3027 return false;
3028 }
3029
3030 if (bdrv_get_info(bs, &bdi) == 0) {
3031 return bdi.unallocated_blocks_are_zero;
3032 }
3033
3034 return false;
3035 }
3036
3037 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
3038 {
3039 BlockDriverInfo bdi;
3040
3041 if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) {
3042 return false;
3043 }
3044
3045 if (bdrv_get_info(bs, &bdi) == 0) {
3046 return bdi.can_write_zeroes_with_unmap;
3047 }
3048
3049 return false;
3050 }
3051
3052 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
3053 {
3054 if (bs->backing && bs->backing->bs->encrypted)
3055 return bs->backing_file;
3056 else if (bs->encrypted)
3057 return bs->filename;
3058 else
3059 return NULL;
3060 }
3061
3062 void bdrv_get_backing_filename(BlockDriverState *bs,
3063 char *filename, int filename_size)
3064 {
3065 pstrcpy(filename, filename_size, bs->backing_file);
3066 }
3067
3068 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
3069 {
3070 BlockDriver *drv = bs->drv;
3071 if (!drv)
3072 return -ENOMEDIUM;
3073 if (!drv->bdrv_get_info)
3074 return -ENOTSUP;
3075 memset(bdi, 0, sizeof(*bdi));
3076 return drv->bdrv_get_info(bs, bdi);
3077 }
3078
3079 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
3080 {
3081 BlockDriver *drv = bs->drv;
3082 if (drv && drv->bdrv_get_specific_info) {
3083 return drv->bdrv_get_specific_info(bs);
3084 }
3085 return NULL;
3086 }
3087
3088 void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
3089 {
3090 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
3091 return;
3092 }
3093
3094 bs->drv->bdrv_debug_event(bs, event);
3095 }
3096
3097 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
3098 const char *tag)
3099 {
3100 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
3101 bs = bs->file ? bs->file->bs : NULL;
3102 }
3103
3104 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
3105 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
3106 }
3107
3108 return -ENOTSUP;
3109 }
3110
3111 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
3112 {
3113 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
3114 bs = bs->file ? bs->file->bs : NULL;
3115 }
3116
3117 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
3118 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
3119 }
3120
3121 return -ENOTSUP;
3122 }
3123
3124 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
3125 {
3126 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
3127 bs = bs->file ? bs->file->bs : NULL;
3128 }
3129
3130 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
3131 return bs->drv->bdrv_debug_resume(bs, tag);
3132 }
3133
3134 return -ENOTSUP;
3135 }
3136
3137 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
3138 {
3139 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
3140 bs = bs->file ? bs->file->bs : NULL;
3141 }
3142
3143 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
3144 return bs->drv->bdrv_debug_is_suspended(bs, tag);
3145 }
3146
3147 return false;
3148 }
3149
3150 int bdrv_is_snapshot(BlockDriverState *bs)
3151 {
3152 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
3153 }
3154
3155 /* backing_file can either be relative, or absolute, or a protocol. If it is
3156 * relative, it must be relative to the chain. So, passing in bs->filename
3157 * from a BDS as backing_file should not be done, as that may be relative to
3158 * the CWD rather than the chain. */
3159 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
3160 const char *backing_file)
3161 {
3162 char *filename_full = NULL;
3163 char *backing_file_full = NULL;
3164 char *filename_tmp = NULL;
3165 int is_protocol = 0;
3166 BlockDriverState *curr_bs = NULL;
3167 BlockDriverState *retval = NULL;
3168
3169 if (!bs || !bs->drv || !backing_file) {
3170 return NULL;
3171 }
3172
3173 filename_full = g_malloc(PATH_MAX);
3174 backing_file_full = g_malloc(PATH_MAX);
3175 filename_tmp = g_malloc(PATH_MAX);
3176
3177 is_protocol = path_has_protocol(backing_file);
3178
3179 for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) {
3180
3181 /* If either of the filename paths is actually a protocol, then
3182 * compare unmodified paths; otherwise make paths relative */
3183 if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
3184 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
3185 retval = curr_bs->backing->bs;
3186 break;
3187 }
3188 } else {
3189 /* If not an absolute filename path, make it relative to the current
3190 * image's filename path */
3191 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3192 backing_file);
3193
3194 /* We are going to compare absolute pathnames */
3195 if (!realpath(filename_tmp, filename_full)) {
3196 continue;
3197 }
3198
3199 /* We need to make sure the backing filename we are comparing against
3200 * is relative to the current image filename (or absolute) */
3201 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3202 curr_bs->backing_file);
3203
3204 if (!realpath(filename_tmp, backing_file_full)) {
3205 continue;
3206 }
3207
3208 if (strcmp(backing_file_full, filename_full) == 0) {
3209 retval = curr_bs->backing->bs;
3210 break;
3211 }
3212 }
3213 }
3214
3215 g_free(filename_full);
3216 g_free(backing_file_full);
3217 g_free(filename_tmp);
3218 return retval;
3219 }
3220
3221 int bdrv_get_backing_file_depth(BlockDriverState *bs)
3222 {
3223 if (!bs->drv) {
3224 return 0;
3225 }
3226
3227 if (!bs->backing) {
3228 return 0;
3229 }
3230
3231 return 1 + bdrv_get_backing_file_depth(bs->backing->bs);
3232 }
3233
3234 void bdrv_init(void)
3235 {
3236 module_call_init(MODULE_INIT_BLOCK);
3237 }
3238
3239 void bdrv_init_with_whitelist(void)
3240 {
3241 use_bdrv_whitelist = 1;
3242 bdrv_init();
3243 }
3244
3245 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
3246 {
3247 Error *local_err = NULL;
3248 int ret;
3249
3250 if (!bs->drv) {
3251 return;
3252 }
3253
3254 if (!(bs->open_flags & BDRV_O_INACTIVE)) {
3255 return;
3256 }
3257 bs->open_flags &= ~BDRV_O_INACTIVE;
3258
3259 if (bs->drv->bdrv_invalidate_cache) {
3260 bs->drv->bdrv_invalidate_cache(bs, &local_err);
3261 } else if (bs->file) {
3262 bdrv_invalidate_cache(bs->file->bs, &local_err);
3263 }
3264 if (local_err) {
3265 bs->open_flags |= BDRV_O_INACTIVE;
3266 error_propagate(errp, local_err);
3267 return;
3268 }
3269
3270 ret = refresh_total_sectors(bs, bs->total_sectors);
3271 if (ret < 0) {
3272 bs->open_flags |= BDRV_O_INACTIVE;
3273 error_setg_errno(errp, -ret, "Could not refresh total sector count");
3274 return;
3275 }
3276 }
3277
3278 void bdrv_invalidate_cache_all(Error **errp)
3279 {
3280 BlockDriverState *bs = NULL;
3281 Error *local_err = NULL;
3282
3283 while ((bs = bdrv_next(bs)) != NULL) {
3284 AioContext *aio_context = bdrv_get_aio_context(bs);
3285
3286 aio_context_acquire(aio_context);
3287 bdrv_invalidate_cache(bs, &local_err);
3288 aio_context_release(aio_context);
3289 if (local_err) {
3290 error_propagate(errp, local_err);
3291 return;
3292 }
3293 }
3294 }
3295
3296 static int bdrv_inactivate(BlockDriverState *bs)
3297 {
3298 int ret;
3299
3300 if (bs->drv->bdrv_inactivate) {
3301 ret = bs->drv->bdrv_inactivate(bs);
3302 if (ret < 0) {
3303 return ret;
3304 }
3305 }
3306
3307 bs->open_flags |= BDRV_O_INACTIVE;
3308 return 0;
3309 }
3310
3311 int bdrv_inactivate_all(void)
3312 {
3313 BlockDriverState *bs = NULL;
3314 int ret;
3315
3316 while ((bs = bdrv_next(bs)) != NULL) {
3317 AioContext *aio_context = bdrv_get_aio_context(bs);
3318
3319 aio_context_acquire(aio_context);
3320 ret = bdrv_inactivate(bs);
3321 aio_context_release(aio_context);
3322 if (ret < 0) {
3323 return ret;
3324 }
3325 }
3326
3327 return 0;
3328 }
3329
3330 /**************************************************************/
3331 /* removable device support */
3332
3333 /**
3334 * Return TRUE if the media is present
3335 */
3336 bool bdrv_is_inserted(BlockDriverState *bs)
3337 {
3338 BlockDriver *drv = bs->drv;
3339 BdrvChild *child;
3340
3341 if (!drv) {
3342 return false;
3343 }
3344 if (drv->bdrv_is_inserted) {
3345 return drv->bdrv_is_inserted(bs);
3346 }
3347 QLIST_FOREACH(child, &bs->children, next) {
3348 if (!bdrv_is_inserted(child->bs)) {
3349 return false;
3350 }
3351 }
3352 return true;
3353 }
3354
3355 /**
3356 * Return whether the media changed since the last call to this
3357 * function, or -ENOTSUP if we don't know. Most drivers don't know.
3358 */
3359 int bdrv_media_changed(BlockDriverState *bs)
3360 {
3361 BlockDriver *drv = bs->drv;
3362
3363 if (drv && drv->bdrv_media_changed) {
3364 return drv->bdrv_media_changed(bs);
3365 }
3366 return -ENOTSUP;
3367 }
3368
3369 /**
3370 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3371 */
3372 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3373 {
3374 BlockDriver *drv = bs->drv;
3375 const char *device_name;
3376
3377 if (drv && drv->bdrv_eject) {
3378 drv->bdrv_eject(bs, eject_flag);
3379 }
3380
3381 device_name = bdrv_get_device_name(bs);
3382 if (device_name[0] != '\0') {
3383 qapi_event_send_device_tray_moved(device_name,
3384 eject_flag, &error_abort);
3385 }
3386 }
3387
3388 /**
3389 * Lock or unlock the media (if it is locked, the user won't be able
3390 * to eject it manually).
3391 */
3392 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3393 {
3394 BlockDriver *drv = bs->drv;
3395
3396 trace_bdrv_lock_medium(bs, locked);
3397
3398 if (drv && drv->bdrv_lock_medium) {
3399 drv->bdrv_lock_medium(bs, locked);
3400 }
3401 }
3402
3403 /* Get a reference to bs */
3404 void bdrv_ref(BlockDriverState *bs)
3405 {
3406 bs->refcnt++;
3407 }
3408
3409 /* Release a previously grabbed reference to bs.
3410 * If after releasing, reference count is zero, the BlockDriverState is
3411 * deleted. */
3412 void bdrv_unref(BlockDriverState *bs)
3413 {
3414 if (!bs) {
3415 return;
3416 }
3417 assert(bs->refcnt > 0);
3418 if (--bs->refcnt == 0) {
3419 bdrv_delete(bs);
3420 }
3421 }
3422
3423 struct BdrvOpBlocker {
3424 Error *reason;
3425 QLIST_ENTRY(BdrvOpBlocker) list;
3426 };
3427
3428 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3429 {
3430 BdrvOpBlocker *blocker;
3431 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3432 if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3433 blocker = QLIST_FIRST(&bs->op_blockers[op]);
3434 if (errp) {
3435 *errp = error_copy(blocker->reason);
3436 error_prepend(errp, "Node '%s' is busy: ",
3437 bdrv_get_device_or_node_name(bs));
3438 }
3439 return true;
3440 }
3441 return false;
3442 }
3443
3444 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3445 {
3446 BdrvOpBlocker *blocker;
3447 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3448
3449 blocker = g_new0(BdrvOpBlocker, 1);
3450 blocker->reason = reason;
3451 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3452 }
3453
3454 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3455 {
3456 BdrvOpBlocker *blocker, *next;
3457 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3458 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3459 if (blocker->reason == reason) {
3460 QLIST_REMOVE(blocker, list);
3461 g_free(blocker);
3462 }
3463 }
3464 }
3465
3466 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3467 {
3468 int i;
3469 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3470 bdrv_op_block(bs, i, reason);
3471 }
3472 }
3473
3474 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3475 {
3476 int i;
3477 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3478 bdrv_op_unblock(bs, i, reason);
3479 }
3480 }
3481
3482 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3483 {
3484 int i;
3485
3486 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3487 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3488 return false;
3489 }
3490 }
3491 return true;
3492 }
3493
3494 void bdrv_img_create(const char *filename, const char *fmt,
3495 const char *base_filename, const char *base_fmt,
3496 char *options, uint64_t img_size, int flags,
3497 Error **errp, bool quiet)
3498 {
3499 QemuOptsList *create_opts = NULL;
3500 QemuOpts *opts = NULL;
3501 const char *backing_fmt, *backing_file;
3502 int64_t size;
3503 BlockDriver *drv, *proto_drv;
3504 Error *local_err = NULL;
3505 int ret = 0;
3506
3507 /* Find driver and parse its options */
3508 drv = bdrv_find_format(fmt);
3509 if (!drv) {
3510 error_setg(errp, "Unknown file format '%s'", fmt);
3511 return;
3512 }
3513
3514 proto_drv = bdrv_find_protocol(filename, true, errp);
3515 if (!proto_drv) {
3516 return;
3517 }
3518
3519 if (!drv->create_opts) {
3520 error_setg(errp, "Format driver '%s' does not support image creation",
3521 drv->format_name);
3522 return;
3523 }
3524
3525 if (!proto_drv->create_opts) {
3526 error_setg(errp, "Protocol driver '%s' does not support image creation",
3527 proto_drv->format_name);
3528 return;
3529 }
3530
3531 create_opts = qemu_opts_append(create_opts, drv->create_opts);
3532 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
3533
3534 /* Create parameter list with default values */
3535 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3536 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
3537
3538 /* Parse -o options */
3539 if (options) {
3540 qemu_opts_do_parse(opts, options, NULL, &local_err);
3541 if (local_err) {
3542 error_report_err(local_err);
3543 local_err = NULL;
3544 error_setg(errp, "Invalid options for file format '%s'", fmt);
3545 goto out;
3546 }
3547 }
3548
3549 if (base_filename) {
3550 qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
3551 if (local_err) {
3552 error_setg(errp, "Backing file not supported for file format '%s'",
3553 fmt);
3554 goto out;
3555 }
3556 }
3557
3558 if (base_fmt) {
3559 qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
3560 if (local_err) {
3561 error_setg(errp, "Backing file format not supported for file "
3562 "format '%s'", fmt);
3563 goto out;
3564 }
3565 }
3566
3567 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
3568 if (backing_file) {
3569 if (!strcmp(filename, backing_file)) {
3570 error_setg(errp, "Error: Trying to create an image with the "
3571 "same filename as the backing file");
3572 goto out;
3573 }
3574 }
3575
3576 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
3577
3578 // The size for the image must always be specified, with one exception:
3579 // If we are using a backing file, we can obtain the size from there
3580 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
3581 if (size == -1) {
3582 if (backing_file) {
3583 BlockDriverState *bs;
3584 char *full_backing = g_new0(char, PATH_MAX);
3585 int64_t size;
3586 int back_flags;
3587 QDict *backing_options = NULL;
3588
3589 bdrv_get_full_backing_filename_from_filename(filename, backing_file,
3590 full_backing, PATH_MAX,
3591 &local_err);
3592 if (local_err) {
3593 g_free(full_backing);
3594 goto out;
3595 }
3596
3597 /* backing files always opened read-only */
3598 back_flags =
3599 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
3600
3601 if (backing_fmt) {
3602 backing_options = qdict_new();
3603 qdict_put(backing_options, "driver",
3604 qstring_from_str(backing_fmt));
3605 }
3606
3607 bs = NULL;
3608 ret = bdrv_open(&bs, full_backing, NULL, backing_options,
3609 back_flags, &local_err);
3610 g_free(full_backing);
3611 if (ret < 0) {
3612 goto out;
3613 }
3614 size = bdrv_getlength(bs);
3615 if (size < 0) {
3616 error_setg_errno(errp, -size, "Could not get size of '%s'",
3617 backing_file);
3618 bdrv_unref(bs);
3619 goto out;
3620 }
3621
3622 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
3623
3624 bdrv_unref(bs);
3625 } else {
3626 error_setg(errp, "Image creation needs a size parameter");
3627 goto out;
3628 }
3629 }
3630
3631 if (!quiet) {
3632 printf("Formatting '%s', fmt=%s ", filename, fmt);
3633 qemu_opts_print(opts, " ");
3634 puts("");
3635 }
3636
3637 ret = bdrv_create(drv, filename, opts, &local_err);
3638
3639 if (ret == -EFBIG) {
3640 /* This is generally a better message than whatever the driver would
3641 * deliver (especially because of the cluster_size_hint), since that
3642 * is most probably not much different from "image too large". */
3643 const char *cluster_size_hint = "";
3644 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
3645 cluster_size_hint = " (try using a larger cluster size)";
3646 }
3647 error_setg(errp, "The image size is too large for file format '%s'"
3648 "%s", fmt, cluster_size_hint);
3649 error_free(local_err);
3650 local_err = NULL;
3651 }
3652
3653 out:
3654 qemu_opts_del(opts);
3655 qemu_opts_free(create_opts);
3656 if (local_err) {
3657 error_propagate(errp, local_err);
3658 }
3659 }
3660
3661 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
3662 {
3663 return bs->aio_context;
3664 }
3665
3666 void bdrv_detach_aio_context(BlockDriverState *bs)
3667 {
3668 BdrvAioNotifier *baf;
3669
3670 if (!bs->drv) {
3671 return;
3672 }
3673
3674 QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
3675 baf->detach_aio_context(baf->opaque);
3676 }
3677
3678 if (bs->throttle_state) {
3679 throttle_timers_detach_aio_context(&bs->throttle_timers);
3680 }
3681 if (bs->drv->bdrv_detach_aio_context) {
3682 bs->drv->bdrv_detach_aio_context(bs);
3683 }
3684 if (bs->file) {
3685 bdrv_detach_aio_context(bs->file->bs);
3686 }
3687 if (bs->backing) {
3688 bdrv_detach_aio_context(bs->backing->bs);
3689 }
3690
3691 bs->aio_context = NULL;
3692 }
3693
3694 void bdrv_attach_aio_context(BlockDriverState *bs,
3695 AioContext *new_context)
3696 {
3697 BdrvAioNotifier *ban;
3698
3699 if (!bs->drv) {
3700 return;
3701 }
3702
3703 bs->aio_context = new_context;
3704
3705 if (bs->backing) {
3706 bdrv_attach_aio_context(bs->backing->bs, new_context);
3707 }
3708 if (bs->file) {
3709 bdrv_attach_aio_context(bs->file->bs, new_context);
3710 }
3711 if (bs->drv->bdrv_attach_aio_context) {
3712 bs->drv->bdrv_attach_aio_context(bs, new_context);
3713 }
3714 if (bs->throttle_state) {
3715 throttle_timers_attach_aio_context(&bs->throttle_timers, new_context);
3716 }
3717
3718 QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
3719 ban->attached_aio_context(new_context, ban->opaque);
3720 }
3721 }
3722
3723 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
3724 {
3725 bdrv_drain(bs); /* ensure there are no in-flight requests */
3726
3727 bdrv_detach_aio_context(bs);
3728
3729 /* This function executes in the old AioContext so acquire the new one in
3730 * case it runs in a different thread.
3731 */
3732 aio_context_acquire(new_context);
3733 bdrv_attach_aio_context(bs, new_context);
3734 aio_context_release(new_context);
3735 }
3736
3737 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
3738 void (*attached_aio_context)(AioContext *new_context, void *opaque),
3739 void (*detach_aio_context)(void *opaque), void *opaque)
3740 {
3741 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
3742 *ban = (BdrvAioNotifier){
3743 .attached_aio_context = attached_aio_context,
3744 .detach_aio_context = detach_aio_context,
3745 .opaque = opaque
3746 };
3747
3748 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
3749 }
3750
3751 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3752 void (*attached_aio_context)(AioContext *,
3753 void *),
3754 void (*detach_aio_context)(void *),
3755 void *opaque)
3756 {
3757 BdrvAioNotifier *ban, *ban_next;
3758
3759 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3760 if (ban->attached_aio_context == attached_aio_context &&
3761 ban->detach_aio_context == detach_aio_context &&
3762 ban->opaque == opaque)
3763 {
3764 QLIST_REMOVE(ban, list);
3765 g_free(ban);
3766
3767 return;
3768 }
3769 }
3770
3771 abort();
3772 }
3773
3774 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
3775 BlockDriverAmendStatusCB *status_cb, void *cb_opaque)
3776 {
3777 if (!bs->drv->bdrv_amend_options) {
3778 return -ENOTSUP;
3779 }
3780 return bs->drv->bdrv_amend_options(bs, opts, status_cb, cb_opaque);
3781 }
3782
3783 /* This function will be called by the bdrv_recurse_is_first_non_filter method
3784 * of block filter and by bdrv_is_first_non_filter.
3785 * It is used to test if the given bs is the candidate or recurse more in the
3786 * node graph.
3787 */
3788 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
3789 BlockDriverState *candidate)
3790 {
3791 /* return false if basic checks fails */
3792 if (!bs || !bs->drv) {
3793 return false;
3794 }
3795
3796 /* the code reached a non block filter driver -> check if the bs is
3797 * the same as the candidate. It's the recursion termination condition.
3798 */
3799 if (!bs->drv->is_filter) {
3800 return bs == candidate;
3801 }
3802 /* Down this path the driver is a block filter driver */
3803
3804 /* If the block filter recursion method is defined use it to recurse down
3805 * the node graph.
3806 */
3807 if (bs->drv->bdrv_recurse_is_first_non_filter) {
3808 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
3809 }
3810
3811 /* the driver is a block filter but don't allow to recurse -> return false
3812 */
3813 return false;
3814 }
3815
3816 /* This function checks if the candidate is the first non filter bs down it's
3817 * bs chain. Since we don't have pointers to parents it explore all bs chains
3818 * from the top. Some filters can choose not to pass down the recursion.
3819 */
3820 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
3821 {
3822 BlockDriverState *bs = NULL;
3823
3824 /* walk down the bs forest recursively */
3825 while ((bs = bdrv_next(bs)) != NULL) {
3826 bool perm;
3827
3828 /* try to recurse in this top level bs */
3829 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
3830
3831 /* candidate is the first non filter */
3832 if (perm) {
3833 return true;
3834 }
3835 }
3836
3837 return false;
3838 }
3839
3840 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
3841 const char *node_name, Error **errp)
3842 {
3843 BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
3844 AioContext *aio_context;
3845
3846 if (!to_replace_bs) {
3847 error_setg(errp, "Node name '%s' not found", node_name);
3848 return NULL;
3849 }
3850
3851 aio_context = bdrv_get_aio_context(to_replace_bs);
3852 aio_context_acquire(aio_context);
3853
3854 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
3855 to_replace_bs = NULL;
3856 goto out;
3857 }
3858
3859 /* We don't want arbitrary node of the BDS chain to be replaced only the top
3860 * most non filter in order to prevent data corruption.
3861 * Another benefit is that this tests exclude backing files which are
3862 * blocked by the backing blockers.
3863 */
3864 if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) {
3865 error_setg(errp, "Only top most non filter can be replaced");
3866 to_replace_bs = NULL;
3867 goto out;
3868 }
3869
3870 out:
3871 aio_context_release(aio_context);
3872 return to_replace_bs;
3873 }
3874
3875 static bool append_open_options(QDict *d, BlockDriverState *bs)
3876 {
3877 const QDictEntry *entry;
3878 QemuOptDesc *desc;
3879 BdrvChild *child;
3880 bool found_any = false;
3881 const char *p;
3882
3883 for (entry = qdict_first(bs->options); entry;
3884 entry = qdict_next(bs->options, entry))
3885 {
3886 /* Exclude options for children */
3887 QLIST_FOREACH(child, &bs->children, next) {
3888 if (strstart(qdict_entry_key(entry), child->name, &p)
3889 && (!*p || *p == '.'))
3890 {
3891 break;
3892 }
3893 }
3894 if (child) {
3895 continue;
3896 }
3897
3898 /* And exclude all non-driver-specific options */
3899 for (desc = bdrv_runtime_opts.desc; desc->name; desc++) {
3900 if (!strcmp(qdict_entry_key(entry), desc->name)) {
3901 break;
3902 }
3903 }
3904 if (desc->name) {
3905 continue;
3906 }
3907
3908 qobject_incref(qdict_entry_value(entry));
3909 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
3910 found_any = true;
3911 }
3912
3913 return found_any;
3914 }
3915
3916 /* Updates the following BDS fields:
3917 * - exact_filename: A filename which may be used for opening a block device
3918 * which (mostly) equals the given BDS (even without any
3919 * other options; so reading and writing must return the same
3920 * results, but caching etc. may be different)
3921 * - full_open_options: Options which, when given when opening a block device
3922 * (without a filename), result in a BDS (mostly)
3923 * equalling the given one
3924 * - filename: If exact_filename is set, it is copied here. Otherwise,
3925 * full_open_options is converted to a JSON object, prefixed with
3926 * "json:" (for use through the JSON pseudo protocol) and put here.
3927 */
3928 void bdrv_refresh_filename(BlockDriverState *bs)
3929 {
3930 BlockDriver *drv = bs->drv;
3931 QDict *opts;
3932
3933 if (!drv) {
3934 return;
3935 }
3936
3937 /* This BDS's file name will most probably depend on its file's name, so
3938 * refresh that first */
3939 if (bs->file) {
3940 bdrv_refresh_filename(bs->file->bs);
3941 }
3942
3943 if (drv->bdrv_refresh_filename) {
3944 /* Obsolete information is of no use here, so drop the old file name
3945 * information before refreshing it */
3946 bs->exact_filename[0] = '\0';
3947 if (bs->full_open_options) {
3948 QDECREF(bs->full_open_options);
3949 bs->full_open_options = NULL;
3950 }
3951
3952 opts = qdict_new();
3953 append_open_options(opts, bs);
3954 drv->bdrv_refresh_filename(bs, opts);
3955 QDECREF(opts);
3956 } else if (bs->file) {
3957 /* Try to reconstruct valid information from the underlying file */
3958 bool has_open_options;
3959
3960 bs->exact_filename[0] = '\0';
3961 if (bs->full_open_options) {
3962 QDECREF(bs->full_open_options);
3963 bs->full_open_options = NULL;
3964 }
3965
3966 opts = qdict_new();
3967 has_open_options = append_open_options(opts, bs);
3968
3969 /* If no specific options have been given for this BDS, the filename of
3970 * the underlying file should suffice for this one as well */
3971 if (bs->file->bs->exact_filename[0] && !has_open_options) {
3972 strcpy(bs->exact_filename, bs->file->bs->exact_filename);
3973 }
3974 /* Reconstructing the full options QDict is simple for most format block
3975 * drivers, as long as the full options are known for the underlying
3976 * file BDS. The full options QDict of that file BDS should somehow
3977 * contain a representation of the filename, therefore the following
3978 * suffices without querying the (exact_)filename of this BDS. */
3979 if (bs->file->bs->full_open_options) {
3980 qdict_put_obj(opts, "driver",
3981 QOBJECT(qstring_from_str(drv->format_name)));
3982 QINCREF(bs->file->bs->full_open_options);
3983 qdict_put_obj(opts, "file",
3984 QOBJECT(bs->file->bs->full_open_options));
3985
3986 bs->full_open_options = opts;
3987 } else {
3988 QDECREF(opts);
3989 }
3990 } else if (!bs->full_open_options && qdict_size(bs->options)) {
3991 /* There is no underlying file BDS (at least referenced by BDS.file),
3992 * so the full options QDict should be equal to the options given
3993 * specifically for this block device when it was opened (plus the
3994 * driver specification).
3995 * Because those options don't change, there is no need to update
3996 * full_open_options when it's already set. */
3997
3998 opts = qdict_new();
3999 append_open_options(opts, bs);
4000 qdict_put_obj(opts, "driver",
4001 QOBJECT(qstring_from_str(drv->format_name)));
4002
4003 if (bs->exact_filename[0]) {
4004 /* This may not work for all block protocol drivers (some may
4005 * require this filename to be parsed), but we have to find some
4006 * default solution here, so just include it. If some block driver
4007 * does not support pure options without any filename at all or
4008 * needs some special format of the options QDict, it needs to
4009 * implement the driver-specific bdrv_refresh_filename() function.
4010 */
4011 qdict_put_obj(opts, "filename",
4012 QOBJECT(qstring_from_str(bs->exact_filename)));
4013 }
4014
4015 bs->full_open_options = opts;
4016 }
4017
4018 if (bs->exact_filename[0]) {
4019 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4020 } else if (bs->full_open_options) {
4021 QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4022 snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4023 qstring_get_str(json));
4024 QDECREF(json);
4025 }
4026 }