]> git.proxmox.com Git - mirror_qemu.git/blob - block.c
block: Move I/O status and error actions into BB
[mirror_qemu.git] / block.c
1 /*
2 * QEMU System Emulator block driver
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "block/block_int.h"
28 #include "block/blockjob.h"
29 #include "qemu/error-report.h"
30 #include "qemu/module.h"
31 #include "qapi/qmp/qerror.h"
32 #include "qapi/qmp/qjson.h"
33 #include "sysemu/block-backend.h"
34 #include "sysemu/sysemu.h"
35 #include "qemu/notify.h"
36 #include "qemu/coroutine.h"
37 #include "block/qapi.h"
38 #include "qmp-commands.h"
39 #include "qemu/timer.h"
40 #include "qapi-event.h"
41 #include "block/throttle-groups.h"
42
43 #ifdef CONFIG_BSD
44 #include <sys/types.h>
45 #include <sys/stat.h>
46 #include <sys/ioctl.h>
47 #include <sys/queue.h>
48 #ifndef __DragonFly__
49 #include <sys/disk.h>
50 #endif
51 #endif
52
53 #ifdef _WIN32
54 #include <windows.h>
55 #endif
56
57 /**
58 * A BdrvDirtyBitmap can be in three possible states:
59 * (1) successor is NULL and disabled is false: full r/w mode
60 * (2) successor is NULL and disabled is true: read only mode ("disabled")
61 * (3) successor is set: frozen mode.
62 * A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
63 * or enabled. A frozen bitmap can only abdicate() or reclaim().
64 */
65 struct BdrvDirtyBitmap {
66 HBitmap *bitmap; /* Dirty sector bitmap implementation */
67 BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
68 char *name; /* Optional non-empty unique ID */
69 int64_t size; /* Size of the bitmap (Number of sectors) */
70 bool disabled; /* Bitmap is read-only */
71 QLIST_ENTRY(BdrvDirtyBitmap) list;
72 };
73
74 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
75
76 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
77 QTAILQ_HEAD_INITIALIZER(bdrv_states);
78
79 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
80 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
81
82 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
83 QLIST_HEAD_INITIALIZER(bdrv_drivers);
84
85 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
86 const char *reference, QDict *options, int flags,
87 BlockDriverState *parent,
88 const BdrvChildRole *child_role, Error **errp);
89
90 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
91 /* If non-zero, use only whitelisted block drivers */
92 static int use_bdrv_whitelist;
93
94 #ifdef _WIN32
/*
 * Return 1 if @filename begins with an ASCII letter immediately followed by
 * a colon (e.g. "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char drive = filename[0];
    const int lower = (drive >= 'a' && drive <= 'z');
    const int upper = (drive >= 'A' && drive <= 'Z');

    /* Only look at the second character once the first one is a letter */
    if (!lower && !upper) {
        return 0;
    }
    return filename[1] == ':';
}
101
/*
 * Return 1 if @filename is exactly a drive specification ("<letter>:") or
 * starts with one of the prefixes "\\.\" or "//./"; return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    /* Exactly "<letter>:" with nothing after the colon */
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    if (strstart(filename, "\\\\.\\", NULL)) {
        return 1;
    }
    if (strstart(filename, "//./", NULL)) {
        return 1;
    }

    return 0;
}
112 #endif
113
114 size_t bdrv_opt_mem_align(BlockDriverState *bs)
115 {
116 if (!bs || !bs->drv) {
117 /* page size or 4k (hdd sector size) should be on the safe side */
118 return MAX(4096, getpagesize());
119 }
120
121 return bs->bl.opt_mem_alignment;
122 }
123
124 size_t bdrv_min_mem_align(BlockDriverState *bs)
125 {
126 if (!bs || !bs->drv) {
127 /* page size or 4k (hdd sector size) should be on the safe side */
128 return MAX(4096, getpagesize());
129 }
130
131 return bs->bl.min_mem_alignment;
132 }
133
/*
 * Check whether @path starts with "<protocol>:", i.e. whether a ':' appears
 * before any path separator.  On Windows, drive specifications are never
 * treated as a protocol.
 *
 * Returns 1 if a protocol prefix is present, 0 otherwise.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    static const char stop_chars[] = ":/\\";

    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    static const char stop_chars[] = ":/";
#endif

    /* The first stop character found decides: ':' means protocol prefix */
    return path[strcspn(path, stop_chars)] == ':';
}
151
/*
 * Return 1 if @path is absolute, 0 otherwise.
 *
 * A path is absolute when it starts with '/'.  On Windows a leading '\\'
 * and any drive specification (including names like "\\.\d:") count too.
 */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (path[0] == '\\') {
        return 1;
    }
#endif
    return path[0] == '/';
}
164
/*
 * Build in @dest (of size @dest_size) the path of @filename as seen from
 * @base_path.
 *
 * If @filename is absolute, it is simply copied to @dest.  Otherwise the
 * directory part of @base_path (everything up to and including the last
 * path separator, or up to and including the first ':' if that comes later)
 * is prepended to @filename.  URLs are supported.
 *
 * The result is truncated to fit into @dest; nothing is written when
 * @dest_size is not positive.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *prefix_end;
    int len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Position just past the first ':' (protocol prefix), if there is one */
    after_colon = strchr(base_path, ':');
    if (after_colon) {
        after_colon++;
    } else {
        after_colon = base_path;
    }

    /* Position just past the last path separator, if there is one */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash;
        backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    if (after_sep) {
        after_sep++;
    } else {
        after_sep = base_path;
    }

    /* Keep whichever prefix of base_path is longer */
    prefix_end = after_colon;
    if (after_sep > prefix_end) {
        prefix_end = after_sep;
    }

    len = prefix_end - base_path;
    if (len > dest_size - 1) {
        len = dest_size - 1;
    }
    memcpy(dest, base_path, len);
    dest[len] = '\0';
    pstrcat(dest, dest_size, filename);
}
208
209 void bdrv_get_full_backing_filename_from_filename(const char *backed,
210 const char *backing,
211 char *dest, size_t sz,
212 Error **errp)
213 {
214 if (backing[0] == '\0' || path_has_protocol(backing) ||
215 path_is_absolute(backing))
216 {
217 pstrcpy(dest, sz, backing);
218 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
219 error_setg(errp, "Cannot use relative backing file names for '%s'",
220 backed);
221 } else {
222 path_combine(dest, sz, backed, backing);
223 }
224 }
225
226 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
227 Error **errp)
228 {
229 char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
230
231 bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
232 dest, sz, errp);
233 }
234
235 void bdrv_register(BlockDriver *bdrv)
236 {
237 bdrv_setup_io_funcs(bdrv);
238
239 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
240 }
241
242 BlockDriverState *bdrv_new_root(void)
243 {
244 BlockDriverState *bs = bdrv_new();
245
246 QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
247 return bs;
248 }
249
250 BlockDriverState *bdrv_new(void)
251 {
252 BlockDriverState *bs;
253 int i;
254
255 bs = g_new0(BlockDriverState, 1);
256 QLIST_INIT(&bs->dirty_bitmaps);
257 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
258 QLIST_INIT(&bs->op_blockers[i]);
259 }
260 notifier_list_init(&bs->close_notifiers);
261 notifier_with_return_list_init(&bs->before_write_notifiers);
262 qemu_co_queue_init(&bs->throttled_reqs[0]);
263 qemu_co_queue_init(&bs->throttled_reqs[1]);
264 bs->refcnt = 1;
265 bs->aio_context = qemu_get_aio_context();
266
267 return bs;
268 }
269
270 void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
271 {
272 notifier_list_add(&bs->close_notifiers, notify);
273 }
274
275 BlockDriver *bdrv_find_format(const char *format_name)
276 {
277 BlockDriver *drv1;
278 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
279 if (!strcmp(drv1->format_name, format_name)) {
280 return drv1;
281 }
282 }
283 return NULL;
284 }
285
286 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
287 {
288 static const char *whitelist_rw[] = {
289 CONFIG_BDRV_RW_WHITELIST
290 };
291 static const char *whitelist_ro[] = {
292 CONFIG_BDRV_RO_WHITELIST
293 };
294 const char **p;
295
296 if (!whitelist_rw[0] && !whitelist_ro[0]) {
297 return 1; /* no whitelist, anything goes */
298 }
299
300 for (p = whitelist_rw; *p; p++) {
301 if (!strcmp(drv->format_name, *p)) {
302 return 1;
303 }
304 }
305 if (read_only) {
306 for (p = whitelist_ro; *p; p++) {
307 if (!strcmp(drv->format_name, *p)) {
308 return 1;
309 }
310 }
311 }
312 return 0;
313 }
314
315 typedef struct CreateCo {
316 BlockDriver *drv;
317 char *filename;
318 QemuOpts *opts;
319 int ret;
320 Error *err;
321 } CreateCo;
322
323 static void coroutine_fn bdrv_create_co_entry(void *opaque)
324 {
325 Error *local_err = NULL;
326 int ret;
327
328 CreateCo *cco = opaque;
329 assert(cco->drv);
330
331 ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
332 if (local_err) {
333 error_propagate(&cco->err, local_err);
334 }
335 cco->ret = ret;
336 }
337
338 int bdrv_create(BlockDriver *drv, const char* filename,
339 QemuOpts *opts, Error **errp)
340 {
341 int ret;
342
343 Coroutine *co;
344 CreateCo cco = {
345 .drv = drv,
346 .filename = g_strdup(filename),
347 .opts = opts,
348 .ret = NOT_DONE,
349 .err = NULL,
350 };
351
352 if (!drv->bdrv_create) {
353 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
354 ret = -ENOTSUP;
355 goto out;
356 }
357
358 if (qemu_in_coroutine()) {
359 /* Fast-path if already in coroutine context */
360 bdrv_create_co_entry(&cco);
361 } else {
362 co = qemu_coroutine_create(bdrv_create_co_entry);
363 qemu_coroutine_enter(co, &cco);
364 while (cco.ret == NOT_DONE) {
365 aio_poll(qemu_get_aio_context(), true);
366 }
367 }
368
369 ret = cco.ret;
370 if (ret < 0) {
371 if (cco.err) {
372 error_propagate(errp, cco.err);
373 } else {
374 error_setg_errno(errp, -ret, "Could not create image");
375 }
376 }
377
378 out:
379 g_free(cco.filename);
380 return ret;
381 }
382
383 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
384 {
385 BlockDriver *drv;
386 Error *local_err = NULL;
387 int ret;
388
389 drv = bdrv_find_protocol(filename, true, errp);
390 if (drv == NULL) {
391 return -ENOENT;
392 }
393
394 ret = bdrv_create(drv, filename, opts, &local_err);
395 if (local_err) {
396 error_propagate(errp, local_err);
397 }
398 return ret;
399 }
400
401 /**
402 * Try to get @bs's logical and physical block size.
403 * On success, store them in @bsz struct and return 0.
404 * On failure return -errno.
405 * @bs must not be empty.
406 */
407 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
408 {
409 BlockDriver *drv = bs->drv;
410
411 if (drv && drv->bdrv_probe_blocksizes) {
412 return drv->bdrv_probe_blocksizes(bs, bsz);
413 }
414
415 return -ENOTSUP;
416 }
417
418 /**
419 * Try to get @bs's geometry (cyls, heads, sectors).
420 * On success, store them in @geo struct and return 0.
421 * On failure return -errno.
422 * @bs must not be empty.
423 */
424 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
425 {
426 BlockDriver *drv = bs->drv;
427
428 if (drv && drv->bdrv_probe_geometry) {
429 return drv->bdrv_probe_geometry(bs, geo);
430 }
431
432 return -ENOTSUP;
433 }
434
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename receives the name of the new file; @size is the size of that
 * buffer in bytes.
 *
 * On POSIX hosts the file is created in $TMPDIR, or in /var/tmp when TMPDIR
 * is not set.  On Windows @size must be at least MAX_PATH.
 *
 * Return 0 upon success, otherwise a negative errno value (on Windows, the
 * negated GetLastError() code).  -EOVERFLOW means the name does not fit
 * into @filename.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /*
         * Remember why close() failed before cleaning up: a failing
         * unlink() would overwrite errno and hide the real error.
         */
        int saved_errno = errno;

        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
470
471 /*
472 * Detect host devices. By convention, /dev/cdrom[N] is always
473 * recognized as a host CDROM.
474 */
475 static BlockDriver *find_hdev_driver(const char *filename)
476 {
477 int score_max = 0, score;
478 BlockDriver *drv = NULL, *d;
479
480 QLIST_FOREACH(d, &bdrv_drivers, list) {
481 if (d->bdrv_probe_device) {
482 score = d->bdrv_probe_device(filename);
483 if (score > score_max) {
484 score_max = score;
485 drv = d;
486 }
487 }
488 }
489
490 return drv;
491 }
492
493 BlockDriver *bdrv_find_protocol(const char *filename,
494 bool allow_protocol_prefix,
495 Error **errp)
496 {
497 BlockDriver *drv1;
498 char protocol[128];
499 int len;
500 const char *p;
501
502 /* TODO Drivers without bdrv_file_open must be specified explicitly */
503
504 /*
505 * XXX(hch): we really should not let host device detection
506 * override an explicit protocol specification, but moving this
507 * later breaks access to device names with colons in them.
508 * Thanks to the brain-dead persistent naming schemes on udev-
509 * based Linux systems those actually are quite common.
510 */
511 drv1 = find_hdev_driver(filename);
512 if (drv1) {
513 return drv1;
514 }
515
516 if (!path_has_protocol(filename) || !allow_protocol_prefix) {
517 return &bdrv_file;
518 }
519
520 p = strchr(filename, ':');
521 assert(p != NULL);
522 len = p - filename;
523 if (len > sizeof(protocol) - 1)
524 len = sizeof(protocol) - 1;
525 memcpy(protocol, filename, len);
526 protocol[len] = '\0';
527 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
528 if (drv1->protocol_name &&
529 !strcmp(drv1->protocol_name, protocol)) {
530 return drv1;
531 }
532 }
533
534 error_setg(errp, "Unknown protocol '%s'", protocol);
535 return NULL;
536 }
537
538 /*
539 * Guess image format by probing its contents.
540 * This is not a good idea when your image is raw (CVE-2008-2004), but
541 * we do it anyway for backward compatibility.
542 *
543 * @buf contains the image's first @buf_size bytes.
544 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
545 * but can be smaller if the image file is smaller)
546 * @filename is its filename.
547 *
548 * For all block drivers, call the bdrv_probe() method to get its
549 * probing score.
550 * Return the first block driver with the highest probing score.
551 */
552 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
553 const char *filename)
554 {
555 int score_max = 0, score;
556 BlockDriver *drv = NULL, *d;
557
558 QLIST_FOREACH(d, &bdrv_drivers, list) {
559 if (d->bdrv_probe) {
560 score = d->bdrv_probe(buf, buf_size, filename);
561 if (score > score_max) {
562 score_max = score;
563 drv = d;
564 }
565 }
566 }
567
568 return drv;
569 }
570
571 static int find_image_format(BlockDriverState *bs, const char *filename,
572 BlockDriver **pdrv, Error **errp)
573 {
574 BlockDriver *drv;
575 uint8_t buf[BLOCK_PROBE_BUF_SIZE];
576 int ret = 0;
577
578 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
579 if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
580 *pdrv = &bdrv_raw;
581 return ret;
582 }
583
584 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
585 if (ret < 0) {
586 error_setg_errno(errp, -ret, "Could not read image for determining its "
587 "format");
588 *pdrv = NULL;
589 return ret;
590 }
591
592 drv = bdrv_probe_all(buf, ret, filename);
593 if (!drv) {
594 error_setg(errp, "Could not determine image format: No compatible "
595 "driver found");
596 ret = -ENOENT;
597 }
598 *pdrv = drv;
599 return ret;
600 }
601
602 /**
603 * Set the current 'total_sectors' value
604 * Return 0 on success, -errno on error.
605 */
606 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
607 {
608 BlockDriver *drv = bs->drv;
609
610 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
611 if (bdrv_is_sg(bs))
612 return 0;
613
614 /* query actual device if possible, otherwise just trust the hint */
615 if (drv->bdrv_getlength) {
616 int64_t length = drv->bdrv_getlength(bs);
617 if (length < 0) {
618 return length;
619 }
620 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
621 }
622
623 bs->total_sectors = hint;
624 return 0;
625 }
626
627 /**
628 * Set open flags for a given discard mode
629 *
630 * Return 0 on success, -1 if the discard mode was invalid.
631 */
632 int bdrv_parse_discard_flags(const char *mode, int *flags)
633 {
634 *flags &= ~BDRV_O_UNMAP;
635
636 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
637 /* do nothing */
638 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
639 *flags |= BDRV_O_UNMAP;
640 } else {
641 return -1;
642 }
643
644 return 0;
645 }
646
647 /**
648 * Set open flags for a given cache mode
649 *
650 * Return 0 on success, -1 if the cache mode was invalid.
651 */
652 int bdrv_parse_cache_flags(const char *mode, int *flags)
653 {
654 *flags &= ~BDRV_O_CACHE_MASK;
655
656 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
657 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
658 } else if (!strcmp(mode, "directsync")) {
659 *flags |= BDRV_O_NOCACHE;
660 } else if (!strcmp(mode, "writeback")) {
661 *flags |= BDRV_O_CACHE_WB;
662 } else if (!strcmp(mode, "unsafe")) {
663 *flags |= BDRV_O_CACHE_WB;
664 *flags |= BDRV_O_NO_FLUSH;
665 } else if (!strcmp(mode, "writethrough")) {
666 /* this is the default */
667 } else {
668 return -1;
669 }
670
671 return 0;
672 }
673
674 /*
675 * Returns the flags that a temporary snapshot should get, based on the
676 * originally requested flags (the originally requested image will have flags
677 * like a backing file)
678 */
679 static int bdrv_temp_snapshot_flags(int flags)
680 {
681 return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
682 }
683
684 /*
685 * Returns the flags that bs->file should get if a protocol driver is expected,
686 * based on the given flags for the parent BDS
687 */
688 static int bdrv_inherited_flags(int flags)
689 {
690 /* Enable protocol handling, disable format probing for bs->file */
691 flags |= BDRV_O_PROTOCOL;
692
693 /* Our block drivers take care to send flushes and respect unmap policy,
694 * so we can enable both unconditionally on lower layers. */
695 flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
696
697 /* Clear flags that only apply to the top layer */
698 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
699
700 return flags;
701 }
702
703 const BdrvChildRole child_file = {
704 .inherit_flags = bdrv_inherited_flags,
705 };
706
707 /*
708 * Returns the flags that bs->file should get if the use of formats (and not
709 * only protocols) is permitted for it, based on the given flags for the parent
710 * BDS
711 */
712 static int bdrv_inherited_fmt_flags(int parent_flags)
713 {
714 int flags = child_file.inherit_flags(parent_flags);
715 return flags & ~BDRV_O_PROTOCOL;
716 }
717
718 const BdrvChildRole child_format = {
719 .inherit_flags = bdrv_inherited_fmt_flags,
720 };
721
722 /*
723 * Returns the flags that bs->backing should get, based on the given flags
724 * for the parent BDS
725 */
726 static int bdrv_backing_flags(int flags)
727 {
728 /* backing files always opened read-only */
729 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
730
731 /* snapshot=on is handled on the top layer */
732 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
733
734 return flags;
735 }
736
737 static const BdrvChildRole child_backing = {
738 .inherit_flags = bdrv_backing_flags,
739 };
740
741 static int bdrv_open_flags(BlockDriverState *bs, int flags)
742 {
743 int open_flags = flags | BDRV_O_CACHE_WB;
744
745 /*
746 * Clear flags that are internal to the block layer before opening the
747 * image.
748 */
749 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
750
751 /*
752 * Snapshots should be writable.
753 */
754 if (flags & BDRV_O_TEMPORARY) {
755 open_flags |= BDRV_O_RDWR;
756 }
757
758 return open_flags;
759 }
760
761 static void bdrv_assign_node_name(BlockDriverState *bs,
762 const char *node_name,
763 Error **errp)
764 {
765 char *gen_node_name = NULL;
766
767 if (!node_name) {
768 node_name = gen_node_name = id_generate(ID_BLOCK);
769 } else if (!id_wellformed(node_name)) {
770 /*
771 * Check for empty string or invalid characters, but not if it is
772 * generated (generated names use characters not available to the user)
773 */
774 error_setg(errp, "Invalid node name");
775 return;
776 }
777
778 /* takes care of avoiding namespaces collisions */
779 if (blk_by_name(node_name)) {
780 error_setg(errp, "node-name=%s is conflicting with a device id",
781 node_name);
782 goto out;
783 }
784
785 /* takes care of avoiding duplicates node names */
786 if (bdrv_find_node(node_name)) {
787 error_setg(errp, "Duplicate node name");
788 goto out;
789 }
790
791 /* copy node name into the bs and insert it into the graph list */
792 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
793 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
794 out:
795 g_free(gen_node_name);
796 }
797
798 static QemuOptsList bdrv_runtime_opts = {
799 .name = "bdrv_common",
800 .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
801 .desc = {
802 {
803 .name = "node-name",
804 .type = QEMU_OPT_STRING,
805 .help = "Node name of the block device node",
806 },
807 { /* end of list */ }
808 },
809 };
810
811 /*
812 * Common part for opening disk images and files
813 *
814 * Removes all processed options from *options.
815 */
816 static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
817 QDict *options, int flags, BlockDriver *drv, Error **errp)
818 {
819 int ret, open_flags;
820 const char *filename;
821 const char *node_name = NULL;
822 QemuOpts *opts;
823 Error *local_err = NULL;
824
825 assert(drv != NULL);
826 assert(bs->file == NULL);
827 assert(options != NULL && bs->options != options);
828
829 if (file != NULL) {
830 filename = file->bs->filename;
831 } else {
832 filename = qdict_get_try_str(options, "filename");
833 }
834
835 if (drv->bdrv_needs_filename && !filename) {
836 error_setg(errp, "The '%s' block driver requires a file name",
837 drv->format_name);
838 return -EINVAL;
839 }
840
841 trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
842
843 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
844 qemu_opts_absorb_qdict(opts, options, &local_err);
845 if (local_err) {
846 error_propagate(errp, local_err);
847 ret = -EINVAL;
848 goto fail_opts;
849 }
850
851 node_name = qemu_opt_get(opts, "node-name");
852 bdrv_assign_node_name(bs, node_name, &local_err);
853 if (local_err) {
854 error_propagate(errp, local_err);
855 ret = -EINVAL;
856 goto fail_opts;
857 }
858
859 bs->request_alignment = 512;
860 bs->zero_beyond_eof = true;
861 open_flags = bdrv_open_flags(bs, flags);
862 bs->read_only = !(open_flags & BDRV_O_RDWR);
863
864 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
865 error_setg(errp,
866 !bs->read_only && bdrv_is_whitelisted(drv, true)
867 ? "Driver '%s' can only be used for read-only devices"
868 : "Driver '%s' is not whitelisted",
869 drv->format_name);
870 ret = -ENOTSUP;
871 goto fail_opts;
872 }
873
874 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
875 if (flags & BDRV_O_COPY_ON_READ) {
876 if (!bs->read_only) {
877 bdrv_enable_copy_on_read(bs);
878 } else {
879 error_setg(errp, "Can't use copy-on-read on read-only device");
880 ret = -EINVAL;
881 goto fail_opts;
882 }
883 }
884
885 if (filename != NULL) {
886 pstrcpy(bs->filename, sizeof(bs->filename), filename);
887 } else {
888 bs->filename[0] = '\0';
889 }
890 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
891
892 bs->drv = drv;
893 bs->opaque = g_malloc0(drv->instance_size);
894
895 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
896
897 /* Open the image, either directly or using a protocol */
898 if (drv->bdrv_file_open) {
899 assert(file == NULL);
900 assert(!drv->bdrv_needs_filename || filename != NULL);
901 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
902 } else {
903 if (file == NULL) {
904 error_setg(errp, "Can't use '%s' as a block driver for the "
905 "protocol level", drv->format_name);
906 ret = -EINVAL;
907 goto free_and_fail;
908 }
909 bs->file = file;
910 ret = drv->bdrv_open(bs, options, open_flags, &local_err);
911 }
912
913 if (ret < 0) {
914 if (local_err) {
915 error_propagate(errp, local_err);
916 } else if (bs->filename[0]) {
917 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
918 } else {
919 error_setg_errno(errp, -ret, "Could not open image");
920 }
921 goto free_and_fail;
922 }
923
924 if (bs->encrypted) {
925 error_report("Encrypted images are deprecated");
926 error_printf("Support for them will be removed in a future release.\n"
927 "You can use 'qemu-img convert' to convert your image"
928 " to an unencrypted one.\n");
929 }
930
931 ret = refresh_total_sectors(bs, bs->total_sectors);
932 if (ret < 0) {
933 error_setg_errno(errp, -ret, "Could not refresh total sector count");
934 goto free_and_fail;
935 }
936
937 bdrv_refresh_limits(bs, &local_err);
938 if (local_err) {
939 error_propagate(errp, local_err);
940 ret = -EINVAL;
941 goto free_and_fail;
942 }
943
944 assert(bdrv_opt_mem_align(bs) != 0);
945 assert(bdrv_min_mem_align(bs) != 0);
946 assert((bs->request_alignment != 0) || bdrv_is_sg(bs));
947
948 qemu_opts_del(opts);
949 return 0;
950
951 free_and_fail:
952 bs->file = NULL;
953 g_free(bs->opaque);
954 bs->opaque = NULL;
955 bs->drv = NULL;
956 fail_opts:
957 qemu_opts_del(opts);
958 return ret;
959 }
960
961 static QDict *parse_json_filename(const char *filename, Error **errp)
962 {
963 QObject *options_obj;
964 QDict *options;
965 int ret;
966
967 ret = strstart(filename, "json:", &filename);
968 assert(ret);
969
970 options_obj = qobject_from_json(filename);
971 if (!options_obj) {
972 error_setg(errp, "Could not parse the JSON options");
973 return NULL;
974 }
975
976 if (qobject_type(options_obj) != QTYPE_QDICT) {
977 qobject_decref(options_obj);
978 error_setg(errp, "Invalid JSON object given");
979 return NULL;
980 }
981
982 options = qobject_to_qdict(options_obj);
983 qdict_flatten(options);
984
985 return options;
986 }
987
988 /*
989 * Fills in default options for opening images and converts the legacy
990 * filename/flags pair to option QDict entries.
991 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
992 * block driver has been specified explicitly.
993 */
994 static int bdrv_fill_options(QDict **options, const char **pfilename,
995 int *flags, Error **errp)
996 {
997 const char *filename = *pfilename;
998 const char *drvname;
999 bool protocol = *flags & BDRV_O_PROTOCOL;
1000 bool parse_filename = false;
1001 BlockDriver *drv = NULL;
1002 Error *local_err = NULL;
1003
1004 /* Parse json: pseudo-protocol */
1005 if (filename && g_str_has_prefix(filename, "json:")) {
1006 QDict *json_options = parse_json_filename(filename, &local_err);
1007 if (local_err) {
1008 error_propagate(errp, local_err);
1009 return -EINVAL;
1010 }
1011
1012 /* Options given in the filename have lower priority than options
1013 * specified directly */
1014 qdict_join(*options, json_options, false);
1015 QDECREF(json_options);
1016 *pfilename = filename = NULL;
1017 }
1018
1019 drvname = qdict_get_try_str(*options, "driver");
1020 if (drvname) {
1021 drv = bdrv_find_format(drvname);
1022 if (!drv) {
1023 error_setg(errp, "Unknown driver '%s'", drvname);
1024 return -ENOENT;
1025 }
1026 /* If the user has explicitly specified the driver, this choice should
1027 * override the BDRV_O_PROTOCOL flag */
1028 protocol = drv->bdrv_file_open;
1029 }
1030
1031 if (protocol) {
1032 *flags |= BDRV_O_PROTOCOL;
1033 } else {
1034 *flags &= ~BDRV_O_PROTOCOL;
1035 }
1036
1037 /* Fetch the file name from the options QDict if necessary */
1038 if (protocol && filename) {
1039 if (!qdict_haskey(*options, "filename")) {
1040 qdict_put(*options, "filename", qstring_from_str(filename));
1041 parse_filename = true;
1042 } else {
1043 error_setg(errp, "Can't specify 'file' and 'filename' options at "
1044 "the same time");
1045 return -EINVAL;
1046 }
1047 }
1048
1049 /* Find the right block driver */
1050 filename = qdict_get_try_str(*options, "filename");
1051
1052 if (!drvname && protocol) {
1053 if (filename) {
1054 drv = bdrv_find_protocol(filename, parse_filename, errp);
1055 if (!drv) {
1056 return -EINVAL;
1057 }
1058
1059 drvname = drv->format_name;
1060 qdict_put(*options, "driver", qstring_from_str(drvname));
1061 } else {
1062 error_setg(errp, "Must specify either driver or file");
1063 return -EINVAL;
1064 }
1065 }
1066
1067 assert(drv || !protocol);
1068
1069 /* Driver-specific filename parsing */
1070 if (drv && drv->bdrv_parse_filename && parse_filename) {
1071 drv->bdrv_parse_filename(filename, *options, &local_err);
1072 if (local_err) {
1073 error_propagate(errp, local_err);
1074 return -EINVAL;
1075 }
1076
1077 if (!drv->bdrv_needs_filename) {
1078 qdict_del(*options, "filename");
1079 }
1080 }
1081
1082 if (runstate_check(RUN_STATE_INMIGRATE)) {
1083 *flags |= BDRV_O_INCOMING;
1084 }
1085
1086 return 0;
1087 }
1088
1089 static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
1090 BlockDriverState *child_bs,
1091 const BdrvChildRole *child_role)
1092 {
1093 BdrvChild *child = g_new(BdrvChild, 1);
1094 *child = (BdrvChild) {
1095 .bs = child_bs,
1096 .role = child_role,
1097 };
1098
1099 QLIST_INSERT_HEAD(&parent_bs->children, child, next);
1100 QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent);
1101
1102 return child;
1103 }
1104
1105 static void bdrv_detach_child(BdrvChild *child)
1106 {
1107 QLIST_REMOVE(child, next);
1108 QLIST_REMOVE(child, next_parent);
1109 g_free(child);
1110 }
1111
1112 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
1113 {
1114 BlockDriverState *child_bs;
1115
1116 if (child == NULL) {
1117 return;
1118 }
1119
1120 if (child->bs->inherits_from == parent) {
1121 child->bs->inherits_from = NULL;
1122 }
1123
1124 child_bs = child->bs;
1125 bdrv_detach_child(child);
1126 bdrv_unref(child_bs);
1127 }
1128
1129 /*
1130 * Sets the backing file link of a BDS. A new reference is created; callers
1131 * which don't need their own reference any more must call bdrv_unref().
1132 */
1133 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1134 {
1135 if (backing_hd) {
1136 bdrv_ref(backing_hd);
1137 }
1138
1139 if (bs->backing) {
1140 assert(bs->backing_blocker);
1141 bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
1142 bdrv_unref_child(bs, bs->backing);
1143 } else if (backing_hd) {
1144 error_setg(&bs->backing_blocker,
1145 "node is used as backing hd of '%s'",
1146 bdrv_get_device_or_node_name(bs));
1147 }
1148
1149 if (!backing_hd) {
1150 error_free(bs->backing_blocker);
1151 bs->backing_blocker = NULL;
1152 bs->backing = NULL;
1153 goto out;
1154 }
1155 bs->backing = bdrv_attach_child(bs, backing_hd, &child_backing);
1156 bs->open_flags &= ~BDRV_O_NO_BACKING;
1157 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1158 pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1159 backing_hd->drv ? backing_hd->drv->format_name : "");
1160
1161 bdrv_op_block_all(backing_hd, bs->backing_blocker);
1162 /* Otherwise we won't be able to commit due to check in bdrv_commit */
1163 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1164 bs->backing_blocker);
1165 out:
1166 bdrv_refresh_limits(bs, NULL);
1167 }
1168
1169 /*
1170 * Opens the backing file for a BlockDriverState if not yet open
1171 *
1172 * options is a QDict of options to pass to the block drivers, or NULL for an
1173 * empty set of options. The reference to the QDict is transferred to this
1174 * function (even on failure), so if the caller intends to reuse the dictionary,
1175 * it needs to use QINCREF() before calling bdrv_file_open.
1176 */
1177 int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
1178 {
1179 char *backing_filename = g_malloc0(PATH_MAX);
1180 int ret = 0;
1181 BlockDriverState *backing_hd;
1182 Error *local_err = NULL;
1183
1184 if (bs->backing != NULL) {
1185 QDECREF(options);
1186 goto free_exit;
1187 }
1188
1189 /* NULL means an empty set of options */
1190 if (options == NULL) {
1191 options = qdict_new();
1192 }
1193
1194 bs->open_flags &= ~BDRV_O_NO_BACKING;
1195 if (qdict_haskey(options, "file.filename")) {
1196 backing_filename[0] = '\0';
1197 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1198 QDECREF(options);
1199 goto free_exit;
1200 } else {
1201 bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1202 &local_err);
1203 if (local_err) {
1204 ret = -EINVAL;
1205 error_propagate(errp, local_err);
1206 QDECREF(options);
1207 goto free_exit;
1208 }
1209 }
1210
1211 if (!bs->drv || !bs->drv->supports_backing) {
1212 ret = -EINVAL;
1213 error_setg(errp, "Driver doesn't support backing files");
1214 QDECREF(options);
1215 goto free_exit;
1216 }
1217
1218 backing_hd = bdrv_new();
1219
1220 if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1221 qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1222 }
1223
1224 assert(bs->backing == NULL);
1225 ret = bdrv_open_inherit(&backing_hd,
1226 *backing_filename ? backing_filename : NULL,
1227 NULL, options, 0, bs, &child_backing, &local_err);
1228 if (ret < 0) {
1229 bdrv_unref(backing_hd);
1230 backing_hd = NULL;
1231 bs->open_flags |= BDRV_O_NO_BACKING;
1232 error_setg(errp, "Could not open backing file: %s",
1233 error_get_pretty(local_err));
1234 error_free(local_err);
1235 goto free_exit;
1236 }
1237
1238 /* Hook up the backing file link; drop our reference, bs owns the
1239 * backing_hd reference now */
1240 bdrv_set_backing_hd(bs, backing_hd);
1241 bdrv_unref(backing_hd);
1242
1243 free_exit:
1244 g_free(backing_filename);
1245 return ret;
1246 }
1247
1248 /*
1249 * Opens a disk image whose options are given as BlockdevRef in another block
1250 * device's options.
1251 *
1252 * If allow_none is true, no image will be opened if filename is false and no
1253 * BlockdevRef is given. NULL will be returned, but errp remains unset.
1254 *
1255 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1256 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1257 * itself, all options starting with "${bdref_key}." are considered part of the
1258 * BlockdevRef.
1259 *
1260 * The BlockdevRef will be removed from the options QDict.
1261 */
1262 BdrvChild *bdrv_open_child(const char *filename,
1263 QDict *options, const char *bdref_key,
1264 BlockDriverState* parent,
1265 const BdrvChildRole *child_role,
1266 bool allow_none, Error **errp)
1267 {
1268 BdrvChild *c = NULL;
1269 BlockDriverState *bs;
1270 QDict *image_options;
1271 int ret;
1272 char *bdref_key_dot;
1273 const char *reference;
1274
1275 assert(child_role != NULL);
1276
1277 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1278 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1279 g_free(bdref_key_dot);
1280
1281 reference = qdict_get_try_str(options, bdref_key);
1282 if (!filename && !reference && !qdict_size(image_options)) {
1283 if (!allow_none) {
1284 error_setg(errp, "A block device must be specified for \"%s\"",
1285 bdref_key);
1286 }
1287 QDECREF(image_options);
1288 goto done;
1289 }
1290
1291 bs = NULL;
1292 ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
1293 parent, child_role, errp);
1294 if (ret < 0) {
1295 goto done;
1296 }
1297
1298 c = bdrv_attach_child(parent, bs, child_role);
1299
1300 done:
1301 qdict_del(options, bdref_key);
1302 return c;
1303 }
1304
1305 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
1306 {
1307 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1308 char *tmp_filename = g_malloc0(PATH_MAX + 1);
1309 int64_t total_size;
1310 QemuOpts *opts = NULL;
1311 QDict *snapshot_options;
1312 BlockDriverState *bs_snapshot;
1313 Error *local_err = NULL;
1314 int ret;
1315
1316 /* if snapshot, we create a temporary backing file and open it
1317 instead of opening 'filename' directly */
1318
1319 /* Get the required size from the image */
1320 total_size = bdrv_getlength(bs);
1321 if (total_size < 0) {
1322 ret = total_size;
1323 error_setg_errno(errp, -total_size, "Could not get image size");
1324 goto out;
1325 }
1326
1327 /* Create the temporary image */
1328 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1329 if (ret < 0) {
1330 error_setg_errno(errp, -ret, "Could not get temporary filename");
1331 goto out;
1332 }
1333
1334 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1335 &error_abort);
1336 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1337 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
1338 qemu_opts_del(opts);
1339 if (ret < 0) {
1340 error_setg_errno(errp, -ret, "Could not create temporary overlay "
1341 "'%s': %s", tmp_filename,
1342 error_get_pretty(local_err));
1343 error_free(local_err);
1344 goto out;
1345 }
1346
1347 /* Prepare a new options QDict for the temporary file */
1348 snapshot_options = qdict_new();
1349 qdict_put(snapshot_options, "file.driver",
1350 qstring_from_str("file"));
1351 qdict_put(snapshot_options, "file.filename",
1352 qstring_from_str(tmp_filename));
1353 qdict_put(snapshot_options, "driver",
1354 qstring_from_str("qcow2"));
1355
1356 bs_snapshot = bdrv_new();
1357
1358 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1359 flags, &local_err);
1360 if (ret < 0) {
1361 error_propagate(errp, local_err);
1362 goto out;
1363 }
1364
1365 bdrv_append(bs_snapshot, bs);
1366
1367 out:
1368 g_free(tmp_filename);
1369 return ret;
1370 }
1371
1372 /*
1373 * Opens a disk image (raw, qcow2, vmdk, ...)
1374 *
1375 * options is a QDict of options to pass to the block drivers, or NULL for an
1376 * empty set of options. The reference to the QDict belongs to the block layer
1377 * after the call (even on failure), so if the caller intends to reuse the
1378 * dictionary, it needs to use QINCREF() before calling bdrv_open.
1379 *
1380 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1381 * If it is not NULL, the referenced BDS will be reused.
1382 *
1383 * The reference parameter may be used to specify an existing block device which
1384 * should be opened. If specified, neither options nor a filename may be given,
1385 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1386 */
1387 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
1388 const char *reference, QDict *options, int flags,
1389 BlockDriverState *parent,
1390 const BdrvChildRole *child_role, Error **errp)
1391 {
1392 int ret;
1393 BdrvChild *file = NULL;
1394 BlockDriverState *bs;
1395 BlockDriver *drv = NULL;
1396 const char *drvname;
1397 Error *local_err = NULL;
1398 int snapshot_flags = 0;
1399
1400 assert(pbs);
1401 assert(!child_role || !flags);
1402 assert(!child_role == !parent);
1403
1404 if (reference) {
1405 bool options_non_empty = options ? qdict_size(options) : false;
1406 QDECREF(options);
1407
1408 if (*pbs) {
1409 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1410 "another block device");
1411 return -EINVAL;
1412 }
1413
1414 if (filename || options_non_empty) {
1415 error_setg(errp, "Cannot reference an existing block device with "
1416 "additional options or a new filename");
1417 return -EINVAL;
1418 }
1419
1420 bs = bdrv_lookup_bs(reference, reference, errp);
1421 if (!bs) {
1422 return -ENODEV;
1423 }
1424 bdrv_ref(bs);
1425 *pbs = bs;
1426 return 0;
1427 }
1428
1429 if (*pbs) {
1430 bs = *pbs;
1431 } else {
1432 bs = bdrv_new();
1433 }
1434
1435 /* NULL means an empty set of options */
1436 if (options == NULL) {
1437 options = qdict_new();
1438 }
1439
1440 if (child_role) {
1441 bs->inherits_from = parent;
1442 flags = child_role->inherit_flags(parent->open_flags);
1443 }
1444
1445 ret = bdrv_fill_options(&options, &filename, &flags, &local_err);
1446 if (local_err) {
1447 goto fail;
1448 }
1449
1450 /* Find the right image format driver */
1451 drvname = qdict_get_try_str(options, "driver");
1452 if (drvname) {
1453 drv = bdrv_find_format(drvname);
1454 qdict_del(options, "driver");
1455 if (!drv) {
1456 error_setg(errp, "Unknown driver: '%s'", drvname);
1457 ret = -EINVAL;
1458 goto fail;
1459 }
1460 }
1461
1462 assert(drvname || !(flags & BDRV_O_PROTOCOL));
1463
1464 bs->open_flags = flags;
1465 bs->options = options;
1466 options = qdict_clone_shallow(options);
1467
1468 /* Open image file without format layer */
1469 if ((flags & BDRV_O_PROTOCOL) == 0) {
1470 if (flags & BDRV_O_RDWR) {
1471 flags |= BDRV_O_ALLOW_RDWR;
1472 }
1473 if (flags & BDRV_O_SNAPSHOT) {
1474 snapshot_flags = bdrv_temp_snapshot_flags(flags);
1475 flags = bdrv_backing_flags(flags);
1476 }
1477
1478 bs->open_flags = flags;
1479
1480 file = bdrv_open_child(filename, options, "file", bs,
1481 &child_file, true, &local_err);
1482 if (local_err) {
1483 ret = -EINVAL;
1484 goto fail;
1485 }
1486 }
1487
1488 /* Image format probing */
1489 bs->probed = !drv;
1490 if (!drv && file) {
1491 ret = find_image_format(file->bs, filename, &drv, &local_err);
1492 if (ret < 0) {
1493 goto fail;
1494 }
1495 } else if (!drv) {
1496 error_setg(errp, "Must specify either driver or file");
1497 ret = -EINVAL;
1498 goto fail;
1499 }
1500
1501 /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
1502 assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
1503 /* file must be NULL if a protocol BDS is about to be created
1504 * (the inverse results in an error message from bdrv_open_common()) */
1505 assert(!(flags & BDRV_O_PROTOCOL) || !file);
1506
1507 /* Open the image */
1508 ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
1509 if (ret < 0) {
1510 goto fail;
1511 }
1512
1513 if (file && (bs->file != file)) {
1514 bdrv_unref_child(bs, file);
1515 file = NULL;
1516 }
1517
1518 /* If there is a backing file, use it */
1519 if ((flags & BDRV_O_NO_BACKING) == 0) {
1520 QDict *backing_options;
1521
1522 qdict_extract_subqdict(options, &backing_options, "backing.");
1523 ret = bdrv_open_backing_file(bs, backing_options, &local_err);
1524 if (ret < 0) {
1525 goto close_and_fail;
1526 }
1527 }
1528
1529 bdrv_refresh_filename(bs);
1530
1531 /* Check if any unknown options were used */
1532 if (options && (qdict_size(options) != 0)) {
1533 const QDictEntry *entry = qdict_first(options);
1534 if (flags & BDRV_O_PROTOCOL) {
1535 error_setg(errp, "Block protocol '%s' doesn't support the option "
1536 "'%s'", drv->format_name, entry->key);
1537 } else {
1538 error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1539 "support the option '%s'", drv->format_name,
1540 bdrv_get_device_name(bs), entry->key);
1541 }
1542
1543 ret = -EINVAL;
1544 goto close_and_fail;
1545 }
1546
1547 if (!bdrv_key_required(bs)) {
1548 if (bs->blk) {
1549 blk_dev_change_media_cb(bs->blk, true);
1550 }
1551 } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1552 && !runstate_check(RUN_STATE_INMIGRATE)
1553 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1554 error_setg(errp,
1555 "Guest must be stopped for opening of encrypted image");
1556 ret = -EBUSY;
1557 goto close_and_fail;
1558 }
1559
1560 QDECREF(options);
1561 *pbs = bs;
1562
1563 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1564 * temporary snapshot afterwards. */
1565 if (snapshot_flags) {
1566 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
1567 if (local_err) {
1568 goto close_and_fail;
1569 }
1570 }
1571
1572 return 0;
1573
1574 fail:
1575 if (file != NULL) {
1576 bdrv_unref_child(bs, file);
1577 }
1578 QDECREF(bs->options);
1579 QDECREF(options);
1580 bs->options = NULL;
1581 if (!*pbs) {
1582 /* If *pbs is NULL, a new BDS has been created in this function and
1583 needs to be freed now. Otherwise, it does not need to be closed,
1584 since it has not really been opened yet. */
1585 bdrv_unref(bs);
1586 }
1587 if (local_err) {
1588 error_propagate(errp, local_err);
1589 }
1590 return ret;
1591
1592 close_and_fail:
1593 /* See fail path, but now the BDS has to be always closed */
1594 if (*pbs) {
1595 bdrv_close(bs);
1596 } else {
1597 bdrv_unref(bs);
1598 }
1599 QDECREF(options);
1600 if (local_err) {
1601 error_propagate(errp, local_err);
1602 }
1603 return ret;
1604 }
1605
1606 int bdrv_open(BlockDriverState **pbs, const char *filename,
1607 const char *reference, QDict *options, int flags, Error **errp)
1608 {
1609 return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
1610 NULL, errp);
1611 }
1612
1613 typedef struct BlockReopenQueueEntry {
1614 bool prepared;
1615 BDRVReopenState state;
1616 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1617 } BlockReopenQueueEntry;
1618
1619 /*
1620 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1621 * reopen of multiple devices.
1622 *
1623 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1624 * already performed, or alternatively may be NULL a new BlockReopenQueue will
1625 * be created and initialized. This newly created BlockReopenQueue should be
1626 * passed back in for subsequent calls that are intended to be of the same
1627 * atomic 'set'.
1628 *
1629 * bs is the BlockDriverState to add to the reopen queue.
1630 *
1631 * options contains the changed options for the associated bs
1632 * (the BlockReopenQueue takes ownership)
1633 *
1634 * flags contains the open flags for the associated bs
1635 *
1636 * returns a pointer to bs_queue, which is either the newly allocated
1637 * bs_queue, or the existing bs_queue being used.
1638 *
1639 */
1640 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1641 BlockDriverState *bs,
1642 QDict *options, int flags)
1643 {
1644 assert(bs != NULL);
1645
1646 BlockReopenQueueEntry *bs_entry;
1647 BdrvChild *child;
1648 QDict *old_options;
1649
1650 if (bs_queue == NULL) {
1651 bs_queue = g_new0(BlockReopenQueue, 1);
1652 QSIMPLEQ_INIT(bs_queue);
1653 }
1654
1655 if (!options) {
1656 options = qdict_new();
1657 }
1658
1659 old_options = qdict_clone_shallow(bs->options);
1660 qdict_join(options, old_options, false);
1661 QDECREF(old_options);
1662
1663 /* bdrv_open() masks this flag out */
1664 flags &= ~BDRV_O_PROTOCOL;
1665
1666 QLIST_FOREACH(child, &bs->children, next) {
1667 int child_flags;
1668
1669 if (child->bs->inherits_from != bs) {
1670 continue;
1671 }
1672
1673 child_flags = child->role->inherit_flags(flags);
1674 /* TODO Pass down child flags (backing.*, extents.*, ...) */
1675 bdrv_reopen_queue(bs_queue, child->bs, NULL, child_flags);
1676 }
1677
1678 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1679 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1680
1681 bs_entry->state.bs = bs;
1682 bs_entry->state.options = options;
1683 bs_entry->state.flags = flags;
1684
1685 return bs_queue;
1686 }
1687
1688 /*
1689 * Reopen multiple BlockDriverStates atomically & transactionally.
1690 *
1691 * The queue passed in (bs_queue) must have been built up previous
1692 * via bdrv_reopen_queue().
1693 *
1694 * Reopens all BDS specified in the queue, with the appropriate
1695 * flags. All devices are prepared for reopen, and failure of any
1696 * device will cause all device changes to be abandonded, and intermediate
1697 * data cleaned up.
1698 *
1699 * If all devices prepare successfully, then the changes are committed
1700 * to all devices.
1701 *
1702 */
1703 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1704 {
1705 int ret = -1;
1706 BlockReopenQueueEntry *bs_entry, *next;
1707 Error *local_err = NULL;
1708
1709 assert(bs_queue != NULL);
1710
1711 bdrv_drain_all();
1712
1713 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1714 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1715 error_propagate(errp, local_err);
1716 goto cleanup;
1717 }
1718 bs_entry->prepared = true;
1719 }
1720
1721 /* If we reach this point, we have success and just need to apply the
1722 * changes
1723 */
1724 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1725 bdrv_reopen_commit(&bs_entry->state);
1726 }
1727
1728 ret = 0;
1729
1730 cleanup:
1731 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1732 if (ret && bs_entry->prepared) {
1733 bdrv_reopen_abort(&bs_entry->state);
1734 }
1735 QDECREF(bs_entry->state.options);
1736 g_free(bs_entry);
1737 }
1738 g_free(bs_queue);
1739 return ret;
1740 }
1741
1742
1743 /* Reopen a single BlockDriverState with the specified flags. */
1744 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1745 {
1746 int ret = -1;
1747 Error *local_err = NULL;
1748 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
1749
1750 ret = bdrv_reopen_multiple(queue, &local_err);
1751 if (local_err != NULL) {
1752 error_propagate(errp, local_err);
1753 }
1754 return ret;
1755 }
1756
1757
1758 /*
1759 * Prepares a BlockDriverState for reopen. All changes are staged in the
1760 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1761 * the block driver layer .bdrv_reopen_prepare()
1762 *
1763 * bs is the BlockDriverState to reopen
1764 * flags are the new open flags
1765 * queue is the reopen queue
1766 *
1767 * Returns 0 on success, non-zero on error. On error errp will be set
1768 * as well.
1769 *
1770 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1771 * It is the responsibility of the caller to then call the abort() or
1772 * commit() for any other BDS that have been left in a prepare() state
1773 *
1774 */
1775 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1776 Error **errp)
1777 {
1778 int ret = -1;
1779 Error *local_err = NULL;
1780 BlockDriver *drv;
1781
1782 assert(reopen_state != NULL);
1783 assert(reopen_state->bs->drv != NULL);
1784 drv = reopen_state->bs->drv;
1785
1786 /* if we are to stay read-only, do not allow permission change
1787 * to r/w */
1788 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1789 reopen_state->flags & BDRV_O_RDWR) {
1790 error_setg(errp, "Node '%s' is read only",
1791 bdrv_get_device_or_node_name(reopen_state->bs));
1792 goto error;
1793 }
1794
1795
1796 ret = bdrv_flush(reopen_state->bs);
1797 if (ret) {
1798 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1799 strerror(-ret));
1800 goto error;
1801 }
1802
1803 if (drv->bdrv_reopen_prepare) {
1804 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1805 if (ret) {
1806 if (local_err != NULL) {
1807 error_propagate(errp, local_err);
1808 } else {
1809 error_setg(errp, "failed while preparing to reopen image '%s'",
1810 reopen_state->bs->filename);
1811 }
1812 goto error;
1813 }
1814 } else {
1815 /* It is currently mandatory to have a bdrv_reopen_prepare()
1816 * handler for each supported drv. */
1817 error_setg(errp, "Block format '%s' used by node '%s' "
1818 "does not support reopening files", drv->format_name,
1819 bdrv_get_device_or_node_name(reopen_state->bs));
1820 ret = -1;
1821 goto error;
1822 }
1823
1824 /* Options that are not handled are only okay if they are unchanged
1825 * compared to the old state. It is expected that some options are only
1826 * used for the initial open, but not reopen (e.g. filename) */
1827 if (qdict_size(reopen_state->options)) {
1828 const QDictEntry *entry = qdict_first(reopen_state->options);
1829
1830 do {
1831 QString *new_obj = qobject_to_qstring(entry->value);
1832 const char *new = qstring_get_str(new_obj);
1833 const char *old = qdict_get_try_str(reopen_state->bs->options,
1834 entry->key);
1835
1836 if (!old || strcmp(new, old)) {
1837 error_setg(errp, "Cannot change the option '%s'", entry->key);
1838 ret = -EINVAL;
1839 goto error;
1840 }
1841 } while ((entry = qdict_next(reopen_state->options, entry)));
1842 }
1843
1844 ret = 0;
1845
1846 error:
1847 return ret;
1848 }
1849
1850 /*
1851 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1852 * makes them final by swapping the staging BlockDriverState contents into
1853 * the active BlockDriverState contents.
1854 */
1855 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1856 {
1857 BlockDriver *drv;
1858
1859 assert(reopen_state != NULL);
1860 drv = reopen_state->bs->drv;
1861 assert(drv != NULL);
1862
1863 /* If there are any driver level actions to take */
1864 if (drv->bdrv_reopen_commit) {
1865 drv->bdrv_reopen_commit(reopen_state);
1866 }
1867
1868 /* set BDS specific flags now */
1869 reopen_state->bs->open_flags = reopen_state->flags;
1870 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1871 BDRV_O_CACHE_WB);
1872 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
1873
1874 bdrv_refresh_limits(reopen_state->bs, NULL);
1875 }
1876
1877 /*
1878 * Abort the reopen, and delete and free the staged changes in
1879 * reopen_state
1880 */
1881 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1882 {
1883 BlockDriver *drv;
1884
1885 assert(reopen_state != NULL);
1886 drv = reopen_state->bs->drv;
1887 assert(drv != NULL);
1888
1889 if (drv->bdrv_reopen_abort) {
1890 drv->bdrv_reopen_abort(reopen_state);
1891 }
1892 }
1893
1894
1895 void bdrv_close(BlockDriverState *bs)
1896 {
1897 BdrvAioNotifier *ban, *ban_next;
1898
1899 if (bs->job) {
1900 block_job_cancel_sync(bs->job);
1901 }
1902
1903 /* Disable I/O limits and drain all pending throttled requests */
1904 if (bs->io_limits_enabled) {
1905 bdrv_io_limits_disable(bs);
1906 }
1907
1908 bdrv_drain(bs); /* complete I/O */
1909 bdrv_flush(bs);
1910 bdrv_drain(bs); /* in case flush left pending I/O */
1911 notifier_list_notify(&bs->close_notifiers, bs);
1912
1913 if (bs->blk) {
1914 blk_dev_change_media_cb(bs->blk, false);
1915 }
1916
1917 if (bs->drv) {
1918 BdrvChild *child, *next;
1919
1920 bs->drv->bdrv_close(bs);
1921 bs->drv = NULL;
1922
1923 bdrv_set_backing_hd(bs, NULL);
1924
1925 if (bs->file != NULL) {
1926 bdrv_unref_child(bs, bs->file);
1927 bs->file = NULL;
1928 }
1929
1930 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
1931 /* TODO Remove bdrv_unref() from drivers' close function and use
1932 * bdrv_unref_child() here */
1933 if (child->bs->inherits_from == bs) {
1934 child->bs->inherits_from = NULL;
1935 }
1936 bdrv_detach_child(child);
1937 }
1938
1939 g_free(bs->opaque);
1940 bs->opaque = NULL;
1941 bs->copy_on_read = 0;
1942 bs->backing_file[0] = '\0';
1943 bs->backing_format[0] = '\0';
1944 bs->total_sectors = 0;
1945 bs->encrypted = 0;
1946 bs->valid_key = 0;
1947 bs->sg = 0;
1948 bs->zero_beyond_eof = false;
1949 QDECREF(bs->options);
1950 bs->options = NULL;
1951 QDECREF(bs->full_open_options);
1952 bs->full_open_options = NULL;
1953 }
1954
1955 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
1956 g_free(ban);
1957 }
1958 QLIST_INIT(&bs->aio_notifiers);
1959 }
1960
1961 void bdrv_close_all(void)
1962 {
1963 BlockDriverState *bs;
1964
1965 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1966 AioContext *aio_context = bdrv_get_aio_context(bs);
1967
1968 aio_context_acquire(aio_context);
1969 bdrv_close(bs);
1970 aio_context_release(aio_context);
1971 }
1972 }
1973
1974 /* make a BlockDriverState anonymous by removing from bdrv_state and
1975 * graph_bdrv_state list.
1976 Also, NULL terminate the device_name to prevent double remove */
1977 void bdrv_make_anon(BlockDriverState *bs)
1978 {
1979 /*
1980 * Take care to remove bs from bdrv_states only when it's actually
1981 * in it. Note that bs->device_list.tqe_prev is initially null,
1982 * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish
1983 * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
1984 * resetting it to null on remove.
1985 */
1986 if (bs->device_list.tqe_prev) {
1987 QTAILQ_REMOVE(&bdrv_states, bs, device_list);
1988 bs->device_list.tqe_prev = NULL;
1989 }
1990 if (bs->node_name[0] != '\0') {
1991 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1992 }
1993 bs->node_name[0] = '\0';
1994 }
1995
1996 /* Fields that need to stay with the top-level BDS */
1997 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
1998 BlockDriverState *bs_src)
1999 {
2000 /* move some fields that need to stay attached to the device */
2001
2002 /* dev info */
2003 bs_dest->copy_on_read = bs_src->copy_on_read;
2004
2005 bs_dest->enable_write_cache = bs_src->enable_write_cache;
2006
2007 /* dirty bitmap */
2008 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
2009 }
2010
2011 static void change_parent_backing_link(BlockDriverState *from,
2012 BlockDriverState *to)
2013 {
2014 BdrvChild *c, *next;
2015
2016 QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
2017 assert(c->role != &child_backing);
2018 c->bs = to;
2019 QLIST_REMOVE(c, next_parent);
2020 QLIST_INSERT_HEAD(&to->parents, c, next_parent);
2021 bdrv_ref(to);
2022 bdrv_unref(from);
2023 }
2024 if (from->blk) {
2025 blk_set_bs(from->blk, to);
2026 if (!to->device_list.tqe_prev) {
2027 QTAILQ_INSERT_BEFORE(from, to, device_list);
2028 }
2029 QTAILQ_REMOVE(&bdrv_states, from, device_list);
2030 }
2031 }
2032
2033 static void swap_feature_fields(BlockDriverState *bs_top,
2034 BlockDriverState *bs_new)
2035 {
2036 BlockDriverState tmp;
2037
2038 bdrv_move_feature_fields(&tmp, bs_top);
2039 bdrv_move_feature_fields(bs_top, bs_new);
2040 bdrv_move_feature_fields(bs_new, &tmp);
2041
2042 assert(!bs_new->throttle_state);
2043 if (bs_top->throttle_state) {
2044 assert(bs_top->io_limits_enabled);
2045 bdrv_io_limits_enable(bs_new, throttle_group_get_name(bs_top));
2046 bdrv_io_limits_disable(bs_top);
2047 }
2048 }
2049
2050 /*
2051 * Add new bs contents at the top of an image chain while the chain is
2052 * live, while keeping required fields on the top layer.
2053 *
2054 * This will modify the BlockDriverState fields, and swap contents
2055 * between bs_new and bs_top. Both bs_new and bs_top are modified.
2056 *
2057 * bs_new must not be attached to a BlockBackend.
2058 *
2059 * This function does not create any image files.
2060 *
2061 * bdrv_append() takes ownership of a bs_new reference and unrefs it because
2062 * that's what the callers commonly need. bs_new will be referenced by the old
2063 * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
2064 * reference of its own, it must call bdrv_ref().
2065 */
2066 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2067 {
2068 assert(!bdrv_requests_pending(bs_top));
2069 assert(!bdrv_requests_pending(bs_new));
2070
2071 bdrv_ref(bs_top);
2072 change_parent_backing_link(bs_top, bs_new);
2073
2074 /* Some fields always stay on top of the backing file chain */
2075 swap_feature_fields(bs_top, bs_new);
2076
2077 bdrv_set_backing_hd(bs_new, bs_top);
2078 bdrv_unref(bs_top);
2079
2080 /* bs_new is now referenced by its new parents, we don't need the
2081 * additional reference any more. */
2082 bdrv_unref(bs_new);
2083 }
2084
2085 void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
2086 {
2087 assert(!bdrv_requests_pending(old));
2088 assert(!bdrv_requests_pending(new));
2089
2090 bdrv_ref(old);
2091
2092 if (old->blk) {
2093 /* As long as these fields aren't in BlockBackend, but in the top-level
2094 * BlockDriverState, it's not possible for a BDS to have two BBs.
2095 *
2096 * We really want to copy the fields from old to new, but we go for a
2097 * swap instead so that pointers aren't duplicated and cause trouble.
2098 * (Also, bdrv_swap() used to do the same.) */
2099 assert(!new->blk);
2100 swap_feature_fields(old, new);
2101 }
2102 change_parent_backing_link(old, new);
2103
2104 /* Change backing files if a previously independent node is added to the
2105 * chain. For active commit, we replace top by its own (indirect) backing
2106 * file and don't do anything here so we don't build a loop. */
2107 if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
2108 bdrv_set_backing_hd(new, backing_bs(old));
2109 bdrv_set_backing_hd(old, NULL);
2110 }
2111
2112 bdrv_unref(old);
2113 }
2114
2115 static void bdrv_delete(BlockDriverState *bs)
2116 {
2117 assert(!bs->job);
2118 assert(bdrv_op_blocker_is_empty(bs));
2119 assert(!bs->refcnt);
2120 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
2121
2122 bdrv_close(bs);
2123
2124 /* remove from list, if necessary */
2125 bdrv_make_anon(bs);
2126
2127 g_free(bs);
2128 }
2129
2130 /*
2131 * Run consistency checks on an image
2132 *
2133 * Returns 0 if the check could be completed (it doesn't mean that the image is
2134 * free of errors) or -errno when an internal error occurred. The results of the
2135 * check are stored in res.
2136 */
2137 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2138 {
2139 if (bs->drv == NULL) {
2140 return -ENOMEDIUM;
2141 }
2142 if (bs->drv->bdrv_check == NULL) {
2143 return -ENOTSUP;
2144 }
2145
2146 memset(res, 0, sizeof(*res));
2147 return bs->drv->bdrv_check(bs, res, fix);
2148 }
2149
2150 #define COMMIT_BUF_SECTORS 2048
2151
2152 /* commit COW file into the raw image */
2153 int bdrv_commit(BlockDriverState *bs)
2154 {
2155 BlockDriver *drv = bs->drv;
2156 int64_t sector, total_sectors, length, backing_length;
2157 int n, ro, open_flags;
2158 int ret = 0;
2159 uint8_t *buf = NULL;
2160
2161 if (!drv)
2162 return -ENOMEDIUM;
2163
2164 if (!bs->backing) {
2165 return -ENOTSUP;
2166 }
2167
2168 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2169 bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
2170 return -EBUSY;
2171 }
2172
2173 ro = bs->backing->bs->read_only;
2174 open_flags = bs->backing->bs->open_flags;
2175
2176 if (ro) {
2177 if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
2178 return -EACCES;
2179 }
2180 }
2181
2182 length = bdrv_getlength(bs);
2183 if (length < 0) {
2184 ret = length;
2185 goto ro_cleanup;
2186 }
2187
2188 backing_length = bdrv_getlength(bs->backing->bs);
2189 if (backing_length < 0) {
2190 ret = backing_length;
2191 goto ro_cleanup;
2192 }
2193
2194 /* If our top snapshot is larger than the backing file image,
2195 * grow the backing file image if possible. If not possible,
2196 * we must return an error */
2197 if (length > backing_length) {
2198 ret = bdrv_truncate(bs->backing->bs, length);
2199 if (ret < 0) {
2200 goto ro_cleanup;
2201 }
2202 }
2203
2204 total_sectors = length >> BDRV_SECTOR_BITS;
2205
2206 /* qemu_try_blockalign() for bs will choose an alignment that works for
2207 * bs->backing->bs as well, so no need to compare the alignment manually. */
2208 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2209 if (buf == NULL) {
2210 ret = -ENOMEM;
2211 goto ro_cleanup;
2212 }
2213
2214 for (sector = 0; sector < total_sectors; sector += n) {
2215 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2216 if (ret < 0) {
2217 goto ro_cleanup;
2218 }
2219 if (ret) {
2220 ret = bdrv_read(bs, sector, buf, n);
2221 if (ret < 0) {
2222 goto ro_cleanup;
2223 }
2224
2225 ret = bdrv_write(bs->backing->bs, sector, buf, n);
2226 if (ret < 0) {
2227 goto ro_cleanup;
2228 }
2229 }
2230 }
2231
2232 if (drv->bdrv_make_empty) {
2233 ret = drv->bdrv_make_empty(bs);
2234 if (ret < 0) {
2235 goto ro_cleanup;
2236 }
2237 bdrv_flush(bs);
2238 }
2239
2240 /*
2241 * Make sure all data we wrote to the backing device is actually
2242 * stable on disk.
2243 */
2244 if (bs->backing) {
2245 bdrv_flush(bs->backing->bs);
2246 }
2247
2248 ret = 0;
2249 ro_cleanup:
2250 qemu_vfree(buf);
2251
2252 if (ro) {
2253 /* ignoring error return here */
2254 bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
2255 }
2256
2257 return ret;
2258 }
2259
2260 int bdrv_commit_all(void)
2261 {
2262 BlockDriverState *bs;
2263
2264 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2265 AioContext *aio_context = bdrv_get_aio_context(bs);
2266
2267 aio_context_acquire(aio_context);
2268 if (bs->drv && bs->backing) {
2269 int ret = bdrv_commit(bs);
2270 if (ret < 0) {
2271 aio_context_release(aio_context);
2272 return ret;
2273 }
2274 }
2275 aio_context_release(aio_context);
2276 }
2277 return 0;
2278 }
2279
2280 /*
2281 * Return values:
2282 * 0 - success
2283 * -EINVAL - backing format specified, but no file
2284 * -ENOSPC - can't update the backing file because no space is left in the
2285 * image file header
2286 * -ENOTSUP - format driver doesn't support changing the backing file
2287 */
2288 int bdrv_change_backing_file(BlockDriverState *bs,
2289 const char *backing_file, const char *backing_fmt)
2290 {
2291 BlockDriver *drv = bs->drv;
2292 int ret;
2293
2294 /* Backing file format doesn't make sense without a backing file */
2295 if (backing_fmt && !backing_file) {
2296 return -EINVAL;
2297 }
2298
2299 if (drv->bdrv_change_backing_file != NULL) {
2300 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2301 } else {
2302 ret = -ENOTSUP;
2303 }
2304
2305 if (ret == 0) {
2306 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2307 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2308 }
2309 return ret;
2310 }
2311
2312 /*
2313 * Finds the image layer in the chain that has 'bs' as its backing file.
2314 *
2315 * active is the current topmost image.
2316 *
2317 * Returns NULL if bs is not found in active's image chain,
2318 * or if active == bs.
2319 *
2320 * Returns the bottommost base image if bs == NULL.
2321 */
2322 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2323 BlockDriverState *bs)
2324 {
2325 while (active && bs != backing_bs(active)) {
2326 active = backing_bs(active);
2327 }
2328
2329 return active;
2330 }
2331
2332 /* Given a BDS, searches for the base layer. */
2333 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2334 {
2335 return bdrv_find_overlay(bs, NULL);
2336 }
2337
2338 /*
2339 * Drops images above 'base' up to and including 'top', and sets the image
2340 * above 'top' to have base as its backing file.
2341 *
2342 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2343 * information in that overlay can be properly updated.
2344 *
2345 * E.g., this will convert the following chain:
2346 * bottom <- base <- intermediate <- top <- active
2347 *
2348 * to
2349 *
2350 * bottom <- base <- active
2351 *
2352 * It is allowed for bottom==base, in which case it converts:
2353 *
2354 * base <- intermediate <- top <- active
2355 *
2356 * to
2357 *
2358 * base <- active
2359 *
2360 * If backing_file_str is non-NULL, it will be used when modifying top's
2361 * overlay image metadata.
2362 *
2363 * Error conditions:
2364 * if active == top, that is considered an error
2365 *
2366 */
2367 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2368 BlockDriverState *base, const char *backing_file_str)
2369 {
2370 BlockDriverState *new_top_bs = NULL;
2371 int ret = -EIO;
2372
2373 if (!top->drv || !base->drv) {
2374 goto exit;
2375 }
2376
2377 new_top_bs = bdrv_find_overlay(active, top);
2378
2379 if (new_top_bs == NULL) {
2380 /* we could not find the image above 'top', this is an error */
2381 goto exit;
2382 }
2383
2384 /* special case of new_top_bs->backing->bs already pointing to base - nothing
2385 * to do, no intermediate images */
2386 if (backing_bs(new_top_bs) == base) {
2387 ret = 0;
2388 goto exit;
2389 }
2390
2391 /* Make sure that base is in the backing chain of top */
2392 if (!bdrv_chain_contains(top, base)) {
2393 goto exit;
2394 }
2395
2396 /* success - we can delete the intermediate states, and link top->base */
2397 backing_file_str = backing_file_str ? backing_file_str : base->filename;
2398 ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2399 base->drv ? base->drv->format_name : "");
2400 if (ret) {
2401 goto exit;
2402 }
2403 bdrv_set_backing_hd(new_top_bs, base);
2404
2405 ret = 0;
2406 exit:
2407 return ret;
2408 }
2409
2410 /**
2411 * Truncate file to 'offset' bytes (needed only for file protocols)
2412 */
2413 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2414 {
2415 BlockDriver *drv = bs->drv;
2416 int ret;
2417 if (!drv)
2418 return -ENOMEDIUM;
2419 if (!drv->bdrv_truncate)
2420 return -ENOTSUP;
2421 if (bs->read_only)
2422 return -EACCES;
2423
2424 ret = drv->bdrv_truncate(bs, offset);
2425 if (ret == 0) {
2426 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2427 bdrv_dirty_bitmap_truncate(bs);
2428 if (bs->blk) {
2429 blk_dev_resize_cb(bs->blk);
2430 }
2431 }
2432 return ret;
2433 }
2434
2435 /**
2436 * Length of an allocated file in bytes. Sparse files are counted by actual
2437 * allocated space. Return < 0 if error or unknown.
2438 */
2439 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2440 {
2441 BlockDriver *drv = bs->drv;
2442 if (!drv) {
2443 return -ENOMEDIUM;
2444 }
2445 if (drv->bdrv_get_allocated_file_size) {
2446 return drv->bdrv_get_allocated_file_size(bs);
2447 }
2448 if (bs->file) {
2449 return bdrv_get_allocated_file_size(bs->file->bs);
2450 }
2451 return -ENOTSUP;
2452 }
2453
2454 /**
2455 * Return number of sectors on success, -errno on error.
2456 */
2457 int64_t bdrv_nb_sectors(BlockDriverState *bs)
2458 {
2459 BlockDriver *drv = bs->drv;
2460
2461 if (!drv)
2462 return -ENOMEDIUM;
2463
2464 if (drv->has_variable_length) {
2465 int ret = refresh_total_sectors(bs, bs->total_sectors);
2466 if (ret < 0) {
2467 return ret;
2468 }
2469 }
2470 return bs->total_sectors;
2471 }
2472
2473 /**
2474 * Return length in bytes on success, -errno on error.
2475 * The length is always a multiple of BDRV_SECTOR_SIZE.
2476 */
2477 int64_t bdrv_getlength(BlockDriverState *bs)
2478 {
2479 int64_t ret = bdrv_nb_sectors(bs);
2480
2481 ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2482 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2483 }
2484
2485 /* return 0 as number of sectors if no device present or error */
2486 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2487 {
2488 int64_t nb_sectors = bdrv_nb_sectors(bs);
2489
2490 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2491 }
2492
2493 int bdrv_is_read_only(BlockDriverState *bs)
2494 {
2495 return bs->read_only;
2496 }
2497
2498 int bdrv_is_sg(BlockDriverState *bs)
2499 {
2500 return bs->sg;
2501 }
2502
2503 int bdrv_enable_write_cache(BlockDriverState *bs)
2504 {
2505 return bs->enable_write_cache;
2506 }
2507
2508 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2509 {
2510 bs->enable_write_cache = wce;
2511
2512 /* so a reopen() will preserve wce */
2513 if (wce) {
2514 bs->open_flags |= BDRV_O_CACHE_WB;
2515 } else {
2516 bs->open_flags &= ~BDRV_O_CACHE_WB;
2517 }
2518 }
2519
2520 int bdrv_is_encrypted(BlockDriverState *bs)
2521 {
2522 if (bs->backing && bs->backing->bs->encrypted) {
2523 return 1;
2524 }
2525 return bs->encrypted;
2526 }
2527
2528 int bdrv_key_required(BlockDriverState *bs)
2529 {
2530 BdrvChild *backing = bs->backing;
2531
2532 if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
2533 return 1;
2534 }
2535 return (bs->encrypted && !bs->valid_key);
2536 }
2537
2538 int bdrv_set_key(BlockDriverState *bs, const char *key)
2539 {
2540 int ret;
2541 if (bs->backing && bs->backing->bs->encrypted) {
2542 ret = bdrv_set_key(bs->backing->bs, key);
2543 if (ret < 0)
2544 return ret;
2545 if (!bs->encrypted)
2546 return 0;
2547 }
2548 if (!bs->encrypted) {
2549 return -EINVAL;
2550 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2551 return -ENOMEDIUM;
2552 }
2553 ret = bs->drv->bdrv_set_key(bs, key);
2554 if (ret < 0) {
2555 bs->valid_key = 0;
2556 } else if (!bs->valid_key) {
2557 bs->valid_key = 1;
2558 if (bs->blk) {
2559 /* call the change callback now, we skipped it on open */
2560 blk_dev_change_media_cb(bs->blk, true);
2561 }
2562 }
2563 return ret;
2564 }
2565
2566 /*
2567 * Provide an encryption key for @bs.
2568 * If @key is non-null:
2569 * If @bs is not encrypted, fail.
2570 * Else if the key is invalid, fail.
2571 * Else set @bs's key to @key, replacing the existing key, if any.
2572 * If @key is null:
2573 * If @bs is encrypted and still lacks a key, fail.
2574 * Else do nothing.
2575 * On failure, store an error object through @errp if non-null.
2576 */
2577 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2578 {
2579 if (key) {
2580 if (!bdrv_is_encrypted(bs)) {
2581 error_setg(errp, "Node '%s' is not encrypted",
2582 bdrv_get_device_or_node_name(bs));
2583 } else if (bdrv_set_key(bs, key) < 0) {
2584 error_setg(errp, QERR_INVALID_PASSWORD);
2585 }
2586 } else {
2587 if (bdrv_key_required(bs)) {
2588 error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2589 "'%s' (%s) is encrypted",
2590 bdrv_get_device_or_node_name(bs),
2591 bdrv_get_encrypted_filename(bs));
2592 }
2593 }
2594 }
2595
2596 const char *bdrv_get_format_name(BlockDriverState *bs)
2597 {
2598 return bs->drv ? bs->drv->format_name : NULL;
2599 }
2600
/*
 * qsort() comparator for an array of C-string pointers.
 *
 * qsort() passes the comparator pointers to the array elements, so each
 * argument is a pointer to a 'const char *' and has to be dereferenced once
 * before the strings themselves can be compared.  Passing the arguments
 * straight to strcmp() would compare the bytes of the pointer values.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
2605
2606 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2607 void *opaque)
2608 {
2609 BlockDriver *drv;
2610 int count = 0;
2611 int i;
2612 const char **formats = NULL;
2613
2614 QLIST_FOREACH(drv, &bdrv_drivers, list) {
2615 if (drv->format_name) {
2616 bool found = false;
2617 int i = count;
2618 while (formats && i && !found) {
2619 found = !strcmp(formats[--i], drv->format_name);
2620 }
2621
2622 if (!found) {
2623 formats = g_renew(const char *, formats, count + 1);
2624 formats[count++] = drv->format_name;
2625 }
2626 }
2627 }
2628
2629 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2630
2631 for (i = 0; i < count; i++) {
2632 it(opaque, formats[i]);
2633 }
2634
2635 g_free(formats);
2636 }
2637
2638 /* This function is to find a node in the bs graph */
2639 BlockDriverState *bdrv_find_node(const char *node_name)
2640 {
2641 BlockDriverState *bs;
2642
2643 assert(node_name);
2644
2645 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2646 if (!strcmp(node_name, bs->node_name)) {
2647 return bs;
2648 }
2649 }
2650 return NULL;
2651 }
2652
2653 /* Put this QMP function here so it can access the static graph_bdrv_states. */
2654 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2655 {
2656 BlockDeviceInfoList *list, *entry;
2657 BlockDriverState *bs;
2658
2659 list = NULL;
2660 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2661 BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2662 if (!info) {
2663 qapi_free_BlockDeviceInfoList(list);
2664 return NULL;
2665 }
2666 entry = g_malloc0(sizeof(*entry));
2667 entry->value = info;
2668 entry->next = list;
2669 list = entry;
2670 }
2671
2672 return list;
2673 }
2674
2675 BlockDriverState *bdrv_lookup_bs(const char *device,
2676 const char *node_name,
2677 Error **errp)
2678 {
2679 BlockBackend *blk;
2680 BlockDriverState *bs;
2681
2682 if (device) {
2683 blk = blk_by_name(device);
2684
2685 if (blk) {
2686 return blk_bs(blk);
2687 }
2688 }
2689
2690 if (node_name) {
2691 bs = bdrv_find_node(node_name);
2692
2693 if (bs) {
2694 return bs;
2695 }
2696 }
2697
2698 error_setg(errp, "Cannot find device=%s nor node_name=%s",
2699 device ? device : "",
2700 node_name ? node_name : "");
2701 return NULL;
2702 }
2703
2704 /* If 'base' is in the same chain as 'top', return true. Otherwise,
2705 * return false. If either argument is NULL, return false. */
2706 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2707 {
2708 while (top && top != base) {
2709 top = backing_bs(top);
2710 }
2711
2712 return top != NULL;
2713 }
2714
2715 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2716 {
2717 if (!bs) {
2718 return QTAILQ_FIRST(&graph_bdrv_states);
2719 }
2720 return QTAILQ_NEXT(bs, node_list);
2721 }
2722
2723 BlockDriverState *bdrv_next(BlockDriverState *bs)
2724 {
2725 if (!bs) {
2726 return QTAILQ_FIRST(&bdrv_states);
2727 }
2728 return QTAILQ_NEXT(bs, device_list);
2729 }
2730
2731 const char *bdrv_get_node_name(const BlockDriverState *bs)
2732 {
2733 return bs->node_name;
2734 }
2735
2736 /* TODO check what callers really want: bs->node_name or blk_name() */
2737 const char *bdrv_get_device_name(const BlockDriverState *bs)
2738 {
2739 return bs->blk ? blk_name(bs->blk) : "";
2740 }
2741
2742 /* This can be used to identify nodes that might not have a device
2743 * name associated. Since node and device names live in the same
2744 * namespace, the result is unambiguous. The exception is if both are
2745 * absent, then this returns an empty (non-null) string. */
2746 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
2747 {
2748 return bs->blk ? blk_name(bs->blk) : bs->node_name;
2749 }
2750
2751 int bdrv_get_flags(BlockDriverState *bs)
2752 {
2753 return bs->open_flags;
2754 }
2755
2756 int bdrv_has_zero_init_1(BlockDriverState *bs)
2757 {
2758 return 1;
2759 }
2760
2761 int bdrv_has_zero_init(BlockDriverState *bs)
2762 {
2763 assert(bs->drv);
2764
2765 /* If BS is a copy on write image, it is initialized to
2766 the contents of the base image, which may not be zeroes. */
2767 if (bs->backing) {
2768 return 0;
2769 }
2770 if (bs->drv->bdrv_has_zero_init) {
2771 return bs->drv->bdrv_has_zero_init(bs);
2772 }
2773
2774 /* safe default */
2775 return 0;
2776 }
2777
2778 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
2779 {
2780 BlockDriverInfo bdi;
2781
2782 if (bs->backing) {
2783 return false;
2784 }
2785
2786 if (bdrv_get_info(bs, &bdi) == 0) {
2787 return bdi.unallocated_blocks_are_zero;
2788 }
2789
2790 return false;
2791 }
2792
2793 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
2794 {
2795 BlockDriverInfo bdi;
2796
2797 if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) {
2798 return false;
2799 }
2800
2801 if (bdrv_get_info(bs, &bdi) == 0) {
2802 return bdi.can_write_zeroes_with_unmap;
2803 }
2804
2805 return false;
2806 }
2807
2808 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2809 {
2810 if (bs->backing && bs->backing->bs->encrypted)
2811 return bs->backing_file;
2812 else if (bs->encrypted)
2813 return bs->filename;
2814 else
2815 return NULL;
2816 }
2817
2818 void bdrv_get_backing_filename(BlockDriverState *bs,
2819 char *filename, int filename_size)
2820 {
2821 pstrcpy(filename, filename_size, bs->backing_file);
2822 }
2823
2824 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2825 {
2826 BlockDriver *drv = bs->drv;
2827 if (!drv)
2828 return -ENOMEDIUM;
2829 if (!drv->bdrv_get_info)
2830 return -ENOTSUP;
2831 memset(bdi, 0, sizeof(*bdi));
2832 return drv->bdrv_get_info(bs, bdi);
2833 }
2834
2835 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
2836 {
2837 BlockDriver *drv = bs->drv;
2838 if (drv && drv->bdrv_get_specific_info) {
2839 return drv->bdrv_get_specific_info(bs);
2840 }
2841 return NULL;
2842 }
2843
2844 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2845 {
2846 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
2847 return;
2848 }
2849
2850 bs->drv->bdrv_debug_event(bs, event);
2851 }
2852
2853 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
2854 const char *tag)
2855 {
2856 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
2857 bs = bs->file ? bs->file->bs : NULL;
2858 }
2859
2860 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
2861 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
2862 }
2863
2864 return -ENOTSUP;
2865 }
2866
2867 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
2868 {
2869 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
2870 bs = bs->file ? bs->file->bs : NULL;
2871 }
2872
2873 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
2874 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
2875 }
2876
2877 return -ENOTSUP;
2878 }
2879
2880 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
2881 {
2882 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
2883 bs = bs->file ? bs->file->bs : NULL;
2884 }
2885
2886 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
2887 return bs->drv->bdrv_debug_resume(bs, tag);
2888 }
2889
2890 return -ENOTSUP;
2891 }
2892
2893 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
2894 {
2895 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
2896 bs = bs->file ? bs->file->bs : NULL;
2897 }
2898
2899 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
2900 return bs->drv->bdrv_debug_is_suspended(bs, tag);
2901 }
2902
2903 return false;
2904 }
2905
2906 int bdrv_is_snapshot(BlockDriverState *bs)
2907 {
2908 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2909 }
2910
2911 /* backing_file can either be relative, or absolute, or a protocol. If it is
2912 * relative, it must be relative to the chain. So, passing in bs->filename
2913 * from a BDS as backing_file should not be done, as that may be relative to
2914 * the CWD rather than the chain. */
2915 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2916 const char *backing_file)
2917 {
2918 char *filename_full = NULL;
2919 char *backing_file_full = NULL;
2920 char *filename_tmp = NULL;
2921 int is_protocol = 0;
2922 BlockDriverState *curr_bs = NULL;
2923 BlockDriverState *retval = NULL;
2924
2925 if (!bs || !bs->drv || !backing_file) {
2926 return NULL;
2927 }
2928
2929 filename_full = g_malloc(PATH_MAX);
2930 backing_file_full = g_malloc(PATH_MAX);
2931 filename_tmp = g_malloc(PATH_MAX);
2932
2933 is_protocol = path_has_protocol(backing_file);
2934
2935 for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) {
2936
2937 /* If either of the filename paths is actually a protocol, then
2938 * compare unmodified paths; otherwise make paths relative */
2939 if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
2940 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
2941 retval = curr_bs->backing->bs;
2942 break;
2943 }
2944 } else {
2945 /* If not an absolute filename path, make it relative to the current
2946 * image's filename path */
2947 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
2948 backing_file);
2949
2950 /* We are going to compare absolute pathnames */
2951 if (!realpath(filename_tmp, filename_full)) {
2952 continue;
2953 }
2954
2955 /* We need to make sure the backing filename we are comparing against
2956 * is relative to the current image filename (or absolute) */
2957 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
2958 curr_bs->backing_file);
2959
2960 if (!realpath(filename_tmp, backing_file_full)) {
2961 continue;
2962 }
2963
2964 if (strcmp(backing_file_full, filename_full) == 0) {
2965 retval = curr_bs->backing->bs;
2966 break;
2967 }
2968 }
2969 }
2970
2971 g_free(filename_full);
2972 g_free(backing_file_full);
2973 g_free(filename_tmp);
2974 return retval;
2975 }
2976
2977 int bdrv_get_backing_file_depth(BlockDriverState *bs)
2978 {
2979 if (!bs->drv) {
2980 return 0;
2981 }
2982
2983 if (!bs->backing) {
2984 return 0;
2985 }
2986
2987 return 1 + bdrv_get_backing_file_depth(bs->backing->bs);
2988 }
2989
2990 void bdrv_init(void)
2991 {
2992 module_call_init(MODULE_INIT_BLOCK);
2993 }
2994
2995 void bdrv_init_with_whitelist(void)
2996 {
2997 use_bdrv_whitelist = 1;
2998 bdrv_init();
2999 }
3000
3001 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
3002 {
3003 Error *local_err = NULL;
3004 int ret;
3005
3006 if (!bs->drv) {
3007 return;
3008 }
3009
3010 if (!(bs->open_flags & BDRV_O_INCOMING)) {
3011 return;
3012 }
3013 bs->open_flags &= ~BDRV_O_INCOMING;
3014
3015 if (bs->drv->bdrv_invalidate_cache) {
3016 bs->drv->bdrv_invalidate_cache(bs, &local_err);
3017 } else if (bs->file) {
3018 bdrv_invalidate_cache(bs->file->bs, &local_err);
3019 }
3020 if (local_err) {
3021 error_propagate(errp, local_err);
3022 return;
3023 }
3024
3025 ret = refresh_total_sectors(bs, bs->total_sectors);
3026 if (ret < 0) {
3027 error_setg_errno(errp, -ret, "Could not refresh total sector count");
3028 return;
3029 }
3030 }
3031
3032 void bdrv_invalidate_cache_all(Error **errp)
3033 {
3034 BlockDriverState *bs;
3035 Error *local_err = NULL;
3036
3037 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3038 AioContext *aio_context = bdrv_get_aio_context(bs);
3039
3040 aio_context_acquire(aio_context);
3041 bdrv_invalidate_cache(bs, &local_err);
3042 aio_context_release(aio_context);
3043 if (local_err) {
3044 error_propagate(errp, local_err);
3045 return;
3046 }
3047 }
3048 }
3049
3050 /**************************************************************/
3051 /* removable device support */
3052
3053 /**
3054 * Return TRUE if the media is present
3055 */
3056 bool bdrv_is_inserted(BlockDriverState *bs)
3057 {
3058 BlockDriver *drv = bs->drv;
3059 BdrvChild *child;
3060
3061 if (!drv) {
3062 return false;
3063 }
3064 if (drv->bdrv_is_inserted) {
3065 return drv->bdrv_is_inserted(bs);
3066 }
3067 QLIST_FOREACH(child, &bs->children, next) {
3068 if (!bdrv_is_inserted(child->bs)) {
3069 return false;
3070 }
3071 }
3072 return true;
3073 }
3074
3075 /**
3076 * Return whether the media changed since the last call to this
3077 * function, or -ENOTSUP if we don't know. Most drivers don't know.
3078 */
3079 int bdrv_media_changed(BlockDriverState *bs)
3080 {
3081 BlockDriver *drv = bs->drv;
3082
3083 if (drv && drv->bdrv_media_changed) {
3084 return drv->bdrv_media_changed(bs);
3085 }
3086 return -ENOTSUP;
3087 }
3088
3089 /**
3090 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3091 */
3092 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3093 {
3094 BlockDriver *drv = bs->drv;
3095 const char *device_name;
3096
3097 if (drv && drv->bdrv_eject) {
3098 drv->bdrv_eject(bs, eject_flag);
3099 }
3100
3101 device_name = bdrv_get_device_name(bs);
3102 if (device_name[0] != '\0') {
3103 qapi_event_send_device_tray_moved(device_name,
3104 eject_flag, &error_abort);
3105 }
3106 }
3107
3108 /**
3109 * Lock or unlock the media (if it is locked, the user won't be able
3110 * to eject it manually).
3111 */
3112 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3113 {
3114 BlockDriver *drv = bs->drv;
3115
3116 trace_bdrv_lock_medium(bs, locked);
3117
3118 if (drv && drv->bdrv_lock_medium) {
3119 drv->bdrv_lock_medium(bs, locked);
3120 }
3121 }
3122
3123 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
3124 {
3125 BdrvDirtyBitmap *bm;
3126
3127 assert(name);
3128 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3129 if (bm->name && !strcmp(name, bm->name)) {
3130 return bm;
3131 }
3132 }
3133 return NULL;
3134 }
3135
3136 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
3137 {
3138 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3139 g_free(bitmap->name);
3140 bitmap->name = NULL;
3141 }
3142
3143 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
3144 uint32_t granularity,
3145 const char *name,
3146 Error **errp)
3147 {
3148 int64_t bitmap_size;
3149 BdrvDirtyBitmap *bitmap;
3150 uint32_t sector_granularity;
3151
3152 assert((granularity & (granularity - 1)) == 0);
3153
3154 if (name && bdrv_find_dirty_bitmap(bs, name)) {
3155 error_setg(errp, "Bitmap already exists: %s", name);
3156 return NULL;
3157 }
3158 sector_granularity = granularity >> BDRV_SECTOR_BITS;
3159 assert(sector_granularity);
3160 bitmap_size = bdrv_nb_sectors(bs);
3161 if (bitmap_size < 0) {
3162 error_setg_errno(errp, -bitmap_size, "could not get length of device");
3163 errno = -bitmap_size;
3164 return NULL;
3165 }
3166 bitmap = g_new0(BdrvDirtyBitmap, 1);
3167 bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
3168 bitmap->size = bitmap_size;
3169 bitmap->name = g_strdup(name);
3170 bitmap->disabled = false;
3171 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
3172 return bitmap;
3173 }
3174
3175 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
3176 {
3177 return bitmap->successor;
3178 }
3179
3180 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
3181 {
3182 return !(bitmap->disabled || bitmap->successor);
3183 }
3184
3185 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
3186 {
3187 if (bdrv_dirty_bitmap_frozen(bitmap)) {
3188 return DIRTY_BITMAP_STATUS_FROZEN;
3189 } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3190 return DIRTY_BITMAP_STATUS_DISABLED;
3191 } else {
3192 return DIRTY_BITMAP_STATUS_ACTIVE;
3193 }
3194 }
3195
3196 /**
3197 * Create a successor bitmap destined to replace this bitmap after an operation.
3198 * Requires that the bitmap is not frozen and has no successor.
3199 */
3200 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
3201 BdrvDirtyBitmap *bitmap, Error **errp)
3202 {
3203 uint64_t granularity;
3204 BdrvDirtyBitmap *child;
3205
3206 if (bdrv_dirty_bitmap_frozen(bitmap)) {
3207 error_setg(errp, "Cannot create a successor for a bitmap that is "
3208 "currently frozen");
3209 return -1;
3210 }
3211 assert(!bitmap->successor);
3212
3213 /* Create an anonymous successor */
3214 granularity = bdrv_dirty_bitmap_granularity(bitmap);
3215 child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
3216 if (!child) {
3217 return -1;
3218 }
3219
3220 /* Successor will be on or off based on our current state. */
3221 child->disabled = bitmap->disabled;
3222
3223 /* Install the successor and freeze the parent */
3224 bitmap->successor = child;
3225 return 0;
3226 }
3227
3228 /**
3229 * For a bitmap with a successor, yield our name to the successor,
3230 * delete the old bitmap, and return a handle to the new bitmap.
3231 */
3232 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
3233 BdrvDirtyBitmap *bitmap,
3234 Error **errp)
3235 {
3236 char *name;
3237 BdrvDirtyBitmap *successor = bitmap->successor;
3238
3239 if (successor == NULL) {
3240 error_setg(errp, "Cannot relinquish control if "
3241 "there's no successor present");
3242 return NULL;
3243 }
3244
3245 name = bitmap->name;
3246 bitmap->name = NULL;
3247 successor->name = name;
3248 bitmap->successor = NULL;
3249 bdrv_release_dirty_bitmap(bs, bitmap);
3250
3251 return successor;
3252 }
3253
3254 /**
3255 * In cases of failure where we can no longer safely delete the parent,
3256 * we may wish to re-join the parent and child/successor.
3257 * The merged parent will be un-frozen, but not explicitly re-enabled.
3258 */
3259 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
3260 BdrvDirtyBitmap *parent,
3261 Error **errp)
3262 {
3263 BdrvDirtyBitmap *successor = parent->successor;
3264
3265 if (!successor) {
3266 error_setg(errp, "Cannot reclaim a successor when none is present");
3267 return NULL;
3268 }
3269
3270 if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
3271 error_setg(errp, "Merging of parent and successor bitmap failed");
3272 return NULL;
3273 }
3274 bdrv_release_dirty_bitmap(bs, successor);
3275 parent->successor = NULL;
3276
3277 return parent;
3278 }
3279
3280 /**
3281 * Truncates _all_ bitmaps attached to a BDS.
3282 */
3283 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
3284 {
3285 BdrvDirtyBitmap *bitmap;
3286 uint64_t size = bdrv_nb_sectors(bs);
3287
3288 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3289 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3290 hbitmap_truncate(bitmap->bitmap, size);
3291 bitmap->size = size;
3292 }
3293 }
3294
3295 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
3296 {
3297 BdrvDirtyBitmap *bm, *next;
3298 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
3299 if (bm == bitmap) {
3300 assert(!bdrv_dirty_bitmap_frozen(bm));
3301 QLIST_REMOVE(bitmap, list);
3302 hbitmap_free(bitmap->bitmap);
3303 g_free(bitmap->name);
3304 g_free(bitmap);
3305 return;
3306 }
3307 }
3308 }
3309
3310 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3311 {
3312 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3313 bitmap->disabled = true;
3314 }
3315
3316 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3317 {
3318 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3319 bitmap->disabled = false;
3320 }
3321
3322 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
3323 {
3324 BdrvDirtyBitmap *bm;
3325 BlockDirtyInfoList *list = NULL;
3326 BlockDirtyInfoList **plist = &list;
3327
3328 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3329 BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
3330 BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
3331 info->count = bdrv_get_dirty_count(bm);
3332 info->granularity = bdrv_dirty_bitmap_granularity(bm);
3333 info->has_name = !!bm->name;
3334 info->name = g_strdup(bm->name);
3335 info->status = bdrv_dirty_bitmap_status(bm);
3336 entry->value = info;
3337 *plist = entry;
3338 plist = &entry->next;
3339 }
3340
3341 return list;
3342 }
3343
3344 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
3345 {
3346 if (bitmap) {
3347 return hbitmap_get(bitmap->bitmap, sector);
3348 } else {
3349 return 0;
3350 }
3351 }
3352
3353 /**
3354 * Chooses a default granularity based on the existing cluster size,
3355 * but clamped between [4K, 64K]. Defaults to 64K in the case that there
3356 * is no cluster size information available.
3357 */
3358 uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
3359 {
3360 BlockDriverInfo bdi;
3361 uint32_t granularity;
3362
3363 if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
3364 granularity = MAX(4096, bdi.cluster_size);
3365 granularity = MIN(65536, granularity);
3366 } else {
3367 granularity = 65536;
3368 }
3369
3370 return granularity;
3371 }
3372
3373 uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
3374 {
3375 return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
3376 }
3377
3378 void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
3379 {
3380 hbitmap_iter_init(hbi, bitmap->bitmap, 0);
3381 }
3382
3383 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3384 int64_t cur_sector, int nr_sectors)
3385 {
3386 assert(bdrv_dirty_bitmap_enabled(bitmap));
3387 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3388 }
3389
3390 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3391 int64_t cur_sector, int nr_sectors)
3392 {
3393 assert(bdrv_dirty_bitmap_enabled(bitmap));
3394 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3395 }
3396
3397 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3398 {
3399 assert(bdrv_dirty_bitmap_enabled(bitmap));
3400 hbitmap_reset_all(bitmap->bitmap);
3401 }
3402
3403 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
3404 int nr_sectors)
3405 {
3406 BdrvDirtyBitmap *bitmap;
3407 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3408 if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3409 continue;
3410 }
3411 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3412 }
3413 }
3414
3415 /**
3416 * Advance an HBitmapIter to an arbitrary offset.
3417 */
3418 void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
3419 {
3420 assert(hbi->hb);
3421 hbitmap_iter_init(hbi, hbi->hb, offset);
3422 }
3423
3424 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
3425 {
3426 return hbitmap_count(bitmap->bitmap);
3427 }
3428
3429 /* Get a reference to bs */
3430 void bdrv_ref(BlockDriverState *bs)
3431 {
3432 bs->refcnt++;
3433 }
3434
3435 /* Release a previously grabbed reference to bs.
3436 * If after releasing, reference count is zero, the BlockDriverState is
3437 * deleted. */
3438 void bdrv_unref(BlockDriverState *bs)
3439 {
3440 if (!bs) {
3441 return;
3442 }
3443 assert(bs->refcnt > 0);
3444 if (--bs->refcnt == 0) {
3445 bdrv_delete(bs);
3446 }
3447 }
3448
3449 struct BdrvOpBlocker {
3450 Error *reason;
3451 QLIST_ENTRY(BdrvOpBlocker) list;
3452 };
3453
3454 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3455 {
3456 BdrvOpBlocker *blocker;
3457 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3458 if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3459 blocker = QLIST_FIRST(&bs->op_blockers[op]);
3460 if (errp) {
3461 error_setg(errp, "Node '%s' is busy: %s",
3462 bdrv_get_device_or_node_name(bs),
3463 error_get_pretty(blocker->reason));
3464 }
3465 return true;
3466 }
3467 return false;
3468 }
3469
3470 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3471 {
3472 BdrvOpBlocker *blocker;
3473 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3474
3475 blocker = g_new0(BdrvOpBlocker, 1);
3476 blocker->reason = reason;
3477 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3478 }
3479
3480 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3481 {
3482 BdrvOpBlocker *blocker, *next;
3483 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3484 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3485 if (blocker->reason == reason) {
3486 QLIST_REMOVE(blocker, list);
3487 g_free(blocker);
3488 }
3489 }
3490 }
3491
3492 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3493 {
3494 int i;
3495 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3496 bdrv_op_block(bs, i, reason);
3497 }
3498 }
3499
3500 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3501 {
3502 int i;
3503 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3504 bdrv_op_unblock(bs, i, reason);
3505 }
3506 }
3507
3508 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3509 {
3510 int i;
3511
3512 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3513 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3514 return false;
3515 }
3516 }
3517 return true;
3518 }
3519
3520 void bdrv_img_create(const char *filename, const char *fmt,
3521 const char *base_filename, const char *base_fmt,
3522 char *options, uint64_t img_size, int flags,
3523 Error **errp, bool quiet)
3524 {
3525 QemuOptsList *create_opts = NULL;
3526 QemuOpts *opts = NULL;
3527 const char *backing_fmt, *backing_file;
3528 int64_t size;
3529 BlockDriver *drv, *proto_drv;
3530 Error *local_err = NULL;
3531 int ret = 0;
3532
3533 /* Find driver and parse its options */
3534 drv = bdrv_find_format(fmt);
3535 if (!drv) {
3536 error_setg(errp, "Unknown file format '%s'", fmt);
3537 return;
3538 }
3539
3540 proto_drv = bdrv_find_protocol(filename, true, errp);
3541 if (!proto_drv) {
3542 return;
3543 }
3544
3545 if (!drv->create_opts) {
3546 error_setg(errp, "Format driver '%s' does not support image creation",
3547 drv->format_name);
3548 return;
3549 }
3550
3551 if (!proto_drv->create_opts) {
3552 error_setg(errp, "Protocol driver '%s' does not support image creation",
3553 proto_drv->format_name);
3554 return;
3555 }
3556
3557 create_opts = qemu_opts_append(create_opts, drv->create_opts);
3558 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
3559
3560 /* Create parameter list with default values */
3561 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3562 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
3563
3564 /* Parse -o options */
3565 if (options) {
3566 qemu_opts_do_parse(opts, options, NULL, &local_err);
3567 if (local_err) {
3568 error_report_err(local_err);
3569 local_err = NULL;
3570 error_setg(errp, "Invalid options for file format '%s'", fmt);
3571 goto out;
3572 }
3573 }
3574
3575 if (base_filename) {
3576 qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
3577 if (local_err) {
3578 error_setg(errp, "Backing file not supported for file format '%s'",
3579 fmt);
3580 goto out;
3581 }
3582 }
3583
3584 if (base_fmt) {
3585 qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
3586 if (local_err) {
3587 error_setg(errp, "Backing file format not supported for file "
3588 "format '%s'", fmt);
3589 goto out;
3590 }
3591 }
3592
3593 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
3594 if (backing_file) {
3595 if (!strcmp(filename, backing_file)) {
3596 error_setg(errp, "Error: Trying to create an image with the "
3597 "same filename as the backing file");
3598 goto out;
3599 }
3600 }
3601
3602 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
3603
3604 // The size for the image must always be specified, with one exception:
3605 // If we are using a backing file, we can obtain the size from there
3606 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
3607 if (size == -1) {
3608 if (backing_file) {
3609 BlockDriverState *bs;
3610 char *full_backing = g_new0(char, PATH_MAX);
3611 int64_t size;
3612 int back_flags;
3613 QDict *backing_options = NULL;
3614
3615 bdrv_get_full_backing_filename_from_filename(filename, backing_file,
3616 full_backing, PATH_MAX,
3617 &local_err);
3618 if (local_err) {
3619 g_free(full_backing);
3620 goto out;
3621 }
3622
3623 /* backing files always opened read-only */
3624 back_flags =
3625 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
3626
3627 if (backing_fmt) {
3628 backing_options = qdict_new();
3629 qdict_put(backing_options, "driver",
3630 qstring_from_str(backing_fmt));
3631 }
3632
3633 bs = NULL;
3634 ret = bdrv_open(&bs, full_backing, NULL, backing_options,
3635 back_flags, &local_err);
3636 g_free(full_backing);
3637 if (ret < 0) {
3638 goto out;
3639 }
3640 size = bdrv_getlength(bs);
3641 if (size < 0) {
3642 error_setg_errno(errp, -size, "Could not get size of '%s'",
3643 backing_file);
3644 bdrv_unref(bs);
3645 goto out;
3646 }
3647
3648 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
3649
3650 bdrv_unref(bs);
3651 } else {
3652 error_setg(errp, "Image creation needs a size parameter");
3653 goto out;
3654 }
3655 }
3656
3657 if (!quiet) {
3658 printf("Formatting '%s', fmt=%s ", filename, fmt);
3659 qemu_opts_print(opts, " ");
3660 puts("");
3661 }
3662
3663 ret = bdrv_create(drv, filename, opts, &local_err);
3664
3665 if (ret == -EFBIG) {
3666 /* This is generally a better message than whatever the driver would
3667 * deliver (especially because of the cluster_size_hint), since that
3668 * is most probably not much different from "image too large". */
3669 const char *cluster_size_hint = "";
3670 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
3671 cluster_size_hint = " (try using a larger cluster size)";
3672 }
3673 error_setg(errp, "The image size is too large for file format '%s'"
3674 "%s", fmt, cluster_size_hint);
3675 error_free(local_err);
3676 local_err = NULL;
3677 }
3678
3679 out:
3680 qemu_opts_del(opts);
3681 qemu_opts_free(create_opts);
3682 if (local_err) {
3683 error_propagate(errp, local_err);
3684 }
3685 }
3686
3687 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
3688 {
3689 return bs->aio_context;
3690 }
3691
3692 void bdrv_detach_aio_context(BlockDriverState *bs)
3693 {
3694 BdrvAioNotifier *baf;
3695
3696 if (!bs->drv) {
3697 return;
3698 }
3699
3700 QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
3701 baf->detach_aio_context(baf->opaque);
3702 }
3703
3704 if (bs->io_limits_enabled) {
3705 throttle_timers_detach_aio_context(&bs->throttle_timers);
3706 }
3707 if (bs->drv->bdrv_detach_aio_context) {
3708 bs->drv->bdrv_detach_aio_context(bs);
3709 }
3710 if (bs->file) {
3711 bdrv_detach_aio_context(bs->file->bs);
3712 }
3713 if (bs->backing) {
3714 bdrv_detach_aio_context(bs->backing->bs);
3715 }
3716
3717 bs->aio_context = NULL;
3718 }
3719
3720 void bdrv_attach_aio_context(BlockDriverState *bs,
3721 AioContext *new_context)
3722 {
3723 BdrvAioNotifier *ban;
3724
3725 if (!bs->drv) {
3726 return;
3727 }
3728
3729 bs->aio_context = new_context;
3730
3731 if (bs->backing) {
3732 bdrv_attach_aio_context(bs->backing->bs, new_context);
3733 }
3734 if (bs->file) {
3735 bdrv_attach_aio_context(bs->file->bs, new_context);
3736 }
3737 if (bs->drv->bdrv_attach_aio_context) {
3738 bs->drv->bdrv_attach_aio_context(bs, new_context);
3739 }
3740 if (bs->io_limits_enabled) {
3741 throttle_timers_attach_aio_context(&bs->throttle_timers, new_context);
3742 }
3743
3744 QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
3745 ban->attached_aio_context(new_context, ban->opaque);
3746 }
3747 }
3748
3749 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
3750 {
3751 bdrv_drain(bs); /* ensure there are no in-flight requests */
3752
3753 bdrv_detach_aio_context(bs);
3754
3755 /* This function executes in the old AioContext so acquire the new one in
3756 * case it runs in a different thread.
3757 */
3758 aio_context_acquire(new_context);
3759 bdrv_attach_aio_context(bs, new_context);
3760 aio_context_release(new_context);
3761 }
3762
3763 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
3764 void (*attached_aio_context)(AioContext *new_context, void *opaque),
3765 void (*detach_aio_context)(void *opaque), void *opaque)
3766 {
3767 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
3768 *ban = (BdrvAioNotifier){
3769 .attached_aio_context = attached_aio_context,
3770 .detach_aio_context = detach_aio_context,
3771 .opaque = opaque
3772 };
3773
3774 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
3775 }
3776
3777 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3778 void (*attached_aio_context)(AioContext *,
3779 void *),
3780 void (*detach_aio_context)(void *),
3781 void *opaque)
3782 {
3783 BdrvAioNotifier *ban, *ban_next;
3784
3785 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3786 if (ban->attached_aio_context == attached_aio_context &&
3787 ban->detach_aio_context == detach_aio_context &&
3788 ban->opaque == opaque)
3789 {
3790 QLIST_REMOVE(ban, list);
3791 g_free(ban);
3792
3793 return;
3794 }
3795 }
3796
3797 abort();
3798 }
3799
3800 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
3801 BlockDriverAmendStatusCB *status_cb)
3802 {
3803 if (!bs->drv->bdrv_amend_options) {
3804 return -ENOTSUP;
3805 }
3806 return bs->drv->bdrv_amend_options(bs, opts, status_cb);
3807 }
3808
3809 /* This function will be called by the bdrv_recurse_is_first_non_filter method
3810 * of block filter and by bdrv_is_first_non_filter.
3811 * It is used to test if the given bs is the candidate or recurse more in the
3812 * node graph.
3813 */
3814 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
3815 BlockDriverState *candidate)
3816 {
3817 /* return false if basic checks fails */
3818 if (!bs || !bs->drv) {
3819 return false;
3820 }
3821
3822 /* the code reached a non block filter driver -> check if the bs is
3823 * the same as the candidate. It's the recursion termination condition.
3824 */
3825 if (!bs->drv->is_filter) {
3826 return bs == candidate;
3827 }
3828 /* Down this path the driver is a block filter driver */
3829
3830 /* If the block filter recursion method is defined use it to recurse down
3831 * the node graph.
3832 */
3833 if (bs->drv->bdrv_recurse_is_first_non_filter) {
3834 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
3835 }
3836
3837 /* the driver is a block filter but don't allow to recurse -> return false
3838 */
3839 return false;
3840 }
3841
3842 /* This function checks if the candidate is the first non filter bs down it's
3843 * bs chain. Since we don't have pointers to parents it explore all bs chains
3844 * from the top. Some filters can choose not to pass down the recursion.
3845 */
3846 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
3847 {
3848 BlockDriverState *bs;
3849
3850 /* walk down the bs forest recursively */
3851 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3852 bool perm;
3853
3854 /* try to recurse in this top level bs */
3855 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
3856
3857 /* candidate is the first non filter */
3858 if (perm) {
3859 return true;
3860 }
3861 }
3862
3863 return false;
3864 }
3865
3866 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
3867 const char *node_name, Error **errp)
3868 {
3869 BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
3870 AioContext *aio_context;
3871
3872 if (!to_replace_bs) {
3873 error_setg(errp, "Node name '%s' not found", node_name);
3874 return NULL;
3875 }
3876
3877 aio_context = bdrv_get_aio_context(to_replace_bs);
3878 aio_context_acquire(aio_context);
3879
3880 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
3881 to_replace_bs = NULL;
3882 goto out;
3883 }
3884
3885 /* We don't want arbitrary node of the BDS chain to be replaced only the top
3886 * most non filter in order to prevent data corruption.
3887 * Another benefit is that this tests exclude backing files which are
3888 * blocked by the backing blockers.
3889 */
3890 if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) {
3891 error_setg(errp, "Only top most non filter can be replaced");
3892 to_replace_bs = NULL;
3893 goto out;
3894 }
3895
3896 out:
3897 aio_context_release(aio_context);
3898 return to_replace_bs;
3899 }
3900
3901 static bool append_open_options(QDict *d, BlockDriverState *bs)
3902 {
3903 const QDictEntry *entry;
3904 bool found_any = false;
3905
3906 for (entry = qdict_first(bs->options); entry;
3907 entry = qdict_next(bs->options, entry))
3908 {
3909 /* Only take options for this level and exclude all non-driver-specific
3910 * options */
3911 if (!strchr(qdict_entry_key(entry), '.') &&
3912 strcmp(qdict_entry_key(entry), "node-name"))
3913 {
3914 qobject_incref(qdict_entry_value(entry));
3915 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
3916 found_any = true;
3917 }
3918 }
3919
3920 return found_any;
3921 }
3922
3923 /* Updates the following BDS fields:
3924 * - exact_filename: A filename which may be used for opening a block device
3925 * which (mostly) equals the given BDS (even without any
3926 * other options; so reading and writing must return the same
3927 * results, but caching etc. may be different)
3928 * - full_open_options: Options which, when given when opening a block device
3929 * (without a filename), result in a BDS (mostly)
3930 * equalling the given one
3931 * - filename: If exact_filename is set, it is copied here. Otherwise,
3932 * full_open_options is converted to a JSON object, prefixed with
3933 * "json:" (for use through the JSON pseudo protocol) and put here.
3934 */
3935 void bdrv_refresh_filename(BlockDriverState *bs)
3936 {
3937 BlockDriver *drv = bs->drv;
3938 QDict *opts;
3939
3940 if (!drv) {
3941 return;
3942 }
3943
3944 /* This BDS's file name will most probably depend on its file's name, so
3945 * refresh that first */
3946 if (bs->file) {
3947 bdrv_refresh_filename(bs->file->bs);
3948 }
3949
3950 if (drv->bdrv_refresh_filename) {
3951 /* Obsolete information is of no use here, so drop the old file name
3952 * information before refreshing it */
3953 bs->exact_filename[0] = '\0';
3954 if (bs->full_open_options) {
3955 QDECREF(bs->full_open_options);
3956 bs->full_open_options = NULL;
3957 }
3958
3959 drv->bdrv_refresh_filename(bs);
3960 } else if (bs->file) {
3961 /* Try to reconstruct valid information from the underlying file */
3962 bool has_open_options;
3963
3964 bs->exact_filename[0] = '\0';
3965 if (bs->full_open_options) {
3966 QDECREF(bs->full_open_options);
3967 bs->full_open_options = NULL;
3968 }
3969
3970 opts = qdict_new();
3971 has_open_options = append_open_options(opts, bs);
3972
3973 /* If no specific options have been given for this BDS, the filename of
3974 * the underlying file should suffice for this one as well */
3975 if (bs->file->bs->exact_filename[0] && !has_open_options) {
3976 strcpy(bs->exact_filename, bs->file->bs->exact_filename);
3977 }
3978 /* Reconstructing the full options QDict is simple for most format block
3979 * drivers, as long as the full options are known for the underlying
3980 * file BDS. The full options QDict of that file BDS should somehow
3981 * contain a representation of the filename, therefore the following
3982 * suffices without querying the (exact_)filename of this BDS. */
3983 if (bs->file->bs->full_open_options) {
3984 qdict_put_obj(opts, "driver",
3985 QOBJECT(qstring_from_str(drv->format_name)));
3986 QINCREF(bs->file->bs->full_open_options);
3987 qdict_put_obj(opts, "file",
3988 QOBJECT(bs->file->bs->full_open_options));
3989
3990 bs->full_open_options = opts;
3991 } else {
3992 QDECREF(opts);
3993 }
3994 } else if (!bs->full_open_options && qdict_size(bs->options)) {
3995 /* There is no underlying file BDS (at least referenced by BDS.file),
3996 * so the full options QDict should be equal to the options given
3997 * specifically for this block device when it was opened (plus the
3998 * driver specification).
3999 * Because those options don't change, there is no need to update
4000 * full_open_options when it's already set. */
4001
4002 opts = qdict_new();
4003 append_open_options(opts, bs);
4004 qdict_put_obj(opts, "driver",
4005 QOBJECT(qstring_from_str(drv->format_name)));
4006
4007 if (bs->exact_filename[0]) {
4008 /* This may not work for all block protocol drivers (some may
4009 * require this filename to be parsed), but we have to find some
4010 * default solution here, so just include it. If some block driver
4011 * does not support pure options without any filename at all or
4012 * needs some special format of the options QDict, it needs to
4013 * implement the driver-specific bdrv_refresh_filename() function.
4014 */
4015 qdict_put_obj(opts, "filename",
4016 QOBJECT(qstring_from_str(bs->exact_filename)));
4017 }
4018
4019 bs->full_open_options = opts;
4020 }
4021
4022 if (bs->exact_filename[0]) {
4023 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4024 } else if (bs->full_open_options) {
4025 QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4026 snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4027 qstring_get_str(json));
4028 QDECREF(json);
4029 }
4030 }