]> git.proxmox.com Git - mirror_qemu.git/blob - block.c
block: Use QemuOpts in bdrv_open_common()
[mirror_qemu.git] / block.c
1 /*
2 * QEMU System Emulator block driver
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "block/block_int.h"
28 #include "block/blockjob.h"
29 #include "qemu/module.h"
30 #include "qapi/qmp/qjson.h"
31 #include "sysemu/block-backend.h"
32 #include "sysemu/sysemu.h"
33 #include "qemu/notify.h"
34 #include "block/coroutine.h"
35 #include "block/qapi.h"
36 #include "qmp-commands.h"
37 #include "qemu/timer.h"
38 #include "qapi-event.h"
39
40 #ifdef CONFIG_BSD
41 #include <sys/types.h>
42 #include <sys/stat.h>
43 #include <sys/ioctl.h>
44 #include <sys/queue.h>
45 #ifndef __DragonFly__
46 #include <sys/disk.h>
47 #endif
48 #endif
49
50 #ifdef _WIN32
51 #include <windows.h>
52 #endif
53
/**
 * A BdrvDirtyBitmap can be in three possible states:
 * (1) successor is NULL and disabled is false: full r/w mode
 * (2) successor is NULL and disabled is true: read only mode ("disabled")
 * (3) successor is set: frozen mode.
 *     A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
 *     or enabled. A frozen bitmap can only abdicate() or reclaim().
 *
 * The state is not stored explicitly; it is derived from the 'successor'
 * and 'disabled' members below.
 */
struct BdrvDirtyBitmap {
    HBitmap *bitmap;            /* Dirty sector bitmap implementation */
    BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
    char *name;                 /* Optional non-empty unique ID */
    int64_t size;               /* Size of the bitmap (Number of sectors) */
    bool disabled;              /* Bitmap is read-only */
    QLIST_ENTRY(BdrvDirtyBitmap) list; /* Linkage for a QLIST of bitmaps */
};
70
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

/* Nodes created by bdrv_new_root(); linked through their device_list entry */
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

/* Nodes that were assigned a node name; linked through their node_list entry */
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

/* All block drivers added with bdrv_register() */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* Forward declaration; the definition is not in this part of the file */
static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
85
86 #ifdef _WIN32
/*
 * Return 1 if @filename starts with an ASCII letter immediately followed
 * by ':' (e.g. "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_lower = (letter >= 'a' && letter <= 'z');
    int is_upper = (letter >= 'A' && letter <= 'Z');

    /* Do not look at the second character unless the first is a letter */
    if (!is_lower && !is_upper) {
        return 0;
    }
    return filename[1] == ':';
}
93
94 int is_windows_drive(const char *filename)
95 {
96 if (is_windows_drive_prefix(filename) &&
97 filename[2] == '\0')
98 return 1;
99 if (strstart(filename, "\\\\.\\", NULL) ||
100 strstart(filename, "//./", NULL))
101 return 1;
102 return 0;
103 }
104 #endif
105
106 size_t bdrv_opt_mem_align(BlockDriverState *bs)
107 {
108 if (!bs || !bs->drv) {
109 /* page size or 4k (hdd sector size) should be on the safe side */
110 return MAX(4096, getpagesize());
111 }
112
113 return bs->bl.opt_mem_alignment;
114 }
115
116 size_t bdrv_min_mem_align(BlockDriverState *bs)
117 {
118 if (!bs || !bs->drv) {
119 /* page size or 4k (hdd sector size) should be on the safe side */
120 return MAX(4096, getpagesize());
121 }
122
123 return bs->bl.min_mem_alignment;
124 }
125
/*
 * Check whether @path starts with "<protocol>:", i.e. whether a ':' occurs
 * before any path separator.  Returns 1 if so, 0 otherwise.
 *
 * On Windows, drive specifications are never treated as protocols and a
 * backslash also counts as a path separator.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    static const char delimiters[] = ":/\\";

    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    static const char delimiters[] = ":/";
#endif

    /* Index of the first delimiter, or of the terminating NUL if none */
    return path[strcspn(path, delimiters)] == ':';
}
143
/*
 * Return non-zero if @path is absolute: it begins with '/', or on Windows
 * also with '\' or a drive specification.
 */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (*path == '\\') {
        return 1;
    }
#endif
    return *path == '/';
}
156
/*
 * Build in @dest (capacity @dest_size bytes) the path of @filename
 * interpreted relative to @base_path.
 *
 * An absolute @filename is copied as is.  Otherwise the leading part of
 * @base_path up to and including its last path separator - or, if that
 * lies further right, its first ':' - is kept and @filename is appended.
 * URL are supported.  Nothing is written if @dest_size is not positive.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *prefix_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Position just past the first ':' (protocol part), if there is one */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* Position just past the last path separator, if there is one */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* Keep whichever prefix reaches further into base_path */
    prefix_end = (after_sep > after_colon) ? after_sep : after_colon;

    prefix_len = prefix_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
200
201 void bdrv_get_full_backing_filename_from_filename(const char *backed,
202 const char *backing,
203 char *dest, size_t sz,
204 Error **errp)
205 {
206 if (backing[0] == '\0' || path_has_protocol(backing) ||
207 path_is_absolute(backing))
208 {
209 pstrcpy(dest, sz, backing);
210 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
211 error_setg(errp, "Cannot use relative backing file names for '%s'",
212 backed);
213 } else {
214 path_combine(dest, sz, backed, backing);
215 }
216 }
217
218 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
219 Error **errp)
220 {
221 char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
222
223 bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
224 dest, sz, errp);
225 }
226
/*
 * Register the block driver @bdrv: set up its I/O functions through
 * bdrv_setup_io_funcs() and put it at the head of the bdrv_drivers list.
 */
void bdrv_register(BlockDriver *bdrv)
{
    bdrv_setup_io_funcs(bdrv);

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
233
/*
 * Create a new BlockDriverState with bdrv_new() and append it to the
 * bdrv_states list.  Returns the new node.
 */
BlockDriverState *bdrv_new_root(void)
{
    BlockDriverState *bs = bdrv_new();

    QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
    return bs;
}
241
242 BlockDriverState *bdrv_new(void)
243 {
244 BlockDriverState *bs;
245 int i;
246
247 bs = g_new0(BlockDriverState, 1);
248 QLIST_INIT(&bs->dirty_bitmaps);
249 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
250 QLIST_INIT(&bs->op_blockers[i]);
251 }
252 bdrv_iostatus_disable(bs);
253 notifier_list_init(&bs->close_notifiers);
254 notifier_with_return_list_init(&bs->before_write_notifiers);
255 qemu_co_queue_init(&bs->throttled_reqs[0]);
256 qemu_co_queue_init(&bs->throttled_reqs[1]);
257 bs->refcnt = 1;
258 bs->aio_context = qemu_get_aio_context();
259
260 return bs;
261 }
262
/* Add @notify to the close_notifiers list of @bs. */
void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
{
    notifier_list_add(&bs->close_notifiers, notify);
}
267
268 BlockDriver *bdrv_find_format(const char *format_name)
269 {
270 BlockDriver *drv1;
271 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
272 if (!strcmp(drv1->format_name, format_name)) {
273 return drv1;
274 }
275 }
276 return NULL;
277 }
278
279 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
280 {
281 static const char *whitelist_rw[] = {
282 CONFIG_BDRV_RW_WHITELIST
283 };
284 static const char *whitelist_ro[] = {
285 CONFIG_BDRV_RO_WHITELIST
286 };
287 const char **p;
288
289 if (!whitelist_rw[0] && !whitelist_ro[0]) {
290 return 1; /* no whitelist, anything goes */
291 }
292
293 for (p = whitelist_rw; *p; p++) {
294 if (!strcmp(drv->format_name, *p)) {
295 return 1;
296 }
297 }
298 if (read_only) {
299 for (p = whitelist_ro; *p; p++) {
300 if (!strcmp(drv->format_name, *p)) {
301 return 1;
302 }
303 }
304 }
305 return 0;
306 }
307
308 BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
309 bool read_only)
310 {
311 BlockDriver *drv = bdrv_find_format(format_name);
312 return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
313 }
314
/* Arguments and results handed between bdrv_create() and its coroutine */
typedef struct CreateCo {
    BlockDriver *drv;   /* Driver whose bdrv_create callback is invoked */
    char *filename;     /* File name passed to the callback */
    QemuOpts *opts;     /* Creation options passed to the callback */
    int ret;            /* NOT_DONE until the callback's result is stored */
    Error *err;         /* Error produced by the callback, if any */
} CreateCo;
322
323 static void coroutine_fn bdrv_create_co_entry(void *opaque)
324 {
325 Error *local_err = NULL;
326 int ret;
327
328 CreateCo *cco = opaque;
329 assert(cco->drv);
330
331 ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
332 if (local_err) {
333 error_propagate(&cco->err, local_err);
334 }
335 cco->ret = ret;
336 }
337
338 int bdrv_create(BlockDriver *drv, const char* filename,
339 QemuOpts *opts, Error **errp)
340 {
341 int ret;
342
343 Coroutine *co;
344 CreateCo cco = {
345 .drv = drv,
346 .filename = g_strdup(filename),
347 .opts = opts,
348 .ret = NOT_DONE,
349 .err = NULL,
350 };
351
352 if (!drv->bdrv_create) {
353 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
354 ret = -ENOTSUP;
355 goto out;
356 }
357
358 if (qemu_in_coroutine()) {
359 /* Fast-path if already in coroutine context */
360 bdrv_create_co_entry(&cco);
361 } else {
362 co = qemu_coroutine_create(bdrv_create_co_entry);
363 qemu_coroutine_enter(co, &cco);
364 while (cco.ret == NOT_DONE) {
365 aio_poll(qemu_get_aio_context(), true);
366 }
367 }
368
369 ret = cco.ret;
370 if (ret < 0) {
371 if (cco.err) {
372 error_propagate(errp, cco.err);
373 } else {
374 error_setg_errno(errp, -ret, "Could not create image");
375 }
376 }
377
378 out:
379 g_free(cco.filename);
380 return ret;
381 }
382
383 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
384 {
385 BlockDriver *drv;
386 Error *local_err = NULL;
387 int ret;
388
389 drv = bdrv_find_protocol(filename, true, errp);
390 if (drv == NULL) {
391 return -ENOENT;
392 }
393
394 ret = bdrv_create(drv, filename, opts, &local_err);
395 if (local_err) {
396 error_propagate(errp, local_err);
397 }
398 return ret;
399 }
400
401 /**
402 * Try to get @bs's logical and physical block size.
403 * On success, store them in @bsz struct and return 0.
404 * On failure return -errno.
405 * @bs must not be empty.
406 */
407 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
408 {
409 BlockDriver *drv = bs->drv;
410
411 if (drv && drv->bdrv_probe_blocksizes) {
412 return drv->bdrv_probe_blocksizes(bs, bsz);
413 }
414
415 return -ENOTSUP;
416 }
417
418 /**
419 * Try to get @bs's geometry (cyls, heads, sectors).
420 * On success, store them in @geo struct and return 0.
421 * On failure return -errno.
422 * @bs must not be empty.
423 */
424 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
425 {
426 BlockDriver *drv = bs->drv;
427
428 if (drv && drv->bdrv_probe_geometry) {
429 return drv->bdrv_probe_geometry(bs, geo);
430 }
431
432 return -ENOTSUP;
433 }
434
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename: buffer receiving the name of the created file
 * @size:     capacity of @filename in bytes (must be at least MAX_PATH on
 *            Windows)
 *
 * On POSIX hosts the file is created in $TMPDIR, or in /var/tmp if that
 * variable is unset.
 *
 * Return 0 upon success, otherwise a negative errno value (-EOVERFLOW if
 * the name does not fit into @filename).
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    int len;
    int saved_errno;
    const char *tmpdir;

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }

    /* A negative result is an output error; treat it like truncation */
    len = snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    if (len < 0 || len >= size) {
        return -EOVERFLOW;
    }

    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }

    if (close(fd) != 0) {
        /* Save errno first: unlink() may overwrite it */
        saved_errno = errno;
        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
470
471 /*
472 * Detect host devices. By convention, /dev/cdrom[N] is always
473 * recognized as a host CDROM.
474 */
475 static BlockDriver *find_hdev_driver(const char *filename)
476 {
477 int score_max = 0, score;
478 BlockDriver *drv = NULL, *d;
479
480 QLIST_FOREACH(d, &bdrv_drivers, list) {
481 if (d->bdrv_probe_device) {
482 score = d->bdrv_probe_device(filename);
483 if (score > score_max) {
484 score_max = score;
485 drv = d;
486 }
487 }
488 }
489
490 return drv;
491 }
492
493 BlockDriver *bdrv_find_protocol(const char *filename,
494 bool allow_protocol_prefix,
495 Error **errp)
496 {
497 BlockDriver *drv1;
498 char protocol[128];
499 int len;
500 const char *p;
501
502 /* TODO Drivers without bdrv_file_open must be specified explicitly */
503
504 /*
505 * XXX(hch): we really should not let host device detection
506 * override an explicit protocol specification, but moving this
507 * later breaks access to device names with colons in them.
508 * Thanks to the brain-dead persistent naming schemes on udev-
509 * based Linux systems those actually are quite common.
510 */
511 drv1 = find_hdev_driver(filename);
512 if (drv1) {
513 return drv1;
514 }
515
516 if (!path_has_protocol(filename) || !allow_protocol_prefix) {
517 return &bdrv_file;
518 }
519
520 p = strchr(filename, ':');
521 assert(p != NULL);
522 len = p - filename;
523 if (len > sizeof(protocol) - 1)
524 len = sizeof(protocol) - 1;
525 memcpy(protocol, filename, len);
526 protocol[len] = '\0';
527 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
528 if (drv1->protocol_name &&
529 !strcmp(drv1->protocol_name, protocol)) {
530 return drv1;
531 }
532 }
533
534 error_setg(errp, "Unknown protocol '%s'", protocol);
535 return NULL;
536 }
537
538 /*
539 * Guess image format by probing its contents.
540 * This is not a good idea when your image is raw (CVE-2008-2004), but
541 * we do it anyway for backward compatibility.
542 *
543 * @buf contains the image's first @buf_size bytes.
544 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
545 * but can be smaller if the image file is smaller)
546 * @filename is its filename.
547 *
548 * For all block drivers, call the bdrv_probe() method to get its
549 * probing score.
550 * Return the first block driver with the highest probing score.
551 */
552 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
553 const char *filename)
554 {
555 int score_max = 0, score;
556 BlockDriver *drv = NULL, *d;
557
558 QLIST_FOREACH(d, &bdrv_drivers, list) {
559 if (d->bdrv_probe) {
560 score = d->bdrv_probe(buf, buf_size, filename);
561 if (score > score_max) {
562 score_max = score;
563 drv = d;
564 }
565 }
566 }
567
568 return drv;
569 }
570
571 static int find_image_format(BlockDriverState *bs, const char *filename,
572 BlockDriver **pdrv, Error **errp)
573 {
574 BlockDriver *drv;
575 uint8_t buf[BLOCK_PROBE_BUF_SIZE];
576 int ret = 0;
577
578 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
579 if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
580 *pdrv = &bdrv_raw;
581 return ret;
582 }
583
584 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
585 if (ret < 0) {
586 error_setg_errno(errp, -ret, "Could not read image for determining its "
587 "format");
588 *pdrv = NULL;
589 return ret;
590 }
591
592 drv = bdrv_probe_all(buf, ret, filename);
593 if (!drv) {
594 error_setg(errp, "Could not determine image format: No compatible "
595 "driver found");
596 ret = -ENOENT;
597 }
598 *pdrv = drv;
599 return ret;
600 }
601
602 /**
603 * Set the current 'total_sectors' value
604 * Return 0 on success, -errno on error.
605 */
606 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
607 {
608 BlockDriver *drv = bs->drv;
609
610 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
611 if (bs->sg)
612 return 0;
613
614 /* query actual device if possible, otherwise just trust the hint */
615 if (drv->bdrv_getlength) {
616 int64_t length = drv->bdrv_getlength(bs);
617 if (length < 0) {
618 return length;
619 }
620 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
621 }
622
623 bs->total_sectors = hint;
624 return 0;
625 }
626
627 /**
628 * Set open flags for a given discard mode
629 *
630 * Return 0 on success, -1 if the discard mode was invalid.
631 */
632 int bdrv_parse_discard_flags(const char *mode, int *flags)
633 {
634 *flags &= ~BDRV_O_UNMAP;
635
636 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
637 /* do nothing */
638 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
639 *flags |= BDRV_O_UNMAP;
640 } else {
641 return -1;
642 }
643
644 return 0;
645 }
646
647 /**
648 * Set open flags for a given cache mode
649 *
650 * Return 0 on success, -1 if the cache mode was invalid.
651 */
652 int bdrv_parse_cache_flags(const char *mode, int *flags)
653 {
654 *flags &= ~BDRV_O_CACHE_MASK;
655
656 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
657 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
658 } else if (!strcmp(mode, "directsync")) {
659 *flags |= BDRV_O_NOCACHE;
660 } else if (!strcmp(mode, "writeback")) {
661 *flags |= BDRV_O_CACHE_WB;
662 } else if (!strcmp(mode, "unsafe")) {
663 *flags |= BDRV_O_CACHE_WB;
664 *flags |= BDRV_O_NO_FLUSH;
665 } else if (!strcmp(mode, "writethrough")) {
666 /* this is the default */
667 } else {
668 return -1;
669 }
670
671 return 0;
672 }
673
674 /*
675 * Returns the flags that a temporary snapshot should get, based on the
676 * originally requested flags (the originally requested image will have flags
677 * like a backing file)
678 */
679 static int bdrv_temp_snapshot_flags(int flags)
680 {
681 return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
682 }
683
684 /*
685 * Returns the flags that bs->file should get, based on the given flags for
686 * the parent BDS
687 */
688 static int bdrv_inherited_flags(int flags)
689 {
690 /* Enable protocol handling, disable format probing for bs->file */
691 flags |= BDRV_O_PROTOCOL;
692
693 /* Our block drivers take care to send flushes and respect unmap policy,
694 * so we can enable both unconditionally on lower layers. */
695 flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
696
697 /* Clear flags that only apply to the top layer */
698 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
699
700 return flags;
701 }
702
703 /*
704 * Returns the flags that bs->backing_hd should get, based on the given flags
705 * for the parent BDS
706 */
707 static int bdrv_backing_flags(int flags)
708 {
709 /* backing files always opened read-only */
710 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
711
712 /* snapshot=on is handled on the top layer */
713 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
714
715 return flags;
716 }
717
718 static int bdrv_open_flags(BlockDriverState *bs, int flags)
719 {
720 int open_flags = flags | BDRV_O_CACHE_WB;
721
722 /*
723 * Clear flags that are internal to the block layer before opening the
724 * image.
725 */
726 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
727
728 /*
729 * Snapshots should be writable.
730 */
731 if (flags & BDRV_O_TEMPORARY) {
732 open_flags |= BDRV_O_RDWR;
733 }
734
735 return open_flags;
736 }
737
738 static void bdrv_assign_node_name(BlockDriverState *bs,
739 const char *node_name,
740 Error **errp)
741 {
742 if (!node_name) {
743 return;
744 }
745
746 /* Check for empty string or invalid characters */
747 if (!id_wellformed(node_name)) {
748 error_setg(errp, "Invalid node name");
749 return;
750 }
751
752 /* takes care of avoiding namespaces collisions */
753 if (blk_by_name(node_name)) {
754 error_setg(errp, "node-name=%s is conflicting with a device id",
755 node_name);
756 return;
757 }
758
759 /* takes care of avoiding duplicates node names */
760 if (bdrv_find_node(node_name)) {
761 error_setg(errp, "Duplicate node name");
762 return;
763 }
764
765 /* copy node name into the bs and insert it into the graph list */
766 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
767 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
768 }
769
/* Options that bdrv_open_common() absorbs from the options QDict itself */
static QemuOptsList bdrv_runtime_opts = {
    .name = "bdrv_common",
    .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
    .desc = {
        {
            .name = "node-name",
            .type = QEMU_OPT_STRING,
            .help = "Node name of the block device node",
        },
        { /* end of list */ }
    },
};
782
783 /*
784 * Common part for opening disk images and files
785 *
786 * Removes all processed options from *options.
787 */
788 static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
789 QDict *options, int flags, BlockDriver *drv, Error **errp)
790 {
791 int ret, open_flags;
792 const char *filename;
793 const char *node_name = NULL;
794 QemuOpts *opts;
795 Error *local_err = NULL;
796
797 assert(drv != NULL);
798 assert(bs->file == NULL);
799 assert(options != NULL && bs->options != options);
800
801 if (file != NULL) {
802 filename = file->filename;
803 } else {
804 filename = qdict_get_try_str(options, "filename");
805 }
806
807 if (drv->bdrv_needs_filename && !filename) {
808 error_setg(errp, "The '%s' block driver requires a file name",
809 drv->format_name);
810 return -EINVAL;
811 }
812
813 trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
814
815 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
816 qemu_opts_absorb_qdict(opts, options, &local_err);
817 if (local_err) {
818 error_propagate(errp, local_err);
819 ret = -EINVAL;
820 goto fail_opts;
821 }
822
823 node_name = qemu_opt_get(opts, "node-name");
824 bdrv_assign_node_name(bs, node_name, &local_err);
825 if (local_err) {
826 error_propagate(errp, local_err);
827 ret = -EINVAL;
828 goto fail_opts;
829 }
830
831 bs->open_flags = flags;
832 bs->guest_block_size = 512;
833 bs->request_alignment = 512;
834 bs->zero_beyond_eof = true;
835 open_flags = bdrv_open_flags(bs, flags);
836 bs->read_only = !(open_flags & BDRV_O_RDWR);
837
838 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
839 error_setg(errp,
840 !bs->read_only && bdrv_is_whitelisted(drv, true)
841 ? "Driver '%s' can only be used for read-only devices"
842 : "Driver '%s' is not whitelisted",
843 drv->format_name);
844 ret = -ENOTSUP;
845 goto fail_opts;
846 }
847
848 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
849 if (flags & BDRV_O_COPY_ON_READ) {
850 if (!bs->read_only) {
851 bdrv_enable_copy_on_read(bs);
852 } else {
853 error_setg(errp, "Can't use copy-on-read on read-only device");
854 ret = -EINVAL;
855 goto fail_opts;
856 }
857 }
858
859 if (filename != NULL) {
860 pstrcpy(bs->filename, sizeof(bs->filename), filename);
861 } else {
862 bs->filename[0] = '\0';
863 }
864 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
865
866 bs->drv = drv;
867 bs->opaque = g_malloc0(drv->instance_size);
868
869 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
870
871 /* Open the image, either directly or using a protocol */
872 if (drv->bdrv_file_open) {
873 assert(file == NULL);
874 assert(!drv->bdrv_needs_filename || filename != NULL);
875 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
876 } else {
877 if (file == NULL) {
878 error_setg(errp, "Can't use '%s' as a block driver for the "
879 "protocol level", drv->format_name);
880 ret = -EINVAL;
881 goto free_and_fail;
882 }
883 bs->file = file;
884 ret = drv->bdrv_open(bs, options, open_flags, &local_err);
885 }
886
887 if (ret < 0) {
888 if (local_err) {
889 error_propagate(errp, local_err);
890 } else if (bs->filename[0]) {
891 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
892 } else {
893 error_setg_errno(errp, -ret, "Could not open image");
894 }
895 goto free_and_fail;
896 }
897
898 if (bs->encrypted) {
899 error_report("Encrypted images are deprecated");
900 error_printf("Support for them will be removed in a future release.\n"
901 "You can use 'qemu-img convert' to convert your image"
902 " to an unencrypted one.\n");
903 }
904
905 ret = refresh_total_sectors(bs, bs->total_sectors);
906 if (ret < 0) {
907 error_setg_errno(errp, -ret, "Could not refresh total sector count");
908 goto free_and_fail;
909 }
910
911 bdrv_refresh_limits(bs, &local_err);
912 if (local_err) {
913 error_propagate(errp, local_err);
914 ret = -EINVAL;
915 goto free_and_fail;
916 }
917
918 assert(bdrv_opt_mem_align(bs) != 0);
919 assert(bdrv_min_mem_align(bs) != 0);
920 assert((bs->request_alignment != 0) || bs->sg);
921
922 qemu_opts_del(opts);
923 return 0;
924
925 free_and_fail:
926 bs->file = NULL;
927 g_free(bs->opaque);
928 bs->opaque = NULL;
929 bs->drv = NULL;
930 fail_opts:
931 qemu_opts_del(opts);
932 return ret;
933 }
934
935 static QDict *parse_json_filename(const char *filename, Error **errp)
936 {
937 QObject *options_obj;
938 QDict *options;
939 int ret;
940
941 ret = strstart(filename, "json:", &filename);
942 assert(ret);
943
944 options_obj = qobject_from_json(filename);
945 if (!options_obj) {
946 error_setg(errp, "Could not parse the JSON options");
947 return NULL;
948 }
949
950 if (qobject_type(options_obj) != QTYPE_QDICT) {
951 qobject_decref(options_obj);
952 error_setg(errp, "Invalid JSON object given");
953 return NULL;
954 }
955
956 options = qobject_to_qdict(options_obj);
957 qdict_flatten(options);
958
959 return options;
960 }
961
962 /*
963 * Fills in default options for opening images and converts the legacy
964 * filename/flags pair to option QDict entries.
965 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
966 * block driver has been specified explicitly.
967 */
968 static int bdrv_fill_options(QDict **options, const char **pfilename,
969 int *flags, BlockDriver *drv, Error **errp)
970 {
971 const char *filename = *pfilename;
972 const char *drvname;
973 bool protocol = *flags & BDRV_O_PROTOCOL;
974 bool parse_filename = false;
975 BlockDriver *tmp_drv;
976 Error *local_err = NULL;
977
978 /* Parse json: pseudo-protocol */
979 if (filename && g_str_has_prefix(filename, "json:")) {
980 QDict *json_options = parse_json_filename(filename, &local_err);
981 if (local_err) {
982 error_propagate(errp, local_err);
983 return -EINVAL;
984 }
985
986 /* Options given in the filename have lower priority than options
987 * specified directly */
988 qdict_join(*options, json_options, false);
989 QDECREF(json_options);
990 *pfilename = filename = NULL;
991 }
992
993 drvname = qdict_get_try_str(*options, "driver");
994
995 /* If the user has explicitly specified the driver, this choice should
996 * override the BDRV_O_PROTOCOL flag */
997 tmp_drv = drv;
998 if (!tmp_drv && drvname) {
999 tmp_drv = bdrv_find_format(drvname);
1000 }
1001 if (tmp_drv) {
1002 protocol = tmp_drv->bdrv_file_open;
1003 }
1004
1005 if (protocol) {
1006 *flags |= BDRV_O_PROTOCOL;
1007 } else {
1008 *flags &= ~BDRV_O_PROTOCOL;
1009 }
1010
1011 /* Fetch the file name from the options QDict if necessary */
1012 if (protocol && filename) {
1013 if (!qdict_haskey(*options, "filename")) {
1014 qdict_put(*options, "filename", qstring_from_str(filename));
1015 parse_filename = true;
1016 } else {
1017 error_setg(errp, "Can't specify 'file' and 'filename' options at "
1018 "the same time");
1019 return -EINVAL;
1020 }
1021 }
1022
1023 /* Find the right block driver */
1024 filename = qdict_get_try_str(*options, "filename");
1025
1026 if (drv) {
1027 if (drvname) {
1028 error_setg(errp, "Driver specified twice");
1029 return -EINVAL;
1030 }
1031 drvname = drv->format_name;
1032 qdict_put(*options, "driver", qstring_from_str(drvname));
1033 } else {
1034 if (!drvname && protocol) {
1035 if (filename) {
1036 drv = bdrv_find_protocol(filename, parse_filename, errp);
1037 if (!drv) {
1038 return -EINVAL;
1039 }
1040
1041 drvname = drv->format_name;
1042 qdict_put(*options, "driver", qstring_from_str(drvname));
1043 } else {
1044 error_setg(errp, "Must specify either driver or file");
1045 return -EINVAL;
1046 }
1047 } else if (drvname) {
1048 drv = bdrv_find_format(drvname);
1049 if (!drv) {
1050 error_setg(errp, "Unknown driver '%s'", drvname);
1051 return -ENOENT;
1052 }
1053 }
1054 }
1055
1056 assert(drv || !protocol);
1057
1058 /* Driver-specific filename parsing */
1059 if (drv && drv->bdrv_parse_filename && parse_filename) {
1060 drv->bdrv_parse_filename(filename, *options, &local_err);
1061 if (local_err) {
1062 error_propagate(errp, local_err);
1063 return -EINVAL;
1064 }
1065
1066 if (!drv->bdrv_needs_filename) {
1067 qdict_del(*options, "filename");
1068 }
1069 }
1070
1071 return 0;
1072 }
1073
1074 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1075 {
1076
1077 if (bs->backing_hd) {
1078 assert(bs->backing_blocker);
1079 bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
1080 } else if (backing_hd) {
1081 error_setg(&bs->backing_blocker,
1082 "node is used as backing hd of '%s'",
1083 bdrv_get_device_or_node_name(bs));
1084 }
1085
1086 bs->backing_hd = backing_hd;
1087 if (!backing_hd) {
1088 error_free(bs->backing_blocker);
1089 bs->backing_blocker = NULL;
1090 goto out;
1091 }
1092 bs->open_flags &= ~BDRV_O_NO_BACKING;
1093 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1094 pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1095 backing_hd->drv ? backing_hd->drv->format_name : "");
1096
1097 bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
1098 /* Otherwise we won't be able to commit due to check in bdrv_commit */
1099 bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1100 bs->backing_blocker);
1101 out:
1102 bdrv_refresh_limits(bs, NULL);
1103 }
1104
1105 /*
1106 * Opens the backing file for a BlockDriverState if not yet open
1107 *
1108 * options is a QDict of options to pass to the block drivers, or NULL for an
1109 * empty set of options. The reference to the QDict is transferred to this
1110 * function (even on failure), so if the caller intends to reuse the dictionary,
1111 * it needs to use QINCREF() before calling bdrv_file_open.
1112 */
1113 int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
1114 {
1115 char *backing_filename = g_malloc0(PATH_MAX);
1116 int ret = 0;
1117 BlockDriverState *backing_hd;
1118 Error *local_err = NULL;
1119
1120 if (bs->backing_hd != NULL) {
1121 QDECREF(options);
1122 goto free_exit;
1123 }
1124
1125 /* NULL means an empty set of options */
1126 if (options == NULL) {
1127 options = qdict_new();
1128 }
1129
1130 bs->open_flags &= ~BDRV_O_NO_BACKING;
1131 if (qdict_haskey(options, "file.filename")) {
1132 backing_filename[0] = '\0';
1133 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1134 QDECREF(options);
1135 goto free_exit;
1136 } else {
1137 bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1138 &local_err);
1139 if (local_err) {
1140 ret = -EINVAL;
1141 error_propagate(errp, local_err);
1142 QDECREF(options);
1143 goto free_exit;
1144 }
1145 }
1146
1147 if (!bs->drv || !bs->drv->supports_backing) {
1148 ret = -EINVAL;
1149 error_setg(errp, "Driver doesn't support backing files");
1150 QDECREF(options);
1151 goto free_exit;
1152 }
1153
1154 backing_hd = bdrv_new();
1155
1156 if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1157 qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1158 }
1159
1160 assert(bs->backing_hd == NULL);
1161 ret = bdrv_open(&backing_hd,
1162 *backing_filename ? backing_filename : NULL, NULL, options,
1163 bdrv_backing_flags(bs->open_flags), NULL, &local_err);
1164 if (ret < 0) {
1165 bdrv_unref(backing_hd);
1166 backing_hd = NULL;
1167 bs->open_flags |= BDRV_O_NO_BACKING;
1168 error_setg(errp, "Could not open backing file: %s",
1169 error_get_pretty(local_err));
1170 error_free(local_err);
1171 goto free_exit;
1172 }
1173 bdrv_set_backing_hd(bs, backing_hd);
1174
1175 free_exit:
1176 g_free(backing_filename);
1177 return ret;
1178 }
1179
1180 /*
1181 * Opens a disk image whose options are given as BlockdevRef in another block
1182 * device's options.
1183 *
1184 * If allow_none is true, no image will be opened if filename is false and no
1185 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1186 *
1187 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1188 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1189 * itself, all options starting with "${bdref_key}." are considered part of the
1190 * BlockdevRef.
1191 *
1192 * The BlockdevRef will be removed from the options QDict.
1193 *
1194 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
1195 */
1196 int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1197 QDict *options, const char *bdref_key, int flags,
1198 bool allow_none, Error **errp)
1199 {
1200 QDict *image_options;
1201 int ret;
1202 char *bdref_key_dot;
1203 const char *reference;
1204
1205 assert(pbs);
1206 assert(*pbs == NULL);
1207
1208 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1209 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1210 g_free(bdref_key_dot);
1211
1212 reference = qdict_get_try_str(options, bdref_key);
1213 if (!filename && !reference && !qdict_size(image_options)) {
1214 if (allow_none) {
1215 ret = 0;
1216 } else {
1217 error_setg(errp, "A block device must be specified for \"%s\"",
1218 bdref_key);
1219 ret = -EINVAL;
1220 }
1221 QDECREF(image_options);
1222 goto done;
1223 }
1224
1225 ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);
1226
1227 done:
1228 qdict_del(options, bdref_key);
1229 return ret;
1230 }
1231
1232 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
1233 {
1234 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1235 char *tmp_filename = g_malloc0(PATH_MAX + 1);
1236 int64_t total_size;
1237 QemuOpts *opts = NULL;
1238 QDict *snapshot_options;
1239 BlockDriverState *bs_snapshot;
1240 Error *local_err;
1241 int ret;
1242
1243 /* if snapshot, we create a temporary backing file and open it
1244 instead of opening 'filename' directly */
1245
1246 /* Get the required size from the image */
1247 total_size = bdrv_getlength(bs);
1248 if (total_size < 0) {
1249 ret = total_size;
1250 error_setg_errno(errp, -total_size, "Could not get image size");
1251 goto out;
1252 }
1253
1254 /* Create the temporary image */
1255 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1256 if (ret < 0) {
1257 error_setg_errno(errp, -ret, "Could not get temporary filename");
1258 goto out;
1259 }
1260
1261 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1262 &error_abort);
1263 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1264 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
1265 qemu_opts_del(opts);
1266 if (ret < 0) {
1267 error_setg_errno(errp, -ret, "Could not create temporary overlay "
1268 "'%s': %s", tmp_filename,
1269 error_get_pretty(local_err));
1270 error_free(local_err);
1271 goto out;
1272 }
1273
1274 /* Prepare a new options QDict for the temporary file */
1275 snapshot_options = qdict_new();
1276 qdict_put(snapshot_options, "file.driver",
1277 qstring_from_str("file"));
1278 qdict_put(snapshot_options, "file.filename",
1279 qstring_from_str(tmp_filename));
1280
1281 bs_snapshot = bdrv_new();
1282
1283 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1284 flags, &bdrv_qcow2, &local_err);
1285 if (ret < 0) {
1286 error_propagate(errp, local_err);
1287 goto out;
1288 }
1289
1290 bdrv_append(bs_snapshot, bs);
1291
1292 out:
1293 g_free(tmp_filename);
1294 return ret;
1295 }
1296
1297 /*
1298 * Opens a disk image (raw, qcow2, vmdk, ...)
1299 *
1300 * options is a QDict of options to pass to the block drivers, or NULL for an
1301 * empty set of options. The reference to the QDict belongs to the block layer
1302 * after the call (even on failure), so if the caller intends to reuse the
1303 * dictionary, it needs to use QINCREF() before calling bdrv_open.
1304 *
1305 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1306 * If it is not NULL, the referenced BDS will be reused.
1307 *
1308 * The reference parameter may be used to specify an existing block device which
1309 * should be opened. If specified, neither options nor a filename may be given,
1310 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1311 */
1312 int bdrv_open(BlockDriverState **pbs, const char *filename,
1313 const char *reference, QDict *options, int flags,
1314 BlockDriver *drv, Error **errp)
1315 {
1316 int ret;
1317 BlockDriverState *file = NULL, *bs;
1318 const char *drvname;
1319 Error *local_err = NULL;
1320 int snapshot_flags = 0;
1321
1322 assert(pbs);
1323
1324 if (reference) {
1325 bool options_non_empty = options ? qdict_size(options) : false;
1326 QDECREF(options);
1327
1328 if (*pbs) {
1329 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1330 "another block device");
1331 return -EINVAL;
1332 }
1333
1334 if (filename || options_non_empty) {
1335 error_setg(errp, "Cannot reference an existing block device with "
1336 "additional options or a new filename");
1337 return -EINVAL;
1338 }
1339
1340 bs = bdrv_lookup_bs(reference, reference, errp);
1341 if (!bs) {
1342 return -ENODEV;
1343 }
1344 bdrv_ref(bs);
1345 *pbs = bs;
1346 return 0;
1347 }
1348
1349 if (*pbs) {
1350 bs = *pbs;
1351 } else {
1352 bs = bdrv_new();
1353 }
1354
1355 /* NULL means an empty set of options */
1356 if (options == NULL) {
1357 options = qdict_new();
1358 }
1359
1360 ret = bdrv_fill_options(&options, &filename, &flags, drv, &local_err);
1361 if (local_err) {
1362 goto fail;
1363 }
1364
1365 /* Find the right image format driver */
1366 drv = NULL;
1367 drvname = qdict_get_try_str(options, "driver");
1368 if (drvname) {
1369 drv = bdrv_find_format(drvname);
1370 qdict_del(options, "driver");
1371 if (!drv) {
1372 error_setg(errp, "Unknown driver: '%s'", drvname);
1373 ret = -EINVAL;
1374 goto fail;
1375 }
1376 }
1377
1378 assert(drvname || !(flags & BDRV_O_PROTOCOL));
1379
1380 bs->options = options;
1381 options = qdict_clone_shallow(options);
1382
1383 /* Open image file without format layer */
1384 if ((flags & BDRV_O_PROTOCOL) == 0) {
1385 if (flags & BDRV_O_RDWR) {
1386 flags |= BDRV_O_ALLOW_RDWR;
1387 }
1388 if (flags & BDRV_O_SNAPSHOT) {
1389 snapshot_flags = bdrv_temp_snapshot_flags(flags);
1390 flags = bdrv_backing_flags(flags);
1391 }
1392
1393 assert(file == NULL);
1394 ret = bdrv_open_image(&file, filename, options, "file",
1395 bdrv_inherited_flags(flags),
1396 true, &local_err);
1397 if (ret < 0) {
1398 goto fail;
1399 }
1400 }
1401
1402 /* Image format probing */
1403 bs->probed = !drv;
1404 if (!drv && file) {
1405 ret = find_image_format(file, filename, &drv, &local_err);
1406 if (ret < 0) {
1407 goto fail;
1408 }
1409 } else if (!drv) {
1410 error_setg(errp, "Must specify either driver or file");
1411 ret = -EINVAL;
1412 goto fail;
1413 }
1414
1415 /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
1416 assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
1417 /* file must be NULL if a protocol BDS is about to be created
1418 * (the inverse results in an error message from bdrv_open_common()) */
1419 assert(!(flags & BDRV_O_PROTOCOL) || !file);
1420
1421 /* Open the image */
1422 ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
1423 if (ret < 0) {
1424 goto fail;
1425 }
1426
1427 if (file && (bs->file != file)) {
1428 bdrv_unref(file);
1429 file = NULL;
1430 }
1431
1432 /* If there is a backing file, use it */
1433 if ((flags & BDRV_O_NO_BACKING) == 0) {
1434 QDict *backing_options;
1435
1436 qdict_extract_subqdict(options, &backing_options, "backing.");
1437 ret = bdrv_open_backing_file(bs, backing_options, &local_err);
1438 if (ret < 0) {
1439 goto close_and_fail;
1440 }
1441 }
1442
1443 bdrv_refresh_filename(bs);
1444
1445 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1446 * temporary snapshot afterwards. */
1447 if (snapshot_flags) {
1448 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
1449 if (local_err) {
1450 goto close_and_fail;
1451 }
1452 }
1453
1454 /* Check if any unknown options were used */
1455 if (options && (qdict_size(options) != 0)) {
1456 const QDictEntry *entry = qdict_first(options);
1457 if (flags & BDRV_O_PROTOCOL) {
1458 error_setg(errp, "Block protocol '%s' doesn't support the option "
1459 "'%s'", drv->format_name, entry->key);
1460 } else {
1461 error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1462 "support the option '%s'", drv->format_name,
1463 bdrv_get_device_name(bs), entry->key);
1464 }
1465
1466 ret = -EINVAL;
1467 goto close_and_fail;
1468 }
1469
1470 if (!bdrv_key_required(bs)) {
1471 if (bs->blk) {
1472 blk_dev_change_media_cb(bs->blk, true);
1473 }
1474 } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1475 && !runstate_check(RUN_STATE_INMIGRATE)
1476 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1477 error_setg(errp,
1478 "Guest must be stopped for opening of encrypted image");
1479 ret = -EBUSY;
1480 goto close_and_fail;
1481 }
1482
1483 QDECREF(options);
1484 *pbs = bs;
1485 return 0;
1486
1487 fail:
1488 if (file != NULL) {
1489 bdrv_unref(file);
1490 }
1491 QDECREF(bs->options);
1492 QDECREF(options);
1493 bs->options = NULL;
1494 if (!*pbs) {
1495 /* If *pbs is NULL, a new BDS has been created in this function and
1496 needs to be freed now. Otherwise, it does not need to be closed,
1497 since it has not really been opened yet. */
1498 bdrv_unref(bs);
1499 }
1500 if (local_err) {
1501 error_propagate(errp, local_err);
1502 }
1503 return ret;
1504
1505 close_and_fail:
1506 /* See fail path, but now the BDS has to be always closed */
1507 if (*pbs) {
1508 bdrv_close(bs);
1509 } else {
1510 bdrv_unref(bs);
1511 }
1512 QDECREF(options);
1513 if (local_err) {
1514 error_propagate(errp, local_err);
1515 }
1516 return ret;
1517 }
1518
/* One element of a BlockReopenQueue, as built by bdrv_reopen_queue() */
typedef struct BlockReopenQueueEntry {
    /* set once bdrv_reopen_prepare() has succeeded for this entry */
    bool prepared;
    /* the BDS to reopen and the flags to reopen it with */
    BDRVReopenState state;
    /* queue linkage */
    QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
} BlockReopenQueueEntry;
1524
1525 /*
1526 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1527 * reopen of multiple devices.
1528 *
1529 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1530 * already performed, or alternatively may be NULL a new BlockReopenQueue will
1531 * be created and initialized. This newly created BlockReopenQueue should be
1532 * passed back in for subsequent calls that are intended to be of the same
1533 * atomic 'set'.
1534 *
1535 * bs is the BlockDriverState to add to the reopen queue.
1536 *
1537 * flags contains the open flags for the associated bs
1538 *
1539 * returns a pointer to bs_queue, which is either the newly allocated
1540 * bs_queue, or the existing bs_queue being used.
1541 *
1542 */
1543 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1544 BlockDriverState *bs, int flags)
1545 {
1546 assert(bs != NULL);
1547
1548 BlockReopenQueueEntry *bs_entry;
1549 if (bs_queue == NULL) {
1550 bs_queue = g_new0(BlockReopenQueue, 1);
1551 QSIMPLEQ_INIT(bs_queue);
1552 }
1553
1554 /* bdrv_open() masks this flag out */
1555 flags &= ~BDRV_O_PROTOCOL;
1556
1557 if (bs->file) {
1558 bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
1559 }
1560
1561 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1562 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1563
1564 bs_entry->state.bs = bs;
1565 bs_entry->state.flags = flags;
1566
1567 return bs_queue;
1568 }
1569
1570 /*
1571 * Reopen multiple BlockDriverStates atomically & transactionally.
1572 *
1573 * The queue passed in (bs_queue) must have been built up previous
1574 * via bdrv_reopen_queue().
1575 *
1576 * Reopens all BDS specified in the queue, with the appropriate
1577 * flags. All devices are prepared for reopen, and failure of any
1578 * device will cause all device changes to be abandonded, and intermediate
1579 * data cleaned up.
1580 *
1581 * If all devices prepare successfully, then the changes are committed
1582 * to all devices.
1583 *
1584 */
1585 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1586 {
1587 int ret = -1;
1588 BlockReopenQueueEntry *bs_entry, *next;
1589 Error *local_err = NULL;
1590
1591 assert(bs_queue != NULL);
1592
1593 bdrv_drain_all();
1594
1595 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1596 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1597 error_propagate(errp, local_err);
1598 goto cleanup;
1599 }
1600 bs_entry->prepared = true;
1601 }
1602
1603 /* If we reach this point, we have success and just need to apply the
1604 * changes
1605 */
1606 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1607 bdrv_reopen_commit(&bs_entry->state);
1608 }
1609
1610 ret = 0;
1611
1612 cleanup:
1613 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1614 if (ret && bs_entry->prepared) {
1615 bdrv_reopen_abort(&bs_entry->state);
1616 }
1617 g_free(bs_entry);
1618 }
1619 g_free(bs_queue);
1620 return ret;
1621 }
1622
1623
1624 /* Reopen a single BlockDriverState with the specified flags. */
1625 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1626 {
1627 int ret = -1;
1628 Error *local_err = NULL;
1629 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1630
1631 ret = bdrv_reopen_multiple(queue, &local_err);
1632 if (local_err != NULL) {
1633 error_propagate(errp, local_err);
1634 }
1635 return ret;
1636 }
1637
1638
1639 /*
1640 * Prepares a BlockDriverState for reopen. All changes are staged in the
1641 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1642 * the block driver layer .bdrv_reopen_prepare()
1643 *
1644 * bs is the BlockDriverState to reopen
1645 * flags are the new open flags
1646 * queue is the reopen queue
1647 *
1648 * Returns 0 on success, non-zero on error. On error errp will be set
1649 * as well.
1650 *
1651 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1652 * It is the responsibility of the caller to then call the abort() or
1653 * commit() for any other BDS that have been left in a prepare() state
1654 *
1655 */
1656 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1657 Error **errp)
1658 {
1659 int ret = -1;
1660 Error *local_err = NULL;
1661 BlockDriver *drv;
1662
1663 assert(reopen_state != NULL);
1664 assert(reopen_state->bs->drv != NULL);
1665 drv = reopen_state->bs->drv;
1666
1667 /* if we are to stay read-only, do not allow permission change
1668 * to r/w */
1669 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1670 reopen_state->flags & BDRV_O_RDWR) {
1671 error_setg(errp, "Node '%s' is read only",
1672 bdrv_get_device_or_node_name(reopen_state->bs));
1673 goto error;
1674 }
1675
1676
1677 ret = bdrv_flush(reopen_state->bs);
1678 if (ret) {
1679 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1680 strerror(-ret));
1681 goto error;
1682 }
1683
1684 if (drv->bdrv_reopen_prepare) {
1685 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1686 if (ret) {
1687 if (local_err != NULL) {
1688 error_propagate(errp, local_err);
1689 } else {
1690 error_setg(errp, "failed while preparing to reopen image '%s'",
1691 reopen_state->bs->filename);
1692 }
1693 goto error;
1694 }
1695 } else {
1696 /* It is currently mandatory to have a bdrv_reopen_prepare()
1697 * handler for each supported drv. */
1698 error_setg(errp, "Block format '%s' used by node '%s' "
1699 "does not support reopening files", drv->format_name,
1700 bdrv_get_device_or_node_name(reopen_state->bs));
1701 ret = -1;
1702 goto error;
1703 }
1704
1705 ret = 0;
1706
1707 error:
1708 return ret;
1709 }
1710
1711 /*
1712 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1713 * makes them final by swapping the staging BlockDriverState contents into
1714 * the active BlockDriverState contents.
1715 */
1716 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1717 {
1718 BlockDriver *drv;
1719
1720 assert(reopen_state != NULL);
1721 drv = reopen_state->bs->drv;
1722 assert(drv != NULL);
1723
1724 /* If there are any driver level actions to take */
1725 if (drv->bdrv_reopen_commit) {
1726 drv->bdrv_reopen_commit(reopen_state);
1727 }
1728
1729 /* set BDS specific flags now */
1730 reopen_state->bs->open_flags = reopen_state->flags;
1731 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1732 BDRV_O_CACHE_WB);
1733 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
1734
1735 bdrv_refresh_limits(reopen_state->bs, NULL);
1736 }
1737
1738 /*
1739 * Abort the reopen, and delete and free the staged changes in
1740 * reopen_state
1741 */
1742 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1743 {
1744 BlockDriver *drv;
1745
1746 assert(reopen_state != NULL);
1747 drv = reopen_state->bs->drv;
1748 assert(drv != NULL);
1749
1750 if (drv->bdrv_reopen_abort) {
1751 drv->bdrv_reopen_abort(reopen_state);
1752 }
1753 }
1754
1755
1756 void bdrv_close(BlockDriverState *bs)
1757 {
1758 BdrvAioNotifier *ban, *ban_next;
1759
1760 if (bs->job) {
1761 block_job_cancel_sync(bs->job);
1762 }
1763 bdrv_drain_all(); /* complete I/O */
1764 bdrv_flush(bs);
1765 bdrv_drain_all(); /* in case flush left pending I/O */
1766 notifier_list_notify(&bs->close_notifiers, bs);
1767
1768 if (bs->drv) {
1769 if (bs->backing_hd) {
1770 BlockDriverState *backing_hd = bs->backing_hd;
1771 bdrv_set_backing_hd(bs, NULL);
1772 bdrv_unref(backing_hd);
1773 }
1774 bs->drv->bdrv_close(bs);
1775 g_free(bs->opaque);
1776 bs->opaque = NULL;
1777 bs->drv = NULL;
1778 bs->copy_on_read = 0;
1779 bs->backing_file[0] = '\0';
1780 bs->backing_format[0] = '\0';
1781 bs->total_sectors = 0;
1782 bs->encrypted = 0;
1783 bs->valid_key = 0;
1784 bs->sg = 0;
1785 bs->zero_beyond_eof = false;
1786 QDECREF(bs->options);
1787 bs->options = NULL;
1788 QDECREF(bs->full_open_options);
1789 bs->full_open_options = NULL;
1790
1791 if (bs->file != NULL) {
1792 bdrv_unref(bs->file);
1793 bs->file = NULL;
1794 }
1795 }
1796
1797 if (bs->blk) {
1798 blk_dev_change_media_cb(bs->blk, false);
1799 }
1800
1801 /*throttling disk I/O limits*/
1802 if (bs->io_limits_enabled) {
1803 bdrv_io_limits_disable(bs);
1804 }
1805
1806 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
1807 g_free(ban);
1808 }
1809 QLIST_INIT(&bs->aio_notifiers);
1810 }
1811
1812 void bdrv_close_all(void)
1813 {
1814 BlockDriverState *bs;
1815
1816 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1817 AioContext *aio_context = bdrv_get_aio_context(bs);
1818
1819 aio_context_acquire(aio_context);
1820 bdrv_close(bs);
1821 aio_context_release(aio_context);
1822 }
1823 }
1824
1825 /* make a BlockDriverState anonymous by removing from bdrv_state and
1826 * graph_bdrv_state list.
1827 Also, NULL terminate the device_name to prevent double remove */
1828 void bdrv_make_anon(BlockDriverState *bs)
1829 {
1830 /*
1831 * Take care to remove bs from bdrv_states only when it's actually
1832 * in it. Note that bs->device_list.tqe_prev is initially null,
1833 * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish
1834 * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
1835 * resetting it to null on remove.
1836 */
1837 if (bs->device_list.tqe_prev) {
1838 QTAILQ_REMOVE(&bdrv_states, bs, device_list);
1839 bs->device_list.tqe_prev = NULL;
1840 }
1841 if (bs->node_name[0] != '\0') {
1842 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1843 }
1844 bs->node_name[0] = '\0';
1845 }
1846
1847 static void bdrv_rebind(BlockDriverState *bs)
1848 {
1849 if (bs->drv && bs->drv->bdrv_rebind) {
1850 bs->drv->bdrv_rebind(bs);
1851 }
1852 }
1853
1854 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
1855 BlockDriverState *bs_src)
1856 {
1857 /* move some fields that need to stay attached to the device */
1858
1859 /* dev info */
1860 bs_dest->guest_block_size = bs_src->guest_block_size;
1861 bs_dest->copy_on_read = bs_src->copy_on_read;
1862
1863 bs_dest->enable_write_cache = bs_src->enable_write_cache;
1864
1865 /* i/o throttled req */
1866 memcpy(&bs_dest->throttle_state,
1867 &bs_src->throttle_state,
1868 sizeof(ThrottleState));
1869 bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
1870 bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
1871 bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
1872
1873 /* r/w error */
1874 bs_dest->on_read_error = bs_src->on_read_error;
1875 bs_dest->on_write_error = bs_src->on_write_error;
1876
1877 /* i/o status */
1878 bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
1879 bs_dest->iostatus = bs_src->iostatus;
1880
1881 /* dirty bitmap */
1882 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
1883
1884 /* reference count */
1885 bs_dest->refcnt = bs_src->refcnt;
1886
1887 /* job */
1888 bs_dest->job = bs_src->job;
1889
1890 /* keep the same entry in bdrv_states */
1891 bs_dest->device_list = bs_src->device_list;
1892 bs_dest->blk = bs_src->blk;
1893
1894 memcpy(bs_dest->op_blockers, bs_src->op_blockers,
1895 sizeof(bs_dest->op_blockers));
1896 }
1897
/*
 * Swap bs contents for two image chains while they are live,
 * while keeping required fields on the BlockDriverState that is
 * actually attached to a device.
 *
 * This will modify the BlockDriverState fields, and swap contents
 * between bs_new and bs_old. Both bs_new and bs_old are modified.
 *
 * bs_new must not be attached to a BlockBackend, must have no dirty bitmaps,
 * no job and no I/O throttling enabled (all of this is asserted).
 *
 * This function does not create any image files.
 */
void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
{
    BlockDriverState tmp;

    /* The code needs to swap the node_name but simply swapping node_list won't
     * work so first remove the nodes from the graph list, do the swap then
     * insert them back if needed.
     */
    if (bs_new->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
    }
    if (bs_old->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
    }

    /* bs_new must be unattached and shouldn't have anything fancy enabled */
    assert(!bs_new->blk);
    assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
    assert(bs_new->job == NULL);
    assert(bs_new->io_limits_enabled == false);
    assert(!throttle_have_timer(&bs_new->throttle_state));

    /* Exchange the complete structures by value, with tmp as scratch space */
    tmp = *bs_new;
    *bs_new = *bs_old;
    *bs_old = tmp;

    /* there are some fields that should not be swapped, move them back:
     * the three calls rotate the feature fields through tmp, so that each
     * BDS ends up with the feature fields it had before the swap */
    bdrv_move_feature_fields(&tmp, bs_old);
    bdrv_move_feature_fields(bs_old, bs_new);
    bdrv_move_feature_fields(bs_new, &tmp);

    /* bs_new must remain unattached */
    assert(!bs_new->blk);

    /* Check a few fields that should remain attached to the device */
    assert(bs_new->job == NULL);
    assert(bs_new->io_limits_enabled == false);
    assert(!throttle_have_timer(&bs_new->throttle_state));

    /* insert the nodes back into the graph node list if needed */
    if (bs_new->node_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
    }
    if (bs_old->node_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
    }

    /* Give the drivers a chance to react to their state having moved */
    bdrv_rebind(bs_new);
    bdrv_rebind(bs_old);
}
1960
/*
 * Add new bs contents at the top of an image chain while the chain is
 * live, while keeping required fields on the top layer.
 *
 * This will modify the BlockDriverState fields, and swap contents
 * between bs_new and bs_top. Both bs_new and bs_top are modified.
 *
 * bs_new must not be attached to a BlockBackend.
 *
 * This function does not create any image files.
 */
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
{
    bdrv_swap(bs_new, bs_top);

    /* After the swap, the structure bs_top points to holds the contents
     * that were passed in as bs_new, and bs_new holds the former top layer,
     * so the former top layer becomes the backing file of the new one. */
    bdrv_set_backing_hd(bs_top, bs_new);
}
1980
/*
 * Closes bs, removes it from the global lists and frees it.
 *
 * bs must have no job, no operation blockers, no remaining references and
 * no dirty bitmaps; this is asserted.
 */
static void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->job);
    assert(bdrv_op_blocker_is_empty(bs));
    assert(!bs->refcnt);
    assert(QLIST_EMPTY(&bs->dirty_bitmaps));

    bdrv_close(bs);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    g_free(bs);
}
1995
1996 /*
1997 * Run consistency checks on an image
1998 *
1999 * Returns 0 if the check could be completed (it doesn't mean that the image is
2000 * free of errors) or -errno when an internal error occurred. The results of the
2001 * check are stored in res.
2002 */
2003 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2004 {
2005 if (bs->drv == NULL) {
2006 return -ENOMEDIUM;
2007 }
2008 if (bs->drv->bdrv_check == NULL) {
2009 return -ENOTSUP;
2010 }
2011
2012 memset(res, 0, sizeof(*res));
2013 return bs->drv->bdrv_check(bs, res, fix);
2014 }
2015
2016 #define COMMIT_BUF_SECTORS 2048
2017
2018 /* commit COW file into the raw image */
2019 int bdrv_commit(BlockDriverState *bs)
2020 {
2021 BlockDriver *drv = bs->drv;
2022 int64_t sector, total_sectors, length, backing_length;
2023 int n, ro, open_flags;
2024 int ret = 0;
2025 uint8_t *buf = NULL;
2026
2027 if (!drv)
2028 return -ENOMEDIUM;
2029
2030 if (!bs->backing_hd) {
2031 return -ENOTSUP;
2032 }
2033
2034 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2035 bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
2036 return -EBUSY;
2037 }
2038
2039 ro = bs->backing_hd->read_only;
2040 open_flags = bs->backing_hd->open_flags;
2041
2042 if (ro) {
2043 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2044 return -EACCES;
2045 }
2046 }
2047
2048 length = bdrv_getlength(bs);
2049 if (length < 0) {
2050 ret = length;
2051 goto ro_cleanup;
2052 }
2053
2054 backing_length = bdrv_getlength(bs->backing_hd);
2055 if (backing_length < 0) {
2056 ret = backing_length;
2057 goto ro_cleanup;
2058 }
2059
2060 /* If our top snapshot is larger than the backing file image,
2061 * grow the backing file image if possible. If not possible,
2062 * we must return an error */
2063 if (length > backing_length) {
2064 ret = bdrv_truncate(bs->backing_hd, length);
2065 if (ret < 0) {
2066 goto ro_cleanup;
2067 }
2068 }
2069
2070 total_sectors = length >> BDRV_SECTOR_BITS;
2071
2072 /* qemu_try_blockalign() for bs will choose an alignment that works for
2073 * bs->backing_hd as well, so no need to compare the alignment manually. */
2074 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2075 if (buf == NULL) {
2076 ret = -ENOMEM;
2077 goto ro_cleanup;
2078 }
2079
2080 for (sector = 0; sector < total_sectors; sector += n) {
2081 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2082 if (ret < 0) {
2083 goto ro_cleanup;
2084 }
2085 if (ret) {
2086 ret = bdrv_read(bs, sector, buf, n);
2087 if (ret < 0) {
2088 goto ro_cleanup;
2089 }
2090
2091 ret = bdrv_write(bs->backing_hd, sector, buf, n);
2092 if (ret < 0) {
2093 goto ro_cleanup;
2094 }
2095 }
2096 }
2097
2098 if (drv->bdrv_make_empty) {
2099 ret = drv->bdrv_make_empty(bs);
2100 if (ret < 0) {
2101 goto ro_cleanup;
2102 }
2103 bdrv_flush(bs);
2104 }
2105
2106 /*
2107 * Make sure all data we wrote to the backing device is actually
2108 * stable on disk.
2109 */
2110 if (bs->backing_hd) {
2111 bdrv_flush(bs->backing_hd);
2112 }
2113
2114 ret = 0;
2115 ro_cleanup:
2116 qemu_vfree(buf);
2117
2118 if (ro) {
2119 /* ignoring error return here */
2120 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
2121 }
2122
2123 return ret;
2124 }
2125
2126 int bdrv_commit_all(void)
2127 {
2128 BlockDriverState *bs;
2129
2130 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2131 AioContext *aio_context = bdrv_get_aio_context(bs);
2132
2133 aio_context_acquire(aio_context);
2134 if (bs->drv && bs->backing_hd) {
2135 int ret = bdrv_commit(bs);
2136 if (ret < 0) {
2137 aio_context_release(aio_context);
2138 return ret;
2139 }
2140 }
2141 aio_context_release(aio_context);
2142 }
2143 return 0;
2144 }
2145
2146 /*
2147 * Return values:
2148 * 0 - success
2149 * -EINVAL - backing format specified, but no file
2150 * -ENOSPC - can't update the backing file because no space is left in the
2151 * image file header
2152 * -ENOTSUP - format driver doesn't support changing the backing file
2153 */
2154 int bdrv_change_backing_file(BlockDriverState *bs,
2155 const char *backing_file, const char *backing_fmt)
2156 {
2157 BlockDriver *drv = bs->drv;
2158 int ret;
2159
2160 /* Backing file format doesn't make sense without a backing file */
2161 if (backing_fmt && !backing_file) {
2162 return -EINVAL;
2163 }
2164
2165 if (drv->bdrv_change_backing_file != NULL) {
2166 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2167 } else {
2168 ret = -ENOTSUP;
2169 }
2170
2171 if (ret == 0) {
2172 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2173 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2174 }
2175 return ret;
2176 }
2177
2178 /*
2179 * Finds the image layer in the chain that has 'bs' as its backing file.
2180 *
2181 * active is the current topmost image.
2182 *
2183 * Returns NULL if bs is not found in active's image chain,
2184 * or if active == bs.
2185 *
2186 * Returns the bottommost base image if bs == NULL.
2187 */
2188 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2189 BlockDriverState *bs)
2190 {
2191 while (active && bs != active->backing_hd) {
2192 active = active->backing_hd;
2193 }
2194
2195 return active;
2196 }
2197
2198 /* Given a BDS, searches for the base layer. */
2199 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2200 {
2201 return bdrv_find_overlay(bs, NULL);
2202 }
2203
/* Element of the list of layers that bdrv_drop_intermediate() will drop */
typedef struct BlkIntermediateStates {
    /* the layer to drop */
    BlockDriverState *bs;
    /* list linkage */
    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
} BlkIntermediateStates;
2208
2209
2210 /*
2211 * Drops images above 'base' up to and including 'top', and sets the image
2212 * above 'top' to have base as its backing file.
2213 *
2214 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2215 * information in 'bs' can be properly updated.
2216 *
2217 * E.g., this will convert the following chain:
2218 * bottom <- base <- intermediate <- top <- active
2219 *
2220 * to
2221 *
2222 * bottom <- base <- active
2223 *
2224 * It is allowed for bottom==base, in which case it converts:
2225 *
2226 * base <- intermediate <- top <- active
2227 *
2228 * to
2229 *
2230 * base <- active
2231 *
2232 * If backing_file_str is non-NULL, it will be used when modifying top's
2233 * overlay image metadata.
2234 *
2235 * Error conditions:
2236 * if active == top, that is considered an error
2237 *
2238 */
2239 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2240 BlockDriverState *base, const char *backing_file_str)
2241 {
2242 BlockDriverState *intermediate;
2243 BlockDriverState *base_bs = NULL;
2244 BlockDriverState *new_top_bs = NULL;
2245 BlkIntermediateStates *intermediate_state, *next;
2246 int ret = -EIO;
2247
2248 QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2249 QSIMPLEQ_INIT(&states_to_delete);
2250
2251 if (!top->drv || !base->drv) {
2252 goto exit;
2253 }
2254
2255 new_top_bs = bdrv_find_overlay(active, top);
2256
2257 if (new_top_bs == NULL) {
2258 /* we could not find the image above 'top', this is an error */
2259 goto exit;
2260 }
2261
2262 /* special case of new_top_bs->backing_hd already pointing to base - nothing
2263 * to do, no intermediate images */
2264 if (new_top_bs->backing_hd == base) {
2265 ret = 0;
2266 goto exit;
2267 }
2268
2269 intermediate = top;
2270
2271 /* now we will go down through the list, and add each BDS we find
2272 * into our deletion queue, until we hit the 'base'
2273 */
2274 while (intermediate) {
2275 intermediate_state = g_new0(BlkIntermediateStates, 1);
2276 intermediate_state->bs = intermediate;
2277 QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2278
2279 if (intermediate->backing_hd == base) {
2280 base_bs = intermediate->backing_hd;
2281 break;
2282 }
2283 intermediate = intermediate->backing_hd;
2284 }
2285 if (base_bs == NULL) {
2286 /* something went wrong, we did not end at the base. safely
2287 * unravel everything, and exit with error */
2288 goto exit;
2289 }
2290
2291 /* success - we can delete the intermediate states, and link top->base */
2292 backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
2293 ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2294 base_bs->drv ? base_bs->drv->format_name : "");
2295 if (ret) {
2296 goto exit;
2297 }
2298 bdrv_set_backing_hd(new_top_bs, base_bs);
2299
2300 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2301 /* so that bdrv_close() does not recursively close the chain */
2302 bdrv_set_backing_hd(intermediate_state->bs, NULL);
2303 bdrv_unref(intermediate_state->bs);
2304 }
2305 ret = 0;
2306
2307 exit:
2308 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2309 g_free(intermediate_state);
2310 }
2311 return ret;
2312 }
2313
2314 /**
2315 * Truncate file to 'offset' bytes (needed only for file protocols)
2316 */
2317 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2318 {
2319 BlockDriver *drv = bs->drv;
2320 int ret;
2321 if (!drv)
2322 return -ENOMEDIUM;
2323 if (!drv->bdrv_truncate)
2324 return -ENOTSUP;
2325 if (bs->read_only)
2326 return -EACCES;
2327
2328 ret = drv->bdrv_truncate(bs, offset);
2329 if (ret == 0) {
2330 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2331 bdrv_dirty_bitmap_truncate(bs);
2332 if (bs->blk) {
2333 blk_dev_resize_cb(bs->blk);
2334 }
2335 }
2336 return ret;
2337 }
2338
2339 /**
2340 * Length of a allocated file in bytes. Sparse files are counted by actual
2341 * allocated space. Return < 0 if error or unknown.
2342 */
2343 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2344 {
2345 BlockDriver *drv = bs->drv;
2346 if (!drv) {
2347 return -ENOMEDIUM;
2348 }
2349 if (drv->bdrv_get_allocated_file_size) {
2350 return drv->bdrv_get_allocated_file_size(bs);
2351 }
2352 if (bs->file) {
2353 return bdrv_get_allocated_file_size(bs->file);
2354 }
2355 return -ENOTSUP;
2356 }
2357
2358 /**
2359 * Return number of sectors on success, -errno on error.
2360 */
2361 int64_t bdrv_nb_sectors(BlockDriverState *bs)
2362 {
2363 BlockDriver *drv = bs->drv;
2364
2365 if (!drv)
2366 return -ENOMEDIUM;
2367
2368 if (drv->has_variable_length) {
2369 int ret = refresh_total_sectors(bs, bs->total_sectors);
2370 if (ret < 0) {
2371 return ret;
2372 }
2373 }
2374 return bs->total_sectors;
2375 }
2376
2377 /**
2378 * Return length in bytes on success, -errno on error.
2379 * The length is always a multiple of BDRV_SECTOR_SIZE.
2380 */
2381 int64_t bdrv_getlength(BlockDriverState *bs)
2382 {
2383 int64_t ret = bdrv_nb_sectors(bs);
2384
2385 ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2386 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2387 }
2388
2389 /* return 0 as number of sectors if no device present or error */
2390 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2391 {
2392 int64_t nb_sectors = bdrv_nb_sectors(bs);
2393
2394 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2395 }
2396
2397 void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
2398 BlockdevOnError on_write_error)
2399 {
2400 bs->on_read_error = on_read_error;
2401 bs->on_write_error = on_write_error;
2402 }
2403
2404 BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
2405 {
2406 return is_read ? bs->on_read_error : bs->on_write_error;
2407 }
2408
2409 BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
2410 {
2411 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
2412
2413 switch (on_err) {
2414 case BLOCKDEV_ON_ERROR_ENOSPC:
2415 return (error == ENOSPC) ?
2416 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
2417 case BLOCKDEV_ON_ERROR_STOP:
2418 return BLOCK_ERROR_ACTION_STOP;
2419 case BLOCKDEV_ON_ERROR_REPORT:
2420 return BLOCK_ERROR_ACTION_REPORT;
2421 case BLOCKDEV_ON_ERROR_IGNORE:
2422 return BLOCK_ERROR_ACTION_IGNORE;
2423 default:
2424 abort();
2425 }
2426 }
2427
2428 static void send_qmp_error_event(BlockDriverState *bs,
2429 BlockErrorAction action,
2430 bool is_read, int error)
2431 {
2432 IoOperationType optype;
2433
2434 optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
2435 qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
2436 bdrv_iostatus_is_enabled(bs),
2437 error == ENOSPC, strerror(error),
2438 &error_abort);
2439 }
2440
2441 /* This is done by device models because, while the block layer knows
2442 * about the error, it does not know whether an operation comes from
2443 * the device or the block layer (from a job, for example).
2444 */
2445 void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
2446 bool is_read, int error)
2447 {
2448 assert(error >= 0);
2449
2450 if (action == BLOCK_ERROR_ACTION_STOP) {
2451 /* First set the iostatus, so that "info block" returns an iostatus
2452 * that matches the events raised so far (an additional error iostatus
2453 * is fine, but not a lost one).
2454 */
2455 bdrv_iostatus_set_err(bs, error);
2456
2457 /* Then raise the request to stop the VM and the event.
2458 * qemu_system_vmstop_request_prepare has two effects. First,
2459 * it ensures that the STOP event always comes after the
2460 * BLOCK_IO_ERROR event. Second, it ensures that even if management
2461 * can observe the STOP event and do a "cont" before the STOP
2462 * event is issued, the VM will not stop. In this case, vm_start()
2463 * also ensures that the STOP/RESUME pair of events is emitted.
2464 */
2465 qemu_system_vmstop_request_prepare();
2466 send_qmp_error_event(bs, action, is_read, error);
2467 qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
2468 } else {
2469 send_qmp_error_event(bs, action, is_read, error);
2470 }
2471 }
2472
2473 int bdrv_is_read_only(BlockDriverState *bs)
2474 {
2475 return bs->read_only;
2476 }
2477
2478 int bdrv_is_sg(BlockDriverState *bs)
2479 {
2480 return bs->sg;
2481 }
2482
2483 int bdrv_enable_write_cache(BlockDriverState *bs)
2484 {
2485 return bs->enable_write_cache;
2486 }
2487
2488 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2489 {
2490 bs->enable_write_cache = wce;
2491
2492 /* so a reopen() will preserve wce */
2493 if (wce) {
2494 bs->open_flags |= BDRV_O_CACHE_WB;
2495 } else {
2496 bs->open_flags &= ~BDRV_O_CACHE_WB;
2497 }
2498 }
2499
2500 int bdrv_is_encrypted(BlockDriverState *bs)
2501 {
2502 if (bs->backing_hd && bs->backing_hd->encrypted)
2503 return 1;
2504 return bs->encrypted;
2505 }
2506
2507 int bdrv_key_required(BlockDriverState *bs)
2508 {
2509 BlockDriverState *backing_hd = bs->backing_hd;
2510
2511 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2512 return 1;
2513 return (bs->encrypted && !bs->valid_key);
2514 }
2515
2516 int bdrv_set_key(BlockDriverState *bs, const char *key)
2517 {
2518 int ret;
2519 if (bs->backing_hd && bs->backing_hd->encrypted) {
2520 ret = bdrv_set_key(bs->backing_hd, key);
2521 if (ret < 0)
2522 return ret;
2523 if (!bs->encrypted)
2524 return 0;
2525 }
2526 if (!bs->encrypted) {
2527 return -EINVAL;
2528 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2529 return -ENOMEDIUM;
2530 }
2531 ret = bs->drv->bdrv_set_key(bs, key);
2532 if (ret < 0) {
2533 bs->valid_key = 0;
2534 } else if (!bs->valid_key) {
2535 bs->valid_key = 1;
2536 if (bs->blk) {
2537 /* call the change callback now, we skipped it on open */
2538 blk_dev_change_media_cb(bs->blk, true);
2539 }
2540 }
2541 return ret;
2542 }
2543
2544 /*
2545 * Provide an encryption key for @bs.
2546 * If @key is non-null:
2547 * If @bs is not encrypted, fail.
2548 * Else if the key is invalid, fail.
2549 * Else set @bs's key to @key, replacing the existing key, if any.
2550 * If @key is null:
2551 * If @bs is encrypted and still lacks a key, fail.
2552 * Else do nothing.
2553 * On failure, store an error object through @errp if non-null.
2554 */
2555 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2556 {
2557 if (key) {
2558 if (!bdrv_is_encrypted(bs)) {
2559 error_setg(errp, "Node '%s' is not encrypted",
2560 bdrv_get_device_or_node_name(bs));
2561 } else if (bdrv_set_key(bs, key) < 0) {
2562 error_set(errp, QERR_INVALID_PASSWORD);
2563 }
2564 } else {
2565 if (bdrv_key_required(bs)) {
2566 error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2567 "'%s' (%s) is encrypted",
2568 bdrv_get_device_or_node_name(bs),
2569 bdrv_get_encrypted_filename(bs));
2570 }
2571 }
2572 }
2573
2574 const char *bdrv_get_format_name(BlockDriverState *bs)
2575 {
2576 return bs->drv ? bs->drv->format_name : NULL;
2577 }
2578
/*
 * qsort() comparator for an array of C strings.
 *
 * qsort() hands the comparator pointers to the array elements, i.e. each
 * argument is a 'const char **' here.  The elements must be dereferenced
 * before calling strcmp(); comparing @a and @b directly would compare the
 * bytes of the pointers stored in the array instead of the strings.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
2583
2584 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2585 void *opaque)
2586 {
2587 BlockDriver *drv;
2588 int count = 0;
2589 int i;
2590 const char **formats = NULL;
2591
2592 QLIST_FOREACH(drv, &bdrv_drivers, list) {
2593 if (drv->format_name) {
2594 bool found = false;
2595 int i = count;
2596 while (formats && i && !found) {
2597 found = !strcmp(formats[--i], drv->format_name);
2598 }
2599
2600 if (!found) {
2601 formats = g_renew(const char *, formats, count + 1);
2602 formats[count++] = drv->format_name;
2603 }
2604 }
2605 }
2606
2607 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2608
2609 for (i = 0; i < count; i++) {
2610 it(opaque, formats[i]);
2611 }
2612
2613 g_free(formats);
2614 }
2615
2616 /* This function is to find a node in the bs graph */
2617 BlockDriverState *bdrv_find_node(const char *node_name)
2618 {
2619 BlockDriverState *bs;
2620
2621 assert(node_name);
2622
2623 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2624 if (!strcmp(node_name, bs->node_name)) {
2625 return bs;
2626 }
2627 }
2628 return NULL;
2629 }
2630
2631 /* Put this QMP function here so it can access the static graph_bdrv_states. */
2632 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2633 {
2634 BlockDeviceInfoList *list, *entry;
2635 BlockDriverState *bs;
2636
2637 list = NULL;
2638 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2639 BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2640 if (!info) {
2641 qapi_free_BlockDeviceInfoList(list);
2642 return NULL;
2643 }
2644 entry = g_malloc0(sizeof(*entry));
2645 entry->value = info;
2646 entry->next = list;
2647 list = entry;
2648 }
2649
2650 return list;
2651 }
2652
2653 BlockDriverState *bdrv_lookup_bs(const char *device,
2654 const char *node_name,
2655 Error **errp)
2656 {
2657 BlockBackend *blk;
2658 BlockDriverState *bs;
2659
2660 if (device) {
2661 blk = blk_by_name(device);
2662
2663 if (blk) {
2664 return blk_bs(blk);
2665 }
2666 }
2667
2668 if (node_name) {
2669 bs = bdrv_find_node(node_name);
2670
2671 if (bs) {
2672 return bs;
2673 }
2674 }
2675
2676 error_setg(errp, "Cannot find device=%s nor node_name=%s",
2677 device ? device : "",
2678 node_name ? node_name : "");
2679 return NULL;
2680 }
2681
2682 /* If 'base' is in the same chain as 'top', return true. Otherwise,
2683 * return false. If either argument is NULL, return false. */
2684 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2685 {
2686 while (top && top != base) {
2687 top = top->backing_hd;
2688 }
2689
2690 return top != NULL;
2691 }
2692
2693 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2694 {
2695 if (!bs) {
2696 return QTAILQ_FIRST(&graph_bdrv_states);
2697 }
2698 return QTAILQ_NEXT(bs, node_list);
2699 }
2700
2701 BlockDriverState *bdrv_next(BlockDriverState *bs)
2702 {
2703 if (!bs) {
2704 return QTAILQ_FIRST(&bdrv_states);
2705 }
2706 return QTAILQ_NEXT(bs, device_list);
2707 }
2708
2709 const char *bdrv_get_node_name(const BlockDriverState *bs)
2710 {
2711 return bs->node_name;
2712 }
2713
2714 /* TODO check what callers really want: bs->node_name or blk_name() */
2715 const char *bdrv_get_device_name(const BlockDriverState *bs)
2716 {
2717 return bs->blk ? blk_name(bs->blk) : "";
2718 }
2719
2720 /* This can be used to identify nodes that might not have a device
2721 * name associated. Since node and device names live in the same
2722 * namespace, the result is unambiguous. The exception is if both are
2723 * absent, then this returns an empty (non-null) string. */
2724 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
2725 {
2726 return bs->blk ? blk_name(bs->blk) : bs->node_name;
2727 }
2728
2729 int bdrv_get_flags(BlockDriverState *bs)
2730 {
2731 return bs->open_flags;
2732 }
2733
2734 int bdrv_has_zero_init_1(BlockDriverState *bs)
2735 {
2736 return 1;
2737 }
2738
2739 int bdrv_has_zero_init(BlockDriverState *bs)
2740 {
2741 assert(bs->drv);
2742
2743 /* If BS is a copy on write image, it is initialized to
2744 the contents of the base image, which may not be zeroes. */
2745 if (bs->backing_hd) {
2746 return 0;
2747 }
2748 if (bs->drv->bdrv_has_zero_init) {
2749 return bs->drv->bdrv_has_zero_init(bs);
2750 }
2751
2752 /* safe default */
2753 return 0;
2754 }
2755
2756 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
2757 {
2758 BlockDriverInfo bdi;
2759
2760 if (bs->backing_hd) {
2761 return false;
2762 }
2763
2764 if (bdrv_get_info(bs, &bdi) == 0) {
2765 return bdi.unallocated_blocks_are_zero;
2766 }
2767
2768 return false;
2769 }
2770
2771 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
2772 {
2773 BlockDriverInfo bdi;
2774
2775 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
2776 return false;
2777 }
2778
2779 if (bdrv_get_info(bs, &bdi) == 0) {
2780 return bdi.can_write_zeroes_with_unmap;
2781 }
2782
2783 return false;
2784 }
2785
2786 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2787 {
2788 if (bs->backing_hd && bs->backing_hd->encrypted)
2789 return bs->backing_file;
2790 else if (bs->encrypted)
2791 return bs->filename;
2792 else
2793 return NULL;
2794 }
2795
2796 void bdrv_get_backing_filename(BlockDriverState *bs,
2797 char *filename, int filename_size)
2798 {
2799 pstrcpy(filename, filename_size, bs->backing_file);
2800 }
2801
2802 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2803 {
2804 BlockDriver *drv = bs->drv;
2805 if (!drv)
2806 return -ENOMEDIUM;
2807 if (!drv->bdrv_get_info)
2808 return -ENOTSUP;
2809 memset(bdi, 0, sizeof(*bdi));
2810 return drv->bdrv_get_info(bs, bdi);
2811 }
2812
2813 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
2814 {
2815 BlockDriver *drv = bs->drv;
2816 if (drv && drv->bdrv_get_specific_info) {
2817 return drv->bdrv_get_specific_info(bs);
2818 }
2819 return NULL;
2820 }
2821
2822 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2823 {
2824 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
2825 return;
2826 }
2827
2828 bs->drv->bdrv_debug_event(bs, event);
2829 }
2830
2831 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
2832 const char *tag)
2833 {
2834 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
2835 bs = bs->file;
2836 }
2837
2838 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
2839 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
2840 }
2841
2842 return -ENOTSUP;
2843 }
2844
2845 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
2846 {
2847 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
2848 bs = bs->file;
2849 }
2850
2851 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
2852 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
2853 }
2854
2855 return -ENOTSUP;
2856 }
2857
2858 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
2859 {
2860 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
2861 bs = bs->file;
2862 }
2863
2864 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
2865 return bs->drv->bdrv_debug_resume(bs, tag);
2866 }
2867
2868 return -ENOTSUP;
2869 }
2870
2871 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
2872 {
2873 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
2874 bs = bs->file;
2875 }
2876
2877 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
2878 return bs->drv->bdrv_debug_is_suspended(bs, tag);
2879 }
2880
2881 return false;
2882 }
2883
2884 int bdrv_is_snapshot(BlockDriverState *bs)
2885 {
2886 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2887 }
2888
2889 /* backing_file can either be relative, or absolute, or a protocol. If it is
2890 * relative, it must be relative to the chain. So, passing in bs->filename
2891 * from a BDS as backing_file should not be done, as that may be relative to
2892 * the CWD rather than the chain. */
2893 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2894 const char *backing_file)
2895 {
2896 char *filename_full = NULL;
2897 char *backing_file_full = NULL;
2898 char *filename_tmp = NULL;
2899 int is_protocol = 0;
2900 BlockDriverState *curr_bs = NULL;
2901 BlockDriverState *retval = NULL;
2902
2903 if (!bs || !bs->drv || !backing_file) {
2904 return NULL;
2905 }
2906
2907 filename_full = g_malloc(PATH_MAX);
2908 backing_file_full = g_malloc(PATH_MAX);
2909 filename_tmp = g_malloc(PATH_MAX);
2910
2911 is_protocol = path_has_protocol(backing_file);
2912
2913 for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
2914
2915 /* If either of the filename paths is actually a protocol, then
2916 * compare unmodified paths; otherwise make paths relative */
2917 if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
2918 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
2919 retval = curr_bs->backing_hd;
2920 break;
2921 }
2922 } else {
2923 /* If not an absolute filename path, make it relative to the current
2924 * image's filename path */
2925 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
2926 backing_file);
2927
2928 /* We are going to compare absolute pathnames */
2929 if (!realpath(filename_tmp, filename_full)) {
2930 continue;
2931 }
2932
2933 /* We need to make sure the backing filename we are comparing against
2934 * is relative to the current image filename (or absolute) */
2935 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
2936 curr_bs->backing_file);
2937
2938 if (!realpath(filename_tmp, backing_file_full)) {
2939 continue;
2940 }
2941
2942 if (strcmp(backing_file_full, filename_full) == 0) {
2943 retval = curr_bs->backing_hd;
2944 break;
2945 }
2946 }
2947 }
2948
2949 g_free(filename_full);
2950 g_free(backing_file_full);
2951 g_free(filename_tmp);
2952 return retval;
2953 }
2954
2955 int bdrv_get_backing_file_depth(BlockDriverState *bs)
2956 {
2957 if (!bs->drv) {
2958 return 0;
2959 }
2960
2961 if (!bs->backing_hd) {
2962 return 0;
2963 }
2964
2965 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
2966 }
2967
2968 void bdrv_init(void)
2969 {
2970 module_call_init(MODULE_INIT_BLOCK);
2971 }
2972
2973 void bdrv_init_with_whitelist(void)
2974 {
2975 use_bdrv_whitelist = 1;
2976 bdrv_init();
2977 }
2978
2979 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
2980 {
2981 Error *local_err = NULL;
2982 int ret;
2983
2984 if (!bs->drv) {
2985 return;
2986 }
2987
2988 if (!(bs->open_flags & BDRV_O_INCOMING)) {
2989 return;
2990 }
2991 bs->open_flags &= ~BDRV_O_INCOMING;
2992
2993 if (bs->drv->bdrv_invalidate_cache) {
2994 bs->drv->bdrv_invalidate_cache(bs, &local_err);
2995 } else if (bs->file) {
2996 bdrv_invalidate_cache(bs->file, &local_err);
2997 }
2998 if (local_err) {
2999 error_propagate(errp, local_err);
3000 return;
3001 }
3002
3003 ret = refresh_total_sectors(bs, bs->total_sectors);
3004 if (ret < 0) {
3005 error_setg_errno(errp, -ret, "Could not refresh total sector count");
3006 return;
3007 }
3008 }
3009
3010 void bdrv_invalidate_cache_all(Error **errp)
3011 {
3012 BlockDriverState *bs;
3013 Error *local_err = NULL;
3014
3015 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3016 AioContext *aio_context = bdrv_get_aio_context(bs);
3017
3018 aio_context_acquire(aio_context);
3019 bdrv_invalidate_cache(bs, &local_err);
3020 aio_context_release(aio_context);
3021 if (local_err) {
3022 error_propagate(errp, local_err);
3023 return;
3024 }
3025 }
3026 }
3027
3028 /**************************************************************/
3029 /* removable device support */
3030
3031 /**
3032 * Return TRUE if the media is present
3033 */
3034 int bdrv_is_inserted(BlockDriverState *bs)
3035 {
3036 BlockDriver *drv = bs->drv;
3037
3038 if (!drv)
3039 return 0;
3040 if (!drv->bdrv_is_inserted)
3041 return 1;
3042 return drv->bdrv_is_inserted(bs);
3043 }
3044
3045 /**
3046 * Return whether the media changed since the last call to this
3047 * function, or -ENOTSUP if we don't know. Most drivers don't know.
3048 */
3049 int bdrv_media_changed(BlockDriverState *bs)
3050 {
3051 BlockDriver *drv = bs->drv;
3052
3053 if (drv && drv->bdrv_media_changed) {
3054 return drv->bdrv_media_changed(bs);
3055 }
3056 return -ENOTSUP;
3057 }
3058
3059 /**
3060 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3061 */
3062 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3063 {
3064 BlockDriver *drv = bs->drv;
3065 const char *device_name;
3066
3067 if (drv && drv->bdrv_eject) {
3068 drv->bdrv_eject(bs, eject_flag);
3069 }
3070
3071 device_name = bdrv_get_device_name(bs);
3072 if (device_name[0] != '\0') {
3073 qapi_event_send_device_tray_moved(device_name,
3074 eject_flag, &error_abort);
3075 }
3076 }
3077
3078 /**
3079 * Lock or unlock the media (if it is locked, the user won't be able
3080 * to eject it manually).
3081 */
3082 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3083 {
3084 BlockDriver *drv = bs->drv;
3085
3086 trace_bdrv_lock_medium(bs, locked);
3087
3088 if (drv && drv->bdrv_lock_medium) {
3089 drv->bdrv_lock_medium(bs, locked);
3090 }
3091 }
3092
3093 void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
3094 {
3095 bs->guest_block_size = align;
3096 }
3097
3098 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
3099 {
3100 BdrvDirtyBitmap *bm;
3101
3102 assert(name);
3103 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3104 if (bm->name && !strcmp(name, bm->name)) {
3105 return bm;
3106 }
3107 }
3108 return NULL;
3109 }
3110
3111 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
3112 {
3113 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3114 g_free(bitmap->name);
3115 bitmap->name = NULL;
3116 }
3117
3118 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
3119 uint32_t granularity,
3120 const char *name,
3121 Error **errp)
3122 {
3123 int64_t bitmap_size;
3124 BdrvDirtyBitmap *bitmap;
3125 uint32_t sector_granularity;
3126
3127 assert((granularity & (granularity - 1)) == 0);
3128
3129 if (name && bdrv_find_dirty_bitmap(bs, name)) {
3130 error_setg(errp, "Bitmap already exists: %s", name);
3131 return NULL;
3132 }
3133 sector_granularity = granularity >> BDRV_SECTOR_BITS;
3134 assert(sector_granularity);
3135 bitmap_size = bdrv_nb_sectors(bs);
3136 if (bitmap_size < 0) {
3137 error_setg_errno(errp, -bitmap_size, "could not get length of device");
3138 errno = -bitmap_size;
3139 return NULL;
3140 }
3141 bitmap = g_new0(BdrvDirtyBitmap, 1);
3142 bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
3143 bitmap->size = bitmap_size;
3144 bitmap->name = g_strdup(name);
3145 bitmap->disabled = false;
3146 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
3147 return bitmap;
3148 }
3149
3150 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
3151 {
3152 return bitmap->successor;
3153 }
3154
3155 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
3156 {
3157 return !(bitmap->disabled || bitmap->successor);
3158 }
3159
3160 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
3161 {
3162 if (bdrv_dirty_bitmap_frozen(bitmap)) {
3163 return DIRTY_BITMAP_STATUS_FROZEN;
3164 } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3165 return DIRTY_BITMAP_STATUS_DISABLED;
3166 } else {
3167 return DIRTY_BITMAP_STATUS_ACTIVE;
3168 }
3169 }
3170
3171 /**
3172 * Create a successor bitmap destined to replace this bitmap after an operation.
3173 * Requires that the bitmap is not frozen and has no successor.
3174 */
3175 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
3176 BdrvDirtyBitmap *bitmap, Error **errp)
3177 {
3178 uint64_t granularity;
3179 BdrvDirtyBitmap *child;
3180
3181 if (bdrv_dirty_bitmap_frozen(bitmap)) {
3182 error_setg(errp, "Cannot create a successor for a bitmap that is "
3183 "currently frozen");
3184 return -1;
3185 }
3186 assert(!bitmap->successor);
3187
3188 /* Create an anonymous successor */
3189 granularity = bdrv_dirty_bitmap_granularity(bitmap);
3190 child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
3191 if (!child) {
3192 return -1;
3193 }
3194
3195 /* Successor will be on or off based on our current state. */
3196 child->disabled = bitmap->disabled;
3197
3198 /* Install the successor and freeze the parent */
3199 bitmap->successor = child;
3200 return 0;
3201 }
3202
3203 /**
3204 * For a bitmap with a successor, yield our name to the successor,
3205 * delete the old bitmap, and return a handle to the new bitmap.
3206 */
3207 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
3208 BdrvDirtyBitmap *bitmap,
3209 Error **errp)
3210 {
3211 char *name;
3212 BdrvDirtyBitmap *successor = bitmap->successor;
3213
3214 if (successor == NULL) {
3215 error_setg(errp, "Cannot relinquish control if "
3216 "there's no successor present");
3217 return NULL;
3218 }
3219
3220 name = bitmap->name;
3221 bitmap->name = NULL;
3222 successor->name = name;
3223 bitmap->successor = NULL;
3224 bdrv_release_dirty_bitmap(bs, bitmap);
3225
3226 return successor;
3227 }
3228
3229 /**
3230 * In cases of failure where we can no longer safely delete the parent,
3231 * we may wish to re-join the parent and child/successor.
3232 * The merged parent will be un-frozen, but not explicitly re-enabled.
3233 */
3234 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
3235 BdrvDirtyBitmap *parent,
3236 Error **errp)
3237 {
3238 BdrvDirtyBitmap *successor = parent->successor;
3239
3240 if (!successor) {
3241 error_setg(errp, "Cannot reclaim a successor when none is present");
3242 return NULL;
3243 }
3244
3245 if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
3246 error_setg(errp, "Merging of parent and successor bitmap failed");
3247 return NULL;
3248 }
3249 bdrv_release_dirty_bitmap(bs, successor);
3250 parent->successor = NULL;
3251
3252 return parent;
3253 }
3254
3255 /**
3256 * Truncates _all_ bitmaps attached to a BDS.
3257 */
3258 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
3259 {
3260 BdrvDirtyBitmap *bitmap;
3261 uint64_t size = bdrv_nb_sectors(bs);
3262
3263 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3264 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3265 hbitmap_truncate(bitmap->bitmap, size);
3266 bitmap->size = size;
3267 }
3268 }
3269
3270 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
3271 {
3272 BdrvDirtyBitmap *bm, *next;
3273 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
3274 if (bm == bitmap) {
3275 assert(!bdrv_dirty_bitmap_frozen(bm));
3276 QLIST_REMOVE(bitmap, list);
3277 hbitmap_free(bitmap->bitmap);
3278 g_free(bitmap->name);
3279 g_free(bitmap);
3280 return;
3281 }
3282 }
3283 }
3284
3285 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3286 {
3287 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3288 bitmap->disabled = true;
3289 }
3290
3291 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3292 {
3293 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3294 bitmap->disabled = false;
3295 }
3296
3297 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
3298 {
3299 BdrvDirtyBitmap *bm;
3300 BlockDirtyInfoList *list = NULL;
3301 BlockDirtyInfoList **plist = &list;
3302
3303 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3304 BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
3305 BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
3306 info->count = bdrv_get_dirty_count(bm);
3307 info->granularity = bdrv_dirty_bitmap_granularity(bm);
3308 info->has_name = !!bm->name;
3309 info->name = g_strdup(bm->name);
3310 info->status = bdrv_dirty_bitmap_status(bm);
3311 entry->value = info;
3312 *plist = entry;
3313 plist = &entry->next;
3314 }
3315
3316 return list;
3317 }
3318
3319 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
3320 {
3321 if (bitmap) {
3322 return hbitmap_get(bitmap->bitmap, sector);
3323 } else {
3324 return 0;
3325 }
3326 }
3327
3328 /**
3329 * Chooses a default granularity based on the existing cluster size,
3330 * but clamped between [4K, 64K]. Defaults to 64K in the case that there
3331 * is no cluster size information available.
3332 */
3333 uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
3334 {
3335 BlockDriverInfo bdi;
3336 uint32_t granularity;
3337
3338 if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
3339 granularity = MAX(4096, bdi.cluster_size);
3340 granularity = MIN(65536, granularity);
3341 } else {
3342 granularity = 65536;
3343 }
3344
3345 return granularity;
3346 }
3347
3348 uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
3349 {
3350 return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
3351 }
3352
3353 void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
3354 {
3355 hbitmap_iter_init(hbi, bitmap->bitmap, 0);
3356 }
3357
3358 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3359 int64_t cur_sector, int nr_sectors)
3360 {
3361 assert(bdrv_dirty_bitmap_enabled(bitmap));
3362 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3363 }
3364
3365 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3366 int64_t cur_sector, int nr_sectors)
3367 {
3368 assert(bdrv_dirty_bitmap_enabled(bitmap));
3369 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3370 }
3371
3372 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3373 {
3374 assert(bdrv_dirty_bitmap_enabled(bitmap));
3375 hbitmap_reset(bitmap->bitmap, 0, bitmap->size);
3376 }
3377
3378 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
3379 int nr_sectors)
3380 {
3381 BdrvDirtyBitmap *bitmap;
3382 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3383 if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3384 continue;
3385 }
3386 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3387 }
3388 }
3389
3390 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3391 int nr_sectors)
3392 {
3393 BdrvDirtyBitmap *bitmap;
3394 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3395 if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3396 continue;
3397 }
3398 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3399 }
3400 }
3401
3402 /**
3403 * Advance an HBitmapIter to an arbitrary offset.
3404 */
3405 void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
3406 {
3407 assert(hbi->hb);
3408 hbitmap_iter_init(hbi, hbi->hb, offset);
3409 }
3410
3411 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
3412 {
3413 return hbitmap_count(bitmap->bitmap);
3414 }
3415
3416 /* Get a reference to bs */
3417 void bdrv_ref(BlockDriverState *bs)
3418 {
3419 bs->refcnt++;
3420 }
3421
3422 /* Release a previously grabbed reference to bs.
3423 * If after releasing, reference count is zero, the BlockDriverState is
3424 * deleted. */
3425 void bdrv_unref(BlockDriverState *bs)
3426 {
3427 if (!bs) {
3428 return;
3429 }
3430 assert(bs->refcnt > 0);
3431 if (--bs->refcnt == 0) {
3432 bdrv_delete(bs);
3433 }
3434 }
3435
3436 struct BdrvOpBlocker {
3437 Error *reason;
3438 QLIST_ENTRY(BdrvOpBlocker) list;
3439 };
3440
3441 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3442 {
3443 BdrvOpBlocker *blocker;
3444 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3445 if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3446 blocker = QLIST_FIRST(&bs->op_blockers[op]);
3447 if (errp) {
3448 error_setg(errp, "Node '%s' is busy: %s",
3449 bdrv_get_device_or_node_name(bs),
3450 error_get_pretty(blocker->reason));
3451 }
3452 return true;
3453 }
3454 return false;
3455 }
3456
3457 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3458 {
3459 BdrvOpBlocker *blocker;
3460 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3461
3462 blocker = g_new0(BdrvOpBlocker, 1);
3463 blocker->reason = reason;
3464 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3465 }
3466
3467 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3468 {
3469 BdrvOpBlocker *blocker, *next;
3470 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3471 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3472 if (blocker->reason == reason) {
3473 QLIST_REMOVE(blocker, list);
3474 g_free(blocker);
3475 }
3476 }
3477 }
3478
3479 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3480 {
3481 int i;
3482 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3483 bdrv_op_block(bs, i, reason);
3484 }
3485 }
3486
3487 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3488 {
3489 int i;
3490 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3491 bdrv_op_unblock(bs, i, reason);
3492 }
3493 }
3494
3495 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3496 {
3497 int i;
3498
3499 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3500 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3501 return false;
3502 }
3503 }
3504 return true;
3505 }
3506
3507 void bdrv_iostatus_enable(BlockDriverState *bs)
3508 {
3509 bs->iostatus_enabled = true;
3510 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3511 }
3512
3513 /* The I/O status is only enabled if the drive explicitly
3514 * enables it _and_ the VM is configured to stop on errors */
3515 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3516 {
3517 return (bs->iostatus_enabled &&
3518 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
3519 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
3520 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
3521 }
3522
3523 void bdrv_iostatus_disable(BlockDriverState *bs)
3524 {
3525 bs->iostatus_enabled = false;
3526 }
3527
3528 void bdrv_iostatus_reset(BlockDriverState *bs)
3529 {
3530 if (bdrv_iostatus_is_enabled(bs)) {
3531 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3532 if (bs->job) {
3533 block_job_iostatus_reset(bs->job);
3534 }
3535 }
3536 }
3537
3538 void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3539 {
3540 assert(bdrv_iostatus_is_enabled(bs));
3541 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
3542 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3543 BLOCK_DEVICE_IO_STATUS_FAILED;
3544 }
3545 }
3546
3547 void bdrv_img_create(const char *filename, const char *fmt,
3548 const char *base_filename, const char *base_fmt,
3549 char *options, uint64_t img_size, int flags,
3550 Error **errp, bool quiet)
3551 {
3552 QemuOptsList *create_opts = NULL;
3553 QemuOpts *opts = NULL;
3554 const char *backing_fmt, *backing_file;
3555 int64_t size;
3556 BlockDriver *drv, *proto_drv;
3557 BlockDriver *backing_drv = NULL;
3558 Error *local_err = NULL;
3559 int ret = 0;
3560
3561 /* Find driver and parse its options */
3562 drv = bdrv_find_format(fmt);
3563 if (!drv) {
3564 error_setg(errp, "Unknown file format '%s'", fmt);
3565 return;
3566 }
3567
3568 proto_drv = bdrv_find_protocol(filename, true, errp);
3569 if (!proto_drv) {
3570 return;
3571 }
3572
3573 if (!drv->create_opts) {
3574 error_setg(errp, "Format driver '%s' does not support image creation",
3575 drv->format_name);
3576 return;
3577 }
3578
3579 if (!proto_drv->create_opts) {
3580 error_setg(errp, "Protocol driver '%s' does not support image creation",
3581 proto_drv->format_name);
3582 return;
3583 }
3584
3585 create_opts = qemu_opts_append(create_opts, drv->create_opts);
3586 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
3587
3588 /* Create parameter list with default values */
3589 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3590 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
3591
3592 /* Parse -o options */
3593 if (options) {
3594 qemu_opts_do_parse(opts, options, NULL, &local_err);
3595 if (local_err) {
3596 error_report_err(local_err);
3597 local_err = NULL;
3598 error_setg(errp, "Invalid options for file format '%s'", fmt);
3599 goto out;
3600 }
3601 }
3602
3603 if (base_filename) {
3604 qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
3605 if (local_err) {
3606 error_setg(errp, "Backing file not supported for file format '%s'",
3607 fmt);
3608 goto out;
3609 }
3610 }
3611
3612 if (base_fmt) {
3613 qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
3614 if (local_err) {
3615 error_setg(errp, "Backing file format not supported for file "
3616 "format '%s'", fmt);
3617 goto out;
3618 }
3619 }
3620
3621 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
3622 if (backing_file) {
3623 if (!strcmp(filename, backing_file)) {
3624 error_setg(errp, "Error: Trying to create an image with the "
3625 "same filename as the backing file");
3626 goto out;
3627 }
3628 }
3629
3630 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
3631 if (backing_fmt) {
3632 backing_drv = bdrv_find_format(backing_fmt);
3633 if (!backing_drv) {
3634 error_setg(errp, "Unknown backing file format '%s'",
3635 backing_fmt);
3636 goto out;
3637 }
3638 }
3639
3640 // The size for the image must always be specified, with one exception:
3641 // If we are using a backing file, we can obtain the size from there
3642 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
3643 if (size == -1) {
3644 if (backing_file) {
3645 BlockDriverState *bs;
3646 char *full_backing = g_new0(char, PATH_MAX);
3647 int64_t size;
3648 int back_flags;
3649
3650 bdrv_get_full_backing_filename_from_filename(filename, backing_file,
3651 full_backing, PATH_MAX,
3652 &local_err);
3653 if (local_err) {
3654 g_free(full_backing);
3655 goto out;
3656 }
3657
3658 /* backing files always opened read-only */
3659 back_flags =
3660 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
3661
3662 bs = NULL;
3663 ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags,
3664 backing_drv, &local_err);
3665 g_free(full_backing);
3666 if (ret < 0) {
3667 goto out;
3668 }
3669 size = bdrv_getlength(bs);
3670 if (size < 0) {
3671 error_setg_errno(errp, -size, "Could not get size of '%s'",
3672 backing_file);
3673 bdrv_unref(bs);
3674 goto out;
3675 }
3676
3677 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
3678
3679 bdrv_unref(bs);
3680 } else {
3681 error_setg(errp, "Image creation needs a size parameter");
3682 goto out;
3683 }
3684 }
3685
3686 if (!quiet) {
3687 printf("Formatting '%s', fmt=%s", filename, fmt);
3688 qemu_opts_print(opts, " ");
3689 puts("");
3690 }
3691
3692 ret = bdrv_create(drv, filename, opts, &local_err);
3693
3694 if (ret == -EFBIG) {
3695 /* This is generally a better message than whatever the driver would
3696 * deliver (especially because of the cluster_size_hint), since that
3697 * is most probably not much different from "image too large". */
3698 const char *cluster_size_hint = "";
3699 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
3700 cluster_size_hint = " (try using a larger cluster size)";
3701 }
3702 error_setg(errp, "The image size is too large for file format '%s'"
3703 "%s", fmt, cluster_size_hint);
3704 error_free(local_err);
3705 local_err = NULL;
3706 }
3707
3708 out:
3709 qemu_opts_del(opts);
3710 qemu_opts_free(create_opts);
3711 if (local_err) {
3712 error_propagate(errp, local_err);
3713 }
3714 }
3715
3716 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
3717 {
3718 return bs->aio_context;
3719 }
3720
3721 void bdrv_detach_aio_context(BlockDriverState *bs)
3722 {
3723 BdrvAioNotifier *baf;
3724
3725 if (!bs->drv) {
3726 return;
3727 }
3728
3729 QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
3730 baf->detach_aio_context(baf->opaque);
3731 }
3732
3733 if (bs->io_limits_enabled) {
3734 throttle_detach_aio_context(&bs->throttle_state);
3735 }
3736 if (bs->drv->bdrv_detach_aio_context) {
3737 bs->drv->bdrv_detach_aio_context(bs);
3738 }
3739 if (bs->file) {
3740 bdrv_detach_aio_context(bs->file);
3741 }
3742 if (bs->backing_hd) {
3743 bdrv_detach_aio_context(bs->backing_hd);
3744 }
3745
3746 bs->aio_context = NULL;
3747 }
3748
3749 void bdrv_attach_aio_context(BlockDriverState *bs,
3750 AioContext *new_context)
3751 {
3752 BdrvAioNotifier *ban;
3753
3754 if (!bs->drv) {
3755 return;
3756 }
3757
3758 bs->aio_context = new_context;
3759
3760 if (bs->backing_hd) {
3761 bdrv_attach_aio_context(bs->backing_hd, new_context);
3762 }
3763 if (bs->file) {
3764 bdrv_attach_aio_context(bs->file, new_context);
3765 }
3766 if (bs->drv->bdrv_attach_aio_context) {
3767 bs->drv->bdrv_attach_aio_context(bs, new_context);
3768 }
3769 if (bs->io_limits_enabled) {
3770 throttle_attach_aio_context(&bs->throttle_state, new_context);
3771 }
3772
3773 QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
3774 ban->attached_aio_context(new_context, ban->opaque);
3775 }
3776 }
3777
3778 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
3779 {
3780 bdrv_drain_all(); /* ensure there are no in-flight requests */
3781
3782 bdrv_detach_aio_context(bs);
3783
3784 /* This function executes in the old AioContext so acquire the new one in
3785 * case it runs in a different thread.
3786 */
3787 aio_context_acquire(new_context);
3788 bdrv_attach_aio_context(bs, new_context);
3789 aio_context_release(new_context);
3790 }
3791
3792 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
3793 void (*attached_aio_context)(AioContext *new_context, void *opaque),
3794 void (*detach_aio_context)(void *opaque), void *opaque)
3795 {
3796 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
3797 *ban = (BdrvAioNotifier){
3798 .attached_aio_context = attached_aio_context,
3799 .detach_aio_context = detach_aio_context,
3800 .opaque = opaque
3801 };
3802
3803 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
3804 }
3805
3806 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3807 void (*attached_aio_context)(AioContext *,
3808 void *),
3809 void (*detach_aio_context)(void *),
3810 void *opaque)
3811 {
3812 BdrvAioNotifier *ban, *ban_next;
3813
3814 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3815 if (ban->attached_aio_context == attached_aio_context &&
3816 ban->detach_aio_context == detach_aio_context &&
3817 ban->opaque == opaque)
3818 {
3819 QLIST_REMOVE(ban, list);
3820 g_free(ban);
3821
3822 return;
3823 }
3824 }
3825
3826 abort();
3827 }
3828
3829 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
3830 BlockDriverAmendStatusCB *status_cb)
3831 {
3832 if (!bs->drv->bdrv_amend_options) {
3833 return -ENOTSUP;
3834 }
3835 return bs->drv->bdrv_amend_options(bs, opts, status_cb);
3836 }
3837
3838 /* This function will be called by the bdrv_recurse_is_first_non_filter method
3839 * of block filter and by bdrv_is_first_non_filter.
3840 * It is used to test if the given bs is the candidate or recurse more in the
3841 * node graph.
3842 */
3843 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
3844 BlockDriverState *candidate)
3845 {
3846 /* return false if basic checks fails */
3847 if (!bs || !bs->drv) {
3848 return false;
3849 }
3850
3851 /* the code reached a non block filter driver -> check if the bs is
3852 * the same as the candidate. It's the recursion termination condition.
3853 */
3854 if (!bs->drv->is_filter) {
3855 return bs == candidate;
3856 }
3857 /* Down this path the driver is a block filter driver */
3858
3859 /* If the block filter recursion method is defined use it to recurse down
3860 * the node graph.
3861 */
3862 if (bs->drv->bdrv_recurse_is_first_non_filter) {
3863 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
3864 }
3865
3866 /* the driver is a block filter but don't allow to recurse -> return false
3867 */
3868 return false;
3869 }
3870
3871 /* This function checks if the candidate is the first non filter bs down it's
3872 * bs chain. Since we don't have pointers to parents it explore all bs chains
3873 * from the top. Some filters can choose not to pass down the recursion.
3874 */
3875 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
3876 {
3877 BlockDriverState *bs;
3878
3879 /* walk down the bs forest recursively */
3880 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3881 bool perm;
3882
3883 /* try to recurse in this top level bs */
3884 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
3885
3886 /* candidate is the first non filter */
3887 if (perm) {
3888 return true;
3889 }
3890 }
3891
3892 return false;
3893 }
3894
3895 BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
3896 {
3897 BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
3898 AioContext *aio_context;
3899
3900 if (!to_replace_bs) {
3901 error_setg(errp, "Node name '%s' not found", node_name);
3902 return NULL;
3903 }
3904
3905 aio_context = bdrv_get_aio_context(to_replace_bs);
3906 aio_context_acquire(aio_context);
3907
3908 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
3909 to_replace_bs = NULL;
3910 goto out;
3911 }
3912
3913 /* We don't want arbitrary node of the BDS chain to be replaced only the top
3914 * most non filter in order to prevent data corruption.
3915 * Another benefit is that this tests exclude backing files which are
3916 * blocked by the backing blockers.
3917 */
3918 if (!bdrv_is_first_non_filter(to_replace_bs)) {
3919 error_setg(errp, "Only top most non filter can be replaced");
3920 to_replace_bs = NULL;
3921 goto out;
3922 }
3923
3924 out:
3925 aio_context_release(aio_context);
3926 return to_replace_bs;
3927 }
3928
3929 static bool append_open_options(QDict *d, BlockDriverState *bs)
3930 {
3931 const QDictEntry *entry;
3932 bool found_any = false;
3933
3934 for (entry = qdict_first(bs->options); entry;
3935 entry = qdict_next(bs->options, entry))
3936 {
3937 /* Only take options for this level and exclude all non-driver-specific
3938 * options */
3939 if (!strchr(qdict_entry_key(entry), '.') &&
3940 strcmp(qdict_entry_key(entry), "node-name"))
3941 {
3942 qobject_incref(qdict_entry_value(entry));
3943 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
3944 found_any = true;
3945 }
3946 }
3947
3948 return found_any;
3949 }
3950
3951 /* Updates the following BDS fields:
3952 * - exact_filename: A filename which may be used for opening a block device
3953 * which (mostly) equals the given BDS (even without any
3954 * other options; so reading and writing must return the same
3955 * results, but caching etc. may be different)
3956 * - full_open_options: Options which, when given when opening a block device
3957 * (without a filename), result in a BDS (mostly)
3958 * equalling the given one
3959 * - filename: If exact_filename is set, it is copied here. Otherwise,
3960 * full_open_options is converted to a JSON object, prefixed with
3961 * "json:" (for use through the JSON pseudo protocol) and put here.
3962 */
3963 void bdrv_refresh_filename(BlockDriverState *bs)
3964 {
3965 BlockDriver *drv = bs->drv;
3966 QDict *opts;
3967
3968 if (!drv) {
3969 return;
3970 }
3971
3972 /* This BDS's file name will most probably depend on its file's name, so
3973 * refresh that first */
3974 if (bs->file) {
3975 bdrv_refresh_filename(bs->file);
3976 }
3977
3978 if (drv->bdrv_refresh_filename) {
3979 /* Obsolete information is of no use here, so drop the old file name
3980 * information before refreshing it */
3981 bs->exact_filename[0] = '\0';
3982 if (bs->full_open_options) {
3983 QDECREF(bs->full_open_options);
3984 bs->full_open_options = NULL;
3985 }
3986
3987 drv->bdrv_refresh_filename(bs);
3988 } else if (bs->file) {
3989 /* Try to reconstruct valid information from the underlying file */
3990 bool has_open_options;
3991
3992 bs->exact_filename[0] = '\0';
3993 if (bs->full_open_options) {
3994 QDECREF(bs->full_open_options);
3995 bs->full_open_options = NULL;
3996 }
3997
3998 opts = qdict_new();
3999 has_open_options = append_open_options(opts, bs);
4000
4001 /* If no specific options have been given for this BDS, the filename of
4002 * the underlying file should suffice for this one as well */
4003 if (bs->file->exact_filename[0] && !has_open_options) {
4004 strcpy(bs->exact_filename, bs->file->exact_filename);
4005 }
4006 /* Reconstructing the full options QDict is simple for most format block
4007 * drivers, as long as the full options are known for the underlying
4008 * file BDS. The full options QDict of that file BDS should somehow
4009 * contain a representation of the filename, therefore the following
4010 * suffices without querying the (exact_)filename of this BDS. */
4011 if (bs->file->full_open_options) {
4012 qdict_put_obj(opts, "driver",
4013 QOBJECT(qstring_from_str(drv->format_name)));
4014 QINCREF(bs->file->full_open_options);
4015 qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));
4016
4017 bs->full_open_options = opts;
4018 } else {
4019 QDECREF(opts);
4020 }
4021 } else if (!bs->full_open_options && qdict_size(bs->options)) {
4022 /* There is no underlying file BDS (at least referenced by BDS.file),
4023 * so the full options QDict should be equal to the options given
4024 * specifically for this block device when it was opened (plus the
4025 * driver specification).
4026 * Because those options don't change, there is no need to update
4027 * full_open_options when it's already set. */
4028
4029 opts = qdict_new();
4030 append_open_options(opts, bs);
4031 qdict_put_obj(opts, "driver",
4032 QOBJECT(qstring_from_str(drv->format_name)));
4033
4034 if (bs->exact_filename[0]) {
4035 /* This may not work for all block protocol drivers (some may
4036 * require this filename to be parsed), but we have to find some
4037 * default solution here, so just include it. If some block driver
4038 * does not support pure options without any filename at all or
4039 * needs some special format of the options QDict, it needs to
4040 * implement the driver-specific bdrv_refresh_filename() function.
4041 */
4042 qdict_put_obj(opts, "filename",
4043 QOBJECT(qstring_from_str(bs->exact_filename)));
4044 }
4045
4046 bs->full_open_options = opts;
4047 }
4048
4049 if (bs->exact_filename[0]) {
4050 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4051 } else if (bs->full_open_options) {
4052 QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4053 snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4054 qstring_get_str(json));
4055 QDECREF(json);
4056 }
4057 }
4058
4059 /* This accessor function purpose is to allow the device models to access the
4060 * BlockAcctStats structure embedded inside a BlockDriverState without being
4061 * aware of the BlockDriverState structure layout.
4062 * It will go away when the BlockAcctStats structure will be moved inside
4063 * the device models.
4064 */
4065 BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
4066 {
4067 return &bs->stats;
4068 }