]> git.proxmox.com Git - mirror_qemu.git/blob - block.c
f9c6bb74048760e503ef55e3fb6dfed06d68a502
[mirror_qemu.git] / block.c
1 /*
2 * QEMU System Emulator block driver
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "block/block_int.h"
28 #include "block/blockjob.h"
29 #include "qemu/module.h"
30 #include "qapi/qmp/qjson.h"
31 #include "sysemu/block-backend.h"
32 #include "sysemu/sysemu.h"
33 #include "qemu/notify.h"
34 #include "block/coroutine.h"
35 #include "block/qapi.h"
36 #include "qmp-commands.h"
37 #include "qemu/timer.h"
38 #include "qapi-event.h"
39
40 #ifdef CONFIG_BSD
41 #include <sys/types.h>
42 #include <sys/stat.h>
43 #include <sys/ioctl.h>
44 #include <sys/queue.h>
45 #ifndef __DragonFly__
46 #include <sys/disk.h>
47 #endif
48 #endif
49
50 #ifdef _WIN32
51 #include <windows.h>
52 #endif
53
/**
 * A BdrvDirtyBitmap can be in three possible states:
 * (1) successor is NULL and disabled is false: full r/w mode
 * (2) successor is NULL and disabled is true: read only mode ("disabled")
 * (3) successor is set: frozen mode.
 *     A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
 *     or enabled. A frozen bitmap can only abdicate() or reclaim().
 *
 * Bitmaps are chained per BlockDriverState through @list; bdrv_new()
 * initialises the owning list head (bs->dirty_bitmaps).
 */
struct BdrvDirtyBitmap {
    HBitmap *bitmap;            /* Dirty sector bitmap implementation */
    BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
    char *name;                 /* Optional non-empty unique ID */
    int64_t size;               /* Size of the bitmap (Number of sectors) */
    bool disabled;              /* Bitmap is read-only */
    QLIST_ENTRY(BdrvDirtyBitmap) list; /* Link in the owner's bitmap list */
};
70
/*
 * Sentinel result used while an emulated synchronous operation is still in
 * progress; bdrv_create() polls the AioContext until CreateCo.ret no longer
 * holds this value.
 */
#define NOT_DONE 0x7fffffff

/* BlockDriverStates created through bdrv_new_root(), linked via device_list */
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

/* BlockDriverStates that were given a node name, linked via node_list
 * (see bdrv_assign_node_name()) */
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

/* All block drivers registered with bdrv_register() */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
/* If non-zero, use only whitelisted block drivers (checked when opening) */
static int use_bdrv_whitelist;
85
86 #ifdef _WIN32
/*
 * Return 1 if @filename starts with an ASCII letter immediately followed
 * by ':' (e.g. "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_letter = (letter >= 'a' && letter <= 'z') ||
                    (letter >= 'A' && letter <= 'Z');

    /* Only look at the second character once the first one matched */
    return is_letter && filename[1] == ':';
}
93
94 int is_windows_drive(const char *filename)
95 {
96 if (is_windows_drive_prefix(filename) &&
97 filename[2] == '\0')
98 return 1;
99 if (strstart(filename, "\\\\.\\", NULL) ||
100 strstart(filename, "//./", NULL))
101 return 1;
102 return 0;
103 }
104 #endif
105
106 size_t bdrv_opt_mem_align(BlockDriverState *bs)
107 {
108 if (!bs || !bs->drv) {
109 /* page size or 4k (hdd sector size) should be on the safe side */
110 return MAX(4096, getpagesize());
111 }
112
113 return bs->bl.opt_mem_alignment;
114 }
115
116 size_t bdrv_min_mem_align(BlockDriverState *bs)
117 {
118 if (!bs || !bs->drv) {
119 /* page size or 4k (hdd sector size) should be on the safe side */
120 return MAX(4096, getpagesize());
121 }
122
123 return bs->bl.min_mem_alignment;
124 }
125
/*
 * Check whether @path starts with "<protocol>:", i.e. whether the first
 * character out of ':' and the path separators is a colon.
 * On Windows, drive specifications are never treated as protocols.
 * Returns 1 if a protocol prefix is present, 0 otherwise.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    const char *stop_chars = ":/\\";

    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    const char *stop_chars = ":/";
#endif

    /* Index of the first stop character, or of the terminating NUL */
    return path[strcspn(path, stop_chars)] == ':';
}
143
/*
 * Return 1 if @path is absolute, 0 otherwise.
 * A path is absolute when it starts with '/'; on Windows a leading '\\' or
 * a drive specification (including names like "\\.\d:") also counts.
 */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (path[0] == '\\') {
        return 1;
    }
#endif
    return path[0] == '/';
}
156
/*
 * Build the path of @filename in @dest (at most @dest_size bytes including
 * the terminating NUL).
 *
 * If @filename is absolute it is copied as is. Otherwise it is appended to
 * the leading part of @base_path, which ends right after the last path
 * separator or right after the first ':' (so URL-like bases are supported),
 * whichever comes later. Nothing is written when @dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *prefix_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Position just past the first ':' (protocol part), if any */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* Position just past the last directory separator, if any */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* Keep whichever prefix is longer, truncated to fit into dest */
    prefix_end = (after_sep > after_colon) ? after_sep : after_colon;
    prefix_len = prefix_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
200
201 void bdrv_get_full_backing_filename_from_filename(const char *backed,
202 const char *backing,
203 char *dest, size_t sz,
204 Error **errp)
205 {
206 if (backing[0] == '\0' || path_has_protocol(backing) ||
207 path_is_absolute(backing))
208 {
209 pstrcpy(dest, sz, backing);
210 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
211 error_setg(errp, "Cannot use relative backing file names for '%s'",
212 backed);
213 } else {
214 path_combine(dest, sz, backed, backing);
215 }
216 }
217
218 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
219 Error **errp)
220 {
221 char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
222
223 bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
224 dest, sz, errp);
225 }
226
227 void bdrv_register(BlockDriver *bdrv)
228 {
229 bdrv_setup_io_funcs(bdrv);
230
231 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
232 }
233
234 BlockDriverState *bdrv_new_root(void)
235 {
236 BlockDriverState *bs = bdrv_new();
237
238 QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
239 return bs;
240 }
241
242 BlockDriverState *bdrv_new(void)
243 {
244 BlockDriverState *bs;
245 int i;
246
247 bs = g_new0(BlockDriverState, 1);
248 QLIST_INIT(&bs->dirty_bitmaps);
249 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
250 QLIST_INIT(&bs->op_blockers[i]);
251 }
252 bdrv_iostatus_disable(bs);
253 notifier_list_init(&bs->close_notifiers);
254 notifier_with_return_list_init(&bs->before_write_notifiers);
255 qemu_co_queue_init(&bs->throttled_reqs[0]);
256 qemu_co_queue_init(&bs->throttled_reqs[1]);
257 bs->refcnt = 1;
258 bs->aio_context = qemu_get_aio_context();
259
260 return bs;
261 }
262
263 void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
264 {
265 notifier_list_add(&bs->close_notifiers, notify);
266 }
267
268 BlockDriver *bdrv_find_format(const char *format_name)
269 {
270 BlockDriver *drv1;
271 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
272 if (!strcmp(drv1->format_name, format_name)) {
273 return drv1;
274 }
275 }
276 return NULL;
277 }
278
279 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
280 {
281 static const char *whitelist_rw[] = {
282 CONFIG_BDRV_RW_WHITELIST
283 };
284 static const char *whitelist_ro[] = {
285 CONFIG_BDRV_RO_WHITELIST
286 };
287 const char **p;
288
289 if (!whitelist_rw[0] && !whitelist_ro[0]) {
290 return 1; /* no whitelist, anything goes */
291 }
292
293 for (p = whitelist_rw; *p; p++) {
294 if (!strcmp(drv->format_name, *p)) {
295 return 1;
296 }
297 }
298 if (read_only) {
299 for (p = whitelist_ro; *p; p++) {
300 if (!strcmp(drv->format_name, *p)) {
301 return 1;
302 }
303 }
304 }
305 return 0;
306 }
307
308 BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
309 bool read_only)
310 {
311 BlockDriver *drv = bdrv_find_format(format_name);
312 return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
313 }
314
/* Arguments and results exchanged between bdrv_create() and its coroutine
 * entry point bdrv_create_co_entry(). */
typedef struct CreateCo {
    BlockDriver *drv;   /* driver whose bdrv_create callback is invoked */
    char *filename;     /* copy of the file name; freed by bdrv_create() */
    QemuOpts *opts;     /* creation options passed to the driver */
    int ret;            /* NOT_DONE until the driver callback has returned */
    Error *err;         /* error reported by the driver, if any */
} CreateCo;
322
323 static void coroutine_fn bdrv_create_co_entry(void *opaque)
324 {
325 Error *local_err = NULL;
326 int ret;
327
328 CreateCo *cco = opaque;
329 assert(cco->drv);
330
331 ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
332 if (local_err) {
333 error_propagate(&cco->err, local_err);
334 }
335 cco->ret = ret;
336 }
337
338 int bdrv_create(BlockDriver *drv, const char* filename,
339 QemuOpts *opts, Error **errp)
340 {
341 int ret;
342
343 Coroutine *co;
344 CreateCo cco = {
345 .drv = drv,
346 .filename = g_strdup(filename),
347 .opts = opts,
348 .ret = NOT_DONE,
349 .err = NULL,
350 };
351
352 if (!drv->bdrv_create) {
353 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
354 ret = -ENOTSUP;
355 goto out;
356 }
357
358 if (qemu_in_coroutine()) {
359 /* Fast-path if already in coroutine context */
360 bdrv_create_co_entry(&cco);
361 } else {
362 co = qemu_coroutine_create(bdrv_create_co_entry);
363 qemu_coroutine_enter(co, &cco);
364 while (cco.ret == NOT_DONE) {
365 aio_poll(qemu_get_aio_context(), true);
366 }
367 }
368
369 ret = cco.ret;
370 if (ret < 0) {
371 if (cco.err) {
372 error_propagate(errp, cco.err);
373 } else {
374 error_setg_errno(errp, -ret, "Could not create image");
375 }
376 }
377
378 out:
379 g_free(cco.filename);
380 return ret;
381 }
382
383 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
384 {
385 BlockDriver *drv;
386 Error *local_err = NULL;
387 int ret;
388
389 drv = bdrv_find_protocol(filename, true, errp);
390 if (drv == NULL) {
391 return -ENOENT;
392 }
393
394 ret = bdrv_create(drv, filename, opts, &local_err);
395 if (local_err) {
396 error_propagate(errp, local_err);
397 }
398 return ret;
399 }
400
401 /**
402 * Try to get @bs's logical and physical block size.
403 * On success, store them in @bsz struct and return 0.
404 * On failure return -errno.
405 * @bs must not be empty.
406 */
407 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
408 {
409 BlockDriver *drv = bs->drv;
410
411 if (drv && drv->bdrv_probe_blocksizes) {
412 return drv->bdrv_probe_blocksizes(bs, bsz);
413 }
414
415 return -ENOTSUP;
416 }
417
418 /**
419 * Try to get @bs's geometry (cyls, heads, sectors).
420 * On success, store them in @geo struct and return 0.
421 * On failure return -errno.
422 * @bs must not be empty.
423 */
424 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
425 {
426 BlockDriver *drv = bs->drv;
427
428 if (drv && drv->bdrv_probe_geometry) {
429 return drv->bdrv_probe_geometry(bs, geo);
430 }
431
432 return -ENOTSUP;
433 }
434
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename: buffer that receives the NUL-terminated name of the new file
 * @size:     capacity of @filename in bytes
 *
 * On POSIX hosts the file is created in $TMPDIR, or in /var/tmp when
 * TMPDIR is not set; -EOVERFLOW is returned if the name does not fit into
 * @filename.
 *
 * Return 0 upon success, otherwise a negative errno value (on Windows the
 * negated GetLastError() code).
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater. */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    int len;
    const char *tmpdir;

    /* A non-positive size can never hold the template; do not let it be
     * converted to a huge size_t inside snprintf(). */
    if (size <= 0) {
        return -EOVERFLOW;
    }

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }

    len = snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    if (len < 0 || len >= size) {
        /* formatting failed or the template was truncated */
        return -EOVERFLOW;
    }

    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* unlink() may overwrite errno, so remember close()'s error first */
        int saved_errno = errno;

        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
470
471 /*
472 * Detect host devices. By convention, /dev/cdrom[N] is always
473 * recognized as a host CDROM.
474 */
475 static BlockDriver *find_hdev_driver(const char *filename)
476 {
477 int score_max = 0, score;
478 BlockDriver *drv = NULL, *d;
479
480 QLIST_FOREACH(d, &bdrv_drivers, list) {
481 if (d->bdrv_probe_device) {
482 score = d->bdrv_probe_device(filename);
483 if (score > score_max) {
484 score_max = score;
485 drv = d;
486 }
487 }
488 }
489
490 return drv;
491 }
492
493 BlockDriver *bdrv_find_protocol(const char *filename,
494 bool allow_protocol_prefix,
495 Error **errp)
496 {
497 BlockDriver *drv1;
498 char protocol[128];
499 int len;
500 const char *p;
501
502 /* TODO Drivers without bdrv_file_open must be specified explicitly */
503
504 /*
505 * XXX(hch): we really should not let host device detection
506 * override an explicit protocol specification, but moving this
507 * later breaks access to device names with colons in them.
508 * Thanks to the brain-dead persistent naming schemes on udev-
509 * based Linux systems those actually are quite common.
510 */
511 drv1 = find_hdev_driver(filename);
512 if (drv1) {
513 return drv1;
514 }
515
516 if (!path_has_protocol(filename) || !allow_protocol_prefix) {
517 return &bdrv_file;
518 }
519
520 p = strchr(filename, ':');
521 assert(p != NULL);
522 len = p - filename;
523 if (len > sizeof(protocol) - 1)
524 len = sizeof(protocol) - 1;
525 memcpy(protocol, filename, len);
526 protocol[len] = '\0';
527 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
528 if (drv1->protocol_name &&
529 !strcmp(drv1->protocol_name, protocol)) {
530 return drv1;
531 }
532 }
533
534 error_setg(errp, "Unknown protocol '%s'", protocol);
535 return NULL;
536 }
537
538 /*
539 * Guess image format by probing its contents.
540 * This is not a good idea when your image is raw (CVE-2008-2004), but
541 * we do it anyway for backward compatibility.
542 *
543 * @buf contains the image's first @buf_size bytes.
544 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
545 * but can be smaller if the image file is smaller)
546 * @filename is its filename.
547 *
548 * For all block drivers, call the bdrv_probe() method to get its
549 * probing score.
550 * Return the first block driver with the highest probing score.
551 */
552 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
553 const char *filename)
554 {
555 int score_max = 0, score;
556 BlockDriver *drv = NULL, *d;
557
558 QLIST_FOREACH(d, &bdrv_drivers, list) {
559 if (d->bdrv_probe) {
560 score = d->bdrv_probe(buf, buf_size, filename);
561 if (score > score_max) {
562 score_max = score;
563 drv = d;
564 }
565 }
566 }
567
568 return drv;
569 }
570
571 static int find_image_format(BlockDriverState *bs, const char *filename,
572 BlockDriver **pdrv, Error **errp)
573 {
574 BlockDriver *drv;
575 uint8_t buf[BLOCK_PROBE_BUF_SIZE];
576 int ret = 0;
577
578 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
579 if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
580 *pdrv = &bdrv_raw;
581 return ret;
582 }
583
584 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
585 if (ret < 0) {
586 error_setg_errno(errp, -ret, "Could not read image for determining its "
587 "format");
588 *pdrv = NULL;
589 return ret;
590 }
591
592 drv = bdrv_probe_all(buf, ret, filename);
593 if (!drv) {
594 error_setg(errp, "Could not determine image format: No compatible "
595 "driver found");
596 ret = -ENOENT;
597 }
598 *pdrv = drv;
599 return ret;
600 }
601
602 /**
603 * Set the current 'total_sectors' value
604 * Return 0 on success, -errno on error.
605 */
606 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
607 {
608 BlockDriver *drv = bs->drv;
609
610 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
611 if (bs->sg)
612 return 0;
613
614 /* query actual device if possible, otherwise just trust the hint */
615 if (drv->bdrv_getlength) {
616 int64_t length = drv->bdrv_getlength(bs);
617 if (length < 0) {
618 return length;
619 }
620 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
621 }
622
623 bs->total_sectors = hint;
624 return 0;
625 }
626
627 /**
628 * Set open flags for a given discard mode
629 *
630 * Return 0 on success, -1 if the discard mode was invalid.
631 */
632 int bdrv_parse_discard_flags(const char *mode, int *flags)
633 {
634 *flags &= ~BDRV_O_UNMAP;
635
636 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
637 /* do nothing */
638 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
639 *flags |= BDRV_O_UNMAP;
640 } else {
641 return -1;
642 }
643
644 return 0;
645 }
646
647 /**
648 * Set open flags for a given cache mode
649 *
650 * Return 0 on success, -1 if the cache mode was invalid.
651 */
652 int bdrv_parse_cache_flags(const char *mode, int *flags)
653 {
654 *flags &= ~BDRV_O_CACHE_MASK;
655
656 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
657 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
658 } else if (!strcmp(mode, "directsync")) {
659 *flags |= BDRV_O_NOCACHE;
660 } else if (!strcmp(mode, "writeback")) {
661 *flags |= BDRV_O_CACHE_WB;
662 } else if (!strcmp(mode, "unsafe")) {
663 *flags |= BDRV_O_CACHE_WB;
664 *flags |= BDRV_O_NO_FLUSH;
665 } else if (!strcmp(mode, "writethrough")) {
666 /* this is the default */
667 } else {
668 return -1;
669 }
670
671 return 0;
672 }
673
674 /*
675 * Returns the flags that a temporary snapshot should get, based on the
676 * originally requested flags (the originally requested image will have flags
677 * like a backing file)
678 */
679 static int bdrv_temp_snapshot_flags(int flags)
680 {
681 return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
682 }
683
684 /*
685 * Returns the flags that bs->file should get, based on the given flags for
686 * the parent BDS
687 */
688 static int bdrv_inherited_flags(int flags)
689 {
690 /* Enable protocol handling, disable format probing for bs->file */
691 flags |= BDRV_O_PROTOCOL;
692
693 /* Our block drivers take care to send flushes and respect unmap policy,
694 * so we can enable both unconditionally on lower layers. */
695 flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
696
697 /* Clear flags that only apply to the top layer */
698 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
699
700 return flags;
701 }
702
703 /*
704 * Returns the flags that bs->backing_hd should get, based on the given flags
705 * for the parent BDS
706 */
707 static int bdrv_backing_flags(int flags)
708 {
709 /* backing files always opened read-only */
710 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
711
712 /* snapshot=on is handled on the top layer */
713 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
714
715 return flags;
716 }
717
718 static int bdrv_open_flags(BlockDriverState *bs, int flags)
719 {
720 int open_flags = flags | BDRV_O_CACHE_WB;
721
722 /*
723 * Clear flags that are internal to the block layer before opening the
724 * image.
725 */
726 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
727
728 /*
729 * Snapshots should be writable.
730 */
731 if (flags & BDRV_O_TEMPORARY) {
732 open_flags |= BDRV_O_RDWR;
733 }
734
735 return open_flags;
736 }
737
738 static void bdrv_assign_node_name(BlockDriverState *bs,
739 const char *node_name,
740 Error **errp)
741 {
742 if (!node_name) {
743 return;
744 }
745
746 /* Check for empty string or invalid characters */
747 if (!id_wellformed(node_name)) {
748 error_setg(errp, "Invalid node name");
749 return;
750 }
751
752 /* takes care of avoiding namespaces collisions */
753 if (blk_by_name(node_name)) {
754 error_setg(errp, "node-name=%s is conflicting with a device id",
755 node_name);
756 return;
757 }
758
759 /* takes care of avoiding duplicates node names */
760 if (bdrv_find_node(node_name)) {
761 error_setg(errp, "Duplicate node name");
762 return;
763 }
764
765 /* copy node name into the bs and insert it into the graph list */
766 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
767 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
768 }
769
770 /*
771 * Common part for opening disk images and files
772 *
773 * Removes all processed options from *options.
774 */
775 static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
776 QDict *options, int flags, BlockDriver *drv, Error **errp)
777 {
778 int ret, open_flags;
779 const char *filename;
780 const char *node_name = NULL;
781 Error *local_err = NULL;
782
783 assert(drv != NULL);
784 assert(bs->file == NULL);
785 assert(options != NULL && bs->options != options);
786
787 if (file != NULL) {
788 filename = file->filename;
789 } else {
790 filename = qdict_get_try_str(options, "filename");
791 }
792
793 if (drv->bdrv_needs_filename && !filename) {
794 error_setg(errp, "The '%s' block driver requires a file name",
795 drv->format_name);
796 return -EINVAL;
797 }
798
799 trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
800
801 node_name = qdict_get_try_str(options, "node-name");
802 bdrv_assign_node_name(bs, node_name, &local_err);
803 if (local_err) {
804 error_propagate(errp, local_err);
805 return -EINVAL;
806 }
807 qdict_del(options, "node-name");
808
809 /* bdrv_open() with directly using a protocol as drv. This layer is already
810 * opened, so assign it to bs (while file becomes a closed BlockDriverState)
811 * and return immediately. */
812 if (file != NULL && drv->bdrv_file_open) {
813 bdrv_swap(file, bs);
814 return 0;
815 }
816
817 bs->open_flags = flags;
818 bs->guest_block_size = 512;
819 bs->request_alignment = 512;
820 bs->zero_beyond_eof = true;
821 open_flags = bdrv_open_flags(bs, flags);
822 bs->read_only = !(open_flags & BDRV_O_RDWR);
823
824 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
825 error_setg(errp,
826 !bs->read_only && bdrv_is_whitelisted(drv, true)
827 ? "Driver '%s' can only be used for read-only devices"
828 : "Driver '%s' is not whitelisted",
829 drv->format_name);
830 return -ENOTSUP;
831 }
832
833 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
834 if (flags & BDRV_O_COPY_ON_READ) {
835 if (!bs->read_only) {
836 bdrv_enable_copy_on_read(bs);
837 } else {
838 error_setg(errp, "Can't use copy-on-read on read-only device");
839 return -EINVAL;
840 }
841 }
842
843 if (filename != NULL) {
844 pstrcpy(bs->filename, sizeof(bs->filename), filename);
845 } else {
846 bs->filename[0] = '\0';
847 }
848 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
849
850 bs->drv = drv;
851 bs->opaque = g_malloc0(drv->instance_size);
852
853 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
854
855 /* Open the image, either directly or using a protocol */
856 if (drv->bdrv_file_open) {
857 assert(file == NULL);
858 assert(!drv->bdrv_needs_filename || filename != NULL);
859 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
860 } else {
861 if (file == NULL) {
862 error_setg(errp, "Can't use '%s' as a block driver for the "
863 "protocol level", drv->format_name);
864 ret = -EINVAL;
865 goto free_and_fail;
866 }
867 bs->file = file;
868 ret = drv->bdrv_open(bs, options, open_flags, &local_err);
869 }
870
871 if (ret < 0) {
872 if (local_err) {
873 error_propagate(errp, local_err);
874 } else if (bs->filename[0]) {
875 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
876 } else {
877 error_setg_errno(errp, -ret, "Could not open image");
878 }
879 goto free_and_fail;
880 }
881
882 if (bs->encrypted) {
883 error_report("Encrypted images are deprecated");
884 error_printf("Support for them will be removed in a future release.\n"
885 "You can use 'qemu-img convert' to convert your image"
886 " to an unencrypted one.\n");
887 }
888
889 ret = refresh_total_sectors(bs, bs->total_sectors);
890 if (ret < 0) {
891 error_setg_errno(errp, -ret, "Could not refresh total sector count");
892 goto free_and_fail;
893 }
894
895 bdrv_refresh_limits(bs, &local_err);
896 if (local_err) {
897 error_propagate(errp, local_err);
898 ret = -EINVAL;
899 goto free_and_fail;
900 }
901
902 assert(bdrv_opt_mem_align(bs) != 0);
903 assert(bdrv_min_mem_align(bs) != 0);
904 assert((bs->request_alignment != 0) || bs->sg);
905 return 0;
906
907 free_and_fail:
908 bs->file = NULL;
909 g_free(bs->opaque);
910 bs->opaque = NULL;
911 bs->drv = NULL;
912 return ret;
913 }
914
915 static QDict *parse_json_filename(const char *filename, Error **errp)
916 {
917 QObject *options_obj;
918 QDict *options;
919 int ret;
920
921 ret = strstart(filename, "json:", &filename);
922 assert(ret);
923
924 options_obj = qobject_from_json(filename);
925 if (!options_obj) {
926 error_setg(errp, "Could not parse the JSON options");
927 return NULL;
928 }
929
930 if (qobject_type(options_obj) != QTYPE_QDICT) {
931 qobject_decref(options_obj);
932 error_setg(errp, "Invalid JSON object given");
933 return NULL;
934 }
935
936 options = qobject_to_qdict(options_obj);
937 qdict_flatten(options);
938
939 return options;
940 }
941
942 /*
943 * Fills in default options for opening images and converts the legacy
944 * filename/flags pair to option QDict entries.
945 */
946 static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
947 BlockDriver *drv, Error **errp)
948 {
949 const char *filename = *pfilename;
950 const char *drvname;
951 bool protocol = flags & BDRV_O_PROTOCOL;
952 bool parse_filename = false;
953 Error *local_err = NULL;
954
955 /* Parse json: pseudo-protocol */
956 if (filename && g_str_has_prefix(filename, "json:")) {
957 QDict *json_options = parse_json_filename(filename, &local_err);
958 if (local_err) {
959 error_propagate(errp, local_err);
960 return -EINVAL;
961 }
962
963 /* Options given in the filename have lower priority than options
964 * specified directly */
965 qdict_join(*options, json_options, false);
966 QDECREF(json_options);
967 *pfilename = filename = NULL;
968 }
969
970 /* Fetch the file name from the options QDict if necessary */
971 if (protocol && filename) {
972 if (!qdict_haskey(*options, "filename")) {
973 qdict_put(*options, "filename", qstring_from_str(filename));
974 parse_filename = true;
975 } else {
976 error_setg(errp, "Can't specify 'file' and 'filename' options at "
977 "the same time");
978 return -EINVAL;
979 }
980 }
981
982 /* Find the right block driver */
983 filename = qdict_get_try_str(*options, "filename");
984 drvname = qdict_get_try_str(*options, "driver");
985
986 if (drv) {
987 if (drvname) {
988 error_setg(errp, "Driver specified twice");
989 return -EINVAL;
990 }
991 drvname = drv->format_name;
992 qdict_put(*options, "driver", qstring_from_str(drvname));
993 } else {
994 if (!drvname && protocol) {
995 if (filename) {
996 drv = bdrv_find_protocol(filename, parse_filename, errp);
997 if (!drv) {
998 return -EINVAL;
999 }
1000
1001 drvname = drv->format_name;
1002 qdict_put(*options, "driver", qstring_from_str(drvname));
1003 } else {
1004 error_setg(errp, "Must specify either driver or file");
1005 return -EINVAL;
1006 }
1007 } else if (drvname) {
1008 drv = bdrv_find_format(drvname);
1009 if (!drv) {
1010 error_setg(errp, "Unknown driver '%s'", drvname);
1011 return -ENOENT;
1012 }
1013 }
1014 }
1015
1016 assert(drv || !protocol);
1017
1018 /* Driver-specific filename parsing */
1019 if (drv && drv->bdrv_parse_filename && parse_filename) {
1020 drv->bdrv_parse_filename(filename, *options, &local_err);
1021 if (local_err) {
1022 error_propagate(errp, local_err);
1023 return -EINVAL;
1024 }
1025
1026 if (!drv->bdrv_needs_filename) {
1027 qdict_del(*options, "filename");
1028 }
1029 }
1030
1031 return 0;
1032 }
1033
1034 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1035 {
1036
1037 if (bs->backing_hd) {
1038 assert(bs->backing_blocker);
1039 bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
1040 } else if (backing_hd) {
1041 error_setg(&bs->backing_blocker,
1042 "node is used as backing hd of '%s'",
1043 bdrv_get_device_or_node_name(bs));
1044 }
1045
1046 bs->backing_hd = backing_hd;
1047 if (!backing_hd) {
1048 error_free(bs->backing_blocker);
1049 bs->backing_blocker = NULL;
1050 goto out;
1051 }
1052 bs->open_flags &= ~BDRV_O_NO_BACKING;
1053 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1054 pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1055 backing_hd->drv ? backing_hd->drv->format_name : "");
1056
1057 bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
1058 /* Otherwise we won't be able to commit due to check in bdrv_commit */
1059 bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1060 bs->backing_blocker);
1061 out:
1062 bdrv_refresh_limits(bs, NULL);
1063 }
1064
1065 /*
1066 * Opens the backing file for a BlockDriverState if not yet open
1067 *
1068 * options is a QDict of options to pass to the block drivers, or NULL for an
1069 * empty set of options. The reference to the QDict is transferred to this
1070 * function (even on failure), so if the caller intends to reuse the dictionary,
1071 * it needs to use QINCREF() before calling bdrv_file_open.
1072 */
1073 int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
1074 {
1075 char *backing_filename = g_malloc0(PATH_MAX);
1076 int ret = 0;
1077 BlockDriverState *backing_hd;
1078 Error *local_err = NULL;
1079
1080 if (bs->backing_hd != NULL) {
1081 QDECREF(options);
1082 goto free_exit;
1083 }
1084
1085 /* NULL means an empty set of options */
1086 if (options == NULL) {
1087 options = qdict_new();
1088 }
1089
1090 bs->open_flags &= ~BDRV_O_NO_BACKING;
1091 if (qdict_haskey(options, "file.filename")) {
1092 backing_filename[0] = '\0';
1093 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1094 QDECREF(options);
1095 goto free_exit;
1096 } else {
1097 bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1098 &local_err);
1099 if (local_err) {
1100 ret = -EINVAL;
1101 error_propagate(errp, local_err);
1102 QDECREF(options);
1103 goto free_exit;
1104 }
1105 }
1106
1107 if (!bs->drv || !bs->drv->supports_backing) {
1108 ret = -EINVAL;
1109 error_setg(errp, "Driver doesn't support backing files");
1110 QDECREF(options);
1111 goto free_exit;
1112 }
1113
1114 backing_hd = bdrv_new();
1115
1116 if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1117 qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1118 }
1119
1120 assert(bs->backing_hd == NULL);
1121 ret = bdrv_open(&backing_hd,
1122 *backing_filename ? backing_filename : NULL, NULL, options,
1123 bdrv_backing_flags(bs->open_flags), NULL, &local_err);
1124 if (ret < 0) {
1125 bdrv_unref(backing_hd);
1126 backing_hd = NULL;
1127 bs->open_flags |= BDRV_O_NO_BACKING;
1128 error_setg(errp, "Could not open backing file: %s",
1129 error_get_pretty(local_err));
1130 error_free(local_err);
1131 goto free_exit;
1132 }
1133 bdrv_set_backing_hd(bs, backing_hd);
1134
1135 free_exit:
1136 g_free(backing_filename);
1137 return ret;
1138 }
1139
1140 /*
1141 * Opens a disk image whose options are given as BlockdevRef in another block
1142 * device's options.
1143 *
1144 * If allow_none is true, no image will be opened if filename is false and no
1145 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1146 *
1147 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1148 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1149 * itself, all options starting with "${bdref_key}." are considered part of the
1150 * BlockdevRef.
1151 *
1152 * The BlockdevRef will be removed from the options QDict.
1153 *
1154 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
1155 */
1156 int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1157 QDict *options, const char *bdref_key, int flags,
1158 bool allow_none, Error **errp)
1159 {
1160 QDict *image_options;
1161 int ret;
1162 char *bdref_key_dot;
1163 const char *reference;
1164
1165 assert(pbs);
1166 assert(*pbs == NULL);
1167
1168 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1169 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1170 g_free(bdref_key_dot);
1171
1172 reference = qdict_get_try_str(options, bdref_key);
1173 if (!filename && !reference && !qdict_size(image_options)) {
1174 if (allow_none) {
1175 ret = 0;
1176 } else {
1177 error_setg(errp, "A block device must be specified for \"%s\"",
1178 bdref_key);
1179 ret = -EINVAL;
1180 }
1181 QDECREF(image_options);
1182 goto done;
1183 }
1184
1185 ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);
1186
1187 done:
1188 qdict_del(options, bdref_key);
1189 return ret;
1190 }
1191
1192 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
1193 {
1194 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1195 char *tmp_filename = g_malloc0(PATH_MAX + 1);
1196 int64_t total_size;
1197 QemuOpts *opts = NULL;
1198 QDict *snapshot_options;
1199 BlockDriverState *bs_snapshot;
1200 Error *local_err;
1201 int ret;
1202
1203 /* if snapshot, we create a temporary backing file and open it
1204 instead of opening 'filename' directly */
1205
1206 /* Get the required size from the image */
1207 total_size = bdrv_getlength(bs);
1208 if (total_size < 0) {
1209 ret = total_size;
1210 error_setg_errno(errp, -total_size, "Could not get image size");
1211 goto out;
1212 }
1213
1214 /* Create the temporary image */
1215 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1216 if (ret < 0) {
1217 error_setg_errno(errp, -ret, "Could not get temporary filename");
1218 goto out;
1219 }
1220
1221 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1222 &error_abort);
1223 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1224 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
1225 qemu_opts_del(opts);
1226 if (ret < 0) {
1227 error_setg_errno(errp, -ret, "Could not create temporary overlay "
1228 "'%s': %s", tmp_filename,
1229 error_get_pretty(local_err));
1230 error_free(local_err);
1231 goto out;
1232 }
1233
1234 /* Prepare a new options QDict for the temporary file */
1235 snapshot_options = qdict_new();
1236 qdict_put(snapshot_options, "file.driver",
1237 qstring_from_str("file"));
1238 qdict_put(snapshot_options, "file.filename",
1239 qstring_from_str(tmp_filename));
1240
1241 bs_snapshot = bdrv_new();
1242
1243 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1244 flags, &bdrv_qcow2, &local_err);
1245 if (ret < 0) {
1246 error_propagate(errp, local_err);
1247 goto out;
1248 }
1249
1250 bdrv_append(bs_snapshot, bs);
1251
1252 out:
1253 g_free(tmp_filename);
1254 return ret;
1255 }
1256
1257 /*
1258 * Opens a disk image (raw, qcow2, vmdk, ...)
1259 *
1260 * options is a QDict of options to pass to the block drivers, or NULL for an
1261 * empty set of options. The reference to the QDict belongs to the block layer
1262 * after the call (even on failure), so if the caller intends to reuse the
1263 * dictionary, it needs to use QINCREF() before calling bdrv_open.
1264 *
1265 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1266 * If it is not NULL, the referenced BDS will be reused.
1267 *
1268 * The reference parameter may be used to specify an existing block device which
1269 * should be opened. If specified, neither options nor a filename may be given,
1270 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1271 */
1272 int bdrv_open(BlockDriverState **pbs, const char *filename,
1273 const char *reference, QDict *options, int flags,
1274 BlockDriver *drv, Error **errp)
1275 {
1276 int ret;
1277 BlockDriverState *file = NULL, *bs;
1278 const char *drvname;
1279 Error *local_err = NULL;
1280 int snapshot_flags = 0;
1281
1282 assert(pbs);
1283
1284 if (reference) {
1285 bool options_non_empty = options ? qdict_size(options) : false;
1286 QDECREF(options);
1287
1288 if (*pbs) {
1289 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1290 "another block device");
1291 return -EINVAL;
1292 }
1293
1294 if (filename || options_non_empty) {
1295 error_setg(errp, "Cannot reference an existing block device with "
1296 "additional options or a new filename");
1297 return -EINVAL;
1298 }
1299
1300 bs = bdrv_lookup_bs(reference, reference, errp);
1301 if (!bs) {
1302 return -ENODEV;
1303 }
1304 bdrv_ref(bs);
1305 *pbs = bs;
1306 return 0;
1307 }
1308
1309 if (*pbs) {
1310 bs = *pbs;
1311 } else {
1312 bs = bdrv_new();
1313 }
1314
1315 /* NULL means an empty set of options */
1316 if (options == NULL) {
1317 options = qdict_new();
1318 }
1319
1320 ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
1321 if (local_err) {
1322 goto fail;
1323 }
1324
1325 /* Find the right image format driver */
1326 drv = NULL;
1327 drvname = qdict_get_try_str(options, "driver");
1328 if (drvname) {
1329 drv = bdrv_find_format(drvname);
1330 qdict_del(options, "driver");
1331 if (!drv) {
1332 error_setg(errp, "Unknown driver: '%s'", drvname);
1333 ret = -EINVAL;
1334 goto fail;
1335 }
1336 }
1337
1338 assert(drvname || !(flags & BDRV_O_PROTOCOL));
1339 if (drv && !drv->bdrv_file_open) {
1340 /* If the user explicitly wants a format driver here, we'll need to add
1341 * another layer for the protocol in bs->file */
1342 flags &= ~BDRV_O_PROTOCOL;
1343 }
1344
1345 bs->options = options;
1346 options = qdict_clone_shallow(options);
1347
1348 /* Open image file without format layer */
1349 if ((flags & BDRV_O_PROTOCOL) == 0) {
1350 if (flags & BDRV_O_RDWR) {
1351 flags |= BDRV_O_ALLOW_RDWR;
1352 }
1353 if (flags & BDRV_O_SNAPSHOT) {
1354 snapshot_flags = bdrv_temp_snapshot_flags(flags);
1355 flags = bdrv_backing_flags(flags);
1356 }
1357
1358 assert(file == NULL);
1359 ret = bdrv_open_image(&file, filename, options, "file",
1360 bdrv_inherited_flags(flags),
1361 true, &local_err);
1362 if (ret < 0) {
1363 goto fail;
1364 }
1365 }
1366
1367 /* Image format probing */
1368 bs->probed = !drv;
1369 if (!drv && file) {
1370 ret = find_image_format(file, filename, &drv, &local_err);
1371 if (ret < 0) {
1372 goto fail;
1373 }
1374 } else if (!drv) {
1375 error_setg(errp, "Must specify either driver or file");
1376 ret = -EINVAL;
1377 goto fail;
1378 }
1379
1380 /* Open the image */
1381 ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
1382 if (ret < 0) {
1383 goto fail;
1384 }
1385
1386 if (file && (bs->file != file)) {
1387 bdrv_unref(file);
1388 file = NULL;
1389 }
1390
1391 /* If there is a backing file, use it */
1392 if ((flags & BDRV_O_NO_BACKING) == 0) {
1393 QDict *backing_options;
1394
1395 qdict_extract_subqdict(options, &backing_options, "backing.");
1396 ret = bdrv_open_backing_file(bs, backing_options, &local_err);
1397 if (ret < 0) {
1398 goto close_and_fail;
1399 }
1400 }
1401
1402 bdrv_refresh_filename(bs);
1403
1404 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1405 * temporary snapshot afterwards. */
1406 if (snapshot_flags) {
1407 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
1408 if (local_err) {
1409 goto close_and_fail;
1410 }
1411 }
1412
1413 /* Check if any unknown options were used */
1414 if (options && (qdict_size(options) != 0)) {
1415 const QDictEntry *entry = qdict_first(options);
1416 if (flags & BDRV_O_PROTOCOL) {
1417 error_setg(errp, "Block protocol '%s' doesn't support the option "
1418 "'%s'", drv->format_name, entry->key);
1419 } else {
1420 error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1421 "support the option '%s'", drv->format_name,
1422 bdrv_get_device_name(bs), entry->key);
1423 }
1424
1425 ret = -EINVAL;
1426 goto close_and_fail;
1427 }
1428
1429 if (!bdrv_key_required(bs)) {
1430 if (bs->blk) {
1431 blk_dev_change_media_cb(bs->blk, true);
1432 }
1433 } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1434 && !runstate_check(RUN_STATE_INMIGRATE)
1435 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1436 error_setg(errp,
1437 "Guest must be stopped for opening of encrypted image");
1438 ret = -EBUSY;
1439 goto close_and_fail;
1440 }
1441
1442 QDECREF(options);
1443 *pbs = bs;
1444 return 0;
1445
1446 fail:
1447 if (file != NULL) {
1448 bdrv_unref(file);
1449 }
1450 QDECREF(bs->options);
1451 QDECREF(options);
1452 bs->options = NULL;
1453 if (!*pbs) {
1454 /* If *pbs is NULL, a new BDS has been created in this function and
1455 needs to be freed now. Otherwise, it does not need to be closed,
1456 since it has not really been opened yet. */
1457 bdrv_unref(bs);
1458 }
1459 if (local_err) {
1460 error_propagate(errp, local_err);
1461 }
1462 return ret;
1463
1464 close_and_fail:
1465 /* See fail path, but now the BDS has to be always closed */
1466 if (*pbs) {
1467 bdrv_close(bs);
1468 } else {
1469 bdrv_unref(bs);
1470 }
1471 QDECREF(options);
1472 if (local_err) {
1473 error_propagate(errp, local_err);
1474 }
1475 return ret;
1476 }
1477
/*
 * One element of a BlockReopenQueue: the reopen state of a single
 * BlockDriverState plus bookkeeping for the prepare/commit/abort sequence.
 */
typedef struct BlockReopenQueueEntry {
    /* set by bdrv_reopen_multiple() once bdrv_reopen_prepare() succeeded for
     * this entry; only such entries are aborted on failure */
    bool prepared;
    /* bs and flags are filled in by bdrv_reopen_queue() */
    BDRVReopenState state;
    /* linkage within the queue */
    QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
} BlockReopenQueueEntry;
1483
1484 /*
1485 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1486 * reopen of multiple devices.
1487 *
1488 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1489 * already performed, or alternatively may be NULL a new BlockReopenQueue will
1490 * be created and initialized. This newly created BlockReopenQueue should be
1491 * passed back in for subsequent calls that are intended to be of the same
1492 * atomic 'set'.
1493 *
1494 * bs is the BlockDriverState to add to the reopen queue.
1495 *
1496 * flags contains the open flags for the associated bs
1497 *
1498 * returns a pointer to bs_queue, which is either the newly allocated
1499 * bs_queue, or the existing bs_queue being used.
1500 *
1501 */
1502 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1503 BlockDriverState *bs, int flags)
1504 {
1505 assert(bs != NULL);
1506
1507 BlockReopenQueueEntry *bs_entry;
1508 if (bs_queue == NULL) {
1509 bs_queue = g_new0(BlockReopenQueue, 1);
1510 QSIMPLEQ_INIT(bs_queue);
1511 }
1512
1513 /* bdrv_open() masks this flag out */
1514 flags &= ~BDRV_O_PROTOCOL;
1515
1516 if (bs->file) {
1517 bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
1518 }
1519
1520 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1521 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1522
1523 bs_entry->state.bs = bs;
1524 bs_entry->state.flags = flags;
1525
1526 return bs_queue;
1527 }
1528
1529 /*
1530 * Reopen multiple BlockDriverStates atomically & transactionally.
1531 *
1532 * The queue passed in (bs_queue) must have been built up previous
1533 * via bdrv_reopen_queue().
1534 *
1535 * Reopens all BDS specified in the queue, with the appropriate
1536 * flags. All devices are prepared for reopen, and failure of any
1537 * device will cause all device changes to be abandonded, and intermediate
1538 * data cleaned up.
1539 *
1540 * If all devices prepare successfully, then the changes are committed
1541 * to all devices.
1542 *
1543 */
1544 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1545 {
1546 int ret = -1;
1547 BlockReopenQueueEntry *bs_entry, *next;
1548 Error *local_err = NULL;
1549
1550 assert(bs_queue != NULL);
1551
1552 bdrv_drain_all();
1553
1554 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1555 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1556 error_propagate(errp, local_err);
1557 goto cleanup;
1558 }
1559 bs_entry->prepared = true;
1560 }
1561
1562 /* If we reach this point, we have success and just need to apply the
1563 * changes
1564 */
1565 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1566 bdrv_reopen_commit(&bs_entry->state);
1567 }
1568
1569 ret = 0;
1570
1571 cleanup:
1572 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1573 if (ret && bs_entry->prepared) {
1574 bdrv_reopen_abort(&bs_entry->state);
1575 }
1576 g_free(bs_entry);
1577 }
1578 g_free(bs_queue);
1579 return ret;
1580 }
1581
1582
1583 /* Reopen a single BlockDriverState with the specified flags. */
1584 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1585 {
1586 int ret = -1;
1587 Error *local_err = NULL;
1588 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1589
1590 ret = bdrv_reopen_multiple(queue, &local_err);
1591 if (local_err != NULL) {
1592 error_propagate(errp, local_err);
1593 }
1594 return ret;
1595 }
1596
1597
1598 /*
1599 * Prepares a BlockDriverState for reopen. All changes are staged in the
1600 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1601 * the block driver layer .bdrv_reopen_prepare()
1602 *
1603 * bs is the BlockDriverState to reopen
1604 * flags are the new open flags
1605 * queue is the reopen queue
1606 *
1607 * Returns 0 on success, non-zero on error. On error errp will be set
1608 * as well.
1609 *
1610 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1611 * It is the responsibility of the caller to then call the abort() or
1612 * commit() for any other BDS that have been left in a prepare() state
1613 *
1614 */
1615 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1616 Error **errp)
1617 {
1618 int ret = -1;
1619 Error *local_err = NULL;
1620 BlockDriver *drv;
1621
1622 assert(reopen_state != NULL);
1623 assert(reopen_state->bs->drv != NULL);
1624 drv = reopen_state->bs->drv;
1625
1626 /* if we are to stay read-only, do not allow permission change
1627 * to r/w */
1628 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1629 reopen_state->flags & BDRV_O_RDWR) {
1630 error_setg(errp, "Node '%s' is read only",
1631 bdrv_get_device_or_node_name(reopen_state->bs));
1632 goto error;
1633 }
1634
1635
1636 ret = bdrv_flush(reopen_state->bs);
1637 if (ret) {
1638 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1639 strerror(-ret));
1640 goto error;
1641 }
1642
1643 if (drv->bdrv_reopen_prepare) {
1644 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1645 if (ret) {
1646 if (local_err != NULL) {
1647 error_propagate(errp, local_err);
1648 } else {
1649 error_setg(errp, "failed while preparing to reopen image '%s'",
1650 reopen_state->bs->filename);
1651 }
1652 goto error;
1653 }
1654 } else {
1655 /* It is currently mandatory to have a bdrv_reopen_prepare()
1656 * handler for each supported drv. */
1657 error_setg(errp, "Block format '%s' used by node '%s' "
1658 "does not support reopening files", drv->format_name,
1659 bdrv_get_device_or_node_name(reopen_state->bs));
1660 ret = -1;
1661 goto error;
1662 }
1663
1664 ret = 0;
1665
1666 error:
1667 return ret;
1668 }
1669
1670 /*
1671 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1672 * makes them final by swapping the staging BlockDriverState contents into
1673 * the active BlockDriverState contents.
1674 */
1675 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1676 {
1677 BlockDriver *drv;
1678
1679 assert(reopen_state != NULL);
1680 drv = reopen_state->bs->drv;
1681 assert(drv != NULL);
1682
1683 /* If there are any driver level actions to take */
1684 if (drv->bdrv_reopen_commit) {
1685 drv->bdrv_reopen_commit(reopen_state);
1686 }
1687
1688 /* set BDS specific flags now */
1689 reopen_state->bs->open_flags = reopen_state->flags;
1690 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1691 BDRV_O_CACHE_WB);
1692 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
1693
1694 bdrv_refresh_limits(reopen_state->bs, NULL);
1695 }
1696
1697 /*
1698 * Abort the reopen, and delete and free the staged changes in
1699 * reopen_state
1700 */
1701 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1702 {
1703 BlockDriver *drv;
1704
1705 assert(reopen_state != NULL);
1706 drv = reopen_state->bs->drv;
1707 assert(drv != NULL);
1708
1709 if (drv->bdrv_reopen_abort) {
1710 drv->bdrv_reopen_abort(reopen_state);
1711 }
1712 }
1713
1714
1715 void bdrv_close(BlockDriverState *bs)
1716 {
1717 BdrvAioNotifier *ban, *ban_next;
1718
1719 if (bs->job) {
1720 block_job_cancel_sync(bs->job);
1721 }
1722 bdrv_drain_all(); /* complete I/O */
1723 bdrv_flush(bs);
1724 bdrv_drain_all(); /* in case flush left pending I/O */
1725 notifier_list_notify(&bs->close_notifiers, bs);
1726
1727 if (bs->drv) {
1728 if (bs->backing_hd) {
1729 BlockDriverState *backing_hd = bs->backing_hd;
1730 bdrv_set_backing_hd(bs, NULL);
1731 bdrv_unref(backing_hd);
1732 }
1733 bs->drv->bdrv_close(bs);
1734 g_free(bs->opaque);
1735 bs->opaque = NULL;
1736 bs->drv = NULL;
1737 bs->copy_on_read = 0;
1738 bs->backing_file[0] = '\0';
1739 bs->backing_format[0] = '\0';
1740 bs->total_sectors = 0;
1741 bs->encrypted = 0;
1742 bs->valid_key = 0;
1743 bs->sg = 0;
1744 bs->zero_beyond_eof = false;
1745 QDECREF(bs->options);
1746 bs->options = NULL;
1747 QDECREF(bs->full_open_options);
1748 bs->full_open_options = NULL;
1749
1750 if (bs->file != NULL) {
1751 bdrv_unref(bs->file);
1752 bs->file = NULL;
1753 }
1754 }
1755
1756 if (bs->blk) {
1757 blk_dev_change_media_cb(bs->blk, false);
1758 }
1759
1760 /*throttling disk I/O limits*/
1761 if (bs->io_limits_enabled) {
1762 bdrv_io_limits_disable(bs);
1763 }
1764
1765 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
1766 g_free(ban);
1767 }
1768 QLIST_INIT(&bs->aio_notifiers);
1769 }
1770
1771 void bdrv_close_all(void)
1772 {
1773 BlockDriverState *bs;
1774
1775 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1776 AioContext *aio_context = bdrv_get_aio_context(bs);
1777
1778 aio_context_acquire(aio_context);
1779 bdrv_close(bs);
1780 aio_context_release(aio_context);
1781 }
1782 }
1783
1784 /* make a BlockDriverState anonymous by removing from bdrv_state and
1785 * graph_bdrv_state list.
1786 Also, NULL terminate the device_name to prevent double remove */
1787 void bdrv_make_anon(BlockDriverState *bs)
1788 {
1789 /*
1790 * Take care to remove bs from bdrv_states only when it's actually
1791 * in it. Note that bs->device_list.tqe_prev is initially null,
1792 * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish
1793 * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
1794 * resetting it to null on remove.
1795 */
1796 if (bs->device_list.tqe_prev) {
1797 QTAILQ_REMOVE(&bdrv_states, bs, device_list);
1798 bs->device_list.tqe_prev = NULL;
1799 }
1800 if (bs->node_name[0] != '\0') {
1801 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1802 }
1803 bs->node_name[0] = '\0';
1804 }
1805
1806 static void bdrv_rebind(BlockDriverState *bs)
1807 {
1808 if (bs->drv && bs->drv->bdrv_rebind) {
1809 bs->drv->bdrv_rebind(bs);
1810 }
1811 }
1812
1813 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
1814 BlockDriverState *bs_src)
1815 {
1816 /* move some fields that need to stay attached to the device */
1817
1818 /* dev info */
1819 bs_dest->guest_block_size = bs_src->guest_block_size;
1820 bs_dest->copy_on_read = bs_src->copy_on_read;
1821
1822 bs_dest->enable_write_cache = bs_src->enable_write_cache;
1823
1824 /* i/o throttled req */
1825 memcpy(&bs_dest->throttle_state,
1826 &bs_src->throttle_state,
1827 sizeof(ThrottleState));
1828 memcpy(&bs_dest->throttle_timers,
1829 &bs_src->throttle_timers,
1830 sizeof(ThrottleTimers));
1831 bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
1832 bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
1833 bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
1834
1835 /* r/w error */
1836 bs_dest->on_read_error = bs_src->on_read_error;
1837 bs_dest->on_write_error = bs_src->on_write_error;
1838
1839 /* i/o status */
1840 bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
1841 bs_dest->iostatus = bs_src->iostatus;
1842
1843 /* dirty bitmap */
1844 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
1845
1846 /* reference count */
1847 bs_dest->refcnt = bs_src->refcnt;
1848
1849 /* job */
1850 bs_dest->job = bs_src->job;
1851
1852 /* keep the same entry in bdrv_states */
1853 bs_dest->device_list = bs_src->device_list;
1854 bs_dest->blk = bs_src->blk;
1855
1856 memcpy(bs_dest->op_blockers, bs_src->op_blockers,
1857 sizeof(bs_dest->op_blockers));
1858 }
1859
1860 /*
1861 * Swap bs contents for two image chains while they are live,
1862 * while keeping required fields on the BlockDriverState that is
1863 * actually attached to a device.
1864 *
1865 * This will modify the BlockDriverState fields, and swap contents
1866 * between bs_new and bs_old. Both bs_new and bs_old are modified.
1867 *
1868 * bs_new must not be attached to a BlockBackend.
1869 *
1870 * This function does not create any image files.
1871 */
1872 void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
1873 {
1874 BlockDriverState tmp;
1875
1876 /* The code needs to swap the node_name but simply swapping node_list won't
1877 * work so first remove the nodes from the graph list, do the swap then
1878 * insert them back if needed.
1879 */
1880 if (bs_new->node_name[0] != '\0') {
1881 QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
1882 }
1883 if (bs_old->node_name[0] != '\0') {
1884 QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
1885 }
1886
1887 /* bs_new must be unattached and shouldn't have anything fancy enabled */
1888 assert(!bs_new->blk);
1889 assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
1890 assert(bs_new->job == NULL);
1891 assert(bs_new->io_limits_enabled == false);
1892 assert(!throttle_timers_are_initialized(&bs_new->throttle_timers));
1893
1894 tmp = *bs_new;
1895 *bs_new = *bs_old;
1896 *bs_old = tmp;
1897
1898 /* there are some fields that should not be swapped, move them back */
1899 bdrv_move_feature_fields(&tmp, bs_old);
1900 bdrv_move_feature_fields(bs_old, bs_new);
1901 bdrv_move_feature_fields(bs_new, &tmp);
1902
1903 /* bs_new must remain unattached */
1904 assert(!bs_new->blk);
1905
1906 /* Check a few fields that should remain attached to the device */
1907 assert(bs_new->job == NULL);
1908 assert(bs_new->io_limits_enabled == false);
1909 assert(!throttle_timers_are_initialized(&bs_new->throttle_timers));
1910
1911 /* insert the nodes back into the graph node list if needed */
1912 if (bs_new->node_name[0] != '\0') {
1913 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
1914 }
1915 if (bs_old->node_name[0] != '\0') {
1916 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
1917 }
1918
1919 bdrv_rebind(bs_new);
1920 bdrv_rebind(bs_old);
1921 }
1922
1923 /*
1924 * Add new bs contents at the top of an image chain while the chain is
1925 * live, while keeping required fields on the top layer.
1926 *
1927 * This will modify the BlockDriverState fields, and swap contents
1928 * between bs_new and bs_top. Both bs_new and bs_top are modified.
1929 *
1930 * bs_new must not be attached to a BlockBackend.
1931 *
1932 * This function does not create any image files.
1933 */
1934 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
1935 {
1936 bdrv_swap(bs_new, bs_top);
1937
1938 /* The contents of 'tmp' will become bs_top, as we are
1939 * swapping bs_new and bs_top contents. */
1940 bdrv_set_backing_hd(bs_top, bs_new);
1941 }
1942
1943 static void bdrv_delete(BlockDriverState *bs)
1944 {
1945 assert(!bs->job);
1946 assert(bdrv_op_blocker_is_empty(bs));
1947 assert(!bs->refcnt);
1948 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
1949
1950 bdrv_close(bs);
1951
1952 /* remove from list, if necessary */
1953 bdrv_make_anon(bs);
1954
1955 g_free(bs);
1956 }
1957
1958 /*
1959 * Run consistency checks on an image
1960 *
1961 * Returns 0 if the check could be completed (it doesn't mean that the image is
1962 * free of errors) or -errno when an internal error occurred. The results of the
1963 * check are stored in res.
1964 */
1965 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
1966 {
1967 if (bs->drv == NULL) {
1968 return -ENOMEDIUM;
1969 }
1970 if (bs->drv->bdrv_check == NULL) {
1971 return -ENOTSUP;
1972 }
1973
1974 memset(res, 0, sizeof(*res));
1975 return bs->drv->bdrv_check(bs, res, fix);
1976 }
1977
1978 #define COMMIT_BUF_SECTORS 2048
1979
1980 /* commit COW file into the raw image */
1981 int bdrv_commit(BlockDriverState *bs)
1982 {
1983 BlockDriver *drv = bs->drv;
1984 int64_t sector, total_sectors, length, backing_length;
1985 int n, ro, open_flags;
1986 int ret = 0;
1987 uint8_t *buf = NULL;
1988
1989 if (!drv)
1990 return -ENOMEDIUM;
1991
1992 if (!bs->backing_hd) {
1993 return -ENOTSUP;
1994 }
1995
1996 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
1997 bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
1998 return -EBUSY;
1999 }
2000
2001 ro = bs->backing_hd->read_only;
2002 open_flags = bs->backing_hd->open_flags;
2003
2004 if (ro) {
2005 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2006 return -EACCES;
2007 }
2008 }
2009
2010 length = bdrv_getlength(bs);
2011 if (length < 0) {
2012 ret = length;
2013 goto ro_cleanup;
2014 }
2015
2016 backing_length = bdrv_getlength(bs->backing_hd);
2017 if (backing_length < 0) {
2018 ret = backing_length;
2019 goto ro_cleanup;
2020 }
2021
2022 /* If our top snapshot is larger than the backing file image,
2023 * grow the backing file image if possible. If not possible,
2024 * we must return an error */
2025 if (length > backing_length) {
2026 ret = bdrv_truncate(bs->backing_hd, length);
2027 if (ret < 0) {
2028 goto ro_cleanup;
2029 }
2030 }
2031
2032 total_sectors = length >> BDRV_SECTOR_BITS;
2033
2034 /* qemu_try_blockalign() for bs will choose an alignment that works for
2035 * bs->backing_hd as well, so no need to compare the alignment manually. */
2036 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2037 if (buf == NULL) {
2038 ret = -ENOMEM;
2039 goto ro_cleanup;
2040 }
2041
2042 for (sector = 0; sector < total_sectors; sector += n) {
2043 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2044 if (ret < 0) {
2045 goto ro_cleanup;
2046 }
2047 if (ret) {
2048 ret = bdrv_read(bs, sector, buf, n);
2049 if (ret < 0) {
2050 goto ro_cleanup;
2051 }
2052
2053 ret = bdrv_write(bs->backing_hd, sector, buf, n);
2054 if (ret < 0) {
2055 goto ro_cleanup;
2056 }
2057 }
2058 }
2059
2060 if (drv->bdrv_make_empty) {
2061 ret = drv->bdrv_make_empty(bs);
2062 if (ret < 0) {
2063 goto ro_cleanup;
2064 }
2065 bdrv_flush(bs);
2066 }
2067
2068 /*
2069 * Make sure all data we wrote to the backing device is actually
2070 * stable on disk.
2071 */
2072 if (bs->backing_hd) {
2073 bdrv_flush(bs->backing_hd);
2074 }
2075
2076 ret = 0;
2077 ro_cleanup:
2078 qemu_vfree(buf);
2079
2080 if (ro) {
2081 /* ignoring error return here */
2082 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
2083 }
2084
2085 return ret;
2086 }
2087
2088 int bdrv_commit_all(void)
2089 {
2090 BlockDriverState *bs;
2091
2092 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2093 AioContext *aio_context = bdrv_get_aio_context(bs);
2094
2095 aio_context_acquire(aio_context);
2096 if (bs->drv && bs->backing_hd) {
2097 int ret = bdrv_commit(bs);
2098 if (ret < 0) {
2099 aio_context_release(aio_context);
2100 return ret;
2101 }
2102 }
2103 aio_context_release(aio_context);
2104 }
2105 return 0;
2106 }
2107
2108 /*
2109 * Return values:
2110 * 0 - success
2111 * -EINVAL - backing format specified, but no file
2112 * -ENOSPC - can't update the backing file because no space is left in the
2113 * image file header
2114 * -ENOTSUP - format driver doesn't support changing the backing file
2115 */
2116 int bdrv_change_backing_file(BlockDriverState *bs,
2117 const char *backing_file, const char *backing_fmt)
2118 {
2119 BlockDriver *drv = bs->drv;
2120 int ret;
2121
2122 /* Backing file format doesn't make sense without a backing file */
2123 if (backing_fmt && !backing_file) {
2124 return -EINVAL;
2125 }
2126
2127 if (drv->bdrv_change_backing_file != NULL) {
2128 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2129 } else {
2130 ret = -ENOTSUP;
2131 }
2132
2133 if (ret == 0) {
2134 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2135 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2136 }
2137 return ret;
2138 }
2139
2140 /*
2141 * Finds the image layer in the chain that has 'bs' as its backing file.
2142 *
2143 * active is the current topmost image.
2144 *
2145 * Returns NULL if bs is not found in active's image chain,
2146 * or if active == bs.
2147 *
2148 * Returns the bottommost base image if bs == NULL.
2149 */
2150 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2151 BlockDriverState *bs)
2152 {
2153 while (active && bs != active->backing_hd) {
2154 active = active->backing_hd;
2155 }
2156
2157 return active;
2158 }
2159
2160 /* Given a BDS, searches for the base layer. */
2161 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2162 {
2163 return bdrv_find_overlay(bs, NULL);
2164 }
2165
/*
 * List element used by bdrv_drop_intermediate() to remember the
 * BlockDriverStates that are to be dropped from the chain.
 */
typedef struct BlkIntermediateStates {
    /* the BlockDriverState queued for removal */
    BlockDriverState *bs;
    /* linkage within the states_to_delete queue */
    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
} BlkIntermediateStates;
2170
2171
2172 /*
2173 * Drops images above 'base' up to and including 'top', and sets the image
2174 * above 'top' to have base as its backing file.
2175 *
2176 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2177 * information in 'bs' can be properly updated.
2178 *
2179 * E.g., this will convert the following chain:
2180 * bottom <- base <- intermediate <- top <- active
2181 *
2182 * to
2183 *
2184 * bottom <- base <- active
2185 *
2186 * It is allowed for bottom==base, in which case it converts:
2187 *
2188 * base <- intermediate <- top <- active
2189 *
2190 * to
2191 *
2192 * base <- active
2193 *
2194 * If backing_file_str is non-NULL, it will be used when modifying top's
2195 * overlay image metadata.
2196 *
2197 * Error conditions:
2198 * if active == top, that is considered an error
2199 *
2200 */
2201 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2202 BlockDriverState *base, const char *backing_file_str)
2203 {
2204 BlockDriverState *intermediate;
2205 BlockDriverState *base_bs = NULL;
2206 BlockDriverState *new_top_bs = NULL;
2207 BlkIntermediateStates *intermediate_state, *next;
2208 int ret = -EIO;
2209
2210 QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2211 QSIMPLEQ_INIT(&states_to_delete);
2212
2213 if (!top->drv || !base->drv) {
2214 goto exit;
2215 }
2216
2217 new_top_bs = bdrv_find_overlay(active, top);
2218
2219 if (new_top_bs == NULL) {
2220 /* we could not find the image above 'top', this is an error */
2221 goto exit;
2222 }
2223
2224 /* special case of new_top_bs->backing_hd already pointing to base - nothing
2225 * to do, no intermediate images */
2226 if (new_top_bs->backing_hd == base) {
2227 ret = 0;
2228 goto exit;
2229 }
2230
2231 intermediate = top;
2232
2233 /* now we will go down through the list, and add each BDS we find
2234 * into our deletion queue, until we hit the 'base'
2235 */
2236 while (intermediate) {
2237 intermediate_state = g_new0(BlkIntermediateStates, 1);
2238 intermediate_state->bs = intermediate;
2239 QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2240
2241 if (intermediate->backing_hd == base) {
2242 base_bs = intermediate->backing_hd;
2243 break;
2244 }
2245 intermediate = intermediate->backing_hd;
2246 }
2247 if (base_bs == NULL) {
2248 /* something went wrong, we did not end at the base. safely
2249 * unravel everything, and exit with error */
2250 goto exit;
2251 }
2252
2253 /* success - we can delete the intermediate states, and link top->base */
2254 backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
2255 ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2256 base_bs->drv ? base_bs->drv->format_name : "");
2257 if (ret) {
2258 goto exit;
2259 }
2260 bdrv_set_backing_hd(new_top_bs, base_bs);
2261
2262 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2263 /* so that bdrv_close() does not recursively close the chain */
2264 bdrv_set_backing_hd(intermediate_state->bs, NULL);
2265 bdrv_unref(intermediate_state->bs);
2266 }
2267 ret = 0;
2268
2269 exit:
2270 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2271 g_free(intermediate_state);
2272 }
2273 return ret;
2274 }
2275
2276 /**
2277 * Truncate file to 'offset' bytes (needed only for file protocols)
2278 */
2279 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2280 {
2281 BlockDriver *drv = bs->drv;
2282 int ret;
2283 if (!drv)
2284 return -ENOMEDIUM;
2285 if (!drv->bdrv_truncate)
2286 return -ENOTSUP;
2287 if (bs->read_only)
2288 return -EACCES;
2289
2290 ret = drv->bdrv_truncate(bs, offset);
2291 if (ret == 0) {
2292 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2293 bdrv_dirty_bitmap_truncate(bs);
2294 if (bs->blk) {
2295 blk_dev_resize_cb(bs->blk);
2296 }
2297 }
2298 return ret;
2299 }
2300
2301 /**
2302 * Length of a allocated file in bytes. Sparse files are counted by actual
2303 * allocated space. Return < 0 if error or unknown.
2304 */
2305 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2306 {
2307 BlockDriver *drv = bs->drv;
2308 if (!drv) {
2309 return -ENOMEDIUM;
2310 }
2311 if (drv->bdrv_get_allocated_file_size) {
2312 return drv->bdrv_get_allocated_file_size(bs);
2313 }
2314 if (bs->file) {
2315 return bdrv_get_allocated_file_size(bs->file);
2316 }
2317 return -ENOTSUP;
2318 }
2319
2320 /**
2321 * Return number of sectors on success, -errno on error.
2322 */
2323 int64_t bdrv_nb_sectors(BlockDriverState *bs)
2324 {
2325 BlockDriver *drv = bs->drv;
2326
2327 if (!drv)
2328 return -ENOMEDIUM;
2329
2330 if (drv->has_variable_length) {
2331 int ret = refresh_total_sectors(bs, bs->total_sectors);
2332 if (ret < 0) {
2333 return ret;
2334 }
2335 }
2336 return bs->total_sectors;
2337 }
2338
2339 /**
2340 * Return length in bytes on success, -errno on error.
2341 * The length is always a multiple of BDRV_SECTOR_SIZE.
2342 */
2343 int64_t bdrv_getlength(BlockDriverState *bs)
2344 {
2345 int64_t ret = bdrv_nb_sectors(bs);
2346
2347 ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2348 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2349 }
2350
2351 /* return 0 as number of sectors if no device present or error */
2352 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2353 {
2354 int64_t nb_sectors = bdrv_nb_sectors(bs);
2355
2356 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2357 }
2358
2359 void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
2360 BlockdevOnError on_write_error)
2361 {
2362 bs->on_read_error = on_read_error;
2363 bs->on_write_error = on_write_error;
2364 }
2365
2366 BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
2367 {
2368 return is_read ? bs->on_read_error : bs->on_write_error;
2369 }
2370
2371 BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
2372 {
2373 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
2374
2375 switch (on_err) {
2376 case BLOCKDEV_ON_ERROR_ENOSPC:
2377 return (error == ENOSPC) ?
2378 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
2379 case BLOCKDEV_ON_ERROR_STOP:
2380 return BLOCK_ERROR_ACTION_STOP;
2381 case BLOCKDEV_ON_ERROR_REPORT:
2382 return BLOCK_ERROR_ACTION_REPORT;
2383 case BLOCKDEV_ON_ERROR_IGNORE:
2384 return BLOCK_ERROR_ACTION_IGNORE;
2385 default:
2386 abort();
2387 }
2388 }
2389
2390 static void send_qmp_error_event(BlockDriverState *bs,
2391 BlockErrorAction action,
2392 bool is_read, int error)
2393 {
2394 IoOperationType optype;
2395
2396 optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
2397 qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
2398 bdrv_iostatus_is_enabled(bs),
2399 error == ENOSPC, strerror(error),
2400 &error_abort);
2401 }
2402
2403 /* This is done by device models because, while the block layer knows
2404 * about the error, it does not know whether an operation comes from
2405 * the device or the block layer (from a job, for example).
2406 */
2407 void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
2408 bool is_read, int error)
2409 {
2410 assert(error >= 0);
2411
2412 if (action == BLOCK_ERROR_ACTION_STOP) {
2413 /* First set the iostatus, so that "info block" returns an iostatus
2414 * that matches the events raised so far (an additional error iostatus
2415 * is fine, but not a lost one).
2416 */
2417 bdrv_iostatus_set_err(bs, error);
2418
2419 /* Then raise the request to stop the VM and the event.
2420 * qemu_system_vmstop_request_prepare has two effects. First,
2421 * it ensures that the STOP event always comes after the
2422 * BLOCK_IO_ERROR event. Second, it ensures that even if management
2423 * can observe the STOP event and do a "cont" before the STOP
2424 * event is issued, the VM will not stop. In this case, vm_start()
2425 * also ensures that the STOP/RESUME pair of events is emitted.
2426 */
2427 qemu_system_vmstop_request_prepare();
2428 send_qmp_error_event(bs, action, is_read, error);
2429 qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
2430 } else {
2431 send_qmp_error_event(bs, action, is_read, error);
2432 }
2433 }
2434
2435 int bdrv_is_read_only(BlockDriverState *bs)
2436 {
2437 return bs->read_only;
2438 }
2439
2440 int bdrv_is_sg(BlockDriverState *bs)
2441 {
2442 return bs->sg;
2443 }
2444
2445 int bdrv_enable_write_cache(BlockDriverState *bs)
2446 {
2447 return bs->enable_write_cache;
2448 }
2449
2450 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2451 {
2452 bs->enable_write_cache = wce;
2453
2454 /* so a reopen() will preserve wce */
2455 if (wce) {
2456 bs->open_flags |= BDRV_O_CACHE_WB;
2457 } else {
2458 bs->open_flags &= ~BDRV_O_CACHE_WB;
2459 }
2460 }
2461
2462 int bdrv_is_encrypted(BlockDriverState *bs)
2463 {
2464 if (bs->backing_hd && bs->backing_hd->encrypted)
2465 return 1;
2466 return bs->encrypted;
2467 }
2468
2469 int bdrv_key_required(BlockDriverState *bs)
2470 {
2471 BlockDriverState *backing_hd = bs->backing_hd;
2472
2473 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2474 return 1;
2475 return (bs->encrypted && !bs->valid_key);
2476 }
2477
2478 int bdrv_set_key(BlockDriverState *bs, const char *key)
2479 {
2480 int ret;
2481 if (bs->backing_hd && bs->backing_hd->encrypted) {
2482 ret = bdrv_set_key(bs->backing_hd, key);
2483 if (ret < 0)
2484 return ret;
2485 if (!bs->encrypted)
2486 return 0;
2487 }
2488 if (!bs->encrypted) {
2489 return -EINVAL;
2490 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2491 return -ENOMEDIUM;
2492 }
2493 ret = bs->drv->bdrv_set_key(bs, key);
2494 if (ret < 0) {
2495 bs->valid_key = 0;
2496 } else if (!bs->valid_key) {
2497 bs->valid_key = 1;
2498 if (bs->blk) {
2499 /* call the change callback now, we skipped it on open */
2500 blk_dev_change_media_cb(bs->blk, true);
2501 }
2502 }
2503 return ret;
2504 }
2505
2506 /*
2507 * Provide an encryption key for @bs.
2508 * If @key is non-null:
2509 * If @bs is not encrypted, fail.
2510 * Else if the key is invalid, fail.
2511 * Else set @bs's key to @key, replacing the existing key, if any.
2512 * If @key is null:
2513 * If @bs is encrypted and still lacks a key, fail.
2514 * Else do nothing.
2515 * On failure, store an error object through @errp if non-null.
2516 */
2517 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2518 {
2519 if (key) {
2520 if (!bdrv_is_encrypted(bs)) {
2521 error_setg(errp, "Node '%s' is not encrypted",
2522 bdrv_get_device_or_node_name(bs));
2523 } else if (bdrv_set_key(bs, key) < 0) {
2524 error_set(errp, QERR_INVALID_PASSWORD);
2525 }
2526 } else {
2527 if (bdrv_key_required(bs)) {
2528 error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2529 "'%s' (%s) is encrypted",
2530 bdrv_get_device_or_node_name(bs),
2531 bdrv_get_encrypted_filename(bs));
2532 }
2533 }
2534 }
2535
2536 const char *bdrv_get_format_name(BlockDriverState *bs)
2537 {
2538 return bs->drv ? bs->drv->format_name : NULL;
2539 }
2540
/* qsort() comparator for an array of C strings.
 *
 * qsort() passes pointers to the array elements, so for an array of
 * 'const char *' each argument is really a 'const char *const *' and must be
 * dereferenced once before the strings can be compared. */
static int qsort_strcmp(const void *a, const void *b)
{
    return strcmp(*(const char *const *)a, *(const char *const *)b);
}
2545
2546 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2547 void *opaque)
2548 {
2549 BlockDriver *drv;
2550 int count = 0;
2551 int i;
2552 const char **formats = NULL;
2553
2554 QLIST_FOREACH(drv, &bdrv_drivers, list) {
2555 if (drv->format_name) {
2556 bool found = false;
2557 int i = count;
2558 while (formats && i && !found) {
2559 found = !strcmp(formats[--i], drv->format_name);
2560 }
2561
2562 if (!found) {
2563 formats = g_renew(const char *, formats, count + 1);
2564 formats[count++] = drv->format_name;
2565 }
2566 }
2567 }
2568
2569 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2570
2571 for (i = 0; i < count; i++) {
2572 it(opaque, formats[i]);
2573 }
2574
2575 g_free(formats);
2576 }
2577
2578 /* This function is to find a node in the bs graph */
2579 BlockDriverState *bdrv_find_node(const char *node_name)
2580 {
2581 BlockDriverState *bs;
2582
2583 assert(node_name);
2584
2585 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2586 if (!strcmp(node_name, bs->node_name)) {
2587 return bs;
2588 }
2589 }
2590 return NULL;
2591 }
2592
2593 /* Put this QMP function here so it can access the static graph_bdrv_states. */
2594 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2595 {
2596 BlockDeviceInfoList *list, *entry;
2597 BlockDriverState *bs;
2598
2599 list = NULL;
2600 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2601 BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2602 if (!info) {
2603 qapi_free_BlockDeviceInfoList(list);
2604 return NULL;
2605 }
2606 entry = g_malloc0(sizeof(*entry));
2607 entry->value = info;
2608 entry->next = list;
2609 list = entry;
2610 }
2611
2612 return list;
2613 }
2614
2615 BlockDriverState *bdrv_lookup_bs(const char *device,
2616 const char *node_name,
2617 Error **errp)
2618 {
2619 BlockBackend *blk;
2620 BlockDriverState *bs;
2621
2622 if (device) {
2623 blk = blk_by_name(device);
2624
2625 if (blk) {
2626 return blk_bs(blk);
2627 }
2628 }
2629
2630 if (node_name) {
2631 bs = bdrv_find_node(node_name);
2632
2633 if (bs) {
2634 return bs;
2635 }
2636 }
2637
2638 error_setg(errp, "Cannot find device=%s nor node_name=%s",
2639 device ? device : "",
2640 node_name ? node_name : "");
2641 return NULL;
2642 }
2643
2644 /* If 'base' is in the same chain as 'top', return true. Otherwise,
2645 * return false. If either argument is NULL, return false. */
2646 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2647 {
2648 while (top && top != base) {
2649 top = top->backing_hd;
2650 }
2651
2652 return top != NULL;
2653 }
2654
2655 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2656 {
2657 if (!bs) {
2658 return QTAILQ_FIRST(&graph_bdrv_states);
2659 }
2660 return QTAILQ_NEXT(bs, node_list);
2661 }
2662
2663 BlockDriverState *bdrv_next(BlockDriverState *bs)
2664 {
2665 if (!bs) {
2666 return QTAILQ_FIRST(&bdrv_states);
2667 }
2668 return QTAILQ_NEXT(bs, device_list);
2669 }
2670
2671 const char *bdrv_get_node_name(const BlockDriverState *bs)
2672 {
2673 return bs->node_name;
2674 }
2675
2676 /* TODO check what callers really want: bs->node_name or blk_name() */
2677 const char *bdrv_get_device_name(const BlockDriverState *bs)
2678 {
2679 return bs->blk ? blk_name(bs->blk) : "";
2680 }
2681
2682 /* This can be used to identify nodes that might not have a device
2683 * name associated. Since node and device names live in the same
2684 * namespace, the result is unambiguous. The exception is if both are
2685 * absent, then this returns an empty (non-null) string. */
2686 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
2687 {
2688 return bs->blk ? blk_name(bs->blk) : bs->node_name;
2689 }
2690
2691 int bdrv_get_flags(BlockDriverState *bs)
2692 {
2693 return bs->open_flags;
2694 }
2695
2696 int bdrv_has_zero_init_1(BlockDriverState *bs)
2697 {
2698 return 1;
2699 }
2700
2701 int bdrv_has_zero_init(BlockDriverState *bs)
2702 {
2703 assert(bs->drv);
2704
2705 /* If BS is a copy on write image, it is initialized to
2706 the contents of the base image, which may not be zeroes. */
2707 if (bs->backing_hd) {
2708 return 0;
2709 }
2710 if (bs->drv->bdrv_has_zero_init) {
2711 return bs->drv->bdrv_has_zero_init(bs);
2712 }
2713
2714 /* safe default */
2715 return 0;
2716 }
2717
2718 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
2719 {
2720 BlockDriverInfo bdi;
2721
2722 if (bs->backing_hd) {
2723 return false;
2724 }
2725
2726 if (bdrv_get_info(bs, &bdi) == 0) {
2727 return bdi.unallocated_blocks_are_zero;
2728 }
2729
2730 return false;
2731 }
2732
2733 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
2734 {
2735 BlockDriverInfo bdi;
2736
2737 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
2738 return false;
2739 }
2740
2741 if (bdrv_get_info(bs, &bdi) == 0) {
2742 return bdi.can_write_zeroes_with_unmap;
2743 }
2744
2745 return false;
2746 }
2747
2748 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2749 {
2750 if (bs->backing_hd && bs->backing_hd->encrypted)
2751 return bs->backing_file;
2752 else if (bs->encrypted)
2753 return bs->filename;
2754 else
2755 return NULL;
2756 }
2757
2758 void bdrv_get_backing_filename(BlockDriverState *bs,
2759 char *filename, int filename_size)
2760 {
2761 pstrcpy(filename, filename_size, bs->backing_file);
2762 }
2763
2764 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2765 {
2766 BlockDriver *drv = bs->drv;
2767 if (!drv)
2768 return -ENOMEDIUM;
2769 if (!drv->bdrv_get_info)
2770 return -ENOTSUP;
2771 memset(bdi, 0, sizeof(*bdi));
2772 return drv->bdrv_get_info(bs, bdi);
2773 }
2774
2775 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
2776 {
2777 BlockDriver *drv = bs->drv;
2778 if (drv && drv->bdrv_get_specific_info) {
2779 return drv->bdrv_get_specific_info(bs);
2780 }
2781 return NULL;
2782 }
2783
2784 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2785 {
2786 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
2787 return;
2788 }
2789
2790 bs->drv->bdrv_debug_event(bs, event);
2791 }
2792
2793 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
2794 const char *tag)
2795 {
2796 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
2797 bs = bs->file;
2798 }
2799
2800 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
2801 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
2802 }
2803
2804 return -ENOTSUP;
2805 }
2806
2807 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
2808 {
2809 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
2810 bs = bs->file;
2811 }
2812
2813 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
2814 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
2815 }
2816
2817 return -ENOTSUP;
2818 }
2819
2820 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
2821 {
2822 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
2823 bs = bs->file;
2824 }
2825
2826 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
2827 return bs->drv->bdrv_debug_resume(bs, tag);
2828 }
2829
2830 return -ENOTSUP;
2831 }
2832
2833 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
2834 {
2835 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
2836 bs = bs->file;
2837 }
2838
2839 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
2840 return bs->drv->bdrv_debug_is_suspended(bs, tag);
2841 }
2842
2843 return false;
2844 }
2845
2846 int bdrv_is_snapshot(BlockDriverState *bs)
2847 {
2848 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2849 }
2850
2851 /* backing_file can either be relative, or absolute, or a protocol. If it is
2852 * relative, it must be relative to the chain. So, passing in bs->filename
2853 * from a BDS as backing_file should not be done, as that may be relative to
2854 * the CWD rather than the chain. */
2855 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2856 const char *backing_file)
2857 {
2858 char *filename_full = NULL;
2859 char *backing_file_full = NULL;
2860 char *filename_tmp = NULL;
2861 int is_protocol = 0;
2862 BlockDriverState *curr_bs = NULL;
2863 BlockDriverState *retval = NULL;
2864
2865 if (!bs || !bs->drv || !backing_file) {
2866 return NULL;
2867 }
2868
2869 filename_full = g_malloc(PATH_MAX);
2870 backing_file_full = g_malloc(PATH_MAX);
2871 filename_tmp = g_malloc(PATH_MAX);
2872
2873 is_protocol = path_has_protocol(backing_file);
2874
2875 for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
2876
2877 /* If either of the filename paths is actually a protocol, then
2878 * compare unmodified paths; otherwise make paths relative */
2879 if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
2880 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
2881 retval = curr_bs->backing_hd;
2882 break;
2883 }
2884 } else {
2885 /* If not an absolute filename path, make it relative to the current
2886 * image's filename path */
2887 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
2888 backing_file);
2889
2890 /* We are going to compare absolute pathnames */
2891 if (!realpath(filename_tmp, filename_full)) {
2892 continue;
2893 }
2894
2895 /* We need to make sure the backing filename we are comparing against
2896 * is relative to the current image filename (or absolute) */
2897 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
2898 curr_bs->backing_file);
2899
2900 if (!realpath(filename_tmp, backing_file_full)) {
2901 continue;
2902 }
2903
2904 if (strcmp(backing_file_full, filename_full) == 0) {
2905 retval = curr_bs->backing_hd;
2906 break;
2907 }
2908 }
2909 }
2910
2911 g_free(filename_full);
2912 g_free(backing_file_full);
2913 g_free(filename_tmp);
2914 return retval;
2915 }
2916
2917 int bdrv_get_backing_file_depth(BlockDriverState *bs)
2918 {
2919 if (!bs->drv) {
2920 return 0;
2921 }
2922
2923 if (!bs->backing_hd) {
2924 return 0;
2925 }
2926
2927 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
2928 }
2929
2930 void bdrv_init(void)
2931 {
2932 module_call_init(MODULE_INIT_BLOCK);
2933 }
2934
2935 void bdrv_init_with_whitelist(void)
2936 {
2937 use_bdrv_whitelist = 1;
2938 bdrv_init();
2939 }
2940
2941 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
2942 {
2943 Error *local_err = NULL;
2944 int ret;
2945
2946 if (!bs->drv) {
2947 return;
2948 }
2949
2950 if (!(bs->open_flags & BDRV_O_INCOMING)) {
2951 return;
2952 }
2953 bs->open_flags &= ~BDRV_O_INCOMING;
2954
2955 if (bs->drv->bdrv_invalidate_cache) {
2956 bs->drv->bdrv_invalidate_cache(bs, &local_err);
2957 } else if (bs->file) {
2958 bdrv_invalidate_cache(bs->file, &local_err);
2959 }
2960 if (local_err) {
2961 error_propagate(errp, local_err);
2962 return;
2963 }
2964
2965 ret = refresh_total_sectors(bs, bs->total_sectors);
2966 if (ret < 0) {
2967 error_setg_errno(errp, -ret, "Could not refresh total sector count");
2968 return;
2969 }
2970 }
2971
2972 void bdrv_invalidate_cache_all(Error **errp)
2973 {
2974 BlockDriverState *bs;
2975 Error *local_err = NULL;
2976
2977 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2978 AioContext *aio_context = bdrv_get_aio_context(bs);
2979
2980 aio_context_acquire(aio_context);
2981 bdrv_invalidate_cache(bs, &local_err);
2982 aio_context_release(aio_context);
2983 if (local_err) {
2984 error_propagate(errp, local_err);
2985 return;
2986 }
2987 }
2988 }
2989
2990 /**************************************************************/
2991 /* removable device support */
2992
2993 /**
2994 * Return TRUE if the media is present
2995 */
2996 int bdrv_is_inserted(BlockDriverState *bs)
2997 {
2998 BlockDriver *drv = bs->drv;
2999
3000 if (!drv)
3001 return 0;
3002 if (!drv->bdrv_is_inserted)
3003 return 1;
3004 return drv->bdrv_is_inserted(bs);
3005 }
3006
3007 /**
3008 * Return whether the media changed since the last call to this
3009 * function, or -ENOTSUP if we don't know. Most drivers don't know.
3010 */
3011 int bdrv_media_changed(BlockDriverState *bs)
3012 {
3013 BlockDriver *drv = bs->drv;
3014
3015 if (drv && drv->bdrv_media_changed) {
3016 return drv->bdrv_media_changed(bs);
3017 }
3018 return -ENOTSUP;
3019 }
3020
3021 /**
3022 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3023 */
3024 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3025 {
3026 BlockDriver *drv = bs->drv;
3027 const char *device_name;
3028
3029 if (drv && drv->bdrv_eject) {
3030 drv->bdrv_eject(bs, eject_flag);
3031 }
3032
3033 device_name = bdrv_get_device_name(bs);
3034 if (device_name[0] != '\0') {
3035 qapi_event_send_device_tray_moved(device_name,
3036 eject_flag, &error_abort);
3037 }
3038 }
3039
3040 /**
3041 * Lock or unlock the media (if it is locked, the user won't be able
3042 * to eject it manually).
3043 */
3044 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3045 {
3046 BlockDriver *drv = bs->drv;
3047
3048 trace_bdrv_lock_medium(bs, locked);
3049
3050 if (drv && drv->bdrv_lock_medium) {
3051 drv->bdrv_lock_medium(bs, locked);
3052 }
3053 }
3054
3055 void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
3056 {
3057 bs->guest_block_size = align;
3058 }
3059
3060 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
3061 {
3062 BdrvDirtyBitmap *bm;
3063
3064 assert(name);
3065 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3066 if (bm->name && !strcmp(name, bm->name)) {
3067 return bm;
3068 }
3069 }
3070 return NULL;
3071 }
3072
3073 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
3074 {
3075 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3076 g_free(bitmap->name);
3077 bitmap->name = NULL;
3078 }
3079
3080 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
3081 uint32_t granularity,
3082 const char *name,
3083 Error **errp)
3084 {
3085 int64_t bitmap_size;
3086 BdrvDirtyBitmap *bitmap;
3087 uint32_t sector_granularity;
3088
3089 assert((granularity & (granularity - 1)) == 0);
3090
3091 if (name && bdrv_find_dirty_bitmap(bs, name)) {
3092 error_setg(errp, "Bitmap already exists: %s", name);
3093 return NULL;
3094 }
3095 sector_granularity = granularity >> BDRV_SECTOR_BITS;
3096 assert(sector_granularity);
3097 bitmap_size = bdrv_nb_sectors(bs);
3098 if (bitmap_size < 0) {
3099 error_setg_errno(errp, -bitmap_size, "could not get length of device");
3100 errno = -bitmap_size;
3101 return NULL;
3102 }
3103 bitmap = g_new0(BdrvDirtyBitmap, 1);
3104 bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
3105 bitmap->size = bitmap_size;
3106 bitmap->name = g_strdup(name);
3107 bitmap->disabled = false;
3108 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
3109 return bitmap;
3110 }
3111
3112 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
3113 {
3114 return bitmap->successor;
3115 }
3116
3117 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
3118 {
3119 return !(bitmap->disabled || bitmap->successor);
3120 }
3121
3122 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
3123 {
3124 if (bdrv_dirty_bitmap_frozen(bitmap)) {
3125 return DIRTY_BITMAP_STATUS_FROZEN;
3126 } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3127 return DIRTY_BITMAP_STATUS_DISABLED;
3128 } else {
3129 return DIRTY_BITMAP_STATUS_ACTIVE;
3130 }
3131 }
3132
3133 /**
3134 * Create a successor bitmap destined to replace this bitmap after an operation.
3135 * Requires that the bitmap is not frozen and has no successor.
3136 */
3137 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
3138 BdrvDirtyBitmap *bitmap, Error **errp)
3139 {
3140 uint64_t granularity;
3141 BdrvDirtyBitmap *child;
3142
3143 if (bdrv_dirty_bitmap_frozen(bitmap)) {
3144 error_setg(errp, "Cannot create a successor for a bitmap that is "
3145 "currently frozen");
3146 return -1;
3147 }
3148 assert(!bitmap->successor);
3149
3150 /* Create an anonymous successor */
3151 granularity = bdrv_dirty_bitmap_granularity(bitmap);
3152 child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
3153 if (!child) {
3154 return -1;
3155 }
3156
3157 /* Successor will be on or off based on our current state. */
3158 child->disabled = bitmap->disabled;
3159
3160 /* Install the successor and freeze the parent */
3161 bitmap->successor = child;
3162 return 0;
3163 }
3164
3165 /**
3166 * For a bitmap with a successor, yield our name to the successor,
3167 * delete the old bitmap, and return a handle to the new bitmap.
3168 */
3169 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
3170 BdrvDirtyBitmap *bitmap,
3171 Error **errp)
3172 {
3173 char *name;
3174 BdrvDirtyBitmap *successor = bitmap->successor;
3175
3176 if (successor == NULL) {
3177 error_setg(errp, "Cannot relinquish control if "
3178 "there's no successor present");
3179 return NULL;
3180 }
3181
3182 name = bitmap->name;
3183 bitmap->name = NULL;
3184 successor->name = name;
3185 bitmap->successor = NULL;
3186 bdrv_release_dirty_bitmap(bs, bitmap);
3187
3188 return successor;
3189 }
3190
3191 /**
3192 * In cases of failure where we can no longer safely delete the parent,
3193 * we may wish to re-join the parent and child/successor.
3194 * The merged parent will be un-frozen, but not explicitly re-enabled.
3195 */
3196 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
3197 BdrvDirtyBitmap *parent,
3198 Error **errp)
3199 {
3200 BdrvDirtyBitmap *successor = parent->successor;
3201
3202 if (!successor) {
3203 error_setg(errp, "Cannot reclaim a successor when none is present");
3204 return NULL;
3205 }
3206
3207 if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
3208 error_setg(errp, "Merging of parent and successor bitmap failed");
3209 return NULL;
3210 }
3211 bdrv_release_dirty_bitmap(bs, successor);
3212 parent->successor = NULL;
3213
3214 return parent;
3215 }
3216
3217 /**
3218 * Truncates _all_ bitmaps attached to a BDS.
3219 */
3220 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
3221 {
3222 BdrvDirtyBitmap *bitmap;
3223 uint64_t size = bdrv_nb_sectors(bs);
3224
3225 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3226 if (bdrv_dirty_bitmap_frozen(bitmap)) {
3227 continue;
3228 }
3229 hbitmap_truncate(bitmap->bitmap, size);
3230 }
3231 }
3232
3233 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
3234 {
3235 BdrvDirtyBitmap *bm, *next;
3236 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
3237 if (bm == bitmap) {
3238 assert(!bdrv_dirty_bitmap_frozen(bm));
3239 QLIST_REMOVE(bitmap, list);
3240 hbitmap_free(bitmap->bitmap);
3241 g_free(bitmap->name);
3242 g_free(bitmap);
3243 return;
3244 }
3245 }
3246 }
3247
3248 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3249 {
3250 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3251 bitmap->disabled = true;
3252 }
3253
3254 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3255 {
3256 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3257 bitmap->disabled = false;
3258 }
3259
3260 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
3261 {
3262 BdrvDirtyBitmap *bm;
3263 BlockDirtyInfoList *list = NULL;
3264 BlockDirtyInfoList **plist = &list;
3265
3266 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3267 BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
3268 BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
3269 info->count = bdrv_get_dirty_count(bm);
3270 info->granularity = bdrv_dirty_bitmap_granularity(bm);
3271 info->has_name = !!bm->name;
3272 info->name = g_strdup(bm->name);
3273 info->status = bdrv_dirty_bitmap_status(bm);
3274 entry->value = info;
3275 *plist = entry;
3276 plist = &entry->next;
3277 }
3278
3279 return list;
3280 }
3281
3282 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
3283 {
3284 if (bitmap) {
3285 return hbitmap_get(bitmap->bitmap, sector);
3286 } else {
3287 return 0;
3288 }
3289 }
3290
3291 /**
3292 * Chooses a default granularity based on the existing cluster size,
3293 * but clamped between [4K, 64K]. Defaults to 64K in the case that there
3294 * is no cluster size information available.
3295 */
3296 uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
3297 {
3298 BlockDriverInfo bdi;
3299 uint32_t granularity;
3300
3301 if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
3302 granularity = MAX(4096, bdi.cluster_size);
3303 granularity = MIN(65536, granularity);
3304 } else {
3305 granularity = 65536;
3306 }
3307
3308 return granularity;
3309 }
3310
3311 uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
3312 {
3313 return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
3314 }
3315
3316 void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
3317 {
3318 hbitmap_iter_init(hbi, bitmap->bitmap, 0);
3319 }
3320
3321 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3322 int64_t cur_sector, int nr_sectors)
3323 {
3324 assert(bdrv_dirty_bitmap_enabled(bitmap));
3325 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3326 }
3327
3328 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3329 int64_t cur_sector, int nr_sectors)
3330 {
3331 assert(bdrv_dirty_bitmap_enabled(bitmap));
3332 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3333 }
3334
3335 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3336 {
3337 assert(bdrv_dirty_bitmap_enabled(bitmap));
3338 hbitmap_reset(bitmap->bitmap, 0, bitmap->size);
3339 }
3340
3341 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
3342 int nr_sectors)
3343 {
3344 BdrvDirtyBitmap *bitmap;
3345 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3346 if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3347 continue;
3348 }
3349 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3350 }
3351 }
3352
3353 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3354 int nr_sectors)
3355 {
3356 BdrvDirtyBitmap *bitmap;
3357 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3358 if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3359 continue;
3360 }
3361 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3362 }
3363 }
3364
3365 /**
3366 * Advance an HBitmapIter to an arbitrary offset.
3367 */
3368 void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
3369 {
3370 assert(hbi->hb);
3371 hbitmap_iter_init(hbi, hbi->hb, offset);
3372 }
3373
3374 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
3375 {
3376 return hbitmap_count(bitmap->bitmap);
3377 }
3378
3379 /* Get a reference to bs */
3380 void bdrv_ref(BlockDriverState *bs)
3381 {
3382 bs->refcnt++;
3383 }
3384
3385 /* Release a previously grabbed reference to bs.
3386 * If after releasing, reference count is zero, the BlockDriverState is
3387 * deleted. */
3388 void bdrv_unref(BlockDriverState *bs)
3389 {
3390 if (!bs) {
3391 return;
3392 }
3393 assert(bs->refcnt > 0);
3394 if (--bs->refcnt == 0) {
3395 bdrv_delete(bs);
3396 }
3397 }
3398
3399 struct BdrvOpBlocker {
3400 Error *reason;
3401 QLIST_ENTRY(BdrvOpBlocker) list;
3402 };
3403
3404 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3405 {
3406 BdrvOpBlocker *blocker;
3407 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3408 if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3409 blocker = QLIST_FIRST(&bs->op_blockers[op]);
3410 if (errp) {
3411 error_setg(errp, "Node '%s' is busy: %s",
3412 bdrv_get_device_or_node_name(bs),
3413 error_get_pretty(blocker->reason));
3414 }
3415 return true;
3416 }
3417 return false;
3418 }
3419
3420 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3421 {
3422 BdrvOpBlocker *blocker;
3423 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3424
3425 blocker = g_new0(BdrvOpBlocker, 1);
3426 blocker->reason = reason;
3427 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3428 }
3429
3430 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3431 {
3432 BdrvOpBlocker *blocker, *next;
3433 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3434 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3435 if (blocker->reason == reason) {
3436 QLIST_REMOVE(blocker, list);
3437 g_free(blocker);
3438 }
3439 }
3440 }
3441
3442 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3443 {
3444 int i;
3445 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3446 bdrv_op_block(bs, i, reason);
3447 }
3448 }
3449
3450 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3451 {
3452 int i;
3453 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3454 bdrv_op_unblock(bs, i, reason);
3455 }
3456 }
3457
3458 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3459 {
3460 int i;
3461
3462 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3463 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3464 return false;
3465 }
3466 }
3467 return true;
3468 }
3469
3470 void bdrv_iostatus_enable(BlockDriverState *bs)
3471 {
3472 bs->iostatus_enabled = true;
3473 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3474 }
3475
3476 /* The I/O status is only enabled if the drive explicitly
3477 * enables it _and_ the VM is configured to stop on errors */
3478 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3479 {
3480 return (bs->iostatus_enabled &&
3481 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
3482 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
3483 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
3484 }
3485
3486 void bdrv_iostatus_disable(BlockDriverState *bs)
3487 {
3488 bs->iostatus_enabled = false;
3489 }
3490
3491 void bdrv_iostatus_reset(BlockDriverState *bs)
3492 {
3493 if (bdrv_iostatus_is_enabled(bs)) {
3494 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3495 if (bs->job) {
3496 block_job_iostatus_reset(bs->job);
3497 }
3498 }
3499 }
3500
3501 void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3502 {
3503 assert(bdrv_iostatus_is_enabled(bs));
3504 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
3505 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3506 BLOCK_DEVICE_IO_STATUS_FAILED;
3507 }
3508 }
3509
3510 void bdrv_img_create(const char *filename, const char *fmt,
3511 const char *base_filename, const char *base_fmt,
3512 char *options, uint64_t img_size, int flags,
3513 Error **errp, bool quiet)
3514 {
3515 QemuOptsList *create_opts = NULL;
3516 QemuOpts *opts = NULL;
3517 const char *backing_fmt, *backing_file;
3518 int64_t size;
3519 BlockDriver *drv, *proto_drv;
3520 BlockDriver *backing_drv = NULL;
3521 Error *local_err = NULL;
3522 int ret = 0;
3523
3524 /* Find driver and parse its options */
3525 drv = bdrv_find_format(fmt);
3526 if (!drv) {
3527 error_setg(errp, "Unknown file format '%s'", fmt);
3528 return;
3529 }
3530
3531 proto_drv = bdrv_find_protocol(filename, true, errp);
3532 if (!proto_drv) {
3533 return;
3534 }
3535
3536 if (!drv->create_opts) {
3537 error_setg(errp, "Format driver '%s' does not support image creation",
3538 drv->format_name);
3539 return;
3540 }
3541
3542 if (!proto_drv->create_opts) {
3543 error_setg(errp, "Protocol driver '%s' does not support image creation",
3544 proto_drv->format_name);
3545 return;
3546 }
3547
3548 create_opts = qemu_opts_append(create_opts, drv->create_opts);
3549 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
3550
3551 /* Create parameter list with default values */
3552 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3553 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
3554
3555 /* Parse -o options */
3556 if (options) {
3557 qemu_opts_do_parse(opts, options, NULL, &local_err);
3558 if (local_err) {
3559 error_report_err(local_err);
3560 local_err = NULL;
3561 error_setg(errp, "Invalid options for file format '%s'", fmt);
3562 goto out;
3563 }
3564 }
3565
3566 if (base_filename) {
3567 qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
3568 if (local_err) {
3569 error_setg(errp, "Backing file not supported for file format '%s'",
3570 fmt);
3571 goto out;
3572 }
3573 }
3574
3575 if (base_fmt) {
3576 qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
3577 if (local_err) {
3578 error_setg(errp, "Backing file format not supported for file "
3579 "format '%s'", fmt);
3580 goto out;
3581 }
3582 }
3583
3584 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
3585 if (backing_file) {
3586 if (!strcmp(filename, backing_file)) {
3587 error_setg(errp, "Error: Trying to create an image with the "
3588 "same filename as the backing file");
3589 goto out;
3590 }
3591 }
3592
3593 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
3594 if (backing_fmt) {
3595 backing_drv = bdrv_find_format(backing_fmt);
3596 if (!backing_drv) {
3597 error_setg(errp, "Unknown backing file format '%s'",
3598 backing_fmt);
3599 goto out;
3600 }
3601 }
3602
3603 // The size for the image must always be specified, with one exception:
3604 // If we are using a backing file, we can obtain the size from there
3605 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
3606 if (size == -1) {
3607 if (backing_file) {
3608 BlockDriverState *bs;
3609 char *full_backing = g_new0(char, PATH_MAX);
3610 int64_t size;
3611 int back_flags;
3612
3613 bdrv_get_full_backing_filename_from_filename(filename, backing_file,
3614 full_backing, PATH_MAX,
3615 &local_err);
3616 if (local_err) {
3617 g_free(full_backing);
3618 goto out;
3619 }
3620
3621 /* backing files always opened read-only */
3622 back_flags =
3623 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
3624
3625 bs = NULL;
3626 ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags,
3627 backing_drv, &local_err);
3628 g_free(full_backing);
3629 if (ret < 0) {
3630 goto out;
3631 }
3632 size = bdrv_getlength(bs);
3633 if (size < 0) {
3634 error_setg_errno(errp, -size, "Could not get size of '%s'",
3635 backing_file);
3636 bdrv_unref(bs);
3637 goto out;
3638 }
3639
3640 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
3641
3642 bdrv_unref(bs);
3643 } else {
3644 error_setg(errp, "Image creation needs a size parameter");
3645 goto out;
3646 }
3647 }
3648
3649 if (!quiet) {
3650 printf("Formatting '%s', fmt=%s", filename, fmt);
3651 qemu_opts_print(opts, " ");
3652 puts("");
3653 }
3654
3655 ret = bdrv_create(drv, filename, opts, &local_err);
3656
3657 if (ret == -EFBIG) {
3658 /* This is generally a better message than whatever the driver would
3659 * deliver (especially because of the cluster_size_hint), since that
3660 * is most probably not much different from "image too large". */
3661 const char *cluster_size_hint = "";
3662 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
3663 cluster_size_hint = " (try using a larger cluster size)";
3664 }
3665 error_setg(errp, "The image size is too large for file format '%s'"
3666 "%s", fmt, cluster_size_hint);
3667 error_free(local_err);
3668 local_err = NULL;
3669 }
3670
3671 out:
3672 qemu_opts_del(opts);
3673 qemu_opts_free(create_opts);
3674 if (local_err) {
3675 error_propagate(errp, local_err);
3676 }
3677 }
3678
3679 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
3680 {
3681 return bs->aio_context;
3682 }
3683
3684 void bdrv_detach_aio_context(BlockDriverState *bs)
3685 {
3686 BdrvAioNotifier *baf;
3687
3688 if (!bs->drv) {
3689 return;
3690 }
3691
3692 QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
3693 baf->detach_aio_context(baf->opaque);
3694 }
3695
3696 if (bs->io_limits_enabled) {
3697 throttle_timers_detach_aio_context(&bs->throttle_timers);
3698 }
3699 if (bs->drv->bdrv_detach_aio_context) {
3700 bs->drv->bdrv_detach_aio_context(bs);
3701 }
3702 if (bs->file) {
3703 bdrv_detach_aio_context(bs->file);
3704 }
3705 if (bs->backing_hd) {
3706 bdrv_detach_aio_context(bs->backing_hd);
3707 }
3708
3709 bs->aio_context = NULL;
3710 }
3711
3712 void bdrv_attach_aio_context(BlockDriverState *bs,
3713 AioContext *new_context)
3714 {
3715 BdrvAioNotifier *ban;
3716
3717 if (!bs->drv) {
3718 return;
3719 }
3720
3721 bs->aio_context = new_context;
3722
3723 if (bs->backing_hd) {
3724 bdrv_attach_aio_context(bs->backing_hd, new_context);
3725 }
3726 if (bs->file) {
3727 bdrv_attach_aio_context(bs->file, new_context);
3728 }
3729 if (bs->drv->bdrv_attach_aio_context) {
3730 bs->drv->bdrv_attach_aio_context(bs, new_context);
3731 }
3732 if (bs->io_limits_enabled) {
3733 throttle_timers_attach_aio_context(&bs->throttle_timers, new_context);
3734 }
3735
3736 QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
3737 ban->attached_aio_context(new_context, ban->opaque);
3738 }
3739 }
3740
3741 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
3742 {
3743 bdrv_drain_all(); /* ensure there are no in-flight requests */
3744
3745 bdrv_detach_aio_context(bs);
3746
3747 /* This function executes in the old AioContext so acquire the new one in
3748 * case it runs in a different thread.
3749 */
3750 aio_context_acquire(new_context);
3751 bdrv_attach_aio_context(bs, new_context);
3752 aio_context_release(new_context);
3753 }
3754
3755 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
3756 void (*attached_aio_context)(AioContext *new_context, void *opaque),
3757 void (*detach_aio_context)(void *opaque), void *opaque)
3758 {
3759 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
3760 *ban = (BdrvAioNotifier){
3761 .attached_aio_context = attached_aio_context,
3762 .detach_aio_context = detach_aio_context,
3763 .opaque = opaque
3764 };
3765
3766 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
3767 }
3768
3769 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3770 void (*attached_aio_context)(AioContext *,
3771 void *),
3772 void (*detach_aio_context)(void *),
3773 void *opaque)
3774 {
3775 BdrvAioNotifier *ban, *ban_next;
3776
3777 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3778 if (ban->attached_aio_context == attached_aio_context &&
3779 ban->detach_aio_context == detach_aio_context &&
3780 ban->opaque == opaque)
3781 {
3782 QLIST_REMOVE(ban, list);
3783 g_free(ban);
3784
3785 return;
3786 }
3787 }
3788
3789 abort();
3790 }
3791
3792 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
3793 BlockDriverAmendStatusCB *status_cb)
3794 {
3795 if (!bs->drv->bdrv_amend_options) {
3796 return -ENOTSUP;
3797 }
3798 return bs->drv->bdrv_amend_options(bs, opts, status_cb);
3799 }
3800
3801 /* This function will be called by the bdrv_recurse_is_first_non_filter method
3802 * of block filter and by bdrv_is_first_non_filter.
3803 * It is used to test if the given bs is the candidate or recurse more in the
3804 * node graph.
3805 */
3806 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
3807 BlockDriverState *candidate)
3808 {
3809 /* return false if basic checks fails */
3810 if (!bs || !bs->drv) {
3811 return false;
3812 }
3813
3814 /* the code reached a non block filter driver -> check if the bs is
3815 * the same as the candidate. It's the recursion termination condition.
3816 */
3817 if (!bs->drv->is_filter) {
3818 return bs == candidate;
3819 }
3820 /* Down this path the driver is a block filter driver */
3821
3822 /* If the block filter recursion method is defined use it to recurse down
3823 * the node graph.
3824 */
3825 if (bs->drv->bdrv_recurse_is_first_non_filter) {
3826 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
3827 }
3828
3829 /* the driver is a block filter but don't allow to recurse -> return false
3830 */
3831 return false;
3832 }
3833
3834 /* This function checks if the candidate is the first non filter bs down it's
3835 * bs chain. Since we don't have pointers to parents it explore all bs chains
3836 * from the top. Some filters can choose not to pass down the recursion.
3837 */
3838 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
3839 {
3840 BlockDriverState *bs;
3841
3842 /* walk down the bs forest recursively */
3843 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3844 bool perm;
3845
3846 /* try to recurse in this top level bs */
3847 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
3848
3849 /* candidate is the first non filter */
3850 if (perm) {
3851 return true;
3852 }
3853 }
3854
3855 return false;
3856 }
3857
3858 BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
3859 {
3860 BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
3861 AioContext *aio_context;
3862
3863 if (!to_replace_bs) {
3864 error_setg(errp, "Node name '%s' not found", node_name);
3865 return NULL;
3866 }
3867
3868 aio_context = bdrv_get_aio_context(to_replace_bs);
3869 aio_context_acquire(aio_context);
3870
3871 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
3872 to_replace_bs = NULL;
3873 goto out;
3874 }
3875
3876 /* We don't want arbitrary node of the BDS chain to be replaced only the top
3877 * most non filter in order to prevent data corruption.
3878 * Another benefit is that this tests exclude backing files which are
3879 * blocked by the backing blockers.
3880 */
3881 if (!bdrv_is_first_non_filter(to_replace_bs)) {
3882 error_setg(errp, "Only top most non filter can be replaced");
3883 to_replace_bs = NULL;
3884 goto out;
3885 }
3886
3887 out:
3888 aio_context_release(aio_context);
3889 return to_replace_bs;
3890 }
3891
3892 static bool append_open_options(QDict *d, BlockDriverState *bs)
3893 {
3894 const QDictEntry *entry;
3895 bool found_any = false;
3896
3897 for (entry = qdict_first(bs->options); entry;
3898 entry = qdict_next(bs->options, entry))
3899 {
3900 /* Only take options for this level and exclude all non-driver-specific
3901 * options */
3902 if (!strchr(qdict_entry_key(entry), '.') &&
3903 strcmp(qdict_entry_key(entry), "node-name"))
3904 {
3905 qobject_incref(qdict_entry_value(entry));
3906 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
3907 found_any = true;
3908 }
3909 }
3910
3911 return found_any;
3912 }
3913
3914 /* Updates the following BDS fields:
3915 * - exact_filename: A filename which may be used for opening a block device
3916 * which (mostly) equals the given BDS (even without any
3917 * other options; so reading and writing must return the same
3918 * results, but caching etc. may be different)
3919 * - full_open_options: Options which, when given when opening a block device
3920 * (without a filename), result in a BDS (mostly)
3921 * equalling the given one
3922 * - filename: If exact_filename is set, it is copied here. Otherwise,
3923 * full_open_options is converted to a JSON object, prefixed with
3924 * "json:" (for use through the JSON pseudo protocol) and put here.
3925 */
3926 void bdrv_refresh_filename(BlockDriverState *bs)
3927 {
3928 BlockDriver *drv = bs->drv;
3929 QDict *opts;
3930
3931 if (!drv) {
3932 return;
3933 }
3934
3935 /* This BDS's file name will most probably depend on its file's name, so
3936 * refresh that first */
3937 if (bs->file) {
3938 bdrv_refresh_filename(bs->file);
3939 }
3940
3941 if (drv->bdrv_refresh_filename) {
3942 /* Obsolete information is of no use here, so drop the old file name
3943 * information before refreshing it */
3944 bs->exact_filename[0] = '\0';
3945 if (bs->full_open_options) {
3946 QDECREF(bs->full_open_options);
3947 bs->full_open_options = NULL;
3948 }
3949
3950 drv->bdrv_refresh_filename(bs);
3951 } else if (bs->file) {
3952 /* Try to reconstruct valid information from the underlying file */
3953 bool has_open_options;
3954
3955 bs->exact_filename[0] = '\0';
3956 if (bs->full_open_options) {
3957 QDECREF(bs->full_open_options);
3958 bs->full_open_options = NULL;
3959 }
3960
3961 opts = qdict_new();
3962 has_open_options = append_open_options(opts, bs);
3963
3964 /* If no specific options have been given for this BDS, the filename of
3965 * the underlying file should suffice for this one as well */
3966 if (bs->file->exact_filename[0] && !has_open_options) {
3967 strcpy(bs->exact_filename, bs->file->exact_filename);
3968 }
3969 /* Reconstructing the full options QDict is simple for most format block
3970 * drivers, as long as the full options are known for the underlying
3971 * file BDS. The full options QDict of that file BDS should somehow
3972 * contain a representation of the filename, therefore the following
3973 * suffices without querying the (exact_)filename of this BDS. */
3974 if (bs->file->full_open_options) {
3975 qdict_put_obj(opts, "driver",
3976 QOBJECT(qstring_from_str(drv->format_name)));
3977 QINCREF(bs->file->full_open_options);
3978 qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));
3979
3980 bs->full_open_options = opts;
3981 } else {
3982 QDECREF(opts);
3983 }
3984 } else if (!bs->full_open_options && qdict_size(bs->options)) {
3985 /* There is no underlying file BDS (at least referenced by BDS.file),
3986 * so the full options QDict should be equal to the options given
3987 * specifically for this block device when it was opened (plus the
3988 * driver specification).
3989 * Because those options don't change, there is no need to update
3990 * full_open_options when it's already set. */
3991
3992 opts = qdict_new();
3993 append_open_options(opts, bs);
3994 qdict_put_obj(opts, "driver",
3995 QOBJECT(qstring_from_str(drv->format_name)));
3996
3997 if (bs->exact_filename[0]) {
3998 /* This may not work for all block protocol drivers (some may
3999 * require this filename to be parsed), but we have to find some
4000 * default solution here, so just include it. If some block driver
4001 * does not support pure options without any filename at all or
4002 * needs some special format of the options QDict, it needs to
4003 * implement the driver-specific bdrv_refresh_filename() function.
4004 */
4005 qdict_put_obj(opts, "filename",
4006 QOBJECT(qstring_from_str(bs->exact_filename)));
4007 }
4008
4009 bs->full_open_options = opts;
4010 }
4011
4012 if (bs->exact_filename[0]) {
4013 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4014 } else if (bs->full_open_options) {
4015 QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4016 snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4017 qstring_get_str(json));
4018 QDECREF(json);
4019 }
4020 }
4021
4022 /* This accessor function purpose is to allow the device models to access the
4023 * BlockAcctStats structure embedded inside a BlockDriverState without being
4024 * aware of the BlockDriverState structure layout.
4025 * It will go away when the BlockAcctStats structure will be moved inside
4026 * the device models.
4027 */
4028 BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
4029 {
4030 return &bs->stats;
4031 }