]> git.proxmox.com Git - mirror_qemu.git/blob - block.c
spapr_iommu: Make H_PUT_TCE_INDIRECT endian-safe
[mirror_qemu.git] / block.c
1 /*
2 * QEMU System Emulator block driver
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "block/block_int.h"
28 #include "block/blockjob.h"
29 #include "qemu/module.h"
30 #include "qapi/qmp/qjson.h"
31 #include "sysemu/block-backend.h"
32 #include "sysemu/sysemu.h"
33 #include "qemu/notify.h"
34 #include "block/coroutine.h"
35 #include "block/qapi.h"
36 #include "qmp-commands.h"
37 #include "qemu/timer.h"
38 #include "qapi-event.h"
39
40 #ifdef CONFIG_BSD
41 #include <sys/types.h>
42 #include <sys/stat.h>
43 #include <sys/ioctl.h>
44 #include <sys/queue.h>
45 #ifndef __DragonFly__
46 #include <sys/disk.h>
47 #endif
48 #endif
49
50 #ifdef _WIN32
51 #include <windows.h>
52 #endif
53
54 /**
55 * A BdrvDirtyBitmap can be in three possible states:
56 * (1) successor is NULL and disabled is false: full r/w mode
57 * (2) successor is NULL and disabled is true: read only mode ("disabled")
58 * (3) successor is set: frozen mode.
59 * A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
60 * or enabled. A frozen bitmap can only abdicate() or reclaim().
61 */
62 struct BdrvDirtyBitmap {
63 HBitmap *bitmap; /* Dirty sector bitmap implementation */
64 BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
65 char *name; /* Optional non-empty unique ID */
66 int64_t size; /* Size of the bitmap (Number of sectors) */
67 bool disabled; /* Bitmap is read-only */
68 QLIST_ENTRY(BdrvDirtyBitmap) list;
69 };
70
71 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
72
73 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
74 QTAILQ_HEAD_INITIALIZER(bdrv_states);
75
76 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
77 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
78
79 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
80 QLIST_HEAD_INITIALIZER(bdrv_drivers);
81
82 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
83 /* If non-zero, use only whitelisted block drivers */
84 static int use_bdrv_whitelist;
85
86 #ifdef _WIN32
/*
 * Return 1 if @filename begins with an ASCII letter followed by ':'
 * (a drive specification such as "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_ascii_letter = (letter >= 'a' && letter <= 'z') ||
                          (letter >= 'A' && letter <= 'Z');

    /* Only look at the second character once the first one qualifies */
    if (!is_ascii_letter) {
        return 0;
    }
    return filename[1] == ':';
}
93
94 int is_windows_drive(const char *filename)
95 {
96 if (is_windows_drive_prefix(filename) &&
97 filename[2] == '\0')
98 return 1;
99 if (strstart(filename, "\\\\.\\", NULL) ||
100 strstart(filename, "//./", NULL))
101 return 1;
102 return 0;
103 }
104 #endif
105
106 size_t bdrv_opt_mem_align(BlockDriverState *bs)
107 {
108 if (!bs || !bs->drv) {
109 /* page size or 4k (hdd sector size) should be on the safe side */
110 return MAX(4096, getpagesize());
111 }
112
113 return bs->bl.opt_mem_alignment;
114 }
115
116 size_t bdrv_min_mem_align(BlockDriverState *bs)
117 {
118 if (!bs || !bs->drv) {
119 /* page size or 4k (hdd sector size) should be on the safe side */
120 return MAX(4096, getpagesize());
121 }
122
123 return bs->bl.min_mem_alignment;
124 }
125
/*
 * Check whether @path starts with "<protocol>:", i.e. whether a ':' shows
 * up before any path separator.  On Windows, drive specifications are
 * never treated as protocols.
 */
int path_has_protocol(const char *path)
{
    size_t prefix_len;

#ifdef _WIN32
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
    prefix_len = strcspn(path, ":/\\");
#else
    prefix_len = strcspn(path, ":/");
#endif

    /* The first delimiter found must be the colon itself */
    return path[prefix_len] == ':';
}
143
/*
 * Return non-zero if @path is absolute: it starts with '/', or on Windows
 * also with '\\' or a drive specification.
 */
int path_is_absolute(const char *path)
{
    const char first = path[0];

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return first == '/' || first == '\\';
#else
    return first == '/';
#endif
}
156
/*
 * Build the path of @filename in @dest (at most @dest_size bytes).
 *
 * An absolute @filename is copied as is.  Otherwise it is appended to the
 * directory part of @base_path, which ends after the last path separator
 * or after the first ':' of @base_path, whichever comes later.  URLs are
 * supported.
 *
 * Nothing is written when @dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *dir_end;
    int dir_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Position just past the first ':' (or the start if there is none) */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* Position just past the last separator (or the start if none) */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    dir_end = after_sep > after_colon ? after_sep : after_colon;

    /* Copy the directory part, truncated so the terminator still fits */
    dir_len = dir_end - base_path;
    if (dir_len > dest_size - 1) {
        dir_len = dest_size - 1;
    }
    memcpy(dest, base_path, dir_len);
    dest[dir_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
200
201 void bdrv_get_full_backing_filename_from_filename(const char *backed,
202 const char *backing,
203 char *dest, size_t sz,
204 Error **errp)
205 {
206 if (backing[0] == '\0' || path_has_protocol(backing) ||
207 path_is_absolute(backing))
208 {
209 pstrcpy(dest, sz, backing);
210 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
211 error_setg(errp, "Cannot use relative backing file names for '%s'",
212 backed);
213 } else {
214 path_combine(dest, sz, backed, backing);
215 }
216 }
217
218 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
219 Error **errp)
220 {
221 char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
222
223 bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
224 dest, sz, errp);
225 }
226
227 void bdrv_register(BlockDriver *bdrv)
228 {
229 bdrv_setup_io_funcs(bdrv);
230
231 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
232 }
233
234 BlockDriverState *bdrv_new_root(void)
235 {
236 BlockDriverState *bs = bdrv_new();
237
238 QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
239 return bs;
240 }
241
242 BlockDriverState *bdrv_new(void)
243 {
244 BlockDriverState *bs;
245 int i;
246
247 bs = g_new0(BlockDriverState, 1);
248 QLIST_INIT(&bs->dirty_bitmaps);
249 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
250 QLIST_INIT(&bs->op_blockers[i]);
251 }
252 bdrv_iostatus_disable(bs);
253 notifier_list_init(&bs->close_notifiers);
254 notifier_with_return_list_init(&bs->before_write_notifiers);
255 qemu_co_queue_init(&bs->throttled_reqs[0]);
256 qemu_co_queue_init(&bs->throttled_reqs[1]);
257 bs->refcnt = 1;
258 bs->aio_context = qemu_get_aio_context();
259
260 return bs;
261 }
262
263 void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
264 {
265 notifier_list_add(&bs->close_notifiers, notify);
266 }
267
268 BlockDriver *bdrv_find_format(const char *format_name)
269 {
270 BlockDriver *drv1;
271 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
272 if (!strcmp(drv1->format_name, format_name)) {
273 return drv1;
274 }
275 }
276 return NULL;
277 }
278
279 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
280 {
281 static const char *whitelist_rw[] = {
282 CONFIG_BDRV_RW_WHITELIST
283 };
284 static const char *whitelist_ro[] = {
285 CONFIG_BDRV_RO_WHITELIST
286 };
287 const char **p;
288
289 if (!whitelist_rw[0] && !whitelist_ro[0]) {
290 return 1; /* no whitelist, anything goes */
291 }
292
293 for (p = whitelist_rw; *p; p++) {
294 if (!strcmp(drv->format_name, *p)) {
295 return 1;
296 }
297 }
298 if (read_only) {
299 for (p = whitelist_ro; *p; p++) {
300 if (!strcmp(drv->format_name, *p)) {
301 return 1;
302 }
303 }
304 }
305 return 0;
306 }
307
308 BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
309 bool read_only)
310 {
311 BlockDriver *drv = bdrv_find_format(format_name);
312 return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
313 }
314
315 typedef struct CreateCo {
316 BlockDriver *drv;
317 char *filename;
318 QemuOpts *opts;
319 int ret;
320 Error *err;
321 } CreateCo;
322
323 static void coroutine_fn bdrv_create_co_entry(void *opaque)
324 {
325 Error *local_err = NULL;
326 int ret;
327
328 CreateCo *cco = opaque;
329 assert(cco->drv);
330
331 ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
332 if (local_err) {
333 error_propagate(&cco->err, local_err);
334 }
335 cco->ret = ret;
336 }
337
338 int bdrv_create(BlockDriver *drv, const char* filename,
339 QemuOpts *opts, Error **errp)
340 {
341 int ret;
342
343 Coroutine *co;
344 CreateCo cco = {
345 .drv = drv,
346 .filename = g_strdup(filename),
347 .opts = opts,
348 .ret = NOT_DONE,
349 .err = NULL,
350 };
351
352 if (!drv->bdrv_create) {
353 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
354 ret = -ENOTSUP;
355 goto out;
356 }
357
358 if (qemu_in_coroutine()) {
359 /* Fast-path if already in coroutine context */
360 bdrv_create_co_entry(&cco);
361 } else {
362 co = qemu_coroutine_create(bdrv_create_co_entry);
363 qemu_coroutine_enter(co, &cco);
364 while (cco.ret == NOT_DONE) {
365 aio_poll(qemu_get_aio_context(), true);
366 }
367 }
368
369 ret = cco.ret;
370 if (ret < 0) {
371 if (cco.err) {
372 error_propagate(errp, cco.err);
373 } else {
374 error_setg_errno(errp, -ret, "Could not create image");
375 }
376 }
377
378 out:
379 g_free(cco.filename);
380 return ret;
381 }
382
383 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
384 {
385 BlockDriver *drv;
386 Error *local_err = NULL;
387 int ret;
388
389 drv = bdrv_find_protocol(filename, true, errp);
390 if (drv == NULL) {
391 return -ENOENT;
392 }
393
394 ret = bdrv_create(drv, filename, opts, &local_err);
395 if (local_err) {
396 error_propagate(errp, local_err);
397 }
398 return ret;
399 }
400
401 /**
402 * Try to get @bs's logical and physical block size.
403 * On success, store them in @bsz struct and return 0.
404 * On failure return -errno.
405 * @bs must not be empty.
406 */
407 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
408 {
409 BlockDriver *drv = bs->drv;
410
411 if (drv && drv->bdrv_probe_blocksizes) {
412 return drv->bdrv_probe_blocksizes(bs, bsz);
413 }
414
415 return -ENOTSUP;
416 }
417
418 /**
419 * Try to get @bs's geometry (cyls, heads, sectors).
420 * On success, store them in @geo struct and return 0.
421 * On failure return -errno.
422 * @bs must not be empty.
423 */
424 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
425 {
426 BlockDriver *drv = bs->drv;
427
428 if (drv && drv->bdrv_probe_geometry) {
429 return drv->bdrv_probe_geometry(bs, geo);
430 }
431
432 return -ENOTSUP;
433 }
434
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename: buffer that receives the name of the created file
 * @size:     size of @filename in bytes
 *
 * On POSIX hosts the file is created in $TMPDIR, or in /var/tmp if TMPDIR
 * is unset.  -EOVERFLOW is returned if the name does not fit in @filename.
 *
 * Return 0 upon success, otherwise a negative errno value.
 * NOTE(review): the Windows branch returns -GetLastError(), which is a
 * Win32 error code rather than an errno value.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }

    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }

    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }

    if (close(fd) != 0) {
        /* unlink() may overwrite errno, so remember close()'s error first */
        int close_errno = errno;

        unlink(filename);
        return -close_errno;
    }

    return 0;
#endif
}
470
471 /*
472 * Detect host devices. By convention, /dev/cdrom[N] is always
473 * recognized as a host CDROM.
474 */
475 static BlockDriver *find_hdev_driver(const char *filename)
476 {
477 int score_max = 0, score;
478 BlockDriver *drv = NULL, *d;
479
480 QLIST_FOREACH(d, &bdrv_drivers, list) {
481 if (d->bdrv_probe_device) {
482 score = d->bdrv_probe_device(filename);
483 if (score > score_max) {
484 score_max = score;
485 drv = d;
486 }
487 }
488 }
489
490 return drv;
491 }
492
493 BlockDriver *bdrv_find_protocol(const char *filename,
494 bool allow_protocol_prefix,
495 Error **errp)
496 {
497 BlockDriver *drv1;
498 char protocol[128];
499 int len;
500 const char *p;
501
502 /* TODO Drivers without bdrv_file_open must be specified explicitly */
503
504 /*
505 * XXX(hch): we really should not let host device detection
506 * override an explicit protocol specification, but moving this
507 * later breaks access to device names with colons in them.
508 * Thanks to the brain-dead persistent naming schemes on udev-
509 * based Linux systems those actually are quite common.
510 */
511 drv1 = find_hdev_driver(filename);
512 if (drv1) {
513 return drv1;
514 }
515
516 if (!path_has_protocol(filename) || !allow_protocol_prefix) {
517 return &bdrv_file;
518 }
519
520 p = strchr(filename, ':');
521 assert(p != NULL);
522 len = p - filename;
523 if (len > sizeof(protocol) - 1)
524 len = sizeof(protocol) - 1;
525 memcpy(protocol, filename, len);
526 protocol[len] = '\0';
527 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
528 if (drv1->protocol_name &&
529 !strcmp(drv1->protocol_name, protocol)) {
530 return drv1;
531 }
532 }
533
534 error_setg(errp, "Unknown protocol '%s'", protocol);
535 return NULL;
536 }
537
538 /*
539 * Guess image format by probing its contents.
540 * This is not a good idea when your image is raw (CVE-2008-2004), but
541 * we do it anyway for backward compatibility.
542 *
543 * @buf contains the image's first @buf_size bytes.
544 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
545 * but can be smaller if the image file is smaller)
546 * @filename is its filename.
547 *
548 * For all block drivers, call the bdrv_probe() method to get its
549 * probing score.
550 * Return the first block driver with the highest probing score.
551 */
552 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
553 const char *filename)
554 {
555 int score_max = 0, score;
556 BlockDriver *drv = NULL, *d;
557
558 QLIST_FOREACH(d, &bdrv_drivers, list) {
559 if (d->bdrv_probe) {
560 score = d->bdrv_probe(buf, buf_size, filename);
561 if (score > score_max) {
562 score_max = score;
563 drv = d;
564 }
565 }
566 }
567
568 return drv;
569 }
570
571 static int find_image_format(BlockDriverState *bs, const char *filename,
572 BlockDriver **pdrv, Error **errp)
573 {
574 BlockDriver *drv;
575 uint8_t buf[BLOCK_PROBE_BUF_SIZE];
576 int ret = 0;
577
578 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
579 if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
580 *pdrv = &bdrv_raw;
581 return ret;
582 }
583
584 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
585 if (ret < 0) {
586 error_setg_errno(errp, -ret, "Could not read image for determining its "
587 "format");
588 *pdrv = NULL;
589 return ret;
590 }
591
592 drv = bdrv_probe_all(buf, ret, filename);
593 if (!drv) {
594 error_setg(errp, "Could not determine image format: No compatible "
595 "driver found");
596 ret = -ENOENT;
597 }
598 *pdrv = drv;
599 return ret;
600 }
601
602 /**
603 * Set the current 'total_sectors' value
604 * Return 0 on success, -errno on error.
605 */
606 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
607 {
608 BlockDriver *drv = bs->drv;
609
610 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
611 if (bs->sg)
612 return 0;
613
614 /* query actual device if possible, otherwise just trust the hint */
615 if (drv->bdrv_getlength) {
616 int64_t length = drv->bdrv_getlength(bs);
617 if (length < 0) {
618 return length;
619 }
620 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
621 }
622
623 bs->total_sectors = hint;
624 return 0;
625 }
626
627 /**
628 * Set open flags for a given discard mode
629 *
630 * Return 0 on success, -1 if the discard mode was invalid.
631 */
632 int bdrv_parse_discard_flags(const char *mode, int *flags)
633 {
634 *flags &= ~BDRV_O_UNMAP;
635
636 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
637 /* do nothing */
638 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
639 *flags |= BDRV_O_UNMAP;
640 } else {
641 return -1;
642 }
643
644 return 0;
645 }
646
647 /**
648 * Set open flags for a given cache mode
649 *
650 * Return 0 on success, -1 if the cache mode was invalid.
651 */
652 int bdrv_parse_cache_flags(const char *mode, int *flags)
653 {
654 *flags &= ~BDRV_O_CACHE_MASK;
655
656 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
657 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
658 } else if (!strcmp(mode, "directsync")) {
659 *flags |= BDRV_O_NOCACHE;
660 } else if (!strcmp(mode, "writeback")) {
661 *flags |= BDRV_O_CACHE_WB;
662 } else if (!strcmp(mode, "unsafe")) {
663 *flags |= BDRV_O_CACHE_WB;
664 *flags |= BDRV_O_NO_FLUSH;
665 } else if (!strcmp(mode, "writethrough")) {
666 /* this is the default */
667 } else {
668 return -1;
669 }
670
671 return 0;
672 }
673
674 /*
675 * Returns the flags that a temporary snapshot should get, based on the
676 * originally requested flags (the originally requested image will have flags
677 * like a backing file)
678 */
679 static int bdrv_temp_snapshot_flags(int flags)
680 {
681 return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
682 }
683
684 /*
685 * Returns the flags that bs->file should get, based on the given flags for
686 * the parent BDS
687 */
688 static int bdrv_inherited_flags(int flags)
689 {
690 /* Enable protocol handling, disable format probing for bs->file */
691 flags |= BDRV_O_PROTOCOL;
692
693 /* Our block drivers take care to send flushes and respect unmap policy,
694 * so we can enable both unconditionally on lower layers. */
695 flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
696
697 /* Clear flags that only apply to the top layer */
698 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
699
700 return flags;
701 }
702
703 /*
704 * Returns the flags that bs->backing_hd should get, based on the given flags
705 * for the parent BDS
706 */
707 static int bdrv_backing_flags(int flags)
708 {
709 /* backing files always opened read-only */
710 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
711
712 /* snapshot=on is handled on the top layer */
713 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
714
715 return flags;
716 }
717
718 static int bdrv_open_flags(BlockDriverState *bs, int flags)
719 {
720 int open_flags = flags | BDRV_O_CACHE_WB;
721
722 /*
723 * Clear flags that are internal to the block layer before opening the
724 * image.
725 */
726 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
727
728 /*
729 * Snapshots should be writable.
730 */
731 if (flags & BDRV_O_TEMPORARY) {
732 open_flags |= BDRV_O_RDWR;
733 }
734
735 return open_flags;
736 }
737
738 static void bdrv_assign_node_name(BlockDriverState *bs,
739 const char *node_name,
740 Error **errp)
741 {
742 if (!node_name) {
743 return;
744 }
745
746 /* Check for empty string or invalid characters */
747 if (!id_wellformed(node_name)) {
748 error_setg(errp, "Invalid node name");
749 return;
750 }
751
752 /* takes care of avoiding namespaces collisions */
753 if (blk_by_name(node_name)) {
754 error_setg(errp, "node-name=%s is conflicting with a device id",
755 node_name);
756 return;
757 }
758
759 /* takes care of avoiding duplicates node names */
760 if (bdrv_find_node(node_name)) {
761 error_setg(errp, "Duplicate node name");
762 return;
763 }
764
765 /* copy node name into the bs and insert it into the graph list */
766 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
767 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
768 }
769
770 /*
771 * Common part for opening disk images and files
772 *
773 * Removes all processed options from *options.
774 */
775 static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
776 QDict *options, int flags, BlockDriver *drv, Error **errp)
777 {
778 int ret, open_flags;
779 const char *filename;
780 const char *node_name = NULL;
781 Error *local_err = NULL;
782
783 assert(drv != NULL);
784 assert(bs->file == NULL);
785 assert(options != NULL && bs->options != options);
786
787 if (file != NULL) {
788 filename = file->filename;
789 } else {
790 filename = qdict_get_try_str(options, "filename");
791 }
792
793 if (drv->bdrv_needs_filename && !filename) {
794 error_setg(errp, "The '%s' block driver requires a file name",
795 drv->format_name);
796 return -EINVAL;
797 }
798
799 trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
800
801 node_name = qdict_get_try_str(options, "node-name");
802 bdrv_assign_node_name(bs, node_name, &local_err);
803 if (local_err) {
804 error_propagate(errp, local_err);
805 return -EINVAL;
806 }
807 qdict_del(options, "node-name");
808
809 /* bdrv_open() with directly using a protocol as drv. This layer is already
810 * opened, so assign it to bs (while file becomes a closed BlockDriverState)
811 * and return immediately. */
812 if (file != NULL && drv->bdrv_file_open) {
813 bdrv_swap(file, bs);
814 return 0;
815 }
816
817 bs->open_flags = flags;
818 bs->guest_block_size = 512;
819 bs->request_alignment = 512;
820 bs->zero_beyond_eof = true;
821 open_flags = bdrv_open_flags(bs, flags);
822 bs->read_only = !(open_flags & BDRV_O_RDWR);
823
824 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
825 error_setg(errp,
826 !bs->read_only && bdrv_is_whitelisted(drv, true)
827 ? "Driver '%s' can only be used for read-only devices"
828 : "Driver '%s' is not whitelisted",
829 drv->format_name);
830 return -ENOTSUP;
831 }
832
833 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
834 if (flags & BDRV_O_COPY_ON_READ) {
835 if (!bs->read_only) {
836 bdrv_enable_copy_on_read(bs);
837 } else {
838 error_setg(errp, "Can't use copy-on-read on read-only device");
839 return -EINVAL;
840 }
841 }
842
843 if (filename != NULL) {
844 pstrcpy(bs->filename, sizeof(bs->filename), filename);
845 } else {
846 bs->filename[0] = '\0';
847 }
848 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
849
850 bs->drv = drv;
851 bs->opaque = g_malloc0(drv->instance_size);
852
853 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
854
855 /* Open the image, either directly or using a protocol */
856 if (drv->bdrv_file_open) {
857 assert(file == NULL);
858 assert(!drv->bdrv_needs_filename || filename != NULL);
859 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
860 } else {
861 if (file == NULL) {
862 error_setg(errp, "Can't use '%s' as a block driver for the "
863 "protocol level", drv->format_name);
864 ret = -EINVAL;
865 goto free_and_fail;
866 }
867 bs->file = file;
868 ret = drv->bdrv_open(bs, options, open_flags, &local_err);
869 }
870
871 if (ret < 0) {
872 if (local_err) {
873 error_propagate(errp, local_err);
874 } else if (bs->filename[0]) {
875 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
876 } else {
877 error_setg_errno(errp, -ret, "Could not open image");
878 }
879 goto free_and_fail;
880 }
881
882 if (bs->encrypted) {
883 error_report("Encrypted images are deprecated");
884 error_printf("Support for them will be removed in a future release.\n"
885 "You can use 'qemu-img convert' to convert your image"
886 " to an unencrypted one.\n");
887 }
888
889 ret = refresh_total_sectors(bs, bs->total_sectors);
890 if (ret < 0) {
891 error_setg_errno(errp, -ret, "Could not refresh total sector count");
892 goto free_and_fail;
893 }
894
895 bdrv_refresh_limits(bs, &local_err);
896 if (local_err) {
897 error_propagate(errp, local_err);
898 ret = -EINVAL;
899 goto free_and_fail;
900 }
901
902 assert(bdrv_opt_mem_align(bs) != 0);
903 assert(bdrv_min_mem_align(bs) != 0);
904 assert((bs->request_alignment != 0) || bs->sg);
905 return 0;
906
907 free_and_fail:
908 bs->file = NULL;
909 g_free(bs->opaque);
910 bs->opaque = NULL;
911 bs->drv = NULL;
912 return ret;
913 }
914
915 static QDict *parse_json_filename(const char *filename, Error **errp)
916 {
917 QObject *options_obj;
918 QDict *options;
919 int ret;
920
921 ret = strstart(filename, "json:", &filename);
922 assert(ret);
923
924 options_obj = qobject_from_json(filename);
925 if (!options_obj) {
926 error_setg(errp, "Could not parse the JSON options");
927 return NULL;
928 }
929
930 if (qobject_type(options_obj) != QTYPE_QDICT) {
931 qobject_decref(options_obj);
932 error_setg(errp, "Invalid JSON object given");
933 return NULL;
934 }
935
936 options = qobject_to_qdict(options_obj);
937 qdict_flatten(options);
938
939 return options;
940 }
941
942 /*
943 * Fills in default options for opening images and converts the legacy
944 * filename/flags pair to option QDict entries.
945 */
946 static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
947 BlockDriver *drv, Error **errp)
948 {
949 const char *filename = *pfilename;
950 const char *drvname;
951 bool protocol = flags & BDRV_O_PROTOCOL;
952 bool parse_filename = false;
953 Error *local_err = NULL;
954
955 /* Parse json: pseudo-protocol */
956 if (filename && g_str_has_prefix(filename, "json:")) {
957 QDict *json_options = parse_json_filename(filename, &local_err);
958 if (local_err) {
959 error_propagate(errp, local_err);
960 return -EINVAL;
961 }
962
963 /* Options given in the filename have lower priority than options
964 * specified directly */
965 qdict_join(*options, json_options, false);
966 QDECREF(json_options);
967 *pfilename = filename = NULL;
968 }
969
970 /* Fetch the file name from the options QDict if necessary */
971 if (protocol && filename) {
972 if (!qdict_haskey(*options, "filename")) {
973 qdict_put(*options, "filename", qstring_from_str(filename));
974 parse_filename = true;
975 } else {
976 error_setg(errp, "Can't specify 'file' and 'filename' options at "
977 "the same time");
978 return -EINVAL;
979 }
980 }
981
982 /* Find the right block driver */
983 filename = qdict_get_try_str(*options, "filename");
984 drvname = qdict_get_try_str(*options, "driver");
985
986 if (drv) {
987 if (drvname) {
988 error_setg(errp, "Driver specified twice");
989 return -EINVAL;
990 }
991 drvname = drv->format_name;
992 qdict_put(*options, "driver", qstring_from_str(drvname));
993 } else {
994 if (!drvname && protocol) {
995 if (filename) {
996 drv = bdrv_find_protocol(filename, parse_filename, errp);
997 if (!drv) {
998 return -EINVAL;
999 }
1000
1001 drvname = drv->format_name;
1002 qdict_put(*options, "driver", qstring_from_str(drvname));
1003 } else {
1004 error_setg(errp, "Must specify either driver or file");
1005 return -EINVAL;
1006 }
1007 } else if (drvname) {
1008 drv = bdrv_find_format(drvname);
1009 if (!drv) {
1010 error_setg(errp, "Unknown driver '%s'", drvname);
1011 return -ENOENT;
1012 }
1013 }
1014 }
1015
1016 assert(drv || !protocol);
1017
1018 /* Driver-specific filename parsing */
1019 if (drv && drv->bdrv_parse_filename && parse_filename) {
1020 drv->bdrv_parse_filename(filename, *options, &local_err);
1021 if (local_err) {
1022 error_propagate(errp, local_err);
1023 return -EINVAL;
1024 }
1025
1026 if (!drv->bdrv_needs_filename) {
1027 qdict_del(*options, "filename");
1028 }
1029 }
1030
1031 return 0;
1032 }
1033
1034 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1035 {
1036
1037 if (bs->backing_hd) {
1038 assert(bs->backing_blocker);
1039 bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
1040 } else if (backing_hd) {
1041 error_setg(&bs->backing_blocker,
1042 "node is used as backing hd of '%s'",
1043 bdrv_get_device_or_node_name(bs));
1044 }
1045
1046 bs->backing_hd = backing_hd;
1047 if (!backing_hd) {
1048 error_free(bs->backing_blocker);
1049 bs->backing_blocker = NULL;
1050 goto out;
1051 }
1052 bs->open_flags &= ~BDRV_O_NO_BACKING;
1053 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1054 pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1055 backing_hd->drv ? backing_hd->drv->format_name : "");
1056
1057 bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
1058 /* Otherwise we won't be able to commit due to check in bdrv_commit */
1059 bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1060 bs->backing_blocker);
1061 out:
1062 bdrv_refresh_limits(bs, NULL);
1063 }
1064
1065 /*
1066 * Opens the backing file for a BlockDriverState if not yet open
1067 *
1068 * options is a QDict of options to pass to the block drivers, or NULL for an
1069 * empty set of options. The reference to the QDict is transferred to this
1070 * function (even on failure), so if the caller intends to reuse the dictionary,
1071 * it needs to use QINCREF() before calling bdrv_file_open.
1072 */
1073 int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
1074 {
1075 char *backing_filename = g_malloc0(PATH_MAX);
1076 int ret = 0;
1077 BlockDriverState *backing_hd;
1078 Error *local_err = NULL;
1079
1080 if (bs->backing_hd != NULL) {
1081 QDECREF(options);
1082 goto free_exit;
1083 }
1084
1085 /* NULL means an empty set of options */
1086 if (options == NULL) {
1087 options = qdict_new();
1088 }
1089
1090 bs->open_flags &= ~BDRV_O_NO_BACKING;
1091 if (qdict_haskey(options, "file.filename")) {
1092 backing_filename[0] = '\0';
1093 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1094 QDECREF(options);
1095 goto free_exit;
1096 } else {
1097 bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1098 &local_err);
1099 if (local_err) {
1100 ret = -EINVAL;
1101 error_propagate(errp, local_err);
1102 QDECREF(options);
1103 goto free_exit;
1104 }
1105 }
1106
1107 if (!bs->drv || !bs->drv->supports_backing) {
1108 ret = -EINVAL;
1109 error_setg(errp, "Driver doesn't support backing files");
1110 QDECREF(options);
1111 goto free_exit;
1112 }
1113
1114 backing_hd = bdrv_new();
1115
1116 if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1117 qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1118 }
1119
1120 assert(bs->backing_hd == NULL);
1121 ret = bdrv_open(&backing_hd,
1122 *backing_filename ? backing_filename : NULL, NULL, options,
1123 bdrv_backing_flags(bs->open_flags), NULL, &local_err);
1124 if (ret < 0) {
1125 bdrv_unref(backing_hd);
1126 backing_hd = NULL;
1127 bs->open_flags |= BDRV_O_NO_BACKING;
1128 error_setg(errp, "Could not open backing file: %s",
1129 error_get_pretty(local_err));
1130 error_free(local_err);
1131 goto free_exit;
1132 }
1133 bdrv_set_backing_hd(bs, backing_hd);
1134
1135 free_exit:
1136 g_free(backing_filename);
1137 return ret;
1138 }
1139
1140 /*
1141 * Opens a disk image whose options are given as BlockdevRef in another block
1142 * device's options.
1143 *
1144 * If allow_none is true, no image will be opened if filename is false and no
1145 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1146 *
1147 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1148 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1149 * itself, all options starting with "${bdref_key}." are considered part of the
1150 * BlockdevRef.
1151 *
1152 * The BlockdevRef will be removed from the options QDict.
1153 *
1154 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
1155 */
1156 int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1157 QDict *options, const char *bdref_key, int flags,
1158 bool allow_none, Error **errp)
1159 {
1160 QDict *image_options;
1161 int ret;
1162 char *bdref_key_dot;
1163 const char *reference;
1164
1165 assert(pbs);
1166 assert(*pbs == NULL);
1167
1168 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1169 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1170 g_free(bdref_key_dot);
1171
1172 reference = qdict_get_try_str(options, bdref_key);
1173 if (!filename && !reference && !qdict_size(image_options)) {
1174 if (allow_none) {
1175 ret = 0;
1176 } else {
1177 error_setg(errp, "A block device must be specified for \"%s\"",
1178 bdref_key);
1179 ret = -EINVAL;
1180 }
1181 QDECREF(image_options);
1182 goto done;
1183 }
1184
1185 ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);
1186
1187 done:
1188 qdict_del(options, bdref_key);
1189 return ret;
1190 }
1191
1192 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
1193 {
1194 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1195 char *tmp_filename = g_malloc0(PATH_MAX + 1);
1196 int64_t total_size;
1197 QemuOpts *opts = NULL;
1198 QDict *snapshot_options;
1199 BlockDriverState *bs_snapshot;
1200 Error *local_err;
1201 int ret;
1202
1203 /* if snapshot, we create a temporary backing file and open it
1204 instead of opening 'filename' directly */
1205
1206 /* Get the required size from the image */
1207 total_size = bdrv_getlength(bs);
1208 if (total_size < 0) {
1209 ret = total_size;
1210 error_setg_errno(errp, -total_size, "Could not get image size");
1211 goto out;
1212 }
1213
1214 /* Create the temporary image */
1215 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1216 if (ret < 0) {
1217 error_setg_errno(errp, -ret, "Could not get temporary filename");
1218 goto out;
1219 }
1220
1221 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1222 &error_abort);
1223 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1224 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
1225 qemu_opts_del(opts);
1226 if (ret < 0) {
1227 error_setg_errno(errp, -ret, "Could not create temporary overlay "
1228 "'%s': %s", tmp_filename,
1229 error_get_pretty(local_err));
1230 error_free(local_err);
1231 goto out;
1232 }
1233
1234 /* Prepare a new options QDict for the temporary file */
1235 snapshot_options = qdict_new();
1236 qdict_put(snapshot_options, "file.driver",
1237 qstring_from_str("file"));
1238 qdict_put(snapshot_options, "file.filename",
1239 qstring_from_str(tmp_filename));
1240
1241 bs_snapshot = bdrv_new();
1242
1243 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1244 flags, &bdrv_qcow2, &local_err);
1245 if (ret < 0) {
1246 error_propagate(errp, local_err);
1247 goto out;
1248 }
1249
1250 bdrv_append(bs_snapshot, bs);
1251
1252 out:
1253 g_free(tmp_filename);
1254 return ret;
1255 }
1256
1257 /*
1258 * Opens a disk image (raw, qcow2, vmdk, ...)
1259 *
1260 * options is a QDict of options to pass to the block drivers, or NULL for an
1261 * empty set of options. The reference to the QDict belongs to the block layer
1262 * after the call (even on failure), so if the caller intends to reuse the
1263 * dictionary, it needs to use QINCREF() before calling bdrv_open.
1264 *
1265 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1266 * If it is not NULL, the referenced BDS will be reused.
1267 *
1268 * The reference parameter may be used to specify an existing block device which
1269 * should be opened. If specified, neither options nor a filename may be given,
1270 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1271 */
1272 int bdrv_open(BlockDriverState **pbs, const char *filename,
1273 const char *reference, QDict *options, int flags,
1274 BlockDriver *drv, Error **errp)
1275 {
1276 int ret;
1277 BlockDriverState *file = NULL, *bs;
1278 const char *drvname;
1279 Error *local_err = NULL;
1280 int snapshot_flags = 0;
1281
1282 assert(pbs);
1283
1284 if (reference) {
1285 bool options_non_empty = options ? qdict_size(options) : false;
1286 QDECREF(options);
1287
1288 if (*pbs) {
1289 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1290 "another block device");
1291 return -EINVAL;
1292 }
1293
1294 if (filename || options_non_empty) {
1295 error_setg(errp, "Cannot reference an existing block device with "
1296 "additional options or a new filename");
1297 return -EINVAL;
1298 }
1299
1300 bs = bdrv_lookup_bs(reference, reference, errp);
1301 if (!bs) {
1302 return -ENODEV;
1303 }
1304 bdrv_ref(bs);
1305 *pbs = bs;
1306 return 0;
1307 }
1308
1309 if (*pbs) {
1310 bs = *pbs;
1311 } else {
1312 bs = bdrv_new();
1313 }
1314
1315 /* NULL means an empty set of options */
1316 if (options == NULL) {
1317 options = qdict_new();
1318 }
1319
1320 ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
1321 if (local_err) {
1322 goto fail;
1323 }
1324
1325 /* Find the right image format driver */
1326 drv = NULL;
1327 drvname = qdict_get_try_str(options, "driver");
1328 if (drvname) {
1329 drv = bdrv_find_format(drvname);
1330 qdict_del(options, "driver");
1331 if (!drv) {
1332 error_setg(errp, "Unknown driver: '%s'", drvname);
1333 ret = -EINVAL;
1334 goto fail;
1335 }
1336 }
1337
1338 assert(drvname || !(flags & BDRV_O_PROTOCOL));
1339 if (drv && !drv->bdrv_file_open) {
1340 /* If the user explicitly wants a format driver here, we'll need to add
1341 * another layer for the protocol in bs->file */
1342 flags &= ~BDRV_O_PROTOCOL;
1343 }
1344
1345 bs->options = options;
1346 options = qdict_clone_shallow(options);
1347
1348 /* Open image file without format layer */
1349 if ((flags & BDRV_O_PROTOCOL) == 0) {
1350 if (flags & BDRV_O_RDWR) {
1351 flags |= BDRV_O_ALLOW_RDWR;
1352 }
1353 if (flags & BDRV_O_SNAPSHOT) {
1354 snapshot_flags = bdrv_temp_snapshot_flags(flags);
1355 flags = bdrv_backing_flags(flags);
1356 }
1357
1358 assert(file == NULL);
1359 ret = bdrv_open_image(&file, filename, options, "file",
1360 bdrv_inherited_flags(flags),
1361 true, &local_err);
1362 if (ret < 0) {
1363 goto fail;
1364 }
1365 }
1366
1367 /* Image format probing */
1368 bs->probed = !drv;
1369 if (!drv && file) {
1370 ret = find_image_format(file, filename, &drv, &local_err);
1371 if (ret < 0) {
1372 goto fail;
1373 }
1374 } else if (!drv) {
1375 error_setg(errp, "Must specify either driver or file");
1376 ret = -EINVAL;
1377 goto fail;
1378 }
1379
1380 /* Open the image */
1381 ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
1382 if (ret < 0) {
1383 goto fail;
1384 }
1385
1386 if (file && (bs->file != file)) {
1387 bdrv_unref(file);
1388 file = NULL;
1389 }
1390
1391 /* If there is a backing file, use it */
1392 if ((flags & BDRV_O_NO_BACKING) == 0) {
1393 QDict *backing_options;
1394
1395 qdict_extract_subqdict(options, &backing_options, "backing.");
1396 ret = bdrv_open_backing_file(bs, backing_options, &local_err);
1397 if (ret < 0) {
1398 goto close_and_fail;
1399 }
1400 }
1401
1402 bdrv_refresh_filename(bs);
1403
1404 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1405 * temporary snapshot afterwards. */
1406 if (snapshot_flags) {
1407 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
1408 if (local_err) {
1409 goto close_and_fail;
1410 }
1411 }
1412
1413 /* Check if any unknown options were used */
1414 if (options && (qdict_size(options) != 0)) {
1415 const QDictEntry *entry = qdict_first(options);
1416 if (flags & BDRV_O_PROTOCOL) {
1417 error_setg(errp, "Block protocol '%s' doesn't support the option "
1418 "'%s'", drv->format_name, entry->key);
1419 } else {
1420 error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1421 "support the option '%s'", drv->format_name,
1422 bdrv_get_device_name(bs), entry->key);
1423 }
1424
1425 ret = -EINVAL;
1426 goto close_and_fail;
1427 }
1428
1429 if (!bdrv_key_required(bs)) {
1430 if (bs->blk) {
1431 blk_dev_change_media_cb(bs->blk, true);
1432 }
1433 } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1434 && !runstate_check(RUN_STATE_INMIGRATE)
1435 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1436 error_setg(errp,
1437 "Guest must be stopped for opening of encrypted image");
1438 ret = -EBUSY;
1439 goto close_and_fail;
1440 }
1441
1442 QDECREF(options);
1443 *pbs = bs;
1444 return 0;
1445
1446 fail:
1447 if (file != NULL) {
1448 bdrv_unref(file);
1449 }
1450 QDECREF(bs->options);
1451 QDECREF(options);
1452 bs->options = NULL;
1453 if (!*pbs) {
1454 /* If *pbs is NULL, a new BDS has been created in this function and
1455 needs to be freed now. Otherwise, it does not need to be closed,
1456 since it has not really been opened yet. */
1457 bdrv_unref(bs);
1458 }
1459 if (local_err) {
1460 error_propagate(errp, local_err);
1461 }
1462 return ret;
1463
1464 close_and_fail:
1465 /* See fail path, but now the BDS has to be always closed */
1466 if (*pbs) {
1467 bdrv_close(bs);
1468 } else {
1469 bdrv_unref(bs);
1470 }
1471 QDECREF(options);
1472 if (local_err) {
1473 error_propagate(errp, local_err);
1474 }
1475 return ret;
1476 }
1477
/*
 * One element of a BlockReopenQueue, created by bdrv_reopen_queue() and
 * freed by bdrv_reopen_multiple().
 */
1478 typedef struct BlockReopenQueueEntry {
/* set by bdrv_reopen_multiple() once bdrv_reopen_prepare() succeeded for
 * this entry; only prepared entries are aborted on failure */
1479 bool prepared;
/* the BDS to reopen and its new flags, filled in by bdrv_reopen_queue() */
1480 BDRVReopenState state;
/* linkage within the queue */
1481 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1482 } BlockReopenQueueEntry;
1483
1484 /*
1485 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1486 * reopen of multiple devices.
1487 *
1488 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1489 * already performed, or alternatively may be NULL a new BlockReopenQueue will
1490 * be created and initialized. This newly created BlockReopenQueue should be
1491 * passed back in for subsequent calls that are intended to be of the same
1492 * atomic 'set'.
1493 *
1494 * bs is the BlockDriverState to add to the reopen queue.
1495 *
1496 * flags contains the open flags for the associated bs
1497 *
1498 * returns a pointer to bs_queue, which is either the newly allocated
1499 * bs_queue, or the existing bs_queue being used.
1500 *
1501 */
1502 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1503 BlockDriverState *bs, int flags)
1504 {
1505 assert(bs != NULL);
1506
1507 BlockReopenQueueEntry *bs_entry;
1508 if (bs_queue == NULL) {
1509 bs_queue = g_new0(BlockReopenQueue, 1);
1510 QSIMPLEQ_INIT(bs_queue);
1511 }
1512
1513 /* bdrv_open() masks this flag out */
1514 flags &= ~BDRV_O_PROTOCOL;
1515
1516 if (bs->file) {
1517 bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
1518 }
1519
1520 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1521 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1522
1523 bs_entry->state.bs = bs;
1524 bs_entry->state.flags = flags;
1525
1526 return bs_queue;
1527 }
1528
1529 /*
1530 * Reopen multiple BlockDriverStates atomically & transactionally.
1531 *
1532 * The queue passed in (bs_queue) must have been built up previous
1533 * via bdrv_reopen_queue().
1534 *
1535 * Reopens all BDS specified in the queue, with the appropriate
1536 * flags. All devices are prepared for reopen, and failure of any
1537 * device will cause all device changes to be abandonded, and intermediate
1538 * data cleaned up.
1539 *
1540 * If all devices prepare successfully, then the changes are committed
1541 * to all devices.
1542 *
1543 */
1544 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1545 {
1546 int ret = -1;
1547 BlockReopenQueueEntry *bs_entry, *next;
1548 Error *local_err = NULL;
1549
1550 assert(bs_queue != NULL);
1551
1552 bdrv_drain_all();
1553
1554 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1555 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1556 error_propagate(errp, local_err);
1557 goto cleanup;
1558 }
1559 bs_entry->prepared = true;
1560 }
1561
1562 /* If we reach this point, we have success and just need to apply the
1563 * changes
1564 */
1565 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1566 bdrv_reopen_commit(&bs_entry->state);
1567 }
1568
1569 ret = 0;
1570
1571 cleanup:
1572 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1573 if (ret && bs_entry->prepared) {
1574 bdrv_reopen_abort(&bs_entry->state);
1575 }
1576 g_free(bs_entry);
1577 }
1578 g_free(bs_queue);
1579 return ret;
1580 }
1581
1582
1583 /* Reopen a single BlockDriverState with the specified flags. */
1584 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1585 {
1586 int ret = -1;
1587 Error *local_err = NULL;
1588 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1589
1590 ret = bdrv_reopen_multiple(queue, &local_err);
1591 if (local_err != NULL) {
1592 error_propagate(errp, local_err);
1593 }
1594 return ret;
1595 }
1596
1597
1598 /*
1599 * Prepares a BlockDriverState for reopen. All changes are staged in the
1600 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1601 * the block driver layer .bdrv_reopen_prepare()
1602 *
1603 * bs is the BlockDriverState to reopen
1604 * flags are the new open flags
1605 * queue is the reopen queue
1606 *
1607 * Returns 0 on success, non-zero on error. On error errp will be set
1608 * as well.
1609 *
1610 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1611 * It is the responsibility of the caller to then call the abort() or
1612 * commit() for any other BDS that have been left in a prepare() state
1613 *
1614 */
1615 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1616 Error **errp)
1617 {
1618 int ret = -1;
1619 Error *local_err = NULL;
1620 BlockDriver *drv;
1621
1622 assert(reopen_state != NULL);
1623 assert(reopen_state->bs->drv != NULL);
1624 drv = reopen_state->bs->drv;
1625
1626 /* if we are to stay read-only, do not allow permission change
1627 * to r/w */
1628 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1629 reopen_state->flags & BDRV_O_RDWR) {
1630 error_setg(errp, "Node '%s' is read only",
1631 bdrv_get_device_or_node_name(reopen_state->bs));
1632 goto error;
1633 }
1634
1635
1636 ret = bdrv_flush(reopen_state->bs);
1637 if (ret) {
1638 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1639 strerror(-ret));
1640 goto error;
1641 }
1642
1643 if (drv->bdrv_reopen_prepare) {
1644 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1645 if (ret) {
1646 if (local_err != NULL) {
1647 error_propagate(errp, local_err);
1648 } else {
1649 error_setg(errp, "failed while preparing to reopen image '%s'",
1650 reopen_state->bs->filename);
1651 }
1652 goto error;
1653 }
1654 } else {
1655 /* It is currently mandatory to have a bdrv_reopen_prepare()
1656 * handler for each supported drv. */
1657 error_setg(errp, "Block format '%s' used by node '%s' "
1658 "does not support reopening files", drv->format_name,
1659 bdrv_get_device_or_node_name(reopen_state->bs));
1660 ret = -1;
1661 goto error;
1662 }
1663
1664 ret = 0;
1665
1666 error:
1667 return ret;
1668 }
1669
1670 /*
1671 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1672 * makes them final by swapping the staging BlockDriverState contents into
1673 * the active BlockDriverState contents.
1674 */
1675 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1676 {
1677 BlockDriver *drv;
1678
1679 assert(reopen_state != NULL);
1680 drv = reopen_state->bs->drv;
1681 assert(drv != NULL);
1682
1683 /* If there are any driver level actions to take */
1684 if (drv->bdrv_reopen_commit) {
1685 drv->bdrv_reopen_commit(reopen_state);
1686 }
1687
1688 /* set BDS specific flags now */
1689 reopen_state->bs->open_flags = reopen_state->flags;
1690 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1691 BDRV_O_CACHE_WB);
1692 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
1693
1694 bdrv_refresh_limits(reopen_state->bs, NULL);
1695 }
1696
1697 /*
1698 * Abort the reopen, and delete and free the staged changes in
1699 * reopen_state
1700 */
1701 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1702 {
1703 BlockDriver *drv;
1704
1705 assert(reopen_state != NULL);
1706 drv = reopen_state->bs->drv;
1707 assert(drv != NULL);
1708
1709 if (drv->bdrv_reopen_abort) {
1710 drv->bdrv_reopen_abort(reopen_state);
1711 }
1712 }
1713
1714
1715 void bdrv_close(BlockDriverState *bs)
1716 {
1717 BdrvAioNotifier *ban, *ban_next;
1718
1719 if (bs->job) {
1720 block_job_cancel_sync(bs->job);
1721 }
1722 bdrv_drain_all(); /* complete I/O */
1723 bdrv_flush(bs);
1724 bdrv_drain_all(); /* in case flush left pending I/O */
1725 notifier_list_notify(&bs->close_notifiers, bs);
1726
1727 if (bs->drv) {
1728 if (bs->backing_hd) {
1729 BlockDriverState *backing_hd = bs->backing_hd;
1730 bdrv_set_backing_hd(bs, NULL);
1731 bdrv_unref(backing_hd);
1732 }
1733 bs->drv->bdrv_close(bs);
1734 g_free(bs->opaque);
1735 bs->opaque = NULL;
1736 bs->drv = NULL;
1737 bs->copy_on_read = 0;
1738 bs->backing_file[0] = '\0';
1739 bs->backing_format[0] = '\0';
1740 bs->total_sectors = 0;
1741 bs->encrypted = 0;
1742 bs->valid_key = 0;
1743 bs->sg = 0;
1744 bs->zero_beyond_eof = false;
1745 QDECREF(bs->options);
1746 bs->options = NULL;
1747 QDECREF(bs->full_open_options);
1748 bs->full_open_options = NULL;
1749
1750 if (bs->file != NULL) {
1751 bdrv_unref(bs->file);
1752 bs->file = NULL;
1753 }
1754 }
1755
1756 if (bs->blk) {
1757 blk_dev_change_media_cb(bs->blk, false);
1758 }
1759
1760 /*throttling disk I/O limits*/
1761 if (bs->io_limits_enabled) {
1762 bdrv_io_limits_disable(bs);
1763 }
1764
1765 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
1766 g_free(ban);
1767 }
1768 QLIST_INIT(&bs->aio_notifiers);
1769 }
1770
1771 void bdrv_close_all(void)
1772 {
1773 BlockDriverState *bs;
1774
1775 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1776 AioContext *aio_context = bdrv_get_aio_context(bs);
1777
1778 aio_context_acquire(aio_context);
1779 bdrv_close(bs);
1780 aio_context_release(aio_context);
1781 }
1782 }
1783
1784 /* make a BlockDriverState anonymous by removing from bdrv_state and
1785 * graph_bdrv_state list.
1786 Also, NULL terminate the device_name to prevent double remove */
1787 void bdrv_make_anon(BlockDriverState *bs)
1788 {
1789 /*
1790 * Take care to remove bs from bdrv_states only when it's actually
1791 * in it. Note that bs->device_list.tqe_prev is initially null,
1792 * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish
1793 * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
1794 * resetting it to null on remove.
1795 */
1796 if (bs->device_list.tqe_prev) {
1797 QTAILQ_REMOVE(&bdrv_states, bs, device_list);
1798 bs->device_list.tqe_prev = NULL;
1799 }
1800 if (bs->node_name[0] != '\0') {
1801 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1802 }
1803 bs->node_name[0] = '\0';
1804 }
1805
1806 static void bdrv_rebind(BlockDriverState *bs)
1807 {
1808 if (bs->drv && bs->drv->bdrv_rebind) {
1809 bs->drv->bdrv_rebind(bs);
1810 }
1811 }
1812
1813 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
1814 BlockDriverState *bs_src)
1815 {
1816 /* move some fields that need to stay attached to the device */
1817
1818 /* dev info */
1819 bs_dest->guest_block_size = bs_src->guest_block_size;
1820 bs_dest->copy_on_read = bs_src->copy_on_read;
1821
1822 bs_dest->enable_write_cache = bs_src->enable_write_cache;
1823
1824 /* i/o throttled req */
1825 memcpy(&bs_dest->throttle_state,
1826 &bs_src->throttle_state,
1827 sizeof(ThrottleState));
1828 bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
1829 bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
1830 bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
1831
1832 /* r/w error */
1833 bs_dest->on_read_error = bs_src->on_read_error;
1834 bs_dest->on_write_error = bs_src->on_write_error;
1835
1836 /* i/o status */
1837 bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
1838 bs_dest->iostatus = bs_src->iostatus;
1839
1840 /* dirty bitmap */
1841 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
1842
1843 /* reference count */
1844 bs_dest->refcnt = bs_src->refcnt;
1845
1846 /* job */
1847 bs_dest->job = bs_src->job;
1848
1849 /* keep the same entry in bdrv_states */
1850 bs_dest->device_list = bs_src->device_list;
1851 bs_dest->blk = bs_src->blk;
1852
1853 memcpy(bs_dest->op_blockers, bs_src->op_blockers,
1854 sizeof(bs_dest->op_blockers));
1855 }
1856
1857 /*
1858 * Swap bs contents for two image chains while they are live,
1859 * while keeping required fields on the BlockDriverState that is
1860 * actually attached to a device.
1861 *
1862 * This will modify the BlockDriverState fields, and swap contents
1863 * between bs_new and bs_old. Both bs_new and bs_old are modified.
1864 *
1865 * bs_new must not be attached to a BlockBackend.
1866 *
1867 * This function does not create any image files.
1868 */
1869 void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
1870 {
1871 BlockDriverState tmp;
1872
1873 /* The code needs to swap the node_name but simply swapping node_list won't
1874 * work so first remove the nodes from the graph list, do the swap then
1875 * insert them back if needed.
1876 */
1877 if (bs_new->node_name[0] != '\0') {
1878 QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
1879 }
1880 if (bs_old->node_name[0] != '\0') {
1881 QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
1882 }
1883
1884 /* bs_new must be unattached and shouldn't have anything fancy enabled */
1885 assert(!bs_new->blk);
1886 assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
1887 assert(bs_new->job == NULL);
1888 assert(bs_new->io_limits_enabled == false);
1889 assert(!throttle_have_timer(&bs_new->throttle_state));
1890
1891 tmp = *bs_new;
1892 *bs_new = *bs_old;
1893 *bs_old = tmp;
1894
1895 /* there are some fields that should not be swapped, move them back */
1896 bdrv_move_feature_fields(&tmp, bs_old);
1897 bdrv_move_feature_fields(bs_old, bs_new);
1898 bdrv_move_feature_fields(bs_new, &tmp);
1899
1900 /* bs_new must remain unattached */
1901 assert(!bs_new->blk);
1902
1903 /* Check a few fields that should remain attached to the device */
1904 assert(bs_new->job == NULL);
1905 assert(bs_new->io_limits_enabled == false);
1906 assert(!throttle_have_timer(&bs_new->throttle_state));
1907
1908 /* insert the nodes back into the graph node list if needed */
1909 if (bs_new->node_name[0] != '\0') {
1910 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
1911 }
1912 if (bs_old->node_name[0] != '\0') {
1913 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
1914 }
1915
1916 bdrv_rebind(bs_new);
1917 bdrv_rebind(bs_old);
1918 }
1919
1920 /*
1921 * Add new bs contents at the top of an image chain while the chain is
1922 * live, while keeping required fields on the top layer.
1923 *
1924 * This will modify the BlockDriverState fields, and swap contents
1925 * between bs_new and bs_top. Both bs_new and bs_top are modified.
1926 *
1927 * bs_new must not be attached to a BlockBackend.
1928 *
1929 * This function does not create any image files.
1930 */
1931 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
1932 {
1933 bdrv_swap(bs_new, bs_top);
1934
1935 /* The contents of 'tmp' will become bs_top, as we are
1936 * swapping bs_new and bs_top contents. */
1937 bdrv_set_backing_hd(bs_top, bs_new);
1938 }
1939
1940 static void bdrv_delete(BlockDriverState *bs)
1941 {
1942 assert(!bs->job);
1943 assert(bdrv_op_blocker_is_empty(bs));
1944 assert(!bs->refcnt);
1945 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
1946
1947 bdrv_close(bs);
1948
1949 /* remove from list, if necessary */
1950 bdrv_make_anon(bs);
1951
1952 g_free(bs);
1953 }
1954
1955 /*
1956 * Run consistency checks on an image
1957 *
1958 * Returns 0 if the check could be completed (it doesn't mean that the image is
1959 * free of errors) or -errno when an internal error occurred. The results of the
1960 * check are stored in res.
1961 */
1962 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
1963 {
1964 if (bs->drv == NULL) {
1965 return -ENOMEDIUM;
1966 }
1967 if (bs->drv->bdrv_check == NULL) {
1968 return -ENOTSUP;
1969 }
1970
1971 memset(res, 0, sizeof(*res));
1972 return bs->drv->bdrv_check(bs, res, fix);
1973 }
1974
1975 #define COMMIT_BUF_SECTORS 2048
1976
1977 /* commit COW file into the raw image */
1978 int bdrv_commit(BlockDriverState *bs)
1979 {
1980 BlockDriver *drv = bs->drv;
1981 int64_t sector, total_sectors, length, backing_length;
1982 int n, ro, open_flags;
1983 int ret = 0;
1984 uint8_t *buf = NULL;
1985
1986 if (!drv)
1987 return -ENOMEDIUM;
1988
1989 if (!bs->backing_hd) {
1990 return -ENOTSUP;
1991 }
1992
1993 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
1994 bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
1995 return -EBUSY;
1996 }
1997
1998 ro = bs->backing_hd->read_only;
1999 open_flags = bs->backing_hd->open_flags;
2000
2001 if (ro) {
2002 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2003 return -EACCES;
2004 }
2005 }
2006
2007 length = bdrv_getlength(bs);
2008 if (length < 0) {
2009 ret = length;
2010 goto ro_cleanup;
2011 }
2012
2013 backing_length = bdrv_getlength(bs->backing_hd);
2014 if (backing_length < 0) {
2015 ret = backing_length;
2016 goto ro_cleanup;
2017 }
2018
2019 /* If our top snapshot is larger than the backing file image,
2020 * grow the backing file image if possible. If not possible,
2021 * we must return an error */
2022 if (length > backing_length) {
2023 ret = bdrv_truncate(bs->backing_hd, length);
2024 if (ret < 0) {
2025 goto ro_cleanup;
2026 }
2027 }
2028
2029 total_sectors = length >> BDRV_SECTOR_BITS;
2030
2031 /* qemu_try_blockalign() for bs will choose an alignment that works for
2032 * bs->backing_hd as well, so no need to compare the alignment manually. */
2033 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2034 if (buf == NULL) {
2035 ret = -ENOMEM;
2036 goto ro_cleanup;
2037 }
2038
2039 for (sector = 0; sector < total_sectors; sector += n) {
2040 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2041 if (ret < 0) {
2042 goto ro_cleanup;
2043 }
2044 if (ret) {
2045 ret = bdrv_read(bs, sector, buf, n);
2046 if (ret < 0) {
2047 goto ro_cleanup;
2048 }
2049
2050 ret = bdrv_write(bs->backing_hd, sector, buf, n);
2051 if (ret < 0) {
2052 goto ro_cleanup;
2053 }
2054 }
2055 }
2056
2057 if (drv->bdrv_make_empty) {
2058 ret = drv->bdrv_make_empty(bs);
2059 if (ret < 0) {
2060 goto ro_cleanup;
2061 }
2062 bdrv_flush(bs);
2063 }
2064
2065 /*
2066 * Make sure all data we wrote to the backing device is actually
2067 * stable on disk.
2068 */
2069 if (bs->backing_hd) {
2070 bdrv_flush(bs->backing_hd);
2071 }
2072
2073 ret = 0;
2074 ro_cleanup:
2075 qemu_vfree(buf);
2076
2077 if (ro) {
2078 /* ignoring error return here */
2079 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
2080 }
2081
2082 return ret;
2083 }
2084
2085 int bdrv_commit_all(void)
2086 {
2087 BlockDriverState *bs;
2088
2089 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2090 AioContext *aio_context = bdrv_get_aio_context(bs);
2091
2092 aio_context_acquire(aio_context);
2093 if (bs->drv && bs->backing_hd) {
2094 int ret = bdrv_commit(bs);
2095 if (ret < 0) {
2096 aio_context_release(aio_context);
2097 return ret;
2098 }
2099 }
2100 aio_context_release(aio_context);
2101 }
2102 return 0;
2103 }
2104
2105 /*
2106 * Return values:
2107 * 0 - success
2108 * -EINVAL - backing format specified, but no file
2109 * -ENOSPC - can't update the backing file because no space is left in the
2110 * image file header
2111 * -ENOTSUP - format driver doesn't support changing the backing file
2112 */
2113 int bdrv_change_backing_file(BlockDriverState *bs,
2114 const char *backing_file, const char *backing_fmt)
2115 {
2116 BlockDriver *drv = bs->drv;
2117 int ret;
2118
2119 /* Backing file format doesn't make sense without a backing file */
2120 if (backing_fmt && !backing_file) {
2121 return -EINVAL;
2122 }
2123
2124 if (drv->bdrv_change_backing_file != NULL) {
2125 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2126 } else {
2127 ret = -ENOTSUP;
2128 }
2129
2130 if (ret == 0) {
2131 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2132 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2133 }
2134 return ret;
2135 }
2136
2137 /*
2138 * Finds the image layer in the chain that has 'bs' as its backing file.
2139 *
2140 * active is the current topmost image.
2141 *
2142 * Returns NULL if bs is not found in active's image chain,
2143 * or if active == bs.
2144 *
2145 * Returns the bottommost base image if bs == NULL.
2146 */
2147 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2148 BlockDriverState *bs)
2149 {
2150 while (active && bs != active->backing_hd) {
2151 active = active->backing_hd;
2152 }
2153
2154 return active;
2155 }
2156
2157 /* Given a BDS, searches for the base layer. */
2158 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2159 {
2160 return bdrv_find_overlay(bs, NULL);
2161 }
2162
/*
 * List element used by bdrv_drop_intermediate() to remember the images
 * that are going to be dropped from the chain.
 */
2163 typedef struct BlkIntermediateStates {
/* the image to drop */
2164 BlockDriverState *bs;
/* linkage within the list */
2165 QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
2166 } BlkIntermediateStates;
2167
2168
2169 /*
2170 * Drops images above 'base' up to and including 'top', and sets the image
2171 * above 'top' to have base as its backing file.
2172 *
2173 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2174 * information in 'bs' can be properly updated.
2175 *
2176 * E.g., this will convert the following chain:
2177 * bottom <- base <- intermediate <- top <- active
2178 *
2179 * to
2180 *
2181 * bottom <- base <- active
2182 *
2183 * It is allowed for bottom==base, in which case it converts:
2184 *
2185 * base <- intermediate <- top <- active
2186 *
2187 * to
2188 *
2189 * base <- active
2190 *
2191 * If backing_file_str is non-NULL, it will be used when modifying top's
2192 * overlay image metadata.
2193 *
2194 * Error conditions:
2195 * if active == top, that is considered an error
2196 *
2197 */
2198 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2199 BlockDriverState *base, const char *backing_file_str)
2200 {
2201 BlockDriverState *intermediate;
2202 BlockDriverState *base_bs = NULL;
2203 BlockDriverState *new_top_bs = NULL;
2204 BlkIntermediateStates *intermediate_state, *next;
2205 int ret = -EIO;
2206
2207 QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2208 QSIMPLEQ_INIT(&states_to_delete);
2209
2210 if (!top->drv || !base->drv) {
2211 goto exit;
2212 }
2213
2214 new_top_bs = bdrv_find_overlay(active, top);
2215
2216 if (new_top_bs == NULL) {
2217 /* we could not find the image above 'top', this is an error */
2218 goto exit;
2219 }
2220
2221 /* special case of new_top_bs->backing_hd already pointing to base - nothing
2222 * to do, no intermediate images */
2223 if (new_top_bs->backing_hd == base) {
2224 ret = 0;
2225 goto exit;
2226 }
2227
2228 intermediate = top;
2229
2230 /* now we will go down through the list, and add each BDS we find
2231 * into our deletion queue, until we hit the 'base'
2232 */
2233 while (intermediate) {
2234 intermediate_state = g_new0(BlkIntermediateStates, 1);
2235 intermediate_state->bs = intermediate;
2236 QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2237
2238 if (intermediate->backing_hd == base) {
2239 base_bs = intermediate->backing_hd;
2240 break;
2241 }
2242 intermediate = intermediate->backing_hd;
2243 }
2244 if (base_bs == NULL) {
2245 /* something went wrong, we did not end at the base. safely
2246 * unravel everything, and exit with error */
2247 goto exit;
2248 }
2249
2250 /* success - we can delete the intermediate states, and link top->base */
2251 backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
2252 ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2253 base_bs->drv ? base_bs->drv->format_name : "");
2254 if (ret) {
2255 goto exit;
2256 }
2257 bdrv_set_backing_hd(new_top_bs, base_bs);
2258
2259 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2260 /* so that bdrv_close() does not recursively close the chain */
2261 bdrv_set_backing_hd(intermediate_state->bs, NULL);
2262 bdrv_unref(intermediate_state->bs);
2263 }
2264 ret = 0;
2265
2266 exit:
2267 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2268 g_free(intermediate_state);
2269 }
2270 return ret;
2271 }
2272
2273 /**
2274 * Truncate file to 'offset' bytes (needed only for file protocols)
2275 */
2276 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2277 {
2278 BlockDriver *drv = bs->drv;
2279 int ret;
2280 if (!drv)
2281 return -ENOMEDIUM;
2282 if (!drv->bdrv_truncate)
2283 return -ENOTSUP;
2284 if (bs->read_only)
2285 return -EACCES;
2286
2287 ret = drv->bdrv_truncate(bs, offset);
2288 if (ret == 0) {
2289 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2290 bdrv_dirty_bitmap_truncate(bs);
2291 if (bs->blk) {
2292 blk_dev_resize_cb(bs->blk);
2293 }
2294 }
2295 return ret;
2296 }
2297
2298 /**
2299 * Length of a allocated file in bytes. Sparse files are counted by actual
2300 * allocated space. Return < 0 if error or unknown.
2301 */
2302 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2303 {
2304 BlockDriver *drv = bs->drv;
2305 if (!drv) {
2306 return -ENOMEDIUM;
2307 }
2308 if (drv->bdrv_get_allocated_file_size) {
2309 return drv->bdrv_get_allocated_file_size(bs);
2310 }
2311 if (bs->file) {
2312 return bdrv_get_allocated_file_size(bs->file);
2313 }
2314 return -ENOTSUP;
2315 }
2316
2317 /**
2318 * Return number of sectors on success, -errno on error.
2319 */
2320 int64_t bdrv_nb_sectors(BlockDriverState *bs)
2321 {
2322 BlockDriver *drv = bs->drv;
2323
2324 if (!drv)
2325 return -ENOMEDIUM;
2326
2327 if (drv->has_variable_length) {
2328 int ret = refresh_total_sectors(bs, bs->total_sectors);
2329 if (ret < 0) {
2330 return ret;
2331 }
2332 }
2333 return bs->total_sectors;
2334 }
2335
2336 /**
2337 * Return length in bytes on success, -errno on error.
2338 * The length is always a multiple of BDRV_SECTOR_SIZE.
2339 */
2340 int64_t bdrv_getlength(BlockDriverState *bs)
2341 {
2342 int64_t ret = bdrv_nb_sectors(bs);
2343
2344 ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2345 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2346 }
2347
2348 /* return 0 as number of sectors if no device present or error */
2349 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2350 {
2351 int64_t nb_sectors = bdrv_nb_sectors(bs);
2352
2353 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2354 }
2355
2356 void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
2357 BlockdevOnError on_write_error)
2358 {
2359 bs->on_read_error = on_read_error;
2360 bs->on_write_error = on_write_error;
2361 }
2362
2363 BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
2364 {
2365 return is_read ? bs->on_read_error : bs->on_write_error;
2366 }
2367
2368 BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
2369 {
2370 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
2371
2372 switch (on_err) {
2373 case BLOCKDEV_ON_ERROR_ENOSPC:
2374 return (error == ENOSPC) ?
2375 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
2376 case BLOCKDEV_ON_ERROR_STOP:
2377 return BLOCK_ERROR_ACTION_STOP;
2378 case BLOCKDEV_ON_ERROR_REPORT:
2379 return BLOCK_ERROR_ACTION_REPORT;
2380 case BLOCKDEV_ON_ERROR_IGNORE:
2381 return BLOCK_ERROR_ACTION_IGNORE;
2382 default:
2383 abort();
2384 }
2385 }
2386
2387 static void send_qmp_error_event(BlockDriverState *bs,
2388 BlockErrorAction action,
2389 bool is_read, int error)
2390 {
2391 IoOperationType optype;
2392
2393 optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
2394 qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
2395 bdrv_iostatus_is_enabled(bs),
2396 error == ENOSPC, strerror(error),
2397 &error_abort);
2398 }
2399
2400 /* This is done by device models because, while the block layer knows
2401 * about the error, it does not know whether an operation comes from
2402 * the device or the block layer (from a job, for example).
2403 */
2404 void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
2405 bool is_read, int error)
2406 {
2407 assert(error >= 0);
2408
2409 if (action == BLOCK_ERROR_ACTION_STOP) {
2410 /* First set the iostatus, so that "info block" returns an iostatus
2411 * that matches the events raised so far (an additional error iostatus
2412 * is fine, but not a lost one).
2413 */
2414 bdrv_iostatus_set_err(bs, error);
2415
2416 /* Then raise the request to stop the VM and the event.
2417 * qemu_system_vmstop_request_prepare has two effects. First,
2418 * it ensures that the STOP event always comes after the
2419 * BLOCK_IO_ERROR event. Second, it ensures that even if management
2420 * can observe the STOP event and do a "cont" before the STOP
2421 * event is issued, the VM will not stop. In this case, vm_start()
2422 * also ensures that the STOP/RESUME pair of events is emitted.
2423 */
2424 qemu_system_vmstop_request_prepare();
2425 send_qmp_error_event(bs, action, is_read, error);
2426 qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
2427 } else {
2428 send_qmp_error_event(bs, action, is_read, error);
2429 }
2430 }
2431
2432 int bdrv_is_read_only(BlockDriverState *bs)
2433 {
2434 return bs->read_only;
2435 }
2436
2437 int bdrv_is_sg(BlockDriverState *bs)
2438 {
2439 return bs->sg;
2440 }
2441
2442 int bdrv_enable_write_cache(BlockDriverState *bs)
2443 {
2444 return bs->enable_write_cache;
2445 }
2446
2447 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2448 {
2449 bs->enable_write_cache = wce;
2450
2451 /* so a reopen() will preserve wce */
2452 if (wce) {
2453 bs->open_flags |= BDRV_O_CACHE_WB;
2454 } else {
2455 bs->open_flags &= ~BDRV_O_CACHE_WB;
2456 }
2457 }
2458
2459 int bdrv_is_encrypted(BlockDriverState *bs)
2460 {
2461 if (bs->backing_hd && bs->backing_hd->encrypted)
2462 return 1;
2463 return bs->encrypted;
2464 }
2465
2466 int bdrv_key_required(BlockDriverState *bs)
2467 {
2468 BlockDriverState *backing_hd = bs->backing_hd;
2469
2470 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2471 return 1;
2472 return (bs->encrypted && !bs->valid_key);
2473 }
2474
2475 int bdrv_set_key(BlockDriverState *bs, const char *key)
2476 {
2477 int ret;
2478 if (bs->backing_hd && bs->backing_hd->encrypted) {
2479 ret = bdrv_set_key(bs->backing_hd, key);
2480 if (ret < 0)
2481 return ret;
2482 if (!bs->encrypted)
2483 return 0;
2484 }
2485 if (!bs->encrypted) {
2486 return -EINVAL;
2487 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2488 return -ENOMEDIUM;
2489 }
2490 ret = bs->drv->bdrv_set_key(bs, key);
2491 if (ret < 0) {
2492 bs->valid_key = 0;
2493 } else if (!bs->valid_key) {
2494 bs->valid_key = 1;
2495 if (bs->blk) {
2496 /* call the change callback now, we skipped it on open */
2497 blk_dev_change_media_cb(bs->blk, true);
2498 }
2499 }
2500 return ret;
2501 }
2502
2503 /*
2504 * Provide an encryption key for @bs.
2505 * If @key is non-null:
2506 * If @bs is not encrypted, fail.
2507 * Else if the key is invalid, fail.
2508 * Else set @bs's key to @key, replacing the existing key, if any.
2509 * If @key is null:
2510 * If @bs is encrypted and still lacks a key, fail.
2511 * Else do nothing.
2512 * On failure, store an error object through @errp if non-null.
2513 */
2514 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2515 {
2516 if (key) {
2517 if (!bdrv_is_encrypted(bs)) {
2518 error_setg(errp, "Node '%s' is not encrypted",
2519 bdrv_get_device_or_node_name(bs));
2520 } else if (bdrv_set_key(bs, key) < 0) {
2521 error_set(errp, QERR_INVALID_PASSWORD);
2522 }
2523 } else {
2524 if (bdrv_key_required(bs)) {
2525 error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2526 "'%s' (%s) is encrypted",
2527 bdrv_get_device_or_node_name(bs),
2528 bdrv_get_encrypted_filename(bs));
2529 }
2530 }
2531 }
2532
2533 const char *bdrv_get_format_name(BlockDriverState *bs)
2534 {
2535 return bs->drv ? bs->drv->format_name : NULL;
2536 }
2537
/* qsort() comparator for an array of C strings.
 *
 * qsort() hands the comparator pointers to the array elements, so 'a' and
 * 'b' are really 'const char **'. They must be dereferenced once before
 * being passed to strcmp(); comparing them directly would compare the
 * bytes of the pointers rather than the strings they point to. */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
2542
2543 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2544 void *opaque)
2545 {
2546 BlockDriver *drv;
2547 int count = 0;
2548 int i;
2549 const char **formats = NULL;
2550
2551 QLIST_FOREACH(drv, &bdrv_drivers, list) {
2552 if (drv->format_name) {
2553 bool found = false;
2554 int i = count;
2555 while (formats && i && !found) {
2556 found = !strcmp(formats[--i], drv->format_name);
2557 }
2558
2559 if (!found) {
2560 formats = g_renew(const char *, formats, count + 1);
2561 formats[count++] = drv->format_name;
2562 }
2563 }
2564 }
2565
2566 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2567
2568 for (i = 0; i < count; i++) {
2569 it(opaque, formats[i]);
2570 }
2571
2572 g_free(formats);
2573 }
2574
2575 /* This function is to find a node in the bs graph */
2576 BlockDriverState *bdrv_find_node(const char *node_name)
2577 {
2578 BlockDriverState *bs;
2579
2580 assert(node_name);
2581
2582 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2583 if (!strcmp(node_name, bs->node_name)) {
2584 return bs;
2585 }
2586 }
2587 return NULL;
2588 }
2589
2590 /* Put this QMP function here so it can access the static graph_bdrv_states. */
2591 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2592 {
2593 BlockDeviceInfoList *list, *entry;
2594 BlockDriverState *bs;
2595
2596 list = NULL;
2597 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2598 BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2599 if (!info) {
2600 qapi_free_BlockDeviceInfoList(list);
2601 return NULL;
2602 }
2603 entry = g_malloc0(sizeof(*entry));
2604 entry->value = info;
2605 entry->next = list;
2606 list = entry;
2607 }
2608
2609 return list;
2610 }
2611
2612 BlockDriverState *bdrv_lookup_bs(const char *device,
2613 const char *node_name,
2614 Error **errp)
2615 {
2616 BlockBackend *blk;
2617 BlockDriverState *bs;
2618
2619 if (device) {
2620 blk = blk_by_name(device);
2621
2622 if (blk) {
2623 return blk_bs(blk);
2624 }
2625 }
2626
2627 if (node_name) {
2628 bs = bdrv_find_node(node_name);
2629
2630 if (bs) {
2631 return bs;
2632 }
2633 }
2634
2635 error_setg(errp, "Cannot find device=%s nor node_name=%s",
2636 device ? device : "",
2637 node_name ? node_name : "");
2638 return NULL;
2639 }
2640
2641 /* If 'base' is in the same chain as 'top', return true. Otherwise,
2642 * return false. If either argument is NULL, return false. */
2643 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2644 {
2645 while (top && top != base) {
2646 top = top->backing_hd;
2647 }
2648
2649 return top != NULL;
2650 }
2651
2652 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2653 {
2654 if (!bs) {
2655 return QTAILQ_FIRST(&graph_bdrv_states);
2656 }
2657 return QTAILQ_NEXT(bs, node_list);
2658 }
2659
2660 BlockDriverState *bdrv_next(BlockDriverState *bs)
2661 {
2662 if (!bs) {
2663 return QTAILQ_FIRST(&bdrv_states);
2664 }
2665 return QTAILQ_NEXT(bs, device_list);
2666 }
2667
2668 const char *bdrv_get_node_name(const BlockDriverState *bs)
2669 {
2670 return bs->node_name;
2671 }
2672
2673 /* TODO check what callers really want: bs->node_name or blk_name() */
2674 const char *bdrv_get_device_name(const BlockDriverState *bs)
2675 {
2676 return bs->blk ? blk_name(bs->blk) : "";
2677 }
2678
2679 /* This can be used to identify nodes that might not have a device
2680 * name associated. Since node and device names live in the same
2681 * namespace, the result is unambiguous. The exception is if both are
2682 * absent, then this returns an empty (non-null) string. */
2683 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
2684 {
2685 return bs->blk ? blk_name(bs->blk) : bs->node_name;
2686 }
2687
2688 int bdrv_get_flags(BlockDriverState *bs)
2689 {
2690 return bs->open_flags;
2691 }
2692
2693 int bdrv_has_zero_init_1(BlockDriverState *bs)
2694 {
2695 return 1;
2696 }
2697
2698 int bdrv_has_zero_init(BlockDriverState *bs)
2699 {
2700 assert(bs->drv);
2701
2702 /* If BS is a copy on write image, it is initialized to
2703 the contents of the base image, which may not be zeroes. */
2704 if (bs->backing_hd) {
2705 return 0;
2706 }
2707 if (bs->drv->bdrv_has_zero_init) {
2708 return bs->drv->bdrv_has_zero_init(bs);
2709 }
2710
2711 /* safe default */
2712 return 0;
2713 }
2714
2715 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
2716 {
2717 BlockDriverInfo bdi;
2718
2719 if (bs->backing_hd) {
2720 return false;
2721 }
2722
2723 if (bdrv_get_info(bs, &bdi) == 0) {
2724 return bdi.unallocated_blocks_are_zero;
2725 }
2726
2727 return false;
2728 }
2729
2730 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
2731 {
2732 BlockDriverInfo bdi;
2733
2734 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
2735 return false;
2736 }
2737
2738 if (bdrv_get_info(bs, &bdi) == 0) {
2739 return bdi.can_write_zeroes_with_unmap;
2740 }
2741
2742 return false;
2743 }
2744
2745 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2746 {
2747 if (bs->backing_hd && bs->backing_hd->encrypted)
2748 return bs->backing_file;
2749 else if (bs->encrypted)
2750 return bs->filename;
2751 else
2752 return NULL;
2753 }
2754
2755 void bdrv_get_backing_filename(BlockDriverState *bs,
2756 char *filename, int filename_size)
2757 {
2758 pstrcpy(filename, filename_size, bs->backing_file);
2759 }
2760
2761 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2762 {
2763 BlockDriver *drv = bs->drv;
2764 if (!drv)
2765 return -ENOMEDIUM;
2766 if (!drv->bdrv_get_info)
2767 return -ENOTSUP;
2768 memset(bdi, 0, sizeof(*bdi));
2769 return drv->bdrv_get_info(bs, bdi);
2770 }
2771
2772 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
2773 {
2774 BlockDriver *drv = bs->drv;
2775 if (drv && drv->bdrv_get_specific_info) {
2776 return drv->bdrv_get_specific_info(bs);
2777 }
2778 return NULL;
2779 }
2780
2781 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2782 {
2783 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
2784 return;
2785 }
2786
2787 bs->drv->bdrv_debug_event(bs, event);
2788 }
2789
2790 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
2791 const char *tag)
2792 {
2793 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
2794 bs = bs->file;
2795 }
2796
2797 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
2798 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
2799 }
2800
2801 return -ENOTSUP;
2802 }
2803
2804 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
2805 {
2806 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
2807 bs = bs->file;
2808 }
2809
2810 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
2811 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
2812 }
2813
2814 return -ENOTSUP;
2815 }
2816
2817 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
2818 {
2819 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
2820 bs = bs->file;
2821 }
2822
2823 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
2824 return bs->drv->bdrv_debug_resume(bs, tag);
2825 }
2826
2827 return -ENOTSUP;
2828 }
2829
2830 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
2831 {
2832 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
2833 bs = bs->file;
2834 }
2835
2836 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
2837 return bs->drv->bdrv_debug_is_suspended(bs, tag);
2838 }
2839
2840 return false;
2841 }
2842
2843 int bdrv_is_snapshot(BlockDriverState *bs)
2844 {
2845 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2846 }
2847
2848 /* backing_file can either be relative, or absolute, or a protocol. If it is
2849 * relative, it must be relative to the chain. So, passing in bs->filename
2850 * from a BDS as backing_file should not be done, as that may be relative to
2851 * the CWD rather than the chain. */
2852 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2853 const char *backing_file)
2854 {
2855 char *filename_full = NULL;
2856 char *backing_file_full = NULL;
2857 char *filename_tmp = NULL;
2858 int is_protocol = 0;
2859 BlockDriverState *curr_bs = NULL;
2860 BlockDriverState *retval = NULL;
2861
2862 if (!bs || !bs->drv || !backing_file) {
2863 return NULL;
2864 }
2865
2866 filename_full = g_malloc(PATH_MAX);
2867 backing_file_full = g_malloc(PATH_MAX);
2868 filename_tmp = g_malloc(PATH_MAX);
2869
2870 is_protocol = path_has_protocol(backing_file);
2871
2872 for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
2873
2874 /* If either of the filename paths is actually a protocol, then
2875 * compare unmodified paths; otherwise make paths relative */
2876 if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
2877 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
2878 retval = curr_bs->backing_hd;
2879 break;
2880 }
2881 } else {
2882 /* If not an absolute filename path, make it relative to the current
2883 * image's filename path */
2884 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
2885 backing_file);
2886
2887 /* We are going to compare absolute pathnames */
2888 if (!realpath(filename_tmp, filename_full)) {
2889 continue;
2890 }
2891
2892 /* We need to make sure the backing filename we are comparing against
2893 * is relative to the current image filename (or absolute) */
2894 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
2895 curr_bs->backing_file);
2896
2897 if (!realpath(filename_tmp, backing_file_full)) {
2898 continue;
2899 }
2900
2901 if (strcmp(backing_file_full, filename_full) == 0) {
2902 retval = curr_bs->backing_hd;
2903 break;
2904 }
2905 }
2906 }
2907
2908 g_free(filename_full);
2909 g_free(backing_file_full);
2910 g_free(filename_tmp);
2911 return retval;
2912 }
2913
2914 int bdrv_get_backing_file_depth(BlockDriverState *bs)
2915 {
2916 if (!bs->drv) {
2917 return 0;
2918 }
2919
2920 if (!bs->backing_hd) {
2921 return 0;
2922 }
2923
2924 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
2925 }
2926
2927 void bdrv_init(void)
2928 {
2929 module_call_init(MODULE_INIT_BLOCK);
2930 }
2931
2932 void bdrv_init_with_whitelist(void)
2933 {
2934 use_bdrv_whitelist = 1;
2935 bdrv_init();
2936 }
2937
2938 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
2939 {
2940 Error *local_err = NULL;
2941 int ret;
2942
2943 if (!bs->drv) {
2944 return;
2945 }
2946
2947 if (!(bs->open_flags & BDRV_O_INCOMING)) {
2948 return;
2949 }
2950 bs->open_flags &= ~BDRV_O_INCOMING;
2951
2952 if (bs->drv->bdrv_invalidate_cache) {
2953 bs->drv->bdrv_invalidate_cache(bs, &local_err);
2954 } else if (bs->file) {
2955 bdrv_invalidate_cache(bs->file, &local_err);
2956 }
2957 if (local_err) {
2958 error_propagate(errp, local_err);
2959 return;
2960 }
2961
2962 ret = refresh_total_sectors(bs, bs->total_sectors);
2963 if (ret < 0) {
2964 error_setg_errno(errp, -ret, "Could not refresh total sector count");
2965 return;
2966 }
2967 }
2968
2969 void bdrv_invalidate_cache_all(Error **errp)
2970 {
2971 BlockDriverState *bs;
2972 Error *local_err = NULL;
2973
2974 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2975 AioContext *aio_context = bdrv_get_aio_context(bs);
2976
2977 aio_context_acquire(aio_context);
2978 bdrv_invalidate_cache(bs, &local_err);
2979 aio_context_release(aio_context);
2980 if (local_err) {
2981 error_propagate(errp, local_err);
2982 return;
2983 }
2984 }
2985 }
2986
2987 /**************************************************************/
2988 /* removable device support */
2989
2990 /**
2991 * Return TRUE if the media is present
2992 */
2993 int bdrv_is_inserted(BlockDriverState *bs)
2994 {
2995 BlockDriver *drv = bs->drv;
2996
2997 if (!drv)
2998 return 0;
2999 if (!drv->bdrv_is_inserted)
3000 return 1;
3001 return drv->bdrv_is_inserted(bs);
3002 }
3003
3004 /**
3005 * Return whether the media changed since the last call to this
3006 * function, or -ENOTSUP if we don't know. Most drivers don't know.
3007 */
3008 int bdrv_media_changed(BlockDriverState *bs)
3009 {
3010 BlockDriver *drv = bs->drv;
3011
3012 if (drv && drv->bdrv_media_changed) {
3013 return drv->bdrv_media_changed(bs);
3014 }
3015 return -ENOTSUP;
3016 }
3017
3018 /**
3019 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3020 */
3021 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3022 {
3023 BlockDriver *drv = bs->drv;
3024 const char *device_name;
3025
3026 if (drv && drv->bdrv_eject) {
3027 drv->bdrv_eject(bs, eject_flag);
3028 }
3029
3030 device_name = bdrv_get_device_name(bs);
3031 if (device_name[0] != '\0') {
3032 qapi_event_send_device_tray_moved(device_name,
3033 eject_flag, &error_abort);
3034 }
3035 }
3036
3037 /**
3038 * Lock or unlock the media (if it is locked, the user won't be able
3039 * to eject it manually).
3040 */
3041 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3042 {
3043 BlockDriver *drv = bs->drv;
3044
3045 trace_bdrv_lock_medium(bs, locked);
3046
3047 if (drv && drv->bdrv_lock_medium) {
3048 drv->bdrv_lock_medium(bs, locked);
3049 }
3050 }
3051
3052 void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
3053 {
3054 bs->guest_block_size = align;
3055 }
3056
3057 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
3058 {
3059 BdrvDirtyBitmap *bm;
3060
3061 assert(name);
3062 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3063 if (bm->name && !strcmp(name, bm->name)) {
3064 return bm;
3065 }
3066 }
3067 return NULL;
3068 }
3069
3070 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
3071 {
3072 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3073 g_free(bitmap->name);
3074 bitmap->name = NULL;
3075 }
3076
3077 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
3078 uint32_t granularity,
3079 const char *name,
3080 Error **errp)
3081 {
3082 int64_t bitmap_size;
3083 BdrvDirtyBitmap *bitmap;
3084 uint32_t sector_granularity;
3085
3086 assert((granularity & (granularity - 1)) == 0);
3087
3088 if (name && bdrv_find_dirty_bitmap(bs, name)) {
3089 error_setg(errp, "Bitmap already exists: %s", name);
3090 return NULL;
3091 }
3092 sector_granularity = granularity >> BDRV_SECTOR_BITS;
3093 assert(sector_granularity);
3094 bitmap_size = bdrv_nb_sectors(bs);
3095 if (bitmap_size < 0) {
3096 error_setg_errno(errp, -bitmap_size, "could not get length of device");
3097 errno = -bitmap_size;
3098 return NULL;
3099 }
3100 bitmap = g_new0(BdrvDirtyBitmap, 1);
3101 bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
3102 bitmap->size = bitmap_size;
3103 bitmap->name = g_strdup(name);
3104 bitmap->disabled = false;
3105 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
3106 return bitmap;
3107 }
3108
3109 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
3110 {
3111 return bitmap->successor;
3112 }
3113
3114 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
3115 {
3116 return !(bitmap->disabled || bitmap->successor);
3117 }
3118
3119 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
3120 {
3121 if (bdrv_dirty_bitmap_frozen(bitmap)) {
3122 return DIRTY_BITMAP_STATUS_FROZEN;
3123 } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3124 return DIRTY_BITMAP_STATUS_DISABLED;
3125 } else {
3126 return DIRTY_BITMAP_STATUS_ACTIVE;
3127 }
3128 }
3129
3130 /**
3131 * Create a successor bitmap destined to replace this bitmap after an operation.
3132 * Requires that the bitmap is not frozen and has no successor.
3133 */
3134 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
3135 BdrvDirtyBitmap *bitmap, Error **errp)
3136 {
3137 uint64_t granularity;
3138 BdrvDirtyBitmap *child;
3139
3140 if (bdrv_dirty_bitmap_frozen(bitmap)) {
3141 error_setg(errp, "Cannot create a successor for a bitmap that is "
3142 "currently frozen");
3143 return -1;
3144 }
3145 assert(!bitmap->successor);
3146
3147 /* Create an anonymous successor */
3148 granularity = bdrv_dirty_bitmap_granularity(bitmap);
3149 child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
3150 if (!child) {
3151 return -1;
3152 }
3153
3154 /* Successor will be on or off based on our current state. */
3155 child->disabled = bitmap->disabled;
3156
3157 /* Install the successor and freeze the parent */
3158 bitmap->successor = child;
3159 return 0;
3160 }
3161
3162 /**
3163 * For a bitmap with a successor, yield our name to the successor,
3164 * delete the old bitmap, and return a handle to the new bitmap.
3165 */
3166 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
3167 BdrvDirtyBitmap *bitmap,
3168 Error **errp)
3169 {
3170 char *name;
3171 BdrvDirtyBitmap *successor = bitmap->successor;
3172
3173 if (successor == NULL) {
3174 error_setg(errp, "Cannot relinquish control if "
3175 "there's no successor present");
3176 return NULL;
3177 }
3178
3179 name = bitmap->name;
3180 bitmap->name = NULL;
3181 successor->name = name;
3182 bitmap->successor = NULL;
3183 bdrv_release_dirty_bitmap(bs, bitmap);
3184
3185 return successor;
3186 }
3187
3188 /**
3189 * In cases of failure where we can no longer safely delete the parent,
3190 * we may wish to re-join the parent and child/successor.
3191 * The merged parent will be un-frozen, but not explicitly re-enabled.
3192 */
3193 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
3194 BdrvDirtyBitmap *parent,
3195 Error **errp)
3196 {
3197 BdrvDirtyBitmap *successor = parent->successor;
3198
3199 if (!successor) {
3200 error_setg(errp, "Cannot reclaim a successor when none is present");
3201 return NULL;
3202 }
3203
3204 if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
3205 error_setg(errp, "Merging of parent and successor bitmap failed");
3206 return NULL;
3207 }
3208 bdrv_release_dirty_bitmap(bs, successor);
3209 parent->successor = NULL;
3210
3211 return parent;
3212 }
3213
3214 /**
3215 * Truncates _all_ bitmaps attached to a BDS.
3216 */
3217 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
3218 {
3219 BdrvDirtyBitmap *bitmap;
3220 uint64_t size = bdrv_nb_sectors(bs);
3221
3222 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3223 if (bdrv_dirty_bitmap_frozen(bitmap)) {
3224 continue;
3225 }
3226 hbitmap_truncate(bitmap->bitmap, size);
3227 }
3228 }
3229
3230 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
3231 {
3232 BdrvDirtyBitmap *bm, *next;
3233 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
3234 if (bm == bitmap) {
3235 assert(!bdrv_dirty_bitmap_frozen(bm));
3236 QLIST_REMOVE(bitmap, list);
3237 hbitmap_free(bitmap->bitmap);
3238 g_free(bitmap->name);
3239 g_free(bitmap);
3240 return;
3241 }
3242 }
3243 }
3244
3245 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3246 {
3247 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3248 bitmap->disabled = true;
3249 }
3250
3251 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3252 {
3253 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3254 bitmap->disabled = false;
3255 }
3256
3257 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
3258 {
3259 BdrvDirtyBitmap *bm;
3260 BlockDirtyInfoList *list = NULL;
3261 BlockDirtyInfoList **plist = &list;
3262
3263 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3264 BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
3265 BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
3266 info->count = bdrv_get_dirty_count(bm);
3267 info->granularity = bdrv_dirty_bitmap_granularity(bm);
3268 info->has_name = !!bm->name;
3269 info->name = g_strdup(bm->name);
3270 info->status = bdrv_dirty_bitmap_status(bm);
3271 entry->value = info;
3272 *plist = entry;
3273 plist = &entry->next;
3274 }
3275
3276 return list;
3277 }
3278
3279 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
3280 {
3281 if (bitmap) {
3282 return hbitmap_get(bitmap->bitmap, sector);
3283 } else {
3284 return 0;
3285 }
3286 }
3287
3288 /**
3289 * Chooses a default granularity based on the existing cluster size,
3290 * but clamped between [4K, 64K]. Defaults to 64K in the case that there
3291 * is no cluster size information available.
3292 */
3293 uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
3294 {
3295 BlockDriverInfo bdi;
3296 uint32_t granularity;
3297
3298 if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
3299 granularity = MAX(4096, bdi.cluster_size);
3300 granularity = MIN(65536, granularity);
3301 } else {
3302 granularity = 65536;
3303 }
3304
3305 return granularity;
3306 }
3307
3308 uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
3309 {
3310 return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
3311 }
3312
3313 void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
3314 {
3315 hbitmap_iter_init(hbi, bitmap->bitmap, 0);
3316 }
3317
3318 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3319 int64_t cur_sector, int nr_sectors)
3320 {
3321 assert(bdrv_dirty_bitmap_enabled(bitmap));
3322 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3323 }
3324
3325 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3326 int64_t cur_sector, int nr_sectors)
3327 {
3328 assert(bdrv_dirty_bitmap_enabled(bitmap));
3329 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3330 }
3331
3332 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3333 {
3334 assert(bdrv_dirty_bitmap_enabled(bitmap));
3335 hbitmap_reset(bitmap->bitmap, 0, bitmap->size);
3336 }
3337
3338 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
3339 int nr_sectors)
3340 {
3341 BdrvDirtyBitmap *bitmap;
3342 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3343 if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3344 continue;
3345 }
3346 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3347 }
3348 }
3349
3350 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3351 int nr_sectors)
3352 {
3353 BdrvDirtyBitmap *bitmap;
3354 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3355 if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3356 continue;
3357 }
3358 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3359 }
3360 }
3361
3362 /**
3363 * Advance an HBitmapIter to an arbitrary offset.
3364 */
3365 void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
3366 {
3367 assert(hbi->hb);
3368 hbitmap_iter_init(hbi, hbi->hb, offset);
3369 }
3370
3371 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
3372 {
3373 return hbitmap_count(bitmap->bitmap);
3374 }
3375
3376 /* Get a reference to bs */
3377 void bdrv_ref(BlockDriverState *bs)
3378 {
3379 bs->refcnt++;
3380 }
3381
3382 /* Release a previously grabbed reference to bs.
3383 * If after releasing, reference count is zero, the BlockDriverState is
3384 * deleted. */
3385 void bdrv_unref(BlockDriverState *bs)
3386 {
3387 if (!bs) {
3388 return;
3389 }
3390 assert(bs->refcnt > 0);
3391 if (--bs->refcnt == 0) {
3392 bdrv_delete(bs);
3393 }
3394 }
3395
/* One entry in a node's per-operation blocker list. */
struct BdrvOpBlocker {
    /* Identifies the blocker: unblocking matches on this pointer, and its
     * message is quoted when an operation is refused. */
    Error *reason;
    /* Linkage within bs->op_blockers[op]. */
    QLIST_ENTRY(BdrvOpBlocker) list;
};
3400
3401 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3402 {
3403 BdrvOpBlocker *blocker;
3404 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3405 if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3406 blocker = QLIST_FIRST(&bs->op_blockers[op]);
3407 if (errp) {
3408 error_setg(errp, "Node '%s' is busy: %s",
3409 bdrv_get_device_or_node_name(bs),
3410 error_get_pretty(blocker->reason));
3411 }
3412 return true;
3413 }
3414 return false;
3415 }
3416
3417 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3418 {
3419 BdrvOpBlocker *blocker;
3420 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3421
3422 blocker = g_new0(BdrvOpBlocker, 1);
3423 blocker->reason = reason;
3424 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3425 }
3426
3427 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3428 {
3429 BdrvOpBlocker *blocker, *next;
3430 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3431 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3432 if (blocker->reason == reason) {
3433 QLIST_REMOVE(blocker, list);
3434 g_free(blocker);
3435 }
3436 }
3437 }
3438
3439 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3440 {
3441 int i;
3442 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3443 bdrv_op_block(bs, i, reason);
3444 }
3445 }
3446
3447 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3448 {
3449 int i;
3450 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3451 bdrv_op_unblock(bs, i, reason);
3452 }
3453 }
3454
3455 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3456 {
3457 int i;
3458
3459 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3460 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3461 return false;
3462 }
3463 }
3464 return true;
3465 }
3466
3467 void bdrv_iostatus_enable(BlockDriverState *bs)
3468 {
3469 bs->iostatus_enabled = true;
3470 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3471 }
3472
3473 /* The I/O status is only enabled if the drive explicitly
3474 * enables it _and_ the VM is configured to stop on errors */
3475 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3476 {
3477 return (bs->iostatus_enabled &&
3478 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
3479 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
3480 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
3481 }
3482
3483 void bdrv_iostatus_disable(BlockDriverState *bs)
3484 {
3485 bs->iostatus_enabled = false;
3486 }
3487
3488 void bdrv_iostatus_reset(BlockDriverState *bs)
3489 {
3490 if (bdrv_iostatus_is_enabled(bs)) {
3491 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3492 if (bs->job) {
3493 block_job_iostatus_reset(bs->job);
3494 }
3495 }
3496 }
3497
3498 void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3499 {
3500 assert(bdrv_iostatus_is_enabled(bs));
3501 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
3502 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3503 BLOCK_DEVICE_IO_STATUS_FAILED;
3504 }
3505 }
3506
3507 void bdrv_img_create(const char *filename, const char *fmt,
3508 const char *base_filename, const char *base_fmt,
3509 char *options, uint64_t img_size, int flags,
3510 Error **errp, bool quiet)
3511 {
3512 QemuOptsList *create_opts = NULL;
3513 QemuOpts *opts = NULL;
3514 const char *backing_fmt, *backing_file;
3515 int64_t size;
3516 BlockDriver *drv, *proto_drv;
3517 BlockDriver *backing_drv = NULL;
3518 Error *local_err = NULL;
3519 int ret = 0;
3520
3521 /* Find driver and parse its options */
3522 drv = bdrv_find_format(fmt);
3523 if (!drv) {
3524 error_setg(errp, "Unknown file format '%s'", fmt);
3525 return;
3526 }
3527
3528 proto_drv = bdrv_find_protocol(filename, true, errp);
3529 if (!proto_drv) {
3530 return;
3531 }
3532
3533 if (!drv->create_opts) {
3534 error_setg(errp, "Format driver '%s' does not support image creation",
3535 drv->format_name);
3536 return;
3537 }
3538
3539 if (!proto_drv->create_opts) {
3540 error_setg(errp, "Protocol driver '%s' does not support image creation",
3541 proto_drv->format_name);
3542 return;
3543 }
3544
3545 create_opts = qemu_opts_append(create_opts, drv->create_opts);
3546 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
3547
3548 /* Create parameter list with default values */
3549 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3550 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
3551
3552 /* Parse -o options */
3553 if (options) {
3554 qemu_opts_do_parse(opts, options, NULL, &local_err);
3555 if (local_err) {
3556 error_report_err(local_err);
3557 local_err = NULL;
3558 error_setg(errp, "Invalid options for file format '%s'", fmt);
3559 goto out;
3560 }
3561 }
3562
3563 if (base_filename) {
3564 qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
3565 if (local_err) {
3566 error_setg(errp, "Backing file not supported for file format '%s'",
3567 fmt);
3568 goto out;
3569 }
3570 }
3571
3572 if (base_fmt) {
3573 qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
3574 if (local_err) {
3575 error_setg(errp, "Backing file format not supported for file "
3576 "format '%s'", fmt);
3577 goto out;
3578 }
3579 }
3580
3581 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
3582 if (backing_file) {
3583 if (!strcmp(filename, backing_file)) {
3584 error_setg(errp, "Error: Trying to create an image with the "
3585 "same filename as the backing file");
3586 goto out;
3587 }
3588 }
3589
3590 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
3591 if (backing_fmt) {
3592 backing_drv = bdrv_find_format(backing_fmt);
3593 if (!backing_drv) {
3594 error_setg(errp, "Unknown backing file format '%s'",
3595 backing_fmt);
3596 goto out;
3597 }
3598 }
3599
3600 // The size for the image must always be specified, with one exception:
3601 // If we are using a backing file, we can obtain the size from there
3602 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
3603 if (size == -1) {
3604 if (backing_file) {
3605 BlockDriverState *bs;
3606 char *full_backing = g_new0(char, PATH_MAX);
3607 int64_t size;
3608 int back_flags;
3609
3610 bdrv_get_full_backing_filename_from_filename(filename, backing_file,
3611 full_backing, PATH_MAX,
3612 &local_err);
3613 if (local_err) {
3614 g_free(full_backing);
3615 goto out;
3616 }
3617
3618 /* backing files always opened read-only */
3619 back_flags =
3620 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
3621
3622 bs = NULL;
3623 ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags,
3624 backing_drv, &local_err);
3625 g_free(full_backing);
3626 if (ret < 0) {
3627 goto out;
3628 }
3629 size = bdrv_getlength(bs);
3630 if (size < 0) {
3631 error_setg_errno(errp, -size, "Could not get size of '%s'",
3632 backing_file);
3633 bdrv_unref(bs);
3634 goto out;
3635 }
3636
3637 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
3638
3639 bdrv_unref(bs);
3640 } else {
3641 error_setg(errp, "Image creation needs a size parameter");
3642 goto out;
3643 }
3644 }
3645
3646 if (!quiet) {
3647 printf("Formatting '%s', fmt=%s", filename, fmt);
3648 qemu_opts_print(opts, " ");
3649 puts("");
3650 }
3651
3652 ret = bdrv_create(drv, filename, opts, &local_err);
3653
3654 if (ret == -EFBIG) {
3655 /* This is generally a better message than whatever the driver would
3656 * deliver (especially because of the cluster_size_hint), since that
3657 * is most probably not much different from "image too large". */
3658 const char *cluster_size_hint = "";
3659 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
3660 cluster_size_hint = " (try using a larger cluster size)";
3661 }
3662 error_setg(errp, "The image size is too large for file format '%s'"
3663 "%s", fmt, cluster_size_hint);
3664 error_free(local_err);
3665 local_err = NULL;
3666 }
3667
3668 out:
3669 qemu_opts_del(opts);
3670 qemu_opts_free(create_opts);
3671 if (local_err) {
3672 error_propagate(errp, local_err);
3673 }
3674 }
3675
3676 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
3677 {
3678 return bs->aio_context;
3679 }
3680
3681 void bdrv_detach_aio_context(BlockDriverState *bs)
3682 {
3683 BdrvAioNotifier *baf;
3684
3685 if (!bs->drv) {
3686 return;
3687 }
3688
3689 QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
3690 baf->detach_aio_context(baf->opaque);
3691 }
3692
3693 if (bs->io_limits_enabled) {
3694 throttle_detach_aio_context(&bs->throttle_state);
3695 }
3696 if (bs->drv->bdrv_detach_aio_context) {
3697 bs->drv->bdrv_detach_aio_context(bs);
3698 }
3699 if (bs->file) {
3700 bdrv_detach_aio_context(bs->file);
3701 }
3702 if (bs->backing_hd) {
3703 bdrv_detach_aio_context(bs->backing_hd);
3704 }
3705
3706 bs->aio_context = NULL;
3707 }
3708
3709 void bdrv_attach_aio_context(BlockDriverState *bs,
3710 AioContext *new_context)
3711 {
3712 BdrvAioNotifier *ban;
3713
3714 if (!bs->drv) {
3715 return;
3716 }
3717
3718 bs->aio_context = new_context;
3719
3720 if (bs->backing_hd) {
3721 bdrv_attach_aio_context(bs->backing_hd, new_context);
3722 }
3723 if (bs->file) {
3724 bdrv_attach_aio_context(bs->file, new_context);
3725 }
3726 if (bs->drv->bdrv_attach_aio_context) {
3727 bs->drv->bdrv_attach_aio_context(bs, new_context);
3728 }
3729 if (bs->io_limits_enabled) {
3730 throttle_attach_aio_context(&bs->throttle_state, new_context);
3731 }
3732
3733 QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
3734 ban->attached_aio_context(new_context, ban->opaque);
3735 }
3736 }
3737
3738 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
3739 {
3740 bdrv_drain_all(); /* ensure there are no in-flight requests */
3741
3742 bdrv_detach_aio_context(bs);
3743
3744 /* This function executes in the old AioContext so acquire the new one in
3745 * case it runs in a different thread.
3746 */
3747 aio_context_acquire(new_context);
3748 bdrv_attach_aio_context(bs, new_context);
3749 aio_context_release(new_context);
3750 }
3751
3752 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
3753 void (*attached_aio_context)(AioContext *new_context, void *opaque),
3754 void (*detach_aio_context)(void *opaque), void *opaque)
3755 {
3756 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
3757 *ban = (BdrvAioNotifier){
3758 .attached_aio_context = attached_aio_context,
3759 .detach_aio_context = detach_aio_context,
3760 .opaque = opaque
3761 };
3762
3763 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
3764 }
3765
3766 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3767 void (*attached_aio_context)(AioContext *,
3768 void *),
3769 void (*detach_aio_context)(void *),
3770 void *opaque)
3771 {
3772 BdrvAioNotifier *ban, *ban_next;
3773
3774 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3775 if (ban->attached_aio_context == attached_aio_context &&
3776 ban->detach_aio_context == detach_aio_context &&
3777 ban->opaque == opaque)
3778 {
3779 QLIST_REMOVE(ban, list);
3780 g_free(ban);
3781
3782 return;
3783 }
3784 }
3785
3786 abort();
3787 }
3788
3789 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
3790 BlockDriverAmendStatusCB *status_cb)
3791 {
3792 if (!bs->drv->bdrv_amend_options) {
3793 return -ENOTSUP;
3794 }
3795 return bs->drv->bdrv_amend_options(bs, opts, status_cb);
3796 }
3797
3798 /* This function will be called by the bdrv_recurse_is_first_non_filter method
3799 * of block filter and by bdrv_is_first_non_filter.
3800 * It is used to test if the given bs is the candidate or recurse more in the
3801 * node graph.
3802 */
3803 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
3804 BlockDriverState *candidate)
3805 {
3806 /* return false if basic checks fails */
3807 if (!bs || !bs->drv) {
3808 return false;
3809 }
3810
3811 /* the code reached a non block filter driver -> check if the bs is
3812 * the same as the candidate. It's the recursion termination condition.
3813 */
3814 if (!bs->drv->is_filter) {
3815 return bs == candidate;
3816 }
3817 /* Down this path the driver is a block filter driver */
3818
3819 /* If the block filter recursion method is defined use it to recurse down
3820 * the node graph.
3821 */
3822 if (bs->drv->bdrv_recurse_is_first_non_filter) {
3823 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
3824 }
3825
3826 /* the driver is a block filter but don't allow to recurse -> return false
3827 */
3828 return false;
3829 }
3830
3831 /* This function checks if the candidate is the first non filter bs down it's
3832 * bs chain. Since we don't have pointers to parents it explore all bs chains
3833 * from the top. Some filters can choose not to pass down the recursion.
3834 */
3835 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
3836 {
3837 BlockDriverState *bs;
3838
3839 /* walk down the bs forest recursively */
3840 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3841 bool perm;
3842
3843 /* try to recurse in this top level bs */
3844 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
3845
3846 /* candidate is the first non filter */
3847 if (perm) {
3848 return true;
3849 }
3850 }
3851
3852 return false;
3853 }
3854
3855 BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
3856 {
3857 BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
3858 AioContext *aio_context;
3859
3860 if (!to_replace_bs) {
3861 error_setg(errp, "Node name '%s' not found", node_name);
3862 return NULL;
3863 }
3864
3865 aio_context = bdrv_get_aio_context(to_replace_bs);
3866 aio_context_acquire(aio_context);
3867
3868 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
3869 to_replace_bs = NULL;
3870 goto out;
3871 }
3872
3873 /* We don't want arbitrary node of the BDS chain to be replaced only the top
3874 * most non filter in order to prevent data corruption.
3875 * Another benefit is that this tests exclude backing files which are
3876 * blocked by the backing blockers.
3877 */
3878 if (!bdrv_is_first_non_filter(to_replace_bs)) {
3879 error_setg(errp, "Only top most non filter can be replaced");
3880 to_replace_bs = NULL;
3881 goto out;
3882 }
3883
3884 out:
3885 aio_context_release(aio_context);
3886 return to_replace_bs;
3887 }
3888
3889 static bool append_open_options(QDict *d, BlockDriverState *bs)
3890 {
3891 const QDictEntry *entry;
3892 bool found_any = false;
3893
3894 for (entry = qdict_first(bs->options); entry;
3895 entry = qdict_next(bs->options, entry))
3896 {
3897 /* Only take options for this level and exclude all non-driver-specific
3898 * options */
3899 if (!strchr(qdict_entry_key(entry), '.') &&
3900 strcmp(qdict_entry_key(entry), "node-name"))
3901 {
3902 qobject_incref(qdict_entry_value(entry));
3903 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
3904 found_any = true;
3905 }
3906 }
3907
3908 return found_any;
3909 }
3910
3911 /* Updates the following BDS fields:
3912 * - exact_filename: A filename which may be used for opening a block device
3913 * which (mostly) equals the given BDS (even without any
3914 * other options; so reading and writing must return the same
3915 * results, but caching etc. may be different)
3916 * - full_open_options: Options which, when given when opening a block device
3917 * (without a filename), result in a BDS (mostly)
3918 * equalling the given one
3919 * - filename: If exact_filename is set, it is copied here. Otherwise,
3920 * full_open_options is converted to a JSON object, prefixed with
3921 * "json:" (for use through the JSON pseudo protocol) and put here.
3922 */
3923 void bdrv_refresh_filename(BlockDriverState *bs)
3924 {
3925 BlockDriver *drv = bs->drv;
3926 QDict *opts;
3927
3928 if (!drv) {
3929 return;
3930 }
3931
3932 /* This BDS's file name will most probably depend on its file's name, so
3933 * refresh that first */
3934 if (bs->file) {
3935 bdrv_refresh_filename(bs->file);
3936 }
3937
3938 if (drv->bdrv_refresh_filename) {
3939 /* Obsolete information is of no use here, so drop the old file name
3940 * information before refreshing it */
3941 bs->exact_filename[0] = '\0';
3942 if (bs->full_open_options) {
3943 QDECREF(bs->full_open_options);
3944 bs->full_open_options = NULL;
3945 }
3946
3947 drv->bdrv_refresh_filename(bs);
3948 } else if (bs->file) {
3949 /* Try to reconstruct valid information from the underlying file */
3950 bool has_open_options;
3951
3952 bs->exact_filename[0] = '\0';
3953 if (bs->full_open_options) {
3954 QDECREF(bs->full_open_options);
3955 bs->full_open_options = NULL;
3956 }
3957
3958 opts = qdict_new();
3959 has_open_options = append_open_options(opts, bs);
3960
3961 /* If no specific options have been given for this BDS, the filename of
3962 * the underlying file should suffice for this one as well */
3963 if (bs->file->exact_filename[0] && !has_open_options) {
3964 strcpy(bs->exact_filename, bs->file->exact_filename);
3965 }
3966 /* Reconstructing the full options QDict is simple for most format block
3967 * drivers, as long as the full options are known for the underlying
3968 * file BDS. The full options QDict of that file BDS should somehow
3969 * contain a representation of the filename, therefore the following
3970 * suffices without querying the (exact_)filename of this BDS. */
3971 if (bs->file->full_open_options) {
3972 qdict_put_obj(opts, "driver",
3973 QOBJECT(qstring_from_str(drv->format_name)));
3974 QINCREF(bs->file->full_open_options);
3975 qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));
3976
3977 bs->full_open_options = opts;
3978 } else {
3979 QDECREF(opts);
3980 }
3981 } else if (!bs->full_open_options && qdict_size(bs->options)) {
3982 /* There is no underlying file BDS (at least referenced by BDS.file),
3983 * so the full options QDict should be equal to the options given
3984 * specifically for this block device when it was opened (plus the
3985 * driver specification).
3986 * Because those options don't change, there is no need to update
3987 * full_open_options when it's already set. */
3988
3989 opts = qdict_new();
3990 append_open_options(opts, bs);
3991 qdict_put_obj(opts, "driver",
3992 QOBJECT(qstring_from_str(drv->format_name)));
3993
3994 if (bs->exact_filename[0]) {
3995 /* This may not work for all block protocol drivers (some may
3996 * require this filename to be parsed), but we have to find some
3997 * default solution here, so just include it. If some block driver
3998 * does not support pure options without any filename at all or
3999 * needs some special format of the options QDict, it needs to
4000 * implement the driver-specific bdrv_refresh_filename() function.
4001 */
4002 qdict_put_obj(opts, "filename",
4003 QOBJECT(qstring_from_str(bs->exact_filename)));
4004 }
4005
4006 bs->full_open_options = opts;
4007 }
4008
4009 if (bs->exact_filename[0]) {
4010 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4011 } else if (bs->full_open_options) {
4012 QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4013 snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4014 qstring_get_str(json));
4015 QDECREF(json);
4016 }
4017 }
4018
4019 /* This accessor function purpose is to allow the device models to access the
4020 * BlockAcctStats structure embedded inside a BlockDriverState without being
4021 * aware of the BlockDriverState structure layout.
4022 * It will go away when the BlockAcctStats structure will be moved inside
4023 * the device models.
4024 */
4025 BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
4026 {
4027 return &bs->stats;
4028 }