1 /*
2 * QEMU System Emulator block driver
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "monitor.h"
28 #include "block_int.h"
29 #include "module.h"
30 #include "qemu-objects.h"
31 #include "qemu-coroutine.h"
32
33 #ifdef CONFIG_BSD
34 #include <sys/types.h>
35 #include <sys/stat.h>
36 #include <sys/ioctl.h>
37 #include <sys/queue.h>
38 #ifndef __DragonFly__
39 #include <sys/disk.h>
40 #endif
41 #endif
42
43 #ifdef _WIN32
44 #include <windows.h>
45 #endif
46
47 #define NOT_DONE 0x7fffffff /* used while an emulated sync operation is in progress */
48
49 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
50 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
51 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
52 BlockDriverCompletionFunc *cb, void *opaque);
53 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
54 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
55 BlockDriverCompletionFunc *cb, void *opaque);
56 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
57 BlockDriverCompletionFunc *cb, void *opaque);
58 static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
59 BlockDriverCompletionFunc *cb, void *opaque);
60 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
61 int64_t sector_num, int nb_sectors,
62 QEMUIOVector *iov);
63 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
64 int64_t sector_num, int nb_sectors,
65 QEMUIOVector *iov);
66 static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs);
67 static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
68 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
69 static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
70 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
71 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
72 int64_t sector_num,
73 QEMUIOVector *qiov,
74 int nb_sectors,
75 BlockDriverCompletionFunc *cb,
76 void *opaque,
77 bool is_write);
78 static void coroutine_fn bdrv_co_do_rw(void *opaque);
79
80 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
81 QTAILQ_HEAD_INITIALIZER(bdrv_states);
82
83 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
84 QLIST_HEAD_INITIALIZER(bdrv_drivers);
85
86 /* The device to use for VM snapshots */
87 static BlockDriverState *bs_snapshots;
88
89 /* If non-zero, use only whitelisted block drivers */
90 static int use_bdrv_whitelist;
91
92 #ifdef _WIN32
93 static int is_windows_drive_prefix(const char *filename)
94 {
95 return (((filename[0] >= 'a' && filename[0] <= 'z') ||
96 (filename[0] >= 'A' && filename[0] <= 'Z')) &&
97 filename[1] == ':');
98 }
99
100 int is_windows_drive(const char *filename)
101 {
102 if (is_windows_drive_prefix(filename) &&
103 filename[2] == '\0')
104 return 1;
105 if (strstart(filename, "\\\\.\\", NULL) ||
106 strstart(filename, "//./", NULL))
107 return 1;
108 return 0;
109 }
110 #endif
111
112 /* check if the path starts with "<protocol>:" */
113 static int path_has_protocol(const char *path)
114 {
115 #ifdef _WIN32
116 if (is_windows_drive(path) ||
117 is_windows_drive_prefix(path)) {
118 return 0;
119 }
120 #endif
121
122 return strchr(path, ':') != NULL;
123 }
124
125 int path_is_absolute(const char *path)
126 {
127 const char *p;
128 #ifdef _WIN32
129 /* specific case for names like: "\\.\d:" */
130 if (*path == '/' || *path == '\\')
131 return 1;
132 #endif
133 p = strchr(path, ':');
134 if (p)
135 p++;
136 else
137 p = path;
138 #ifdef _WIN32
139 return (*p == '/' || *p == '\\');
140 #else
141 return (*p == '/');
142 #endif
143 }
144
145 /* if filename is absolute, just copy it to dest. Otherwise, build a
146    path to it by treating it as relative to base_path. URLs are
147 supported. */
148 void path_combine(char *dest, int dest_size,
149 const char *base_path,
150 const char *filename)
151 {
152 const char *p, *p1;
153 int len;
154
155 if (dest_size <= 0)
156 return;
157 if (path_is_absolute(filename)) {
158 pstrcpy(dest, dest_size, filename);
159 } else {
160 p = strchr(base_path, ':');
161 if (p)
162 p++;
163 else
164 p = base_path;
165 p1 = strrchr(base_path, '/');
166 #ifdef _WIN32
167 {
168 const char *p2;
169 p2 = strrchr(base_path, '\\');
170 if (!p1 || p2 > p1)
171 p1 = p2;
172 }
173 #endif
174 if (p1)
175 p1++;
176 else
177 p1 = base_path;
178 if (p1 > p)
179 p = p1;
180 len = p - base_path;
181 if (len > dest_size - 1)
182 len = dest_size - 1;
183 memcpy(dest, base_path, len);
184 dest[len] = '\0';
185 pstrcat(dest, dest_size, filename);
186 }
187 }
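/*
 * Illustration (a sketch, not referenced anywhere in the tree): how
 * path_combine() resolves a backing file name stored relative to its image.
 * The paths below are made up for the example.
 */
static void example_path_combine(void)
{
    char dest[PATH_MAX];

    /* relative name: the directory of the base image is prepended */
    path_combine(dest, sizeof(dest), "/vm/disk0.qcow2", "base.raw");
    /* dest is now "/vm/base.raw" */

    /* absolute name: copied unchanged */
    path_combine(dest, sizeof(dest), "/vm/disk0.qcow2", "/iso/base.raw");
    /* dest is now "/iso/base.raw" */
}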
188
189 void bdrv_register(BlockDriver *bdrv)
190 {
191 /* Block drivers without coroutine functions need emulation */
192 if (!bdrv->bdrv_co_readv) {
193 bdrv->bdrv_co_readv = bdrv_co_readv_em;
194 bdrv->bdrv_co_writev = bdrv_co_writev_em;
195
196 /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
197 * the block driver lacks aio we need to emulate that too.
198 */
199 if (!bdrv->bdrv_aio_readv) {
200 /* add AIO emulation layer */
201 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
202 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
203 }
204 }
205
206 if (!bdrv->bdrv_aio_flush)
207 bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
208
209 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
210 }
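/*
 * Sketch of what registration looks like from a format driver's side.  The
 * "exampleimg" driver, its callbacks and BDRVExampleState are hypothetical
 * (hence the #if 0); the point is that a driver providing only
 * bdrv_co_readv/bdrv_co_writev gets working bdrv_aio_readv/bdrv_aio_writev
 * and bdrv_aio_flush entry points filled in by bdrv_register().
 */
#if 0
static BlockDriver bdrv_exampleimg = {
    .format_name    = "exampleimg",
    .instance_size  = sizeof(BDRVExampleState),
    .bdrv_probe     = exampleimg_probe,
    .bdrv_open      = exampleimg_open,
    .bdrv_close     = exampleimg_close,
    .bdrv_co_readv  = exampleimg_co_readv,
    .bdrv_co_writev = exampleimg_co_writev,
    /* .bdrv_aio_* left NULL: bdrv_register() installs the emulation */
};

static void bdrv_exampleimg_init(void)
{
    bdrv_register(&bdrv_exampleimg);
}

block_init(bdrv_exampleimg_init);
#endif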
211
212 /* create a new block device (by default it is empty) */
213 BlockDriverState *bdrv_new(const char *device_name)
214 {
215 BlockDriverState *bs;
216
217 bs = g_malloc0(sizeof(BlockDriverState));
218 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
219 if (device_name[0] != '\0') {
220 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
221 }
222 bdrv_iostatus_disable(bs);
223 return bs;
224 }
225
226 BlockDriver *bdrv_find_format(const char *format_name)
227 {
228 BlockDriver *drv1;
229 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
230 if (!strcmp(drv1->format_name, format_name)) {
231 return drv1;
232 }
233 }
234 return NULL;
235 }
236
237 static int bdrv_is_whitelisted(BlockDriver *drv)
238 {
239 static const char *whitelist[] = {
240 CONFIG_BDRV_WHITELIST
241 };
242 const char **p;
243
244 if (!whitelist[0])
245 return 1; /* no whitelist, anything goes */
246
247 for (p = whitelist; *p; p++) {
248 if (!strcmp(drv->format_name, *p)) {
249 return 1;
250 }
251 }
252 return 0;
253 }
254
255 BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
256 {
257 BlockDriver *drv = bdrv_find_format(format_name);
258 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
259 }
260
261 int bdrv_create(BlockDriver *drv, const char* filename,
262 QEMUOptionParameter *options)
263 {
264 if (!drv->bdrv_create)
265 return -ENOTSUP;
266
267 return drv->bdrv_create(filename, options);
268 }
269
270 int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
271 {
272 BlockDriver *drv;
273
274 drv = bdrv_find_protocol(filename);
275 if (drv == NULL) {
276 return -ENOENT;
277 }
278
279 return bdrv_create(drv, filename, options);
280 }
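/*
 * Sketch of creating a 1 GB qcow2 image with the helpers above, following
 * the same pattern the BDRV_O_SNAPSHOT code in bdrv_open() uses further
 * down.  The file name is made up and error handling is minimal.
 */
static int example_create_qcow2_image(void)
{
    BlockDriver *drv = bdrv_find_format("qcow2");
    QEMUOptionParameter *options;
    int ret;

    if (!drv) {
        return -ENOENT;
    }

    options = parse_option_parameters("", drv->create_options, NULL);
    set_option_parameter_int(options, BLOCK_OPT_SIZE, 1024 * 1024 * 1024);

    ret = bdrv_create(drv, "/tmp/example.qcow2", options);
    free_option_parameters(options);
    return ret;
}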
281
282 #ifdef _WIN32
283 void get_tmp_filename(char *filename, int size)
284 {
285 char temp_dir[MAX_PATH];
286
287 GetTempPath(MAX_PATH, temp_dir);
288 GetTempFileName(temp_dir, "qem", 0, filename);
289 }
290 #else
291 void get_tmp_filename(char *filename, int size)
292 {
293 int fd;
294 const char *tmpdir;
295 /* XXX: race condition possible */
296 tmpdir = getenv("TMPDIR");
297 if (!tmpdir)
298 tmpdir = "/tmp";
299 snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
300 fd = mkstemp(filename);
301 close(fd);
302 }
303 #endif
304
305 /*
306 * Detect host devices. By convention, /dev/cdrom[N] is always
307 * recognized as a host CDROM.
308 */
309 static BlockDriver *find_hdev_driver(const char *filename)
310 {
311 int score_max = 0, score;
312 BlockDriver *drv = NULL, *d;
313
314 QLIST_FOREACH(d, &bdrv_drivers, list) {
315 if (d->bdrv_probe_device) {
316 score = d->bdrv_probe_device(filename);
317 if (score > score_max) {
318 score_max = score;
319 drv = d;
320 }
321 }
322 }
323
324 return drv;
325 }
326
327 BlockDriver *bdrv_find_protocol(const char *filename)
328 {
329 BlockDriver *drv1;
330 char protocol[128];
331 int len;
332 const char *p;
333
334 /* TODO Drivers without bdrv_file_open must be specified explicitly */
335
336 /*
337 * XXX(hch): we really should not let host device detection
338 * override an explicit protocol specification, but moving this
339 * later breaks access to device names with colons in them.
340 * Thanks to the brain-dead persistent naming schemes on udev-
341 * based Linux systems those actually are quite common.
342 */
343 drv1 = find_hdev_driver(filename);
344 if (drv1) {
345 return drv1;
346 }
347
348 if (!path_has_protocol(filename)) {
349 return bdrv_find_format("file");
350 }
351 p = strchr(filename, ':');
352 assert(p != NULL);
353 len = p - filename;
354 if (len > sizeof(protocol) - 1)
355 len = sizeof(protocol) - 1;
356 memcpy(protocol, filename, len);
357 protocol[len] = '\0';
358 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
359 if (drv1->protocol_name &&
360 !strcmp(drv1->protocol_name, protocol)) {
361 return drv1;
362 }
363 }
364 return NULL;
365 }
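/*
 * Illustration of the lookup above for a few names (paths made up, and the
 * nbd example assumes the nbd protocol driver is compiled in):
 */
static void example_find_protocol(void)
{
    /* plain path, no "<protocol>:" prefix: the "file" driver */
    BlockDriver *plain = bdrv_find_protocol("/var/lib/images/disk.qcow2");

    /* "<protocol>:" prefix: the driver whose protocol_name matches */
    BlockDriver *nbd = bdrv_find_protocol("nbd:localhost:10809");

    /* host devices are probed first, see the XXX(hch) comment above */
    BlockDriver *cdrom = bdrv_find_protocol("/dev/cdrom");

    (void)plain; (void)nbd; (void)cdrom;
}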
366
367 static int find_image_format(const char *filename, BlockDriver **pdrv)
368 {
369 int ret, score, score_max;
370 BlockDriver *drv1, *drv;
371 uint8_t buf[2048];
372 BlockDriverState *bs;
373
374 ret = bdrv_file_open(&bs, filename, 0);
375 if (ret < 0) {
376 *pdrv = NULL;
377 return ret;
378 }
379
380 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
381 if (bs->sg || !bdrv_is_inserted(bs)) {
382 bdrv_delete(bs);
383 drv = bdrv_find_format("raw");
384 if (!drv) {
385 ret = -ENOENT;
386 }
387 *pdrv = drv;
388 return ret;
389 }
390
391 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
392 bdrv_delete(bs);
393 if (ret < 0) {
394 *pdrv = NULL;
395 return ret;
396 }
397
398 score_max = 0;
399 drv = NULL;
400 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
401 if (drv1->bdrv_probe) {
402 score = drv1->bdrv_probe(buf, ret, filename);
403 if (score > score_max) {
404 score_max = score;
405 drv = drv1;
406 }
407 }
408 }
409 if (!drv) {
410 ret = -ENOENT;
411 }
412 *pdrv = drv;
413 return ret;
414 }
415
416 /**
417 * Set the current 'total_sectors' value
418 */
419 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
420 {
421 BlockDriver *drv = bs->drv;
422
423 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
424 if (bs->sg)
425 return 0;
426
427 /* query actual device if possible, otherwise just trust the hint */
428 if (drv->bdrv_getlength) {
429 int64_t length = drv->bdrv_getlength(bs);
430 if (length < 0) {
431 return length;
432 }
433 hint = length >> BDRV_SECTOR_BITS;
434 }
435
436 bs->total_sectors = hint;
437 return 0;
438 }
439
440 /**
441 * Set open flags for a given cache mode
442 *
443 * Return 0 on success, -1 if the cache mode was invalid.
444 */
445 int bdrv_parse_cache_flags(const char *mode, int *flags)
446 {
447 *flags &= ~BDRV_O_CACHE_MASK;
448
449 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
450 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
451 } else if (!strcmp(mode, "directsync")) {
452 *flags |= BDRV_O_NOCACHE;
453 } else if (!strcmp(mode, "writeback")) {
454 *flags |= BDRV_O_CACHE_WB;
455 } else if (!strcmp(mode, "unsafe")) {
456 *flags |= BDRV_O_CACHE_WB;
457 *flags |= BDRV_O_NO_FLUSH;
458 } else if (!strcmp(mode, "writethrough")) {
459 /* this is the default */
460 } else {
461 return -1;
462 }
463
464 return 0;
465 }
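/*
 * Sketch of how a caller (for instance the -drive cache=... parsing code)
 * would use bdrv_parse_cache_flags(); the starting flags are illustrative.
 */
static int example_cache_mode_to_flags(const char *mode)
{
    int flags = BDRV_O_RDWR;

    if (bdrv_parse_cache_flags(mode, &flags) < 0) {
        return -1;          /* unknown cache mode */
    }
    /* e.g. "none" adds BDRV_O_NOCACHE | BDRV_O_CACHE_WB, while
     * "writethrough" leaves the cache bits clear (the default) */
    return flags;
}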
466
467 /*
468 * Common part for opening disk images and files
469 */
470 static int bdrv_open_common(BlockDriverState *bs, const char *filename,
471 int flags, BlockDriver *drv)
472 {
473 int ret, open_flags;
474
475 assert(drv != NULL);
476
477 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
478
479 bs->file = NULL;
480 bs->total_sectors = 0;
481 bs->encrypted = 0;
482 bs->valid_key = 0;
483 bs->open_flags = flags;
484 bs->buffer_alignment = 512;
485
486 pstrcpy(bs->filename, sizeof(bs->filename), filename);
487
488 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
489 return -ENOTSUP;
490 }
491
492 bs->drv = drv;
493 bs->opaque = g_malloc0(drv->instance_size);
494
495 if (flags & BDRV_O_CACHE_WB)
496 bs->enable_write_cache = 1;
497
498 /*
499 * Clear flags that are internal to the block layer before opening the
500 * image.
501 */
502 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
503
504 /*
505 * Snapshots should be writable.
506 */
507 if (bs->is_temporary) {
508 open_flags |= BDRV_O_RDWR;
509 }
510
511 /* Open the image, either directly or using a protocol */
512 if (drv->bdrv_file_open) {
513 ret = drv->bdrv_file_open(bs, filename, open_flags);
514 } else {
515 ret = bdrv_file_open(&bs->file, filename, open_flags);
516 if (ret >= 0) {
517 ret = drv->bdrv_open(bs, open_flags);
518 }
519 }
520
521 if (ret < 0) {
522 goto free_and_fail;
523 }
524
525 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
526
527 ret = refresh_total_sectors(bs, bs->total_sectors);
528 if (ret < 0) {
529 goto free_and_fail;
530 }
531
532 #ifndef _WIN32
533 if (bs->is_temporary) {
534 unlink(filename);
535 }
536 #endif
537 return 0;
538
539 free_and_fail:
540 if (bs->file) {
541 bdrv_delete(bs->file);
542 bs->file = NULL;
543 }
544 g_free(bs->opaque);
545 bs->opaque = NULL;
546 bs->drv = NULL;
547 return ret;
548 }
549
550 /*
551 * Opens a file using a protocol (file, host_device, nbd, ...)
552 */
553 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
554 {
555 BlockDriverState *bs;
556 BlockDriver *drv;
557 int ret;
558
559 drv = bdrv_find_protocol(filename);
560 if (!drv) {
561 return -ENOENT;
562 }
563
564 bs = bdrv_new("");
565 ret = bdrv_open_common(bs, filename, flags, drv);
566 if (ret < 0) {
567 bdrv_delete(bs);
568 return ret;
569 }
570 bs->growable = 1;
571 *pbs = bs;
572 return 0;
573 }
574
575 /*
576 * Opens a disk image (raw, qcow2, vmdk, ...)
577 */
578 int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
579 BlockDriver *drv)
580 {
581 int ret;
582
583 if (flags & BDRV_O_SNAPSHOT) {
584 BlockDriverState *bs1;
585 int64_t total_size;
586 int is_protocol = 0;
587 BlockDriver *bdrv_qcow2;
588 QEMUOptionParameter *options;
589 char tmp_filename[PATH_MAX];
590 char backing_filename[PATH_MAX];
591
592 /* if snapshot, we create a temporary backing file and open it
593 instead of opening 'filename' directly */
594
595 /* if there is a backing file, use it */
596 bs1 = bdrv_new("");
597 ret = bdrv_open(bs1, filename, 0, drv);
598 if (ret < 0) {
599 bdrv_delete(bs1);
600 return ret;
601 }
602 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
603
604 if (bs1->drv && bs1->drv->protocol_name)
605 is_protocol = 1;
606
607 bdrv_delete(bs1);
608
609 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
610
611 /* Real path is meaningless for protocols */
612 if (is_protocol)
613 snprintf(backing_filename, sizeof(backing_filename),
614 "%s", filename);
615 else if (!realpath(filename, backing_filename))
616 return -errno;
617
618 bdrv_qcow2 = bdrv_find_format("qcow2");
619 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
620
621 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
622 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
623 if (drv) {
624 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
625 drv->format_name);
626 }
627
628 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
629 free_option_parameters(options);
630 if (ret < 0) {
631 return ret;
632 }
633
634 filename = tmp_filename;
635 drv = bdrv_qcow2;
636 bs->is_temporary = 1;
637 }
638
639 /* Find the right image format driver */
640 if (!drv) {
641 ret = find_image_format(filename, &drv);
642 }
643
644 if (!drv) {
645 goto unlink_and_fail;
646 }
647
648 /* Open the image */
649 ret = bdrv_open_common(bs, filename, flags, drv);
650 if (ret < 0) {
651 goto unlink_and_fail;
652 }
653
654 /* If there is a backing file, use it */
655 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
656 char backing_filename[PATH_MAX];
657 int back_flags;
658 BlockDriver *back_drv = NULL;
659
660 bs->backing_hd = bdrv_new("");
661
662 if (path_has_protocol(bs->backing_file)) {
663 pstrcpy(backing_filename, sizeof(backing_filename),
664 bs->backing_file);
665 } else {
666 path_combine(backing_filename, sizeof(backing_filename),
667 filename, bs->backing_file);
668 }
669
670 if (bs->backing_format[0] != '\0') {
671 back_drv = bdrv_find_format(bs->backing_format);
672 }
673
674 /* backing files always opened read-only */
675 back_flags =
676 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
677
678 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
679 if (ret < 0) {
680 bdrv_close(bs);
681 return ret;
682 }
683 if (bs->is_temporary) {
684 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
685 } else {
686 /* base image inherits from "parent" */
687 bs->backing_hd->keep_read_only = bs->keep_read_only;
688 }
689 }
690
691 if (!bdrv_key_required(bs)) {
692 bdrv_dev_change_media_cb(bs, true);
693 }
694
695 return 0;
696
697 unlink_and_fail:
698 if (bs->is_temporary) {
699 unlink(filename);
700 }
701 return ret;
702 }
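/*
 * Sketch of the typical open/close sequence built from the helpers above.
 * The device name, file name and flags are illustrative; real callers live
 * in blockdev.c and do considerably more error handling.
 */
static int example_open_image(void)
{
    BlockDriverState *bs = bdrv_new("example-disk");
    int ret;

    /* drv == NULL: the format is probed via find_image_format() */
    ret = bdrv_open(bs, "/tmp/example.qcow2",
                    BDRV_O_RDWR | BDRV_O_CACHE_WB, NULL);
    if (ret < 0) {
        bdrv_delete(bs);
        return ret;
    }

    /* ... I/O via bdrv_read()/bdrv_write()/bdrv_pread()/bdrv_pwrite() ... */

    bdrv_delete(bs);    /* closes the image and frees the state */
    return 0;
}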
703
704 void bdrv_close(BlockDriverState *bs)
705 {
706 if (bs->drv) {
707 if (bs == bs_snapshots) {
708 bs_snapshots = NULL;
709 }
710 if (bs->backing_hd) {
711 bdrv_delete(bs->backing_hd);
712 bs->backing_hd = NULL;
713 }
714 bs->drv->bdrv_close(bs);
715 g_free(bs->opaque);
716 #ifdef _WIN32
717 if (bs->is_temporary) {
718 unlink(bs->filename);
719 }
720 #endif
721 bs->opaque = NULL;
722 bs->drv = NULL;
723
724 if (bs->file != NULL) {
725 bdrv_close(bs->file);
726 }
727
728 bdrv_dev_change_media_cb(bs, false);
729 }
730 }
731
732 void bdrv_close_all(void)
733 {
734 BlockDriverState *bs;
735
736 QTAILQ_FOREACH(bs, &bdrv_states, list) {
737 bdrv_close(bs);
738 }
739 }
740
741 /* make a BlockDriverState anonymous by removing it from the bdrv_states list.
742    Also, clear device_name so that a second remove is a no-op */
743 void bdrv_make_anon(BlockDriverState *bs)
744 {
745 if (bs->device_name[0] != '\0') {
746 QTAILQ_REMOVE(&bdrv_states, bs, list);
747 }
748 bs->device_name[0] = '\0';
749 }
750
751 void bdrv_delete(BlockDriverState *bs)
752 {
753 assert(!bs->dev);
754
755 /* remove from list, if necessary */
756 bdrv_make_anon(bs);
757
758 bdrv_close(bs);
759 if (bs->file != NULL) {
760 bdrv_delete(bs->file);
761 }
762
763 assert(bs != bs_snapshots);
764 g_free(bs);
765 }
766
767 int bdrv_attach_dev(BlockDriverState *bs, void *dev)
768 /* TODO change to DeviceState *dev when all users are qdevified */
769 {
770 if (bs->dev) {
771 return -EBUSY;
772 }
773 bs->dev = dev;
774 bdrv_iostatus_reset(bs);
775 return 0;
776 }
777
778 /* TODO qdevified devices don't use this, remove when devices are qdevified */
779 void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
780 {
781 if (bdrv_attach_dev(bs, dev) < 0) {
782 abort();
783 }
784 }
785
786 void bdrv_detach_dev(BlockDriverState *bs, void *dev)
787 /* TODO change to DeviceState *dev when all users are qdevified */
788 {
789 assert(bs->dev == dev);
790 bs->dev = NULL;
791 bs->dev_ops = NULL;
792 bs->dev_opaque = NULL;
793 bs->buffer_alignment = 512;
794 }
795
796 /* TODO change to return DeviceState * when all users are qdevified */
797 void *bdrv_get_attached_dev(BlockDriverState *bs)
798 {
799 return bs->dev;
800 }
801
802 void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
803 void *opaque)
804 {
805 bs->dev_ops = ops;
806 bs->dev_opaque = opaque;
807 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
808 bs_snapshots = NULL;
809 }
810 }
811
812 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
813 {
814 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
815 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
816 }
817 }
818
819 bool bdrv_dev_has_removable_media(BlockDriverState *bs)
820 {
821 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
822 }
823
824 bool bdrv_dev_is_tray_open(BlockDriverState *bs)
825 {
826 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
827 return bs->dev_ops->is_tray_open(bs->dev_opaque);
828 }
829 return false;
830 }
831
832 static void bdrv_dev_resize_cb(BlockDriverState *bs)
833 {
834 if (bs->dev_ops && bs->dev_ops->resize_cb) {
835 bs->dev_ops->resize_cb(bs->dev_opaque);
836 }
837 }
838
839 bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
840 {
841 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
842 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
843 }
844 return false;
845 }
846
847 /*
848 * Run consistency checks on an image
849 *
850 * Returns 0 if the check could be completed (it doesn't mean that the image is
851 * free of errors) or -errno when an internal error occurred. The results of the
852 * check are stored in res.
853 */
854 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
855 {
856 if (bs->drv->bdrv_check == NULL) {
857 return -ENOTSUP;
858 }
859
860 memset(res, 0, sizeof(*res));
861 return bs->drv->bdrv_check(bs, res);
862 }
863
864 #define COMMIT_BUF_SECTORS 2048
865
866 /* commit COW file into the raw image */
867 int bdrv_commit(BlockDriverState *bs)
868 {
869 BlockDriver *drv = bs->drv;
870 BlockDriver *backing_drv;
871 int64_t sector, total_sectors;
872 int n, ro, open_flags;
873 int ret = 0, rw_ret = 0;
874 uint8_t *buf;
875 char filename[1024];
876 BlockDriverState *bs_rw, *bs_ro;
877
878 if (!drv)
879 return -ENOMEDIUM;
880
881 if (!bs->backing_hd) {
882 return -ENOTSUP;
883 }
884
885 if (bs->backing_hd->keep_read_only) {
886 return -EACCES;
887 }
888
889 backing_drv = bs->backing_hd->drv;
890 ro = bs->backing_hd->read_only;
891     pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
892 open_flags = bs->backing_hd->open_flags;
893
894 if (ro) {
895 /* re-open as RW */
896 bdrv_delete(bs->backing_hd);
897 bs->backing_hd = NULL;
898 bs_rw = bdrv_new("");
899 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
900 backing_drv);
901 if (rw_ret < 0) {
902 bdrv_delete(bs_rw);
903 /* try to re-open read-only */
904 bs_ro = bdrv_new("");
905 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
906 backing_drv);
907 if (ret < 0) {
908 bdrv_delete(bs_ro);
909 /* drive not functional anymore */
910 bs->drv = NULL;
911 return ret;
912 }
913 bs->backing_hd = bs_ro;
914 return rw_ret;
915 }
916 bs->backing_hd = bs_rw;
917 }
918
919 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
920 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
921
922 for (sector = 0; sector < total_sectors; sector += n) {
923 if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
924
925 if (bdrv_read(bs, sector, buf, n) != 0) {
926 ret = -EIO;
927 goto ro_cleanup;
928 }
929
930 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
931 ret = -EIO;
932 goto ro_cleanup;
933 }
934 }
935 }
936
937 if (drv->bdrv_make_empty) {
938 ret = drv->bdrv_make_empty(bs);
939 bdrv_flush(bs);
940 }
941
942 /*
943 * Make sure all data we wrote to the backing device is actually
944 * stable on disk.
945 */
946 if (bs->backing_hd)
947 bdrv_flush(bs->backing_hd);
948
949 ro_cleanup:
950 g_free(buf);
951
952 if (ro) {
953 /* re-open as RO */
954 bdrv_delete(bs->backing_hd);
955 bs->backing_hd = NULL;
956 bs_ro = bdrv_new("");
957 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
958 backing_drv);
959 if (ret < 0) {
960 bdrv_delete(bs_ro);
961 /* drive not functional anymore */
962 bs->drv = NULL;
963 return ret;
964 }
965 bs->backing_hd = bs_ro;
966 bs->backing_hd->keep_read_only = 0;
967 }
968
969 return ret;
970 }
971
972 void bdrv_commit_all(void)
973 {
974 BlockDriverState *bs;
975
976 QTAILQ_FOREACH(bs, &bdrv_states, list) {
977 bdrv_commit(bs);
978 }
979 }
980
981 /*
982 * Return values:
983 * 0 - success
984 * -EINVAL - backing format specified, but no file
985 * -ENOSPC - can't update the backing file because no space is left in the
986 * image file header
987 * -ENOTSUP - format driver doesn't support changing the backing file
988 */
989 int bdrv_change_backing_file(BlockDriverState *bs,
990 const char *backing_file, const char *backing_fmt)
991 {
992 BlockDriver *drv = bs->drv;
993
994 if (drv->bdrv_change_backing_file != NULL) {
995 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
996 } else {
997 return -ENOTSUP;
998 }
999 }
1000
1001 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1002 size_t size)
1003 {
1004 int64_t len;
1005
1006 if (!bdrv_is_inserted(bs))
1007 return -ENOMEDIUM;
1008
1009 if (bs->growable)
1010 return 0;
1011
1012 len = bdrv_getlength(bs);
1013
1014 if (offset < 0)
1015 return -EIO;
1016
1017 if ((offset > len) || (len - offset < size))
1018 return -EIO;
1019
1020 return 0;
1021 }
1022
1023 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1024 int nb_sectors)
1025 {
1026 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1027 nb_sectors * BDRV_SECTOR_SIZE);
1028 }
1029
1030 static inline bool bdrv_has_async_flush(BlockDriver *drv)
1031 {
1032 return drv->bdrv_aio_flush != bdrv_aio_flush_em;
1033 }
1034
1035 typedef struct RwCo {
1036 BlockDriverState *bs;
1037 int64_t sector_num;
1038 int nb_sectors;
1039 QEMUIOVector *qiov;
1040 bool is_write;
1041 int ret;
1042 } RwCo;
1043
1044 static void coroutine_fn bdrv_rw_co_entry(void *opaque)
1045 {
1046 RwCo *rwco = opaque;
1047
1048 if (!rwco->is_write) {
1049 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
1050 rwco->nb_sectors, rwco->qiov);
1051 } else {
1052 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
1053 rwco->nb_sectors, rwco->qiov);
1054 }
1055 }
1056
1057 /*
1058 * Process a synchronous request using coroutines
1059 */
1060 static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1061 int nb_sectors, bool is_write)
1062 {
1063 QEMUIOVector qiov;
1064 struct iovec iov = {
1065 .iov_base = (void *)buf,
1066 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1067 };
1068 Coroutine *co;
1069 RwCo rwco = {
1070 .bs = bs,
1071 .sector_num = sector_num,
1072 .nb_sectors = nb_sectors,
1073 .qiov = &qiov,
1074 .is_write = is_write,
1075 .ret = NOT_DONE,
1076 };
1077
1078 qemu_iovec_init_external(&qiov, &iov, 1);
1079
1080 if (qemu_in_coroutine()) {
1081 /* Fast-path if already in coroutine context */
1082 bdrv_rw_co_entry(&rwco);
1083 } else {
1084 co = qemu_coroutine_create(bdrv_rw_co_entry);
1085 qemu_coroutine_enter(co, &rwco);
1086 while (rwco.ret == NOT_DONE) {
1087 qemu_aio_wait();
1088 }
1089 }
1090 return rwco.ret;
1091 }
1092
1093 /* return < 0 if error. See bdrv_write() for the return codes */
1094 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
1095 uint8_t *buf, int nb_sectors)
1096 {
1097 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
1098 }
1099
1100 static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
1101 int nb_sectors, int dirty)
1102 {
1103 int64_t start, end;
1104 unsigned long val, idx, bit;
1105
1106 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
1107 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
1108
1109 for (; start <= end; start++) {
1110 idx = start / (sizeof(unsigned long) * 8);
1111 bit = start % (sizeof(unsigned long) * 8);
1112 val = bs->dirty_bitmap[idx];
1113 if (dirty) {
1114 if (!(val & (1UL << bit))) {
1115 bs->dirty_count++;
1116 val |= 1UL << bit;
1117 }
1118 } else {
1119 if (val & (1UL << bit)) {
1120 bs->dirty_count--;
1121 val &= ~(1UL << bit);
1122 }
1123 }
1124 bs->dirty_bitmap[idx] = val;
1125 }
1126 }
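/*
 * Worked example of the mapping above, assuming BDRV_SECTORS_PER_DIRTY_CHUNK
 * is 2048 (1 MB chunks of 512-byte sectors) and 64-bit unsigned long:
 *
 *   sector_num = 5000, nb_sectors = 3000
 *     start = 5000 / 2048 = 2,   end = 7999 / 2048 = 3
 *     chunk 2 -> idx = 2 / 64 = 0, bit = 2  (mask 1UL << 2)
 *     chunk 3 -> idx = 3 / 64 = 0, bit = 3  (mask 1UL << 3)
 *
 * i.e. the request touches two bits in dirty_bitmap[0], and dirty_count is
 * only adjusted for bits whose state actually changes.
 */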
1127
1128 /* Return < 0 if error. Important errors are:
1129 -EIO generic I/O error (may happen for all errors)
1130 -ENOMEDIUM No media inserted.
1131 -EINVAL Invalid sector number or nb_sectors
1132 -EACCES Trying to write a read-only device
1133 */
1134 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
1135 const uint8_t *buf, int nb_sectors)
1136 {
1137 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
1138 }
1139
1140 int bdrv_pread(BlockDriverState *bs, int64_t offset,
1141 void *buf, int count1)
1142 {
1143 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
1144 int len, nb_sectors, count;
1145 int64_t sector_num;
1146 int ret;
1147
1148 count = count1;
1149 /* first read to align to sector start */
1150 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1151 if (len > count)
1152 len = count;
1153 sector_num = offset >> BDRV_SECTOR_BITS;
1154 if (len > 0) {
1155 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1156 return ret;
1157 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
1158 count -= len;
1159 if (count == 0)
1160 return count1;
1161 sector_num++;
1162 buf += len;
1163 }
1164
1165 /* read the sectors "in place" */
1166 nb_sectors = count >> BDRV_SECTOR_BITS;
1167 if (nb_sectors > 0) {
1168 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1169 return ret;
1170 sector_num += nb_sectors;
1171 len = nb_sectors << BDRV_SECTOR_BITS;
1172 buf += len;
1173 count -= len;
1174 }
1175
1176 /* add data from the last sector */
1177 if (count > 0) {
1178 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1179 return ret;
1180 memcpy(buf, tmp_buf, count);
1181 }
1182 return count1;
1183 }
1184
1185 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1186 const void *buf, int count1)
1187 {
1188 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
1189 int len, nb_sectors, count;
1190 int64_t sector_num;
1191 int ret;
1192
1193 count = count1;
1194 /* first write to align to sector start */
1195 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1196 if (len > count)
1197 len = count;
1198 sector_num = offset >> BDRV_SECTOR_BITS;
1199 if (len > 0) {
1200 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1201 return ret;
1202 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
1203 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1204 return ret;
1205 count -= len;
1206 if (count == 0)
1207 return count1;
1208 sector_num++;
1209 buf += len;
1210 }
1211
1212 /* write the sectors "in place" */
1213 nb_sectors = count >> BDRV_SECTOR_BITS;
1214 if (nb_sectors > 0) {
1215 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1216 return ret;
1217 sector_num += nb_sectors;
1218 len = nb_sectors << BDRV_SECTOR_BITS;
1219 buf += len;
1220 count -= len;
1221 }
1222
1223 /* add data from the last sector */
1224 if (count > 0) {
1225 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1226 return ret;
1227 memcpy(tmp_buf, buf, count);
1228 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1229 return ret;
1230 }
1231 return count1;
1232 }
1233
1234 /*
1235 * Writes to the file and ensures that no writes are reordered across this
1236 * request (acts as a barrier)
1237 *
1238 * Returns 0 on success, -errno in error cases.
1239 */
1240 int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1241 const void *buf, int count)
1242 {
1243 int ret;
1244
1245 ret = bdrv_pwrite(bs, offset, buf, count);
1246 if (ret < 0) {
1247 return ret;
1248 }
1249
1250 /* No flush needed for cache modes that use O_DSYNC */
1251 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
1252 bdrv_flush(bs);
1253 }
1254
1255 return 0;
1256 }
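/*
 * Sketch of byte-granularity access with the helpers above: bump a 4-byte
 * big-endian counter at an unaligned offset through the read-modify-write
 * path.  The offset and the field itself are made up for illustration.
 */
static int example_bump_header_field(BlockDriverState *bs)
{
    uint32_t field;
    int ret;

    ret = bdrv_pread(bs, 20, &field, sizeof(field));
    if (ret < 0) {
        return ret;
    }

    field = cpu_to_be32(be32_to_cpu(field) + 1);

    /* bdrv_pwrite_sync() flushes afterwards unless the cache mode already
     * guarantees ordering via O_DSYNC */
    return bdrv_pwrite_sync(bs, 20, &field, sizeof(field));
}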
1257
1258 /*
1259 * Handle a read request in coroutine context
1260 */
1261 static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
1262 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1263 {
1264 BlockDriver *drv = bs->drv;
1265
1266 if (!drv) {
1267 return -ENOMEDIUM;
1268 }
1269 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1270 return -EIO;
1271 }
1272
1273 return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1274 }
1275
1276 int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
1277 int nb_sectors, QEMUIOVector *qiov)
1278 {
1279 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
1280
1281 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
1282 }
1283
1284 /*
1285 * Handle a write request in coroutine context
1286 */
1287 static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
1288 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1289 {
1290 BlockDriver *drv = bs->drv;
1291 int ret;
1292
1293 if (!bs->drv) {
1294 return -ENOMEDIUM;
1295 }
1296 if (bs->read_only) {
1297 return -EACCES;
1298 }
1299 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1300 return -EIO;
1301 }
1302
1303 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1304
1305 if (bs->dirty_bitmap) {
1306 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1307 }
1308
1309 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1310 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1311 }
1312
1313 return ret;
1314 }
1315
1316 int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1317 int nb_sectors, QEMUIOVector *qiov)
1318 {
1319 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1320
1321 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
1322 }
1323
1324 /**
1325 * Truncate file to 'offset' bytes (needed only for file protocols)
1326 */
1327 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1328 {
1329 BlockDriver *drv = bs->drv;
1330 int ret;
1331 if (!drv)
1332 return -ENOMEDIUM;
1333 if (!drv->bdrv_truncate)
1334 return -ENOTSUP;
1335 if (bs->read_only)
1336 return -EACCES;
1337 if (bdrv_in_use(bs))
1338 return -EBUSY;
1339 ret = drv->bdrv_truncate(bs, offset);
1340 if (ret == 0) {
1341 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
1342 bdrv_dev_resize_cb(bs);
1343 }
1344 return ret;
1345 }
1346
1347 /**
1348  * Length of an allocated file in bytes. Sparse files are counted by actual
1349 * allocated space. Return < 0 if error or unknown.
1350 */
1351 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1352 {
1353 BlockDriver *drv = bs->drv;
1354 if (!drv) {
1355 return -ENOMEDIUM;
1356 }
1357 if (drv->bdrv_get_allocated_file_size) {
1358 return drv->bdrv_get_allocated_file_size(bs);
1359 }
1360 if (bs->file) {
1361 return bdrv_get_allocated_file_size(bs->file);
1362 }
1363 return -ENOTSUP;
1364 }
1365
1366 /**
1367 * Length of a file in bytes. Return < 0 if error or unknown.
1368 */
1369 int64_t bdrv_getlength(BlockDriverState *bs)
1370 {
1371 BlockDriver *drv = bs->drv;
1372 if (!drv)
1373 return -ENOMEDIUM;
1374
1375 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
1376 if (drv->bdrv_getlength) {
1377 return drv->bdrv_getlength(bs);
1378 }
1379 }
1380 return bs->total_sectors * BDRV_SECTOR_SIZE;
1381 }
1382
1383 /* return 0 as number of sectors if no device present or error */
1384 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
1385 {
1386 int64_t length;
1387 length = bdrv_getlength(bs);
1388 if (length < 0)
1389 length = 0;
1390 else
1391 length = length >> BDRV_SECTOR_BITS;
1392 *nb_sectors_ptr = length;
1393 }
1394
1395 struct partition {
1396 uint8_t boot_ind; /* 0x80 - active */
1397 uint8_t head; /* starting head */
1398 uint8_t sector; /* starting sector */
1399 uint8_t cyl; /* starting cylinder */
1400 uint8_t sys_ind; /* What partition type */
1401 uint8_t end_head; /* end head */
1402 uint8_t end_sector; /* end sector */
1403 uint8_t end_cyl; /* end cylinder */
1404 uint32_t start_sect; /* starting sector counting from 0 */
1405 uint32_t nr_sects; /* nr of sectors in partition */
1406 } QEMU_PACKED;
1407
1408 /* try to guess the disk logical geometry from the MS-DOS partition table. Return 0 if OK, -1 if it could not be guessed */
1409 static int guess_disk_lchs(BlockDriverState *bs,
1410 int *pcylinders, int *pheads, int *psectors)
1411 {
1412 uint8_t buf[BDRV_SECTOR_SIZE];
1413 int ret, i, heads, sectors, cylinders;
1414 struct partition *p;
1415 uint32_t nr_sects;
1416 uint64_t nb_sectors;
1417
1418 bdrv_get_geometry(bs, &nb_sectors);
1419
1420 ret = bdrv_read(bs, 0, buf, 1);
1421 if (ret < 0)
1422 return -1;
1423 /* test msdos magic */
1424 if (buf[510] != 0x55 || buf[511] != 0xaa)
1425 return -1;
1426 for(i = 0; i < 4; i++) {
1427 p = ((struct partition *)(buf + 0x1be)) + i;
1428 nr_sects = le32_to_cpu(p->nr_sects);
1429 if (nr_sects && p->end_head) {
1430 /* We make the assumption that the partition terminates on
1431 a cylinder boundary */
1432 heads = p->end_head + 1;
1433 sectors = p->end_sector & 63;
1434 if (sectors == 0)
1435 continue;
1436 cylinders = nb_sectors / (heads * sectors);
1437 if (cylinders < 1 || cylinders > 16383)
1438 continue;
1439 *pheads = heads;
1440 *psectors = sectors;
1441 *pcylinders = cylinders;
1442 #if 0
1443 printf("guessed geometry: LCHS=%d %d %d\n",
1444 cylinders, heads, sectors);
1445 #endif
1446 return 0;
1447 }
1448 }
1449 return -1;
1450 }
1451
1452 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1453 {
1454 int translation, lba_detected = 0;
1455 int cylinders, heads, secs;
1456 uint64_t nb_sectors;
1457
1458 /* if a geometry hint is available, use it */
1459 bdrv_get_geometry(bs, &nb_sectors);
1460 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1461 translation = bdrv_get_translation_hint(bs);
1462 if (cylinders != 0) {
1463 *pcyls = cylinders;
1464 *pheads = heads;
1465 *psecs = secs;
1466 } else {
1467 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1468 if (heads > 16) {
1469 /* if heads > 16, it means that a BIOS LBA
1470 translation was active, so the default
1471 hardware geometry is OK */
1472 lba_detected = 1;
1473 goto default_geometry;
1474 } else {
1475 *pcyls = cylinders;
1476 *pheads = heads;
1477 *psecs = secs;
1478 /* disable any translation to be in sync with
1479 the logical geometry */
1480 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1481 bdrv_set_translation_hint(bs,
1482 BIOS_ATA_TRANSLATION_NONE);
1483 }
1484 }
1485 } else {
1486 default_geometry:
1487 /* if no geometry, use a standard physical disk geometry */
1488 cylinders = nb_sectors / (16 * 63);
1489
1490 if (cylinders > 16383)
1491 cylinders = 16383;
1492 else if (cylinders < 2)
1493 cylinders = 2;
1494 *pcyls = cylinders;
1495 *pheads = 16;
1496 *psecs = 63;
1497 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1498 if ((*pcyls * *pheads) <= 131072) {
1499 bdrv_set_translation_hint(bs,
1500 BIOS_ATA_TRANSLATION_LARGE);
1501 } else {
1502 bdrv_set_translation_hint(bs,
1503 BIOS_ATA_TRANSLATION_LBA);
1504 }
1505 }
1506 }
1507 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1508 }
1509 }
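/*
 * Worked example for the default branch above: a 2 GB image has
 * nb_sectors = 4194304, so cylinders = 4194304 / (16 * 63) = 4161,
 * heads = 16 and secs = 63.  Had an LBA translation been detected,
 * 4161 * 16 = 66576 <= 131072 would select BIOS_ATA_TRANSLATION_LARGE.
 */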
1510
1511 void bdrv_set_geometry_hint(BlockDriverState *bs,
1512 int cyls, int heads, int secs)
1513 {
1514 bs->cyls = cyls;
1515 bs->heads = heads;
1516 bs->secs = secs;
1517 }
1518
1519 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1520 {
1521 bs->translation = translation;
1522 }
1523
1524 void bdrv_get_geometry_hint(BlockDriverState *bs,
1525 int *pcyls, int *pheads, int *psecs)
1526 {
1527 *pcyls = bs->cyls;
1528 *pheads = bs->heads;
1529 *psecs = bs->secs;
1530 }
1531
1532 /* Recognize floppy formats */
1533 typedef struct FDFormat {
1534 FDriveType drive;
1535 uint8_t last_sect;
1536 uint8_t max_track;
1537 uint8_t max_head;
1538 } FDFormat;
1539
1540 static const FDFormat fd_formats[] = {
1541 /* First entry is default format */
1542 /* 1.44 MB 3"1/2 floppy disks */
1543 { FDRIVE_DRV_144, 18, 80, 1, },
1544 { FDRIVE_DRV_144, 20, 80, 1, },
1545 { FDRIVE_DRV_144, 21, 80, 1, },
1546 { FDRIVE_DRV_144, 21, 82, 1, },
1547 { FDRIVE_DRV_144, 21, 83, 1, },
1548 { FDRIVE_DRV_144, 22, 80, 1, },
1549 { FDRIVE_DRV_144, 23, 80, 1, },
1550 { FDRIVE_DRV_144, 24, 80, 1, },
1551 /* 2.88 MB 3"1/2 floppy disks */
1552 { FDRIVE_DRV_288, 36, 80, 1, },
1553 { FDRIVE_DRV_288, 39, 80, 1, },
1554 { FDRIVE_DRV_288, 40, 80, 1, },
1555 { FDRIVE_DRV_288, 44, 80, 1, },
1556 { FDRIVE_DRV_288, 48, 80, 1, },
1557 /* 720 kB 3"1/2 floppy disks */
1558 { FDRIVE_DRV_144, 9, 80, 1, },
1559 { FDRIVE_DRV_144, 10, 80, 1, },
1560 { FDRIVE_DRV_144, 10, 82, 1, },
1561 { FDRIVE_DRV_144, 10, 83, 1, },
1562 { FDRIVE_DRV_144, 13, 80, 1, },
1563 { FDRIVE_DRV_144, 14, 80, 1, },
1564 /* 1.2 MB 5"1/4 floppy disks */
1565 { FDRIVE_DRV_120, 15, 80, 1, },
1566 { FDRIVE_DRV_120, 18, 80, 1, },
1567 { FDRIVE_DRV_120, 18, 82, 1, },
1568 { FDRIVE_DRV_120, 18, 83, 1, },
1569 { FDRIVE_DRV_120, 20, 80, 1, },
1570 /* 720 kB 5"1/4 floppy disks */
1571 { FDRIVE_DRV_120, 9, 80, 1, },
1572 { FDRIVE_DRV_120, 11, 80, 1, },
1573 /* 360 kB 5"1/4 floppy disks */
1574 { FDRIVE_DRV_120, 9, 40, 1, },
1575 { FDRIVE_DRV_120, 9, 40, 0, },
1576 { FDRIVE_DRV_120, 10, 41, 1, },
1577 { FDRIVE_DRV_120, 10, 42, 1, },
1578 /* 320 kB 5"1/4 floppy disks */
1579 { FDRIVE_DRV_120, 8, 40, 1, },
1580 { FDRIVE_DRV_120, 8, 40, 0, },
1581 /* 360 kB must match 5"1/4 better than 3"1/2... */
1582 { FDRIVE_DRV_144, 9, 80, 0, },
1583 /* end */
1584 { FDRIVE_DRV_NONE, -1, -1, 0, },
1585 };
1586
1587 void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1588 int *max_track, int *last_sect,
1589 FDriveType drive_in, FDriveType *drive)
1590 {
1591 const FDFormat *parse;
1592 uint64_t nb_sectors, size;
1593 int i, first_match, match;
1594
1595 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1596 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1597 /* User defined disk */
1598 } else {
1599 bdrv_get_geometry(bs, &nb_sectors);
1600 match = -1;
1601 first_match = -1;
1602 for (i = 0; ; i++) {
1603 parse = &fd_formats[i];
1604 if (parse->drive == FDRIVE_DRV_NONE) {
1605 break;
1606 }
1607 if (drive_in == parse->drive ||
1608 drive_in == FDRIVE_DRV_NONE) {
1609 size = (parse->max_head + 1) * parse->max_track *
1610 parse->last_sect;
1611 if (nb_sectors == size) {
1612 match = i;
1613 break;
1614 }
1615 if (first_match == -1) {
1616 first_match = i;
1617 }
1618 }
1619 }
1620 if (match == -1) {
1621 if (first_match == -1) {
1622 match = 1;
1623 } else {
1624 match = first_match;
1625 }
1626 parse = &fd_formats[match];
1627 }
1628 *nb_heads = parse->max_head + 1;
1629 *max_track = parse->max_track;
1630 *last_sect = parse->last_sect;
1631 *drive = parse->drive;
1632 }
1633 }
1634
1635 int bdrv_get_translation_hint(BlockDriverState *bs)
1636 {
1637 return bs->translation;
1638 }
1639
1640 void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1641 BlockErrorAction on_write_error)
1642 {
1643 bs->on_read_error = on_read_error;
1644 bs->on_write_error = on_write_error;
1645 }
1646
1647 BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1648 {
1649 return is_read ? bs->on_read_error : bs->on_write_error;
1650 }
1651
1652 int bdrv_is_read_only(BlockDriverState *bs)
1653 {
1654 return bs->read_only;
1655 }
1656
1657 int bdrv_is_sg(BlockDriverState *bs)
1658 {
1659 return bs->sg;
1660 }
1661
1662 int bdrv_enable_write_cache(BlockDriverState *bs)
1663 {
1664 return bs->enable_write_cache;
1665 }
1666
1667 int bdrv_is_encrypted(BlockDriverState *bs)
1668 {
1669 if (bs->backing_hd && bs->backing_hd->encrypted)
1670 return 1;
1671 return bs->encrypted;
1672 }
1673
1674 int bdrv_key_required(BlockDriverState *bs)
1675 {
1676 BlockDriverState *backing_hd = bs->backing_hd;
1677
1678 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1679 return 1;
1680 return (bs->encrypted && !bs->valid_key);
1681 }
1682
1683 int bdrv_set_key(BlockDriverState *bs, const char *key)
1684 {
1685 int ret;
1686 if (bs->backing_hd && bs->backing_hd->encrypted) {
1687 ret = bdrv_set_key(bs->backing_hd, key);
1688 if (ret < 0)
1689 return ret;
1690 if (!bs->encrypted)
1691 return 0;
1692 }
1693 if (!bs->encrypted) {
1694 return -EINVAL;
1695 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1696 return -ENOMEDIUM;
1697 }
1698 ret = bs->drv->bdrv_set_key(bs, key);
1699 if (ret < 0) {
1700 bs->valid_key = 0;
1701 } else if (!bs->valid_key) {
1702 bs->valid_key = 1;
1703 /* call the change callback now, we skipped it on open */
1704 bdrv_dev_change_media_cb(bs, true);
1705 }
1706 return ret;
1707 }
1708
1709 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1710 {
1711 if (!bs->drv) {
1712 buf[0] = '\0';
1713 } else {
1714 pstrcpy(buf, buf_size, bs->drv->format_name);
1715 }
1716 }
1717
1718 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1719 void *opaque)
1720 {
1721 BlockDriver *drv;
1722
1723 QLIST_FOREACH(drv, &bdrv_drivers, list) {
1724 it(opaque, drv->format_name);
1725 }
1726 }
1727
1728 BlockDriverState *bdrv_find(const char *name)
1729 {
1730 BlockDriverState *bs;
1731
1732 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1733 if (!strcmp(name, bs->device_name)) {
1734 return bs;
1735 }
1736 }
1737 return NULL;
1738 }
1739
1740 BlockDriverState *bdrv_next(BlockDriverState *bs)
1741 {
1742 if (!bs) {
1743 return QTAILQ_FIRST(&bdrv_states);
1744 }
1745 return QTAILQ_NEXT(bs, list);
1746 }
1747
1748 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1749 {
1750 BlockDriverState *bs;
1751
1752 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1753 it(opaque, bs);
1754 }
1755 }
1756
1757 const char *bdrv_get_device_name(BlockDriverState *bs)
1758 {
1759 return bs->device_name;
1760 }
1761
1762 int bdrv_flush(BlockDriverState *bs)
1763 {
1764 if (bs->open_flags & BDRV_O_NO_FLUSH) {
1765 return 0;
1766 }
1767
1768 if (bs->drv && bdrv_has_async_flush(bs->drv) && qemu_in_coroutine()) {
1769 return bdrv_co_flush_em(bs);
1770 }
1771
1772 if (bs->drv && bs->drv->bdrv_flush) {
1773 return bs->drv->bdrv_flush(bs);
1774 }
1775
1776 /*
1777 * Some block drivers always operate in either writethrough or unsafe mode
1778  * and therefore don't support bdrv_flush. Usually qemu doesn't know how
1779 * the server works (because the behaviour is hardcoded or depends on
1780 * server-side configuration), so we can't ensure that everything is safe
1781 * on disk. Returning an error doesn't work because that would break guests
1782 * even if the server operates in writethrough mode.
1783 *
1784  * Let's hope the user knows what they're doing.
1785 */
1786 return 0;
1787 }
1788
1789 void bdrv_flush_all(void)
1790 {
1791 BlockDriverState *bs;
1792
1793 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1794 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
1795 bdrv_flush(bs);
1796 }
1797 }
1798 }
1799
1800 int bdrv_has_zero_init(BlockDriverState *bs)
1801 {
1802 assert(bs->drv);
1803
1804 if (bs->drv->bdrv_has_zero_init) {
1805 return bs->drv->bdrv_has_zero_init(bs);
1806 }
1807
1808 return 1;
1809 }
1810
1811 int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
1812 {
1813 if (!bs->drv) {
1814 return -ENOMEDIUM;
1815 }
1816 if (!bs->drv->bdrv_discard) {
1817 return 0;
1818 }
1819 return bs->drv->bdrv_discard(bs, sector_num, nb_sectors);
1820 }
1821
1822 /*
1823 * Returns true iff the specified sector is present in the disk image. Drivers
1824 * not implementing the functionality are assumed to not support backing files,
1825 * hence all their sectors are reported as allocated.
1826 *
1827 * 'pnum' is set to the number of sectors (including and immediately following
1828 * the specified sector) that are known to be in the same
1829 * allocated/unallocated state.
1830 *
1831 * 'nb_sectors' is the max value 'pnum' should be set to.
1832 */
1833 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1834 int *pnum)
1835 {
1836 int64_t n;
1837 if (!bs->drv->bdrv_is_allocated) {
1838 if (sector_num >= bs->total_sectors) {
1839 *pnum = 0;
1840 return 0;
1841 }
1842 n = bs->total_sectors - sector_num;
1843 *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1844 return 1;
1845 }
1846 return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1847 }
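/*
 * Sketch of walking an image's allocation map with bdrv_is_allocated(),
 * roughly what an image copy loop would do.  The 2048-sector step is an
 * arbitrary choice for the example.
 */
static void example_dump_allocation(BlockDriverState *bs)
{
    int64_t sector = 0;
    int64_t total = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
    int n;

    while (sector < total) {
        int allocated = bdrv_is_allocated(bs, sector, 2048, &n);
        if (n == 0) {
            break;
        }
        printf("sectors %" PRId64 "+%d: %s\n",
               sector, n, allocated ? "allocated" : "unallocated");
        sector += n;
    }
}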
1848
1849 void bdrv_mon_event(const BlockDriverState *bdrv,
1850 BlockMonEventAction action, int is_read)
1851 {
1852 QObject *data;
1853 const char *action_str;
1854
1855 switch (action) {
1856 case BDRV_ACTION_REPORT:
1857 action_str = "report";
1858 break;
1859 case BDRV_ACTION_IGNORE:
1860 action_str = "ignore";
1861 break;
1862 case BDRV_ACTION_STOP:
1863 action_str = "stop";
1864 break;
1865 default:
1866 abort();
1867 }
1868
1869 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1870 bdrv->device_name,
1871 action_str,
1872 is_read ? "read" : "write");
1873 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1874
1875 qobject_decref(data);
1876 }
1877
1878 static void bdrv_print_dict(QObject *obj, void *opaque)
1879 {
1880 QDict *bs_dict;
1881 Monitor *mon = opaque;
1882
1883 bs_dict = qobject_to_qdict(obj);
1884
1885 monitor_printf(mon, "%s: removable=%d",
1886 qdict_get_str(bs_dict, "device"),
1887 qdict_get_bool(bs_dict, "removable"));
1888
1889 if (qdict_get_bool(bs_dict, "removable")) {
1890 monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
1891 monitor_printf(mon, " tray-open=%d",
1892 qdict_get_bool(bs_dict, "tray-open"));
1893 }
1894
1895 if (qdict_haskey(bs_dict, "io-status")) {
1896 monitor_printf(mon, " io-status=%s", qdict_get_str(bs_dict, "io-status"));
1897 }
1898
1899 if (qdict_haskey(bs_dict, "inserted")) {
1900 QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted"));
1901
1902 monitor_printf(mon, " file=");
1903 monitor_print_filename(mon, qdict_get_str(qdict, "file"));
1904 if (qdict_haskey(qdict, "backing_file")) {
1905 monitor_printf(mon, " backing_file=");
1906 monitor_print_filename(mon, qdict_get_str(qdict, "backing_file"));
1907 }
1908 monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
1909 qdict_get_bool(qdict, "ro"),
1910 qdict_get_str(qdict, "drv"),
1911 qdict_get_bool(qdict, "encrypted"));
1912 } else {
1913 monitor_printf(mon, " [not inserted]");
1914 }
1915
1916 monitor_printf(mon, "\n");
1917 }
1918
1919 void bdrv_info_print(Monitor *mon, const QObject *data)
1920 {
1921 qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon);
1922 }
1923
1924 static const char *const io_status_name[BDRV_IOS_MAX] = {
1925 [BDRV_IOS_OK] = "ok",
1926 [BDRV_IOS_FAILED] = "failed",
1927 [BDRV_IOS_ENOSPC] = "nospace",
1928 };
1929
1930 void bdrv_info(Monitor *mon, QObject **ret_data)
1931 {
1932 QList *bs_list;
1933 BlockDriverState *bs;
1934
1935 bs_list = qlist_new();
1936
1937 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1938 QObject *bs_obj;
1939 QDict *bs_dict;
1940
1941 bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': 'unknown', "
1942 "'removable': %i, 'locked': %i }",
1943 bs->device_name,
1944 bdrv_dev_has_removable_media(bs),
1945 bdrv_dev_is_medium_locked(bs));
1946 bs_dict = qobject_to_qdict(bs_obj);
1947
1948 if (bdrv_dev_has_removable_media(bs)) {
1949 qdict_put(bs_dict, "tray-open",
1950 qbool_from_int(bdrv_dev_is_tray_open(bs)));
1951 }
1952
1953 if (bdrv_iostatus_is_enabled(bs)) {
1954 qdict_put(bs_dict, "io-status",
1955 qstring_from_str(io_status_name[bs->iostatus]));
1956 }
1957
1958 if (bs->drv) {
1959 QObject *obj;
1960
1961 obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
1962 "'encrypted': %i }",
1963 bs->filename, bs->read_only,
1964 bs->drv->format_name,
1965 bdrv_is_encrypted(bs));
1966 if (bs->backing_file[0] != '\0') {
1967 QDict *qdict = qobject_to_qdict(obj);
1968 qdict_put(qdict, "backing_file",
1969 qstring_from_str(bs->backing_file));
1970 }
1971
1972 qdict_put_obj(bs_dict, "inserted", obj);
1973 }
1974 qlist_append_obj(bs_list, bs_obj);
1975 }
1976
1977 *ret_data = QOBJECT(bs_list);
1978 }
1979
1980 static void bdrv_stats_iter(QObject *data, void *opaque)
1981 {
1982 QDict *qdict;
1983 Monitor *mon = opaque;
1984
1985 qdict = qobject_to_qdict(data);
1986 monitor_printf(mon, "%s:", qdict_get_str(qdict, "device"));
1987
1988 qdict = qobject_to_qdict(qdict_get(qdict, "stats"));
1989 monitor_printf(mon, " rd_bytes=%" PRId64
1990 " wr_bytes=%" PRId64
1991 " rd_operations=%" PRId64
1992 " wr_operations=%" PRId64
1993 " flush_operations=%" PRId64
1994 " wr_total_time_ns=%" PRId64
1995 " rd_total_time_ns=%" PRId64
1996 " flush_total_time_ns=%" PRId64
1997 "\n",
1998 qdict_get_int(qdict, "rd_bytes"),
1999 qdict_get_int(qdict, "wr_bytes"),
2000 qdict_get_int(qdict, "rd_operations"),
2001 qdict_get_int(qdict, "wr_operations"),
2002 qdict_get_int(qdict, "flush_operations"),
2003 qdict_get_int(qdict, "wr_total_time_ns"),
2004 qdict_get_int(qdict, "rd_total_time_ns"),
2005 qdict_get_int(qdict, "flush_total_time_ns"));
2006 }
2007
2008 void bdrv_stats_print(Monitor *mon, const QObject *data)
2009 {
2010 qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon);
2011 }
2012
2013 static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
2014 {
2015 QObject *res;
2016 QDict *dict;
2017
2018 res = qobject_from_jsonf("{ 'stats': {"
2019 "'rd_bytes': %" PRId64 ","
2020 "'wr_bytes': %" PRId64 ","
2021 "'rd_operations': %" PRId64 ","
2022 "'wr_operations': %" PRId64 ","
2023 "'wr_highest_offset': %" PRId64 ","
2024 "'flush_operations': %" PRId64 ","
2025 "'wr_total_time_ns': %" PRId64 ","
2026 "'rd_total_time_ns': %" PRId64 ","
2027 "'flush_total_time_ns': %" PRId64
2028 "} }",
2029 bs->nr_bytes[BDRV_ACCT_READ],
2030 bs->nr_bytes[BDRV_ACCT_WRITE],
2031 bs->nr_ops[BDRV_ACCT_READ],
2032 bs->nr_ops[BDRV_ACCT_WRITE],
2033 bs->wr_highest_sector *
2034 (uint64_t)BDRV_SECTOR_SIZE,
2035 bs->nr_ops[BDRV_ACCT_FLUSH],
2036 bs->total_time_ns[BDRV_ACCT_WRITE],
2037 bs->total_time_ns[BDRV_ACCT_READ],
2038 bs->total_time_ns[BDRV_ACCT_FLUSH]);
2039 dict = qobject_to_qdict(res);
2040
2041 if (*bs->device_name) {
2042 qdict_put(dict, "device", qstring_from_str(bs->device_name));
2043 }
2044
2045 if (bs->file) {
2046 QObject *parent = bdrv_info_stats_bs(bs->file);
2047 qdict_put_obj(dict, "parent", parent);
2048 }
2049
2050 return res;
2051 }
2052
2053 void bdrv_info_stats(Monitor *mon, QObject **ret_data)
2054 {
2055 QObject *obj;
2056 QList *devices;
2057 BlockDriverState *bs;
2058
2059 devices = qlist_new();
2060
2061 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2062 obj = bdrv_info_stats_bs(bs);
2063 qlist_append_obj(devices, obj);
2064 }
2065
2066 *ret_data = QOBJECT(devices);
2067 }
2068
2069 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2070 {
2071 if (bs->backing_hd && bs->backing_hd->encrypted)
2072 return bs->backing_file;
2073 else if (bs->encrypted)
2074 return bs->filename;
2075 else
2076 return NULL;
2077 }
2078
2079 void bdrv_get_backing_filename(BlockDriverState *bs,
2080 char *filename, int filename_size)
2081 {
2082 if (!bs->backing_file) {
2083 pstrcpy(filename, filename_size, "");
2084 } else {
2085 pstrcpy(filename, filename_size, bs->backing_file);
2086 }
2087 }
2088
2089 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
2090 const uint8_t *buf, int nb_sectors)
2091 {
2092 BlockDriver *drv = bs->drv;
2093 if (!drv)
2094 return -ENOMEDIUM;
2095 if (!drv->bdrv_write_compressed)
2096 return -ENOTSUP;
2097 if (bdrv_check_request(bs, sector_num, nb_sectors))
2098 return -EIO;
2099
2100 if (bs->dirty_bitmap) {
2101 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2102 }
2103
2104 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2105 }
2106
2107 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2108 {
2109 BlockDriver *drv = bs->drv;
2110 if (!drv)
2111 return -ENOMEDIUM;
2112 if (!drv->bdrv_get_info)
2113 return -ENOTSUP;
2114 memset(bdi, 0, sizeof(*bdi));
2115 return drv->bdrv_get_info(bs, bdi);
2116 }
2117
2118 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2119 int64_t pos, int size)
2120 {
2121 BlockDriver *drv = bs->drv;
2122 if (!drv)
2123 return -ENOMEDIUM;
2124 if (drv->bdrv_save_vmstate)
2125 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2126 if (bs->file)
2127 return bdrv_save_vmstate(bs->file, buf, pos, size);
2128 return -ENOTSUP;
2129 }
2130
2131 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2132 int64_t pos, int size)
2133 {
2134 BlockDriver *drv = bs->drv;
2135 if (!drv)
2136 return -ENOMEDIUM;
2137 if (drv->bdrv_load_vmstate)
2138 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2139 if (bs->file)
2140 return bdrv_load_vmstate(bs->file, buf, pos, size);
2141 return -ENOTSUP;
2142 }
2143
2144 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2145 {
2146 BlockDriver *drv = bs->drv;
2147
2148 if (!drv || !drv->bdrv_debug_event) {
2149 return;
2150 }
2151
2152 drv->bdrv_debug_event(bs, event);
2153
2154 }
2155
2156 /**************************************************************/
2157 /* handling of snapshots */
2158
2159 int bdrv_can_snapshot(BlockDriverState *bs)
2160 {
2161 BlockDriver *drv = bs->drv;
2162 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
2163 return 0;
2164 }
2165
2166 if (!drv->bdrv_snapshot_create) {
2167 if (bs->file != NULL) {
2168 return bdrv_can_snapshot(bs->file);
2169 }
2170 return 0;
2171 }
2172
2173 return 1;
2174 }
2175
2176 int bdrv_is_snapshot(BlockDriverState *bs)
2177 {
2178 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2179 }
2180
2181 BlockDriverState *bdrv_snapshots(void)
2182 {
2183 BlockDriverState *bs;
2184
2185 if (bs_snapshots) {
2186 return bs_snapshots;
2187 }
2188
2189 bs = NULL;
2190 while ((bs = bdrv_next(bs))) {
2191 if (bdrv_can_snapshot(bs)) {
2192 bs_snapshots = bs;
2193 return bs;
2194 }
2195 }
2196 return NULL;
2197 }
2198
2199 int bdrv_snapshot_create(BlockDriverState *bs,
2200 QEMUSnapshotInfo *sn_info)
2201 {
2202 BlockDriver *drv = bs->drv;
2203 if (!drv)
2204 return -ENOMEDIUM;
2205 if (drv->bdrv_snapshot_create)
2206 return drv->bdrv_snapshot_create(bs, sn_info);
2207 if (bs->file)
2208 return bdrv_snapshot_create(bs->file, sn_info);
2209 return -ENOTSUP;
2210 }
2211
2212 int bdrv_snapshot_goto(BlockDriverState *bs,
2213 const char *snapshot_id)
2214 {
2215 BlockDriver *drv = bs->drv;
2216 int ret, open_ret;
2217
2218 if (!drv)
2219 return -ENOMEDIUM;
2220 if (drv->bdrv_snapshot_goto)
2221 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2222
2223 if (bs->file) {
2224 drv->bdrv_close(bs);
2225 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2226 open_ret = drv->bdrv_open(bs, bs->open_flags);
2227 if (open_ret < 0) {
2228 bdrv_delete(bs->file);
2229 bs->drv = NULL;
2230 return open_ret;
2231 }
2232 return ret;
2233 }
2234
2235 return -ENOTSUP;
2236 }
2237
2238 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2239 {
2240 BlockDriver *drv = bs->drv;
2241 if (!drv)
2242 return -ENOMEDIUM;
2243 if (drv->bdrv_snapshot_delete)
2244 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2245 if (bs->file)
2246 return bdrv_snapshot_delete(bs->file, snapshot_id);
2247 return -ENOTSUP;
2248 }
2249
2250 int bdrv_snapshot_list(BlockDriverState *bs,
2251 QEMUSnapshotInfo **psn_info)
2252 {
2253 BlockDriver *drv = bs->drv;
2254 if (!drv)
2255 return -ENOMEDIUM;
2256 if (drv->bdrv_snapshot_list)
2257 return drv->bdrv_snapshot_list(bs, psn_info);
2258 if (bs->file)
2259 return bdrv_snapshot_list(bs->file, psn_info);
2260 return -ENOTSUP;
2261 }
2262
2263 int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2264 const char *snapshot_name)
2265 {
2266 BlockDriver *drv = bs->drv;
2267 if (!drv) {
2268 return -ENOMEDIUM;
2269 }
2270 if (!bs->read_only) {
2271 return -EINVAL;
2272 }
2273 if (drv->bdrv_snapshot_load_tmp) {
2274 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2275 }
2276 return -ENOTSUP;
2277 }
2278
2279 #define NB_SUFFIXES 4
2280
2281 char *get_human_readable_size(char *buf, int buf_size, int64_t size)
2282 {
2283 static const char suffixes[NB_SUFFIXES] = "KMGT";
2284 int64_t base;
2285 int i;
2286
2287 if (size <= 999) {
2288 snprintf(buf, buf_size, "%" PRId64, size);
2289 } else {
2290 base = 1024;
2291 for (i = 0; i < NB_SUFFIXES; i++) {
2292 if (size < (10 * base)) {
2293 snprintf(buf, buf_size, "%0.1f%c",
2294 (double)size / base,
2295 suffixes[i]);
2296 break;
2297 } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
2298 snprintf(buf, buf_size, "%" PRId64 "%c",
2299 ((size + (base >> 1)) / base),
2300 suffixes[i]);
2301 break;
2302 }
2303 base = base * 1024;
2304 }
2305 }
2306 return buf;
2307 }
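/*
 * Worked examples for the formatting above: 999 -> "999", 1000 -> "1.0K",
 * 1536 -> "1.5K", 500000 -> "488K" (rounded), 1048576 -> "1.0M".
 */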
2308
2309 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2310 {
2311 char buf1[128], date_buf[128], clock_buf[128];
2312 #ifdef _WIN32
2313 struct tm *ptm;
2314 #else
2315 struct tm tm;
2316 #endif
2317 time_t ti;
2318 int64_t secs;
2319
2320 if (!sn) {
2321 snprintf(buf, buf_size,
2322 "%-10s%-20s%7s%20s%15s",
2323 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2324 } else {
2325 ti = sn->date_sec;
2326 #ifdef _WIN32
2327 ptm = localtime(&ti);
2328 strftime(date_buf, sizeof(date_buf),
2329 "%Y-%m-%d %H:%M:%S", ptm);
2330 #else
2331 localtime_r(&ti, &tm);
2332 strftime(date_buf, sizeof(date_buf),
2333 "%Y-%m-%d %H:%M:%S", &tm);
2334 #endif
2335 secs = sn->vm_clock_nsec / 1000000000;
2336 snprintf(clock_buf, sizeof(clock_buf),
2337 "%02d:%02d:%02d.%03d",
2338 (int)(secs / 3600),
2339 (int)((secs / 60) % 60),
2340 (int)(secs % 60),
2341 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2342 snprintf(buf, buf_size,
2343 "%-10s%-20s%7s%20s%15s",
2344 sn->id_str, sn->name,
2345 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2346 date_buf,
2347 clock_buf);
2348 }
2349 return buf;
2350 }
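/*
 * Example of the listing this produces (e.g. for the monitor's
 * "info snapshots" output; values illustrative, alignment approximate):
 *
 *   ID        TAG                 VM SIZE                DATE       VM CLOCK
 *   1         snap1                  217M  2011-10-04 17:51:22   00:01:05.931
 */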
2351
2352 /**************************************************************/
2353 /* async I/Os */
2354
2355 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
2356 QEMUIOVector *qiov, int nb_sectors,
2357 BlockDriverCompletionFunc *cb, void *opaque)
2358 {
2359 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2360
2361 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
2362 cb, opaque, false);
2363 }
2364
2365 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2366 QEMUIOVector *qiov, int nb_sectors,
2367 BlockDriverCompletionFunc *cb, void *opaque)
2368 {
2369 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2370
2371 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
2372 cb, opaque, true);
2373 }
2374
2375
2376 typedef struct MultiwriteCB {
2377 int error;
2378 int num_requests;
2379 int num_callbacks;
2380 struct {
2381 BlockDriverCompletionFunc *cb;
2382 void *opaque;
2383 QEMUIOVector *free_qiov;
2384 void *free_buf;
2385 } callbacks[];
2386 } MultiwriteCB;
2387
2388 static void multiwrite_user_cb(MultiwriteCB *mcb)
2389 {
2390 int i;
2391
2392 for (i = 0; i < mcb->num_callbacks; i++) {
2393 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
2394 if (mcb->callbacks[i].free_qiov) {
2395 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2396 }
2397 g_free(mcb->callbacks[i].free_qiov);
2398 qemu_vfree(mcb->callbacks[i].free_buf);
2399 }
2400 }
2401
2402 static void multiwrite_cb(void *opaque, int ret)
2403 {
2404 MultiwriteCB *mcb = opaque;
2405
2406 trace_multiwrite_cb(mcb, ret);
2407
2408 if (ret < 0 && !mcb->error) {
2409 mcb->error = ret;
2410 }
2411
2412 mcb->num_requests--;
2413 if (mcb->num_requests == 0) {
2414 multiwrite_user_cb(mcb);
2415 g_free(mcb);
2416 }
2417 }
2418
2419 static int multiwrite_req_compare(const void *a, const void *b)
2420 {
2421 const BlockRequest *req1 = a, *req2 = b;
2422
2423 /*
2424 * Note that we can't simply subtract req2->sector from req1->sector
2425 * here as that could overflow the return value.
2426 */
2427 if (req1->sector > req2->sector) {
2428 return 1;
2429 } else if (req1->sector < req2->sector) {
2430 return -1;
2431 } else {
2432 return 0;
2433 }
2434 }
2435
2436 /*
2437 * Takes a bunch of requests and tries to merge them. Returns the number of
2438 * requests that remain after merging.
2439 */
2440 static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2441 int num_reqs, MultiwriteCB *mcb)
2442 {
2443 int i, outidx;
2444
2445 // Sort requests by start sector
2446 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2447
2448 // Check if adjacent requests touch the same clusters. If so, combine them,
2449 // filling up gaps with zero sectors.
2450 outidx = 0;
2451 for (i = 1; i < num_reqs; i++) {
2452 int merge = 0;
2453 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2454
2455 // This handles the cases that are valid for all block drivers, namely
2456 // exactly sequential writes and overlapping writes.
2457 if (reqs[i].sector <= oldreq_last) {
2458 merge = 1;
2459 }
2460
2461 // The block driver may decide that it makes sense to combine requests
2462 // even if there is a gap of some sectors between them. In this case,
2463 // the gap is filled with zeros (therefore this is only applicable to
2464 // as yet unused space in formats like qcow2).
2465 if (!merge && bs->drv->bdrv_merge_requests) {
2466 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2467 }
2468
2469 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2470 merge = 0;
2471 }
2472
2473 if (merge) {
2474 size_t size;
2475 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
2476 qemu_iovec_init(qiov,
2477 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2478
2479 // Add the first request to the merged one. If the requests are
2480 // overlapping, drop the last sectors of the first request.
2481 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2482 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2483
2484 // We might need to add some zeros between the two requests
2485 if (reqs[i].sector > oldreq_last) {
2486 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2487 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2488 memset(buf, 0, zero_bytes);
2489 qemu_iovec_add(qiov, buf, zero_bytes);
2490 mcb->callbacks[i].free_buf = buf;
2491 }
2492
2493 // Add the second request
2494 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2495
2496 reqs[outidx].nb_sectors = qiov->size >> 9;
2497 reqs[outidx].qiov = qiov;
2498
2499 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2500 } else {
2501 outidx++;
2502 reqs[outidx].sector = reqs[i].sector;
2503 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2504 reqs[outidx].qiov = reqs[i].qiov;
2505 }
2506 }
2507
2508 return outidx + 1;
2509 }
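/*
 * Example of the merge step above (sector numbers illustrative): two
 * sorted requests A = [sector 0, 8 sectors] and B = [sector 8, 8 sectors]
 * are exactly sequential (B.sector <= A.sector + A.nb_sectors), so they
 * collapse into one 16-sector request whose qiov is the concatenation of
 * A's and B's vectors.  If B started at sector 10 instead and the driver's
 * bdrv_merge_requests callback accepted the pair, the two-sector gap would
 * first be filled with a zeroed bounce buffer.
 */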
2510
2511 /*
2512 * Submit multiple AIO write requests at once.
2513 *
2514 * On success, the function returns 0 and all requests in the reqs array have
2515 * been submitted. In the error case, this function returns -1, and any of the
2516 * requests may or may not have been submitted yet. In particular, this means
2517 * that the callback will be called for some of the requests and not for
2518 * others. The caller must check the error field of each BlockRequest to know
2519 * which callbacks to expect (if error != 0, no callback will be called).
2520 *
2521 * The implementation may modify the contents of the reqs array, e.g. to merge
2522 * requests. However, the fields opaque and error are left unmodified as they
2523 * are used to signal failure for a single request to the caller.
2524 */
2525 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2526 {
2527 BlockDriverAIOCB *acb;
2528 MultiwriteCB *mcb;
2529 int i;
2530
2531 /* don't submit writes if we don't have a medium */
2532 if (bs->drv == NULL) {
2533 for (i = 0; i < num_reqs; i++) {
2534 reqs[i].error = -ENOMEDIUM;
2535 }
2536 return -1;
2537 }
2538
2539 if (num_reqs == 0) {
2540 return 0;
2541 }
2542
2543 // Create MultiwriteCB structure
2544 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
2545 mcb->num_requests = 0;
2546 mcb->num_callbacks = num_reqs;
2547
2548 for (i = 0; i < num_reqs; i++) {
2549 mcb->callbacks[i].cb = reqs[i].cb;
2550 mcb->callbacks[i].opaque = reqs[i].opaque;
2551 }
2552
2553 // Check for mergeable requests
2554 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2555
2556 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2557
2558 /*
2559 * Run the aio requests. As soon as one request can't be submitted
2560 * successfully, fail all requests that are not yet submitted (we must
2561 * return failure for all requests anyway)
2562 *
2563 * num_requests cannot be set to the right value immediately: If
2564 * bdrv_aio_writev fails for some request, num_requests would be too high
2565 * and therefore multiwrite_cb() would never recognize the multiwrite
2566 * request as completed. We also cannot use the loop variable i to set it
2567 * when the first request fails because the callback may already have been
2568 * called for previously submitted requests. Thus, num_requests must be
2569 * incremented for each request that is submitted.
2570 *
2571 * The problem that callbacks may be called early also means that we need
2572 * to take care that num_requests doesn't become 0 before all requests are
2573 * submitted - multiwrite_cb() would consider the multiwrite request
2574 * completed. A dummy request that is "completed" by a manual call to
2575 * multiwrite_cb() takes care of this.
2576 */
2577 mcb->num_requests = 1;
2578
2579 // Run the aio requests
2580 for (i = 0; i < num_reqs; i++) {
2581 mcb->num_requests++;
2582 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2583 reqs[i].nb_sectors, multiwrite_cb, mcb);
2584
2585 if (acb == NULL) {
2586 // We can only fail the whole thing if no request has been
2587 // submitted yet. Otherwise we'll wait for the submitted AIOs to
2588 // complete and report the error in the callback.
2589 if (i == 0) {
2590 trace_bdrv_aio_multiwrite_earlyfail(mcb);
2591 goto fail;
2592 } else {
2593 trace_bdrv_aio_multiwrite_latefail(mcb, i);
2594 multiwrite_cb(mcb, -EIO);
2595 break;
2596 }
2597 }
2598 }
2599
2600 /* Complete the dummy request */
2601 multiwrite_cb(mcb, 0);
2602
2603 return 0;
2604
2605 fail:
2606 for (i = 0; i < mcb->num_callbacks; i++) {
2607 reqs[i].error = -EIO;
2608 }
2609 g_free(mcb);
2610 return -1;
2611 }
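/*
 * Minimal caller sketch for the interface above (illustrative only;
 * my_write_cb, qiov0/qiov1 and the opaque pointers are placeholders that a
 * real caller would provide):
 *
 *   BlockRequest reqs[2] = {
 *       { .sector = 0,  .nb_sectors = 8,  .qiov = qiov0,
 *         .cb = my_write_cb, .opaque = req0_state },
 *       { .sector = 16, .nb_sectors = 8,  .qiov = qiov1,
 *         .cb = my_write_cb, .opaque = req1_state },
 *   };
 *
 *   if (bdrv_aio_multiwrite(bs, reqs, 2) < 0) {
 *       // Only requests with reqs[i].error == 0 will still get a callback.
 *   }
 */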
2612
2613 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2614 BlockDriverCompletionFunc *cb, void *opaque)
2615 {
2616 BlockDriver *drv = bs->drv;
2617
2618 trace_bdrv_aio_flush(bs, opaque);
2619
2620 if (bs->open_flags & BDRV_O_NO_FLUSH) {
2621 return bdrv_aio_noop_em(bs, cb, opaque);
2622 }
2623
2624 if (!drv)
2625 return NULL;
2626 return drv->bdrv_aio_flush(bs, cb, opaque);
2627 }
2628
2629 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
2630 {
2631 acb->pool->cancel(acb);
2632 }
2633
2634
2635 /**************************************************************/
2636 /* async block device emulation */
2637
2638 typedef struct BlockDriverAIOCBSync {
2639 BlockDriverAIOCB common;
2640 QEMUBH *bh;
2641 int ret;
2642 /* vector translation state */
2643 QEMUIOVector *qiov;
2644 uint8_t *bounce;
2645 int is_write;
2646 } BlockDriverAIOCBSync;
2647
2648 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2649 {
2650 BlockDriverAIOCBSync *acb =
2651 container_of(blockacb, BlockDriverAIOCBSync, common);
2652 qemu_bh_delete(acb->bh);
2653 acb->bh = NULL;
2654 qemu_aio_release(acb);
2655 }
2656
2657 static AIOPool bdrv_em_aio_pool = {
2658 .aiocb_size = sizeof(BlockDriverAIOCBSync),
2659 .cancel = bdrv_aio_cancel_em,
2660 };
2661
2662 static void bdrv_aio_bh_cb(void *opaque)
2663 {
2664 BlockDriverAIOCBSync *acb = opaque;
2665
2666 if (!acb->is_write)
2667 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
2668 qemu_vfree(acb->bounce);
2669 acb->common.cb(acb->common.opaque, acb->ret);
2670 qemu_bh_delete(acb->bh);
2671 acb->bh = NULL;
2672 qemu_aio_release(acb);
2673 }
2674
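/*
 * Synchronous emulation of AIO: the request is carried out immediately
 * with the driver's blocking bdrv_read/bdrv_write through a bounce buffer,
 * and completion is merely deferred to a bottom half so the caller still
 * sees asynchronous semantics.
 */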
2675 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2676 int64_t sector_num,
2677 QEMUIOVector *qiov,
2678 int nb_sectors,
2679 BlockDriverCompletionFunc *cb,
2680 void *opaque,
2681 int is_write)
2682
2683 {
2684 BlockDriverAIOCBSync *acb;
2685
2686 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2687 acb->is_write = is_write;
2688 acb->qiov = qiov;
2689 acb->bounce = qemu_blockalign(bs, qiov->size);
2690
2691 if (!acb->bh)
2692 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2693
2694 if (is_write) {
2695 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
2696 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
2697 } else {
2698 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
2699 }
2700
2701 qemu_bh_schedule(acb->bh);
2702
2703 return &acb->common;
2704 }
2705
2706 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2707 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2708 BlockDriverCompletionFunc *cb, void *opaque)
2709 {
2710 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2711 }
2712
2713 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2714 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2715 BlockDriverCompletionFunc *cb, void *opaque)
2716 {
2717 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
2718 }
2719
2720
2721 typedef struct BlockDriverAIOCBCoroutine {
2722 BlockDriverAIOCB common;
2723 BlockRequest req;
2724 bool is_write;
2725 QEMUBH* bh;
2726 } BlockDriverAIOCBCoroutine;
2727
2728 static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
2729 {
2730 qemu_aio_flush();
2731 }
2732
2733 static AIOPool bdrv_em_co_aio_pool = {
2734 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
2735 .cancel = bdrv_aio_co_cancel_em,
2736 };
2737
2738 static void bdrv_co_rw_bh(void *opaque)
2739 {
2740 BlockDriverAIOCBCoroutine *acb = opaque;
2741
2742 acb->common.cb(acb->common.opaque, acb->req.error);
2743 qemu_bh_delete(acb->bh);
2744 qemu_aio_release(acb);
2745 }
2746
2747 /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
2748 static void coroutine_fn bdrv_co_do_rw(void *opaque)
2749 {
2750 BlockDriverAIOCBCoroutine *acb = opaque;
2751 BlockDriverState *bs = acb->common.bs;
2752
2753 if (!acb->is_write) {
2754 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
2755 acb->req.nb_sectors, acb->req.qiov);
2756 } else {
2757 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
2758 acb->req.nb_sectors, acb->req.qiov);
2759 }
2760
2761 acb->bh = qemu_bh_new(bdrv_co_rw_bh, acb);
2762 qemu_bh_schedule(acb->bh);
2763 }
2764
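/*
 * Coroutine-based AIO emulation: create a coroutine running bdrv_co_do_rw(),
 * which issues the request through bdrv_co_do_readv()/bdrv_co_do_writev()
 * and then schedules bdrv_co_rw_bh() as a bottom half to deliver the
 * completion callback outside of coroutine context.
 */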
2765 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
2766 int64_t sector_num,
2767 QEMUIOVector *qiov,
2768 int nb_sectors,
2769 BlockDriverCompletionFunc *cb,
2770 void *opaque,
2771 bool is_write)
2772 {
2773 Coroutine *co;
2774 BlockDriverAIOCBCoroutine *acb;
2775
2776 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2777 acb->req.sector = sector_num;
2778 acb->req.nb_sectors = nb_sectors;
2779 acb->req.qiov = qiov;
2780 acb->is_write = is_write;
2781
2782 co = qemu_coroutine_create(bdrv_co_do_rw);
2783 qemu_coroutine_enter(co, acb);
2784
2785 return &acb->common;
2786 }
2787
2788 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
2789 BlockDriverCompletionFunc *cb, void *opaque)
2790 {
2791 BlockDriverAIOCBSync *acb;
2792
2793 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2794 acb->is_write = 1; /* don't bounce in the completion handler */
2795 acb->qiov = NULL;
2796 acb->bounce = NULL;
2797 acb->ret = 0;
2798
2799 if (!acb->bh)
2800 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2801
2802 bdrv_flush(bs);
2803 qemu_bh_schedule(acb->bh);
2804 return &acb->common;
2805 }
2806
2807 static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
2808 BlockDriverCompletionFunc *cb, void *opaque)
2809 {
2810 BlockDriverAIOCBSync *acb;
2811
2812 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2813 acb->is_write = 1; /* don't bounce in the completion handler */
2814 acb->qiov = NULL;
2815 acb->bounce = NULL;
2816 acb->ret = 0;
2817
2818 if (!acb->bh) {
2819 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2820 }
2821
2822 qemu_bh_schedule(acb->bh);
2823 return &acb->common;
2824 }
2825
2826 void bdrv_init(void)
2827 {
2828 module_call_init(MODULE_INIT_BLOCK);
2829 }
2830
2831 void bdrv_init_with_whitelist(void)
2832 {
2833 use_bdrv_whitelist = 1;
2834 bdrv_init();
2835 }
2836
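/*
 * AIOCBs are recycled through a per-pool free list: qemu_aio_get() pops a
 * previously released AIOCB if one is available (otherwise it allocates a
 * zeroed one of pool->aiocb_size bytes), and qemu_aio_release() pushes it
 * back.  Note that recycled AIOCBs are not re-zeroed.
 */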
2837 void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2838 BlockDriverCompletionFunc *cb, void *opaque)
2839 {
2840 BlockDriverAIOCB *acb;
2841
2842 if (pool->free_aiocb) {
2843 acb = pool->free_aiocb;
2844 pool->free_aiocb = acb->next;
2845 } else {
2846 acb = g_malloc0(pool->aiocb_size);
2847 acb->pool = pool;
2848 }
2849 acb->bs = bs;
2850 acb->cb = cb;
2851 acb->opaque = opaque;
2852 return acb;
2853 }
2854
2855 void qemu_aio_release(void *p)
2856 {
2857 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2858 AIOPool *pool = acb->pool;
2859 acb->next = pool->free_aiocb;
2860 pool->free_aiocb = acb;
2861 }
2862
2863 /**************************************************************/
2864 /* Coroutine block device emulation */
2865
2866 typedef struct CoroutineIOCompletion {
2867 Coroutine *coroutine;
2868 int ret;
2869 } CoroutineIOCompletion;
2870
2871 static void bdrv_co_io_em_complete(void *opaque, int ret)
2872 {
2873 CoroutineIOCompletion *co = opaque;
2874
2875 co->ret = ret;
2876 qemu_coroutine_enter(co->coroutine, NULL);
2877 }
2878
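/*
 * Bridge from coroutine context to the driver's callback-based AIO
 * interface: submit the request with bdrv_co_io_em_complete() as the
 * completion callback, yield, and get re-entered (with co.ret filled in)
 * once the AIO request completes.
 */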
2879 static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
2880 int nb_sectors, QEMUIOVector *iov,
2881 bool is_write)
2882 {
2883 CoroutineIOCompletion co = {
2884 .coroutine = qemu_coroutine_self(),
2885 };
2886 BlockDriverAIOCB *acb;
2887
2888 if (is_write) {
2889 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
2890 bdrv_co_io_em_complete, &co);
2891 } else {
2892 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
2893 bdrv_co_io_em_complete, &co);
2894 }
2895
2896 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
2897 if (!acb) {
2898 return -EIO;
2899 }
2900 qemu_coroutine_yield();
2901
2902 return co.ret;
2903 }
2904
2905 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
2906 int64_t sector_num, int nb_sectors,
2907 QEMUIOVector *iov)
2908 {
2909 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
2910 }
2911
2912 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
2913 int64_t sector_num, int nb_sectors,
2914 QEMUIOVector *iov)
2915 {
2916 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
2917 }
2918
2919 static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs)
2920 {
2921 CoroutineIOCompletion co = {
2922 .coroutine = qemu_coroutine_self(),
2923 };
2924 BlockDriverAIOCB *acb;
2925
2926 acb = bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
2927 if (!acb) {
2928 return -EIO;
2929 }
2930 qemu_coroutine_yield();
2931 return co.ret;
2932 }
2933
2934 /**************************************************************/
2935 /* removable device support */
2936
2937 /**
2938 * Return TRUE if the media is present
2939 */
2940 int bdrv_is_inserted(BlockDriverState *bs)
2941 {
2942 BlockDriver *drv = bs->drv;
2943
2944 if (!drv)
2945 return 0;
2946 if (!drv->bdrv_is_inserted)
2947 return 1;
2948 return drv->bdrv_is_inserted(bs);
2949 }
2950
2951 /**
2952 * Return whether the media changed since the last call to this
2953 * function, or -ENOTSUP if we don't know. Most drivers don't know.
2954 */
2955 int bdrv_media_changed(BlockDriverState *bs)
2956 {
2957 BlockDriver *drv = bs->drv;
2958
2959 if (drv && drv->bdrv_media_changed) {
2960 return drv->bdrv_media_changed(bs);
2961 }
2962 return -ENOTSUP;
2963 }
2964
2965 /**
2966 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
2967 */
2968 void bdrv_eject(BlockDriverState *bs, int eject_flag)
2969 {
2970 BlockDriver *drv = bs->drv;
2971
2972 if (drv && drv->bdrv_eject) {
2973 drv->bdrv_eject(bs, eject_flag);
2974 }
2975 }
2976
2977 /**
2978 * Lock or unlock the media (if it is locked, the user won't be able
2979 * to eject it manually).
2980 */
2981 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
2982 {
2983 BlockDriver *drv = bs->drv;
2984
2985 trace_bdrv_lock_medium(bs, locked);
2986
2987 if (drv && drv->bdrv_lock_medium) {
2988 drv->bdrv_lock_medium(bs, locked);
2989 }
2990 }
2991
2992 /* needed for generic scsi interface */
2993
2994 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
2995 {
2996 BlockDriver *drv = bs->drv;
2997
2998 if (drv && drv->bdrv_ioctl)
2999 return drv->bdrv_ioctl(bs, req, buf);
3000 return -ENOTSUP;
3001 }
3002
3003 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3004 unsigned long int req, void *buf,
3005 BlockDriverCompletionFunc *cb, void *opaque)
3006 {
3007 BlockDriver *drv = bs->drv;
3008
3009 if (drv && drv->bdrv_aio_ioctl)
3010 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3011 return NULL;
3012 }
3013
3014 void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3015 {
3016 bs->buffer_alignment = align;
3017 }
3018
3019 void *qemu_blockalign(BlockDriverState *bs, size_t size)
3020 {
3021 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3022 }
3023
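/*
 * Dirty tracking granularity: one bit per chunk of
 * BDRV_SECTORS_PER_DIRTY_CHUNK sectors, so the bitmap needs
 * ceil(nb_sectors / (BDRV_SECTORS_PER_DIRTY_CHUNK * 8)) bytes, which is
 * what the rounded division below computes from bdrv_getlength().
 */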
3024 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3025 {
3026 int64_t bitmap_size;
3027
3028 bs->dirty_count = 0;
3029 if (enable) {
3030 if (!bs->dirty_bitmap) {
3031 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3032 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3033 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
3034
3035 bs->dirty_bitmap = g_malloc0(bitmap_size);
3036 }
3037 } else {
3038 if (bs->dirty_bitmap) {
3039 g_free(bs->dirty_bitmap);
3040 bs->dirty_bitmap = NULL;
3041 }
3042 }
3043 }
3044
3045 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3046 {
3047 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
3048
3049 if (bs->dirty_bitmap &&
3050 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
3051 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3052 (1UL << (chunk % (sizeof(unsigned long) * 8))));
3053 } else {
3054 return 0;
3055 }
3056 }
3057
3058 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3059 int nr_sectors)
3060 {
3061 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3062 }
3063
3064 int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3065 {
3066 return bs->dirty_count;
3067 }
3068
3069 void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3070 {
3071 assert(bs->in_use != in_use);
3072 bs->in_use = in_use;
3073 }
3074
3075 int bdrv_in_use(BlockDriverState *bs)
3076 {
3077 return bs->in_use;
3078 }
3079
3080 void bdrv_iostatus_enable(BlockDriverState *bs)
3081 {
3082 bs->iostatus = BDRV_IOS_OK;
3083 }
3084
3085 /* The I/O status is only enabled if the drive explicitly
3086 * enables it _and_ the VM is configured to stop on errors */
3087 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3088 {
3089 return (bs->iostatus != BDRV_IOS_INVAL &&
3090 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3091 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3092 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3093 }
3094
3095 void bdrv_iostatus_disable(BlockDriverState *bs)
3096 {
3097 bs->iostatus = BDRV_IOS_INVAL;
3098 }
3099
3100 void bdrv_iostatus_reset(BlockDriverState *bs)
3101 {
3102 if (bdrv_iostatus_is_enabled(bs)) {
3103 bs->iostatus = BDRV_IOS_OK;
3104 }
3105 }
3106
3107 /* XXX: Today this is set by device models because it makes the implementation
3108 quite simple. However, the block layer knows about the error, so it's
3109 possible to implement this without device models being involved */
3110 void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3111 {
3112 if (bdrv_iostatus_is_enabled(bs) && bs->iostatus == BDRV_IOS_OK) {
3113 assert(error >= 0);
3114 bs->iostatus = error == ENOSPC ? BDRV_IOS_ENOSPC : BDRV_IOS_FAILED;
3115 }
3116 }
3117
3118 void
3119 bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3120 enum BlockAcctType type)
3121 {
3122 assert(type < BDRV_MAX_IOTYPE);
3123
3124 cookie->bytes = bytes;
3125 cookie->start_time_ns = get_clock();
3126 cookie->type = type;
3127 }
3128
3129 void
3130 bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3131 {
3132 assert(cookie->type < BDRV_MAX_IOTYPE);
3133
3134 bs->nr_bytes[cookie->type] += cookie->bytes;
3135 bs->nr_ops[cookie->type]++;
3136 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
3137 }
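/*
 * Typical accounting pattern (sketch; the cookie normally lives in the
 * device model's request state):
 *
 *   BlockAcctCookie cookie;
 *   bdrv_acct_start(bs, &cookie, nb_sectors * BDRV_SECTOR_SIZE,
 *                   BDRV_ACCT_READ);
 *   ... issue the read and wait for it to complete ...
 *   bdrv_acct_done(bs, &cookie);
 */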
3138
3139 int bdrv_img_create(const char *filename, const char *fmt,
3140 const char *base_filename, const char *base_fmt,
3141 char *options, uint64_t img_size, int flags)
3142 {
3143 QEMUOptionParameter *param = NULL, *create_options = NULL;
3144 QEMUOptionParameter *backing_fmt, *backing_file, *size;
3145 BlockDriverState *bs = NULL;
3146 BlockDriver *drv, *proto_drv;
3147 BlockDriver *backing_drv = NULL;
3148 int ret = 0;
3149
3150 /* Find driver and parse its options */
3151 drv = bdrv_find_format(fmt);
3152 if (!drv) {
3153 error_report("Unknown file format '%s'", fmt);
3154 ret = -EINVAL;
3155 goto out;
3156 }
3157
3158 proto_drv = bdrv_find_protocol(filename);
3159 if (!proto_drv) {
3160 error_report("Unknown protocol '%s'", filename);
3161 ret = -EINVAL;
3162 goto out;
3163 }
3164
3165 create_options = append_option_parameters(create_options,
3166 drv->create_options);
3167 create_options = append_option_parameters(create_options,
3168 proto_drv->create_options);
3169
3170 /* Create parameter list with default values */
3171 param = parse_option_parameters("", create_options, param);
3172
3173 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3174
3175 /* Parse -o options */
3176 if (options) {
3177 param = parse_option_parameters(options, create_options, param);
3178 if (param == NULL) {
3179 error_report("Invalid options for file format '%s'.", fmt);
3180 ret = -EINVAL;
3181 goto out;
3182 }
3183 }
3184
3185 if (base_filename) {
3186 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3187 base_filename)) {
3188 error_report("Backing file not supported for file format '%s'",
3189 fmt);
3190 ret = -EINVAL;
3191 goto out;
3192 }
3193 }
3194
3195 if (base_fmt) {
3196 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3197 error_report("Backing file format not supported for file "
3198 "format '%s'", fmt);
3199 ret = -EINVAL;
3200 goto out;
3201 }
3202 }
3203
3204 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3205 if (backing_file && backing_file->value.s) {
3206 if (!strcmp(filename, backing_file->value.s)) {
3207 error_report("Error: Trying to create an image with the "
3208 "same filename as the backing file");
3209 ret = -EINVAL;
3210 goto out;
3211 }
3212 }
3213
3214 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3215 if (backing_fmt && backing_fmt->value.s) {
3216 backing_drv = bdrv_find_format(backing_fmt->value.s);
3217 if (!backing_drv) {
3218 error_report("Unknown backing file format '%s'",
3219 backing_fmt->value.s);
3220 ret = -EINVAL;
3221 goto out;
3222 }
3223 }
3224
3225 // The size for the image must always be specified, with one exception:
3226 // If we are using a backing file, we can obtain the size from there
3227 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3228 if (size && size->value.n == -1) {
3229 if (backing_file && backing_file->value.s) {
3230 uint64_t size;
3231 char buf[32];
3232
3233 bs = bdrv_new("");
3234
3235 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
3236 if (ret < 0) {
3237 error_report("Could not open '%s'", backing_file->value.s);
3238 goto out;
3239 }
3240 bdrv_get_geometry(bs, &size);
3241 size *= 512;
3242
3243 snprintf(buf, sizeof(buf), "%" PRId64, size);
3244 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3245 } else {
3246 error_report("Image creation needs a size parameter");
3247 ret = -EINVAL;
3248 goto out;
3249 }
3250 }
3251
3252 printf("Formatting '%s', fmt=%s ", filename, fmt);
3253 print_option_parameters(param);
3254 puts("");
3255
3256 ret = bdrv_create(drv, filename, param);
3257
3258 if (ret < 0) {
3259 if (ret == -ENOTSUP) {
3260 error_report("Formatting or formatting option not supported for "
3261 "file format '%s'", fmt);
3262 } else if (ret == -EFBIG) {
3263 error_report("The image size is too large for file format '%s'",
3264 fmt);
3265 } else {
3266 error_report("%s: error while creating %s: %s", filename, fmt,
3267 strerror(-ret));
3268 }
3269 }
3270
3271 out:
3272 free_option_parameters(create_options);
3273 free_option_parameters(param);
3274
3275 if (bs) {
3276 bdrv_delete(bs);
3277 }
3278
3279 return ret;
3280 }
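/*
 * Illustrative call (roughly what "qemu-img create -f qcow2 disk.qcow2 8G"
 * boils down to; options, backing file and open flags omitted):
 *
 *   ret = bdrv_img_create("disk.qcow2", "qcow2", NULL, NULL, NULL,
 *                         (uint64_t)8 * 1024 * 1024 * 1024, 0);
 */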