/*
 * Source: git.proxmox.com mirror of qemu.git — block.c
 * Commit: "block: drop .bdrv_read()/.bdrv_write() emulation"
 */
1 /*
2 * QEMU System Emulator block driver
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "monitor.h"
28 #include "block_int.h"
29 #include "module.h"
30 #include "qemu-objects.h"
31 #include "qemu-coroutine.h"
32
33 #ifdef CONFIG_BSD
34 #include <sys/types.h>
35 #include <sys/stat.h>
36 #include <sys/ioctl.h>
37 #include <sys/queue.h>
38 #ifndef __DragonFly__
39 #include <sys/disk.h>
40 #endif
41 #endif
42
43 #ifdef _WIN32
44 #include <windows.h>
45 #endif
46
47 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
48
49 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
50 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
51 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
52 BlockDriverCompletionFunc *cb, void *opaque);
53 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
54 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
55 BlockDriverCompletionFunc *cb, void *opaque);
56 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
57 BlockDriverCompletionFunc *cb, void *opaque);
58 static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
59 BlockDriverCompletionFunc *cb, void *opaque);
60 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
61 int64_t sector_num, int nb_sectors,
62 QEMUIOVector *iov);
63 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
64 int64_t sector_num, int nb_sectors,
65 QEMUIOVector *iov);
66 static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs);
67 static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
68 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
69 static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
70 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
71 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
72 int64_t sector_num,
73 QEMUIOVector *qiov,
74 int nb_sectors,
75 BlockDriverCompletionFunc *cb,
76 void *opaque,
77 bool is_write);
78 static void coroutine_fn bdrv_co_do_rw(void *opaque);
79
80 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
81 QTAILQ_HEAD_INITIALIZER(bdrv_states);
82
83 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
84 QLIST_HEAD_INITIALIZER(bdrv_drivers);
85
86 /* The device to use for VM snapshots */
87 static BlockDriverState *bs_snapshots;
88
89 /* If non-zero, use only whitelisted block drivers */
90 static int use_bdrv_whitelist;
91
92 #ifdef _WIN32
/* Return non-zero when filename begins with a drive letter followed by
 * ':', e.g. "c:" or "Z:\foo". */
static int is_windows_drive_prefix(const char *filename)
{
    char c = filename[0];

    if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) {
        return 0;
    }
    return filename[1] == ':';
}
99
100 int is_windows_drive(const char *filename)
101 {
102 if (is_windows_drive_prefix(filename) &&
103 filename[2] == '\0')
104 return 1;
105 if (strstart(filename, "\\\\.\\", NULL) ||
106 strstart(filename, "//./", NULL))
107 return 1;
108 return 0;
109 }
110 #endif
111
/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    /* Windows drive specifications ("c:", "c:\foo") contain a colon but
     * are not protocol prefixes. */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#endif
    return strchr(path, ':') != NULL;
}
124
/* Return 1 when 'path' is absolute.  Any "protocol:" prefix is skipped
 * first; on Windows both '/' and '\' count as separators and "\\.\d:"
 * style device names are treated as absolute. */
int path_is_absolute(const char *path)
{
    const char *rest;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif
    rest = strchr(path, ':');
    rest = rest ? rest + 1 : path;
#ifdef _WIN32
    return *rest == '/' || *rest == '\\';
#else
    return *rest == '/';
#endif
}
144
/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URL are
   supported. Output is at most dest_size bytes, always NUL-terminated. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        /* p: first character after a "protocol:" prefix, if any */
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        /* p1: one past the last directory separator in base_path */
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            /* on Windows a backslash further right wins over a slash */
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        /* keep everything up to whichever marker lies further right */
        if (p1 > p)
            p = p1;
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        /* directory part of base_path, then the relative filename */
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}
188
/*
 * Register a block driver on the global driver list, installing
 * emulation hooks for any optional callbacks the driver does not
 * provide itself.
 */
void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    /* async flush gets emulated on top of the synchronous flush */
    if (!bdrv->bdrv_aio_flush)
        bdrv->bdrv_aio_flush = bdrv_aio_flush_em;

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
211
212 /* create a new block device (by default it is empty) */
213 BlockDriverState *bdrv_new(const char *device_name)
214 {
215 BlockDriverState *bs;
216
217 bs = g_malloc0(sizeof(BlockDriverState));
218 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
219 if (device_name[0] != '\0') {
220 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
221 }
222 bdrv_iostatus_disable(bs);
223 return bs;
224 }
225
226 BlockDriver *bdrv_find_format(const char *format_name)
227 {
228 BlockDriver *drv1;
229 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
230 if (!strcmp(drv1->format_name, format_name)) {
231 return drv1;
232 }
233 }
234 return NULL;
235 }
236
237 static int bdrv_is_whitelisted(BlockDriver *drv)
238 {
239 static const char *whitelist[] = {
240 CONFIG_BDRV_WHITELIST
241 };
242 const char **p;
243
244 if (!whitelist[0])
245 return 1; /* no whitelist, anything goes */
246
247 for (p = whitelist; *p; p++) {
248 if (!strcmp(drv->format_name, *p)) {
249 return 1;
250 }
251 }
252 return 0;
253 }
254
255 BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
256 {
257 BlockDriver *drv = bdrv_find_format(format_name);
258 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
259 }
260
261 int bdrv_create(BlockDriver *drv, const char* filename,
262 QEMUOptionParameter *options)
263 {
264 if (!drv->bdrv_create)
265 return -ENOTSUP;
266
267 return drv->bdrv_create(filename, options);
268 }
269
270 int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
271 {
272 BlockDriver *drv;
273
274 drv = bdrv_find_protocol(filename);
275 if (drv == NULL) {
276 return -ENOENT;
277 }
278
279 return bdrv_create(drv, filename, options);
280 }
281
#ifdef _WIN32
/* Generate a temporary file name into 'filename'.
 * NOTE(review): 'size' is unused here; GetTempFileName() assumes the
 * buffer holds at least MAX_PATH characters — confirm callers do. */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
/* Generate (and create) a unique temporary file, storing its name into
 * 'filename' (at most 'size' bytes, NUL-terminated). */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;
    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir)
        tmpdir = "/tmp";
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    /* Bug fix: the old code called close(fd) unconditionally, i.e.
     * close(-1) when mkstemp() failed.  Only close a valid descriptor;
     * the caller reopens the file by name. */
    if (fd >= 0) {
        close(fd);
    }
}
#endif
304
305 /*
306 * Detect host devices. By convention, /dev/cdrom[N] is always
307 * recognized as a host CDROM.
308 */
309 static BlockDriver *find_hdev_driver(const char *filename)
310 {
311 int score_max = 0, score;
312 BlockDriver *drv = NULL, *d;
313
314 QLIST_FOREACH(d, &bdrv_drivers, list) {
315 if (d->bdrv_probe_device) {
316 score = d->bdrv_probe_device(filename);
317 if (score > score_max) {
318 score_max = score;
319 drv = d;
320 }
321 }
322 }
323
324 return drv;
325 }
326
327 BlockDriver *bdrv_find_protocol(const char *filename)
328 {
329 BlockDriver *drv1;
330 char protocol[128];
331 int len;
332 const char *p;
333
334 /* TODO Drivers without bdrv_file_open must be specified explicitly */
335
336 /*
337 * XXX(hch): we really should not let host device detection
338 * override an explicit protocol specification, but moving this
339 * later breaks access to device names with colons in them.
340 * Thanks to the brain-dead persistent naming schemes on udev-
341 * based Linux systems those actually are quite common.
342 */
343 drv1 = find_hdev_driver(filename);
344 if (drv1) {
345 return drv1;
346 }
347
348 if (!path_has_protocol(filename)) {
349 return bdrv_find_format("file");
350 }
351 p = strchr(filename, ':');
352 assert(p != NULL);
353 len = p - filename;
354 if (len > sizeof(protocol) - 1)
355 len = sizeof(protocol) - 1;
356 memcpy(protocol, filename, len);
357 protocol[len] = '\0';
358 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
359 if (drv1->protocol_name &&
360 !strcmp(drv1->protocol_name, protocol)) {
361 return drv1;
362 }
363 }
364 return NULL;
365 }
366
/*
 * Probe the image format of 'filename'.
 *
 * On success returns 0 and stores the winning driver in *pdrv; on
 * failure returns a negative errno and sets *pdrv to NULL (or to the
 * "raw" fallback for scsi-generic devices and empty drives).
 */
static int find_image_format(const char *filename, BlockDriver **pdrv)
{
    int ret, score, score_max;
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
    BlockDriverState *bs;

    ret = bdrv_file_open(&bs, filename, 0);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs)) {
        bdrv_delete(bs);
        drv = bdrv_find_format("raw");
        if (!drv) {
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
    }

    /* read the image header and let each registered driver score it */
    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    bdrv_delete(bs);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    /* pick the driver whose probe reports the highest confidence; note
     * that 'ret' here is the number of header bytes actually read */
    score_max = 0;
    drv = NULL;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->bdrv_probe) {
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
        }
    }
    if (!drv) {
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}
415
416 /**
417 * Set the current 'total_sectors' value
418 */
419 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
420 {
421 BlockDriver *drv = bs->drv;
422
423 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
424 if (bs->sg)
425 return 0;
426
427 /* query actual device if possible, otherwise just trust the hint */
428 if (drv->bdrv_getlength) {
429 int64_t length = drv->bdrv_getlength(bs);
430 if (length < 0) {
431 return length;
432 }
433 hint = length >> BDRV_SECTOR_BITS;
434 }
435
436 bs->total_sectors = hint;
437 return 0;
438 }
439
440 /**
441 * Set open flags for a given cache mode
442 *
443 * Return 0 on success, -1 if the cache mode was invalid.
444 */
445 int bdrv_parse_cache_flags(const char *mode, int *flags)
446 {
447 *flags &= ~BDRV_O_CACHE_MASK;
448
449 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
450 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
451 } else if (!strcmp(mode, "directsync")) {
452 *flags |= BDRV_O_NOCACHE;
453 } else if (!strcmp(mode, "writeback")) {
454 *flags |= BDRV_O_CACHE_WB;
455 } else if (!strcmp(mode, "unsafe")) {
456 *flags |= BDRV_O_CACHE_WB;
457 *flags |= BDRV_O_NO_FLUSH;
458 } else if (!strcmp(mode, "writethrough")) {
459 /* this is the default */
460 } else {
461 return -1;
462 }
463
464 return 0;
465 }
466
/*
 * Common part for opening disk images and files
 *
 * Resets the per-open fields of 'bs', allocates the driver's private
 * state and opens the image — directly for protocol drivers, or through
 * a protocol-level bs->file for format drivers.  On failure all state
 * allocated here is released again and a negative errno is returned.
 */
static int bdrv_open_common(BlockDriverState *bs, const char *filename,
    int flags, BlockDriver *drv)
{
    int ret, open_flags;

    assert(drv != NULL);

    trace_bdrv_open_common(bs, filename, flags, drv->format_name);

    /* reset state possibly left over from a previous open */
    bs->file = NULL;
    bs->total_sectors = 0;
    bs->encrypted = 0;
    bs->valid_key = 0;
    bs->open_flags = flags;
    bs->buffer_alignment = 512;

    pstrcpy(bs->filename, sizeof(bs->filename), filename);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
        return -ENOTSUP;
    }

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    if (flags & BDRV_O_CACHE_WB)
        bs->enable_write_cache = 1;

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

    /*
     * Snapshots should be writable.
     */
    if (bs->is_temporary) {
        open_flags |= BDRV_O_RDWR;
    }

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        ret = drv->bdrv_file_open(bs, filename, open_flags);
    } else {
        ret = bdrv_file_open(&bs->file, filename, open_flags);
        if (ret >= 0) {
            ret = drv->bdrv_open(bs, open_flags);
        }
    }

    if (ret < 0) {
        goto free_and_fail;
    }

    bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        goto free_and_fail;
    }

#ifndef _WIN32
    /* the open file descriptor keeps a temporary snapshot file alive, so
     * its name can be unlinked right away */
    if (bs->is_temporary) {
        unlink(filename);
    }
#endif
    return 0;

free_and_fail:
    /* undo everything allocated above */
    if (bs->file) {
        bdrv_delete(bs->file);
        bs->file = NULL;
    }
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}
549
550 /*
551 * Opens a file using a protocol (file, host_device, nbd, ...)
552 */
553 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
554 {
555 BlockDriverState *bs;
556 BlockDriver *drv;
557 int ret;
558
559 drv = bdrv_find_protocol(filename);
560 if (!drv) {
561 return -ENOENT;
562 }
563
564 bs = bdrv_new("");
565 ret = bdrv_open_common(bs, filename, flags, drv);
566 if (ret < 0) {
567 bdrv_delete(bs);
568 return ret;
569 }
570 bs->growable = 1;
571 *pbs = bs;
572 return 0;
573 }
574
/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
 *
 * 'drv' selects the format driver; when NULL the format is probed from
 * the image.  With BDRV_O_SNAPSHOT a temporary qcow2 overlay backed by
 * 'filename' is created and opened instead of the image itself.
 */
int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
              BlockDriver *drv)
{
    int ret;

    if (flags & BDRV_O_SNAPSHOT) {
        BlockDriverState *bs1;
        int64_t total_size;
        int is_protocol = 0;
        BlockDriver *bdrv_qcow2;
        QEMUOptionParameter *options;
        char tmp_filename[PATH_MAX];
        char backing_filename[PATH_MAX];

        /* if snapshot, we create a temporary backing file and open it
           instead of opening 'filename' directly */

        /* if there is a backing file, use it */
        bs1 = bdrv_new("");
        ret = bdrv_open(bs1, filename, 0, drv);
        if (ret < 0) {
            bdrv_delete(bs1);
            return ret;
        }
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;

        if (bs1->drv && bs1->drv->protocol_name)
            is_protocol = 1;

        bdrv_delete(bs1);

        get_tmp_filename(tmp_filename, sizeof(tmp_filename));

        /* Real path is meaningless for protocols */
        if (is_protocol)
            snprintf(backing_filename, sizeof(backing_filename),
                     "%s", filename);
        else if (!realpath(filename, backing_filename))
            return -errno;

        /* create the qcow2 overlay sized like the original image and
         * backed by it */
        bdrv_qcow2 = bdrv_find_format("qcow2");
        options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);

        set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
        set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
        if (drv) {
            set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
                drv->format_name);
        }

        ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
        free_option_parameters(options);
        if (ret < 0) {
            return ret;
        }

        /* from here on, open the snapshot overlay instead */
        filename = tmp_filename;
        drv = bdrv_qcow2;
        bs->is_temporary = 1;
    }

    /* Find the right image format driver */
    if (!drv) {
        ret = find_image_format(filename, &drv);
    }

    if (!drv) {
        goto unlink_and_fail;
    }

    /* Open the image */
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        goto unlink_and_fail;
    }

    /* If there is a backing file, use it */
    if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
        char backing_filename[PATH_MAX];
        int back_flags;
        BlockDriver *back_drv = NULL;

        bs->backing_hd = bdrv_new("");

        /* resolve the backing file name relative to the image */
        if (path_has_protocol(bs->backing_file)) {
            pstrcpy(backing_filename, sizeof(backing_filename),
                    bs->backing_file);
        } else {
            path_combine(backing_filename, sizeof(backing_filename),
                         filename, bs->backing_file);
        }

        if (bs->backing_format[0] != '\0') {
            back_drv = bdrv_find_format(bs->backing_format);
        }

        /* backing files always opened read-only */
        back_flags =
            flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

        ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
        if (ret < 0) {
            bdrv_close(bs);
            return ret;
        }
        if (bs->is_temporary) {
            bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
        } else {
            /* base image inherits from "parent" */
            bs->backing_hd->keep_read_only = bs->keep_read_only;
        }
    }

    if (!bdrv_key_required(bs)) {
        /* medium is usable right away; notify the attached device */
        bdrv_dev_change_media_cb(bs, true);
    }

    return 0;

unlink_and_fail:
    /* remove the temporary snapshot overlay, if one was created */
    if (bs->is_temporary) {
        unlink(filename);
    }
    return ret;
}
703
/*
 * Close a block device: tear down the backing file, invoke the driver's
 * close callback and release the driver-private state.  The
 * BlockDriverState itself stays valid and may be reopened.  No-op when
 * the device is already closed (bs->drv == NULL).
 */
void bdrv_close(BlockDriverState *bs)
{
    if (bs->drv) {
        /* a closed device can no longer be the VM-snapshot device */
        if (bs == bs_snapshots) {
            bs_snapshots = NULL;
        }
        if (bs->backing_hd) {
            bdrv_delete(bs->backing_hd);
            bs->backing_hd = NULL;
        }
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
#ifdef _WIN32
        /* on Windows the temporary snapshot file could not be unlinked
         * while it was still open */
        if (bs->is_temporary) {
            unlink(bs->filename);
        }
#endif
        bs->opaque = NULL;
        bs->drv = NULL;

        /* close the protocol-level BDS underneath a format driver */
        if (bs->file != NULL) {
            bdrv_close(bs->file);
        }

        bdrv_dev_change_media_cb(bs, false);
    }
}
731
732 void bdrv_close_all(void)
733 {
734 BlockDriverState *bs;
735
736 QTAILQ_FOREACH(bs, &bdrv_states, list) {
737 bdrv_close(bs);
738 }
739 }
740
741 /* make a BlockDriverState anonymous by removing from bdrv_state list.
742 Also, NULL terminate the device_name to prevent double remove */
743 void bdrv_make_anon(BlockDriverState *bs)
744 {
745 if (bs->device_name[0] != '\0') {
746 QTAILQ_REMOVE(&bdrv_states, bs, list);
747 }
748 bs->device_name[0] = '\0';
749 }
750
751 void bdrv_delete(BlockDriverState *bs)
752 {
753 assert(!bs->dev);
754
755 /* remove from list, if necessary */
756 bdrv_make_anon(bs);
757
758 bdrv_close(bs);
759 if (bs->file != NULL) {
760 bdrv_delete(bs->file);
761 }
762
763 assert(bs != bs_snapshots);
764 g_free(bs);
765 }
766
767 int bdrv_attach_dev(BlockDriverState *bs, void *dev)
768 /* TODO change to DeviceState *dev when all users are qdevified */
769 {
770 if (bs->dev) {
771 return -EBUSY;
772 }
773 bs->dev = dev;
774 bdrv_iostatus_reset(bs);
775 return 0;
776 }
777
778 /* TODO qdevified devices don't use this, remove when devices are qdevified */
779 void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
780 {
781 if (bdrv_attach_dev(bs, dev) < 0) {
782 abort();
783 }
784 }
785
786 void bdrv_detach_dev(BlockDriverState *bs, void *dev)
787 /* TODO change to DeviceState *dev when all users are qdevified */
788 {
789 assert(bs->dev == dev);
790 bs->dev = NULL;
791 bs->dev_ops = NULL;
792 bs->dev_opaque = NULL;
793 bs->buffer_alignment = 512;
794 }
795
/* Return the device model attached to bs, or NULL when none. */
/* TODO change to return DeviceState * when all users are qdevified */
void *bdrv_get_attached_dev(BlockDriverState *bs)
{
    return bs->dev;
}
801
802 void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
803 void *opaque)
804 {
805 bs->dev_ops = ops;
806 bs->dev_opaque = opaque;
807 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
808 bs_snapshots = NULL;
809 }
810 }
811
812 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
813 {
814 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
815 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
816 }
817 }
818
819 bool bdrv_dev_has_removable_media(BlockDriverState *bs)
820 {
821 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
822 }
823
824 bool bdrv_dev_is_tray_open(BlockDriverState *bs)
825 {
826 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
827 return bs->dev_ops->is_tray_open(bs->dev_opaque);
828 }
829 return false;
830 }
831
832 static void bdrv_dev_resize_cb(BlockDriverState *bs)
833 {
834 if (bs->dev_ops && bs->dev_ops->resize_cb) {
835 bs->dev_ops->resize_cb(bs->dev_opaque);
836 }
837 }
838
839 bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
840 {
841 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
842 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
843 }
844 return false;
845 }
846
847 /*
848 * Run consistency checks on an image
849 *
850 * Returns 0 if the check could be completed (it doesn't mean that the image is
851 * free of errors) or -errno when an internal error occurred. The results of the
852 * check are stored in res.
853 */
854 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
855 {
856 if (bs->drv->bdrv_check == NULL) {
857 return -ENOTSUP;
858 }
859
860 memset(res, 0, sizeof(*res));
861 return bs->drv->bdrv_check(bs, res);
862 }
863
864 #define COMMIT_BUF_SECTORS 2048
865
866 /* commit COW file into the raw image */
867 int bdrv_commit(BlockDriverState *bs)
868 {
869 BlockDriver *drv = bs->drv;
870 BlockDriver *backing_drv;
871 int64_t sector, total_sectors;
872 int n, ro, open_flags;
873 int ret = 0, rw_ret = 0;
874 uint8_t *buf;
875 char filename[1024];
876 BlockDriverState *bs_rw, *bs_ro;
877
878 if (!drv)
879 return -ENOMEDIUM;
880
881 if (!bs->backing_hd) {
882 return -ENOTSUP;
883 }
884
885 if (bs->backing_hd->keep_read_only) {
886 return -EACCES;
887 }
888
889 backing_drv = bs->backing_hd->drv;
890 ro = bs->backing_hd->read_only;
891 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
892 open_flags = bs->backing_hd->open_flags;
893
894 if (ro) {
895 /* re-open as RW */
896 bdrv_delete(bs->backing_hd);
897 bs->backing_hd = NULL;
898 bs_rw = bdrv_new("");
899 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
900 backing_drv);
901 if (rw_ret < 0) {
902 bdrv_delete(bs_rw);
903 /* try to re-open read-only */
904 bs_ro = bdrv_new("");
905 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
906 backing_drv);
907 if (ret < 0) {
908 bdrv_delete(bs_ro);
909 /* drive not functional anymore */
910 bs->drv = NULL;
911 return ret;
912 }
913 bs->backing_hd = bs_ro;
914 return rw_ret;
915 }
916 bs->backing_hd = bs_rw;
917 }
918
919 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
920 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
921
922 for (sector = 0; sector < total_sectors; sector += n) {
923 if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
924
925 if (bdrv_read(bs, sector, buf, n) != 0) {
926 ret = -EIO;
927 goto ro_cleanup;
928 }
929
930 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
931 ret = -EIO;
932 goto ro_cleanup;
933 }
934 }
935 }
936
937 if (drv->bdrv_make_empty) {
938 ret = drv->bdrv_make_empty(bs);
939 bdrv_flush(bs);
940 }
941
942 /*
943 * Make sure all data we wrote to the backing device is actually
944 * stable on disk.
945 */
946 if (bs->backing_hd)
947 bdrv_flush(bs->backing_hd);
948
949 ro_cleanup:
950 g_free(buf);
951
952 if (ro) {
953 /* re-open as RO */
954 bdrv_delete(bs->backing_hd);
955 bs->backing_hd = NULL;
956 bs_ro = bdrv_new("");
957 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
958 backing_drv);
959 if (ret < 0) {
960 bdrv_delete(bs_ro);
961 /* drive not functional anymore */
962 bs->drv = NULL;
963 return ret;
964 }
965 bs->backing_hd = bs_ro;
966 bs->backing_hd->keep_read_only = 0;
967 }
968
969 return ret;
970 }
971
972 void bdrv_commit_all(void)
973 {
974 BlockDriverState *bs;
975
976 QTAILQ_FOREACH(bs, &bdrv_states, list) {
977 bdrv_commit(bs);
978 }
979 }
980
981 /*
982 * Return values:
983 * 0 - success
984 * -EINVAL - backing format specified, but no file
985 * -ENOSPC - can't update the backing file because no space is left in the
986 * image file header
987 * -ENOTSUP - format driver doesn't support changing the backing file
988 */
989 int bdrv_change_backing_file(BlockDriverState *bs,
990 const char *backing_file, const char *backing_fmt)
991 {
992 BlockDriver *drv = bs->drv;
993
994 if (drv->bdrv_change_backing_file != NULL) {
995 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
996 } else {
997 return -ENOTSUP;
998 }
999 }
1000
1001 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1002 size_t size)
1003 {
1004 int64_t len;
1005
1006 if (!bdrv_is_inserted(bs))
1007 return -ENOMEDIUM;
1008
1009 if (bs->growable)
1010 return 0;
1011
1012 len = bdrv_getlength(bs);
1013
1014 if (offset < 0)
1015 return -EIO;
1016
1017 if ((offset > len) || (len - offset < size))
1018 return -EIO;
1019
1020 return 0;
1021 }
1022
1023 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1024 int nb_sectors)
1025 {
1026 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1027 nb_sectors * BDRV_SECTOR_SIZE);
1028 }
1029
1030 static inline bool bdrv_has_async_rw(BlockDriver *drv)
1031 {
1032 return drv->bdrv_co_readv != bdrv_co_readv_em
1033 || drv->bdrv_aio_readv != bdrv_aio_readv_em;
1034 }
1035
/* True when the driver provides real (non-emulated) async flush. */
static inline bool bdrv_has_async_flush(BlockDriver *drv)
{
    return drv->bdrv_aio_flush != bdrv_aio_flush_em;
}
1040
/* State shared between bdrv_rw_co() and its coroutine entry point. */
typedef struct RwCo {
    BlockDriverState *bs;   /* device to operate on */
    int64_t sector_num;     /* first sector of the request */
    int nb_sectors;         /* request length in sectors */
    QEMUIOVector *qiov;     /* data buffers */
    bool is_write;          /* write (true) or read (false) */
    int ret;                /* completion status; NOT_DONE while pending */
} RwCo;
1049
1050 static void coroutine_fn bdrv_rw_co_entry(void *opaque)
1051 {
1052 RwCo *rwco = opaque;
1053
1054 if (!rwco->is_write) {
1055 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
1056 rwco->nb_sectors, rwco->qiov);
1057 } else {
1058 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
1059 rwco->nb_sectors, rwco->qiov);
1060 }
1061 }
1062
/*
 * Process a synchronous request using coroutines
 *
 * Wraps the flat buffer in a single-element QEMUIOVector, then runs the
 * request via bdrv_rw_co_entry(): directly when already in coroutine
 * context, otherwise in a new coroutine while pumping the AIO event
 * loop until it completes.  Returns the coroutine's result (< 0 on
 * error).
 */
static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
    int nb_sectors, bool is_write)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
    };
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .qiov = &qiov,
        .is_write = is_write,
        .ret = NOT_DONE,    /* sentinel, overwritten on completion */
    };

    qemu_iovec_init_external(&qiov, &iov, 1);

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_rw_co_entry(&rwco);
    } else {
        /* run in a fresh coroutine and wait for it to finish */
        co = qemu_coroutine_create(bdrv_rw_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }
    return rwco.ret;
}
1098
1099 /* return < 0 if error. See bdrv_write() for the return codes */
1100 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
1101 uint8_t *buf, int nb_sectors)
1102 {
1103 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
1104 }
1105
/*
 * Mark (dirty != 0) or clear the dirty-bitmap bits covering the sector
 * range [sector_num, sector_num + nb_sectors), keeping bs->dirty_count
 * consistent.  One bitmap bit covers BDRV_SECTORS_PER_DIRTY_CHUNK
 * sectors.
 */
static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, int dirty)
{
    int64_t start, end;
    unsigned long val, idx, bit;

    /* chunk indices of the first and last affected chunks */
    start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
    end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;

    for (; start <= end; start++) {
        /* word index and bit position within the unsigned long array */
        idx = start / (sizeof(unsigned long) * 8);
        bit = start % (sizeof(unsigned long) * 8);
        val = bs->dirty_bitmap[idx];
        if (dirty) {
            /* only count a transition from clean to dirty */
            if (!(val & (1UL << bit))) {
                bs->dirty_count++;
                val |= 1UL << bit;
            }
        } else {
            /* only count a transition from dirty to clean */
            if (val & (1UL << bit)) {
                bs->dirty_count--;
                val &= ~(1UL << bit);
            }
        }
        bs->dirty_bitmap[idx] = val;
    }
}
1133
1134 /* Return < 0 if error. Important errors are:
1135 -EIO generic I/O error (may happen for all errors)
1136 -ENOMEDIUM No media inserted.
1137 -EINVAL Invalid sector number or nb_sectors
1138 -EACCES Trying to write a read-only device
1139 */
1140 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
1141 const uint8_t *buf, int nb_sectors)
1142 {
1143 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
1144 }
1145
/*
 * Read 'count1' bytes starting at byte 'offset' into 'buf'.
 *
 * Implemented on top of sector-granular bdrv_read(): an unaligned head
 * and tail are bounced through a one-sector temporary buffer, while the
 * aligned middle is read directly into 'buf'.
 *
 * Returns count1 on success, or a negative errno from bdrv_read().
 */
int bdrv_pread(BlockDriverState *bs, int64_t offset,
    void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first read to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        /* copy the tail of the partially-covered head sector */
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* read the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf, count);
    }
    return count1;
}
1190
/*
 * Write 'count1' bytes from 'buf' starting at byte 'offset'.
 *
 * Built on sector-granular bdrv_read()/bdrv_write(): partially-covered
 * head and tail sectors are handled read-modify-write through a
 * one-sector bounce buffer; the aligned middle is written directly from
 * 'buf'.
 *
 * Returns count1 on success, or a negative errno.
 */
int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
    const void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first write to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        /* read-modify-write the partial head sector */
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* write the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        /* read-modify-write the partial tail sector */
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf, buf, count);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
    }
    return count1;
}
1239
1240 /*
1241 * Writes to the file and ensures that no writes are reordered across this
1242 * request (acts as a barrier)
1243 *
1244 * Returns 0 on success, -errno in error cases.
1245 */
1246 int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1247 const void *buf, int count)
1248 {
1249 int ret;
1250
1251 ret = bdrv_pwrite(bs, offset, buf, count);
1252 if (ret < 0) {
1253 return ret;
1254 }
1255
1256 /* No flush needed for cache modes that use O_DSYNC */
1257 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
1258 bdrv_flush(bs);
1259 }
1260
1261 return 0;
1262 }
1263
1264 /*
1265 * Handle a read request in coroutine context
1266 */
1267 static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
1268 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1269 {
1270 BlockDriver *drv = bs->drv;
1271
1272 if (!drv) {
1273 return -ENOMEDIUM;
1274 }
1275 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1276 return -EIO;
1277 }
1278
1279 return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1280 }
1281
/* Public coroutine read entry point: emit the tracepoint, then delegate to
 * the common request path. */
int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_readv(bs, sector_num, nb_sectors);

    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
}
1289
1290 /*
1291 * Handle a write request in coroutine context
1292 */
1293 static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
1294 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1295 {
1296 BlockDriver *drv = bs->drv;
1297 int ret;
1298
1299 if (!bs->drv) {
1300 return -ENOMEDIUM;
1301 }
1302 if (bs->read_only) {
1303 return -EACCES;
1304 }
1305 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1306 return -EIO;
1307 }
1308
1309 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1310
1311 if (bs->dirty_bitmap) {
1312 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1313 }
1314
1315 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1316 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1317 }
1318
1319 return ret;
1320 }
1321
/* Public coroutine write entry point: emit the tracepoint, then delegate to
 * the common request path. */
int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_writev(bs, sector_num, nb_sectors);

    return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
}
1329
1330 /**
1331 * Truncate file to 'offset' bytes (needed only for file protocols)
1332 */
1333 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1334 {
1335 BlockDriver *drv = bs->drv;
1336 int ret;
1337 if (!drv)
1338 return -ENOMEDIUM;
1339 if (!drv->bdrv_truncate)
1340 return -ENOTSUP;
1341 if (bs->read_only)
1342 return -EACCES;
1343 if (bdrv_in_use(bs))
1344 return -EBUSY;
1345 ret = drv->bdrv_truncate(bs, offset);
1346 if (ret == 0) {
1347 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
1348 bdrv_dev_resize_cb(bs);
1349 }
1350 return ret;
1351 }
1352
1353 /**
1354 * Length of a allocated file in bytes. Sparse files are counted by actual
1355 * allocated space. Return < 0 if error or unknown.
1356 */
1357 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1358 {
1359 BlockDriver *drv = bs->drv;
1360 if (!drv) {
1361 return -ENOMEDIUM;
1362 }
1363 if (drv->bdrv_get_allocated_file_size) {
1364 return drv->bdrv_get_allocated_file_size(bs);
1365 }
1366 if (bs->file) {
1367 return bdrv_get_allocated_file_size(bs->file);
1368 }
1369 return -ENOTSUP;
1370 }
1371
1372 /**
1373 * Length of a file in bytes. Return < 0 if error or unknown.
1374 */
1375 int64_t bdrv_getlength(BlockDriverState *bs)
1376 {
1377 BlockDriver *drv = bs->drv;
1378 if (!drv)
1379 return -ENOMEDIUM;
1380
1381 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
1382 if (drv->bdrv_getlength) {
1383 return drv->bdrv_getlength(bs);
1384 }
1385 }
1386 return bs->total_sectors * BDRV_SECTOR_SIZE;
1387 }
1388
1389 /* return 0 as number of sectors if no device present or error */
1390 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
1391 {
1392 int64_t length;
1393 length = bdrv_getlength(bs);
1394 if (length < 0)
1395 length = 0;
1396 else
1397 length = length >> BDRV_SECTOR_BITS;
1398 *nb_sectors_ptr = length;
1399 }
1400
/* Legacy MBR partition table entry as laid out on disk at offset 0x1be of
 * the boot sector.  QEMU_PACKED because the on-disk layout has no padding;
 * multi-byte fields are little-endian and must go through le32_to_cpu(). */
struct partition {
        uint8_t boot_ind;           /* 0x80 - active */
        uint8_t head;               /* starting head */
        uint8_t sector;             /* starting sector */
        uint8_t cyl;                /* starting cylinder */
        uint8_t sys_ind;            /* What partition type */
        uint8_t end_head;           /* end head */
        uint8_t end_sector;         /* end sector */
        uint8_t end_cyl;            /* end cylinder */
        uint32_t start_sect;        /* starting sector counting from 0 */
        uint32_t nr_sects;          /* nr of sectors in partition */
} QEMU_PACKED;
1413
/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
static int guess_disk_lchs(BlockDriverState *bs,
                           int *pcylinders, int *pheads, int *psectors)
{
    uint8_t buf[BDRV_SECTOR_SIZE];
    int ret, i, heads, sectors, cylinders;
    struct partition *p;
    uint32_t nr_sects;
    uint64_t nb_sectors;

    bdrv_get_geometry(bs, &nb_sectors);

    /* Read the boot sector (sector 0). */
    ret = bdrv_read(bs, 0, buf, 1);
    if (ret < 0)
        return -1;
    /* test msdos magic */
    if (buf[510] != 0x55 || buf[511] != 0xaa)
        return -1;
    /* Scan the four primary partition entries at offset 0x1be. */
    for(i = 0; i < 4; i++) {
        p = ((struct partition *)(buf + 0x1be)) + i;
        nr_sects = le32_to_cpu(p->nr_sects);
        if (nr_sects && p->end_head) {
            /* We make the assumption that the partition terminates on
               a cylinder boundary */
            heads = p->end_head + 1;
            sectors = p->end_sector & 63;  /* CHS sector lives in the low 6 bits */
            if (sectors == 0)
                continue;
            cylinders = nb_sectors / (heads * sectors);
            /* 16383 is the traditional ATA CHS cylinder limit */
            if (cylinders < 1 || cylinders > 16383)
                continue;
            *pheads = heads;
            *psectors = sectors;
            *pcylinders = cylinders;
#if 0
            printf("guessed geometry: LCHS=%d %d %d\n",
                   cylinders, heads, sectors);
#endif
            return 0;
        }
    }
    return -1;
}
1457
/* Fill *pcyls/*pheads/*psecs for 'bs': prefer an existing geometry hint,
 * then the partition-table guess from guess_disk_lchs(), and finally a
 * standard 16-head/63-sector physical geometry.  May also update the BIOS
 * translation hint when it is still BIOS_ATA_TRANSLATION_AUTO. */
void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
{
    int translation, lba_detected = 0;
    int cylinders, heads, secs;
    uint64_t nb_sectors;

    /* if a geometry hint is available, use it */
    bdrv_get_geometry(bs, &nb_sectors);
    bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
    translation = bdrv_get_translation_hint(bs);
    if (cylinders != 0) {
        /* User-supplied geometry wins unconditionally. */
        *pcyls = cylinders;
        *pheads = heads;
        *psecs = secs;
    } else {
        if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
            if (heads > 16) {
                /* if heads > 16, it means that a BIOS LBA
                   translation was active, so the default
                   hardware geometry is OK */
                lba_detected = 1;
                goto default_geometry;
            } else {
                *pcyls = cylinders;
                *pheads = heads;
                *psecs = secs;
                /* disable any translation to be in sync with
                   the logical geometry */
                if (translation == BIOS_ATA_TRANSLATION_AUTO) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_NONE);
                }
            }
        } else {
        default_geometry:
            /* if no geometry, use a standard physical disk geometry */
            cylinders = nb_sectors / (16 * 63);

            /* clamp to the ATA CHS cylinder limits */
            if (cylinders > 16383)
                cylinders = 16383;
            else if (cylinders < 2)
                cylinders = 2;
            *pcyls = cylinders;
            *pheads = 16;
            *psecs = 63;
            /* 131072 = 1024 cylinders * 128 heads: boundary between the
             * LARGE and LBA BIOS translation schemes */
            if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
                if ((*pcyls * *pheads) <= 131072) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LARGE);
                } else {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LBA);
                }
            }
        }
        /* remember the computed geometry so later calls are stable */
        bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
    }
}
1516
1517 void bdrv_set_geometry_hint(BlockDriverState *bs,
1518 int cyls, int heads, int secs)
1519 {
1520 bs->cyls = cyls;
1521 bs->heads = heads;
1522 bs->secs = secs;
1523 }
1524
/* Record the BIOS ATA translation hint (BIOS_ATA_TRANSLATION_*). */
void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
{
    bs->translation = translation;
}
1529
1530 void bdrv_get_geometry_hint(BlockDriverState *bs,
1531 int *pcyls, int *pheads, int *psecs)
1532 {
1533 *pcyls = bs->cyls;
1534 *pheads = bs->heads;
1535 *psecs = bs->secs;
1536 }
1537
/* Recognize floppy formats */
typedef struct FDFormat {
    FDriveType drive;   /* drive type this geometry belongs to */
    uint8_t last_sect;  /* sectors per track */
    uint8_t max_track;  /* number of tracks */
    uint8_t max_head;   /* highest head number (0 = single-sided) */
} FDFormat;
1545
/* Known floppy geometries, scanned in order by
 * bdrv_get_floppy_geometry_hint(); terminated by the FDRIVE_DRV_NONE
 * sentinel entry. */
static const FDFormat fd_formats[] = {
    /* First entry is default format */
    /* 1.44 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_144, 18, 80, 1, },
    { FDRIVE_DRV_144, 20, 80, 1, },
    { FDRIVE_DRV_144, 21, 80, 1, },
    { FDRIVE_DRV_144, 21, 82, 1, },
    { FDRIVE_DRV_144, 21, 83, 1, },
    { FDRIVE_DRV_144, 22, 80, 1, },
    { FDRIVE_DRV_144, 23, 80, 1, },
    { FDRIVE_DRV_144, 24, 80, 1, },
    /* 2.88 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_288, 36, 80, 1, },
    { FDRIVE_DRV_288, 39, 80, 1, },
    { FDRIVE_DRV_288, 40, 80, 1, },
    { FDRIVE_DRV_288, 44, 80, 1, },
    { FDRIVE_DRV_288, 48, 80, 1, },
    /* 720 kB 3"1/2 floppy disks */
    { FDRIVE_DRV_144,  9, 80, 1, },
    { FDRIVE_DRV_144, 10, 80, 1, },
    { FDRIVE_DRV_144, 10, 82, 1, },
    { FDRIVE_DRV_144, 10, 83, 1, },
    { FDRIVE_DRV_144, 13, 80, 1, },
    { FDRIVE_DRV_144, 14, 80, 1, },
    /* 1.2 MB 5"1/4 floppy disks */
    { FDRIVE_DRV_120, 15, 80, 1, },
    { FDRIVE_DRV_120, 18, 80, 1, },
    { FDRIVE_DRV_120, 18, 82, 1, },
    { FDRIVE_DRV_120, 18, 83, 1, },
    { FDRIVE_DRV_120, 20, 80, 1, },
    /* 720 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 80, 1, },
    { FDRIVE_DRV_120, 11, 80, 1, },
    /* 360 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 40, 1, },
    { FDRIVE_DRV_120,  9, 40, 0, },
    { FDRIVE_DRV_120, 10, 41, 1, },
    { FDRIVE_DRV_120, 10, 42, 1, },
    /* 320 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  8, 40, 1, },
    { FDRIVE_DRV_120,  8, 40, 0, },
    /* 360 kB must match 5"1/4 better than 3"1/2... */
    { FDRIVE_DRV_144,  9, 80, 0, },
    /* end */
    { FDRIVE_DRV_NONE, -1, -1, 0, },
};
1592
/* Derive floppy geometry (heads/tracks/sectors-per-track) and drive type
 * for 'bs': a fully specified user hint is used as-is; otherwise pick the
 * fd_formats entry whose total size matches the image, falling back to the
 * first entry compatible with 'drive_in'. */
void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
                                   int *max_track, int *last_sect,
                                   FDriveType drive_in, FDriveType *drive)
{
    const FDFormat *parse;
    uint64_t nb_sectors, size;
    int i, first_match, match;

    bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
    if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
        /* User defined disk */
    } else {
        bdrv_get_geometry(bs, &nb_sectors);
        match = -1;
        first_match = -1;
        for (i = 0; ; i++) {
            parse = &fd_formats[i];
            if (parse->drive == FDRIVE_DRV_NONE) {
                break;
            }
            if (drive_in == parse->drive ||
                drive_in == FDRIVE_DRV_NONE) {
                size = (parse->max_head + 1) * parse->max_track *
                    parse->last_sect;
                if (nb_sectors == size) {
                    /* exact size match wins immediately */
                    match = i;
                    break;
                }
                if (first_match == -1) {
                    first_match = i;
                }
            }
        }
        if (match == -1) {
            if (first_match == -1) {
                /* NOTE(review): the fd_formats comment says the *first*
                 * entry (index 0) is the default format, yet index 1 is
                 * used here -- confirm whether this is intentional. */
                match = 1;
            } else {
                match = first_match;
            }
            parse = &fd_formats[match];
        }
        *nb_heads = parse->max_head + 1;
        *max_track = parse->max_track;
        *last_sect = parse->last_sect;
        *drive = parse->drive;
    }
}
1640
/* Return the stored BIOS ATA translation hint (BIOS_ATA_TRANSLATION_*). */
int bdrv_get_translation_hint(BlockDriverState *bs)
{
    return bs->translation;
}
1645
/* Configure the error actions (werror=/rerror=) for this drive. */
void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
                       BlockErrorAction on_write_error)
{
    bs->on_read_error = on_read_error;
    bs->on_write_error = on_write_error;
}
1652
1653 BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1654 {
1655 return is_read ? bs->on_read_error : bs->on_write_error;
1656 }
1657
/* Return non-zero if the drive was opened read-only. */
int bdrv_is_read_only(BlockDriverState *bs)
{
    return bs->read_only;
}
1662
/* Return the SCSI-generic ('sg') flag of this drive state. */
int bdrv_is_sg(BlockDriverState *bs)
{
    return bs->sg;
}
1667
/* Return the enable_write_cache flag of this drive state. */
int bdrv_enable_write_cache(BlockDriverState *bs)
{
    return bs->enable_write_cache;
}
1672
1673 int bdrv_is_encrypted(BlockDriverState *bs)
1674 {
1675 if (bs->backing_hd && bs->backing_hd->encrypted)
1676 return 1;
1677 return bs->encrypted;
1678 }
1679
1680 int bdrv_key_required(BlockDriverState *bs)
1681 {
1682 BlockDriverState *backing_hd = bs->backing_hd;
1683
1684 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1685 return 1;
1686 return (bs->encrypted && !bs->valid_key);
1687 }
1688
/* Set the decryption key for 'bs', recursing into an encrypted backing
 * file first.  Returns 0 on success, -EINVAL when the image is not
 * encrypted, -ENOMEDIUM when no driver/key support is present, or the
 * driver's negative errno.  On first success the deferred media-change
 * callback is fired. */
int bdrv_set_key(BlockDriverState *bs, const char *key)
{
    int ret;
    if (bs->backing_hd && bs->backing_hd->encrypted) {
        ret = bdrv_set_key(bs->backing_hd, key);
        if (ret < 0)
            return ret;
        /* Key was only needed for the backing file. */
        if (!bs->encrypted)
            return 0;
    }
    if (!bs->encrypted) {
        return -EINVAL;
    } else if (!bs->drv || !bs->drv->bdrv_set_key) {
        return -ENOMEDIUM;
    }
    ret = bs->drv->bdrv_set_key(bs, key);
    if (ret < 0) {
        bs->valid_key = 0;
    } else if (!bs->valid_key) {
        bs->valid_key = 1;
        /* call the change callback now, we skipped it on open */
        bdrv_dev_change_media_cb(bs, true);
    }
    return ret;
}
1714
1715 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1716 {
1717 if (!bs->drv) {
1718 buf[0] = '\0';
1719 } else {
1720 pstrcpy(buf, buf_size, bs->drv->format_name);
1721 }
1722 }
1723
/* Invoke 'it' once with the format name of every registered block driver. */
void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
                         void *opaque)
{
    BlockDriver *drv;

    QLIST_FOREACH(drv, &bdrv_drivers, list) {
        it(opaque, drv->format_name);
    }
}
1733
1734 BlockDriverState *bdrv_find(const char *name)
1735 {
1736 BlockDriverState *bs;
1737
1738 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1739 if (!strcmp(name, bs->device_name)) {
1740 return bs;
1741 }
1742 }
1743 return NULL;
1744 }
1745
1746 BlockDriverState *bdrv_next(BlockDriverState *bs)
1747 {
1748 if (!bs) {
1749 return QTAILQ_FIRST(&bdrv_states);
1750 }
1751 return QTAILQ_NEXT(bs, list);
1752 }
1753
/* Invoke 'it' once for every registered drive state. */
void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        it(opaque, bs);
    }
}
1762
/* Return the drive's device name (owned by 'bs', do not free). */
const char *bdrv_get_device_name(BlockDriverState *bs)
{
    return bs->device_name;
}
1767
/* Flush pending writes to stable storage.  Returns 0 on success or when a
 * flush is unnecessary/unsupported; the check order below (NO_FLUSH, then
 * coroutine path, then sync driver callback) is deliberate. */
int bdrv_flush(BlockDriverState *bs)
{
    /* cache=unsafe: the user explicitly asked for flushes to be ignored */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        return 0;
    }

    /* already in coroutine context and the driver has an async flush:
     * use the coroutine emulation path */
    if (bs->drv && bdrv_has_async_flush(bs->drv) && qemu_in_coroutine()) {
        return bdrv_co_flush_em(bs);
    }

    if (bs->drv && bs->drv->bdrv_flush) {
        return bs->drv->bdrv_flush(bs);
    }

    /*
     * Some block drivers always operate in either writethrough or unsafe mode
     * and don't support bdrv_flush therefore. Usually qemu doesn't know how
     * the server works (because the behaviour is hardcoded or depends on
     * server-side configuration), so we can't ensure that everything is safe
     * on disk. Returning an error doesn't work because that would break guests
     * even if the server operates in writethrough mode.
     *
     * Let's hope the user knows what he's doing.
     */
    return 0;
}
1794
1795 void bdrv_flush_all(void)
1796 {
1797 BlockDriverState *bs;
1798
1799 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1800 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
1801 bdrv_flush(bs);
1802 }
1803 }
1804 }
1805
1806 int bdrv_has_zero_init(BlockDriverState *bs)
1807 {
1808 assert(bs->drv);
1809
1810 if (bs->drv->bdrv_has_zero_init) {
1811 return bs->drv->bdrv_has_zero_init(bs);
1812 }
1813
1814 return 1;
1815 }
1816
1817 int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
1818 {
1819 if (!bs->drv) {
1820 return -ENOMEDIUM;
1821 }
1822 if (!bs->drv->bdrv_discard) {
1823 return 0;
1824 }
1825 return bs->drv->bdrv_discard(bs, sector_num, nb_sectors);
1826 }
1827
/*
 * Returns true iff the specified sector is present in the disk image. Drivers
 * not implementing the functionality are assumed to not support backing files,
 * hence all their sectors are reported as allocated.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 * 'nb_sectors' is the max value 'pnum' should be set to.
 */
int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
    int *pnum)
{
    int64_t n;
    /* NOTE(review): bs->drv is dereferenced without the NULL check most
     * other bdrv_* entry points perform -- callers must guarantee a medium
     * is present; confirm this invariant holds for all call sites. */
    if (!bs->drv->bdrv_is_allocated) {
        /* No driver support: everything inside the image counts as
         * allocated, clamped to the device end. */
        if (sector_num >= bs->total_sectors) {
            *pnum = 0;
            return 0;
        }
        n = bs->total_sectors - sector_num;
        *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
        return 1;
    }
    return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
}
1854
1855 void bdrv_mon_event(const BlockDriverState *bdrv,
1856 BlockMonEventAction action, int is_read)
1857 {
1858 QObject *data;
1859 const char *action_str;
1860
1861 switch (action) {
1862 case BDRV_ACTION_REPORT:
1863 action_str = "report";
1864 break;
1865 case BDRV_ACTION_IGNORE:
1866 action_str = "ignore";
1867 break;
1868 case BDRV_ACTION_STOP:
1869 action_str = "stop";
1870 break;
1871 default:
1872 abort();
1873 }
1874
1875 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1876 bdrv->device_name,
1877 action_str,
1878 is_read ? "read" : "write");
1879 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1880
1881 qobject_decref(data);
1882 }
1883
/* qlist_iter() callback for "info block": pretty-print one device dict (as
 * built by bdrv_info()) to the monitor. */
static void bdrv_print_dict(QObject *obj, void *opaque)
{
    QDict *bs_dict;
    Monitor *mon = opaque;

    bs_dict = qobject_to_qdict(obj);

    monitor_printf(mon, "%s: removable=%d",
                   qdict_get_str(bs_dict, "device"),
                   qdict_get_bool(bs_dict, "removable"));

    /* lock/tray state only exists for removable media */
    if (qdict_get_bool(bs_dict, "removable")) {
        monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
        monitor_printf(mon, " tray-open=%d",
                       qdict_get_bool(bs_dict, "tray-open"));
    }

    /* optional key: only present when the iostatus mechanism is enabled */
    if (qdict_haskey(bs_dict, "io-status")) {
        monitor_printf(mon, " io-status=%s", qdict_get_str(bs_dict, "io-status"));
    }

    /* optional key: only present when a medium is inserted */
    if (qdict_haskey(bs_dict, "inserted")) {
        QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted"));

        monitor_printf(mon, " file=");
        monitor_print_filename(mon, qdict_get_str(qdict, "file"));
        if (qdict_haskey(qdict, "backing_file")) {
            monitor_printf(mon, " backing_file=");
            monitor_print_filename(mon, qdict_get_str(qdict, "backing_file"));
        }
        monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
                       qdict_get_bool(qdict, "ro"),
                       qdict_get_str(qdict, "drv"),
                       qdict_get_bool(qdict, "encrypted"));
    } else {
        monitor_printf(mon, " [not inserted]");
    }

    monitor_printf(mon, "\n");
}
1924
/* Human-readable formatter for the bdrv_info() QMP data ("info block"). */
void bdrv_info_print(Monitor *mon, const QObject *data)
{
    qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon);
}
1929
/* Wire names for the BDRV_IOS_* iostatus values, used by bdrv_info()
 * for the "io-status" key. */
static const char *const io_status_name[BDRV_IOS_MAX] = {
    [BDRV_IOS_OK] = "ok",
    [BDRV_IOS_FAILED] = "failed",
    [BDRV_IOS_ENOSPC] = "nospace",
};
1935
/* Build the QMP "query-block" response: one dict per registered drive with
 * removable/locked/tray state, optional io-status, and an optional
 * "inserted" sub-dict describing the current medium. */
void bdrv_info(Monitor *mon, QObject **ret_data)
{
    QList *bs_list;
    BlockDriverState *bs;

    bs_list = qlist_new();

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        QObject *bs_obj;
        QDict *bs_dict;

        bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': 'unknown', "
                                    "'removable': %i, 'locked': %i }",
                                    bs->device_name,
                                    bdrv_dev_has_removable_media(bs),
                                    bdrv_dev_is_medium_locked(bs));
        bs_dict = qobject_to_qdict(bs_obj);

        /* tray state only makes sense for removable media */
        if (bdrv_dev_has_removable_media(bs)) {
            qdict_put(bs_dict, "tray-open",
                      qbool_from_int(bdrv_dev_is_tray_open(bs)));
        }

        if (bdrv_iostatus_is_enabled(bs)) {
            qdict_put(bs_dict, "io-status",
                      qstring_from_str(io_status_name[bs->iostatus]));
        }

        /* bs->drv != NULL means a medium is inserted */
        if (bs->drv) {
            QObject *obj;

            obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
                                     "'encrypted': %i }",
                                     bs->filename, bs->read_only,
                                     bs->drv->format_name,
                                     bdrv_is_encrypted(bs));
            if (bs->backing_file[0] != '\0') {
                QDict *qdict = qobject_to_qdict(obj);
                qdict_put(qdict, "backing_file",
                          qstring_from_str(bs->backing_file));
            }

            qdict_put_obj(bs_dict, "inserted", obj);
        }
        qlist_append_obj(bs_list, bs_obj);
    }

    *ret_data = QOBJECT(bs_list);
}
1985
/* qlist_iter() callback for "info blockstats": print the "stats" sub-dict
 * of one device entry (as built by bdrv_info_stats_bs()) to the monitor. */
static void bdrv_stats_iter(QObject *data, void *opaque)
{
    QDict *qdict;
    Monitor *mon = opaque;

    qdict = qobject_to_qdict(data);
    monitor_printf(mon, "%s:", qdict_get_str(qdict, "device"));

    qdict = qobject_to_qdict(qdict_get(qdict, "stats"));
    monitor_printf(mon, " rd_bytes=%" PRId64
                        " wr_bytes=%" PRId64
                        " rd_operations=%" PRId64
                        " wr_operations=%" PRId64
                        " flush_operations=%" PRId64
                        " wr_total_time_ns=%" PRId64
                        " rd_total_time_ns=%" PRId64
                        " flush_total_time_ns=%" PRId64
                        "\n",
                   qdict_get_int(qdict, "rd_bytes"),
                   qdict_get_int(qdict, "wr_bytes"),
                   qdict_get_int(qdict, "rd_operations"),
                   qdict_get_int(qdict, "wr_operations"),
                   qdict_get_int(qdict, "flush_operations"),
                   qdict_get_int(qdict, "wr_total_time_ns"),
                   qdict_get_int(qdict, "rd_total_time_ns"),
                   qdict_get_int(qdict, "flush_total_time_ns"));
}
2013
/* Human-readable formatter for the bdrv_info_stats() QMP data
 * ("info blockstats"). */
void bdrv_stats_print(Monitor *mon, const QObject *data)
{
    qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon);
}
2018
/* Build the stats dict for one drive: accounting counters plus the highest
 * written offset.  Recurses into bs->file so the protocol layer's stats
 * appear under the "parent" key. */
static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
{
    QObject *res;
    QDict *dict;

    res = qobject_from_jsonf("{ 'stats': {"
                             "'rd_bytes': %" PRId64 ","
                             "'wr_bytes': %" PRId64 ","
                             "'rd_operations': %" PRId64 ","
                             "'wr_operations': %" PRId64 ","
                             "'wr_highest_offset': %" PRId64 ","
                             "'flush_operations': %" PRId64 ","
                             "'wr_total_time_ns': %" PRId64 ","
                             "'rd_total_time_ns': %" PRId64 ","
                             "'flush_total_time_ns': %" PRId64
                             "} }",
                             bs->nr_bytes[BDRV_ACCT_READ],
                             bs->nr_bytes[BDRV_ACCT_WRITE],
                             bs->nr_ops[BDRV_ACCT_READ],
                             bs->nr_ops[BDRV_ACCT_WRITE],
                             bs->wr_highest_sector *
                             (uint64_t)BDRV_SECTOR_SIZE,
                             bs->nr_ops[BDRV_ACCT_FLUSH],
                             bs->total_time_ns[BDRV_ACCT_WRITE],
                             bs->total_time_ns[BDRV_ACCT_READ],
                             bs->total_time_ns[BDRV_ACCT_FLUSH]);
    dict  = qobject_to_qdict(res);

    /* anonymous states (e.g. the protocol layer) have no device name */
    if (*bs->device_name) {
        qdict_put(dict, "device", qstring_from_str(bs->device_name));
    }

    if (bs->file) {
        QObject *parent = bdrv_info_stats_bs(bs->file);
        qdict_put_obj(dict, "parent", parent);
    }

    return res;
}
2058
2059 void bdrv_info_stats(Monitor *mon, QObject **ret_data)
2060 {
2061 QObject *obj;
2062 QList *devices;
2063 BlockDriverState *bs;
2064
2065 devices = qlist_new();
2066
2067 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2068 obj = bdrv_info_stats_bs(bs);
2069 qlist_append_obj(devices, obj);
2070 }
2071
2072 *ret_data = QOBJECT(devices);
2073 }
2074
2075 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2076 {
2077 if (bs->backing_hd && bs->backing_hd->encrypted)
2078 return bs->backing_file;
2079 else if (bs->encrypted)
2080 return bs->filename;
2081 else
2082 return NULL;
2083 }
2084
2085 void bdrv_get_backing_filename(BlockDriverState *bs,
2086 char *filename, int filename_size)
2087 {
2088 if (!bs->backing_file) {
2089 pstrcpy(filename, filename_size, "");
2090 } else {
2091 pstrcpy(filename, filename_size, bs->backing_file);
2092 }
2093 }
2094
2095 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
2096 const uint8_t *buf, int nb_sectors)
2097 {
2098 BlockDriver *drv = bs->drv;
2099 if (!drv)
2100 return -ENOMEDIUM;
2101 if (!drv->bdrv_write_compressed)
2102 return -ENOTSUP;
2103 if (bdrv_check_request(bs, sector_num, nb_sectors))
2104 return -EIO;
2105
2106 if (bs->dirty_bitmap) {
2107 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2108 }
2109
2110 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2111 }
2112
2113 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2114 {
2115 BlockDriver *drv = bs->drv;
2116 if (!drv)
2117 return -ENOMEDIUM;
2118 if (!drv->bdrv_get_info)
2119 return -ENOTSUP;
2120 memset(bdi, 0, sizeof(*bdi));
2121 return drv->bdrv_get_info(bs, bdi);
2122 }
2123
2124 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2125 int64_t pos, int size)
2126 {
2127 BlockDriver *drv = bs->drv;
2128 if (!drv)
2129 return -ENOMEDIUM;
2130 if (drv->bdrv_save_vmstate)
2131 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2132 if (bs->file)
2133 return bdrv_save_vmstate(bs->file, buf, pos, size);
2134 return -ENOTSUP;
2135 }
2136
2137 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2138 int64_t pos, int size)
2139 {
2140 BlockDriver *drv = bs->drv;
2141 if (!drv)
2142 return -ENOMEDIUM;
2143 if (drv->bdrv_load_vmstate)
2144 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2145 if (bs->file)
2146 return bdrv_load_vmstate(bs->file, buf, pos, size);
2147 return -ENOTSUP;
2148 }
2149
2150 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2151 {
2152 BlockDriver *drv = bs->drv;
2153
2154 if (!drv || !drv->bdrv_debug_event) {
2155 return;
2156 }
2157
2158 return drv->bdrv_debug_event(bs, event);
2159
2160 }
2161
2162 /**************************************************************/
2163 /* handling of snapshots */
2164
2165 int bdrv_can_snapshot(BlockDriverState *bs)
2166 {
2167 BlockDriver *drv = bs->drv;
2168 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
2169 return 0;
2170 }
2171
2172 if (!drv->bdrv_snapshot_create) {
2173 if (bs->file != NULL) {
2174 return bdrv_can_snapshot(bs->file);
2175 }
2176 return 0;
2177 }
2178
2179 return 1;
2180 }
2181
/* Return non-zero if this state was opened with the BDRV_O_SNAPSHOT flag. */
int bdrv_is_snapshot(BlockDriverState *bs)
{
    return !!(bs->open_flags & BDRV_O_SNAPSHOT);
}
2186
/* Return the first drive that supports snapshots, caching the answer in
 * the file-scope bs_snapshots pointer; NULL when no such drive exists. */
BlockDriverState *bdrv_snapshots(void)
{
    BlockDriverState *bs;

    /* cached result from a previous call */
    if (bs_snapshots) {
        return bs_snapshots;
    }

    bs = NULL;
    while ((bs = bdrv_next(bs))) {
        if (bdrv_can_snapshot(bs)) {
            bs_snapshots = bs;
            return bs;
        }
    }
    return NULL;
}
2204
2205 int bdrv_snapshot_create(BlockDriverState *bs,
2206 QEMUSnapshotInfo *sn_info)
2207 {
2208 BlockDriver *drv = bs->drv;
2209 if (!drv)
2210 return -ENOMEDIUM;
2211 if (drv->bdrv_snapshot_create)
2212 return drv->bdrv_snapshot_create(bs, sn_info);
2213 if (bs->file)
2214 return bdrv_snapshot_create(bs->file, sn_info);
2215 return -ENOTSUP;
2216 }
2217
/* Revert 'bs' to internal snapshot 'snapshot_id'.  When the format driver
 * has no native support but sits on a protocol layer, the format layer is
 * closed, the underlying file reverted, and the format layer re-opened on
 * top of the reverted contents. */
int bdrv_snapshot_goto(BlockDriverState *bs,
        const char *snapshot_id)
{
    BlockDriver *drv = bs->drv;
    int ret, open_ret;

    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_snapshot_goto)
        return drv->bdrv_snapshot_goto(bs, snapshot_id);

    if (bs->file) {
        /* close format layer, revert the file, re-open the format layer */
        drv->bdrv_close(bs);
        ret = bdrv_snapshot_goto(bs->file, snapshot_id);
        open_ret = drv->bdrv_open(bs, bs->open_flags);
        if (open_ret < 0) {
            /* re-open failed: the state is unusable, tear it down */
            bdrv_delete(bs->file);
            bs->drv = NULL;
            return open_ret;
        }
        return ret;
    }

    return -ENOTSUP;
}
2243
2244 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2245 {
2246 BlockDriver *drv = bs->drv;
2247 if (!drv)
2248 return -ENOMEDIUM;
2249 if (drv->bdrv_snapshot_delete)
2250 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2251 if (bs->file)
2252 return bdrv_snapshot_delete(bs->file, snapshot_id);
2253 return -ENOTSUP;
2254 }
2255
2256 int bdrv_snapshot_list(BlockDriverState *bs,
2257 QEMUSnapshotInfo **psn_info)
2258 {
2259 BlockDriver *drv = bs->drv;
2260 if (!drv)
2261 return -ENOMEDIUM;
2262 if (drv->bdrv_snapshot_list)
2263 return drv->bdrv_snapshot_list(bs, psn_info);
2264 if (bs->file)
2265 return bdrv_snapshot_list(bs->file, psn_info);
2266 return -ENOTSUP;
2267 }
2268
2269 int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2270 const char *snapshot_name)
2271 {
2272 BlockDriver *drv = bs->drv;
2273 if (!drv) {
2274 return -ENOMEDIUM;
2275 }
2276 if (!bs->read_only) {
2277 return -EINVAL;
2278 }
2279 if (drv->bdrv_snapshot_load_tmp) {
2280 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2281 }
2282 return -ENOTSUP;
2283 }
2284
#define NB_SUFFIXES 4

/*
 * Format 'size' into 'buf' as a short human-readable string: plain digits
 * up to 999, then one decimal place with a K/M/G/T suffix while below 10
 * units, then a rounded integer with the suffix.  Returns 'buf' so the
 * call can be used inline in printf arguments.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t base;
    int i;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
    } else {
        base = 1024;
        for(i = 0; i < NB_SUFFIXES; i++) {
            if (size < (10 * base)) {
                /* Was "%0.1f": the '0' flag is meaningless without a
                 * minimum field width, so use the plain "%.1f" spelling
                 * (output is unchanged). */
                snprintf(buf, buf_size, "%.1f%c",
                         (double)size / base,
                         suffixes[i]);
                break;
            } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
                /* round to the nearest whole unit */
                snprintf(buf, buf_size, "%" PRId64 "%c",
                         ((size + (base >> 1)) / base),
                         suffixes[i]);
                break;
            }
            base = base * 1024;
        }
    }
    return buf;
}
2314
/* Format one snapshot as a table row into 'buf' (ID, tag, VM state size,
 * date, VM clock); with sn == NULL, format the table header instead.
 * Returns 'buf'. */
char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
{
    char buf1[128], date_buf[128], clock_buf[128];
#ifdef _WIN32
    struct tm *ptm;
#else
    struct tm tm;
#endif
    time_t ti;
    int64_t secs;

    if (!sn) {
        /* header row; widths must match the data row format below */
        snprintf(buf, buf_size,
                 "%-10s%-20s%7s%20s%15s",
                 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
    } else {
        ti = sn->date_sec;
#ifdef _WIN32
        /* Windows has no localtime_r(); localtime() uses static storage */
        ptm = localtime(&ti);
        strftime(date_buf, sizeof(date_buf),
                 "%Y-%m-%d %H:%M:%S", ptm);
#else
        localtime_r(&ti, &tm);
        strftime(date_buf, sizeof(date_buf),
                 "%Y-%m-%d %H:%M:%S", &tm);
#endif
        /* guest clock is kept in nanoseconds; render as h:m:s.ms */
        secs = sn->vm_clock_nsec / 1000000000;
        snprintf(clock_buf, sizeof(clock_buf),
                 "%02d:%02d:%02d.%03d",
                 (int)(secs / 3600),
                 (int)((secs / 60) % 60),
                 (int)(secs % 60),
                 (int)((sn->vm_clock_nsec / 1000000) % 1000));
        snprintf(buf, buf_size,
                 "%-10s%-20s%7s%20s%15s",
                 sn->id_str, sn->name,
                 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
                 date_buf,
                 clock_buf);
    }
    return buf;
}
2357
2358 /**************************************************************/
2359 /* async I/Os */
2360
/* Submit an asynchronous vectored read; 'cb' runs on completion.
 * Implemented on top of the shared coroutine read/write path (the final
 * argument selects the direction: false = read). */
BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
                                 QEMUIOVector *qiov, int nb_sectors,
                                 BlockDriverCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);

    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
                                 cb, opaque, false);
}
2370
/* Submit an asynchronous vectored write; 'cb' runs on completion.
 * Implemented on top of the shared coroutine read/write path (the final
 * argument selects the direction: true = write). */
BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
                                  QEMUIOVector *qiov, int nb_sectors,
                                  BlockDriverCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);

    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
                                 cb, opaque, true);
}
2380
2381
/* Bookkeeping for a bdrv_aio_multiwrite batch: completion fans out to the
 * original per-request callbacks once all merged AIOs have finished. */
typedef struct MultiwriteCB {
    int error;                      /* first error seen among the requests */
    int num_requests;               /* merged AIOs still in flight */
    int num_callbacks;              /* original per-request callbacks */
    struct {
        BlockDriverCompletionFunc *cb;
        void *opaque;
        QEMUIOVector *free_qiov;    /* owned: destroyed and freed on completion */
        void *free_buf;             /* owned: qemu_vfree'd on completion */
    } callbacks[];                  /* flexible array member, one per callback */
} MultiwriteCB;
2393
2394 static void multiwrite_user_cb(MultiwriteCB *mcb)
2395 {
2396 int i;
2397
2398 for (i = 0; i < mcb->num_callbacks; i++) {
2399 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
2400 if (mcb->callbacks[i].free_qiov) {
2401 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2402 }
2403 g_free(mcb->callbacks[i].free_qiov);
2404 qemu_vfree(mcb->callbacks[i].free_buf);
2405 }
2406 }
2407
2408 static void multiwrite_cb(void *opaque, int ret)
2409 {
2410 MultiwriteCB *mcb = opaque;
2411
2412 trace_multiwrite_cb(mcb, ret);
2413
2414 if (ret < 0 && !mcb->error) {
2415 mcb->error = ret;
2416 }
2417
2418 mcb->num_requests--;
2419 if (mcb->num_requests == 0) {
2420 multiwrite_user_cb(mcb);
2421 g_free(mcb);
2422 }
2423 }
2424
2425 static int multiwrite_req_compare(const void *a, const void *b)
2426 {
2427 const BlockRequest *req1 = a, *req2 = b;
2428
2429 /*
2430 * Note that we can't simply subtract req2->sector from req1->sector
2431 * here as that could overflow the return value.
2432 */
2433 if (req1->sector > req2->sector) {
2434 return 1;
2435 } else if (req1->sector < req2->sector) {
2436 return -1;
2437 } else {
2438 return 0;
2439 }
2440 }
2441
2442 /*
2443 * Takes a bunch of requests and tries to merge them. Returns the number of
2444 * requests that remain after merging.
2445 */
2446 static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2447 int num_reqs, MultiwriteCB *mcb)
2448 {
2449 int i, outidx;
2450
2451 // Sort requests by start sector
2452 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2453
2454 // Check if adjacent requests touch the same clusters. If so, combine them,
2455 // filling up gaps with zero sectors.
2456 outidx = 0;
2457 for (i = 1; i < num_reqs; i++) {
2458 int merge = 0;
2459 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2460
2461 // This handles the cases that are valid for all block drivers, namely
2462 // exactly sequential writes and overlapping writes.
2463 if (reqs[i].sector <= oldreq_last) {
2464 merge = 1;
2465 }
2466
2467 // The block driver may decide that it makes sense to combine requests
2468 // even if there is a gap of some sectors between them. In this case,
2469 // the gap is filled with zeros (therefore only applicable for yet
2470 // unused space in format like qcow2).
2471 if (!merge && bs->drv->bdrv_merge_requests) {
2472 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2473 }
2474
2475 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2476 merge = 0;
2477 }
2478
2479 if (merge) {
2480 size_t size;
2481 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
2482 qemu_iovec_init(qiov,
2483 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2484
2485 // Add the first request to the merged one. If the requests are
2486 // overlapping, drop the last sectors of the first request.
2487 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2488 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2489
2490 // We might need to add some zeros between the two requests
2491 if (reqs[i].sector > oldreq_last) {
2492 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2493 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2494 memset(buf, 0, zero_bytes);
2495 qemu_iovec_add(qiov, buf, zero_bytes);
2496 mcb->callbacks[i].free_buf = buf;
2497 }
2498
2499 // Add the second request
2500 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2501
2502 reqs[outidx].nb_sectors = qiov->size >> 9;
2503 reqs[outidx].qiov = qiov;
2504
2505 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2506 } else {
2507 outidx++;
2508 reqs[outidx].sector = reqs[i].sector;
2509 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2510 reqs[outidx].qiov = reqs[i].qiov;
2511 }
2512 }
2513
2514 return outidx + 1;
2515 }
2516
2517 /*
2518 * Submit multiple AIO write requests at once.
2519 *
2520 * On success, the function returns 0 and all requests in the reqs array have
2521 * been submitted. In error case this function returns -1, and any of the
2522 * requests may or may not be submitted yet. In particular, this means that the
2523 * callback will be called for some of the requests, for others it won't. The
2524 * caller must check the error field of the BlockRequest to wait for the right
2525 * callbacks (if error != 0, no callback will be called).
2526 *
2527 * The implementation may modify the contents of the reqs array, e.g. to merge
2528 * requests. However, the fields opaque and error are left unmodified as they
2529 * are used to signal failure for a single request to the caller.
2530 */
2531 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2532 {
2533 BlockDriverAIOCB *acb;
2534 MultiwriteCB *mcb;
2535 int i;
2536
2537 /* don't submit writes if we don't have a medium */
2538 if (bs->drv == NULL) {
2539 for (i = 0; i < num_reqs; i++) {
2540 reqs[i].error = -ENOMEDIUM;
2541 }
2542 return -1;
2543 }
2544
2545 if (num_reqs == 0) {
2546 return 0;
2547 }
2548
2549 // Create MultiwriteCB structure
2550 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
2551 mcb->num_requests = 0;
2552 mcb->num_callbacks = num_reqs;
2553
2554 for (i = 0; i < num_reqs; i++) {
2555 mcb->callbacks[i].cb = reqs[i].cb;
2556 mcb->callbacks[i].opaque = reqs[i].opaque;
2557 }
2558
2559 // Check for mergable requests
2560 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2561
2562 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2563
2564 /*
2565 * Run the aio requests. As soon as one request can't be submitted
2566 * successfully, fail all requests that are not yet submitted (we must
2567 * return failure for all requests anyway)
2568 *
2569 * num_requests cannot be set to the right value immediately: If
2570 * bdrv_aio_writev fails for some request, num_requests would be too high
2571 * and therefore multiwrite_cb() would never recognize the multiwrite
2572 * request as completed. We also cannot use the loop variable i to set it
2573 * when the first request fails because the callback may already have been
2574 * called for previously submitted requests. Thus, num_requests must be
2575 * incremented for each request that is submitted.
2576 *
2577 * The problem that callbacks may be called early also means that we need
2578 * to take care that num_requests doesn't become 0 before all requests are
2579 * submitted - multiwrite_cb() would consider the multiwrite request
2580 * completed. A dummy request that is "completed" by a manual call to
2581 * multiwrite_cb() takes care of this.
2582 */
2583 mcb->num_requests = 1;
2584
2585 // Run the aio requests
2586 for (i = 0; i < num_reqs; i++) {
2587 mcb->num_requests++;
2588 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2589 reqs[i].nb_sectors, multiwrite_cb, mcb);
2590
2591 if (acb == NULL) {
2592 // We can only fail the whole thing if no request has been
2593 // submitted yet. Otherwise we'll wait for the submitted AIOs to
2594 // complete and report the error in the callback.
2595 if (i == 0) {
2596 trace_bdrv_aio_multiwrite_earlyfail(mcb);
2597 goto fail;
2598 } else {
2599 trace_bdrv_aio_multiwrite_latefail(mcb, i);
2600 multiwrite_cb(mcb, -EIO);
2601 break;
2602 }
2603 }
2604 }
2605
2606 /* Complete the dummy request */
2607 multiwrite_cb(mcb, 0);
2608
2609 return 0;
2610
2611 fail:
2612 for (i = 0; i < mcb->num_callbacks; i++) {
2613 reqs[i].error = -EIO;
2614 }
2615 g_free(mcb);
2616 return -1;
2617 }
2618
2619 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2620 BlockDriverCompletionFunc *cb, void *opaque)
2621 {
2622 BlockDriver *drv = bs->drv;
2623
2624 trace_bdrv_aio_flush(bs, opaque);
2625
2626 if (bs->open_flags & BDRV_O_NO_FLUSH) {
2627 return bdrv_aio_noop_em(bs, cb, opaque);
2628 }
2629
2630 if (!drv)
2631 return NULL;
2632 return drv->bdrv_aio_flush(bs, cb, opaque);
2633 }
2634
/* Cancel an in-flight AIO request by dispatching to its pool's cancel hook. */
void bdrv_aio_cancel(BlockDriverAIOCB *acb)
{
    acb->pool->cancel(acb);
}
2639
2640
2641 /**************************************************************/
2642 /* async block device emulation */
2643
/*
 * AIOCB used to emulate asynchronous I/O on top of a driver's synchronous
 * bdrv_read/bdrv_write: the work happens inline and completion is deferred
 * to a bottom half.
 */
typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;
    QEMUBH *bh;          /* bottom half that runs the completion callback */
    int ret;             /* result handed to the callback */
    /* vector translation state */
    QEMUIOVector *qiov;  /* caller's scatter/gather list, or NULL */
    uint8_t *bounce;     /* linear bounce buffer for the sync driver call */
    int is_write;        /* nonzero: skip copy-back into qiov on completion */
} BlockDriverAIOCBSync;
2653
/*
 * Cancel hook for sync-emulated AIOCBs: the I/O already happened inline,
 * so cancelling just drops the pending completion bottom half and releases
 * the AIOCB without invoking the caller's callback.
 */
static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
{
    BlockDriverAIOCBSync *acb =
        container_of(blockacb, BlockDriverAIOCBSync, common);
    qemu_bh_delete(acb->bh);
    acb->bh = NULL;
    qemu_aio_release(acb);
}
2662
/* AIOCB pool for the synchronous-emulation path (BlockDriverAIOCBSync). */
static AIOPool bdrv_em_aio_pool = {
    .aiocb_size         = sizeof(BlockDriverAIOCBSync),
    .cancel             = bdrv_aio_cancel_em,
};
2667
2668 static void bdrv_aio_bh_cb(void *opaque)
2669 {
2670 BlockDriverAIOCBSync *acb = opaque;
2671
2672 if (!acb->is_write)
2673 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
2674 qemu_vfree(acb->bounce);
2675 acb->common.cb(acb->common.opaque, acb->ret);
2676 qemu_bh_delete(acb->bh);
2677 acb->bh = NULL;
2678 qemu_aio_release(acb);
2679 }
2680
2681 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2682 int64_t sector_num,
2683 QEMUIOVector *qiov,
2684 int nb_sectors,
2685 BlockDriverCompletionFunc *cb,
2686 void *opaque,
2687 int is_write)
2688
2689 {
2690 BlockDriverAIOCBSync *acb;
2691
2692 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2693 acb->is_write = is_write;
2694 acb->qiov = qiov;
2695 acb->bounce = qemu_blockalign(bs, qiov->size);
2696
2697 if (!acb->bh)
2698 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2699
2700 if (is_write) {
2701 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
2702 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
2703 } else {
2704 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
2705 }
2706
2707 qemu_bh_schedule(acb->bh);
2708
2709 return &acb->common;
2710 }
2711
2712 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2713 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2714 BlockDriverCompletionFunc *cb, void *opaque)
2715 {
2716 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2717 }
2718
2719 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2720 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2721 BlockDriverCompletionFunc *cb, void *opaque)
2722 {
2723 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
2724 }
2725
2726
2727 typedef struct BlockDriverAIOCBCoroutine {
2728 BlockDriverAIOCB common;
2729 BlockRequest req;
2730 bool is_write;
2731 QEMUBH* bh;
2732 } BlockDriverAIOCBCoroutine;
2733
/*
 * Cancel hook for coroutine AIOCBs.  The specific request cannot be
 * cancelled individually here, so this waits for all outstanding AIO to
 * finish (which completes the request) instead; blockacb is ignored.
 */
static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
{
    qemu_aio_flush();
}
2738
/* AIOCB pool for the coroutine path (BlockDriverAIOCBCoroutine). */
static AIOPool bdrv_em_co_aio_pool = {
    .aiocb_size         = sizeof(BlockDriverAIOCBCoroutine),
    .cancel             = bdrv_aio_co_cancel_em,
};
2743
2744 static void bdrv_co_rw_bh(void *opaque)
2745 {
2746 BlockDriverAIOCBCoroutine *acb = opaque;
2747
2748 acb->common.cb(acb->common.opaque, acb->req.error);
2749 qemu_bh_delete(acb->bh);
2750 qemu_aio_release(acb);
2751 }
2752
2753 /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
2754 static void coroutine_fn bdrv_co_do_rw(void *opaque)
2755 {
2756 BlockDriverAIOCBCoroutine *acb = opaque;
2757 BlockDriverState *bs = acb->common.bs;
2758
2759 if (!acb->is_write) {
2760 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
2761 acb->req.nb_sectors, acb->req.qiov);
2762 } else {
2763 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
2764 acb->req.nb_sectors, acb->req.qiov);
2765 }
2766
2767 acb->bh = qemu_bh_new(bdrv_co_rw_bh, acb);
2768 qemu_bh_schedule(acb->bh);
2769 }
2770
2771 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
2772 int64_t sector_num,
2773 QEMUIOVector *qiov,
2774 int nb_sectors,
2775 BlockDriverCompletionFunc *cb,
2776 void *opaque,
2777 bool is_write)
2778 {
2779 Coroutine *co;
2780 BlockDriverAIOCBCoroutine *acb;
2781
2782 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2783 acb->req.sector = sector_num;
2784 acb->req.nb_sectors = nb_sectors;
2785 acb->req.qiov = qiov;
2786 acb->is_write = is_write;
2787
2788 co = qemu_coroutine_create(bdrv_co_do_rw);
2789 qemu_coroutine_enter(co, acb);
2790
2791 return &acb->common;
2792 }
2793
/*
 * Emulate asynchronous flush with the synchronous bdrv_flush(): the flush
 * happens inline and completion (always ret = 0) is deferred to a bottom
 * half.
 */
static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    BlockDriverAIOCBSync *acb;

    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
    acb->is_write = 1; /* don't bounce in the completion handler */
    acb->qiov = NULL;
    acb->bounce = NULL;
    acb->ret = 0;

    /* Recycled AIOCBs may still carry a bottom half; create one if not. */
    if (!acb->bh)
        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);

    bdrv_flush(bs);
    qemu_bh_schedule(acb->bh);
    return &acb->common;
}
2812
2813 static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
2814 BlockDriverCompletionFunc *cb, void *opaque)
2815 {
2816 BlockDriverAIOCBSync *acb;
2817
2818 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2819 acb->is_write = 1; /* don't bounce in the completion handler */
2820 acb->qiov = NULL;
2821 acb->bounce = NULL;
2822 acb->ret = 0;
2823
2824 if (!acb->bh) {
2825 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2826 }
2827
2828 qemu_bh_schedule(acb->bh);
2829 return &acb->common;
2830 }
2831
/* Register all built-in block drivers (MODULE_INIT_BLOCK constructors). */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}
2836
/* Like bdrv_init(), but restrict format drivers to the configured whitelist. */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
2842
2843 void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2844 BlockDriverCompletionFunc *cb, void *opaque)
2845 {
2846 BlockDriverAIOCB *acb;
2847
2848 if (pool->free_aiocb) {
2849 acb = pool->free_aiocb;
2850 pool->free_aiocb = acb->next;
2851 } else {
2852 acb = g_malloc0(pool->aiocb_size);
2853 acb->pool = pool;
2854 }
2855 acb->bs = bs;
2856 acb->cb = cb;
2857 acb->opaque = opaque;
2858 return acb;
2859 }
2860
2861 void qemu_aio_release(void *p)
2862 {
2863 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2864 AIOPool *pool = acb->pool;
2865 acb->next = pool->free_aiocb;
2866 pool->free_aiocb = acb;
2867 }
2868
2869 /**************************************************************/
2870 /* Coroutine block device emulation */
2871
/* Rendezvous between an AIO completion callback and a waiting coroutine. */
typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;  /* coroutine to re-enter on completion */
    int ret;               /* result passed by the completion callback */
} CoroutineIOCompletion;
2876
/*
 * AIO completion callback: store the result and resume the coroutine
 * waiting in bdrv_co_io_em() / bdrv_co_flush_em().
 */
static void bdrv_co_io_em_complete(void *opaque, int ret)
{
    CoroutineIOCompletion *co = opaque;

    co->ret = ret;
    qemu_coroutine_enter(co->coroutine, NULL);
}
2884
2885 static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
2886 int nb_sectors, QEMUIOVector *iov,
2887 bool is_write)
2888 {
2889 CoroutineIOCompletion co = {
2890 .coroutine = qemu_coroutine_self(),
2891 };
2892 BlockDriverAIOCB *acb;
2893
2894 if (is_write) {
2895 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
2896 bdrv_co_io_em_complete, &co);
2897 } else {
2898 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
2899 bdrv_co_io_em_complete, &co);
2900 }
2901
2902 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
2903 if (!acb) {
2904 return -EIO;
2905 }
2906 qemu_coroutine_yield();
2907
2908 return co.ret;
2909 }
2910
2911 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
2912 int64_t sector_num, int nb_sectors,
2913 QEMUIOVector *iov)
2914 {
2915 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
2916 }
2917
2918 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
2919 int64_t sector_num, int nb_sectors,
2920 QEMUIOVector *iov)
2921 {
2922 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
2923 }
2924
2925 static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs)
2926 {
2927 CoroutineIOCompletion co = {
2928 .coroutine = qemu_coroutine_self(),
2929 };
2930 BlockDriverAIOCB *acb;
2931
2932 acb = bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
2933 if (!acb) {
2934 return -EIO;
2935 }
2936 qemu_coroutine_yield();
2937 return co.ret;
2938 }
2939
2940 /**************************************************************/
2941 /* removable device support */
2942
2943 /**
2944 * Return TRUE if the media is present
2945 */
2946 int bdrv_is_inserted(BlockDriverState *bs)
2947 {
2948 BlockDriver *drv = bs->drv;
2949
2950 if (!drv)
2951 return 0;
2952 if (!drv->bdrv_is_inserted)
2953 return 1;
2954 return drv->bdrv_is_inserted(bs);
2955 }
2956
2957 /**
2958 * Return whether the media changed since the last call to this
2959 * function, or -ENOTSUP if we don't know. Most drivers don't know.
2960 */
2961 int bdrv_media_changed(BlockDriverState *bs)
2962 {
2963 BlockDriver *drv = bs->drv;
2964
2965 if (drv && drv->bdrv_media_changed) {
2966 return drv->bdrv_media_changed(bs);
2967 }
2968 return -ENOTSUP;
2969 }
2970
2971 /**
2972 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
2973 */
2974 void bdrv_eject(BlockDriverState *bs, int eject_flag)
2975 {
2976 BlockDriver *drv = bs->drv;
2977
2978 if (drv && drv->bdrv_eject) {
2979 drv->bdrv_eject(bs, eject_flag);
2980 }
2981 }
2982
2983 /**
2984 * Lock or unlock the media (if it is locked, the user won't be able
2985 * to eject it manually).
2986 */
2987 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
2988 {
2989 BlockDriver *drv = bs->drv;
2990
2991 trace_bdrv_lock_medium(bs, locked);
2992
2993 if (drv && drv->bdrv_lock_medium) {
2994 drv->bdrv_lock_medium(bs, locked);
2995 }
2996 }
2997
2998 /* needed for generic scsi interface */
2999
3000 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3001 {
3002 BlockDriver *drv = bs->drv;
3003
3004 if (drv && drv->bdrv_ioctl)
3005 return drv->bdrv_ioctl(bs, req, buf);
3006 return -ENOTSUP;
3007 }
3008
3009 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3010 unsigned long int req, void *buf,
3011 BlockDriverCompletionFunc *cb, void *opaque)
3012 {
3013 BlockDriver *drv = bs->drv;
3014
3015 if (drv && drv->bdrv_aio_ioctl)
3016 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3017 return NULL;
3018 }
3019
/* Set the alignment (in bytes) that qemu_blockalign() uses for this device. */
void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
{
    bs->buffer_alignment = align;
}
3024
3025 void *qemu_blockalign(BlockDriverState *bs, size_t size)
3026 {
3027 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3028 }
3029
3030 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3031 {
3032 int64_t bitmap_size;
3033
3034 bs->dirty_count = 0;
3035 if (enable) {
3036 if (!bs->dirty_bitmap) {
3037 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3038 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3039 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
3040
3041 bs->dirty_bitmap = g_malloc0(bitmap_size);
3042 }
3043 } else {
3044 if (bs->dirty_bitmap) {
3045 g_free(bs->dirty_bitmap);
3046 bs->dirty_bitmap = NULL;
3047 }
3048 }
3049 }
3050
3051 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3052 {
3053 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
3054
3055 if (bs->dirty_bitmap &&
3056 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
3057 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3058 (1UL << (chunk % (sizeof(unsigned long) * 8))));
3059 } else {
3060 return 0;
3061 }
3062 }
3063
/* Clear the dirty bits covering nr_sectors starting at cur_sector. */
void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                      int nr_sectors)
{
    set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
}
3069
/* Return the number of dirty chunks currently tracked for this device. */
int64_t bdrv_get_dirty_count(BlockDriverState *bs)
{
    return bs->dirty_count;
}
3074
/*
 * Mark/unmark the device as claimed by a background job.  The assert
 * catches double-claim and double-release.
 */
void bdrv_set_in_use(BlockDriverState *bs, int in_use)
{
    assert(bs->in_use != in_use);
    bs->in_use = in_use;
}
3080
/* Return nonzero if the device is currently claimed via bdrv_set_in_use(). */
int bdrv_in_use(BlockDriverState *bs)
{
    return bs->in_use;
}
3085
/* Enable I/O status reporting, starting from the OK state. */
void bdrv_iostatus_enable(BlockDriverState *bs)
{
    bs->iostatus = BDRV_IOS_OK;
}
3090
3091 /* The I/O status is only enabled if the drive explicitly
3092 * enables it _and_ the VM is configured to stop on errors */
3093 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3094 {
3095 return (bs->iostatus != BDRV_IOS_INVAL &&
3096 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3097 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3098 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3099 }
3100
/* Disable I/O status reporting for this device. */
void bdrv_iostatus_disable(BlockDriverState *bs)
{
    bs->iostatus = BDRV_IOS_INVAL;
}
3105
/* Clear a recorded error, returning the status to OK (if enabled at all). */
void bdrv_iostatus_reset(BlockDriverState *bs)
{
    if (bdrv_iostatus_is_enabled(bs)) {
        bs->iostatus = BDRV_IOS_OK;
    }
}
3112
3113 /* XXX: Today this is set by device models because it makes the implementation
3114 quite simple. However, the block layer knows about the error, so it's
3115 possible to implement this without device models being involved */
3116 void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3117 {
3118 if (bdrv_iostatus_is_enabled(bs) && bs->iostatus == BDRV_IOS_OK) {
3119 assert(error >= 0);
3120 bs->iostatus = error == ENOSPC ? BDRV_IOS_ENOSPC : BDRV_IOS_FAILED;
3121 }
3122 }
3123
3124 void
3125 bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3126 enum BlockAcctType type)
3127 {
3128 assert(type < BDRV_MAX_IOTYPE);
3129
3130 cookie->bytes = bytes;
3131 cookie->start_time_ns = get_clock();
3132 cookie->type = type;
3133 }
3134
3135 void
3136 bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3137 {
3138 assert(cookie->type < BDRV_MAX_IOTYPE);
3139
3140 bs->nr_bytes[cookie->type] += cookie->bytes;
3141 bs->nr_ops[cookie->type]++;
3142 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
3143 }
3144
3145 int bdrv_img_create(const char *filename, const char *fmt,
3146 const char *base_filename, const char *base_fmt,
3147 char *options, uint64_t img_size, int flags)
3148 {
3149 QEMUOptionParameter *param = NULL, *create_options = NULL;
3150 QEMUOptionParameter *backing_fmt, *backing_file, *size;
3151 BlockDriverState *bs = NULL;
3152 BlockDriver *drv, *proto_drv;
3153 BlockDriver *backing_drv = NULL;
3154 int ret = 0;
3155
3156 /* Find driver and parse its options */
3157 drv = bdrv_find_format(fmt);
3158 if (!drv) {
3159 error_report("Unknown file format '%s'", fmt);
3160 ret = -EINVAL;
3161 goto out;
3162 }
3163
3164 proto_drv = bdrv_find_protocol(filename);
3165 if (!proto_drv) {
3166 error_report("Unknown protocol '%s'", filename);
3167 ret = -EINVAL;
3168 goto out;
3169 }
3170
3171 create_options = append_option_parameters(create_options,
3172 drv->create_options);
3173 create_options = append_option_parameters(create_options,
3174 proto_drv->create_options);
3175
3176 /* Create parameter list with default values */
3177 param = parse_option_parameters("", create_options, param);
3178
3179 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3180
3181 /* Parse -o options */
3182 if (options) {
3183 param = parse_option_parameters(options, create_options, param);
3184 if (param == NULL) {
3185 error_report("Invalid options for file format '%s'.", fmt);
3186 ret = -EINVAL;
3187 goto out;
3188 }
3189 }
3190
3191 if (base_filename) {
3192 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3193 base_filename)) {
3194 error_report("Backing file not supported for file format '%s'",
3195 fmt);
3196 ret = -EINVAL;
3197 goto out;
3198 }
3199 }
3200
3201 if (base_fmt) {
3202 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3203 error_report("Backing file format not supported for file "
3204 "format '%s'", fmt);
3205 ret = -EINVAL;
3206 goto out;
3207 }
3208 }
3209
3210 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3211 if (backing_file && backing_file->value.s) {
3212 if (!strcmp(filename, backing_file->value.s)) {
3213 error_report("Error: Trying to create an image with the "
3214 "same filename as the backing file");
3215 ret = -EINVAL;
3216 goto out;
3217 }
3218 }
3219
3220 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3221 if (backing_fmt && backing_fmt->value.s) {
3222 backing_drv = bdrv_find_format(backing_fmt->value.s);
3223 if (!backing_drv) {
3224 error_report("Unknown backing file format '%s'",
3225 backing_fmt->value.s);
3226 ret = -EINVAL;
3227 goto out;
3228 }
3229 }
3230
3231 // The size for the image must always be specified, with one exception:
3232 // If we are using a backing file, we can obtain the size from there
3233 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3234 if (size && size->value.n == -1) {
3235 if (backing_file && backing_file->value.s) {
3236 uint64_t size;
3237 char buf[32];
3238
3239 bs = bdrv_new("");
3240
3241 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
3242 if (ret < 0) {
3243 error_report("Could not open '%s'", backing_file->value.s);
3244 goto out;
3245 }
3246 bdrv_get_geometry(bs, &size);
3247 size *= 512;
3248
3249 snprintf(buf, sizeof(buf), "%" PRId64, size);
3250 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3251 } else {
3252 error_report("Image creation needs a size parameter");
3253 ret = -EINVAL;
3254 goto out;
3255 }
3256 }
3257
3258 printf("Formatting '%s', fmt=%s ", filename, fmt);
3259 print_option_parameters(param);
3260 puts("");
3261
3262 ret = bdrv_create(drv, filename, param);
3263
3264 if (ret < 0) {
3265 if (ret == -ENOTSUP) {
3266 error_report("Formatting or formatting option not supported for "
3267 "file format '%s'", fmt);
3268 } else if (ret == -EFBIG) {
3269 error_report("The image size is too large for file format '%s'",
3270 fmt);
3271 } else {
3272 error_report("%s: error while creating %s: %s", filename, fmt,
3273 strerror(-ret));
3274 }
3275 }
3276
3277 out:
3278 free_option_parameters(create_options);
3279 free_option_parameters(param);
3280
3281 if (bs) {
3282 bdrv_delete(bs);
3283 }
3284
3285 return ret;
3286 }