]> git.proxmox.com Git - qemu.git/blob - block.c
vvfat: unify and correct computation of sector count
[qemu.git] / block.c
1 /*
2 * QEMU System Emulator block driver
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "monitor.h"
28 #include "block_int.h"
29 #include "module.h"
30 #include "qjson.h"
31 #include "qemu-coroutine.h"
32 #include "qmp-commands.h"
33
34 #ifdef CONFIG_BSD
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <sys/ioctl.h>
38 #include <sys/queue.h>
39 #ifndef __DragonFly__
40 #include <sys/disk.h>
41 #endif
42 #endif
43
44 #ifdef _WIN32
45 #include <windows.h>
46 #endif
47
48 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
49
50 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
51 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
52 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
53 BlockDriverCompletionFunc *cb, void *opaque);
54 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
55 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
56 BlockDriverCompletionFunc *cb, void *opaque);
57 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
58 int64_t sector_num, int nb_sectors,
59 QEMUIOVector *iov);
60 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
61 int64_t sector_num, int nb_sectors,
62 QEMUIOVector *iov);
63 static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
64 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
65 static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
66 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
67 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
68 int64_t sector_num,
69 QEMUIOVector *qiov,
70 int nb_sectors,
71 BlockDriverCompletionFunc *cb,
72 void *opaque,
73 bool is_write);
74 static void coroutine_fn bdrv_co_do_rw(void *opaque);
75
76 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
77 QTAILQ_HEAD_INITIALIZER(bdrv_states);
78
79 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
80 QLIST_HEAD_INITIALIZER(bdrv_drivers);
81
82 /* The device to use for VM snapshots */
83 static BlockDriverState *bs_snapshots;
84
85 /* If non-zero, use only whitelisted block drivers */
86 static int use_bdrv_whitelist;
87
88 #ifdef _WIN32
/* True iff @filename begins with a drive letter followed by ':'. */
static int is_windows_drive_prefix(const char *filename)
{
    char c = filename[0];
    int is_letter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');

    return is_letter && filename[1] == ':';
}
95
96 int is_windows_drive(const char *filename)
97 {
98 if (is_windows_drive_prefix(filename) &&
99 filename[2] == '\0')
100 return 1;
101 if (strstart(filename, "\\\\.\\", NULL) ||
102 strstart(filename, "//./", NULL))
103 return 1;
104 return 0;
105 }
106 #endif
107
/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    /* a drive letter is not a protocol, even though it contains ':' */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#endif
    const char *colon = strchr(path, ':');

    return colon != NULL;
}
120
/* True if @path is absolute, after skipping an optional "<protocol>:"
 * prefix (so "file:/x" counts as absolute). */
int path_is_absolute(const char *path)
{
    const char *after_proto;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif
    after_proto = strchr(path, ':');
    after_proto = after_proto ? after_proto + 1 : path;
#ifdef _WIN32
    return *after_proto == '/' || *after_proto == '\\';
#else
    return *after_proto == '/';
#endif
}
140
/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URL are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        /* p: first char after an optional "<protocol>:" prefix */
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        /* p1: one past the last directory separator of base_path */
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            /* on Windows the last separator may be a backslash instead */
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        /* keep the longer prefix: protocol part or directory part */
        if (p1 > p)
            p = p1;
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        /* copy the directory part of base_path, then append filename */
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}
184
/* Register @bdrv on the global driver list.  Drivers that only provide
 * the synchronous interface get coroutine (and, if needed, AIO)
 * emulation wrappers installed before registration. */
void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
204
/* create a new block device (by default it is empty) */
BlockDriverState *bdrv_new(const char *device_name)
{
    BlockDriverState *bs;

    bs = g_malloc0(sizeof(BlockDriverState));
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
    /* only named devices go onto the global bdrv_states list;
     * anonymous ones ("" name) are owned entirely by the caller */
    if (device_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
    }
    bdrv_iostatus_disable(bs);
    return bs;
}
218
219 BlockDriver *bdrv_find_format(const char *format_name)
220 {
221 BlockDriver *drv1;
222 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
223 if (!strcmp(drv1->format_name, format_name)) {
224 return drv1;
225 }
226 }
227 return NULL;
228 }
229
/* Return 1 if @drv may be used under the configured driver whitelist.
 * An empty CONFIG_BDRV_WHITELIST means no whitelist: everything is
 * allowed. */
static int bdrv_is_whitelisted(BlockDriver *drv)
{
    static const char *whitelist[] = {
        CONFIG_BDRV_WHITELIST
    };
    const char **p;

    if (!whitelist[0])
        return 1;               /* no whitelist, anything goes */

    for (p = whitelist; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
    return 0;
}
247
248 BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
249 {
250 BlockDriver *drv = bdrv_find_format(format_name);
251 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
252 }
253
/* Create an image file via @drv's bdrv_create callback.
 * Returns 0 on success, -ENOTSUP when the driver cannot create
 * images, or a negative errno from the driver. */
int bdrv_create(BlockDriver *drv, const char* filename,
    QEMUOptionParameter *options)
{
    if (!drv->bdrv_create)
        return -ENOTSUP;

    return drv->bdrv_create(filename, options);
}
262
263 int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
264 {
265 BlockDriver *drv;
266
267 drv = bdrv_find_protocol(filename);
268 if (drv == NULL) {
269 return -ENOENT;
270 }
271
272 return bdrv_create(drv, filename, options);
273 }
274
275 #ifdef _WIN32
/* Fill @filename with the name of a newly created temporary file.
 * Windows variant: GetTempFileName() creates the file itself.
 * NOTE(review): @size is unused here -- assumes the caller's buffer is
 * at least MAX_PATH bytes; verify against callers. */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
283 #else
/* Fill @filename (of @size bytes) with the name of a newly created
 * temporary file under $TMPDIR (default /tmp).  The file itself is
 * created by mkstemp() and left in place; only its fd is closed. */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;
    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir)
        tmpdir = "/tmp";
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    /* Bug fix: mkstemp() can fail (e.g. unwritable tmpdir); the old code
     * unconditionally called close(-1) in that case. */
    if (fd >= 0) {
        close(fd);
    }
}
296 #endif
297
298 /*
299 * Detect host devices. By convention, /dev/cdrom[N] is always
300 * recognized as a host CDROM.
301 */
302 static BlockDriver *find_hdev_driver(const char *filename)
303 {
304 int score_max = 0, score;
305 BlockDriver *drv = NULL, *d;
306
307 QLIST_FOREACH(d, &bdrv_drivers, list) {
308 if (d->bdrv_probe_device) {
309 score = d->bdrv_probe_device(filename);
310 if (score > score_max) {
311 score_max = score;
312 drv = d;
313 }
314 }
315 }
316
317 return drv;
318 }
319
/* Find the protocol driver for @filename: host-device probing first,
 * then the "<protocol>:" prefix, falling back to the "file" driver for
 * plain paths.  Returns NULL when nothing matches. */
BlockDriver *bdrv_find_protocol(const char *filename)
{
    BlockDriver *drv1;
    char protocol[128];
    int len;
    const char *p;

    /* TODO Drivers without bdrv_file_open must be specified explicitly */

    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

    if (!path_has_protocol(filename)) {
        return bdrv_find_format("file");
    }
    /* isolate the "<protocol>" part before the ':' (silently truncated
     * to 127 chars) and look it up among the registered drivers */
    p = strchr(filename, ':');
    assert(p != NULL);
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->protocol_name &&
            !strcmp(drv1->protocol_name, protocol)) {
            return drv1;
        }
    }
    return NULL;
}
359
/* Probe @filename's content and store the best-matching format driver
 * in *pdrv.  Returns 0 on success or a negative errno (then *pdrv is
 * NULL, or "raw" for sg/empty devices). */
static int find_image_format(const char *filename, BlockDriver **pdrv)
{
    int ret, score, score_max;
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
    BlockDriverState *bs;

    ret = bdrv_file_open(&bs, filename, 0);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs)) {
        bdrv_delete(bs);
        drv = bdrv_find_format("raw");
        if (!drv) {
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
    }

    /* read the image header once and let every driver score it */
    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    bdrv_delete(bs);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    score_max = 0;
    drv = NULL;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->bdrv_probe) {
            /* ret is the number of header bytes actually read */
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
        }
    }
    if (!drv) {
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}
408
/**
 * Set the current 'total_sectors' value
 *
 * Queries the driver for the actual device length when it can report
 * one; otherwise trusts @hint (a sector count).  Returns 0 on success
 * or a negative errno from the driver.
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg)
        return 0;

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);
        if (length < 0) {
            /* negative length is the driver's errno */
            return length;
        }
        hint = length >> BDRV_SECTOR_BITS;
    }

    bs->total_sectors = hint;
    return 0;
}
432
433 /**
434 * Set open flags for a given cache mode
435 *
436 * Return 0 on success, -1 if the cache mode was invalid.
437 */
438 int bdrv_parse_cache_flags(const char *mode, int *flags)
439 {
440 *flags &= ~BDRV_O_CACHE_MASK;
441
442 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
443 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
444 } else if (!strcmp(mode, "directsync")) {
445 *flags |= BDRV_O_NOCACHE;
446 } else if (!strcmp(mode, "writeback")) {
447 *flags |= BDRV_O_CACHE_WB;
448 } else if (!strcmp(mode, "unsafe")) {
449 *flags |= BDRV_O_CACHE_WB;
450 *flags |= BDRV_O_NO_FLUSH;
451 } else if (!strcmp(mode, "writethrough")) {
452 /* this is the default */
453 } else {
454 return -1;
455 }
456
457 return 0;
458 }
459
/*
 * Common part for opening disk images and files
 *
 * Resets per-open state on @bs, allocates the driver's private data and
 * opens the image either directly (protocol drivers) or through an
 * underlying protocol file.  On failure all partially-acquired state is
 * rolled back and a negative errno is returned.
 */
static int bdrv_open_common(BlockDriverState *bs, const char *filename,
    int flags, BlockDriver *drv)
{
    int ret, open_flags;

    assert(drv != NULL);

    trace_bdrv_open_common(bs, filename, flags, drv->format_name);

    /* reset per-open state to clean defaults */
    bs->file = NULL;
    bs->total_sectors = 0;
    bs->encrypted = 0;
    bs->valid_key = 0;
    bs->sg = 0;
    bs->open_flags = flags;
    bs->growable = 0;
    bs->buffer_alignment = 512;

    pstrcpy(bs->filename, sizeof(bs->filename), filename);
    bs->backing_file[0] = '\0';

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
        return -ENOTSUP;
    }

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

    /*
     * Snapshots should be writable.
     */
    if (bs->is_temporary) {
        open_flags |= BDRV_O_RDWR;
    }

    bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        ret = drv->bdrv_file_open(bs, filename, open_flags);
    } else {
        /* format driver on top of a protocol file (bs->file) */
        ret = bdrv_file_open(&bs->file, filename, open_flags);
        if (ret >= 0) {
            ret = drv->bdrv_open(bs, open_flags);
        }
    }

    if (ret < 0) {
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        goto free_and_fail;
    }

#ifndef _WIN32
    /* a temporary snapshot file can be unlinked right away on POSIX;
     * the open fd keeps it alive until close */
    if (bs->is_temporary) {
        unlink(filename);
    }
#endif
    return 0;

free_and_fail:
    if (bs->file) {
        bdrv_delete(bs->file);
        bs->file = NULL;
    }
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}
544
/*
 * Opens a file using a protocol (file, host_device, nbd, ...)
 *
 * On success a new anonymous BlockDriverState is stored in *pbs and 0
 * is returned; on failure a negative errno is returned and *pbs is
 * left untouched.
 */
int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
{
    BlockDriverState *bs;
    BlockDriver *drv;
    int ret;

    drv = bdrv_find_protocol(filename);
    if (!drv) {
        return -ENOENT;
    }

    bs = bdrv_new("");
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        bdrv_delete(bs);
        return ret;
    }
    /* protocol-level files may grow when written past the current end */
    bs->growable = 1;
    *pbs = bs;
    return 0;
}
569
/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
 *
 * With BDRV_O_SNAPSHOT a temporary qcow2 overlay backed by @filename is
 * created and opened instead, so guest writes never touch the original
 * image.  If @drv is NULL the format is probed.  After the image is
 * open, its backing file (if any, and unless BDRV_O_NO_BACKING) is
 * opened read-only as bs->backing_hd.
 */
int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
              BlockDriver *drv)
{
    int ret;
    char tmp_filename[PATH_MAX];

    if (flags & BDRV_O_SNAPSHOT) {
        BlockDriverState *bs1;
        int64_t total_size;
        int is_protocol = 0;
        BlockDriver *bdrv_qcow2;
        QEMUOptionParameter *options;
        char backing_filename[PATH_MAX];

        /* if snapshot, we create a temporary backing file and open it
           instead of opening 'filename' directly */

        /* open the original image once, just to learn its size and
         * whether it is protocol-addressed */
        bs1 = bdrv_new("");
        ret = bdrv_open(bs1, filename, 0, drv);
        if (ret < 0) {
            bdrv_delete(bs1);
            return ret;
        }
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;

        if (bs1->drv && bs1->drv->protocol_name)
            is_protocol = 1;

        bdrv_delete(bs1);

        get_tmp_filename(tmp_filename, sizeof(tmp_filename));

        /* Real path is meaningless for protocols */
        if (is_protocol)
            snprintf(backing_filename, sizeof(backing_filename),
                     "%s", filename);
        else if (!realpath(filename, backing_filename))
            return -errno;

        /* create the temporary qcow2 overlay backed by the original */
        bdrv_qcow2 = bdrv_find_format("qcow2");
        options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);

        set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
        set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
        if (drv) {
            set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
                drv->format_name);
        }

        ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
        free_option_parameters(options);
        if (ret < 0) {
            return ret;
        }

        /* from here on, open the temporary overlay instead */
        filename = tmp_filename;
        drv = bdrv_qcow2;
        bs->is_temporary = 1;
    }

    /* Find the right image format driver */
    if (!drv) {
        ret = find_image_format(filename, &drv);
    }

    if (!drv) {
        goto unlink_and_fail;
    }

    /* Open the image */
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        goto unlink_and_fail;
    }

    /* If there is a backing file, use it */
    if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
        char backing_filename[PATH_MAX];
        int back_flags;
        BlockDriver *back_drv = NULL;

        bs->backing_hd = bdrv_new("");

        /* resolve the backing file name relative to the image, unless
         * it is already protocol-qualified */
        if (path_has_protocol(bs->backing_file)) {
            pstrcpy(backing_filename, sizeof(backing_filename),
                    bs->backing_file);
        } else {
            path_combine(backing_filename, sizeof(backing_filename),
                         filename, bs->backing_file);
        }

        if (bs->backing_format[0] != '\0') {
            back_drv = bdrv_find_format(bs->backing_format);
        }

        /* backing files always opened read-only */
        back_flags =
            flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

        ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
        if (ret < 0) {
            bdrv_close(bs);
            return ret;
        }
        if (bs->is_temporary) {
            bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
        } else {
            /* base image inherits from "parent" */
            bs->backing_hd->keep_read_only = bs->keep_read_only;
        }
    }

    if (!bdrv_key_required(bs)) {
        bdrv_dev_change_media_cb(bs, true);
    }

    return 0;

unlink_and_fail:
    /* don't leave the temporary overlay file behind on failure */
    if (bs->is_temporary) {
        unlink(filename);
    }
    return ret;
}
698
/* Close the medium of @bs: tear down the backing file, the driver
 * state and the underlying protocol file.  No-op when no medium is
 * present (bs->drv == NULL).  The BlockDriverState itself stays valid;
 * see bdrv_delete() for freeing it. */
void bdrv_close(BlockDriverState *bs)
{
    if (bs->drv) {
        if (bs == bs_snapshots) {
            bs_snapshots = NULL;
        }
        if (bs->backing_hd) {
            bdrv_delete(bs->backing_hd);
            bs->backing_hd = NULL;
        }
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
#ifdef _WIN32
        /* on POSIX the temporary file was already unlinked at open time */
        if (bs->is_temporary) {
            unlink(bs->filename);
        }
#endif
        bs->opaque = NULL;
        bs->drv = NULL;

        /* close the protocol layer underneath a format driver */
        if (bs->file != NULL) {
            bdrv_close(bs->file);
        }

        bdrv_dev_change_media_cb(bs, false);
    }
}
726
727 void bdrv_close_all(void)
728 {
729 BlockDriverState *bs;
730
731 QTAILQ_FOREACH(bs, &bdrv_states, list) {
732 bdrv_close(bs);
733 }
734 }
735
736 /* make a BlockDriverState anonymous by removing from bdrv_state list.
737 Also, NULL terminate the device_name to prevent double remove */
738 void bdrv_make_anon(BlockDriverState *bs)
739 {
740 if (bs->device_name[0] != '\0') {
741 QTAILQ_REMOVE(&bdrv_states, bs, list);
742 }
743 bs->device_name[0] = '\0';
744 }
745
/* Close and free @bs.  The BDS must not be attached to a guest device
 * and must not be the snapshot device. */
void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->dev);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    bdrv_close(bs);
    /* bdrv_close() only closed bs->file; its state is freed here */
    if (bs->file != NULL) {
        bdrv_delete(bs->file);
    }

    assert(bs != bs_snapshots);
    g_free(bs);
}
761
762 int bdrv_attach_dev(BlockDriverState *bs, void *dev)
763 /* TODO change to DeviceState *dev when all users are qdevified */
764 {
765 if (bs->dev) {
766 return -EBUSY;
767 }
768 bs->dev = dev;
769 bdrv_iostatus_reset(bs);
770 return 0;
771 }
772
773 /* TODO qdevified devices don't use this, remove when devices are qdevified */
774 void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
775 {
776 if (bdrv_attach_dev(bs, dev) < 0) {
777 abort();
778 }
779 }
780
/* Detach guest device @dev from @bs and reset the device-facing state
 * to defaults.  @dev must be the device currently attached. */
void bdrv_detach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    assert(bs->dev == dev);
    bs->dev = NULL;
    bs->dev_ops = NULL;
    bs->dev_opaque = NULL;
    /* restore the default alignment a fresh BDS would have */
    bs->buffer_alignment = 512;
}
790
/* TODO change to return DeviceState * when all users are qdevified */
/* Return the guest device attached via bdrv_attach_dev(), or NULL. */
void *bdrv_get_attached_dev(BlockDriverState *bs)
{
    return bs->dev;
}
796
/* Install the attached device's callback table (media change, tray,
 * resize, ...); @opaque is passed back to every callback. */
void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
    void *opaque)
{
    bs->dev_ops = ops;
    bs->dev_opaque = opaque;
    /* a device with removable media must not stay selected as the
     * VM-snapshot device */
    if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
        bs_snapshots = NULL;
    }
}
806
807 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
808 {
809 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
810 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
811 }
812 }
813
814 bool bdrv_dev_has_removable_media(BlockDriverState *bs)
815 {
816 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
817 }
818
819 bool bdrv_dev_is_tray_open(BlockDriverState *bs)
820 {
821 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
822 return bs->dev_ops->is_tray_open(bs->dev_opaque);
823 }
824 return false;
825 }
826
827 static void bdrv_dev_resize_cb(BlockDriverState *bs)
828 {
829 if (bs->dev_ops && bs->dev_ops->resize_cb) {
830 bs->dev_ops->resize_cb(bs->dev_opaque);
831 }
832 }
833
834 bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
835 {
836 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
837 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
838 }
839 return false;
840 }
841
/*
 * Run consistency checks on an image
 *
 * Returns 0 if the check could be completed (it doesn't mean that the image is
 * free of errors) or -errno when an internal error occurred. The results of the
 * check are stored in res.
 */
int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
{
    if (bs->drv->bdrv_check == NULL) {
        return -ENOTSUP;
    }

    /* zero the result counters before the driver fills them in */
    memset(res, 0, sizeof(*res));
    return bs->drv->bdrv_check(bs, res);
}
858
859 #define COMMIT_BUF_SECTORS 2048
860
861 /* commit COW file into the raw image */
862 int bdrv_commit(BlockDriverState *bs)
863 {
864 BlockDriver *drv = bs->drv;
865 BlockDriver *backing_drv;
866 int64_t sector, total_sectors;
867 int n, ro, open_flags;
868 int ret = 0, rw_ret = 0;
869 uint8_t *buf;
870 char filename[1024];
871 BlockDriverState *bs_rw, *bs_ro;
872
873 if (!drv)
874 return -ENOMEDIUM;
875
876 if (!bs->backing_hd) {
877 return -ENOTSUP;
878 }
879
880 if (bs->backing_hd->keep_read_only) {
881 return -EACCES;
882 }
883
884 backing_drv = bs->backing_hd->drv;
885 ro = bs->backing_hd->read_only;
886 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
887 open_flags = bs->backing_hd->open_flags;
888
889 if (ro) {
890 /* re-open as RW */
891 bdrv_delete(bs->backing_hd);
892 bs->backing_hd = NULL;
893 bs_rw = bdrv_new("");
894 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
895 backing_drv);
896 if (rw_ret < 0) {
897 bdrv_delete(bs_rw);
898 /* try to re-open read-only */
899 bs_ro = bdrv_new("");
900 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
901 backing_drv);
902 if (ret < 0) {
903 bdrv_delete(bs_ro);
904 /* drive not functional anymore */
905 bs->drv = NULL;
906 return ret;
907 }
908 bs->backing_hd = bs_ro;
909 return rw_ret;
910 }
911 bs->backing_hd = bs_rw;
912 }
913
914 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
915 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
916
917 for (sector = 0; sector < total_sectors; sector += n) {
918 if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
919
920 if (bdrv_read(bs, sector, buf, n) != 0) {
921 ret = -EIO;
922 goto ro_cleanup;
923 }
924
925 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
926 ret = -EIO;
927 goto ro_cleanup;
928 }
929 }
930 }
931
932 if (drv->bdrv_make_empty) {
933 ret = drv->bdrv_make_empty(bs);
934 bdrv_flush(bs);
935 }
936
937 /*
938 * Make sure all data we wrote to the backing device is actually
939 * stable on disk.
940 */
941 if (bs->backing_hd)
942 bdrv_flush(bs->backing_hd);
943
944 ro_cleanup:
945 g_free(buf);
946
947 if (ro) {
948 /* re-open as RO */
949 bdrv_delete(bs->backing_hd);
950 bs->backing_hd = NULL;
951 bs_ro = bdrv_new("");
952 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
953 backing_drv);
954 if (ret < 0) {
955 bdrv_delete(bs_ro);
956 /* drive not functional anymore */
957 bs->drv = NULL;
958 return ret;
959 }
960 bs->backing_hd = bs_ro;
961 bs->backing_hd->keep_read_only = 0;
962 }
963
964 return ret;
965 }
966
967 void bdrv_commit_all(void)
968 {
969 BlockDriverState *bs;
970
971 QTAILQ_FOREACH(bs, &bdrv_states, list) {
972 bdrv_commit(bs);
973 }
974 }
975
976 /*
977 * Return values:
978 * 0 - success
979 * -EINVAL - backing format specified, but no file
980 * -ENOSPC - can't update the backing file because no space is left in the
981 * image file header
982 * -ENOTSUP - format driver doesn't support changing the backing file
983 */
984 int bdrv_change_backing_file(BlockDriverState *bs,
985 const char *backing_file, const char *backing_fmt)
986 {
987 BlockDriver *drv = bs->drv;
988
989 if (drv->bdrv_change_backing_file != NULL) {
990 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
991 } else {
992 return -ENOTSUP;
993 }
994 }
995
/* Validate a byte-granularity request against the device: medium must
 * be present and, for non-growable devices, the range must lie inside
 * the device.  Returns 0, -ENOMEDIUM or -EIO. */
static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
                                   size_t size)
{
    int64_t len;

    if (!bdrv_is_inserted(bs))
        return -ENOMEDIUM;

    /* growable backends accept writes past the current end */
    if (bs->growable)
        return 0;

    len = bdrv_getlength(bs);

    if (offset < 0)
        return -EIO;

    /* the two-part test avoids computing offset + size, which could
     * wrap for huge size values */
    if ((offset > len) || (len - offset < size))
        return -EIO;

    return 0;
}
1017
/* Sector-granularity wrapper around bdrv_check_byte_request().
 * NOTE(review): sector_num * BDRV_SECTOR_SIZE would overflow int64_t
 * for absurdly large sector numbers -- presumably unreachable; verify
 * against callers. */
static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors)
{
    return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
                                   nb_sectors * BDRV_SECTOR_SIZE);
}
1024
/* Argument/result bundle for a synchronous request executed inside a
 * coroutine; see bdrv_rw_co_entry() and bdrv_rw_co(). */
typedef struct RwCo {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    QEMUIOVector *qiov;
    bool is_write;   /* false = read, true = write */
    int ret;         /* request result; NOT_DONE while still in flight */
} RwCo;
1033
1034 static void coroutine_fn bdrv_rw_co_entry(void *opaque)
1035 {
1036 RwCo *rwco = opaque;
1037
1038 if (!rwco->is_write) {
1039 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
1040 rwco->nb_sectors, rwco->qiov);
1041 } else {
1042 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
1043 rwco->nb_sectors, rwco->qiov);
1044 }
1045 }
1046
/*
 * Process a synchronous request using coroutines
 *
 * Wraps the caller's linear buffer in a single-element iovec, runs the
 * request in a coroutine and, when called from outside coroutine
 * context, pumps the event loop until it completes.
 */
static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
                      int nb_sectors, bool is_write)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
    };
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .qiov = &qiov,
        .is_write = is_write,
        .ret = NOT_DONE,
    };

    qemu_iovec_init_external(&qiov, &iov, 1);

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_rw_co_entry(&rwco);
    } else {
        /* spawn a coroutine and wait for it to finish */
        co = qemu_coroutine_create(bdrv_rw_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }
    return rwco.ret;
}
1082
/* Synchronous read of nb_sectors starting at sector_num.
 * return < 0 if error. See bdrv_write() for the return codes */
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
              uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
}
1089
/* Set (or clear) the dirty-bitmap bits covering sectors [sector_num,
 * sector_num + nb_sectors), keeping bs->dirty_count consistent.  One
 * bit covers BDRV_SECTORS_PER_DIRTY_CHUNK sectors. */
static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
                             int nb_sectors, int dirty)
{
    int64_t start, end;
    unsigned long val, idx, bit;

    /* first and last chunk touched by the request (end is inclusive) */
    start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
    end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;

    for (; start <= end; start++) {
        /* locate the word and bit for this chunk */
        idx = start / (sizeof(unsigned long) * 8);
        bit = start % (sizeof(unsigned long) * 8);
        val = bs->dirty_bitmap[idx];
        if (dirty) {
            /* only count a chunk on a 0 -> 1 transition */
            if (!(val & (1UL << bit))) {
                bs->dirty_count++;
                val |= 1UL << bit;
            }
        } else {
            /* only discount a chunk on a 1 -> 0 transition */
            if (val & (1UL << bit)) {
                bs->dirty_count--;
                val &= ~(1UL << bit);
            }
        }
        bs->dirty_bitmap[idx] = val;
    }
}
1117
/* Synchronous write of nb_sectors starting at sector_num.
   Return < 0 if error. Important errors are:
  -EIO         generic I/O error (may happen for all errors)
  -ENOMEDIUM   No media inserted.
  -EINVAL      Invalid sector number or nb_sectors
  -EACCES      Trying to write a read-only device
*/
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
               const uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
}
1129
/* Byte-granularity read: bounces the unaligned head and tail through a
 * one-sector buffer and reads whole sectors directly into @buf.
 * Returns @count1 (the requested byte count) on success or a negative
 * error from bdrv_read(). */
int bdrv_pread(BlockDriverState *bs, int64_t offset,
               void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first read to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* read the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf, count);
    }
    return count1;
}
1174
/* Byte-granularity write: performs a read-modify-write through a
 * one-sector buffer for the unaligned head and tail, and writes whole
 * sectors directly from @buf.  Returns @count1 on success or a
 * negative error from bdrv_read()/bdrv_write(). */
int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
                const void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first write to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        /* read-modify-write of the partially covered first sector */
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* write the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        /* read-modify-write of the partially covered last sector */
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf, buf, count);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
    }
    return count1;
}
1223
/*
 * Writes to the file and ensures that no writes are reordered across this
 * request (acts as a barrier)
 *
 * Returns 0 on success, -errno in error cases.
 */
int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
    const void *buf, int count)
{
    int ret;

    ret = bdrv_pwrite(bs, offset, buf, count);
    if (ret < 0) {
        return ret;
    }

    /* No flush needed for cache modes that use O_DSYNC */
    if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
        bdrv_flush(bs);
    }

    return 0;
}
1247
1248 /*
1249 * Handle a read request in coroutine context
1250 */
/*
 * Handle a read request in coroutine context
 *
 * Validates that a medium is present and the request is within bounds,
 * then forwards it to the driver's coroutine read callback.
 */
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    BlockDriver *drv = bs->drv;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }

    return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
}
1265
/* Public coroutine read entry point: trace, then dispatch to the
 * common read path. */
int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_readv(bs, sector_num, nb_sectors);

    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
}
1273
1274 /*
1275 * Handle a write request in coroutine context
1276 */
/*
 * Handle a write request in coroutine context
 *
 * Checks medium presence, read-only status and request bounds, forwards
 * the request to the driver, then updates the dirty bitmap and the
 * highest-written-sector watermark.
 */
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    BlockDriver *drv = bs->drv;
    int ret;

    if (!bs->drv) {
        return -ENOMEDIUM;
    }
    if (bs->read_only) {
        return -EACCES;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }

    ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);

    /* NOTE(review): the bitmap is marked dirty even when the driver write
     * failed (ret < 0) — conservative, since spurious dirty sectors only
     * cause extra copying; confirm this is intentional. */
    if (bs->dirty_bitmap) {
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
    }

    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
        bs->wr_highest_sector = sector_num + nb_sectors - 1;
    }

    return ret;
}
1305
/* Public coroutine write entry point: trace, then dispatch to the
 * common write path. */
int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_writev(bs, sector_num, nb_sectors);

    return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
}
1313
1314 /**
1315 * Truncate file to 'offset' bytes (needed only for file protocols)
1316 */
/**
 * Truncate file to 'offset' bytes (needed only for file protocols)
 *
 * Fails with -ENOMEDIUM (no medium), -ENOTSUP (driver can't truncate),
 * -EACCES (read-only) or -EBUSY (device in use, e.g. by a job).
 * On success the cached sector count is refreshed and device resize
 * callbacks are notified.
 */
int bdrv_truncate(BlockDriverState *bs, int64_t offset)
{
    BlockDriver *drv = bs->drv;
    int ret;
    if (!drv)
        return -ENOMEDIUM;
    if (!drv->bdrv_truncate)
        return -ENOTSUP;
    if (bs->read_only)
        return -EACCES;
    if (bdrv_in_use(bs))
        return -EBUSY;
    ret = drv->bdrv_truncate(bs, offset);
    if (ret == 0) {
        ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
        bdrv_dev_resize_cb(bs);
    }
    return ret;
}
1336
1337 /**
1338 * Length of a allocated file in bytes. Sparse files are counted by actual
1339 * allocated space. Return < 0 if error or unknown.
1340 */
/**
 * Length of a allocated file in bytes. Sparse files are counted by actual
 * allocated space. Return < 0 if error or unknown.
 *
 * Falls through to the protocol layer (bs->file) when the format driver
 * does not implement the query itself.
 */
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_get_allocated_file_size) {
        return drv->bdrv_get_allocated_file_size(bs);
    }
    if (bs->file) {
        /* delegate to the underlying protocol driver */
        return bdrv_get_allocated_file_size(bs->file);
    }
    return -ENOTSUP;
}
1355
1356 /**
1357 * Length of a file in bytes. Return < 0 if error or unknown.
1358 */
/**
 * Length of a file in bytes. Return < 0 if error or unknown.
 *
 * For growable images and removable media the driver is asked directly
 * (the size may have changed); otherwise the cached sector count is used.
 */
int64_t bdrv_getlength(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;

    if (bs->growable || bdrv_dev_has_removable_media(bs)) {
        if (drv->bdrv_getlength) {
            return drv->bdrv_getlength(bs);
        }
    }
    return bs->total_sectors * BDRV_SECTOR_SIZE;
}
1372
1373 /* return 0 as number of sectors if no device present or error */
1374 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
1375 {
1376 int64_t length;
1377 length = bdrv_getlength(bs);
1378 if (length < 0)
1379 length = 0;
1380 else
1381 length = length >> BDRV_SECTOR_BITS;
1382 *nb_sectors_ptr = length;
1383 }
1384
/* One 16-byte MBR (MS-DOS) partition table entry.  QEMU_PACKED because
 * the on-disk layout has no padding (start_sect/nr_sects are unaligned). */
struct partition {
    uint8_t boot_ind;           /* 0x80 - active */
    uint8_t head;               /* starting head */
    uint8_t sector;             /* starting sector */
    uint8_t cyl;                /* starting cylinder */
    uint8_t sys_ind;            /* What partition type */
    uint8_t end_head;           /* end head */
    uint8_t end_sector;         /* end sector */
    uint8_t end_cyl;            /* end cylinder */
    uint32_t start_sect;        /* starting sector counting from 0 */
    uint32_t nr_sects;          /* nr of sectors in partition */
} QEMU_PACKED;
1397
1398 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
/* try to guess the disk logical geometry from the MSDOS partition table.
   Return 0 if OK, -1 if could not guess */
static int guess_disk_lchs(BlockDriverState *bs,
                           int *pcylinders, int *pheads, int *psectors)
{
    uint8_t buf[BDRV_SECTOR_SIZE];
    int ret, i, heads, sectors, cylinders;
    struct partition *p;
    uint32_t nr_sects;
    uint64_t nb_sectors;

    bdrv_get_geometry(bs, &nb_sectors);

    /* read the MBR */
    ret = bdrv_read(bs, 0, buf, 1);
    if (ret < 0)
        return -1;
    /* test msdos magic */
    if (buf[510] != 0x55 || buf[511] != 0xaa)
        return -1;
    for(i = 0; i < 4; i++) {
        /* partition table entries start at offset 0x1be */
        p = ((struct partition *)(buf + 0x1be)) + i;
        nr_sects = le32_to_cpu(p->nr_sects);
        if (nr_sects && p->end_head) {
            /* We make the assumption that the partition terminates on
               a cylinder boundary */
            heads = p->end_head + 1;
            sectors = p->end_sector & 63;   /* low 6 bits hold the sector */
            if (sectors == 0)
                continue;
            cylinders = nb_sectors / (heads * sectors);
            /* 16383 is the traditional ATA CHS cylinder limit */
            if (cylinders < 1 || cylinders > 16383)
                continue;
            *pheads = heads;
            *psectors = sectors;
            *pcylinders = cylinders;
#if 0
            printf("guessed geometry: LCHS=%d %d %d\n",
                   cylinders, heads, sectors);
#endif
            return 0;
        }
    }
    return -1;
}
1441
/*
 * Guess a CHS geometry for 'bs' and store it via *pcyls/*pheads/*psecs.
 *
 * Priority: (1) an explicit geometry hint set earlier, (2) the geometry
 * implied by the MS-DOS partition table, (3) a standard 16-head/63-sector
 * physical geometry derived from the total size.  Also updates the BIOS
 * translation hint when it is still AUTO.  Note the goto jumps from the
 * heads>16 branch into the default-geometry branch on purpose.
 */
void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
{
    int translation, lba_detected = 0;
    int cylinders, heads, secs;
    uint64_t nb_sectors;

    /* if a geometry hint is available, use it */
    bdrv_get_geometry(bs, &nb_sectors);
    bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
    translation = bdrv_get_translation_hint(bs);
    if (cylinders != 0) {
        *pcyls = cylinders;
        *pheads = heads;
        *psecs = secs;
    } else {
        if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
            if (heads > 16) {
                /* if heads > 16, it means that a BIOS LBA
                   translation was active, so the default
                   hardware geometry is OK */
                lba_detected = 1;
                goto default_geometry;
            } else {
                *pcyls = cylinders;
                *pheads = heads;
                *psecs = secs;
                /* disable any translation to be in sync with
                   the logical geometry */
                if (translation == BIOS_ATA_TRANSLATION_AUTO) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_NONE);
                }
            }
        } else {
        default_geometry:
            /* if no geometry, use a standard physical disk geometry */
            cylinders = nb_sectors / (16 * 63);

            /* clamp to the ATA CHS limits */
            if (cylinders > 16383)
                cylinders = 16383;
            else if (cylinders < 2)
                cylinders = 2;
            *pcyls = cylinders;
            *pheads = 16;
            *psecs = 63;
            if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
                /* 1024 cyls * 128 heads = 131072: LARGE translation limit */
                if ((*pcyls * *pheads) <= 131072) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LARGE);
                } else {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LBA);
                }
            }
        }
        /* remember the guess so subsequent calls take the fast path */
        bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
    }
}
1500
/* Record a user/board-provided CHS geometry hint on the device. */
void bdrv_set_geometry_hint(BlockDriverState *bs,
                            int cyls, int heads, int secs)
{
    bs->cyls = cyls;
    bs->heads = heads;
    bs->secs = secs;
}
1508
/* Record the BIOS ATA translation hint (BIOS_ATA_TRANSLATION_*). */
void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
{
    bs->translation = translation;
}
1513
/* Retrieve the stored CHS geometry hint (0/0/0 when none was set). */
void bdrv_get_geometry_hint(BlockDriverState *bs,
                            int *pcyls, int *pheads, int *psecs)
{
    *pcyls = bs->cyls;
    *pheads = bs->heads;
    *psecs = bs->secs;
}
1521
1522 /* Recognize floppy formats */
/* Recognize floppy formats */
typedef struct FDFormat {
    FDriveType drive;       /* drive type this format belongs to */
    uint8_t last_sect;      /* sectors per track */
    uint8_t max_track;      /* number of tracks */
    uint8_t max_head;       /* highest head number (0 = single-sided) */
} FDFormat;
1529
/* Table of known floppy formats, matched against the image's total sector
 * count ((max_head + 1) * max_track * last_sect) in
 * bdrv_get_floppy_geometry_hint().  Terminated by FDRIVE_DRV_NONE. */
static const FDFormat fd_formats[] = {
    /* First entry is default format */
    /* 1.44 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_144, 18, 80, 1, },
    { FDRIVE_DRV_144, 20, 80, 1, },
    { FDRIVE_DRV_144, 21, 80, 1, },
    { FDRIVE_DRV_144, 21, 82, 1, },
    { FDRIVE_DRV_144, 21, 83, 1, },
    { FDRIVE_DRV_144, 22, 80, 1, },
    { FDRIVE_DRV_144, 23, 80, 1, },
    { FDRIVE_DRV_144, 24, 80, 1, },
    /* 2.88 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_288, 36, 80, 1, },
    { FDRIVE_DRV_288, 39, 80, 1, },
    { FDRIVE_DRV_288, 40, 80, 1, },
    { FDRIVE_DRV_288, 44, 80, 1, },
    { FDRIVE_DRV_288, 48, 80, 1, },
    /* 720 kB 3"1/2 floppy disks */
    { FDRIVE_DRV_144,  9, 80, 1, },
    { FDRIVE_DRV_144, 10, 80, 1, },
    { FDRIVE_DRV_144, 10, 82, 1, },
    { FDRIVE_DRV_144, 10, 83, 1, },
    { FDRIVE_DRV_144, 13, 80, 1, },
    { FDRIVE_DRV_144, 14, 80, 1, },
    /* 1.2 MB 5"1/4 floppy disks */
    { FDRIVE_DRV_120, 15, 80, 1, },
    { FDRIVE_DRV_120, 18, 80, 1, },
    { FDRIVE_DRV_120, 18, 82, 1, },
    { FDRIVE_DRV_120, 18, 83, 1, },
    { FDRIVE_DRV_120, 20, 80, 1, },
    /* 720 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 80, 1, },
    { FDRIVE_DRV_120, 11, 80, 1, },
    /* 360 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 40, 1, },
    { FDRIVE_DRV_120,  9, 40, 0, },
    { FDRIVE_DRV_120, 10, 41, 1, },
    { FDRIVE_DRV_120, 10, 42, 1, },
    /* 320 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  8, 40, 1, },
    { FDRIVE_DRV_120,  8, 40, 0, },
    /* 360 kB must match 5"1/4 better than 3"1/2... */
    { FDRIVE_DRV_144,  9, 80, 0, },
    /* end */
    { FDRIVE_DRV_NONE, -1, -1, 0, },
};
1576
/*
 * Determine floppy geometry (heads/tracks/sectors) and drive type.
 *
 * If a full geometry hint was set, it is used as-is (and *drive is left
 * untouched).  Otherwise the image size is matched against fd_formats;
 * an exact sector-count match wins, else the first entry compatible with
 * 'drive_in' is used.
 *
 * NOTE(review): when no entry even matches the drive type, 'match = 1'
 * selects the SECOND table entry although the table comment says the
 * first entry is the default — confirm whether index 0 was intended.
 */
void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
                                   int *max_track, int *last_sect,
                                   FDriveType drive_in, FDriveType *drive)
{
    const FDFormat *parse;
    uint64_t nb_sectors, size;
    int i, first_match, match;

    bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
    if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
        /* User defined disk */
    } else {
        bdrv_get_geometry(bs, &nb_sectors);
        match = -1;
        first_match = -1;
        for (i = 0; ; i++) {
            parse = &fd_formats[i];
            if (parse->drive == FDRIVE_DRV_NONE) {
                break;
            }
            if (drive_in == parse->drive ||
                drive_in == FDRIVE_DRV_NONE) {
                /* total sectors this format would occupy */
                size = (parse->max_head + 1) * parse->max_track *
                    parse->last_sect;
                if (nb_sectors == size) {
                    match = i;
                    break;
                }
                if (first_match == -1) {
                    first_match = i;
                }
            }
        }
        if (match == -1) {
            if (first_match == -1) {
                match = 1;
            } else {
                match = first_match;
            }
            parse = &fd_formats[match];
        }
        *nb_heads = parse->max_head + 1;
        *max_track = parse->max_track;
        *last_sect = parse->last_sect;
        *drive = parse->drive;
    }
}
1624
/* Return the stored BIOS ATA translation hint. */
int bdrv_get_translation_hint(BlockDriverState *bs)
{
    return bs->translation;
}
1629
/* Configure the actions taken on read and write I/O errors. */
void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
                       BlockErrorAction on_write_error)
{
    bs->on_read_error = on_read_error;
    bs->on_write_error = on_write_error;
}
1636
/* Return the configured error action for reads (is_read) or writes. */
BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
{
    return is_read ? bs->on_read_error : bs->on_write_error;
}
1641
/* Nonzero if the device was opened read-only. */
int bdrv_is_read_only(BlockDriverState *bs)
{
    return bs->read_only;
}
1646
/* Nonzero if this is a SCSI generic (pass-through) device. */
int bdrv_is_sg(BlockDriverState *bs)
{
    return bs->sg;
}
1651
/* Nonzero if the guest-visible write cache is enabled. */
int bdrv_enable_write_cache(BlockDriverState *bs)
{
    return bs->enable_write_cache;
}
1656
/* Nonzero if this image or its backing file is encrypted. */
int bdrv_is_encrypted(BlockDriverState *bs)
{
    if (bs->backing_hd && bs->backing_hd->encrypted)
        return 1;
    return bs->encrypted;
}
1663
/* Nonzero if an encryption key still has to be supplied before the
 * image (or its backing file) can be used. */
int bdrv_key_required(BlockDriverState *bs)
{
    BlockDriverState *backing_hd = bs->backing_hd;

    if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
        return 1;
    return (bs->encrypted && !bs->valid_key);
}
1672
/*
 * Supply the encryption key for 'bs' (and, first, for an encrypted
 * backing file sharing the same key).
 *
 * Returns 0 on success, -EINVAL if the image is not encrypted,
 * -ENOMEDIUM if no driver or no set_key callback, or the driver's error.
 * On first success the deferred media-change callback is fired.
 */
int bdrv_set_key(BlockDriverState *bs, const char *key)
{
    int ret;
    if (bs->backing_hd && bs->backing_hd->encrypted) {
        /* the backing file needs the key too; try it first */
        ret = bdrv_set_key(bs->backing_hd, key);
        if (ret < 0)
            return ret;
        if (!bs->encrypted)
            return 0;
    }
    if (!bs->encrypted) {
        return -EINVAL;
    } else if (!bs->drv || !bs->drv->bdrv_set_key) {
        return -ENOMEDIUM;
    }
    ret = bs->drv->bdrv_set_key(bs, key);
    if (ret < 0) {
        bs->valid_key = 0;
    } else if (!bs->valid_key) {
        bs->valid_key = 1;
        /* call the change callback now, we skipped it on open */
        bdrv_dev_change_media_cb(bs, true);
    }
    return ret;
}
1698
/* Copy the format driver's name into 'buf' ("" when no medium). */
void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
{
    if (!bs->drv) {
        buf[0] = '\0';
    } else {
        pstrcpy(buf, buf_size, bs->drv->format_name);
    }
}
1707
/* Invoke 'it' once per registered block driver with its format name. */
void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
                         void *opaque)
{
    BlockDriver *drv;

    QLIST_FOREACH(drv, &bdrv_drivers, list) {
        it(opaque, drv->format_name);
    }
}
1717
/* Look up a BlockDriverState by device name; NULL if not found. */
BlockDriverState *bdrv_find(const char *name)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        if (!strcmp(name, bs->device_name)) {
            return bs;
        }
    }
    return NULL;
}
1729
/* Iterate over all devices: pass NULL to get the first, then the
 * previous result to get the next; returns NULL at the end. */
BlockDriverState *bdrv_next(BlockDriverState *bs)
{
    if (!bs) {
        return QTAILQ_FIRST(&bdrv_states);
    }
    return QTAILQ_NEXT(bs, list);
}
1737
/* Invoke 'it' once for every registered BlockDriverState. */
void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        it(opaque, bs);
    }
}
1746
/* Return the device name (owned by bs; do not free). */
const char *bdrv_get_device_name(BlockDriverState *bs)
{
    return bs->device_name;
}
1751
/* Flush every writable device that currently has a medium inserted. */
void bdrv_flush_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
            bdrv_flush(bs);
        }
    }
}
1762
/* Nonzero if a newly created image reads back as zeroes.  Drivers that
 * don't implement the callback are assumed to zero-initialize. */
int bdrv_has_zero_init(BlockDriverState *bs)
{
    assert(bs->drv);

    if (bs->drv->bdrv_has_zero_init) {
        return bs->drv->bdrv_has_zero_init(bs);
    }

    return 1;
}
1773
1774 /*
1775 * Returns true iff the specified sector is present in the disk image. Drivers
1776 * not implementing the functionality are assumed to not support backing files,
1777 * hence all their sectors are reported as allocated.
1778 *
1779 * 'pnum' is set to the number of sectors (including and immediately following
1780 * the specified sector) that are known to be in the same
1781 * allocated/unallocated state.
1782 *
1783 * 'nb_sectors' is the max value 'pnum' should be set to.
1784 */
/*
 * Returns true iff the specified sector is present in the disk image. Drivers
 * not implementing the functionality are assumed to not support backing files,
 * hence all their sectors are reported as allocated.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 * 'nb_sectors' is the max value 'pnum' should be set to.
 *
 * NOTE(review): bs->drv is dereferenced without a NULL check, unlike most
 * functions in this file — assumes callers guarantee a medium is present;
 * TODO confirm.
 */
int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
                      int *pnum)
{
    int64_t n;
    if (!bs->drv->bdrv_is_allocated) {
        if (sector_num >= bs->total_sectors) {
            *pnum = 0;
            return 0;
        }
        /* clamp the run length to the end of the image */
        n = bs->total_sectors - sector_num;
        *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
        return 1;
    }
    return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
}
1800
/* Emit a QEVENT_BLOCK_IO_ERROR monitor event describing the action taken
 * ("report"/"ignore"/"stop") for a failed read or write on 'bdrv'. */
void bdrv_mon_event(const BlockDriverState *bdrv,
                    BlockMonEventAction action, int is_read)
{
    QObject *data;
    const char *action_str;

    switch (action) {
    case BDRV_ACTION_REPORT:
        action_str = "report";
        break;
    case BDRV_ACTION_IGNORE:
        action_str = "ignore";
        break;
    case BDRV_ACTION_STOP:
        action_str = "stop";
        break;
    default:
        abort();
    }

    data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
                              bdrv->device_name,
                              action_str,
                              is_read ? "read" : "write");
    monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);

    /* monitor_protocol_event took its own reference */
    qobject_decref(data);
}
1829
/* QMP 'query-block': build a list describing every block device
 * (tray/lock state, iostatus, and details of any inserted medium).
 * Caller owns the returned list. */
BlockInfoList *qmp_query_block(Error **errp)
{
    BlockInfoList *head = NULL, *cur_item = NULL;
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        BlockInfoList *info = g_malloc0(sizeof(*info));

        info->value = g_malloc0(sizeof(*info->value));
        info->value->device = g_strdup(bs->device_name);
        info->value->type = g_strdup("unknown");
        info->value->locked = bdrv_dev_is_medium_locked(bs);
        info->value->removable = bdrv_dev_has_removable_media(bs);

        if (bdrv_dev_has_removable_media(bs)) {
            info->value->has_tray_open = true;
            info->value->tray_open = bdrv_dev_is_tray_open(bs);
        }

        if (bdrv_iostatus_is_enabled(bs)) {
            info->value->has_io_status = true;
            info->value->io_status = bs->iostatus;
        }

        /* bs->drv != NULL means a medium is inserted */
        if (bs->drv) {
            info->value->has_inserted = true;
            info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
            info->value->inserted->file = g_strdup(bs->filename);
            info->value->inserted->ro = bs->read_only;
            info->value->inserted->drv = g_strdup(bs->drv->format_name);
            info->value->inserted->encrypted = bs->encrypted;
            if (bs->backing_file[0]) {
                info->value->inserted->has_backing_file = true;
                info->value->inserted->backing_file = g_strdup(bs->backing_file);
            }
        }

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}
1878
/* Consider exposing this as a full fledged QMP command */
/* Build the statistics record for one device, recursing into the
 * underlying protocol file (bs->file) as 'parent'.  Caller frees. */
static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
{
    BlockStats *s;

    s = g_malloc0(sizeof(*s));

    if (bs->device_name[0]) {
        s->has_device = true;
        s->device = g_strdup(bs->device_name);
    }

    s->stats = g_malloc0(sizeof(*s->stats));
    s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
    s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
    s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
    s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
    s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
    s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
    s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
    s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
    s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];

    if (bs->file) {
        s->has_parent = true;
        s->parent = qmp_query_blockstat(bs->file, NULL);
    }

    return s;
}
1909
/* QMP 'query-blockstats': collect per-device I/O statistics for every
 * registered device.  Caller owns the returned list. */
BlockStatsList *qmp_query_blockstats(Error **errp)
{
    BlockStatsList *head = NULL, *cur_item = NULL;
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        BlockStatsList *info = g_malloc0(sizeof(*info));
        info->value = qmp_query_blockstat(bs, NULL);

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}
1930
1931 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
1932 {
1933 if (bs->backing_hd && bs->backing_hd->encrypted)
1934 return bs->backing_file;
1935 else if (bs->encrypted)
1936 return bs->filename;
1937 else
1938 return NULL;
1939 }
1940
/* Copy the backing file name into 'filename' (truncated to
 * filename_size, always NUL-terminated by pstrcpy). */
void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size)
{
    pstrcpy(filename, filename_size, bs->backing_file);
}
1946
/* Write 'nb_sectors' compressed (driver-dependent format, e.g. qcow2).
 * Returns -ENOMEDIUM/-ENOTSUP/-EIO on validation failure, otherwise the
 * driver's result.  The dirty bitmap is updated like a normal write. */
int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
                          const uint8_t *buf, int nb_sectors)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (!drv->bdrv_write_compressed)
        return -ENOTSUP;
    if (bdrv_check_request(bs, sector_num, nb_sectors))
        return -EIO;

    if (bs->dirty_bitmap) {
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
    }

    return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
}
1964
/* Fill *bdi with driver-specific image info (cluster size, etc.).
 * *bdi is zeroed first so unset fields read as 0. */
int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (!drv->bdrv_get_info)
        return -ENOTSUP;
    memset(bdi, 0, sizeof(*bdi));
    return drv->bdrv_get_info(bs, bdi);
}
1975
/* Store 'size' bytes of VM state at offset 'pos' in the image's vmstate
 * area, delegating to bs->file when the format driver can't do it. */
int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
                      int64_t pos, int size)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_save_vmstate)
        return drv->bdrv_save_vmstate(bs, buf, pos, size);
    if (bs->file)
        return bdrv_save_vmstate(bs->file, buf, pos, size);
    return -ENOTSUP;
}
1988
/* Load 'size' bytes of VM state from offset 'pos' in the image's vmstate
 * area, delegating to bs->file when the format driver can't do it. */
int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
                      int64_t pos, int size)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_load_vmstate)
        return drv->bdrv_load_vmstate(bs, buf, pos, size);
    if (bs->file)
        return bdrv_load_vmstate(bs->file, buf, pos, size);
    return -ENOTSUP;
}
2001
2002 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2003 {
2004 BlockDriver *drv = bs->drv;
2005
2006 if (!drv || !drv->bdrv_debug_event) {
2007 return;
2008 }
2009
2010 return drv->bdrv_debug_event(bs, event);
2011
2012 }
2013
2014 /**************************************************************/
2015 /* handling of snapshots */
2016
/* Nonzero if internal snapshots can be taken on this device: requires a
 * writable, inserted medium and a driver (or underlying protocol file)
 * with snapshot support. */
int bdrv_can_snapshot(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
        return 0;
    }

    if (!drv->bdrv_snapshot_create) {
        if (bs->file != NULL) {
            /* e.g. raw on top of a snapshot-capable protocol */
            return bdrv_can_snapshot(bs->file);
        }
        return 0;
    }

    return 1;
}
2033
2034 int bdrv_is_snapshot(BlockDriverState *bs)
2035 {
2036 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2037 }
2038
/* Return (and cache in bs_snapshots) the first device that supports
 * internal snapshots, or NULL if there is none. */
BlockDriverState *bdrv_snapshots(void)
{
    BlockDriverState *bs;

    /* fast path: reuse the cached result */
    if (bs_snapshots) {
        return bs_snapshots;
    }

    bs = NULL;
    while ((bs = bdrv_next(bs))) {
        if (bdrv_can_snapshot(bs)) {
            bs_snapshots = bs;
            return bs;
        }
    }
    return NULL;
}
2056
/* Create an internal snapshot described by *sn_info, delegating to the
 * protocol file when the format driver lacks support. */
int bdrv_snapshot_create(BlockDriverState *bs,
                         QEMUSnapshotInfo *sn_info)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_snapshot_create)
        return drv->bdrv_snapshot_create(bs, sn_info);
    if (bs->file)
        return bdrv_snapshot_create(bs->file, sn_info);
    return -ENOTSUP;
}
2069
2070 int bdrv_snapshot_goto(BlockDriverState *bs,
2071 const char *snapshot_id)
2072 {
2073 BlockDriver *drv = bs->drv;
2074 int ret, open_ret;
2075
2076 if (!drv)
2077 return -ENOMEDIUM;
2078 if (drv->bdrv_snapshot_goto)
2079 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2080
2081 if (bs->file) {
2082 drv->bdrv_close(bs);
2083 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2084 open_ret = drv->bdrv_open(bs, bs->open_flags);
2085 if (open_ret < 0) {
2086 bdrv_delete(bs->file);
2087 bs->drv = NULL;
2088 return open_ret;
2089 }
2090 return ret;
2091 }
2092
2093 return -ENOTSUP;
2094 }
2095
/* Delete internal snapshot 'snapshot_id', delegating to the protocol
 * file when the format driver lacks support. */
int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_snapshot_delete)
        return drv->bdrv_snapshot_delete(bs, snapshot_id);
    if (bs->file)
        return bdrv_snapshot_delete(bs->file, snapshot_id);
    return -ENOTSUP;
}
2107
/* List internal snapshots into *psn_info, delegating to the protocol
 * file when the format driver lacks support.  Returns the count or a
 * negative errno; driver allocates the array. */
int bdrv_snapshot_list(BlockDriverState *bs,
                       QEMUSnapshotInfo **psn_info)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_snapshot_list)
        return drv->bdrv_snapshot_list(bs, psn_info);
    if (bs->file)
        return bdrv_snapshot_list(bs->file, psn_info);
    return -ENOTSUP;
}
2120
/* Load snapshot 'snapshot_name' as a temporary, read-only view of the
 * image.  Requires the device to be opened read-only. */
int bdrv_snapshot_load_tmp(BlockDriverState *bs,
                           const char *snapshot_name)
{
    BlockDriver *drv = bs->drv;
    if (!drv) {
        return -ENOMEDIUM;
    }
    if (!bs->read_only) {
        return -EINVAL;
    }
    if (drv->bdrv_snapshot_load_tmp) {
        return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
    }
    return -ENOTSUP;
}
2136
#define NB_SUFFIXES 4

/*
 * Format 'size' into 'buf' as a short human-readable string using binary
 * (1024-based) K/M/G/T suffixes: values up to 999 are printed verbatim,
 * values under ten units get one decimal place ("1.5K"), larger values
 * are rounded to the nearest whole unit ("10K").  Returns 'buf'.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t base = 1024;
    int i;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (i = 0; i < NB_SUFFIXES; i++, base *= 1024) {
        if (size < (10 * base)) {
            /* small multiple of this unit: show one decimal place */
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / base, suffixes[i]);
            break;
        }
        if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
            /* round to the nearest whole unit */
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     ((size + (base >> 1)) / base), suffixes[i]);
            break;
        }
    }
    return buf;
}
2166
/*
 * Format one snapshot as a fixed-width table row into 'buf'.
 * With sn == NULL, format the table header row instead.  Returns 'buf'.
 */
char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
{
    char buf1[128], date_buf[128], clock_buf[128];
#ifdef _WIN32
    struct tm *ptm;
#else
    struct tm tm;
#endif
    time_t ti;
    int64_t secs;

    if (!sn) {
        snprintf(buf, buf_size,
                 "%-10s%-20s%7s%20s%15s",
                 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
    } else {
        ti = sn->date_sec;
#ifdef _WIN32
        /* no localtime_r on Windows; localtime's static buffer is OK here */
        ptm = localtime(&ti);
        strftime(date_buf, sizeof(date_buf),
                 "%Y-%m-%d %H:%M:%S", ptm);
#else
        localtime_r(&ti, &tm);
        strftime(date_buf, sizeof(date_buf),
                 "%Y-%m-%d %H:%M:%S", &tm);
#endif
        /* split the ns clock into h:m:s.ms */
        secs = sn->vm_clock_nsec / 1000000000;
        snprintf(clock_buf, sizeof(clock_buf),
                 "%02d:%02d:%02d.%03d",
                 (int)(secs / 3600),
                 (int)((secs / 60) % 60),
                 (int)(secs % 60),
                 (int)((sn->vm_clock_nsec / 1000000) % 1000));
        snprintf(buf, buf_size,
                 "%-10s%-20s%7s%20s%15s",
                 sn->id_str, sn->name,
                 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
                 date_buf,
                 clock_buf);
    }
    return buf;
}
2209
2210 /**************************************************************/
2211 /* async I/Os */
2212
/* Asynchronous scatter/gather read: trace and submit via the common
 * coroutine-backed AIO path (is_write = false). */
BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
                                 QEMUIOVector *qiov, int nb_sectors,
                                 BlockDriverCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);

    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
                                 cb, opaque, false);
}
2222
/* Asynchronous scatter/gather write: trace and submit via the common
 * coroutine-backed AIO path (is_write = true). */
BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
                                  QEMUIOVector *qiov, int nb_sectors,
                                  BlockDriverCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);

    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
                                 cb, opaque, true);
}
2232
2233
/* Tracks one bdrv_aio_multiwrite() submission: the first error seen, the
 * number of in-flight driver requests, and the per-original-request
 * completion callbacks plus buffers to free after merging. */
typedef struct MultiwriteCB {
    int error;                          /* first error among the requests */
    int num_requests;                   /* in-flight (merged) requests + dummy */
    int num_callbacks;                  /* original caller request count */
    struct {
        BlockDriverCompletionFunc *cb;  /* caller's completion callback */
        void *opaque;
        QEMUIOVector *free_qiov;        /* merged qiov to destroy, or NULL */
        void *free_buf;                 /* gap-filling zero buffer, or NULL */
    } callbacks[];                      /* flexible array, num_callbacks long */
} MultiwriteCB;
2245
/* Run every caller callback with the collective result and release the
 * iovecs/buffers created by request merging. */
static void multiwrite_user_cb(MultiwriteCB *mcb)
{
    int i;

    for (i = 0; i < mcb->num_callbacks; i++) {
        mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
        if (mcb->callbacks[i].free_qiov) {
            qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
        }
        /* g_free/qemu_vfree accept NULL, so no guard needed */
        g_free(mcb->callbacks[i].free_qiov);
        qemu_vfree(mcb->callbacks[i].free_buf);
    }
}
2259
/* Completion callback for each merged request: record the first error and,
 * when the last in-flight request finishes, fire the user callbacks and
 * free the MultiwriteCB. */
static void multiwrite_cb(void *opaque, int ret)
{
    MultiwriteCB *mcb = opaque;

    trace_multiwrite_cb(mcb, ret);

    /* keep only the first error */
    if (ret < 0 && !mcb->error) {
        mcb->error = ret;
    }

    mcb->num_requests--;
    if (mcb->num_requests == 0) {
        multiwrite_user_cb(mcb);
        g_free(mcb);
    }
}
2276
2277 static int multiwrite_req_compare(const void *a, const void *b)
2278 {
2279 const BlockRequest *req1 = a, *req2 = b;
2280
2281 /*
2282 * Note that we can't simply subtract req2->sector from req1->sector
2283 * here as that could overflow the return value.
2284 */
2285 if (req1->sector > req2->sector) {
2286 return 1;
2287 } else if (req1->sector < req2->sector) {
2288 return -1;
2289 } else {
2290 return 0;
2291 }
2292 }
2293
2294 /*
2295 * Takes a bunch of requests and tries to merge them. Returns the number of
2296 * requests that remain after merging.
2297 */
/*
 * Takes a bunch of requests and tries to merge them. Returns the number of
 * requests that remain after merging.
 *
 * Requests are first sorted by start sector; adjacent/overlapping requests
 * (or requests the driver agrees to combine) are concatenated into a new
 * QEMUIOVector, with zero-filled buffers inserted to cover gaps.  The
 * allocated qiovs/buffers are parked in mcb->callbacks[] so
 * multiwrite_user_cb() can free them after completion.
 */
static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
                            int num_reqs, MultiwriteCB *mcb)
{
    int i, outidx;

    // Sort requests by start sector
    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);

    // Check if adjacent requests touch the same clusters. If so, combine them,
    // filling up gaps with zero sectors.
    outidx = 0;
    for (i = 1; i < num_reqs; i++) {
        int merge = 0;
        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;

        // This handles the cases that are valid for all block drivers, namely
        // exactly sequential writes and overlapping writes.
        if (reqs[i].sector <= oldreq_last) {
            merge = 1;
        }

        // The block driver may decide that it makes sense to combine requests
        // even if there is a gap of some sectors between them. In this case,
        // the gap is filled with zeros (therefore only applicable for yet
        // unused space in format like qcow2).
        if (!merge && bs->drv->bdrv_merge_requests) {
            merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
        }

        // never exceed the per-request iovec limit
        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
            merge = 0;
        }

        if (merge) {
            size_t size;
            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
            qemu_iovec_init(qiov,
                            reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);

            // Add the first request to the merged one. If the requests are
            // overlapping, drop the last sectors of the first request.
            size = (reqs[i].sector - reqs[outidx].sector) << 9;
            qemu_iovec_concat(qiov, reqs[outidx].qiov, size);

            // We might need to add some zeros between the two requests
            if (reqs[i].sector > oldreq_last) {
                size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
                uint8_t *buf = qemu_blockalign(bs, zero_bytes);
                memset(buf, 0, zero_bytes);
                qemu_iovec_add(qiov, buf, zero_bytes);
                mcb->callbacks[i].free_buf = buf;
            }

            // Add the second request
            qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);

            reqs[outidx].nb_sectors = qiov->size >> 9;
            reqs[outidx].qiov = qiov;

            // remember the merged qiov so it gets destroyed after completion
            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
        } else {
            // start a new output slot with this request as-is
            outidx++;
            reqs[outidx].sector = reqs[i].sector;
            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
            reqs[outidx].qiov = reqs[i].qiov;
        }
    }

    return outidx + 1;
}
2368
2369 /*
2370 * Submit multiple AIO write requests at once.
2371 *
2372 * On success, the function returns 0 and all requests in the reqs array have
2373 * been submitted. In error case this function returns -1, and any of the
2374 * requests may or may not be submitted yet. In particular, this means that the
2375 * callback will be called for some of the requests, for others it won't. The
2376 * caller must check the error field of the BlockRequest to wait for the right
2377 * callbacks (if error != 0, no callback will be called).
2378 *
2379 * The implementation may modify the contents of the reqs array, e.g. to merge
2380 * requests. However, the fields opaque and error are left unmodified as they
2381 * are used to signal failure for a single request to the caller.
2382 */
int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
{
    BlockDriverAIOCB *acb;
    MultiwriteCB *mcb;
    int i;

    /* don't submit writes if we don't have a medium */
    if (bs->drv == NULL) {
        for (i = 0; i < num_reqs; i++) {
            reqs[i].error = -ENOMEDIUM;
        }
        return -1;
    }

    if (num_reqs == 0) {
        return 0;
    }

    // Create MultiwriteCB structure
    /* One callback slot per ORIGINAL request; merging below may shrink the
     * number of submitted requests, but every caller callback still fires. */
    mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
    mcb->num_requests = 0;
    mcb->num_callbacks = num_reqs;

    for (i = 0; i < num_reqs; i++) {
        mcb->callbacks[i].cb = reqs[i].cb;
        mcb->callbacks[i].opaque = reqs[i].opaque;
    }

    // Check for mergable requests
    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);

    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);

    /*
     * Run the aio requests. As soon as one request can't be submitted
     * successfully, fail all requests that are not yet submitted (we must
     * return failure for all requests anyway)
     *
     * num_requests cannot be set to the right value immediately: If
     * bdrv_aio_writev fails for some request, num_requests would be too high
     * and therefore multiwrite_cb() would never recognize the multiwrite
     * request as completed. We also cannot use the loop variable i to set it
     * when the first request fails because the callback may already have been
     * called for previously submitted requests. Thus, num_requests must be
     * incremented for each request that is submitted.
     *
     * The problem that callbacks may be called early also means that we need
     * to take care that num_requests doesn't become 0 before all requests are
     * submitted - multiwrite_cb() would consider the multiwrite request
     * completed. A dummy request that is "completed" by a manual call to
     * multiwrite_cb() takes care of this.
     */
    mcb->num_requests = 1;

    // Run the aio requests
    for (i = 0; i < num_reqs; i++) {
        /* Incremented before submission so an early completion callback can
         * never drop the count to zero while we are still submitting. */
        mcb->num_requests++;
        acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
            reqs[i].nb_sectors, multiwrite_cb, mcb);

        if (acb == NULL) {
            // We can only fail the whole thing if no request has been
            // submitted yet. Otherwise we'll wait for the submitted AIOs to
            // complete and report the error in the callback.
            if (i == 0) {
                trace_bdrv_aio_multiwrite_earlyfail(mcb);
                goto fail;
            } else {
                /* "Complete" the failed request so num_requests balances */
                trace_bdrv_aio_multiwrite_latefail(mcb, i);
                multiwrite_cb(mcb, -EIO);
                break;
            }
        }
    }

    /* Complete the dummy request */
    multiwrite_cb(mcb, 0);

    return 0;

fail:
    /* Nothing was submitted: flag every original request as failed and
     * free mcb ourselves, since no callback will ever run. */
    for (i = 0; i < mcb->num_callbacks; i++) {
        reqs[i].error = -EIO;
    }
    g_free(mcb);
    return -1;
}
2470
2471 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
2472 {
2473 acb->pool->cancel(acb);
2474 }
2475
2476
2477 /**************************************************************/
2478 /* async block device emulation */
2479
/* AIOCB used to emulate AIO on top of the driver's synchronous
 * bdrv_read/bdrv_write: the I/O runs synchronously and completion is
 * delivered later through a bottom half. */
typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;
    QEMUBH *bh;             /* bottom half that delivers the completion */
    int ret;                /* result of the synchronous read/write */
    /* vector translation state */
    QEMUIOVector *qiov;     /* caller's scatter/gather list */
    uint8_t *bounce;        /* linear bounce buffer for the vectored I/O */
    int is_write;           /* non-zero for writes, zero for reads */
} BlockDriverAIOCBSync;
2489
2490 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2491 {
2492 BlockDriverAIOCBSync *acb =
2493 container_of(blockacb, BlockDriverAIOCBSync, common);
2494 qemu_bh_delete(acb->bh);
2495 acb->bh = NULL;
2496 qemu_aio_release(acb);
2497 }
2498
/* Pool for the synchronous-emulation AIOCBs above */
static AIOPool bdrv_em_aio_pool = {
    .aiocb_size = sizeof(BlockDriverAIOCBSync),
    .cancel = bdrv_aio_cancel_em,
};
2503
2504 static void bdrv_aio_bh_cb(void *opaque)
2505 {
2506 BlockDriverAIOCBSync *acb = opaque;
2507
2508 if (!acb->is_write)
2509 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
2510 qemu_vfree(acb->bounce);
2511 acb->common.cb(acb->common.opaque, acb->ret);
2512 qemu_bh_delete(acb->bh);
2513 acb->bh = NULL;
2514 qemu_aio_release(acb);
2515 }
2516
2517 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2518 int64_t sector_num,
2519 QEMUIOVector *qiov,
2520 int nb_sectors,
2521 BlockDriverCompletionFunc *cb,
2522 void *opaque,
2523 int is_write)
2524
2525 {
2526 BlockDriverAIOCBSync *acb;
2527
2528 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2529 acb->is_write = is_write;
2530 acb->qiov = qiov;
2531 acb->bounce = qemu_blockalign(bs, qiov->size);
2532
2533 if (!acb->bh)
2534 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2535
2536 if (is_write) {
2537 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
2538 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
2539 } else {
2540 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
2541 }
2542
2543 qemu_bh_schedule(acb->bh);
2544
2545 return &acb->common;
2546 }
2547
/* AIO read emulated on top of the driver's synchronous interface */
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
}
2554
/* AIO write emulated on top of the driver's synchronous interface */
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
}
2561
2562
2563 typedef struct BlockDriverAIOCBCoroutine {
2564 BlockDriverAIOCB common;
2565 BlockRequest req;
2566 bool is_write;
2567 QEMUBH* bh;
2568 } BlockDriverAIOCBCoroutine;
2569
static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
{
    /* Heavy-handed cancel: drain ALL pending AIO so that this request
     * (along with everything else in flight) has completed on return. */
    qemu_aio_flush();
}
2574
/* Pool for the coroutine-emulation AIOCBs */
static AIOPool bdrv_em_co_aio_pool = {
    .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
    .cancel = bdrv_aio_co_cancel_em,
};
2579
/* Bottom half that delivers the completion of a coroutine-emulated
 * request and releases the AIOCB back to its pool. */
static void bdrv_co_em_bh(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;

    acb->common.cb(acb->common.opaque, acb->req.error);
    qemu_bh_delete(acb->bh);
    qemu_aio_release(acb);
}
2588
2589 /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
2590 static void coroutine_fn bdrv_co_do_rw(void *opaque)
2591 {
2592 BlockDriverAIOCBCoroutine *acb = opaque;
2593 BlockDriverState *bs = acb->common.bs;
2594
2595 if (!acb->is_write) {
2596 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
2597 acb->req.nb_sectors, acb->req.qiov);
2598 } else {
2599 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
2600 acb->req.nb_sectors, acb->req.qiov);
2601 }
2602
2603 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
2604 qemu_bh_schedule(acb->bh);
2605 }
2606
2607 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
2608 int64_t sector_num,
2609 QEMUIOVector *qiov,
2610 int nb_sectors,
2611 BlockDriverCompletionFunc *cb,
2612 void *opaque,
2613 bool is_write)
2614 {
2615 Coroutine *co;
2616 BlockDriverAIOCBCoroutine *acb;
2617
2618 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2619 acb->req.sector = sector_num;
2620 acb->req.nb_sectors = nb_sectors;
2621 acb->req.qiov = qiov;
2622 acb->is_write = is_write;
2623
2624 co = qemu_coroutine_create(bdrv_co_do_rw);
2625 qemu_coroutine_enter(co, acb);
2626
2627 return &acb->common;
2628 }
2629
/* Coroutine entry point for bdrv_aio_flush() */
static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    acb->req.error = bdrv_co_flush(bs);
    /* Deliver the completion from a BH, outside coroutine context */
    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}
2639
2640 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2641 BlockDriverCompletionFunc *cb, void *opaque)
2642 {
2643 trace_bdrv_aio_flush(bs, opaque);
2644
2645 Coroutine *co;
2646 BlockDriverAIOCBCoroutine *acb;
2647
2648 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2649 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
2650 qemu_coroutine_enter(co, acb);
2651
2652 return &acb->common;
2653 }
2654
/* Coroutine entry point for bdrv_aio_discard() */
static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
    /* Deliver the completion from a BH, outside coroutine context */
    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}
2664
2665 BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
2666 int64_t sector_num, int nb_sectors,
2667 BlockDriverCompletionFunc *cb, void *opaque)
2668 {
2669 Coroutine *co;
2670 BlockDriverAIOCBCoroutine *acb;
2671
2672 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
2673
2674 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2675 acb->req.sector = sector_num;
2676 acb->req.nb_sectors = nb_sectors;
2677 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
2678 qemu_coroutine_enter(co, acb);
2679
2680 return &acb->common;
2681 }
2682
/* Register all built-in block drivers */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}
2687
/* Like bdrv_init(), but restrict format usage to the configured
 * driver whitelist */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
2693
2694 void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2695 BlockDriverCompletionFunc *cb, void *opaque)
2696 {
2697 BlockDriverAIOCB *acb;
2698
2699 if (pool->free_aiocb) {
2700 acb = pool->free_aiocb;
2701 pool->free_aiocb = acb->next;
2702 } else {
2703 acb = g_malloc0(pool->aiocb_size);
2704 acb->pool = pool;
2705 }
2706 acb->bs = bs;
2707 acb->cb = cb;
2708 acb->opaque = opaque;
2709 return acb;
2710 }
2711
2712 void qemu_aio_release(void *p)
2713 {
2714 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2715 AIOPool *pool = acb->pool;
2716 acb->next = pool->free_aiocb;
2717 pool->free_aiocb = acb;
2718 }
2719
2720 /**************************************************************/
2721 /* Coroutine block device emulation */
2722
/* Completion state shared between a yielded coroutine and the AIO
 * callback that resumes it */
typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;   /* coroutine to re-enter on completion */
    int ret;                /* result passed to the completion callback */
} CoroutineIOCompletion;
2727
/* AIO completion callback: record the result and resume the coroutine
 * waiting in bdrv_co_io_em() (or bdrv_co_flush/bdrv_co_discard). */
static void bdrv_co_io_em_complete(void *opaque, int ret)
{
    CoroutineIOCompletion *co = opaque;

    co->ret = ret;
    qemu_coroutine_enter(co->coroutine, NULL);
}
2735
/* Emulate a coroutine read/write on top of the driver's AIO interface:
 * submit the request, then yield until bdrv_co_io_em_complete() resumes
 * this coroutine with the result. */
static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *iov,
                                      bool is_write)
{
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockDriverAIOCB *acb;

    if (is_write) {
        acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
                                       bdrv_co_io_em_complete, &co);
    } else {
        acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
                                      bdrv_co_io_em_complete, &co);
    }

    trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
    if (!acb) {
        /* Submission failed; no completion callback will run */
        return -EIO;
    }
    /* Sleep until the completion callback re-enters this coroutine */
    qemu_coroutine_yield();

    return co.ret;
}
2761
/* Coroutine read emulated on top of the driver's AIO interface */
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
}
2768
/* Coroutine write emulated on top of the driver's AIO interface */
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
}
2775
/* Coroutine entry point for the synchronous bdrv_flush() wrapper */
static void coroutine_fn bdrv_flush_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    rwco->ret = bdrv_co_flush(rwco->bs);
}
2782
/* Flush the image to stable storage, preferring the driver's coroutine
 * hook, then its AIO hook, and treating "no hook" as success. */
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        /* User explicitly disabled flushes */
        return 0;
    } else if (!bs->drv) {
        /* No medium: nothing to flush */
        return 0;
    } else if (bs->drv->bdrv_co_flush) {
        return bs->drv->bdrv_co_flush(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        /* Bridge the AIO hook into coroutine context: submit, then
         * yield until bdrv_co_io_em_complete() resumes us. */
        BlockDriverAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            return -EIO;
        } else {
            qemu_coroutine_yield();
            return co.ret;
        }
    } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work because
         * that would break guests even if the server operates in writethrough
         * mode.
         *
         * Let's hope the user knows what he's doing.
         */
        return 0;
    }
}
2819
2820 int bdrv_flush(BlockDriverState *bs)
2821 {
2822 Coroutine *co;
2823 RwCo rwco = {
2824 .bs = bs,
2825 .ret = NOT_DONE,
2826 };
2827
2828 if (qemu_in_coroutine()) {
2829 /* Fast-path if already in coroutine context */
2830 bdrv_flush_co_entry(&rwco);
2831 } else {
2832 co = qemu_coroutine_create(bdrv_flush_co_entry);
2833 qemu_coroutine_enter(co, &rwco);
2834 while (rwco.ret == NOT_DONE) {
2835 qemu_aio_wait();
2836 }
2837 }
2838
2839 return rwco.ret;
2840 }
2841
/* Coroutine entry point for the synchronous bdrv_discard() wrapper */
static void coroutine_fn bdrv_discard_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
}
2848
/* Discard @nb_sectors starting at @sector_num, using the driver's
 * coroutine hook when available, falling back to its AIO hook, and
 * silently succeeding when the driver supports neither. */
int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
                                 int nb_sectors)
{
    if (!bs->drv) {
        return -ENOMEDIUM;
    } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        /* Range out of bounds */
        return -EIO;
    } else if (bs->read_only) {
        return -EROFS;
    } else if (bs->drv->bdrv_co_discard) {
        return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
    } else if (bs->drv->bdrv_aio_discard) {
        /* Bridge the AIO hook into coroutine context: submit, then
         * yield until bdrv_co_io_em_complete() resumes us. */
        BlockDriverAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
                                        bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            return -EIO;
        } else {
            qemu_coroutine_yield();
            return co.ret;
        }
    } else {
        /* Discard is advisory; no hook means nothing to do */
        return 0;
    }
}
2878
2879 int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
2880 {
2881 Coroutine *co;
2882 RwCo rwco = {
2883 .bs = bs,
2884 .sector_num = sector_num,
2885 .nb_sectors = nb_sectors,
2886 .ret = NOT_DONE,
2887 };
2888
2889 if (qemu_in_coroutine()) {
2890 /* Fast-path if already in coroutine context */
2891 bdrv_discard_co_entry(&rwco);
2892 } else {
2893 co = qemu_coroutine_create(bdrv_discard_co_entry);
2894 qemu_coroutine_enter(co, &rwco);
2895 while (rwco.ret == NOT_DONE) {
2896 qemu_aio_wait();
2897 }
2898 }
2899
2900 return rwco.ret;
2901 }
2902
2903 /**************************************************************/
2904 /* removable device support */
2905
2906 /**
2907 * Return TRUE if the media is present
2908 */
2909 int bdrv_is_inserted(BlockDriverState *bs)
2910 {
2911 BlockDriver *drv = bs->drv;
2912
2913 if (!drv)
2914 return 0;
2915 if (!drv->bdrv_is_inserted)
2916 return 1;
2917 return drv->bdrv_is_inserted(bs);
2918 }
2919
2920 /**
2921 * Return whether the media changed since the last call to this
2922 * function, or -ENOTSUP if we don't know. Most drivers don't know.
2923 */
2924 int bdrv_media_changed(BlockDriverState *bs)
2925 {
2926 BlockDriver *drv = bs->drv;
2927
2928 if (drv && drv->bdrv_media_changed) {
2929 return drv->bdrv_media_changed(bs);
2930 }
2931 return -ENOTSUP;
2932 }
2933
2934 /**
2935 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
2936 */
2937 void bdrv_eject(BlockDriverState *bs, int eject_flag)
2938 {
2939 BlockDriver *drv = bs->drv;
2940
2941 if (drv && drv->bdrv_eject) {
2942 drv->bdrv_eject(bs, eject_flag);
2943 }
2944 }
2945
2946 /**
2947 * Lock or unlock the media (if it is locked, the user won't be able
2948 * to eject it manually).
2949 */
2950 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
2951 {
2952 BlockDriver *drv = bs->drv;
2953
2954 trace_bdrv_lock_medium(bs, locked);
2955
2956 if (drv && drv->bdrv_lock_medium) {
2957 drv->bdrv_lock_medium(bs, locked);
2958 }
2959 }
2960
2961 /* needed for generic scsi interface */
2962
2963 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
2964 {
2965 BlockDriver *drv = bs->drv;
2966
2967 if (drv && drv->bdrv_ioctl)
2968 return drv->bdrv_ioctl(bs, req, buf);
2969 return -ENOTSUP;
2970 }
2971
2972 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
2973 unsigned long int req, void *buf,
2974 BlockDriverCompletionFunc *cb, void *opaque)
2975 {
2976 BlockDriver *drv = bs->drv;
2977
2978 if (drv && drv->bdrv_aio_ioctl)
2979 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
2980 return NULL;
2981 }
2982
/* Record the alignment that buffers for this device must satisfy;
 * used by qemu_blockalign() below. */
void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
{
    bs->buffer_alignment = align;
}
2987
2988 void *qemu_blockalign(BlockDriverState *bs, size_t size)
2989 {
2990 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
2991 }
2992
2993 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
2994 {
2995 int64_t bitmap_size;
2996
2997 bs->dirty_count = 0;
2998 if (enable) {
2999 if (!bs->dirty_bitmap) {
3000 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3001 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3002 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
3003
3004 bs->dirty_bitmap = g_malloc0(bitmap_size);
3005 }
3006 } else {
3007 if (bs->dirty_bitmap) {
3008 g_free(bs->dirty_bitmap);
3009 bs->dirty_bitmap = NULL;
3010 }
3011 }
3012 }
3013
/* Return whether the dirty-tracking chunk containing @sector is marked
 * dirty.  Reports clean when tracking is disabled or the sector lies
 * beyond the end of the device. */
int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
{
    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;

    if (bs->dirty_bitmap &&
        (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
        /* One bit per chunk, packed into unsigned longs */
        return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
            (1UL << (chunk % (sizeof(unsigned long) * 8))));
    } else {
        return 0;
    }
}
3026
/* Clear the dirty bits covering @nr_sectors starting at @cur_sector */
void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                      int nr_sectors)
{
    set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
}
3032
/* Return the number of dirty chunks currently tracked for @bs */
int64_t bdrv_get_dirty_count(BlockDriverState *bs)
{
    return bs->dirty_count;
}
3037
/* Mark @bs as (not) in use; asserts against redundant transitions so
 * mismatched acquire/release pairs are caught early. */
void bdrv_set_in_use(BlockDriverState *bs, int in_use)
{
    assert(bs->in_use != in_use);
    bs->in_use = in_use;
}
3043
/* Return whether @bs is currently marked in use */
int bdrv_in_use(BlockDriverState *bs)
{
    return bs->in_use;
}
3048
/* Enable I/O status reporting and reset the status to OK */
void bdrv_iostatus_enable(BlockDriverState *bs)
{
    bs->iostatus_enabled = true;
    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}
3054
3055 /* The I/O status is only enabled if the drive explicitly
3056 * enables it _and_ the VM is configured to stop on errors */
3057 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3058 {
3059 return (bs->iostatus_enabled &&
3060 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3061 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3062 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3063 }
3064
/* Disable I/O status reporting for @bs */
void bdrv_iostatus_disable(BlockDriverState *bs)
{
    bs->iostatus_enabled = false;
}
3069
/* Reset the I/O status to OK, but only when status reporting is active */
void bdrv_iostatus_reset(BlockDriverState *bs)
{
    if (bdrv_iostatus_is_enabled(bs)) {
        bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
    }
}
3076
3077 /* XXX: Today this is set by device models because it makes the implementation
3078 quite simple. However, the block layer knows about the error, so it's
3079 possible to implement this without device models being involved */
void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
{
    /* Only the first error is recorded; later ones don't overwrite it */
    if (bdrv_iostatus_is_enabled(bs) &&
        bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
        assert(error >= 0);
        /* ENOSPC gets its own status so management tools can react to it */
        bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
                                         BLOCK_DEVICE_IO_STATUS_FAILED;
    }
}
3089
/* Begin accounting an I/O operation: record its size, start time and
 * type in @cookie; bdrv_acct_done() later folds it into the stats. */
void
bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
        enum BlockAcctType type)
{
    assert(type < BDRV_MAX_IOTYPE);

    cookie->bytes = bytes;
    cookie->start_time_ns = get_clock();
    cookie->type = type;
}
3100
/* Finish accounting an I/O started with bdrv_acct_start(): add its
 * bytes, op count and elapsed time to the device statistics. */
void
bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
{
    assert(cookie->type < BDRV_MAX_IOTYPE);

    bs->nr_bytes[cookie->type] += cookie->bytes;
    bs->nr_ops[cookie->type]++;
    bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
}
3110
3111 int bdrv_img_create(const char *filename, const char *fmt,
3112 const char *base_filename, const char *base_fmt,
3113 char *options, uint64_t img_size, int flags)
3114 {
3115 QEMUOptionParameter *param = NULL, *create_options = NULL;
3116 QEMUOptionParameter *backing_fmt, *backing_file, *size;
3117 BlockDriverState *bs = NULL;
3118 BlockDriver *drv, *proto_drv;
3119 BlockDriver *backing_drv = NULL;
3120 int ret = 0;
3121
3122 /* Find driver and parse its options */
3123 drv = bdrv_find_format(fmt);
3124 if (!drv) {
3125 error_report("Unknown file format '%s'", fmt);
3126 ret = -EINVAL;
3127 goto out;
3128 }
3129
3130 proto_drv = bdrv_find_protocol(filename);
3131 if (!proto_drv) {
3132 error_report("Unknown protocol '%s'", filename);
3133 ret = -EINVAL;
3134 goto out;
3135 }
3136
3137 create_options = append_option_parameters(create_options,
3138 drv->create_options);
3139 create_options = append_option_parameters(create_options,
3140 proto_drv->create_options);
3141
3142 /* Create parameter list with default values */
3143 param = parse_option_parameters("", create_options, param);
3144
3145 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3146
3147 /* Parse -o options */
3148 if (options) {
3149 param = parse_option_parameters(options, create_options, param);
3150 if (param == NULL) {
3151 error_report("Invalid options for file format '%s'.", fmt);
3152 ret = -EINVAL;
3153 goto out;
3154 }
3155 }
3156
3157 if (base_filename) {
3158 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3159 base_filename)) {
3160 error_report("Backing file not supported for file format '%s'",
3161 fmt);
3162 ret = -EINVAL;
3163 goto out;
3164 }
3165 }
3166
3167 if (base_fmt) {
3168 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3169 error_report("Backing file format not supported for file "
3170 "format '%s'", fmt);
3171 ret = -EINVAL;
3172 goto out;
3173 }
3174 }
3175
3176 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3177 if (backing_file && backing_file->value.s) {
3178 if (!strcmp(filename, backing_file->value.s)) {
3179 error_report("Error: Trying to create an image with the "
3180 "same filename as the backing file");
3181 ret = -EINVAL;
3182 goto out;
3183 }
3184 }
3185
3186 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3187 if (backing_fmt && backing_fmt->value.s) {
3188 backing_drv = bdrv_find_format(backing_fmt->value.s);
3189 if (!backing_drv) {
3190 error_report("Unknown backing file format '%s'",
3191 backing_fmt->value.s);
3192 ret = -EINVAL;
3193 goto out;
3194 }
3195 }
3196
3197 // The size for the image must always be specified, with one exception:
3198 // If we are using a backing file, we can obtain the size from there
3199 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3200 if (size && size->value.n == -1) {
3201 if (backing_file && backing_file->value.s) {
3202 uint64_t size;
3203 char buf[32];
3204
3205 bs = bdrv_new("");
3206
3207 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
3208 if (ret < 0) {
3209 error_report("Could not open '%s'", backing_file->value.s);
3210 goto out;
3211 }
3212 bdrv_get_geometry(bs, &size);
3213 size *= 512;
3214
3215 snprintf(buf, sizeof(buf), "%" PRId64, size);
3216 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3217 } else {
3218 error_report("Image creation needs a size parameter");
3219 ret = -EINVAL;
3220 goto out;
3221 }
3222 }
3223
3224 printf("Formatting '%s', fmt=%s ", filename, fmt);
3225 print_option_parameters(param);
3226 puts("");
3227
3228 ret = bdrv_create(drv, filename, param);
3229
3230 if (ret < 0) {
3231 if (ret == -ENOTSUP) {
3232 error_report("Formatting or formatting option not supported for "
3233 "file format '%s'", fmt);
3234 } else if (ret == -EFBIG) {
3235 error_report("The image size is too large for file format '%s'",
3236 fmt);
3237 } else {
3238 error_report("%s: error while creating %s: %s", filename, fmt,
3239 strerror(-ret));
3240 }
3241 }
3242
3243 out:
3244 free_option_parameters(create_options);
3245 free_option_parameters(param);
3246
3247 if (bs) {
3248 bdrv_delete(bs);
3249 }
3250
3251 return ret;
3252 }