]> git.proxmox.com Git - qemu.git/blob - block.c
Version 1.0.1
[qemu.git] / block.c
1 /*
2 * QEMU System Emulator block driver
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "monitor.h"
28 #include "block_int.h"
29 #include "module.h"
30 #include "qjson.h"
31 #include "qemu-coroutine.h"
32 #include "qmp-commands.h"
33
34 #ifdef CONFIG_BSD
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <sys/ioctl.h>
38 #include <sys/queue.h>
39 #ifndef __DragonFly__
40 #include <sys/disk.h>
41 #endif
42 #endif
43
44 #ifdef _WIN32
45 #include <windows.h>
46 #endif
47
48 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
49
50 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
51 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
52 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
53 BlockDriverCompletionFunc *cb, void *opaque);
54 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
55 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
56 BlockDriverCompletionFunc *cb, void *opaque);
57 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
58 int64_t sector_num, int nb_sectors,
59 QEMUIOVector *iov);
60 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
61 int64_t sector_num, int nb_sectors,
62 QEMUIOVector *iov);
63 static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
64 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
65 static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
66 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
67 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
68 int64_t sector_num,
69 QEMUIOVector *qiov,
70 int nb_sectors,
71 BlockDriverCompletionFunc *cb,
72 void *opaque,
73 bool is_write);
74 static void coroutine_fn bdrv_co_do_rw(void *opaque);
75
76 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
77 QTAILQ_HEAD_INITIALIZER(bdrv_states);
78
79 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
80 QLIST_HEAD_INITIALIZER(bdrv_drivers);
81
82 /* The device to use for VM snapshots */
83 static BlockDriverState *bs_snapshots;
84
85 /* If non-zero, use only whitelisted block drivers */
86 static int use_bdrv_whitelist;
87
88 #ifdef _WIN32
/*
 * Return 1 if 'filename' begins with an ASCII letter immediately followed
 * by ':' (e.g. "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_lower = (letter >= 'a' && letter <= 'z');
    int is_upper = (letter >= 'A' && letter <= 'Z');

    if (!is_lower && !is_upper) {
        /* the second character is not examined in this case */
        return 0;
    }
    return filename[1] == ':';
}
95
/*
 * Return 1 if 'filename' is exactly a drive specification ("x:") or starts
 * with one of the device prefixes "\\.\" or "//./"; return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    int bare_drive = is_windows_drive_prefix(filename) && filename[2] == '\0';

    if (bare_drive) {
        return 1;
    }
    if (strstart(filename, "\\\\.\\", NULL)) {
        return 1;
    }
    if (strstart(filename, "//./", NULL)) {
        return 1;
    }
    return 0;
}
106 #endif
107
/*
 * Check whether the path starts with "<protocol>:".
 *
 * Any ':' in the path counts as a protocol separator, except that on
 * Windows drive specifications and device paths are never treated as
 * protocols.  Returns 1 if a protocol is present, 0 otherwise.
 */
static int path_has_protocol(const char *path)
{
    const char *colon;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    colon = strchr(path, ':');
    return colon ? 1 : 0;
}
120
/*
 * Return non-zero if 'path' is absolute.
 *
 * Everything up to and including the first ':' (if any) is skipped, and
 * the path is absolute when what follows starts with '/' (or '\\' on
 * Windows).  On Windows a path that itself begins with '/' or '\\' is
 * always absolute.
 */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif
    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;
#ifdef _WIN32
    return (*start == '/' || *start == '\\');
#else
    return (*start == '/');
#endif
}
140
/*
 * Build in 'dest' the location of 'filename' relative to 'base_path'.
 *
 * If filename is absolute it is simply copied.  Otherwise dest receives the
 * leading part of base_path -- up to and including whichever comes later:
 * its first ':' or its last '/' ('\\' also counts on Windows) -- followed
 * by filename.  URLs are supported.  The result is truncated to fit
 * dest_size bytes; nothing is written when dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep;
    const char *prefix_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* end of the "<protocol>:" part, if there is one */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* end of the directory part, if there is one */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    prefix_end = (after_sep > after_colon) ? after_sep : after_colon;
    prefix_len = prefix_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
184
185 void bdrv_register(BlockDriver *bdrv)
186 {
187 /* Block drivers without coroutine functions need emulation */
188 if (!bdrv->bdrv_co_readv) {
189 bdrv->bdrv_co_readv = bdrv_co_readv_em;
190 bdrv->bdrv_co_writev = bdrv_co_writev_em;
191
192 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
193 * the block driver lacks aio we need to emulate that too.
194 */
195 if (!bdrv->bdrv_aio_readv) {
196 /* add AIO emulation layer */
197 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
198 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
199 }
200 }
201
202 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
203 }
204
205 /* create a new block device (by default it is empty) */
206 BlockDriverState *bdrv_new(const char *device_name)
207 {
208 BlockDriverState *bs;
209
210 bs = g_malloc0(sizeof(BlockDriverState));
211 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
212 if (device_name[0] != '\0') {
213 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
214 }
215 bdrv_iostatus_disable(bs);
216 return bs;
217 }
218
219 BlockDriver *bdrv_find_format(const char *format_name)
220 {
221 BlockDriver *drv1;
222 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
223 if (!strcmp(drv1->format_name, format_name)) {
224 return drv1;
225 }
226 }
227 return NULL;
228 }
229
230 static int bdrv_is_whitelisted(BlockDriver *drv)
231 {
232 static const char *whitelist[] = {
233 CONFIG_BDRV_WHITELIST
234 };
235 const char **p;
236
237 if (!whitelist[0])
238 return 1; /* no whitelist, anything goes */
239
240 for (p = whitelist; *p; p++) {
241 if (!strcmp(drv->format_name, *p)) {
242 return 1;
243 }
244 }
245 return 0;
246 }
247
248 BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
249 {
250 BlockDriver *drv = bdrv_find_format(format_name);
251 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
252 }
253
254 int bdrv_create(BlockDriver *drv, const char* filename,
255 QEMUOptionParameter *options)
256 {
257 if (!drv->bdrv_create)
258 return -ENOTSUP;
259
260 return drv->bdrv_create(filename, options);
261 }
262
263 int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
264 {
265 BlockDriver *drv;
266
267 drv = bdrv_find_protocol(filename);
268 if (drv == NULL) {
269 return -ENOENT;
270 }
271
272 return bdrv_create(drv, filename, options);
273 }
274
275 #ifdef _WIN32
276 void get_tmp_filename(char *filename, int size)
277 {
278 char temp_dir[MAX_PATH];
279
280 GetTempPath(MAX_PATH, temp_dir);
281 GetTempFileName(temp_dir, "qem", 0, filename);
282 }
283 #else
/*
 * Create a temporary file and store its name in 'filename' (POSIX version).
 *
 * The file is created with mkstemp() inside $TMPDIR (or "/tmp" when that
 * variable is unset) and closed again immediately; only the name is handed
 * back.  'size' is the capacity of 'filename'.
 *
 * If the file cannot be created -- including when 'size' is too small to
 * hold the template -- 'filename' is set to the empty string instead of
 * being left holding the name of a file that was never created.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;

    /* XXX: race condition possible (the file is reopened by name later) */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    if (fd < 0) {
        /* nothing was created: do not close(-1), do not expose the name */
        if (size > 0) {
            filename[0] = '\0';
        }
        return;
    }
    close(fd);
}
296 #endif
297
298 /*
299 * Detect host devices. By convention, /dev/cdrom[N] is always
300 * recognized as a host CDROM.
301 */
302 static BlockDriver *find_hdev_driver(const char *filename)
303 {
304 int score_max = 0, score;
305 BlockDriver *drv = NULL, *d;
306
307 QLIST_FOREACH(d, &bdrv_drivers, list) {
308 if (d->bdrv_probe_device) {
309 score = d->bdrv_probe_device(filename);
310 if (score > score_max) {
311 score_max = score;
312 drv = d;
313 }
314 }
315 }
316
317 return drv;
318 }
319
320 BlockDriver *bdrv_find_protocol(const char *filename)
321 {
322 BlockDriver *drv1;
323 char protocol[128];
324 int len;
325 const char *p;
326
327 /* TODO Drivers without bdrv_file_open must be specified explicitly */
328
329 /*
330 * XXX(hch): we really should not let host device detection
331 * override an explicit protocol specification, but moving this
332 * later breaks access to device names with colons in them.
333 * Thanks to the brain-dead persistent naming schemes on udev-
334 * based Linux systems those actually are quite common.
335 */
336 drv1 = find_hdev_driver(filename);
337 if (drv1) {
338 return drv1;
339 }
340
341 if (!path_has_protocol(filename)) {
342 return bdrv_find_format("file");
343 }
344 p = strchr(filename, ':');
345 assert(p != NULL);
346 len = p - filename;
347 if (len > sizeof(protocol) - 1)
348 len = sizeof(protocol) - 1;
349 memcpy(protocol, filename, len);
350 protocol[len] = '\0';
351 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
352 if (drv1->protocol_name &&
353 !strcmp(drv1->protocol_name, protocol)) {
354 return drv1;
355 }
356 }
357 return NULL;
358 }
359
360 static int find_image_format(const char *filename, BlockDriver **pdrv)
361 {
362 int ret, score, score_max;
363 BlockDriver *drv1, *drv;
364 uint8_t buf[2048];
365 BlockDriverState *bs;
366
367 ret = bdrv_file_open(&bs, filename, 0);
368 if (ret < 0) {
369 *pdrv = NULL;
370 return ret;
371 }
372
373 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
374 if (bs->sg || !bdrv_is_inserted(bs)) {
375 bdrv_delete(bs);
376 drv = bdrv_find_format("raw");
377 if (!drv) {
378 ret = -ENOENT;
379 }
380 *pdrv = drv;
381 return ret;
382 }
383
384 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
385 bdrv_delete(bs);
386 if (ret < 0) {
387 *pdrv = NULL;
388 return ret;
389 }
390
391 score_max = 0;
392 drv = NULL;
393 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
394 if (drv1->bdrv_probe) {
395 score = drv1->bdrv_probe(buf, ret, filename);
396 if (score > score_max) {
397 score_max = score;
398 drv = drv1;
399 }
400 }
401 }
402 if (!drv) {
403 ret = -ENOENT;
404 }
405 *pdrv = drv;
406 return ret;
407 }
408
409 /**
410 * Set the current 'total_sectors' value
411 */
412 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
413 {
414 BlockDriver *drv = bs->drv;
415
416 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
417 if (bs->sg)
418 return 0;
419
420 /* query actual device if possible, otherwise just trust the hint */
421 if (drv->bdrv_getlength) {
422 int64_t length = drv->bdrv_getlength(bs);
423 if (length < 0) {
424 return length;
425 }
426 hint = length >> BDRV_SECTOR_BITS;
427 }
428
429 bs->total_sectors = hint;
430 return 0;
431 }
432
433 /**
434 * Set open flags for a given cache mode
435 *
436 * Return 0 on success, -1 if the cache mode was invalid.
437 */
438 int bdrv_parse_cache_flags(const char *mode, int *flags)
439 {
440 *flags &= ~BDRV_O_CACHE_MASK;
441
442 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
443 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
444 } else if (!strcmp(mode, "directsync")) {
445 *flags |= BDRV_O_NOCACHE;
446 } else if (!strcmp(mode, "writeback")) {
447 *flags |= BDRV_O_CACHE_WB;
448 } else if (!strcmp(mode, "unsafe")) {
449 *flags |= BDRV_O_CACHE_WB;
450 *flags |= BDRV_O_NO_FLUSH;
451 } else if (!strcmp(mode, "writethrough")) {
452 /* this is the default */
453 } else {
454 return -1;
455 }
456
457 return 0;
458 }
459
460 /*
461 * Common part for opening disk images and files
462 */
463 static int bdrv_open_common(BlockDriverState *bs, const char *filename,
464 int flags, BlockDriver *drv)
465 {
466 int ret, open_flags;
467
468 assert(drv != NULL);
469
470 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
471
472 bs->file = NULL;
473 bs->total_sectors = 0;
474 bs->encrypted = 0;
475 bs->valid_key = 0;
476 bs->sg = 0;
477 bs->open_flags = flags;
478 bs->growable = 0;
479 bs->buffer_alignment = 512;
480
481 pstrcpy(bs->filename, sizeof(bs->filename), filename);
482 bs->backing_file[0] = '\0';
483
484 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
485 return -ENOTSUP;
486 }
487
488 bs->drv = drv;
489 bs->opaque = g_malloc0(drv->instance_size);
490
491 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
492
493 /*
494 * Clear flags that are internal to the block layer before opening the
495 * image.
496 */
497 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
498
499 /*
500 * Snapshots should be writable.
501 */
502 if (bs->is_temporary) {
503 open_flags |= BDRV_O_RDWR;
504 }
505
506 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
507
508 /* Open the image, either directly or using a protocol */
509 if (drv->bdrv_file_open) {
510 ret = drv->bdrv_file_open(bs, filename, open_flags);
511 } else {
512 ret = bdrv_file_open(&bs->file, filename, open_flags);
513 if (ret >= 0) {
514 ret = drv->bdrv_open(bs, open_flags);
515 }
516 }
517
518 if (ret < 0) {
519 goto free_and_fail;
520 }
521
522 ret = refresh_total_sectors(bs, bs->total_sectors);
523 if (ret < 0) {
524 goto free_and_fail;
525 }
526
527 #ifndef _WIN32
528 if (bs->is_temporary) {
529 unlink(filename);
530 }
531 #endif
532 return 0;
533
534 free_and_fail:
535 if (bs->file) {
536 bdrv_delete(bs->file);
537 bs->file = NULL;
538 }
539 g_free(bs->opaque);
540 bs->opaque = NULL;
541 bs->drv = NULL;
542 return ret;
543 }
544
545 /*
546 * Opens a file using a protocol (file, host_device, nbd, ...)
547 */
548 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
549 {
550 BlockDriverState *bs;
551 BlockDriver *drv;
552 int ret;
553
554 drv = bdrv_find_protocol(filename);
555 if (!drv) {
556 return -ENOENT;
557 }
558
559 bs = bdrv_new("");
560 ret = bdrv_open_common(bs, filename, flags, drv);
561 if (ret < 0) {
562 bdrv_delete(bs);
563 return ret;
564 }
565 bs->growable = 1;
566 *pbs = bs;
567 return 0;
568 }
569
570 /*
571 * Opens a disk image (raw, qcow2, vmdk, ...)
572 */
573 int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
574 BlockDriver *drv)
575 {
576 int ret;
577 char tmp_filename[PATH_MAX];
578
579 if (flags & BDRV_O_SNAPSHOT) {
580 BlockDriverState *bs1;
581 int64_t total_size;
582 int is_protocol = 0;
583 BlockDriver *bdrv_qcow2;
584 QEMUOptionParameter *options;
585 char backing_filename[PATH_MAX];
586
587 /* if snapshot, we create a temporary backing file and open it
588 instead of opening 'filename' directly */
589
590 /* if there is a backing file, use it */
591 bs1 = bdrv_new("");
592 ret = bdrv_open(bs1, filename, 0, drv);
593 if (ret < 0) {
594 bdrv_delete(bs1);
595 return ret;
596 }
597 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
598
599 if (bs1->drv && bs1->drv->protocol_name)
600 is_protocol = 1;
601
602 bdrv_delete(bs1);
603
604 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
605
606 /* Real path is meaningless for protocols */
607 if (is_protocol)
608 snprintf(backing_filename, sizeof(backing_filename),
609 "%s", filename);
610 else if (!realpath(filename, backing_filename))
611 return -errno;
612
613 bdrv_qcow2 = bdrv_find_format("qcow2");
614 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
615
616 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
617 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
618 if (drv) {
619 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
620 drv->format_name);
621 }
622
623 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
624 free_option_parameters(options);
625 if (ret < 0) {
626 return ret;
627 }
628
629 filename = tmp_filename;
630 drv = bdrv_qcow2;
631 bs->is_temporary = 1;
632 }
633
634 /* Find the right image format driver */
635 if (!drv) {
636 ret = find_image_format(filename, &drv);
637 }
638
639 if (!drv) {
640 goto unlink_and_fail;
641 }
642
643 /* Open the image */
644 ret = bdrv_open_common(bs, filename, flags, drv);
645 if (ret < 0) {
646 goto unlink_and_fail;
647 }
648
649 /* If there is a backing file, use it */
650 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
651 char backing_filename[PATH_MAX];
652 int back_flags;
653 BlockDriver *back_drv = NULL;
654
655 bs->backing_hd = bdrv_new("");
656
657 if (path_has_protocol(bs->backing_file)) {
658 pstrcpy(backing_filename, sizeof(backing_filename),
659 bs->backing_file);
660 } else {
661 path_combine(backing_filename, sizeof(backing_filename),
662 filename, bs->backing_file);
663 }
664
665 if (bs->backing_format[0] != '\0') {
666 back_drv = bdrv_find_format(bs->backing_format);
667 }
668
669 /* backing files always opened read-only */
670 back_flags =
671 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
672
673 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
674 if (ret < 0) {
675 bdrv_close(bs);
676 return ret;
677 }
678 if (bs->is_temporary) {
679 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
680 } else {
681 /* base image inherits from "parent" */
682 bs->backing_hd->keep_read_only = bs->keep_read_only;
683 }
684 }
685
686 if (!bdrv_key_required(bs)) {
687 bdrv_dev_change_media_cb(bs, true);
688 }
689
690 return 0;
691
692 unlink_and_fail:
693 if (bs->is_temporary) {
694 unlink(filename);
695 }
696 return ret;
697 }
698
699 void bdrv_close(BlockDriverState *bs)
700 {
701 if (bs->drv) {
702 if (bs == bs_snapshots) {
703 bs_snapshots = NULL;
704 }
705 if (bs->backing_hd) {
706 bdrv_delete(bs->backing_hd);
707 bs->backing_hd = NULL;
708 }
709 bs->drv->bdrv_close(bs);
710 g_free(bs->opaque);
711 #ifdef _WIN32
712 if (bs->is_temporary) {
713 unlink(bs->filename);
714 }
715 #endif
716 bs->opaque = NULL;
717 bs->drv = NULL;
718
719 if (bs->file != NULL) {
720 bdrv_close(bs->file);
721 }
722
723 bdrv_dev_change_media_cb(bs, false);
724 }
725 }
726
727 void bdrv_close_all(void)
728 {
729 BlockDriverState *bs;
730
731 QTAILQ_FOREACH(bs, &bdrv_states, list) {
732 bdrv_close(bs);
733 }
734 }
735
736 /* make a BlockDriverState anonymous by removing from bdrv_state list.
737 Also, NULL terminate the device_name to prevent double remove */
738 void bdrv_make_anon(BlockDriverState *bs)
739 {
740 if (bs->device_name[0] != '\0') {
741 QTAILQ_REMOVE(&bdrv_states, bs, list);
742 }
743 bs->device_name[0] = '\0';
744 }
745
746 void bdrv_delete(BlockDriverState *bs)
747 {
748 assert(!bs->dev);
749
750 /* remove from list, if necessary */
751 bdrv_make_anon(bs);
752
753 bdrv_close(bs);
754 if (bs->file != NULL) {
755 bdrv_delete(bs->file);
756 }
757
758 assert(bs != bs_snapshots);
759 g_free(bs);
760 }
761
762 int bdrv_attach_dev(BlockDriverState *bs, void *dev)
763 /* TODO change to DeviceState *dev when all users are qdevified */
764 {
765 if (bs->dev) {
766 return -EBUSY;
767 }
768 bs->dev = dev;
769 bdrv_iostatus_reset(bs);
770 return 0;
771 }
772
773 /* TODO qdevified devices don't use this, remove when devices are qdevified */
774 void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
775 {
776 if (bdrv_attach_dev(bs, dev) < 0) {
777 abort();
778 }
779 }
780
781 void bdrv_detach_dev(BlockDriverState *bs, void *dev)
782 /* TODO change to DeviceState *dev when all users are qdevified */
783 {
784 assert(bs->dev == dev);
785 bs->dev = NULL;
786 bs->dev_ops = NULL;
787 bs->dev_opaque = NULL;
788 bs->buffer_alignment = 512;
789 }
790
791 /* TODO change to return DeviceState * when all users are qdevified */
792 void *bdrv_get_attached_dev(BlockDriverState *bs)
793 {
794 return bs->dev;
795 }
796
797 void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
798 void *opaque)
799 {
800 bs->dev_ops = ops;
801 bs->dev_opaque = opaque;
802 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
803 bs_snapshots = NULL;
804 }
805 }
806
807 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
808 {
809 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
810 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
811 }
812 }
813
814 bool bdrv_dev_has_removable_media(BlockDriverState *bs)
815 {
816 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
817 }
818
819 void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
820 {
821 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
822 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
823 }
824 }
825
826 bool bdrv_dev_is_tray_open(BlockDriverState *bs)
827 {
828 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
829 return bs->dev_ops->is_tray_open(bs->dev_opaque);
830 }
831 return false;
832 }
833
834 static void bdrv_dev_resize_cb(BlockDriverState *bs)
835 {
836 if (bs->dev_ops && bs->dev_ops->resize_cb) {
837 bs->dev_ops->resize_cb(bs->dev_opaque);
838 }
839 }
840
841 bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
842 {
843 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
844 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
845 }
846 return false;
847 }
848
849 /*
850 * Run consistency checks on an image
851 *
852 * Returns 0 if the check could be completed (it doesn't mean that the image is
853 * free of errors) or -errno when an internal error occurred. The results of the
854 * check are stored in res.
855 */
856 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
857 {
858 if (bs->drv->bdrv_check == NULL) {
859 return -ENOTSUP;
860 }
861
862 memset(res, 0, sizeof(*res));
863 return bs->drv->bdrv_check(bs, res);
864 }
865
866 #define COMMIT_BUF_SECTORS 2048
867
868 /* commit COW file into the raw image */
869 int bdrv_commit(BlockDriverState *bs)
870 {
871 BlockDriver *drv = bs->drv;
872 BlockDriver *backing_drv;
873 int64_t sector, total_sectors;
874 int n, ro, open_flags;
875 int ret = 0, rw_ret = 0;
876 uint8_t *buf;
877 char filename[1024];
878 BlockDriverState *bs_rw, *bs_ro;
879
880 if (!drv)
881 return -ENOMEDIUM;
882
883 if (!bs->backing_hd) {
884 return -ENOTSUP;
885 }
886
887 if (bs->backing_hd->keep_read_only) {
888 return -EACCES;
889 }
890
891 backing_drv = bs->backing_hd->drv;
892 ro = bs->backing_hd->read_only;
893 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
894 open_flags = bs->backing_hd->open_flags;
895
896 if (ro) {
897 /* re-open as RW */
898 bdrv_delete(bs->backing_hd);
899 bs->backing_hd = NULL;
900 bs_rw = bdrv_new("");
901 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
902 backing_drv);
903 if (rw_ret < 0) {
904 bdrv_delete(bs_rw);
905 /* try to re-open read-only */
906 bs_ro = bdrv_new("");
907 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
908 backing_drv);
909 if (ret < 0) {
910 bdrv_delete(bs_ro);
911 /* drive not functional anymore */
912 bs->drv = NULL;
913 return ret;
914 }
915 bs->backing_hd = bs_ro;
916 return rw_ret;
917 }
918 bs->backing_hd = bs_rw;
919 }
920
921 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
922 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
923
924 for (sector = 0; sector < total_sectors; sector += n) {
925 if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
926
927 if (bdrv_read(bs, sector, buf, n) != 0) {
928 ret = -EIO;
929 goto ro_cleanup;
930 }
931
932 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
933 ret = -EIO;
934 goto ro_cleanup;
935 }
936 }
937 }
938
939 if (drv->bdrv_make_empty) {
940 ret = drv->bdrv_make_empty(bs);
941 bdrv_flush(bs);
942 }
943
944 /*
945 * Make sure all data we wrote to the backing device is actually
946 * stable on disk.
947 */
948 if (bs->backing_hd)
949 bdrv_flush(bs->backing_hd);
950
951 ro_cleanup:
952 g_free(buf);
953
954 if (ro) {
955 /* re-open as RO */
956 bdrv_delete(bs->backing_hd);
957 bs->backing_hd = NULL;
958 bs_ro = bdrv_new("");
959 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
960 backing_drv);
961 if (ret < 0) {
962 bdrv_delete(bs_ro);
963 /* drive not functional anymore */
964 bs->drv = NULL;
965 return ret;
966 }
967 bs->backing_hd = bs_ro;
968 bs->backing_hd->keep_read_only = 0;
969 }
970
971 return ret;
972 }
973
974 void bdrv_commit_all(void)
975 {
976 BlockDriverState *bs;
977
978 QTAILQ_FOREACH(bs, &bdrv_states, list) {
979 bdrv_commit(bs);
980 }
981 }
982
983 /*
984 * Return values:
985 * 0 - success
986 * -EINVAL - backing format specified, but no file
987 * -ENOSPC - can't update the backing file because no space is left in the
988 * image file header
989 * -ENOTSUP - format driver doesn't support changing the backing file
990 */
991 int bdrv_change_backing_file(BlockDriverState *bs,
992 const char *backing_file, const char *backing_fmt)
993 {
994 BlockDriver *drv = bs->drv;
995
996 if (drv->bdrv_change_backing_file != NULL) {
997 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
998 } else {
999 return -ENOTSUP;
1000 }
1001 }
1002
1003 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1004 size_t size)
1005 {
1006 int64_t len;
1007
1008 if (!bdrv_is_inserted(bs))
1009 return -ENOMEDIUM;
1010
1011 if (bs->growable)
1012 return 0;
1013
1014 len = bdrv_getlength(bs);
1015
1016 if (offset < 0)
1017 return -EIO;
1018
1019 if ((offset > len) || (len - offset < size))
1020 return -EIO;
1021
1022 return 0;
1023 }
1024
1025 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1026 int nb_sectors)
1027 {
1028 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1029 nb_sectors * BDRV_SECTOR_SIZE);
1030 }
1031
1032 typedef struct RwCo {
1033 BlockDriverState *bs;
1034 int64_t sector_num;
1035 int nb_sectors;
1036 QEMUIOVector *qiov;
1037 bool is_write;
1038 int ret;
1039 } RwCo;
1040
1041 static void coroutine_fn bdrv_rw_co_entry(void *opaque)
1042 {
1043 RwCo *rwco = opaque;
1044
1045 if (!rwco->is_write) {
1046 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
1047 rwco->nb_sectors, rwco->qiov);
1048 } else {
1049 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
1050 rwco->nb_sectors, rwco->qiov);
1051 }
1052 }
1053
1054 /*
1055 * Process a synchronous request using coroutines
1056 */
1057 static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1058 int nb_sectors, bool is_write)
1059 {
1060 QEMUIOVector qiov;
1061 struct iovec iov = {
1062 .iov_base = (void *)buf,
1063 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1064 };
1065 Coroutine *co;
1066 RwCo rwco = {
1067 .bs = bs,
1068 .sector_num = sector_num,
1069 .nb_sectors = nb_sectors,
1070 .qiov = &qiov,
1071 .is_write = is_write,
1072 .ret = NOT_DONE,
1073 };
1074
1075 qemu_iovec_init_external(&qiov, &iov, 1);
1076
1077 if (qemu_in_coroutine()) {
1078 /* Fast-path if already in coroutine context */
1079 bdrv_rw_co_entry(&rwco);
1080 } else {
1081 co = qemu_coroutine_create(bdrv_rw_co_entry);
1082 qemu_coroutine_enter(co, &rwco);
1083 while (rwco.ret == NOT_DONE) {
1084 qemu_aio_wait();
1085 }
1086 }
1087 return rwco.ret;
1088 }
1089
1090 /* return < 0 if error. See bdrv_write() for the return codes */
1091 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
1092 uint8_t *buf, int nb_sectors)
1093 {
1094 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
1095 }
1096
1097 static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
1098 int nb_sectors, int dirty)
1099 {
1100 int64_t start, end;
1101 unsigned long val, idx, bit;
1102
1103 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
1104 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
1105
1106 for (; start <= end; start++) {
1107 idx = start / (sizeof(unsigned long) * 8);
1108 bit = start % (sizeof(unsigned long) * 8);
1109 val = bs->dirty_bitmap[idx];
1110 if (dirty) {
1111 if (!(val & (1UL << bit))) {
1112 bs->dirty_count++;
1113 val |= 1UL << bit;
1114 }
1115 } else {
1116 if (val & (1UL << bit)) {
1117 bs->dirty_count--;
1118 val &= ~(1UL << bit);
1119 }
1120 }
1121 bs->dirty_bitmap[idx] = val;
1122 }
1123 }
1124
1125 /* Return < 0 if error. Important errors are:
1126 -EIO generic I/O error (may happen for all errors)
1127 -ENOMEDIUM No media inserted.
1128 -EINVAL Invalid sector number or nb_sectors
1129 -EACCES Trying to write a read-only device
1130 */
1131 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
1132 const uint8_t *buf, int nb_sectors)
1133 {
1134 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
1135 }
1136
1137 int bdrv_pread(BlockDriverState *bs, int64_t offset,
1138 void *buf, int count1)
1139 {
1140 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
1141 int len, nb_sectors, count;
1142 int64_t sector_num;
1143 int ret;
1144
1145 count = count1;
1146 /* first read to align to sector start */
1147 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1148 if (len > count)
1149 len = count;
1150 sector_num = offset >> BDRV_SECTOR_BITS;
1151 if (len > 0) {
1152 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1153 return ret;
1154 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
1155 count -= len;
1156 if (count == 0)
1157 return count1;
1158 sector_num++;
1159 buf += len;
1160 }
1161
1162 /* read the sectors "in place" */
1163 nb_sectors = count >> BDRV_SECTOR_BITS;
1164 if (nb_sectors > 0) {
1165 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1166 return ret;
1167 sector_num += nb_sectors;
1168 len = nb_sectors << BDRV_SECTOR_BITS;
1169 buf += len;
1170 count -= len;
1171 }
1172
1173 /* add data from the last sector */
1174 if (count > 0) {
1175 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1176 return ret;
1177 memcpy(buf, tmp_buf, count);
1178 }
1179 return count1;
1180 }
1181
1182 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1183 const void *buf, int count1)
1184 {
1185 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
1186 int len, nb_sectors, count;
1187 int64_t sector_num;
1188 int ret;
1189
1190 count = count1;
1191 /* first write to align to sector start */
1192 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1193 if (len > count)
1194 len = count;
1195 sector_num = offset >> BDRV_SECTOR_BITS;
1196 if (len > 0) {
1197 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1198 return ret;
1199 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
1200 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1201 return ret;
1202 count -= len;
1203 if (count == 0)
1204 return count1;
1205 sector_num++;
1206 buf += len;
1207 }
1208
1209 /* write the sectors "in place" */
1210 nb_sectors = count >> BDRV_SECTOR_BITS;
1211 if (nb_sectors > 0) {
1212 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1213 return ret;
1214 sector_num += nb_sectors;
1215 len = nb_sectors << BDRV_SECTOR_BITS;
1216 buf += len;
1217 count -= len;
1218 }
1219
1220 /* add data from the last sector */
1221 if (count > 0) {
1222 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1223 return ret;
1224 memcpy(tmp_buf, buf, count);
1225 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1226 return ret;
1227 }
1228 return count1;
1229 }
1230
1231 /*
1232 * Writes to the file and ensures that no writes are reordered across this
1233 * request (acts as a barrier)
1234 *
1235 * Returns 0 on success, -errno in error cases.
1236 */
1237 int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1238 const void *buf, int count)
1239 {
1240 int ret;
1241
1242 ret = bdrv_pwrite(bs, offset, buf, count);
1243 if (ret < 0) {
1244 return ret;
1245 }
1246
1247 /* No flush needed for cache modes that use O_DSYNC */
1248 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
1249 bdrv_flush(bs);
1250 }
1251
1252 return 0;
1253 }
1254
1255 /*
1256 * Handle a read request in coroutine context
1257 */
1258 static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
1259 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1260 {
1261 BlockDriver *drv = bs->drv;
1262
1263 if (!drv) {
1264 return -ENOMEDIUM;
1265 }
1266 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1267 return -EIO;
1268 }
1269
1270 return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1271 }
1272
1273 int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
1274 int nb_sectors, QEMUIOVector *qiov)
1275 {
1276 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
1277
1278 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
1279 }
1280
1281 /*
1282 * Handle a write request in coroutine context
1283 */
1284 static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
1285 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1286 {
1287 BlockDriver *drv = bs->drv;
1288 int ret;
1289
1290 if (!bs->drv) {
1291 return -ENOMEDIUM;
1292 }
1293 if (bs->read_only) {
1294 return -EACCES;
1295 }
1296 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1297 return -EIO;
1298 }
1299
1300 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1301
1302 if (bs->dirty_bitmap) {
1303 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1304 }
1305
1306 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1307 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1308 }
1309
1310 return ret;
1311 }
1312
1313 int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1314 int nb_sectors, QEMUIOVector *qiov)
1315 {
1316 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1317
1318 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
1319 }
1320
1321 /**
1322 * Truncate file to 'offset' bytes (needed only for file protocols)
1323 */
1324 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1325 {
1326 BlockDriver *drv = bs->drv;
1327 int ret;
1328 if (!drv)
1329 return -ENOMEDIUM;
1330 if (!drv->bdrv_truncate)
1331 return -ENOTSUP;
1332 if (bs->read_only)
1333 return -EACCES;
1334 if (bdrv_in_use(bs))
1335 return -EBUSY;
1336 ret = drv->bdrv_truncate(bs, offset);
1337 if (ret == 0) {
1338 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
1339 bdrv_dev_resize_cb(bs);
1340 }
1341 return ret;
1342 }
1343
1344 /**
1345 * Length of a allocated file in bytes. Sparse files are counted by actual
1346 * allocated space. Return < 0 if error or unknown.
1347 */
1348 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1349 {
1350 BlockDriver *drv = bs->drv;
1351 if (!drv) {
1352 return -ENOMEDIUM;
1353 }
1354 if (drv->bdrv_get_allocated_file_size) {
1355 return drv->bdrv_get_allocated_file_size(bs);
1356 }
1357 if (bs->file) {
1358 return bdrv_get_allocated_file_size(bs->file);
1359 }
1360 return -ENOTSUP;
1361 }
1362
1363 /**
1364 * Length of a file in bytes. Return < 0 if error or unknown.
1365 */
1366 int64_t bdrv_getlength(BlockDriverState *bs)
1367 {
1368 BlockDriver *drv = bs->drv;
1369 if (!drv)
1370 return -ENOMEDIUM;
1371
1372 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
1373 if (drv->bdrv_getlength) {
1374 return drv->bdrv_getlength(bs);
1375 }
1376 }
1377 return bs->total_sectors * BDRV_SECTOR_SIZE;
1378 }
1379
1380 /* return 0 as number of sectors if no device present or error */
1381 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
1382 {
1383 int64_t length;
1384 length = bdrv_getlength(bs);
1385 if (length < 0)
1386 length = 0;
1387 else
1388 length = length >> BDRV_SECTOR_BITS;
1389 *nb_sectors_ptr = length;
1390 }
1391
/* One entry of the MSDOS partition table, as laid out on disk (packed). */
struct partition {
    uint8_t  boot_ind;      /* 0x80 - active */
    uint8_t  head;          /* starting head */
    uint8_t  sector;        /* starting sector */
    uint8_t  cyl;           /* starting cylinder */
    uint8_t  sys_ind;       /* partition type */
    uint8_t  end_head;      /* end head */
    uint8_t  end_sector;    /* end sector */
    uint8_t  end_cyl;       /* end cylinder */
    uint32_t start_sect;    /* starting sector, counting from 0 */
    uint32_t nr_sects;      /* number of sectors in partition */
} QEMU_PACKED;
1404
1405 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1406 static int guess_disk_lchs(BlockDriverState *bs,
1407 int *pcylinders, int *pheads, int *psectors)
1408 {
1409 uint8_t buf[BDRV_SECTOR_SIZE];
1410 int ret, i, heads, sectors, cylinders;
1411 struct partition *p;
1412 uint32_t nr_sects;
1413 uint64_t nb_sectors;
1414
1415 bdrv_get_geometry(bs, &nb_sectors);
1416
1417 ret = bdrv_read(bs, 0, buf, 1);
1418 if (ret < 0)
1419 return -1;
1420 /* test msdos magic */
1421 if (buf[510] != 0x55 || buf[511] != 0xaa)
1422 return -1;
1423 for(i = 0; i < 4; i++) {
1424 p = ((struct partition *)(buf + 0x1be)) + i;
1425 nr_sects = le32_to_cpu(p->nr_sects);
1426 if (nr_sects && p->end_head) {
1427 /* We make the assumption that the partition terminates on
1428 a cylinder boundary */
1429 heads = p->end_head + 1;
1430 sectors = p->end_sector & 63;
1431 if (sectors == 0)
1432 continue;
1433 cylinders = nb_sectors / (heads * sectors);
1434 if (cylinders < 1 || cylinders > 16383)
1435 continue;
1436 *pheads = heads;
1437 *psectors = sectors;
1438 *pcylinders = cylinders;
1439 #if 0
1440 printf("guessed geometry: LCHS=%d %d %d\n",
1441 cylinders, heads, sectors);
1442 #endif
1443 return 0;
1444 }
1445 }
1446 return -1;
1447 }
1448
1449 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1450 {
1451 int translation, lba_detected = 0;
1452 int cylinders, heads, secs;
1453 uint64_t nb_sectors;
1454
1455 /* if a geometry hint is available, use it */
1456 bdrv_get_geometry(bs, &nb_sectors);
1457 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1458 translation = bdrv_get_translation_hint(bs);
1459 if (cylinders != 0) {
1460 *pcyls = cylinders;
1461 *pheads = heads;
1462 *psecs = secs;
1463 } else {
1464 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1465 if (heads > 16) {
1466 /* if heads > 16, it means that a BIOS LBA
1467 translation was active, so the default
1468 hardware geometry is OK */
1469 lba_detected = 1;
1470 goto default_geometry;
1471 } else {
1472 *pcyls = cylinders;
1473 *pheads = heads;
1474 *psecs = secs;
1475 /* disable any translation to be in sync with
1476 the logical geometry */
1477 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1478 bdrv_set_translation_hint(bs,
1479 BIOS_ATA_TRANSLATION_NONE);
1480 }
1481 }
1482 } else {
1483 default_geometry:
1484 /* if no geometry, use a standard physical disk geometry */
1485 cylinders = nb_sectors / (16 * 63);
1486
1487 if (cylinders > 16383)
1488 cylinders = 16383;
1489 else if (cylinders < 2)
1490 cylinders = 2;
1491 *pcyls = cylinders;
1492 *pheads = 16;
1493 *psecs = 63;
1494 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1495 if ((*pcyls * *pheads) <= 131072) {
1496 bdrv_set_translation_hint(bs,
1497 BIOS_ATA_TRANSLATION_LARGE);
1498 } else {
1499 bdrv_set_translation_hint(bs,
1500 BIOS_ATA_TRANSLATION_LBA);
1501 }
1502 }
1503 }
1504 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1505 }
1506 }
1507
1508 void bdrv_set_geometry_hint(BlockDriverState *bs,
1509 int cyls, int heads, int secs)
1510 {
1511 bs->cyls = cyls;
1512 bs->heads = heads;
1513 bs->secs = secs;
1514 }
1515
1516 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1517 {
1518 bs->translation = translation;
1519 }
1520
1521 void bdrv_get_geometry_hint(BlockDriverState *bs,
1522 int *pcyls, int *pheads, int *psecs)
1523 {
1524 *pcyls = bs->cyls;
1525 *pheads = bs->heads;
1526 *psecs = bs->secs;
1527 }
1528
1529 /* Recognize floppy formats */
1530 typedef struct FDFormat {
1531 FDriveType drive;
1532 uint8_t last_sect;
1533 uint8_t max_track;
1534 uint8_t max_head;
1535 } FDFormat;
1536
1537 static const FDFormat fd_formats[] = {
1538 /* First entry is default format */
1539 /* 1.44 MB 3"1/2 floppy disks */
1540 { FDRIVE_DRV_144, 18, 80, 1, },
1541 { FDRIVE_DRV_144, 20, 80, 1, },
1542 { FDRIVE_DRV_144, 21, 80, 1, },
1543 { FDRIVE_DRV_144, 21, 82, 1, },
1544 { FDRIVE_DRV_144, 21, 83, 1, },
1545 { FDRIVE_DRV_144, 22, 80, 1, },
1546 { FDRIVE_DRV_144, 23, 80, 1, },
1547 { FDRIVE_DRV_144, 24, 80, 1, },
1548 /* 2.88 MB 3"1/2 floppy disks */
1549 { FDRIVE_DRV_288, 36, 80, 1, },
1550 { FDRIVE_DRV_288, 39, 80, 1, },
1551 { FDRIVE_DRV_288, 40, 80, 1, },
1552 { FDRIVE_DRV_288, 44, 80, 1, },
1553 { FDRIVE_DRV_288, 48, 80, 1, },
1554 /* 720 kB 3"1/2 floppy disks */
1555 { FDRIVE_DRV_144, 9, 80, 1, },
1556 { FDRIVE_DRV_144, 10, 80, 1, },
1557 { FDRIVE_DRV_144, 10, 82, 1, },
1558 { FDRIVE_DRV_144, 10, 83, 1, },
1559 { FDRIVE_DRV_144, 13, 80, 1, },
1560 { FDRIVE_DRV_144, 14, 80, 1, },
1561 /* 1.2 MB 5"1/4 floppy disks */
1562 { FDRIVE_DRV_120, 15, 80, 1, },
1563 { FDRIVE_DRV_120, 18, 80, 1, },
1564 { FDRIVE_DRV_120, 18, 82, 1, },
1565 { FDRIVE_DRV_120, 18, 83, 1, },
1566 { FDRIVE_DRV_120, 20, 80, 1, },
1567 /* 720 kB 5"1/4 floppy disks */
1568 { FDRIVE_DRV_120, 9, 80, 1, },
1569 { FDRIVE_DRV_120, 11, 80, 1, },
1570 /* 360 kB 5"1/4 floppy disks */
1571 { FDRIVE_DRV_120, 9, 40, 1, },
1572 { FDRIVE_DRV_120, 9, 40, 0, },
1573 { FDRIVE_DRV_120, 10, 41, 1, },
1574 { FDRIVE_DRV_120, 10, 42, 1, },
1575 /* 320 kB 5"1/4 floppy disks */
1576 { FDRIVE_DRV_120, 8, 40, 1, },
1577 { FDRIVE_DRV_120, 8, 40, 0, },
1578 /* 360 kB must match 5"1/4 better than 3"1/2... */
1579 { FDRIVE_DRV_144, 9, 80, 0, },
1580 /* end */
1581 { FDRIVE_DRV_NONE, -1, -1, 0, },
1582 };
1583
1584 void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1585 int *max_track, int *last_sect,
1586 FDriveType drive_in, FDriveType *drive)
1587 {
1588 const FDFormat *parse;
1589 uint64_t nb_sectors, size;
1590 int i, first_match, match;
1591
1592 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1593 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1594 /* User defined disk */
1595 } else {
1596 bdrv_get_geometry(bs, &nb_sectors);
1597 match = -1;
1598 first_match = -1;
1599 for (i = 0; ; i++) {
1600 parse = &fd_formats[i];
1601 if (parse->drive == FDRIVE_DRV_NONE) {
1602 break;
1603 }
1604 if (drive_in == parse->drive ||
1605 drive_in == FDRIVE_DRV_NONE) {
1606 size = (parse->max_head + 1) * parse->max_track *
1607 parse->last_sect;
1608 if (nb_sectors == size) {
1609 match = i;
1610 break;
1611 }
1612 if (first_match == -1) {
1613 first_match = i;
1614 }
1615 }
1616 }
1617 if (match == -1) {
1618 if (first_match == -1) {
1619 match = 1;
1620 } else {
1621 match = first_match;
1622 }
1623 parse = &fd_formats[match];
1624 }
1625 *nb_heads = parse->max_head + 1;
1626 *max_track = parse->max_track;
1627 *last_sect = parse->last_sect;
1628 *drive = parse->drive;
1629 }
1630 }
1631
1632 int bdrv_get_translation_hint(BlockDriverState *bs)
1633 {
1634 return bs->translation;
1635 }
1636
1637 void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1638 BlockErrorAction on_write_error)
1639 {
1640 bs->on_read_error = on_read_error;
1641 bs->on_write_error = on_write_error;
1642 }
1643
1644 BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1645 {
1646 return is_read ? bs->on_read_error : bs->on_write_error;
1647 }
1648
1649 int bdrv_is_read_only(BlockDriverState *bs)
1650 {
1651 return bs->read_only;
1652 }
1653
1654 int bdrv_is_sg(BlockDriverState *bs)
1655 {
1656 return bs->sg;
1657 }
1658
1659 int bdrv_enable_write_cache(BlockDriverState *bs)
1660 {
1661 return bs->enable_write_cache;
1662 }
1663
1664 int bdrv_is_encrypted(BlockDriverState *bs)
1665 {
1666 if (bs->backing_hd && bs->backing_hd->encrypted)
1667 return 1;
1668 return bs->encrypted;
1669 }
1670
1671 int bdrv_key_required(BlockDriverState *bs)
1672 {
1673 BlockDriverState *backing_hd = bs->backing_hd;
1674
1675 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1676 return 1;
1677 return (bs->encrypted && !bs->valid_key);
1678 }
1679
1680 int bdrv_set_key(BlockDriverState *bs, const char *key)
1681 {
1682 int ret;
1683 if (bs->backing_hd && bs->backing_hd->encrypted) {
1684 ret = bdrv_set_key(bs->backing_hd, key);
1685 if (ret < 0)
1686 return ret;
1687 if (!bs->encrypted)
1688 return 0;
1689 }
1690 if (!bs->encrypted) {
1691 return -EINVAL;
1692 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1693 return -ENOMEDIUM;
1694 }
1695 ret = bs->drv->bdrv_set_key(bs, key);
1696 if (ret < 0) {
1697 bs->valid_key = 0;
1698 } else if (!bs->valid_key) {
1699 bs->valid_key = 1;
1700 /* call the change callback now, we skipped it on open */
1701 bdrv_dev_change_media_cb(bs, true);
1702 }
1703 return ret;
1704 }
1705
1706 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1707 {
1708 if (!bs->drv) {
1709 buf[0] = '\0';
1710 } else {
1711 pstrcpy(buf, buf_size, bs->drv->format_name);
1712 }
1713 }
1714
1715 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1716 void *opaque)
1717 {
1718 BlockDriver *drv;
1719
1720 QLIST_FOREACH(drv, &bdrv_drivers, list) {
1721 it(opaque, drv->format_name);
1722 }
1723 }
1724
1725 BlockDriverState *bdrv_find(const char *name)
1726 {
1727 BlockDriverState *bs;
1728
1729 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1730 if (!strcmp(name, bs->device_name)) {
1731 return bs;
1732 }
1733 }
1734 return NULL;
1735 }
1736
1737 BlockDriverState *bdrv_next(BlockDriverState *bs)
1738 {
1739 if (!bs) {
1740 return QTAILQ_FIRST(&bdrv_states);
1741 }
1742 return QTAILQ_NEXT(bs, list);
1743 }
1744
1745 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1746 {
1747 BlockDriverState *bs;
1748
1749 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1750 it(opaque, bs);
1751 }
1752 }
1753
1754 const char *bdrv_get_device_name(BlockDriverState *bs)
1755 {
1756 return bs->device_name;
1757 }
1758
1759 void bdrv_flush_all(void)
1760 {
1761 BlockDriverState *bs;
1762
1763 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1764 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
1765 bdrv_flush(bs);
1766 }
1767 }
1768 }
1769
1770 int bdrv_has_zero_init(BlockDriverState *bs)
1771 {
1772 assert(bs->drv);
1773
1774 if (bs->drv->bdrv_has_zero_init) {
1775 return bs->drv->bdrv_has_zero_init(bs);
1776 }
1777
1778 return 1;
1779 }
1780
1781 /*
1782 * Returns true iff the specified sector is present in the disk image. Drivers
1783 * not implementing the functionality are assumed to not support backing files,
1784 * hence all their sectors are reported as allocated.
1785 *
1786 * 'pnum' is set to the number of sectors (including and immediately following
1787 * the specified sector) that are known to be in the same
1788 * allocated/unallocated state.
1789 *
1790 * 'nb_sectors' is the max value 'pnum' should be set to.
1791 */
1792 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1793 int *pnum)
1794 {
1795 int64_t n;
1796 if (!bs->drv->bdrv_is_allocated) {
1797 if (sector_num >= bs->total_sectors) {
1798 *pnum = 0;
1799 return 0;
1800 }
1801 n = bs->total_sectors - sector_num;
1802 *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1803 return 1;
1804 }
1805 return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1806 }
1807
1808 void bdrv_mon_event(const BlockDriverState *bdrv,
1809 BlockMonEventAction action, int is_read)
1810 {
1811 QObject *data;
1812 const char *action_str;
1813
1814 switch (action) {
1815 case BDRV_ACTION_REPORT:
1816 action_str = "report";
1817 break;
1818 case BDRV_ACTION_IGNORE:
1819 action_str = "ignore";
1820 break;
1821 case BDRV_ACTION_STOP:
1822 action_str = "stop";
1823 break;
1824 default:
1825 abort();
1826 }
1827
1828 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1829 bdrv->device_name,
1830 action_str,
1831 is_read ? "read" : "write");
1832 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1833
1834 qobject_decref(data);
1835 }
1836
1837 BlockInfoList *qmp_query_block(Error **errp)
1838 {
1839 BlockInfoList *head = NULL, *cur_item = NULL;
1840 BlockDriverState *bs;
1841
1842 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1843 BlockInfoList *info = g_malloc0(sizeof(*info));
1844
1845 info->value = g_malloc0(sizeof(*info->value));
1846 info->value->device = g_strdup(bs->device_name);
1847 info->value->type = g_strdup("unknown");
1848 info->value->locked = bdrv_dev_is_medium_locked(bs);
1849 info->value->removable = bdrv_dev_has_removable_media(bs);
1850
1851 if (bdrv_dev_has_removable_media(bs)) {
1852 info->value->has_tray_open = true;
1853 info->value->tray_open = bdrv_dev_is_tray_open(bs);
1854 }
1855
1856 if (bdrv_iostatus_is_enabled(bs)) {
1857 info->value->has_io_status = true;
1858 info->value->io_status = bs->iostatus;
1859 }
1860
1861 if (bs->drv) {
1862 info->value->has_inserted = true;
1863 info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
1864 info->value->inserted->file = g_strdup(bs->filename);
1865 info->value->inserted->ro = bs->read_only;
1866 info->value->inserted->drv = g_strdup(bs->drv->format_name);
1867 info->value->inserted->encrypted = bs->encrypted;
1868 if (bs->backing_file[0]) {
1869 info->value->inserted->has_backing_file = true;
1870 info->value->inserted->backing_file = g_strdup(bs->backing_file);
1871 }
1872 }
1873
1874 /* XXX: waiting for the qapi to support GSList */
1875 if (!cur_item) {
1876 head = cur_item = info;
1877 } else {
1878 cur_item->next = info;
1879 cur_item = info;
1880 }
1881 }
1882
1883 return head;
1884 }
1885
1886 /* Consider exposing this as a full fledged QMP command */
1887 static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
1888 {
1889 BlockStats *s;
1890
1891 s = g_malloc0(sizeof(*s));
1892
1893 if (bs->device_name[0]) {
1894 s->has_device = true;
1895 s->device = g_strdup(bs->device_name);
1896 }
1897
1898 s->stats = g_malloc0(sizeof(*s->stats));
1899 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
1900 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
1901 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
1902 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
1903 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
1904 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
1905 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
1906 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
1907 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
1908
1909 if (bs->file) {
1910 s->has_parent = true;
1911 s->parent = qmp_query_blockstat(bs->file, NULL);
1912 }
1913
1914 return s;
1915 }
1916
1917 BlockStatsList *qmp_query_blockstats(Error **errp)
1918 {
1919 BlockStatsList *head = NULL, *cur_item = NULL;
1920 BlockDriverState *bs;
1921
1922 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1923 BlockStatsList *info = g_malloc0(sizeof(*info));
1924 info->value = qmp_query_blockstat(bs, NULL);
1925
1926 /* XXX: waiting for the qapi to support GSList */
1927 if (!cur_item) {
1928 head = cur_item = info;
1929 } else {
1930 cur_item->next = info;
1931 cur_item = info;
1932 }
1933 }
1934
1935 return head;
1936 }
1937
1938 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
1939 {
1940 if (bs->backing_hd && bs->backing_hd->encrypted)
1941 return bs->backing_file;
1942 else if (bs->encrypted)
1943 return bs->filename;
1944 else
1945 return NULL;
1946 }
1947
1948 void bdrv_get_backing_filename(BlockDriverState *bs,
1949 char *filename, int filename_size)
1950 {
1951 pstrcpy(filename, filename_size, bs->backing_file);
1952 }
1953
1954 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
1955 const uint8_t *buf, int nb_sectors)
1956 {
1957 BlockDriver *drv = bs->drv;
1958 if (!drv)
1959 return -ENOMEDIUM;
1960 if (!drv->bdrv_write_compressed)
1961 return -ENOTSUP;
1962 if (bdrv_check_request(bs, sector_num, nb_sectors))
1963 return -EIO;
1964
1965 if (bs->dirty_bitmap) {
1966 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1967 }
1968
1969 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
1970 }
1971
1972 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1973 {
1974 BlockDriver *drv = bs->drv;
1975 if (!drv)
1976 return -ENOMEDIUM;
1977 if (!drv->bdrv_get_info)
1978 return -ENOTSUP;
1979 memset(bdi, 0, sizeof(*bdi));
1980 return drv->bdrv_get_info(bs, bdi);
1981 }
1982
1983 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
1984 int64_t pos, int size)
1985 {
1986 BlockDriver *drv = bs->drv;
1987 if (!drv)
1988 return -ENOMEDIUM;
1989 if (drv->bdrv_save_vmstate)
1990 return drv->bdrv_save_vmstate(bs, buf, pos, size);
1991 if (bs->file)
1992 return bdrv_save_vmstate(bs->file, buf, pos, size);
1993 return -ENOTSUP;
1994 }
1995
1996 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
1997 int64_t pos, int size)
1998 {
1999 BlockDriver *drv = bs->drv;
2000 if (!drv)
2001 return -ENOMEDIUM;
2002 if (drv->bdrv_load_vmstate)
2003 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2004 if (bs->file)
2005 return bdrv_load_vmstate(bs->file, buf, pos, size);
2006 return -ENOTSUP;
2007 }
2008
2009 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2010 {
2011 BlockDriver *drv = bs->drv;
2012
2013 if (!drv || !drv->bdrv_debug_event) {
2014 return;
2015 }
2016
2017 return drv->bdrv_debug_event(bs, event);
2018
2019 }
2020
2021 /**************************************************************/
2022 /* handling of snapshots */
2023
2024 int bdrv_can_snapshot(BlockDriverState *bs)
2025 {
2026 BlockDriver *drv = bs->drv;
2027 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
2028 return 0;
2029 }
2030
2031 if (!drv->bdrv_snapshot_create) {
2032 if (bs->file != NULL) {
2033 return bdrv_can_snapshot(bs->file);
2034 }
2035 return 0;
2036 }
2037
2038 return 1;
2039 }
2040
2041 int bdrv_is_snapshot(BlockDriverState *bs)
2042 {
2043 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2044 }
2045
2046 BlockDriverState *bdrv_snapshots(void)
2047 {
2048 BlockDriverState *bs;
2049
2050 if (bs_snapshots) {
2051 return bs_snapshots;
2052 }
2053
2054 bs = NULL;
2055 while ((bs = bdrv_next(bs))) {
2056 if (bdrv_can_snapshot(bs)) {
2057 bs_snapshots = bs;
2058 return bs;
2059 }
2060 }
2061 return NULL;
2062 }
2063
2064 int bdrv_snapshot_create(BlockDriverState *bs,
2065 QEMUSnapshotInfo *sn_info)
2066 {
2067 BlockDriver *drv = bs->drv;
2068 if (!drv)
2069 return -ENOMEDIUM;
2070 if (drv->bdrv_snapshot_create)
2071 return drv->bdrv_snapshot_create(bs, sn_info);
2072 if (bs->file)
2073 return bdrv_snapshot_create(bs->file, sn_info);
2074 return -ENOTSUP;
2075 }
2076
2077 int bdrv_snapshot_goto(BlockDriverState *bs,
2078 const char *snapshot_id)
2079 {
2080 BlockDriver *drv = bs->drv;
2081 int ret, open_ret;
2082
2083 if (!drv)
2084 return -ENOMEDIUM;
2085 if (drv->bdrv_snapshot_goto)
2086 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2087
2088 if (bs->file) {
2089 drv->bdrv_close(bs);
2090 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2091 open_ret = drv->bdrv_open(bs, bs->open_flags);
2092 if (open_ret < 0) {
2093 bdrv_delete(bs->file);
2094 bs->drv = NULL;
2095 return open_ret;
2096 }
2097 return ret;
2098 }
2099
2100 return -ENOTSUP;
2101 }
2102
2103 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2104 {
2105 BlockDriver *drv = bs->drv;
2106 if (!drv)
2107 return -ENOMEDIUM;
2108 if (drv->bdrv_snapshot_delete)
2109 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2110 if (bs->file)
2111 return bdrv_snapshot_delete(bs->file, snapshot_id);
2112 return -ENOTSUP;
2113 }
2114
2115 int bdrv_snapshot_list(BlockDriverState *bs,
2116 QEMUSnapshotInfo **psn_info)
2117 {
2118 BlockDriver *drv = bs->drv;
2119 if (!drv)
2120 return -ENOMEDIUM;
2121 if (drv->bdrv_snapshot_list)
2122 return drv->bdrv_snapshot_list(bs, psn_info);
2123 if (bs->file)
2124 return bdrv_snapshot_list(bs->file, psn_info);
2125 return -ENOTSUP;
2126 }
2127
2128 int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2129 const char *snapshot_name)
2130 {
2131 BlockDriver *drv = bs->drv;
2132 if (!drv) {
2133 return -ENOMEDIUM;
2134 }
2135 if (!bs->read_only) {
2136 return -EINVAL;
2137 }
2138 if (drv->bdrv_snapshot_load_tmp) {
2139 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2140 }
2141 return -ENOTSUP;
2142 }
2143
#define NB_SUFFIXES 4

/*
 * Format 'size' into 'buf' (of 'buf_size' bytes) in a short human readable
 * form and return 'buf'.
 *
 * Values up to 999 are printed as plain integers.  Larger values are scaled
 * by powers of 1024 and get a K, M, G or T suffix: with one decimal while
 * below 10 units, otherwise rounded to the nearest integer.  T is used for
 * everything that does not fit a smaller unit.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char unit_chars[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (idx = 0; idx < NB_SUFFIXES; idx++, unit *= 1024) {
        if (size < (10 * unit)) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit,
                     unit_chars[idx]);
            return buf;
        }
        if (size < (1000 * unit) || idx == (NB_SUFFIXES - 1)) {
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     ((size + (unit >> 1)) / unit),
                     unit_chars[idx]);
            return buf;
        }
    }

    return buf;
}
2173
2174 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2175 {
2176 char buf1[128], date_buf[128], clock_buf[128];
2177 #ifdef _WIN32
2178 struct tm *ptm;
2179 #else
2180 struct tm tm;
2181 #endif
2182 time_t ti;
2183 int64_t secs;
2184
2185 if (!sn) {
2186 snprintf(buf, buf_size,
2187 "%-10s%-20s%7s%20s%15s",
2188 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2189 } else {
2190 ti = sn->date_sec;
2191 #ifdef _WIN32
2192 ptm = localtime(&ti);
2193 strftime(date_buf, sizeof(date_buf),
2194 "%Y-%m-%d %H:%M:%S", ptm);
2195 #else
2196 localtime_r(&ti, &tm);
2197 strftime(date_buf, sizeof(date_buf),
2198 "%Y-%m-%d %H:%M:%S", &tm);
2199 #endif
2200 secs = sn->vm_clock_nsec / 1000000000;
2201 snprintf(clock_buf, sizeof(clock_buf),
2202 "%02d:%02d:%02d.%03d",
2203 (int)(secs / 3600),
2204 (int)((secs / 60) % 60),
2205 (int)(secs % 60),
2206 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2207 snprintf(buf, buf_size,
2208 "%-10s%-20s%7s%20s%15s",
2209 sn->id_str, sn->name,
2210 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2211 date_buf,
2212 clock_buf);
2213 }
2214 return buf;
2215 }
2216
2217 /**************************************************************/
2218 /* async I/Os */
2219
2220 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
2221 QEMUIOVector *qiov, int nb_sectors,
2222 BlockDriverCompletionFunc *cb, void *opaque)
2223 {
2224 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2225
2226 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
2227 cb, opaque, false);
2228 }
2229
2230 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2231 QEMUIOVector *qiov, int nb_sectors,
2232 BlockDriverCompletionFunc *cb, void *opaque)
2233 {
2234 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2235
2236 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
2237 cb, opaque, true);
2238 }
2239
2240
2241 typedef struct MultiwriteCB {
2242 int error;
2243 int num_requests;
2244 int num_callbacks;
2245 struct {
2246 BlockDriverCompletionFunc *cb;
2247 void *opaque;
2248 QEMUIOVector *free_qiov;
2249 void *free_buf;
2250 } callbacks[];
2251 } MultiwriteCB;
2252
2253 static void multiwrite_user_cb(MultiwriteCB *mcb)
2254 {
2255 int i;
2256
2257 for (i = 0; i < mcb->num_callbacks; i++) {
2258 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
2259 if (mcb->callbacks[i].free_qiov) {
2260 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2261 }
2262 g_free(mcb->callbacks[i].free_qiov);
2263 qemu_vfree(mcb->callbacks[i].free_buf);
2264 }
2265 }
2266
2267 static void multiwrite_cb(void *opaque, int ret)
2268 {
2269 MultiwriteCB *mcb = opaque;
2270
2271 trace_multiwrite_cb(mcb, ret);
2272
2273 if (ret < 0 && !mcb->error) {
2274 mcb->error = ret;
2275 }
2276
2277 mcb->num_requests--;
2278 if (mcb->num_requests == 0) {
2279 multiwrite_user_cb(mcb);
2280 g_free(mcb);
2281 }
2282 }
2283
2284 static int multiwrite_req_compare(const void *a, const void *b)
2285 {
2286 const BlockRequest *req1 = a, *req2 = b;
2287
2288 /*
2289 * Note that we can't simply subtract req2->sector from req1->sector
2290 * here as that could overflow the return value.
2291 */
2292 if (req1->sector > req2->sector) {
2293 return 1;
2294 } else if (req1->sector < req2->sector) {
2295 return -1;
2296 } else {
2297 return 0;
2298 }
2299 }
2300
2301 /*
2302 * Takes a bunch of requests and tries to merge them. Returns the number of
2303 * requests that remain after merging.
2304 */
2305 static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2306 int num_reqs, MultiwriteCB *mcb)
2307 {
2308 int i, outidx;
2309
2310 // Sort requests by start sector
2311 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2312
2313 // Check if adjacent requests touch the same clusters. If so, combine them,
2314 // filling up gaps with zero sectors.
2315 outidx = 0;
2316 for (i = 1; i < num_reqs; i++) {
2317 int merge = 0;
2318 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2319
2320 // This handles the cases that are valid for all block drivers, namely
2321 // exactly sequential writes and overlapping writes.
2322 if (reqs[i].sector <= oldreq_last) {
2323 merge = 1;
2324 }
2325
2326 // The block driver may decide that it makes sense to combine requests
2327 // even if there is a gap of some sectors between them. In this case,
2328 // the gap is filled with zeros (therefore only applicable for yet
2329 // unused space in format like qcow2).
2330 if (!merge && bs->drv->bdrv_merge_requests) {
2331 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2332 }
2333
2334 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2335 merge = 0;
2336 }
2337
2338 if (merge) {
2339 size_t size;
2340 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
2341 qemu_iovec_init(qiov,
2342 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2343
2344 // Add the first request to the merged one. If the requests are
2345 // overlapping, drop the last sectors of the first request.
2346 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2347 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2348
2349 // We might need to add some zeros between the two requests
2350 if (reqs[i].sector > oldreq_last) {
2351 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2352 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2353 memset(buf, 0, zero_bytes);
2354 qemu_iovec_add(qiov, buf, zero_bytes);
2355 mcb->callbacks[i].free_buf = buf;
2356 }
2357
2358 // Add the second request
2359 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2360
2361 reqs[outidx].nb_sectors = qiov->size >> 9;
2362 reqs[outidx].qiov = qiov;
2363
2364 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2365 } else {
2366 outidx++;
2367 reqs[outidx].sector = reqs[i].sector;
2368 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2369 reqs[outidx].qiov = reqs[i].qiov;
2370 }
2371 }
2372
2373 return outidx + 1;
2374 }
2375
2376 /*
2377 * Submit multiple AIO write requests at once.
2378 *
2379 * On success, the function returns 0 and all requests in the reqs array have
2380 * been submitted. In error case this function returns -1, and any of the
2381 * requests may or may not be submitted yet. In particular, this means that the
2382 * callback will be called for some of the requests, for others it won't. The
2383 * caller must check the error field of the BlockRequest to wait for the right
2384 * callbacks (if error != 0, no callback will be called).
2385 *
2386 * The implementation may modify the contents of the reqs array, e.g. to merge
2387 * requests. However, the fields opaque and error are left unmodified as they
2388 * are used to signal failure for a single request to the caller.
2389 */
2390 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2391 {
2392 BlockDriverAIOCB *acb;
2393 MultiwriteCB *mcb;
2394 int i;
2395
2396 /* don't submit writes if we don't have a medium */
2397 if (bs->drv == NULL) {
2398 for (i = 0; i < num_reqs; i++) {
2399 reqs[i].error = -ENOMEDIUM;
2400 }
2401 return -1;
2402 }
2403
2404 if (num_reqs == 0) {
2405 return 0;
2406 }
2407
2408 // Create MultiwriteCB structure
2409 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
2410 mcb->num_requests = 0;
2411 mcb->num_callbacks = num_reqs;
2412
2413 for (i = 0; i < num_reqs; i++) {
2414 mcb->callbacks[i].cb = reqs[i].cb;
2415 mcb->callbacks[i].opaque = reqs[i].opaque;
2416 }
2417
2418 // Check for mergable requests
2419 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2420
2421 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2422
2423 /*
2424 * Run the aio requests. As soon as one request can't be submitted
2425 * successfully, fail all requests that are not yet submitted (we must
2426 * return failure for all requests anyway)
2427 *
2428 * num_requests cannot be set to the right value immediately: If
2429 * bdrv_aio_writev fails for some request, num_requests would be too high
2430 * and therefore multiwrite_cb() would never recognize the multiwrite
2431 * request as completed. We also cannot use the loop variable i to set it
2432 * when the first request fails because the callback may already have been
2433 * called for previously submitted requests. Thus, num_requests must be
2434 * incremented for each request that is submitted.
2435 *
2436 * The problem that callbacks may be called early also means that we need
2437 * to take care that num_requests doesn't become 0 before all requests are
2438 * submitted - multiwrite_cb() would consider the multiwrite request
2439 * completed. A dummy request that is "completed" by a manual call to
2440 * multiwrite_cb() takes care of this.
2441 */
2442 mcb->num_requests = 1;
2443
2444 // Run the aio requests
2445 for (i = 0; i < num_reqs; i++) {
2446 mcb->num_requests++;
2447 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2448 reqs[i].nb_sectors, multiwrite_cb, mcb);
2449
2450 if (acb == NULL) {
2451 // We can only fail the whole thing if no request has been
2452 // submitted yet. Otherwise we'll wait for the submitted AIOs to
2453 // complete and report the error in the callback.
2454 if (i == 0) {
2455 trace_bdrv_aio_multiwrite_earlyfail(mcb);
2456 goto fail;
2457 } else {
2458 trace_bdrv_aio_multiwrite_latefail(mcb, i);
2459 multiwrite_cb(mcb, -EIO);
2460 break;
2461 }
2462 }
2463 }
2464
2465 /* Complete the dummy request */
2466 multiwrite_cb(mcb, 0);
2467
2468 return 0;
2469
2470 fail:
2471 for (i = 0; i < mcb->num_callbacks; i++) {
2472 reqs[i].error = -EIO;
2473 }
2474 g_free(mcb);
2475 return -1;
2476 }
2477
2478 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
2479 {
2480 acb->pool->cancel(acb);
2481 }
2482
2483
2484 /**************************************************************/
2485 /* async block device emulation */
2486
2487 typedef struct BlockDriverAIOCBSync {
2488 BlockDriverAIOCB common;
2489 QEMUBH *bh;
2490 int ret;
2491 /* vector translation state */
2492 QEMUIOVector *qiov;
2493 uint8_t *bounce;
2494 int is_write;
2495 } BlockDriverAIOCBSync;
2496
2497 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2498 {
2499 BlockDriverAIOCBSync *acb =
2500 container_of(blockacb, BlockDriverAIOCBSync, common);
2501 qemu_bh_delete(acb->bh);
2502 acb->bh = NULL;
2503 qemu_aio_release(acb);
2504 }
2505
2506 static AIOPool bdrv_em_aio_pool = {
2507 .aiocb_size = sizeof(BlockDriverAIOCBSync),
2508 .cancel = bdrv_aio_cancel_em,
2509 };
2510
2511 static void bdrv_aio_bh_cb(void *opaque)
2512 {
2513 BlockDriverAIOCBSync *acb = opaque;
2514
2515 if (!acb->is_write)
2516 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
2517 qemu_vfree(acb->bounce);
2518 acb->common.cb(acb->common.opaque, acb->ret);
2519 qemu_bh_delete(acb->bh);
2520 acb->bh = NULL;
2521 qemu_aio_release(acb);
2522 }
2523
2524 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2525 int64_t sector_num,
2526 QEMUIOVector *qiov,
2527 int nb_sectors,
2528 BlockDriverCompletionFunc *cb,
2529 void *opaque,
2530 int is_write)
2531
2532 {
2533 BlockDriverAIOCBSync *acb;
2534
2535 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2536 acb->is_write = is_write;
2537 acb->qiov = qiov;
2538 acb->bounce = qemu_blockalign(bs, qiov->size);
2539
2540 if (!acb->bh)
2541 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2542
2543 if (is_write) {
2544 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
2545 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
2546 } else {
2547 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
2548 }
2549
2550 qemu_bh_schedule(acb->bh);
2551
2552 return &acb->common;
2553 }
2554
2555 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2556 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2557 BlockDriverCompletionFunc *cb, void *opaque)
2558 {
2559 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2560 }
2561
2562 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2563 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2564 BlockDriverCompletionFunc *cb, void *opaque)
2565 {
2566 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
2567 }
2568
2569
2570 typedef struct BlockDriverAIOCBCoroutine {
2571 BlockDriverAIOCB common;
2572 BlockRequest req;
2573 bool is_write;
2574 QEMUBH* bh;
2575 } BlockDriverAIOCBCoroutine;
2576
2577 static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
2578 {
2579 qemu_aio_flush();
2580 }
2581
2582 static AIOPool bdrv_em_co_aio_pool = {
2583 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
2584 .cancel = bdrv_aio_co_cancel_em,
2585 };
2586
2587 static void bdrv_co_em_bh(void *opaque)
2588 {
2589 BlockDriverAIOCBCoroutine *acb = opaque;
2590
2591 acb->common.cb(acb->common.opaque, acb->req.error);
2592 qemu_bh_delete(acb->bh);
2593 qemu_aio_release(acb);
2594 }
2595
2596 /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
2597 static void coroutine_fn bdrv_co_do_rw(void *opaque)
2598 {
2599 BlockDriverAIOCBCoroutine *acb = opaque;
2600 BlockDriverState *bs = acb->common.bs;
2601
2602 if (!acb->is_write) {
2603 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
2604 acb->req.nb_sectors, acb->req.qiov);
2605 } else {
2606 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
2607 acb->req.nb_sectors, acb->req.qiov);
2608 }
2609
2610 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
2611 qemu_bh_schedule(acb->bh);
2612 }
2613
2614 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
2615 int64_t sector_num,
2616 QEMUIOVector *qiov,
2617 int nb_sectors,
2618 BlockDriverCompletionFunc *cb,
2619 void *opaque,
2620 bool is_write)
2621 {
2622 Coroutine *co;
2623 BlockDriverAIOCBCoroutine *acb;
2624
2625 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2626 acb->req.sector = sector_num;
2627 acb->req.nb_sectors = nb_sectors;
2628 acb->req.qiov = qiov;
2629 acb->is_write = is_write;
2630
2631 co = qemu_coroutine_create(bdrv_co_do_rw);
2632 qemu_coroutine_enter(co, acb);
2633
2634 return &acb->common;
2635 }
2636
2637 static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
2638 {
2639 BlockDriverAIOCBCoroutine *acb = opaque;
2640 BlockDriverState *bs = acb->common.bs;
2641
2642 acb->req.error = bdrv_co_flush(bs);
2643 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
2644 qemu_bh_schedule(acb->bh);
2645 }
2646
2647 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2648 BlockDriverCompletionFunc *cb, void *opaque)
2649 {
2650 trace_bdrv_aio_flush(bs, opaque);
2651
2652 Coroutine *co;
2653 BlockDriverAIOCBCoroutine *acb;
2654
2655 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2656 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
2657 qemu_coroutine_enter(co, acb);
2658
2659 return &acb->common;
2660 }
2661
2662 static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
2663 {
2664 BlockDriverAIOCBCoroutine *acb = opaque;
2665 BlockDriverState *bs = acb->common.bs;
2666
2667 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
2668 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
2669 qemu_bh_schedule(acb->bh);
2670 }
2671
2672 BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
2673 int64_t sector_num, int nb_sectors,
2674 BlockDriverCompletionFunc *cb, void *opaque)
2675 {
2676 Coroutine *co;
2677 BlockDriverAIOCBCoroutine *acb;
2678
2679 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
2680
2681 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2682 acb->req.sector = sector_num;
2683 acb->req.nb_sectors = nb_sectors;
2684 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
2685 qemu_coroutine_enter(co, acb);
2686
2687 return &acb->common;
2688 }
2689
2690 void bdrv_init(void)
2691 {
2692 module_call_init(MODULE_INIT_BLOCK);
2693 }
2694
2695 void bdrv_init_with_whitelist(void)
2696 {
2697 use_bdrv_whitelist = 1;
2698 bdrv_init();
2699 }
2700
2701 void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2702 BlockDriverCompletionFunc *cb, void *opaque)
2703 {
2704 BlockDriverAIOCB *acb;
2705
2706 if (pool->free_aiocb) {
2707 acb = pool->free_aiocb;
2708 pool->free_aiocb = acb->next;
2709 } else {
2710 acb = g_malloc0(pool->aiocb_size);
2711 acb->pool = pool;
2712 }
2713 acb->bs = bs;
2714 acb->cb = cb;
2715 acb->opaque = opaque;
2716 return acb;
2717 }
2718
2719 void qemu_aio_release(void *p)
2720 {
2721 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2722 AIOPool *pool = acb->pool;
2723 acb->next = pool->free_aiocb;
2724 pool->free_aiocb = acb;
2725 }
2726
2727 /**************************************************************/
2728 /* Coroutine block device emulation */
2729
2730 typedef struct CoroutineIOCompletion {
2731 Coroutine *coroutine;
2732 int ret;
2733 } CoroutineIOCompletion;
2734
2735 static void bdrv_co_io_em_complete(void *opaque, int ret)
2736 {
2737 CoroutineIOCompletion *co = opaque;
2738
2739 co->ret = ret;
2740 qemu_coroutine_enter(co->coroutine, NULL);
2741 }
2742
2743 static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
2744 int nb_sectors, QEMUIOVector *iov,
2745 bool is_write)
2746 {
2747 CoroutineIOCompletion co = {
2748 .coroutine = qemu_coroutine_self(),
2749 };
2750 BlockDriverAIOCB *acb;
2751
2752 if (is_write) {
2753 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
2754 bdrv_co_io_em_complete, &co);
2755 } else {
2756 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
2757 bdrv_co_io_em_complete, &co);
2758 }
2759
2760 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
2761 if (!acb) {
2762 return -EIO;
2763 }
2764 qemu_coroutine_yield();
2765
2766 return co.ret;
2767 }
2768
2769 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
2770 int64_t sector_num, int nb_sectors,
2771 QEMUIOVector *iov)
2772 {
2773 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
2774 }
2775
2776 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
2777 int64_t sector_num, int nb_sectors,
2778 QEMUIOVector *iov)
2779 {
2780 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
2781 }
2782
2783 static void coroutine_fn bdrv_flush_co_entry(void *opaque)
2784 {
2785 RwCo *rwco = opaque;
2786
2787 rwco->ret = bdrv_co_flush(rwco->bs);
2788 }
2789
2790 int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
2791 {
2792 int ret;
2793
2794 if (!bs->drv) {
2795 return 0;
2796 }
2797
2798 /* Write back cached data to the OS even with cache=unsafe */
2799 if (bs->drv->bdrv_co_flush_to_os) {
2800 ret = bs->drv->bdrv_co_flush_to_os(bs);
2801 if (ret < 0) {
2802 return ret;
2803 }
2804 }
2805
2806 /* But don't actually force it to the disk with cache=unsafe */
2807 if (bs->open_flags & BDRV_O_NO_FLUSH) {
2808 return 0;
2809 }
2810
2811 if (bs->drv->bdrv_co_flush_to_disk) {
2812 return bs->drv->bdrv_co_flush_to_disk(bs);
2813 } else if (bs->drv->bdrv_aio_flush) {
2814 BlockDriverAIOCB *acb;
2815 CoroutineIOCompletion co = {
2816 .coroutine = qemu_coroutine_self(),
2817 };
2818
2819 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
2820 if (acb == NULL) {
2821 return -EIO;
2822 } else {
2823 qemu_coroutine_yield();
2824 return co.ret;
2825 }
2826 } else {
2827 /*
2828 * Some block drivers always operate in either writethrough or unsafe
2829 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
2830 * know how the server works (because the behaviour is hardcoded or
2831 * depends on server-side configuration), so we can't ensure that
2832 * everything is safe on disk. Returning an error doesn't work because
2833 * that would break guests even if the server operates in writethrough
2834 * mode.
2835 *
2836 * Let's hope the user knows what he's doing.
2837 */
2838 return 0;
2839 }
2840 }
2841
2842 void bdrv_invalidate_cache(BlockDriverState *bs)
2843 {
2844 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
2845 bs->drv->bdrv_invalidate_cache(bs);
2846 }
2847 }
2848
2849 void bdrv_invalidate_cache_all(void)
2850 {
2851 BlockDriverState *bs;
2852
2853 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2854 bdrv_invalidate_cache(bs);
2855 }
2856 }
2857
2858 int bdrv_flush(BlockDriverState *bs)
2859 {
2860 Coroutine *co;
2861 RwCo rwco = {
2862 .bs = bs,
2863 .ret = NOT_DONE,
2864 };
2865
2866 if (qemu_in_coroutine()) {
2867 /* Fast-path if already in coroutine context */
2868 bdrv_flush_co_entry(&rwco);
2869 } else {
2870 co = qemu_coroutine_create(bdrv_flush_co_entry);
2871 qemu_coroutine_enter(co, &rwco);
2872 while (rwco.ret == NOT_DONE) {
2873 qemu_aio_wait();
2874 }
2875 }
2876
2877 return rwco.ret;
2878 }
2879
2880 static void coroutine_fn bdrv_discard_co_entry(void *opaque)
2881 {
2882 RwCo *rwco = opaque;
2883
2884 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
2885 }
2886
2887 int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
2888 int nb_sectors)
2889 {
2890 if (!bs->drv) {
2891 return -ENOMEDIUM;
2892 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
2893 return -EIO;
2894 } else if (bs->read_only) {
2895 return -EROFS;
2896 } else if (bs->drv->bdrv_co_discard) {
2897 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
2898 } else if (bs->drv->bdrv_aio_discard) {
2899 BlockDriverAIOCB *acb;
2900 CoroutineIOCompletion co = {
2901 .coroutine = qemu_coroutine_self(),
2902 };
2903
2904 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
2905 bdrv_co_io_em_complete, &co);
2906 if (acb == NULL) {
2907 return -EIO;
2908 } else {
2909 qemu_coroutine_yield();
2910 return co.ret;
2911 }
2912 } else {
2913 return 0;
2914 }
2915 }
2916
2917 int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
2918 {
2919 Coroutine *co;
2920 RwCo rwco = {
2921 .bs = bs,
2922 .sector_num = sector_num,
2923 .nb_sectors = nb_sectors,
2924 .ret = NOT_DONE,
2925 };
2926
2927 if (qemu_in_coroutine()) {
2928 /* Fast-path if already in coroutine context */
2929 bdrv_discard_co_entry(&rwco);
2930 } else {
2931 co = qemu_coroutine_create(bdrv_discard_co_entry);
2932 qemu_coroutine_enter(co, &rwco);
2933 while (rwco.ret == NOT_DONE) {
2934 qemu_aio_wait();
2935 }
2936 }
2937
2938 return rwco.ret;
2939 }
2940
2941 /**************************************************************/
2942 /* removable device support */
2943
2944 /**
2945 * Return TRUE if the media is present
2946 */
2947 int bdrv_is_inserted(BlockDriverState *bs)
2948 {
2949 BlockDriver *drv = bs->drv;
2950
2951 if (!drv)
2952 return 0;
2953 if (!drv->bdrv_is_inserted)
2954 return 1;
2955 return drv->bdrv_is_inserted(bs);
2956 }
2957
2958 /**
2959 * Return whether the media changed since the last call to this
2960 * function, or -ENOTSUP if we don't know. Most drivers don't know.
2961 */
2962 int bdrv_media_changed(BlockDriverState *bs)
2963 {
2964 BlockDriver *drv = bs->drv;
2965
2966 if (drv && drv->bdrv_media_changed) {
2967 return drv->bdrv_media_changed(bs);
2968 }
2969 return -ENOTSUP;
2970 }
2971
2972 /**
2973 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
2974 */
2975 void bdrv_eject(BlockDriverState *bs, int eject_flag)
2976 {
2977 BlockDriver *drv = bs->drv;
2978
2979 if (drv && drv->bdrv_eject) {
2980 drv->bdrv_eject(bs, eject_flag);
2981 }
2982 }
2983
2984 /**
2985 * Lock or unlock the media (if it is locked, the user won't be able
2986 * to eject it manually).
2987 */
2988 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
2989 {
2990 BlockDriver *drv = bs->drv;
2991
2992 trace_bdrv_lock_medium(bs, locked);
2993
2994 if (drv && drv->bdrv_lock_medium) {
2995 drv->bdrv_lock_medium(bs, locked);
2996 }
2997 }
2998
2999 /* needed for generic scsi interface */
3000
3001 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3002 {
3003 BlockDriver *drv = bs->drv;
3004
3005 if (drv && drv->bdrv_ioctl)
3006 return drv->bdrv_ioctl(bs, req, buf);
3007 return -ENOTSUP;
3008 }
3009
3010 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3011 unsigned long int req, void *buf,
3012 BlockDriverCompletionFunc *cb, void *opaque)
3013 {
3014 BlockDriver *drv = bs->drv;
3015
3016 if (drv && drv->bdrv_aio_ioctl)
3017 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3018 return NULL;
3019 }
3020
3021 void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3022 {
3023 bs->buffer_alignment = align;
3024 }
3025
3026 void *qemu_blockalign(BlockDriverState *bs, size_t size)
3027 {
3028 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3029 }
3030
3031 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3032 {
3033 int64_t bitmap_size;
3034
3035 bs->dirty_count = 0;
3036 if (enable) {
3037 if (!bs->dirty_bitmap) {
3038 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3039 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3040 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
3041
3042 bs->dirty_bitmap = g_malloc0(bitmap_size);
3043 }
3044 } else {
3045 if (bs->dirty_bitmap) {
3046 g_free(bs->dirty_bitmap);
3047 bs->dirty_bitmap = NULL;
3048 }
3049 }
3050 }
3051
3052 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3053 {
3054 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
3055
3056 if (bs->dirty_bitmap &&
3057 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
3058 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3059 (1UL << (chunk % (sizeof(unsigned long) * 8))));
3060 } else {
3061 return 0;
3062 }
3063 }
3064
3065 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3066 int nr_sectors)
3067 {
3068 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3069 }
3070
3071 int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3072 {
3073 return bs->dirty_count;
3074 }
3075
3076 void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3077 {
3078 assert(bs->in_use != in_use);
3079 bs->in_use = in_use;
3080 }
3081
3082 int bdrv_in_use(BlockDriverState *bs)
3083 {
3084 return bs->in_use;
3085 }
3086
3087 void bdrv_iostatus_enable(BlockDriverState *bs)
3088 {
3089 bs->iostatus_enabled = true;
3090 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3091 }
3092
3093 /* The I/O status is only enabled if the drive explicitly
3094 * enables it _and_ the VM is configured to stop on errors */
3095 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3096 {
3097 return (bs->iostatus_enabled &&
3098 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3099 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3100 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3101 }
3102
3103 void bdrv_iostatus_disable(BlockDriverState *bs)
3104 {
3105 bs->iostatus_enabled = false;
3106 }
3107
3108 void bdrv_iostatus_reset(BlockDriverState *bs)
3109 {
3110 if (bdrv_iostatus_is_enabled(bs)) {
3111 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3112 }
3113 }
3114
3115 /* XXX: Today this is set by device models because it makes the implementation
3116 quite simple. However, the block layer knows about the error, so it's
3117 possible to implement this without device models being involved */
3118 void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3119 {
3120 if (bdrv_iostatus_is_enabled(bs) &&
3121 bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
3122 assert(error >= 0);
3123 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3124 BLOCK_DEVICE_IO_STATUS_FAILED;
3125 }
3126 }
3127
3128 void
3129 bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3130 enum BlockAcctType type)
3131 {
3132 assert(type < BDRV_MAX_IOTYPE);
3133
3134 cookie->bytes = bytes;
3135 cookie->start_time_ns = get_clock();
3136 cookie->type = type;
3137 }
3138
3139 void
3140 bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3141 {
3142 assert(cookie->type < BDRV_MAX_IOTYPE);
3143
3144 bs->nr_bytes[cookie->type] += cookie->bytes;
3145 bs->nr_ops[cookie->type]++;
3146 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
3147 }
3148
3149 int bdrv_img_create(const char *filename, const char *fmt,
3150 const char *base_filename, const char *base_fmt,
3151 char *options, uint64_t img_size, int flags)
3152 {
3153 QEMUOptionParameter *param = NULL, *create_options = NULL;
3154 QEMUOptionParameter *backing_fmt, *backing_file, *size;
3155 BlockDriverState *bs = NULL;
3156 BlockDriver *drv, *proto_drv;
3157 BlockDriver *backing_drv = NULL;
3158 int ret = 0;
3159
3160 /* Find driver and parse its options */
3161 drv = bdrv_find_format(fmt);
3162 if (!drv) {
3163 error_report("Unknown file format '%s'", fmt);
3164 ret = -EINVAL;
3165 goto out;
3166 }
3167
3168 proto_drv = bdrv_find_protocol(filename);
3169 if (!proto_drv) {
3170 error_report("Unknown protocol '%s'", filename);
3171 ret = -EINVAL;
3172 goto out;
3173 }
3174
3175 create_options = append_option_parameters(create_options,
3176 drv->create_options);
3177 create_options = append_option_parameters(create_options,
3178 proto_drv->create_options);
3179
3180 /* Create parameter list with default values */
3181 param = parse_option_parameters("", create_options, param);
3182
3183 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3184
3185 /* Parse -o options */
3186 if (options) {
3187 param = parse_option_parameters(options, create_options, param);
3188 if (param == NULL) {
3189 error_report("Invalid options for file format '%s'.", fmt);
3190 ret = -EINVAL;
3191 goto out;
3192 }
3193 }
3194
3195 if (base_filename) {
3196 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3197 base_filename)) {
3198 error_report("Backing file not supported for file format '%s'",
3199 fmt);
3200 ret = -EINVAL;
3201 goto out;
3202 }
3203 }
3204
3205 if (base_fmt) {
3206 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3207 error_report("Backing file format not supported for file "
3208 "format '%s'", fmt);
3209 ret = -EINVAL;
3210 goto out;
3211 }
3212 }
3213
3214 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3215 if (backing_file && backing_file->value.s) {
3216 if (!strcmp(filename, backing_file->value.s)) {
3217 error_report("Error: Trying to create an image with the "
3218 "same filename as the backing file");
3219 ret = -EINVAL;
3220 goto out;
3221 }
3222 }
3223
3224 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3225 if (backing_fmt && backing_fmt->value.s) {
3226 backing_drv = bdrv_find_format(backing_fmt->value.s);
3227 if (!backing_drv) {
3228 error_report("Unknown backing file format '%s'",
3229 backing_fmt->value.s);
3230 ret = -EINVAL;
3231 goto out;
3232 }
3233 }
3234
3235 // The size for the image must always be specified, with one exception:
3236 // If we are using a backing file, we can obtain the size from there
3237 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3238 if (size && size->value.n == -1) {
3239 if (backing_file && backing_file->value.s) {
3240 uint64_t size;
3241 char buf[32];
3242
3243 bs = bdrv_new("");
3244
3245 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
3246 if (ret < 0) {
3247 error_report("Could not open '%s'", backing_file->value.s);
3248 goto out;
3249 }
3250 bdrv_get_geometry(bs, &size);
3251 size *= 512;
3252
3253 snprintf(buf, sizeof(buf), "%" PRId64, size);
3254 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3255 } else {
3256 error_report("Image creation needs a size parameter");
3257 ret = -EINVAL;
3258 goto out;
3259 }
3260 }
3261
3262 printf("Formatting '%s', fmt=%s ", filename, fmt);
3263 print_option_parameters(param);
3264 puts("");
3265
3266 ret = bdrv_create(drv, filename, param);
3267
3268 if (ret < 0) {
3269 if (ret == -ENOTSUP) {
3270 error_report("Formatting or formatting option not supported for "
3271 "file format '%s'", fmt);
3272 } else if (ret == -EFBIG) {
3273 error_report("The image size is too large for file format '%s'",
3274 fmt);
3275 } else {
3276 error_report("%s: error while creating %s: %s", filename, fmt,
3277 strerror(-ret));
3278 }
3279 }
3280
3281 out:
3282 free_option_parameters(create_options);
3283 free_option_parameters(param);
3284
3285 if (bs) {
3286 bdrv_delete(bs);
3287 }
3288
3289 return ret;
3290 }