]> git.proxmox.com Git - qemu.git/blob - block.c
4c650353134e075bba26ae89db895d2944a023b9
[qemu.git] / block.c
1 /*
2 * QEMU System Emulator block driver
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "monitor.h"
27 #include "block_int.h"
28 #include "module.h"
29 #include "qemu-objects.h"
30
31 #ifdef CONFIG_BSD
32 #include <sys/types.h>
33 #include <sys/stat.h>
34 #include <sys/ioctl.h>
35 #include <sys/queue.h>
36 #ifndef __DragonFly__
37 #include <sys/disk.h>
38 #endif
39 #endif
40
41 #ifdef _WIN32
42 #include <windows.h>
43 #endif
44
45 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
46 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
47 BlockDriverCompletionFunc *cb, void *opaque);
48 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
49 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
50 BlockDriverCompletionFunc *cb, void *opaque);
51 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
52 BlockDriverCompletionFunc *cb, void *opaque);
53 static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
54 BlockDriverCompletionFunc *cb, void *opaque);
55 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
56 uint8_t *buf, int nb_sectors);
57 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
58 const uint8_t *buf, int nb_sectors);
59
60 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
61 QTAILQ_HEAD_INITIALIZER(bdrv_states);
62
63 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
64 QLIST_HEAD_INITIALIZER(bdrv_drivers);
65
66 /* If non-zero, use only whitelisted block drivers */
67 static int use_bdrv_whitelist;
68
/*
 * Tell whether 'path' is absolute.
 *
 * Everything up to and including the first ':' (if there is one) is
 * skipped; the path is absolute when the remainder starts with a
 * directory separator.  On Windows both '/' and '\' count as separators,
 * and a path that itself starts with a separator is always absolute.
 *
 * Returns 1 if absolute, 0 otherwise.
 */
int path_is_absolute(const char *path)
{
    const char *colon = strchr(path, ':');
    const char *start = colon ? colon + 1 : path;

#ifdef _WIN32
    /* device names of the form <sep><sep>.<sep>d: are absolute as well */
    if (path[0] == '/' || path[0] == '\\') {
        return 1;
    }
    return (start[0] == '/' || start[0] == '\\');
#else
    return (start[0] == '/');
#endif
}
88
/*
 * Build in 'dest' (capacity 'dest_size' bytes) the path designated by
 * 'filename'.  An absolute 'filename' is copied as is; otherwise it is
 * appended to the directory part of 'base_path', i.e. everything up to
 * and including the last directory separator or, failing that, the first
 * ':' (so URL-like base paths are supported).
 *
 * Nothing is written when dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *prefix_end, *dir_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* one past the first ':' of the base path, or its start */
    prefix_end = strchr(base_path, ':');
    prefix_end = prefix_end ? prefix_end + 1 : base_path;

    /* one past the last directory separator, or the start */
    dir_end = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!dir_end || backslash > dir_end) {
            dir_end = backslash;
        }
    }
#endif
    dir_end = dir_end ? dir_end + 1 : base_path;

    /* keep whichever of the two prefixes is longer */
    if (dir_end > prefix_end) {
        prefix_end = dir_end;
    }

    prefix_len = prefix_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
132
133 void bdrv_register(BlockDriver *bdrv)
134 {
135 if (!bdrv->bdrv_aio_readv) {
136 /* add AIO emulation layer */
137 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
138 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
139 } else if (!bdrv->bdrv_read) {
140 /* add synchronous IO emulation layer */
141 bdrv->bdrv_read = bdrv_read_em;
142 bdrv->bdrv_write = bdrv_write_em;
143 }
144
145 if (!bdrv->bdrv_aio_flush)
146 bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
147
148 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
149 }
150
151 /* create a new block device (by default it is empty) */
152 BlockDriverState *bdrv_new(const char *device_name)
153 {
154 BlockDriverState *bs;
155
156 bs = qemu_mallocz(sizeof(BlockDriverState));
157 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
158 if (device_name[0] != '\0') {
159 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
160 }
161 return bs;
162 }
163
164 BlockDriver *bdrv_find_format(const char *format_name)
165 {
166 BlockDriver *drv1;
167 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
168 if (!strcmp(drv1->format_name, format_name)) {
169 return drv1;
170 }
171 }
172 return NULL;
173 }
174
175 static int bdrv_is_whitelisted(BlockDriver *drv)
176 {
177 static const char *whitelist[] = {
178 CONFIG_BDRV_WHITELIST
179 };
180 const char **p;
181
182 if (!whitelist[0])
183 return 1; /* no whitelist, anything goes */
184
185 for (p = whitelist; *p; p++) {
186 if (!strcmp(drv->format_name, *p)) {
187 return 1;
188 }
189 }
190 return 0;
191 }
192
193 BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
194 {
195 BlockDriver *drv = bdrv_find_format(format_name);
196 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
197 }
198
199 int bdrv_create(BlockDriver *drv, const char* filename,
200 QEMUOptionParameter *options)
201 {
202 if (!drv->bdrv_create)
203 return -ENOTSUP;
204
205 return drv->bdrv_create(filename, options);
206 }
207
208 int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
209 {
210 BlockDriver *drv;
211
212 drv = bdrv_find_protocol(filename);
213 if (drv == NULL) {
214 drv = bdrv_find_format("file");
215 }
216
217 return bdrv_create(drv, filename, options);
218 }
219
#ifdef _WIN32
/*
 * Store the name of a new temporary file in 'filename'.
 *
 * NOTE(review): 'size' is not consulted by this variant; the buffer is
 * presumably expected to hold at least MAX_PATH characters -- confirm
 * against the callers.
 */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
/*
 * Create an empty temporary file named "vl.XXXXXX" (suffix filled in by
 * mkstemp()) under $TMPDIR, or /tmp when TMPDIR is unset, and store its
 * name in 'filename' (capacity 'size' bytes).
 *
 * The function cannot report errors through its return type, so when
 * mkstemp() fails 'filename' is set to the empty string: later attempts
 * to use the name then fail cleanly instead of operating on the
 * unexpanded template.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;

    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);

    fd = mkstemp(filename);
    if (fd < 0) {
        /* no file was created; there is no descriptor to close */
        if (size > 0) {
            filename[0] = '\0';
        }
        return;
    }
    close(fd);
}
#endif
242
#ifdef _WIN32
/* Return non-zero when 'filename' starts with an ASCII letter and ':'. */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_letter = (letter >= 'a' && letter <= 'z') ||
                    (letter >= 'A' && letter <= 'Z');

    return (is_letter && filename[1] == ':');
}

/*
 * Return 1 when 'filename' is exactly a drive letter followed by ':',
 * or when it starts with one of the two device prefixes tested below
 * (backslash or forward-slash spelling); 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }
    if (strstart(filename, "\\\\.\\", NULL)) {
        return 1;
    }
    if (strstart(filename, "//./", NULL)) {
        return 1;
    }
    return 0;
}
#endif
262
263 /*
264 * Detect host devices. By convention, /dev/cdrom[N] is always
265 * recognized as a host CDROM.
266 */
267 static BlockDriver *find_hdev_driver(const char *filename)
268 {
269 int score_max = 0, score;
270 BlockDriver *drv = NULL, *d;
271
272 QLIST_FOREACH(d, &bdrv_drivers, list) {
273 if (d->bdrv_probe_device) {
274 score = d->bdrv_probe_device(filename);
275 if (score > score_max) {
276 score_max = score;
277 drv = d;
278 }
279 }
280 }
281
282 return drv;
283 }
284
285 BlockDriver *bdrv_find_protocol(const char *filename)
286 {
287 BlockDriver *drv1;
288 char protocol[128];
289 int len;
290 const char *p;
291
292 /* TODO Drivers without bdrv_file_open must be specified explicitly */
293
294 /*
295 * XXX(hch): we really should not let host device detection
296 * override an explicit protocol specification, but moving this
297 * later breaks access to device names with colons in them.
298 * Thanks to the brain-dead persistent naming schemes on udev-
299 * based Linux systems those actually are quite common.
300 */
301 drv1 = find_hdev_driver(filename);
302 if (drv1) {
303 return drv1;
304 }
305
306 #ifdef _WIN32
307 if (is_windows_drive(filename) ||
308 is_windows_drive_prefix(filename))
309 return bdrv_find_format("file");
310 #endif
311
312 p = strchr(filename, ':');
313 if (!p) {
314 return bdrv_find_format("file");
315 }
316 len = p - filename;
317 if (len > sizeof(protocol) - 1)
318 len = sizeof(protocol) - 1;
319 memcpy(protocol, filename, len);
320 protocol[len] = '\0';
321 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
322 if (drv1->protocol_name &&
323 !strcmp(drv1->protocol_name, protocol)) {
324 return drv1;
325 }
326 }
327 return NULL;
328 }
329
330 static BlockDriver *find_image_format(const char *filename)
331 {
332 int ret, score, score_max;
333 BlockDriver *drv1, *drv;
334 uint8_t buf[2048];
335 BlockDriverState *bs;
336
337 ret = bdrv_file_open(&bs, filename, 0);
338 if (ret < 0)
339 return NULL;
340
341 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
342 if (bs->sg || !bdrv_is_inserted(bs)) {
343 bdrv_delete(bs);
344 return bdrv_find_format("raw");
345 }
346
347 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
348 bdrv_delete(bs);
349 if (ret < 0) {
350 return NULL;
351 }
352
353 score_max = 0;
354 drv = NULL;
355 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
356 if (drv1->bdrv_probe) {
357 score = drv1->bdrv_probe(buf, ret, filename);
358 if (score > score_max) {
359 score_max = score;
360 drv = drv1;
361 }
362 }
363 }
364 return drv;
365 }
366
367 /**
368 * Set the current 'total_sectors' value
369 */
370 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
371 {
372 BlockDriver *drv = bs->drv;
373
374 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
375 if (bs->sg)
376 return 0;
377
378 /* query actual device if possible, otherwise just trust the hint */
379 if (drv->bdrv_getlength) {
380 int64_t length = drv->bdrv_getlength(bs);
381 if (length < 0) {
382 return length;
383 }
384 hint = length >> BDRV_SECTOR_BITS;
385 }
386
387 bs->total_sectors = hint;
388 return 0;
389 }
390
391 /*
392 * Common part for opening disk images and files
393 */
394 static int bdrv_open_common(BlockDriverState *bs, const char *filename,
395 int flags, BlockDriver *drv)
396 {
397 int ret, open_flags;
398
399 assert(drv != NULL);
400
401 bs->file = NULL;
402 bs->total_sectors = 0;
403 bs->encrypted = 0;
404 bs->valid_key = 0;
405 bs->open_flags = flags;
406 /* buffer_alignment defaulted to 512, drivers can change this value */
407 bs->buffer_alignment = 512;
408
409 pstrcpy(bs->filename, sizeof(bs->filename), filename);
410
411 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
412 return -ENOTSUP;
413 }
414
415 bs->drv = drv;
416 bs->opaque = qemu_mallocz(drv->instance_size);
417
418 /*
419 * Yes, BDRV_O_NOCACHE aka O_DIRECT means we have to present a
420 * write cache to the guest. We do need the fdatasync to flush
421 * out transactions for block allocations, and we maybe have a
422 * volatile write cache in our backing device to deal with.
423 */
424 if (flags & (BDRV_O_CACHE_WB|BDRV_O_NOCACHE))
425 bs->enable_write_cache = 1;
426
427 /*
428 * Clear flags that are internal to the block layer before opening the
429 * image.
430 */
431 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
432
433 /*
434 * Snapshots should be writeable.
435 */
436 if (bs->is_temporary) {
437 open_flags |= BDRV_O_RDWR;
438 }
439
440 /* Open the image, either directly or using a protocol */
441 if (drv->bdrv_file_open) {
442 ret = drv->bdrv_file_open(bs, filename, open_flags);
443 } else {
444 ret = bdrv_file_open(&bs->file, filename, open_flags);
445 if (ret >= 0) {
446 ret = drv->bdrv_open(bs, open_flags);
447 }
448 }
449
450 if (ret < 0) {
451 goto free_and_fail;
452 }
453
454 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
455
456 ret = refresh_total_sectors(bs, bs->total_sectors);
457 if (ret < 0) {
458 goto free_and_fail;
459 }
460
461 #ifndef _WIN32
462 if (bs->is_temporary) {
463 unlink(filename);
464 }
465 #endif
466 return 0;
467
468 free_and_fail:
469 if (bs->file) {
470 bdrv_delete(bs->file);
471 bs->file = NULL;
472 }
473 qemu_free(bs->opaque);
474 bs->opaque = NULL;
475 bs->drv = NULL;
476 return ret;
477 }
478
479 /*
480 * Opens a file using a protocol (file, host_device, nbd, ...)
481 */
482 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
483 {
484 BlockDriverState *bs;
485 BlockDriver *drv;
486 int ret;
487
488 drv = bdrv_find_protocol(filename);
489 if (!drv) {
490 return -ENOENT;
491 }
492
493 bs = bdrv_new("");
494 ret = bdrv_open_common(bs, filename, flags, drv);
495 if (ret < 0) {
496 bdrv_delete(bs);
497 return ret;
498 }
499 bs->growable = 1;
500 *pbs = bs;
501 return 0;
502 }
503
504 /*
505 * Opens a disk image (raw, qcow2, vmdk, ...)
506 */
507 int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
508 BlockDriver *drv)
509 {
510 int ret;
511
512 if (flags & BDRV_O_SNAPSHOT) {
513 BlockDriverState *bs1;
514 int64_t total_size;
515 int is_protocol = 0;
516 BlockDriver *bdrv_qcow2;
517 QEMUOptionParameter *options;
518 char tmp_filename[PATH_MAX];
519 char backing_filename[PATH_MAX];
520
521 /* if snapshot, we create a temporary backing file and open it
522 instead of opening 'filename' directly */
523
524 /* if there is a backing file, use it */
525 bs1 = bdrv_new("");
526 ret = bdrv_open(bs1, filename, 0, drv);
527 if (ret < 0) {
528 bdrv_delete(bs1);
529 return ret;
530 }
531 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
532
533 if (bs1->drv && bs1->drv->protocol_name)
534 is_protocol = 1;
535
536 bdrv_delete(bs1);
537
538 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
539
540 /* Real path is meaningless for protocols */
541 if (is_protocol)
542 snprintf(backing_filename, sizeof(backing_filename),
543 "%s", filename);
544 else if (!realpath(filename, backing_filename))
545 return -errno;
546
547 bdrv_qcow2 = bdrv_find_format("qcow2");
548 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
549
550 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
551 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
552 if (drv) {
553 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
554 drv->format_name);
555 }
556
557 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
558 free_option_parameters(options);
559 if (ret < 0) {
560 return ret;
561 }
562
563 filename = tmp_filename;
564 drv = bdrv_qcow2;
565 bs->is_temporary = 1;
566 }
567
568 /* Find the right image format driver */
569 if (!drv) {
570 drv = find_image_format(filename);
571 }
572
573 if (!drv) {
574 ret = -ENOENT;
575 goto unlink_and_fail;
576 }
577
578 /* Open the image */
579 ret = bdrv_open_common(bs, filename, flags, drv);
580 if (ret < 0) {
581 goto unlink_and_fail;
582 }
583
584 /* If there is a backing file, use it */
585 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
586 char backing_filename[PATH_MAX];
587 int back_flags;
588 BlockDriver *back_drv = NULL;
589
590 bs->backing_hd = bdrv_new("");
591 path_combine(backing_filename, sizeof(backing_filename),
592 filename, bs->backing_file);
593 if (bs->backing_format[0] != '\0')
594 back_drv = bdrv_find_format(bs->backing_format);
595
596 /* backing files always opened read-only */
597 back_flags =
598 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
599
600 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
601 if (ret < 0) {
602 bdrv_close(bs);
603 return ret;
604 }
605 if (bs->is_temporary) {
606 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
607 } else {
608 /* base image inherits from "parent" */
609 bs->backing_hd->keep_read_only = bs->keep_read_only;
610 }
611 }
612
613 if (!bdrv_key_required(bs)) {
614 /* call the change callback */
615 bs->media_changed = 1;
616 if (bs->change_cb)
617 bs->change_cb(bs->change_opaque);
618 }
619
620 return 0;
621
622 unlink_and_fail:
623 if (bs->is_temporary) {
624 unlink(filename);
625 }
626 return ret;
627 }
628
629 void bdrv_close(BlockDriverState *bs)
630 {
631 if (bs->drv) {
632 if (bs->backing_hd) {
633 bdrv_delete(bs->backing_hd);
634 bs->backing_hd = NULL;
635 }
636 bs->drv->bdrv_close(bs);
637 qemu_free(bs->opaque);
638 #ifdef _WIN32
639 if (bs->is_temporary) {
640 unlink(bs->filename);
641 }
642 #endif
643 bs->opaque = NULL;
644 bs->drv = NULL;
645
646 if (bs->file != NULL) {
647 bdrv_close(bs->file);
648 }
649
650 /* call the change callback */
651 bs->media_changed = 1;
652 if (bs->change_cb)
653 bs->change_cb(bs->change_opaque);
654 }
655 }
656
657 void bdrv_close_all(void)
658 {
659 BlockDriverState *bs;
660
661 QTAILQ_FOREACH(bs, &bdrv_states, list) {
662 bdrv_close(bs);
663 }
664 }
665
666 void bdrv_delete(BlockDriverState *bs)
667 {
668 assert(!bs->peer);
669
670 /* remove from list, if necessary */
671 if (bs->device_name[0] != '\0') {
672 QTAILQ_REMOVE(&bdrv_states, bs, list);
673 }
674
675 bdrv_close(bs);
676 if (bs->file != NULL) {
677 bdrv_delete(bs->file);
678 }
679
680 qemu_free(bs);
681 }
682
683 int bdrv_attach(BlockDriverState *bs, DeviceState *qdev)
684 {
685 if (bs->peer) {
686 return -EBUSY;
687 }
688 bs->peer = qdev;
689 return 0;
690 }
691
692 void bdrv_detach(BlockDriverState *bs, DeviceState *qdev)
693 {
694 assert(bs->peer == qdev);
695 bs->peer = NULL;
696 }
697
698 DeviceState *bdrv_get_attached(BlockDriverState *bs)
699 {
700 return bs->peer;
701 }
702
703 /*
704 * Run consistency checks on an image
705 *
706 * Returns the number of errors or -errno when an internal error occurs
707 */
708 int bdrv_check(BlockDriverState *bs)
709 {
710 if (bs->drv->bdrv_check == NULL) {
711 return -ENOTSUP;
712 }
713
714 return bs->drv->bdrv_check(bs);
715 }
716
717 /* commit COW file into the raw image */
718 int bdrv_commit(BlockDriverState *bs)
719 {
720 BlockDriver *drv = bs->drv;
721 int64_t i, total_sectors;
722 int n, j, ro, open_flags;
723 int ret = 0, rw_ret = 0;
724 unsigned char sector[BDRV_SECTOR_SIZE];
725 char filename[1024];
726 BlockDriverState *bs_rw, *bs_ro;
727
728 if (!drv)
729 return -ENOMEDIUM;
730
731 if (!bs->backing_hd) {
732 return -ENOTSUP;
733 }
734
735 if (bs->backing_hd->keep_read_only) {
736 return -EACCES;
737 }
738
739 ro = bs->backing_hd->read_only;
740 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
741 open_flags = bs->backing_hd->open_flags;
742
743 if (ro) {
744 /* re-open as RW */
745 bdrv_delete(bs->backing_hd);
746 bs->backing_hd = NULL;
747 bs_rw = bdrv_new("");
748 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR, drv);
749 if (rw_ret < 0) {
750 bdrv_delete(bs_rw);
751 /* try to re-open read-only */
752 bs_ro = bdrv_new("");
753 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR, drv);
754 if (ret < 0) {
755 bdrv_delete(bs_ro);
756 /* drive not functional anymore */
757 bs->drv = NULL;
758 return ret;
759 }
760 bs->backing_hd = bs_ro;
761 return rw_ret;
762 }
763 bs->backing_hd = bs_rw;
764 }
765
766 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
767 for (i = 0; i < total_sectors;) {
768 if (drv->bdrv_is_allocated(bs, i, 65536, &n)) {
769 for(j = 0; j < n; j++) {
770 if (bdrv_read(bs, i, sector, 1) != 0) {
771 ret = -EIO;
772 goto ro_cleanup;
773 }
774
775 if (bdrv_write(bs->backing_hd, i, sector, 1) != 0) {
776 ret = -EIO;
777 goto ro_cleanup;
778 }
779 i++;
780 }
781 } else {
782 i += n;
783 }
784 }
785
786 if (drv->bdrv_make_empty) {
787 ret = drv->bdrv_make_empty(bs);
788 bdrv_flush(bs);
789 }
790
791 /*
792 * Make sure all data we wrote to the backing device is actually
793 * stable on disk.
794 */
795 if (bs->backing_hd)
796 bdrv_flush(bs->backing_hd);
797
798 ro_cleanup:
799
800 if (ro) {
801 /* re-open as RO */
802 bdrv_delete(bs->backing_hd);
803 bs->backing_hd = NULL;
804 bs_ro = bdrv_new("");
805 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR, drv);
806 if (ret < 0) {
807 bdrv_delete(bs_ro);
808 /* drive not functional anymore */
809 bs->drv = NULL;
810 return ret;
811 }
812 bs->backing_hd = bs_ro;
813 bs->backing_hd->keep_read_only = 0;
814 }
815
816 return ret;
817 }
818
819 void bdrv_commit_all(void)
820 {
821 BlockDriverState *bs;
822
823 QTAILQ_FOREACH(bs, &bdrv_states, list) {
824 bdrv_commit(bs);
825 }
826 }
827
828 /*
829 * Return values:
830 * 0 - success
831 * -EINVAL - backing format specified, but no file
832 * -ENOSPC - can't update the backing file because no space is left in the
833 * image file header
834 * -ENOTSUP - format driver doesn't support changing the backing file
835 */
836 int bdrv_change_backing_file(BlockDriverState *bs,
837 const char *backing_file, const char *backing_fmt)
838 {
839 BlockDriver *drv = bs->drv;
840
841 if (drv->bdrv_change_backing_file != NULL) {
842 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
843 } else {
844 return -ENOTSUP;
845 }
846 }
847
848 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
849 size_t size)
850 {
851 int64_t len;
852
853 if (!bdrv_is_inserted(bs))
854 return -ENOMEDIUM;
855
856 if (bs->growable)
857 return 0;
858
859 len = bdrv_getlength(bs);
860
861 if (offset < 0)
862 return -EIO;
863
864 if ((offset > len) || (len - offset < size))
865 return -EIO;
866
867 return 0;
868 }
869
870 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
871 int nb_sectors)
872 {
873 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
874 nb_sectors * BDRV_SECTOR_SIZE);
875 }
876
877 /* return < 0 if error. See bdrv_write() for the return codes */
878 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
879 uint8_t *buf, int nb_sectors)
880 {
881 BlockDriver *drv = bs->drv;
882
883 if (!drv)
884 return -ENOMEDIUM;
885 if (bdrv_check_request(bs, sector_num, nb_sectors))
886 return -EIO;
887
888 return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
889 }
890
891 static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
892 int nb_sectors, int dirty)
893 {
894 int64_t start, end;
895 unsigned long val, idx, bit;
896
897 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
898 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
899
900 for (; start <= end; start++) {
901 idx = start / (sizeof(unsigned long) * 8);
902 bit = start % (sizeof(unsigned long) * 8);
903 val = bs->dirty_bitmap[idx];
904 if (dirty) {
905 if (!(val & (1 << bit))) {
906 bs->dirty_count++;
907 val |= 1 << bit;
908 }
909 } else {
910 if (val & (1 << bit)) {
911 bs->dirty_count--;
912 val &= ~(1 << bit);
913 }
914 }
915 bs->dirty_bitmap[idx] = val;
916 }
917 }
918
919 /* Return < 0 if error. Important errors are:
920 -EIO generic I/O error (may happen for all errors)
921 -ENOMEDIUM No media inserted.
922 -EINVAL Invalid sector number or nb_sectors
923 -EACCES Trying to write a read-only device
924 */
925 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
926 const uint8_t *buf, int nb_sectors)
927 {
928 BlockDriver *drv = bs->drv;
929 if (!bs->drv)
930 return -ENOMEDIUM;
931 if (bs->read_only)
932 return -EACCES;
933 if (bdrv_check_request(bs, sector_num, nb_sectors))
934 return -EIO;
935
936 if (bs->dirty_bitmap) {
937 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
938 }
939
940 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
941 bs->wr_highest_sector = sector_num + nb_sectors - 1;
942 }
943
944 return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
945 }
946
947 int bdrv_pread(BlockDriverState *bs, int64_t offset,
948 void *buf, int count1)
949 {
950 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
951 int len, nb_sectors, count;
952 int64_t sector_num;
953 int ret;
954
955 count = count1;
956 /* first read to align to sector start */
957 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
958 if (len > count)
959 len = count;
960 sector_num = offset >> BDRV_SECTOR_BITS;
961 if (len > 0) {
962 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
963 return ret;
964 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
965 count -= len;
966 if (count == 0)
967 return count1;
968 sector_num++;
969 buf += len;
970 }
971
972 /* read the sectors "in place" */
973 nb_sectors = count >> BDRV_SECTOR_BITS;
974 if (nb_sectors > 0) {
975 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
976 return ret;
977 sector_num += nb_sectors;
978 len = nb_sectors << BDRV_SECTOR_BITS;
979 buf += len;
980 count -= len;
981 }
982
983 /* add data from the last sector */
984 if (count > 0) {
985 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
986 return ret;
987 memcpy(buf, tmp_buf, count);
988 }
989 return count1;
990 }
991
992 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
993 const void *buf, int count1)
994 {
995 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
996 int len, nb_sectors, count;
997 int64_t sector_num;
998 int ret;
999
1000 count = count1;
1001 /* first write to align to sector start */
1002 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1003 if (len > count)
1004 len = count;
1005 sector_num = offset >> BDRV_SECTOR_BITS;
1006 if (len > 0) {
1007 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1008 return ret;
1009 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
1010 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1011 return ret;
1012 count -= len;
1013 if (count == 0)
1014 return count1;
1015 sector_num++;
1016 buf += len;
1017 }
1018
1019 /* write the sectors "in place" */
1020 nb_sectors = count >> BDRV_SECTOR_BITS;
1021 if (nb_sectors > 0) {
1022 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1023 return ret;
1024 sector_num += nb_sectors;
1025 len = nb_sectors << BDRV_SECTOR_BITS;
1026 buf += len;
1027 count -= len;
1028 }
1029
1030 /* add data from the last sector */
1031 if (count > 0) {
1032 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1033 return ret;
1034 memcpy(tmp_buf, buf, count);
1035 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1036 return ret;
1037 }
1038 return count1;
1039 }
1040
1041 /*
1042 * Writes to the file and ensures that no writes are reordered across this
1043 * request (acts as a barrier)
1044 *
1045 * Returns 0 on success, -errno in error cases.
1046 */
1047 int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1048 const void *buf, int count)
1049 {
1050 int ret;
1051
1052 ret = bdrv_pwrite(bs, offset, buf, count);
1053 if (ret < 0) {
1054 return ret;
1055 }
1056
1057 /* No flush needed for cache=writethrough, it uses O_DSYNC */
1058 if ((bs->open_flags & BDRV_O_CACHE_MASK) != 0) {
1059 bdrv_flush(bs);
1060 }
1061
1062 return 0;
1063 }
1064
1065 /*
1066 * Writes to the file and ensures that no writes are reordered across this
1067 * request (acts as a barrier)
1068 *
1069 * Returns 0 on success, -errno in error cases.
1070 */
1071 int bdrv_write_sync(BlockDriverState *bs, int64_t sector_num,
1072 const uint8_t *buf, int nb_sectors)
1073 {
1074 return bdrv_pwrite_sync(bs, BDRV_SECTOR_SIZE * sector_num,
1075 buf, BDRV_SECTOR_SIZE * nb_sectors);
1076 }
1077
1078 /**
1079 * Truncate file to 'offset' bytes (needed only for file protocols)
1080 */
1081 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1082 {
1083 BlockDriver *drv = bs->drv;
1084 int ret;
1085 if (!drv)
1086 return -ENOMEDIUM;
1087 if (!drv->bdrv_truncate)
1088 return -ENOTSUP;
1089 if (bs->read_only)
1090 return -EACCES;
1091 ret = drv->bdrv_truncate(bs, offset);
1092 if (ret == 0) {
1093 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
1094 }
1095 return ret;
1096 }
1097
1098 /**
1099 * Length of a file in bytes. Return < 0 if error or unknown.
1100 */
1101 int64_t bdrv_getlength(BlockDriverState *bs)
1102 {
1103 BlockDriver *drv = bs->drv;
1104 if (!drv)
1105 return -ENOMEDIUM;
1106
1107 /* Fixed size devices use the total_sectors value for speed instead of
1108 issuing a length query (like lseek) on each call. Also, legacy block
1109 drivers don't provide a bdrv_getlength function and must use
1110 total_sectors. */
1111 if (!bs->growable || !drv->bdrv_getlength) {
1112 return bs->total_sectors * BDRV_SECTOR_SIZE;
1113 }
1114 return drv->bdrv_getlength(bs);
1115 }
1116
1117 /* return 0 as number of sectors if no device present or error */
1118 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
1119 {
1120 int64_t length;
1121 length = bdrv_getlength(bs);
1122 if (length < 0)
1123 length = 0;
1124 else
1125 length = length >> BDRV_SECTOR_BITS;
1126 *nb_sectors_ptr = length;
1127 }
1128
/* One entry of the MSDOS partition table, as laid out on disk (packed). */
struct partition {
    /* 0x80 - active */
    uint8_t boot_ind;
    /* starting head */
    uint8_t head;
    /* starting sector */
    uint8_t sector;
    /* starting cylinder */
    uint8_t cyl;
    /* What partition type */
    uint8_t sys_ind;
    /* end head */
    uint8_t end_head;
    /* end sector */
    uint8_t end_sector;
    /* end cylinder */
    uint8_t end_cyl;
    /* starting sector counting from 0 */
    uint32_t start_sect;
    /* nr of sectors in partition */
    uint32_t nr_sects;
} __attribute__((packed));
1141
1142 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1143 static int guess_disk_lchs(BlockDriverState *bs,
1144 int *pcylinders, int *pheads, int *psectors)
1145 {
1146 uint8_t buf[BDRV_SECTOR_SIZE];
1147 int ret, i, heads, sectors, cylinders;
1148 struct partition *p;
1149 uint32_t nr_sects;
1150 uint64_t nb_sectors;
1151
1152 bdrv_get_geometry(bs, &nb_sectors);
1153
1154 ret = bdrv_read(bs, 0, buf, 1);
1155 if (ret < 0)
1156 return -1;
1157 /* test msdos magic */
1158 if (buf[510] != 0x55 || buf[511] != 0xaa)
1159 return -1;
1160 for(i = 0; i < 4; i++) {
1161 p = ((struct partition *)(buf + 0x1be)) + i;
1162 nr_sects = le32_to_cpu(p->nr_sects);
1163 if (nr_sects && p->end_head) {
1164 /* We make the assumption that the partition terminates on
1165 a cylinder boundary */
1166 heads = p->end_head + 1;
1167 sectors = p->end_sector & 63;
1168 if (sectors == 0)
1169 continue;
1170 cylinders = nb_sectors / (heads * sectors);
1171 if (cylinders < 1 || cylinders > 16383)
1172 continue;
1173 *pheads = heads;
1174 *psectors = sectors;
1175 *pcylinders = cylinders;
1176 #if 0
1177 printf("guessed geometry: LCHS=%d %d %d\n",
1178 cylinders, heads, sectors);
1179 #endif
1180 return 0;
1181 }
1182 }
1183 return -1;
1184 }
1185
1186 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1187 {
1188 int translation, lba_detected = 0;
1189 int cylinders, heads, secs;
1190 uint64_t nb_sectors;
1191
1192 /* if a geometry hint is available, use it */
1193 bdrv_get_geometry(bs, &nb_sectors);
1194 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1195 translation = bdrv_get_translation_hint(bs);
1196 if (cylinders != 0) {
1197 *pcyls = cylinders;
1198 *pheads = heads;
1199 *psecs = secs;
1200 } else {
1201 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1202 if (heads > 16) {
1203 /* if heads > 16, it means that a BIOS LBA
1204 translation was active, so the default
1205 hardware geometry is OK */
1206 lba_detected = 1;
1207 goto default_geometry;
1208 } else {
1209 *pcyls = cylinders;
1210 *pheads = heads;
1211 *psecs = secs;
1212 /* disable any translation to be in sync with
1213 the logical geometry */
1214 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1215 bdrv_set_translation_hint(bs,
1216 BIOS_ATA_TRANSLATION_NONE);
1217 }
1218 }
1219 } else {
1220 default_geometry:
1221 /* if no geometry, use a standard physical disk geometry */
1222 cylinders = nb_sectors / (16 * 63);
1223
1224 if (cylinders > 16383)
1225 cylinders = 16383;
1226 else if (cylinders < 2)
1227 cylinders = 2;
1228 *pcyls = cylinders;
1229 *pheads = 16;
1230 *psecs = 63;
1231 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1232 if ((*pcyls * *pheads) <= 131072) {
1233 bdrv_set_translation_hint(bs,
1234 BIOS_ATA_TRANSLATION_LARGE);
1235 } else {
1236 bdrv_set_translation_hint(bs,
1237 BIOS_ATA_TRANSLATION_LBA);
1238 }
1239 }
1240 }
1241 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1242 }
1243 }
1244
1245 void bdrv_set_geometry_hint(BlockDriverState *bs,
1246 int cyls, int heads, int secs)
1247 {
1248 bs->cyls = cyls;
1249 bs->heads = heads;
1250 bs->secs = secs;
1251 }
1252
1253 void bdrv_set_type_hint(BlockDriverState *bs, int type)
1254 {
1255 bs->type = type;
1256 bs->removable = ((type == BDRV_TYPE_CDROM ||
1257 type == BDRV_TYPE_FLOPPY));
1258 }
1259
1260 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1261 {
1262 bs->translation = translation;
1263 }
1264
1265 void bdrv_get_geometry_hint(BlockDriverState *bs,
1266 int *pcyls, int *pheads, int *psecs)
1267 {
1268 *pcyls = bs->cyls;
1269 *pheads = bs->heads;
1270 *psecs = bs->secs;
1271 }
1272
1273 int bdrv_get_type_hint(BlockDriverState *bs)
1274 {
1275 return bs->type;
1276 }
1277
1278 int bdrv_get_translation_hint(BlockDriverState *bs)
1279 {
1280 return bs->translation;
1281 }
1282
1283 void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1284 BlockErrorAction on_write_error)
1285 {
1286 bs->on_read_error = on_read_error;
1287 bs->on_write_error = on_write_error;
1288 }
1289
1290 BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1291 {
1292 return is_read ? bs->on_read_error : bs->on_write_error;
1293 }
1294
1295 int bdrv_is_removable(BlockDriverState *bs)
1296 {
1297 return bs->removable;
1298 }
1299
1300 int bdrv_is_read_only(BlockDriverState *bs)
1301 {
1302 return bs->read_only;
1303 }
1304
1305 int bdrv_is_sg(BlockDriverState *bs)
1306 {
1307 return bs->sg;
1308 }
1309
1310 int bdrv_enable_write_cache(BlockDriverState *bs)
1311 {
1312 return bs->enable_write_cache;
1313 }
1314
1315 /* XXX: no longer used */
1316 void bdrv_set_change_cb(BlockDriverState *bs,
1317 void (*change_cb)(void *opaque), void *opaque)
1318 {
1319 bs->change_cb = change_cb;
1320 bs->change_opaque = opaque;
1321 }
1322
1323 int bdrv_is_encrypted(BlockDriverState *bs)
1324 {
1325 if (bs->backing_hd && bs->backing_hd->encrypted)
1326 return 1;
1327 return bs->encrypted;
1328 }
1329
1330 int bdrv_key_required(BlockDriverState *bs)
1331 {
1332 BlockDriverState *backing_hd = bs->backing_hd;
1333
1334 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1335 return 1;
1336 return (bs->encrypted && !bs->valid_key);
1337 }
1338
1339 int bdrv_set_key(BlockDriverState *bs, const char *key)
1340 {
1341 int ret;
1342 if (bs->backing_hd && bs->backing_hd->encrypted) {
1343 ret = bdrv_set_key(bs->backing_hd, key);
1344 if (ret < 0)
1345 return ret;
1346 if (!bs->encrypted)
1347 return 0;
1348 }
1349 if (!bs->encrypted) {
1350 return -EINVAL;
1351 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1352 return -ENOMEDIUM;
1353 }
1354 ret = bs->drv->bdrv_set_key(bs, key);
1355 if (ret < 0) {
1356 bs->valid_key = 0;
1357 } else if (!bs->valid_key) {
1358 bs->valid_key = 1;
1359 /* call the change callback now, we skipped it on open */
1360 bs->media_changed = 1;
1361 if (bs->change_cb)
1362 bs->change_cb(bs->change_opaque);
1363 }
1364 return ret;
1365 }
1366
1367 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1368 {
1369 if (!bs->drv) {
1370 buf[0] = '\0';
1371 } else {
1372 pstrcpy(buf, buf_size, bs->drv->format_name);
1373 }
1374 }
1375
1376 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1377 void *opaque)
1378 {
1379 BlockDriver *drv;
1380
1381 QLIST_FOREACH(drv, &bdrv_drivers, list) {
1382 it(opaque, drv->format_name);
1383 }
1384 }
1385
1386 BlockDriverState *bdrv_find(const char *name)
1387 {
1388 BlockDriverState *bs;
1389
1390 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1391 if (!strcmp(name, bs->device_name)) {
1392 return bs;
1393 }
1394 }
1395 return NULL;
1396 }
1397
1398 BlockDriverState *bdrv_next(BlockDriverState *bs)
1399 {
1400 if (!bs) {
1401 return QTAILQ_FIRST(&bdrv_states);
1402 }
1403 return QTAILQ_NEXT(bs, list);
1404 }
1405
1406 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1407 {
1408 BlockDriverState *bs;
1409
1410 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1411 it(opaque, bs);
1412 }
1413 }
1414
1415 const char *bdrv_get_device_name(BlockDriverState *bs)
1416 {
1417 return bs->device_name;
1418 }
1419
1420 void bdrv_flush(BlockDriverState *bs)
1421 {
1422 if (bs->open_flags & BDRV_O_NO_FLUSH) {
1423 return;
1424 }
1425
1426 if (bs->drv && bs->drv->bdrv_flush)
1427 bs->drv->bdrv_flush(bs);
1428 }
1429
1430 void bdrv_flush_all(void)
1431 {
1432 BlockDriverState *bs;
1433
1434 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1435 if (bs->drv && !bdrv_is_read_only(bs) &&
1436 (!bdrv_is_removable(bs) || bdrv_is_inserted(bs))) {
1437 bdrv_flush(bs);
1438 }
1439 }
1440 }
1441
1442 int bdrv_has_zero_init(BlockDriverState *bs)
1443 {
1444 assert(bs->drv);
1445
1446 if (bs->drv->no_zero_init) {
1447 return 0;
1448 } else if (bs->file) {
1449 return bdrv_has_zero_init(bs->file);
1450 }
1451
1452 return 1;
1453 }
1454
1455 /*
1456 * Returns true iff the specified sector is present in the disk image. Drivers
1457 * not implementing the functionality are assumed to not support backing files,
1458 * hence all their sectors are reported as allocated.
1459 *
1460 * 'pnum' is set to the number of sectors (including and immediately following
1461 * the specified sector) that are known to be in the same
1462 * allocated/unallocated state.
1463 *
1464 * 'nb_sectors' is the max value 'pnum' should be set to.
1465 */
1466 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1467 int *pnum)
1468 {
1469 int64_t n;
1470 if (!bs->drv->bdrv_is_allocated) {
1471 if (sector_num >= bs->total_sectors) {
1472 *pnum = 0;
1473 return 0;
1474 }
1475 n = bs->total_sectors - sector_num;
1476 *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1477 return 1;
1478 }
1479 return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1480 }
1481
1482 void bdrv_mon_event(const BlockDriverState *bdrv,
1483 BlockMonEventAction action, int is_read)
1484 {
1485 QObject *data;
1486 const char *action_str;
1487
1488 switch (action) {
1489 case BDRV_ACTION_REPORT:
1490 action_str = "report";
1491 break;
1492 case BDRV_ACTION_IGNORE:
1493 action_str = "ignore";
1494 break;
1495 case BDRV_ACTION_STOP:
1496 action_str = "stop";
1497 break;
1498 default:
1499 abort();
1500 }
1501
1502 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1503 bdrv->device_name,
1504 action_str,
1505 is_read ? "read" : "write");
1506 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1507
1508 qobject_decref(data);
1509 }
1510
1511 static void bdrv_print_dict(QObject *obj, void *opaque)
1512 {
1513 QDict *bs_dict;
1514 Monitor *mon = opaque;
1515
1516 bs_dict = qobject_to_qdict(obj);
1517
1518 monitor_printf(mon, "%s: type=%s removable=%d",
1519 qdict_get_str(bs_dict, "device"),
1520 qdict_get_str(bs_dict, "type"),
1521 qdict_get_bool(bs_dict, "removable"));
1522
1523 if (qdict_get_bool(bs_dict, "removable")) {
1524 monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
1525 }
1526
1527 if (qdict_haskey(bs_dict, "inserted")) {
1528 QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted"));
1529
1530 monitor_printf(mon, " file=");
1531 monitor_print_filename(mon, qdict_get_str(qdict, "file"));
1532 if (qdict_haskey(qdict, "backing_file")) {
1533 monitor_printf(mon, " backing_file=");
1534 monitor_print_filename(mon, qdict_get_str(qdict, "backing_file"));
1535 }
1536 monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
1537 qdict_get_bool(qdict, "ro"),
1538 qdict_get_str(qdict, "drv"),
1539 qdict_get_bool(qdict, "encrypted"));
1540 } else {
1541 monitor_printf(mon, " [not inserted]");
1542 }
1543
1544 monitor_printf(mon, "\n");
1545 }
1546
1547 void bdrv_info_print(Monitor *mon, const QObject *data)
1548 {
1549 qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon);
1550 }
1551
1552 void bdrv_info(Monitor *mon, QObject **ret_data)
1553 {
1554 QList *bs_list;
1555 BlockDriverState *bs;
1556
1557 bs_list = qlist_new();
1558
1559 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1560 QObject *bs_obj;
1561 const char *type = "unknown";
1562
1563 switch(bs->type) {
1564 case BDRV_TYPE_HD:
1565 type = "hd";
1566 break;
1567 case BDRV_TYPE_CDROM:
1568 type = "cdrom";
1569 break;
1570 case BDRV_TYPE_FLOPPY:
1571 type = "floppy";
1572 break;
1573 }
1574
1575 bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': %s, "
1576 "'removable': %i, 'locked': %i }",
1577 bs->device_name, type, bs->removable,
1578 bs->locked);
1579
1580 if (bs->drv) {
1581 QObject *obj;
1582 QDict *bs_dict = qobject_to_qdict(bs_obj);
1583
1584 obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
1585 "'encrypted': %i }",
1586 bs->filename, bs->read_only,
1587 bs->drv->format_name,
1588 bdrv_is_encrypted(bs));
1589 if (bs->backing_file[0] != '\0') {
1590 QDict *qdict = qobject_to_qdict(obj);
1591 qdict_put(qdict, "backing_file",
1592 qstring_from_str(bs->backing_file));
1593 }
1594
1595 qdict_put_obj(bs_dict, "inserted", obj);
1596 }
1597 qlist_append_obj(bs_list, bs_obj);
1598 }
1599
1600 *ret_data = QOBJECT(bs_list);
1601 }
1602
1603 static void bdrv_stats_iter(QObject *data, void *opaque)
1604 {
1605 QDict *qdict;
1606 Monitor *mon = opaque;
1607
1608 qdict = qobject_to_qdict(data);
1609 monitor_printf(mon, "%s:", qdict_get_str(qdict, "device"));
1610
1611 qdict = qobject_to_qdict(qdict_get(qdict, "stats"));
1612 monitor_printf(mon, " rd_bytes=%" PRId64
1613 " wr_bytes=%" PRId64
1614 " rd_operations=%" PRId64
1615 " wr_operations=%" PRId64
1616 "\n",
1617 qdict_get_int(qdict, "rd_bytes"),
1618 qdict_get_int(qdict, "wr_bytes"),
1619 qdict_get_int(qdict, "rd_operations"),
1620 qdict_get_int(qdict, "wr_operations"));
1621 }
1622
1623 void bdrv_stats_print(Monitor *mon, const QObject *data)
1624 {
1625 qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon);
1626 }
1627
1628 static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
1629 {
1630 QObject *res;
1631 QDict *dict;
1632
1633 res = qobject_from_jsonf("{ 'stats': {"
1634 "'rd_bytes': %" PRId64 ","
1635 "'wr_bytes': %" PRId64 ","
1636 "'rd_operations': %" PRId64 ","
1637 "'wr_operations': %" PRId64 ","
1638 "'wr_highest_offset': %" PRId64
1639 "} }",
1640 bs->rd_bytes, bs->wr_bytes,
1641 bs->rd_ops, bs->wr_ops,
1642 bs->wr_highest_sector *
1643 (uint64_t)BDRV_SECTOR_SIZE);
1644 dict = qobject_to_qdict(res);
1645
1646 if (*bs->device_name) {
1647 qdict_put(dict, "device", qstring_from_str(bs->device_name));
1648 }
1649
1650 if (bs->file) {
1651 QObject *parent = bdrv_info_stats_bs(bs->file);
1652 qdict_put_obj(dict, "parent", parent);
1653 }
1654
1655 return res;
1656 }
1657
1658 void bdrv_info_stats(Monitor *mon, QObject **ret_data)
1659 {
1660 QObject *obj;
1661 QList *devices;
1662 BlockDriverState *bs;
1663
1664 devices = qlist_new();
1665
1666 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1667 obj = bdrv_info_stats_bs(bs);
1668 qlist_append_obj(devices, obj);
1669 }
1670
1671 *ret_data = QOBJECT(devices);
1672 }
1673
1674 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
1675 {
1676 if (bs->backing_hd && bs->backing_hd->encrypted)
1677 return bs->backing_file;
1678 else if (bs->encrypted)
1679 return bs->filename;
1680 else
1681 return NULL;
1682 }
1683
1684 void bdrv_get_backing_filename(BlockDriverState *bs,
1685 char *filename, int filename_size)
1686 {
1687 if (!bs->backing_file) {
1688 pstrcpy(filename, filename_size, "");
1689 } else {
1690 pstrcpy(filename, filename_size, bs->backing_file);
1691 }
1692 }
1693
1694 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
1695 const uint8_t *buf, int nb_sectors)
1696 {
1697 BlockDriver *drv = bs->drv;
1698 if (!drv)
1699 return -ENOMEDIUM;
1700 if (!drv->bdrv_write_compressed)
1701 return -ENOTSUP;
1702 if (bdrv_check_request(bs, sector_num, nb_sectors))
1703 return -EIO;
1704
1705 if (bs->dirty_bitmap) {
1706 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1707 }
1708
1709 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
1710 }
1711
1712 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1713 {
1714 BlockDriver *drv = bs->drv;
1715 if (!drv)
1716 return -ENOMEDIUM;
1717 if (!drv->bdrv_get_info)
1718 return -ENOTSUP;
1719 memset(bdi, 0, sizeof(*bdi));
1720 return drv->bdrv_get_info(bs, bdi);
1721 }
1722
1723 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
1724 int64_t pos, int size)
1725 {
1726 BlockDriver *drv = bs->drv;
1727 if (!drv)
1728 return -ENOMEDIUM;
1729 if (drv->bdrv_save_vmstate)
1730 return drv->bdrv_save_vmstate(bs, buf, pos, size);
1731 if (bs->file)
1732 return bdrv_save_vmstate(bs->file, buf, pos, size);
1733 return -ENOTSUP;
1734 }
1735
1736 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
1737 int64_t pos, int size)
1738 {
1739 BlockDriver *drv = bs->drv;
1740 if (!drv)
1741 return -ENOMEDIUM;
1742 if (drv->bdrv_load_vmstate)
1743 return drv->bdrv_load_vmstate(bs, buf, pos, size);
1744 if (bs->file)
1745 return bdrv_load_vmstate(bs->file, buf, pos, size);
1746 return -ENOTSUP;
1747 }
1748
1749 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
1750 {
1751 BlockDriver *drv = bs->drv;
1752
1753 if (!drv || !drv->bdrv_debug_event) {
1754 return;
1755 }
1756
1757 return drv->bdrv_debug_event(bs, event);
1758
1759 }
1760
1761 /**************************************************************/
1762 /* handling of snapshots */
1763
1764 int bdrv_can_snapshot(BlockDriverState *bs)
1765 {
1766 BlockDriver *drv = bs->drv;
1767 if (!drv || bdrv_is_removable(bs) || bdrv_is_read_only(bs)) {
1768 return 0;
1769 }
1770
1771 if (!drv->bdrv_snapshot_create) {
1772 if (bs->file != NULL) {
1773 return bdrv_can_snapshot(bs->file);
1774 }
1775 return 0;
1776 }
1777
1778 return 1;
1779 }
1780
1781 int bdrv_snapshot_create(BlockDriverState *bs,
1782 QEMUSnapshotInfo *sn_info)
1783 {
1784 BlockDriver *drv = bs->drv;
1785 if (!drv)
1786 return -ENOMEDIUM;
1787 if (drv->bdrv_snapshot_create)
1788 return drv->bdrv_snapshot_create(bs, sn_info);
1789 if (bs->file)
1790 return bdrv_snapshot_create(bs->file, sn_info);
1791 return -ENOTSUP;
1792 }
1793
1794 int bdrv_snapshot_goto(BlockDriverState *bs,
1795 const char *snapshot_id)
1796 {
1797 BlockDriver *drv = bs->drv;
1798 int ret, open_ret;
1799
1800 if (!drv)
1801 return -ENOMEDIUM;
1802 if (drv->bdrv_snapshot_goto)
1803 return drv->bdrv_snapshot_goto(bs, snapshot_id);
1804
1805 if (bs->file) {
1806 drv->bdrv_close(bs);
1807 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
1808 open_ret = drv->bdrv_open(bs, bs->open_flags);
1809 if (open_ret < 0) {
1810 bdrv_delete(bs->file);
1811 bs->drv = NULL;
1812 return open_ret;
1813 }
1814 return ret;
1815 }
1816
1817 return -ENOTSUP;
1818 }
1819
1820 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
1821 {
1822 BlockDriver *drv = bs->drv;
1823 if (!drv)
1824 return -ENOMEDIUM;
1825 if (drv->bdrv_snapshot_delete)
1826 return drv->bdrv_snapshot_delete(bs, snapshot_id);
1827 if (bs->file)
1828 return bdrv_snapshot_delete(bs->file, snapshot_id);
1829 return -ENOTSUP;
1830 }
1831
1832 int bdrv_snapshot_list(BlockDriverState *bs,
1833 QEMUSnapshotInfo **psn_info)
1834 {
1835 BlockDriver *drv = bs->drv;
1836 if (!drv)
1837 return -ENOMEDIUM;
1838 if (drv->bdrv_snapshot_list)
1839 return drv->bdrv_snapshot_list(bs, psn_info);
1840 if (bs->file)
1841 return bdrv_snapshot_list(bs->file, psn_info);
1842 return -ENOTSUP;
1843 }
1844
#define NB_SUFFIXES 4

/*
 * Format 'size' into 'buf' (capacity 'buf_size') as a short human readable
 * string and return 'buf'.
 *
 * Values up to 999 are printed as plain integers. Larger values use the
 * 1024-based suffixes K, M, G and T: one decimal place while the value is
 * below 10 units, a rounded integer otherwise. Anything too large for the
 * other suffixes is expressed in T.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = { 'K', 'M', 'G', 'T' };
    int64_t unit = 1024;
    int idx;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (idx = 0; idx < NB_SUFFIXES; idx++, unit *= 1024) {
        if (size < 10 * unit) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit, suffixes[idx]);
            break;
        }
        if (size < 1000 * unit || idx == NB_SUFFIXES - 1) {
            /* adding half a unit rounds to nearest */
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     (size + (unit >> 1)) / unit, suffixes[idx]);
            break;
        }
    }
    return buf;
}
1874
1875 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
1876 {
1877 char buf1[128], date_buf[128], clock_buf[128];
1878 #ifdef _WIN32
1879 struct tm *ptm;
1880 #else
1881 struct tm tm;
1882 #endif
1883 time_t ti;
1884 int64_t secs;
1885
1886 if (!sn) {
1887 snprintf(buf, buf_size,
1888 "%-10s%-20s%7s%20s%15s",
1889 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
1890 } else {
1891 ti = sn->date_sec;
1892 #ifdef _WIN32
1893 ptm = localtime(&ti);
1894 strftime(date_buf, sizeof(date_buf),
1895 "%Y-%m-%d %H:%M:%S", ptm);
1896 #else
1897 localtime_r(&ti, &tm);
1898 strftime(date_buf, sizeof(date_buf),
1899 "%Y-%m-%d %H:%M:%S", &tm);
1900 #endif
1901 secs = sn->vm_clock_nsec / 1000000000;
1902 snprintf(clock_buf, sizeof(clock_buf),
1903 "%02d:%02d:%02d.%03d",
1904 (int)(secs / 3600),
1905 (int)((secs / 60) % 60),
1906 (int)(secs % 60),
1907 (int)((sn->vm_clock_nsec / 1000000) % 1000));
1908 snprintf(buf, buf_size,
1909 "%-10s%-20s%7s%20s%15s",
1910 sn->id_str, sn->name,
1911 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
1912 date_buf,
1913 clock_buf);
1914 }
1915 return buf;
1916 }
1917
1918
1919 /**************************************************************/
1920 /* async I/Os */
1921
1922 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
1923 QEMUIOVector *qiov, int nb_sectors,
1924 BlockDriverCompletionFunc *cb, void *opaque)
1925 {
1926 BlockDriver *drv = bs->drv;
1927 BlockDriverAIOCB *ret;
1928
1929 if (!drv)
1930 return NULL;
1931 if (bdrv_check_request(bs, sector_num, nb_sectors))
1932 return NULL;
1933
1934 ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
1935 cb, opaque);
1936
1937 if (ret) {
1938 /* Update stats even though technically transfer has not happened. */
1939 bs->rd_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
1940 bs->rd_ops ++;
1941 }
1942
1943 return ret;
1944 }
1945
1946 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
1947 QEMUIOVector *qiov, int nb_sectors,
1948 BlockDriverCompletionFunc *cb, void *opaque)
1949 {
1950 BlockDriver *drv = bs->drv;
1951 BlockDriverAIOCB *ret;
1952
1953 if (!drv)
1954 return NULL;
1955 if (bs->read_only)
1956 return NULL;
1957 if (bdrv_check_request(bs, sector_num, nb_sectors))
1958 return NULL;
1959
1960 if (bs->dirty_bitmap) {
1961 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1962 }
1963
1964 ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
1965 cb, opaque);
1966
1967 if (ret) {
1968 /* Update stats even though technically transfer has not happened. */
1969 bs->wr_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
1970 bs->wr_ops ++;
1971 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1972 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1973 }
1974 }
1975
1976 return ret;
1977 }
1978
1979
1980 typedef struct MultiwriteCB {
1981 int error;
1982 int num_requests;
1983 int num_callbacks;
1984 struct {
1985 BlockDriverCompletionFunc *cb;
1986 void *opaque;
1987 QEMUIOVector *free_qiov;
1988 void *free_buf;
1989 } callbacks[];
1990 } MultiwriteCB;
1991
1992 static void multiwrite_user_cb(MultiwriteCB *mcb)
1993 {
1994 int i;
1995
1996 for (i = 0; i < mcb->num_callbacks; i++) {
1997 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1998 if (mcb->callbacks[i].free_qiov) {
1999 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2000 }
2001 qemu_free(mcb->callbacks[i].free_qiov);
2002 qemu_vfree(mcb->callbacks[i].free_buf);
2003 }
2004 }
2005
2006 static void multiwrite_cb(void *opaque, int ret)
2007 {
2008 MultiwriteCB *mcb = opaque;
2009
2010 if (ret < 0 && !mcb->error) {
2011 mcb->error = ret;
2012 multiwrite_user_cb(mcb);
2013 }
2014
2015 mcb->num_requests--;
2016 if (mcb->num_requests == 0) {
2017 if (mcb->error == 0) {
2018 multiwrite_user_cb(mcb);
2019 }
2020 qemu_free(mcb);
2021 }
2022 }
2023
2024 static int multiwrite_req_compare(const void *a, const void *b)
2025 {
2026 const BlockRequest *req1 = a, *req2 = b;
2027
2028 /*
2029 * Note that we can't simply subtract req2->sector from req1->sector
2030 * here as that could overflow the return value.
2031 */
2032 if (req1->sector > req2->sector) {
2033 return 1;
2034 } else if (req1->sector < req2->sector) {
2035 return -1;
2036 } else {
2037 return 0;
2038 }
2039 }
2040
2041 /*
2042 * Takes a bunch of requests and tries to merge them. Returns the number of
2043 * requests that remain after merging.
2044 */
2045 static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2046 int num_reqs, MultiwriteCB *mcb)
2047 {
2048 int i, outidx;
2049
2050 // Sort requests by start sector
2051 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2052
2053 // Check if adjacent requests touch the same clusters. If so, combine them,
2054 // filling up gaps with zero sectors.
2055 outidx = 0;
2056 for (i = 1; i < num_reqs; i++) {
2057 int merge = 0;
2058 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2059
2060 // This handles the cases that are valid for all block drivers, namely
2061 // exactly sequential writes and overlapping writes.
2062 if (reqs[i].sector <= oldreq_last) {
2063 merge = 1;
2064 }
2065
2066 // The block driver may decide that it makes sense to combine requests
2067 // even if there is a gap of some sectors between them. In this case,
2068 // the gap is filled with zeros (therefore only applicable for yet
2069 // unused space in format like qcow2).
2070 if (!merge && bs->drv->bdrv_merge_requests) {
2071 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2072 }
2073
2074 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2075 merge = 0;
2076 }
2077
2078 if (merge) {
2079 size_t size;
2080 QEMUIOVector *qiov = qemu_mallocz(sizeof(*qiov));
2081 qemu_iovec_init(qiov,
2082 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2083
2084 // Add the first request to the merged one. If the requests are
2085 // overlapping, drop the last sectors of the first request.
2086 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2087 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2088
2089 // We might need to add some zeros between the two requests
2090 if (reqs[i].sector > oldreq_last) {
2091 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2092 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2093 memset(buf, 0, zero_bytes);
2094 qemu_iovec_add(qiov, buf, zero_bytes);
2095 mcb->callbacks[i].free_buf = buf;
2096 }
2097
2098 // Add the second request
2099 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2100
2101 reqs[outidx].nb_sectors = qiov->size >> 9;
2102 reqs[outidx].qiov = qiov;
2103
2104 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2105 } else {
2106 outidx++;
2107 reqs[outidx].sector = reqs[i].sector;
2108 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2109 reqs[outidx].qiov = reqs[i].qiov;
2110 }
2111 }
2112
2113 return outidx + 1;
2114 }
2115
2116 /*
2117 * Submit multiple AIO write requests at once.
2118 *
2119 * On success, the function returns 0 and all requests in the reqs array have
2120 * been submitted. In error case this function returns -1, and any of the
2121 * requests may or may not be submitted yet. In particular, this means that the
2122 * callback will be called for some of the requests, for others it won't. The
2123 * caller must check the error field of the BlockRequest to wait for the right
2124 * callbacks (if error != 0, no callback will be called).
2125 *
2126 * The implementation may modify the contents of the reqs array, e.g. to merge
2127 * requests. However, the fields opaque and error are left unmodified as they
2128 * are used to signal failure for a single request to the caller.
2129 */
2130 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2131 {
2132 BlockDriverAIOCB *acb;
2133 MultiwriteCB *mcb;
2134 int i;
2135
2136 if (num_reqs == 0) {
2137 return 0;
2138 }
2139
2140 // Create MultiwriteCB structure
2141 mcb = qemu_mallocz(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
2142 mcb->num_requests = 0;
2143 mcb->num_callbacks = num_reqs;
2144
2145 for (i = 0; i < num_reqs; i++) {
2146 mcb->callbacks[i].cb = reqs[i].cb;
2147 mcb->callbacks[i].opaque = reqs[i].opaque;
2148 }
2149
2150 // Check for mergable requests
2151 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2152
2153 // Run the aio requests
2154 for (i = 0; i < num_reqs; i++) {
2155 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2156 reqs[i].nb_sectors, multiwrite_cb, mcb);
2157
2158 if (acb == NULL) {
2159 // We can only fail the whole thing if no request has been
2160 // submitted yet. Otherwise we'll wait for the submitted AIOs to
2161 // complete and report the error in the callback.
2162 if (mcb->num_requests == 0) {
2163 reqs[i].error = -EIO;
2164 goto fail;
2165 } else {
2166 mcb->num_requests++;
2167 multiwrite_cb(mcb, -EIO);
2168 break;
2169 }
2170 } else {
2171 mcb->num_requests++;
2172 }
2173 }
2174
2175 return 0;
2176
2177 fail:
2178 qemu_free(mcb);
2179 return -1;
2180 }
2181
2182 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2183 BlockDriverCompletionFunc *cb, void *opaque)
2184 {
2185 BlockDriver *drv = bs->drv;
2186
2187 if (bs->open_flags & BDRV_O_NO_FLUSH) {
2188 return bdrv_aio_noop_em(bs, cb, opaque);
2189 }
2190
2191 if (!drv)
2192 return NULL;
2193 return drv->bdrv_aio_flush(bs, cb, opaque);
2194 }
2195
2196 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
2197 {
2198 acb->pool->cancel(acb);
2199 }
2200
2201
2202 /**************************************************************/
2203 /* async block device emulation */
2204
2205 typedef struct BlockDriverAIOCBSync {
2206 BlockDriverAIOCB common;
2207 QEMUBH *bh;
2208 int ret;
2209 /* vector translation state */
2210 QEMUIOVector *qiov;
2211 uint8_t *bounce;
2212 int is_write;
2213 } BlockDriverAIOCBSync;
2214
2215 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2216 {
2217 BlockDriverAIOCBSync *acb =
2218 container_of(blockacb, BlockDriverAIOCBSync, common);
2219 qemu_bh_delete(acb->bh);
2220 acb->bh = NULL;
2221 qemu_aio_release(acb);
2222 }
2223
2224 static AIOPool bdrv_em_aio_pool = {
2225 .aiocb_size = sizeof(BlockDriverAIOCBSync),
2226 .cancel = bdrv_aio_cancel_em,
2227 };
2228
2229 static void bdrv_aio_bh_cb(void *opaque)
2230 {
2231 BlockDriverAIOCBSync *acb = opaque;
2232
2233 if (!acb->is_write)
2234 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
2235 qemu_vfree(acb->bounce);
2236 acb->common.cb(acb->common.opaque, acb->ret);
2237 qemu_bh_delete(acb->bh);
2238 acb->bh = NULL;
2239 qemu_aio_release(acb);
2240 }
2241
2242 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2243 int64_t sector_num,
2244 QEMUIOVector *qiov,
2245 int nb_sectors,
2246 BlockDriverCompletionFunc *cb,
2247 void *opaque,
2248 int is_write)
2249
2250 {
2251 BlockDriverAIOCBSync *acb;
2252
2253 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2254 acb->is_write = is_write;
2255 acb->qiov = qiov;
2256 acb->bounce = qemu_blockalign(bs, qiov->size);
2257
2258 if (!acb->bh)
2259 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2260
2261 if (is_write) {
2262 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
2263 acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
2264 } else {
2265 acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
2266 }
2267
2268 qemu_bh_schedule(acb->bh);
2269
2270 return &acb->common;
2271 }
2272
2273 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2274 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2275 BlockDriverCompletionFunc *cb, void *opaque)
2276 {
2277 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2278 }
2279
2280 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2281 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2282 BlockDriverCompletionFunc *cb, void *opaque)
2283 {
2284 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
2285 }
2286
2287 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
2288 BlockDriverCompletionFunc *cb, void *opaque)
2289 {
2290 BlockDriverAIOCBSync *acb;
2291
2292 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2293 acb->is_write = 1; /* don't bounce in the completion hadler */
2294 acb->qiov = NULL;
2295 acb->bounce = NULL;
2296 acb->ret = 0;
2297
2298 if (!acb->bh)
2299 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2300
2301 bdrv_flush(bs);
2302 qemu_bh_schedule(acb->bh);
2303 return &acb->common;
2304 }
2305
2306 static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
2307 BlockDriverCompletionFunc *cb, void *opaque)
2308 {
2309 BlockDriverAIOCBSync *acb;
2310
2311 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2312 acb->is_write = 1; /* don't bounce in the completion handler */
2313 acb->qiov = NULL;
2314 acb->bounce = NULL;
2315 acb->ret = 0;
2316
2317 if (!acb->bh) {
2318 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2319 }
2320
2321 qemu_bh_schedule(acb->bh);
2322 return &acb->common;
2323 }
2324
2325 /**************************************************************/
2326 /* sync block device emulation */
2327
/* AIO completion callback: store 'ret' in the int that 'opaque' points to. */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
2332
2333 #define NOT_DONE 0x7fffffff
2334
2335 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
2336 uint8_t *buf, int nb_sectors)
2337 {
2338 int async_ret;
2339 BlockDriverAIOCB *acb;
2340 struct iovec iov;
2341 QEMUIOVector qiov;
2342
2343 async_context_push();
2344
2345 async_ret = NOT_DONE;
2346 iov.iov_base = (void *)buf;
2347 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
2348 qemu_iovec_init_external(&qiov, &iov, 1);
2349 acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
2350 bdrv_rw_em_cb, &async_ret);
2351 if (acb == NULL) {
2352 async_ret = -1;
2353 goto fail;
2354 }
2355
2356 while (async_ret == NOT_DONE) {
2357 qemu_aio_wait();
2358 }
2359
2360
2361 fail:
2362 async_context_pop();
2363 return async_ret;
2364 }
2365
2366 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
2367 const uint8_t *buf, int nb_sectors)
2368 {
2369 int async_ret;
2370 BlockDriverAIOCB *acb;
2371 struct iovec iov;
2372 QEMUIOVector qiov;
2373
2374 async_context_push();
2375
2376 async_ret = NOT_DONE;
2377 iov.iov_base = (void *)buf;
2378 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
2379 qemu_iovec_init_external(&qiov, &iov, 1);
2380 acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
2381 bdrv_rw_em_cb, &async_ret);
2382 if (acb == NULL) {
2383 async_ret = -1;
2384 goto fail;
2385 }
2386 while (async_ret == NOT_DONE) {
2387 qemu_aio_wait();
2388 }
2389
2390 fail:
2391 async_context_pop();
2392 return async_ret;
2393 }
2394
2395 void bdrv_init(void)
2396 {
2397 module_call_init(MODULE_INIT_BLOCK);
2398 }
2399
2400 void bdrv_init_with_whitelist(void)
2401 {
2402 use_bdrv_whitelist = 1;
2403 bdrv_init();
2404 }
2405
2406 void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2407 BlockDriverCompletionFunc *cb, void *opaque)
2408 {
2409 BlockDriverAIOCB *acb;
2410
2411 if (pool->free_aiocb) {
2412 acb = pool->free_aiocb;
2413 pool->free_aiocb = acb->next;
2414 } else {
2415 acb = qemu_mallocz(pool->aiocb_size);
2416 acb->pool = pool;
2417 }
2418 acb->bs = bs;
2419 acb->cb = cb;
2420 acb->opaque = opaque;
2421 return acb;
2422 }
2423
2424 void qemu_aio_release(void *p)
2425 {
2426 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2427 AIOPool *pool = acb->pool;
2428 acb->next = pool->free_aiocb;
2429 pool->free_aiocb = acb;
2430 }
2431
2432 /**************************************************************/
2433 /* removable device support */
2434
2435 /**
2436 * Return TRUE if the media is present
2437 */
2438 int bdrv_is_inserted(BlockDriverState *bs)
2439 {
2440 BlockDriver *drv = bs->drv;
2441 int ret;
2442 if (!drv)
2443 return 0;
2444 if (!drv->bdrv_is_inserted)
2445 return 1;
2446 ret = drv->bdrv_is_inserted(bs);
2447 return ret;
2448 }
2449
2450 /**
2451 * Return TRUE if the media changed since the last call to this
2452 * function. It is currently only used for floppy disks
2453 */
2454 int bdrv_media_changed(BlockDriverState *bs)
2455 {
2456 BlockDriver *drv = bs->drv;
2457 int ret;
2458
2459 if (!drv || !drv->bdrv_media_changed)
2460 ret = -ENOTSUP;
2461 else
2462 ret = drv->bdrv_media_changed(bs);
2463 if (ret == -ENOTSUP)
2464 ret = bs->media_changed;
2465 bs->media_changed = 0;
2466 return ret;
2467 }
2468
2469 /**
2470 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
2471 */
2472 int bdrv_eject(BlockDriverState *bs, int eject_flag)
2473 {
2474 BlockDriver *drv = bs->drv;
2475 int ret;
2476
2477 if (bs->locked) {
2478 return -EBUSY;
2479 }
2480
2481 if (!drv || !drv->bdrv_eject) {
2482 ret = -ENOTSUP;
2483 } else {
2484 ret = drv->bdrv_eject(bs, eject_flag);
2485 }
2486 if (ret == -ENOTSUP) {
2487 if (eject_flag)
2488 bdrv_close(bs);
2489 ret = 0;
2490 }
2491
2492 return ret;
2493 }
2494
2495 int bdrv_is_locked(BlockDriverState *bs)
2496 {
2497 return bs->locked;
2498 }
2499
2500 /**
2501 * Lock or unlock the media (if it is locked, the user won't be able
2502 * to eject it manually).
2503 */
2504 void bdrv_set_locked(BlockDriverState *bs, int locked)
2505 {
2506 BlockDriver *drv = bs->drv;
2507
2508 bs->locked = locked;
2509 if (drv && drv->bdrv_set_locked) {
2510 drv->bdrv_set_locked(bs, locked);
2511 }
2512 }
2513
2514 /* needed for generic scsi interface */
2515
2516 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
2517 {
2518 BlockDriver *drv = bs->drv;
2519
2520 if (drv && drv->bdrv_ioctl)
2521 return drv->bdrv_ioctl(bs, req, buf);
2522 return -ENOTSUP;
2523 }
2524
2525 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
2526 unsigned long int req, void *buf,
2527 BlockDriverCompletionFunc *cb, void *opaque)
2528 {
2529 BlockDriver *drv = bs->drv;
2530
2531 if (drv && drv->bdrv_aio_ioctl)
2532 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
2533 return NULL;
2534 }
2535
2536
2537
2538 void *qemu_blockalign(BlockDriverState *bs, size_t size)
2539 {
2540 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
2541 }
2542
2543 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
2544 {
2545 int64_t bitmap_size;
2546
2547 bs->dirty_count = 0;
2548 if (enable) {
2549 if (!bs->dirty_bitmap) {
2550 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
2551 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
2552 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
2553
2554 bs->dirty_bitmap = qemu_mallocz(bitmap_size);
2555 }
2556 } else {
2557 if (bs->dirty_bitmap) {
2558 qemu_free(bs->dirty_bitmap);
2559 bs->dirty_bitmap = NULL;
2560 }
2561 }
2562 }
2563
2564 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
2565 {
2566 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
2567
2568 if (bs->dirty_bitmap &&
2569 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
2570 return bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
2571 (1 << (chunk % (sizeof(unsigned long) * 8)));
2572 } else {
2573 return 0;
2574 }
2575 }
2576
2577 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
2578 int nr_sectors)
2579 {
2580 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
2581 }
2582
2583 int64_t bdrv_get_dirty_count(BlockDriverState *bs)
2584 {
2585 return bs->dirty_count;
2586 }