1 /*
2 * QEMU System Emulator block driver
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "monitor.h"
28 #include "block_int.h"
29 #include "module.h"
30 #include "qjson.h"
31 #include "qemu-coroutine.h"
32 #include "qmp-commands.h"
33 #include "qemu-timer.h"
34
35 #ifdef CONFIG_BSD
36 #include <sys/types.h>
37 #include <sys/stat.h>
38 #include <sys/ioctl.h>
39 #include <sys/queue.h>
40 #ifndef __DragonFly__
41 #include <sys/disk.h>
42 #endif
43 #endif
44
45 #ifdef _WIN32
46 #include <windows.h>
47 #endif
48
49 #define NOT_DONE 0x7fffffff /* used while an emulated sync operation is in progress */
50
51 typedef enum {
52 BDRV_REQ_COPY_ON_READ = 0x1,
53 BDRV_REQ_ZERO_WRITE = 0x2,
54 } BdrvRequestFlags;
55
56 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
57 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
58 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
59 BlockDriverCompletionFunc *cb, void *opaque);
60 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
61 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
62 BlockDriverCompletionFunc *cb, void *opaque);
63 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
64 int64_t sector_num, int nb_sectors,
65 QEMUIOVector *iov);
66 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
67 int64_t sector_num, int nb_sectors,
68 QEMUIOVector *iov);
69 static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
70 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
71 BdrvRequestFlags flags);
72 static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
73 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
74 BdrvRequestFlags flags);
75 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
76 int64_t sector_num,
77 QEMUIOVector *qiov,
78 int nb_sectors,
79 BlockDriverCompletionFunc *cb,
80 void *opaque,
81 bool is_write);
82 static void coroutine_fn bdrv_co_do_rw(void *opaque);
83
84 static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
85 bool is_write, double elapsed_time, uint64_t *wait);
86 static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
87 double elapsed_time, uint64_t *wait);
88 static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
89 bool is_write, int64_t *wait);
90
91 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
92 QTAILQ_HEAD_INITIALIZER(bdrv_states);
93
94 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
95 QLIST_HEAD_INITIALIZER(bdrv_drivers);
96
97 /* The device to use for VM snapshots */
98 static BlockDriverState *bs_snapshots;
99
100 /* If non-zero, use only whitelisted block drivers */
101 static int use_bdrv_whitelist;
102
103 #ifdef _WIN32
104 static int is_windows_drive_prefix(const char *filename)
105 {
106 return (((filename[0] >= 'a' && filename[0] <= 'z') ||
107 (filename[0] >= 'A' && filename[0] <= 'Z')) &&
108 filename[1] == ':');
109 }
110
111 int is_windows_drive(const char *filename)
112 {
113 if (is_windows_drive_prefix(filename) &&
114 filename[2] == '\0')
115 return 1;
116 if (strstart(filename, "\\\\.\\", NULL) ||
117 strstart(filename, "//./", NULL))
118 return 1;
119 return 0;
120 }
121 #endif
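
/* Illustrative sketch (not part of the original file): inputs that the two
 * Windows checks above accept or reject. The file names are hypothetical.
 */
#if 0
    is_windows_drive("c:");                    /* 1: bare drive letter   */
    is_windows_drive("\\\\.\\PhysicalDrive0"); /* 1: device namespace    */
    is_windows_drive("//./d:");                /* 1: forward-slash form  */
    is_windows_drive("c:\\disks\\hd.img");     /* 0: a path, not a drive */
#endif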
122
123 /* throttling disk I/O limits */
124 void bdrv_io_limits_disable(BlockDriverState *bs)
125 {
126 bs->io_limits_enabled = false;
127
128 while (qemu_co_queue_next(&bs->throttled_reqs));
129
130 if (bs->block_timer) {
131 qemu_del_timer(bs->block_timer);
132 qemu_free_timer(bs->block_timer);
133 bs->block_timer = NULL;
134 }
135
136 bs->slice_start = 0;
137 bs->slice_end = 0;
138 bs->slice_time = 0;
139 memset(&bs->io_base, 0, sizeof(bs->io_base));
140 }
141
142 static void bdrv_block_timer(void *opaque)
143 {
144 BlockDriverState *bs = opaque;
145
146 qemu_co_queue_next(&bs->throttled_reqs);
147 }
148
149 void bdrv_io_limits_enable(BlockDriverState *bs)
150 {
151 qemu_co_queue_init(&bs->throttled_reqs);
152 bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
153 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
154 bs->slice_start = qemu_get_clock_ns(vm_clock);
155 bs->slice_end = bs->slice_start + bs->slice_time;
156 memset(&bs->io_base, 0, sizeof(bs->io_base));
157 bs->io_limits_enabled = true;
158 }
159
160 bool bdrv_io_limits_enabled(BlockDriverState *bs)
161 {
162 BlockIOLimit *io_limits = &bs->io_limits;
163 return io_limits->bps[BLOCK_IO_LIMIT_READ]
164 || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
165 || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
166 || io_limits->iops[BLOCK_IO_LIMIT_READ]
167 || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
168 || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
169 }
170
171 static void bdrv_io_limits_intercept(BlockDriverState *bs,
172 bool is_write, int nb_sectors)
173 {
174 int64_t wait_time = -1;
175
176 if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
177 qemu_co_queue_wait(&bs->throttled_reqs);
178 }
179
180 /* We aim to preserve each request's ordering, in FIFO mode. The next
181 * throttled request will not be dequeued until the current request is
182 * allowed to be serviced. So if the current request still exceeds the
183 * limits, it is re-inserted at the head, and all requests following it
184 * remain in the throttled_reqs queue.
185 */
186
187 while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
188 qemu_mod_timer(bs->block_timer,
189 wait_time + qemu_get_clock_ns(vm_clock));
190 qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
191 }
192
193 qemu_co_queue_next(&bs->throttled_reqs);
194 }
195
196 /* check if the path starts with "<protocol>:" */
197 static int path_has_protocol(const char *path)
198 {
199 #ifdef _WIN32
200 if (is_windows_drive(path) ||
201 is_windows_drive_prefix(path)) {
202 return 0;
203 }
204 #endif
205
206 return strchr(path, ':') != NULL;
207 }
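
/* Illustrative sketch (not part of the original file): how the check above
 * classifies a few hypothetical file names.
 */
#if 0
    path_has_protocol("nbd:localhost:10809"); /* non-zero: "nbd:" prefix */
    path_has_protocol("/images/disk.img");    /* 0: no ':' in the path   */
#endif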
208
209 int path_is_absolute(const char *path)
210 {
211 const char *p;
212 #ifdef _WIN32
213 /* specific case for names like: "\\.\d:" */
214 if (*path == '/' || *path == '\\')
215 return 1;
216 #endif
217 p = strchr(path, ':');
218 if (p)
219 p++;
220 else
221 p = path;
222 #ifdef _WIN32
223 return (*p == '/' || *p == '\\');
224 #else
225 return (*p == '/');
226 #endif
227 }
228
229 /* if filename is absolute, just copy it to dest. Otherwise, build a
230 path to it by treating it as relative to base_path. URLs are
231 supported. */
232 void path_combine(char *dest, int dest_size,
233 const char *base_path,
234 const char *filename)
235 {
236 const char *p, *p1;
237 int len;
238
239 if (dest_size <= 0)
240 return;
241 if (path_is_absolute(filename)) {
242 pstrcpy(dest, dest_size, filename);
243 } else {
244 p = strchr(base_path, ':');
245 if (p)
246 p++;
247 else
248 p = base_path;
249 p1 = strrchr(base_path, '/');
250 #ifdef _WIN32
251 {
252 const char *p2;
253 p2 = strrchr(base_path, '\\');
254 if (!p1 || p2 > p1)
255 p1 = p2;
256 }
257 #endif
258 if (p1)
259 p1++;
260 else
261 p1 = base_path;
262 if (p1 > p)
263 p = p1;
264 len = p - base_path;
265 if (len > dest_size - 1)
266 len = dest_size - 1;
267 memcpy(dest, base_path, len);
268 dest[len] = '\0';
269 pstrcat(dest, dest_size, filename);
270 }
271 }
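
/* Illustrative sketch (not part of the original file): how path_combine()
 * resolves a backing file name relative to its image. The paths are
 * hypothetical.
 */
#if 0
    char dest[PATH_MAX];

    /* relative name: the directory of base_path is prepended */
    path_combine(dest, sizeof(dest), "/images/overlay.qcow2", "base.raw");
    /* dest == "/images/base.raw" */

    /* absolute name: copied to dest unchanged */
    path_combine(dest, sizeof(dest), "/images/overlay.qcow2", "/mnt/base.raw");
    /* dest == "/mnt/base.raw" */
#endif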
272
273 void bdrv_register(BlockDriver *bdrv)
274 {
275 /* Block drivers without coroutine functions need emulation */
276 if (!bdrv->bdrv_co_readv) {
277 bdrv->bdrv_co_readv = bdrv_co_readv_em;
278 bdrv->bdrv_co_writev = bdrv_co_writev_em;
279
280 /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
281 * the block driver lacks aio we need to emulate that too.
282 */
283 if (!bdrv->bdrv_aio_readv) {
284 /* add AIO emulation layer */
285 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
286 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
287 }
288 }
289
290 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
291 }
292
293 /* create a new block device (by default it is empty) */
294 BlockDriverState *bdrv_new(const char *device_name)
295 {
296 BlockDriverState *bs;
297
298 bs = g_malloc0(sizeof(BlockDriverState));
299 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
300 if (device_name[0] != '\0') {
301 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
302 }
303 bdrv_iostatus_disable(bs);
304 return bs;
305 }
306
307 BlockDriver *bdrv_find_format(const char *format_name)
308 {
309 BlockDriver *drv1;
310 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
311 if (!strcmp(drv1->format_name, format_name)) {
312 return drv1;
313 }
314 }
315 return NULL;
316 }
317
318 static int bdrv_is_whitelisted(BlockDriver *drv)
319 {
320 static const char *whitelist[] = {
321 CONFIG_BDRV_WHITELIST
322 };
323 const char **p;
324
325 if (!whitelist[0])
326 return 1; /* no whitelist, anything goes */
327
328 for (p = whitelist; *p; p++) {
329 if (!strcmp(drv->format_name, *p)) {
330 return 1;
331 }
332 }
333 return 0;
334 }
335
336 BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
337 {
338 BlockDriver *drv = bdrv_find_format(format_name);
339 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
340 }
341
342 int bdrv_create(BlockDriver *drv, const char* filename,
343 QEMUOptionParameter *options)
344 {
345 if (!drv->bdrv_create)
346 return -ENOTSUP;
347
348 return drv->bdrv_create(filename, options);
349 }
350
351 int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
352 {
353 BlockDriver *drv;
354
355 drv = bdrv_find_protocol(filename);
356 if (drv == NULL) {
357 return -ENOENT;
358 }
359
360 return bdrv_create(drv, filename, options);
361 }
362
363 #ifdef _WIN32
364 void get_tmp_filename(char *filename, int size)
365 {
366 char temp_dir[MAX_PATH];
367
368 GetTempPath(MAX_PATH, temp_dir);
369 GetTempFileName(temp_dir, "qem", 0, filename);
370 }
371 #else
372 void get_tmp_filename(char *filename, int size)
373 {
374 int fd;
375 const char *tmpdir;
376 /* XXX: race condition possible */
377 tmpdir = getenv("TMPDIR");
378 if (!tmpdir)
379 tmpdir = "/tmp";
380 snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
381 fd = mkstemp(filename);
382 close(fd);
383 }
384 #endif
385
386 /*
387 * Detect host devices. By convention, /dev/cdrom[N] is always
388 * recognized as a host CDROM.
389 */
390 static BlockDriver *find_hdev_driver(const char *filename)
391 {
392 int score_max = 0, score;
393 BlockDriver *drv = NULL, *d;
394
395 QLIST_FOREACH(d, &bdrv_drivers, list) {
396 if (d->bdrv_probe_device) {
397 score = d->bdrv_probe_device(filename);
398 if (score > score_max) {
399 score_max = score;
400 drv = d;
401 }
402 }
403 }
404
405 return drv;
406 }
407
408 BlockDriver *bdrv_find_protocol(const char *filename)
409 {
410 BlockDriver *drv1;
411 char protocol[128];
412 int len;
413 const char *p;
414
415 /* TODO Drivers without bdrv_file_open must be specified explicitly */
416
417 /*
418 * XXX(hch): we really should not let host device detection
419 * override an explicit protocol specification, but moving this
420 * later breaks access to device names with colons in them.
421 * Thanks to the brain-dead persistent naming schemes on udev-
422 * based Linux systems those actually are quite common.
423 */
424 drv1 = find_hdev_driver(filename);
425 if (drv1) {
426 return drv1;
427 }
428
429 if (!path_has_protocol(filename)) {
430 return bdrv_find_format("file");
431 }
432 p = strchr(filename, ':');
433 assert(p != NULL);
434 len = p - filename;
435 if (len > sizeof(protocol) - 1)
436 len = sizeof(protocol) - 1;
437 memcpy(protocol, filename, len);
438 protocol[len] = '\0';
439 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
440 if (drv1->protocol_name &&
441 !strcmp(drv1->protocol_name, protocol)) {
442 return drv1;
443 }
444 }
445 return NULL;
446 }
447
448 static int find_image_format(const char *filename, BlockDriver **pdrv)
449 {
450 int ret, score, score_max;
451 BlockDriver *drv1, *drv;
452 uint8_t buf[2048];
453 BlockDriverState *bs;
454
455 ret = bdrv_file_open(&bs, filename, 0);
456 if (ret < 0) {
457 *pdrv = NULL;
458 return ret;
459 }
460
461 /* Return the raw BlockDriver * for scsi-generic devices or empty drives */
462 if (bs->sg || !bdrv_is_inserted(bs)) {
463 bdrv_delete(bs);
464 drv = bdrv_find_format("raw");
465 if (!drv) {
466 ret = -ENOENT;
467 }
468 *pdrv = drv;
469 return ret;
470 }
471
472 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
473 bdrv_delete(bs);
474 if (ret < 0) {
475 *pdrv = NULL;
476 return ret;
477 }
478
479 score_max = 0;
480 drv = NULL;
481 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
482 if (drv1->bdrv_probe) {
483 score = drv1->bdrv_probe(buf, ret, filename);
484 if (score > score_max) {
485 score_max = score;
486 drv = drv1;
487 }
488 }
489 }
490 if (!drv) {
491 ret = -ENOENT;
492 }
493 *pdrv = drv;
494 return ret;
495 }
496
497 /**
498 * Set the current 'total_sectors' value
499 */
500 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
501 {
502 BlockDriver *drv = bs->drv;
503
504 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
505 if (bs->sg)
506 return 0;
507
508 /* query actual device if possible, otherwise just trust the hint */
509 if (drv->bdrv_getlength) {
510 int64_t length = drv->bdrv_getlength(bs);
511 if (length < 0) {
512 return length;
513 }
514 hint = length >> BDRV_SECTOR_BITS;
515 }
516
517 bs->total_sectors = hint;
518 return 0;
519 }
520
521 /**
522 * Set open flags for a given cache mode
523 *
524 * Return 0 on success, -1 if the cache mode was invalid.
525 */
526 int bdrv_parse_cache_flags(const char *mode, int *flags)
527 {
528 *flags &= ~BDRV_O_CACHE_MASK;
529
530 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
531 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
532 } else if (!strcmp(mode, "directsync")) {
533 *flags |= BDRV_O_NOCACHE;
534 } else if (!strcmp(mode, "writeback")) {
535 *flags |= BDRV_O_CACHE_WB;
536 } else if (!strcmp(mode, "unsafe")) {
537 *flags |= BDRV_O_CACHE_WB;
538 *flags |= BDRV_O_NO_FLUSH;
539 } else if (!strcmp(mode, "writethrough")) {
540 /* this is the default */
541 } else {
542 return -1;
543 }
544
545 return 0;
546 }
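
/* Illustrative sketch (not part of the original file): the flag
 * combinations produced for two of the cache modes parsed above.
 */
#if 0
    int flags = 0;

    bdrv_parse_cache_flags("none", &flags);
    /* flags == BDRV_O_NOCACHE | BDRV_O_CACHE_WB: bypass the host page
     * cache, and complete writes once they reach the disk write cache */

    flags = 0;
    bdrv_parse_cache_flags("unsafe", &flags);
    /* flags == BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH: writeback, and flush
     * requests are ignored */
#endif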
547
548 /**
549 * The copy-on-read flag is actually a reference count so multiple users may
550 * use the feature without worrying about clobbering its previous state.
551 * Copy-on-read stays enabled until all users have called to disable it.
552 */
553 void bdrv_enable_copy_on_read(BlockDriverState *bs)
554 {
555 bs->copy_on_read++;
556 }
557
558 void bdrv_disable_copy_on_read(BlockDriverState *bs)
559 {
560 assert(bs->copy_on_read > 0);
561 bs->copy_on_read--;
562 }
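
/* Illustrative sketch (not part of the original file): since the flag is a
 * reference count, independent users simply pair enable/disable calls.
 */
#if 0
    bdrv_enable_copy_on_read(bs);   /* user A */
    bdrv_enable_copy_on_read(bs);   /* user B */
    bdrv_disable_copy_on_read(bs);  /* user A done, still enabled */
    bdrv_disable_copy_on_read(bs);  /* user B done, now disabled  */
#endif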
563
564 /*
565 * Common part for opening disk images and files
566 */
567 static int bdrv_open_common(BlockDriverState *bs, const char *filename,
568 int flags, BlockDriver *drv)
569 {
570 int ret, open_flags;
571
572 assert(drv != NULL);
573
574 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
575
576 bs->file = NULL;
577 bs->total_sectors = 0;
578 bs->encrypted = 0;
579 bs->valid_key = 0;
580 bs->sg = 0;
581 bs->open_flags = flags;
582 bs->growable = 0;
583 bs->buffer_alignment = 512;
584
585 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
586 if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
587 bdrv_enable_copy_on_read(bs);
588 }
589
590 pstrcpy(bs->filename, sizeof(bs->filename), filename);
591 bs->backing_file[0] = '\0';
592
593 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
594 return -ENOTSUP;
595 }
596
597 bs->drv = drv;
598 bs->opaque = g_malloc0(drv->instance_size);
599
600 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
601
602 /*
603 * Clear flags that are internal to the block layer before opening the
604 * image.
605 */
606 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
607
608 /*
609 * Snapshots should be writable.
610 */
611 if (bs->is_temporary) {
612 open_flags |= BDRV_O_RDWR;
613 }
614
615 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
616
617 /* Open the image, either directly or using a protocol */
618 if (drv->bdrv_file_open) {
619 ret = drv->bdrv_file_open(bs, filename, open_flags);
620 } else {
621 ret = bdrv_file_open(&bs->file, filename, open_flags);
622 if (ret >= 0) {
623 ret = drv->bdrv_open(bs, open_flags);
624 }
625 }
626
627 if (ret < 0) {
628 goto free_and_fail;
629 }
630
631 ret = refresh_total_sectors(bs, bs->total_sectors);
632 if (ret < 0) {
633 goto free_and_fail;
634 }
635
636 #ifndef _WIN32
637 if (bs->is_temporary) {
638 unlink(filename);
639 }
640 #endif
641 return 0;
642
643 free_and_fail:
644 if (bs->file) {
645 bdrv_delete(bs->file);
646 bs->file = NULL;
647 }
648 g_free(bs->opaque);
649 bs->opaque = NULL;
650 bs->drv = NULL;
651 return ret;
652 }
653
654 /*
655 * Opens a file using a protocol (file, host_device, nbd, ...)
656 */
657 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
658 {
659 BlockDriverState *bs;
660 BlockDriver *drv;
661 int ret;
662
663 drv = bdrv_find_protocol(filename);
664 if (!drv) {
665 return -ENOENT;
666 }
667
668 bs = bdrv_new("");
669 ret = bdrv_open_common(bs, filename, flags, drv);
670 if (ret < 0) {
671 bdrv_delete(bs);
672 return ret;
673 }
674 bs->growable = 1;
675 *pbs = bs;
676 return 0;
677 }
678
679 /*
680 * Opens a disk image (raw, qcow2, vmdk, ...)
681 */
682 int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
683 BlockDriver *drv)
684 {
685 int ret;
686 char tmp_filename[PATH_MAX];
687
688 if (flags & BDRV_O_SNAPSHOT) {
689 BlockDriverState *bs1;
690 int64_t total_size;
691 int is_protocol = 0;
692 BlockDriver *bdrv_qcow2;
693 QEMUOptionParameter *options;
694 char backing_filename[PATH_MAX];
695
696 /* if snapshot, we create a temporary backing file and open it
697 instead of opening 'filename' directly */
698
699 /* if there is a backing file, use it */
700 bs1 = bdrv_new("");
701 ret = bdrv_open(bs1, filename, 0, drv);
702 if (ret < 0) {
703 bdrv_delete(bs1);
704 return ret;
705 }
706 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
707
708 if (bs1->drv && bs1->drv->protocol_name)
709 is_protocol = 1;
710
711 bdrv_delete(bs1);
712
713 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
714
715 /* Real path is meaningless for protocols */
716 if (is_protocol)
717 snprintf(backing_filename, sizeof(backing_filename),
718 "%s", filename);
719 else if (!realpath(filename, backing_filename))
720 return -errno;
721
722 bdrv_qcow2 = bdrv_find_format("qcow2");
723 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
724
725 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
726 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
727 if (drv) {
728 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
729 drv->format_name);
730 }
731
732 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
733 free_option_parameters(options);
734 if (ret < 0) {
735 return ret;
736 }
737
738 filename = tmp_filename;
739 drv = bdrv_qcow2;
740 bs->is_temporary = 1;
741 }
742
743 /* Find the right image format driver */
744 if (!drv) {
745 ret = find_image_format(filename, &drv);
746 }
747
748 if (!drv) {
749 goto unlink_and_fail;
750 }
751
752 /* Open the image */
753 ret = bdrv_open_common(bs, filename, flags, drv);
754 if (ret < 0) {
755 goto unlink_and_fail;
756 }
757
758 /* If there is a backing file, use it */
759 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
760 char backing_filename[PATH_MAX];
761 int back_flags;
762 BlockDriver *back_drv = NULL;
763
764 bs->backing_hd = bdrv_new("");
765
766 if (path_has_protocol(bs->backing_file)) {
767 pstrcpy(backing_filename, sizeof(backing_filename),
768 bs->backing_file);
769 } else {
770 path_combine(backing_filename, sizeof(backing_filename),
771 filename, bs->backing_file);
772 }
773
774 if (bs->backing_format[0] != '\0') {
775 back_drv = bdrv_find_format(bs->backing_format);
776 }
777
778 /* backing files always opened read-only */
779 back_flags =
780 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
781
782 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
783 if (ret < 0) {
784 bdrv_close(bs);
785 return ret;
786 }
787 if (bs->is_temporary) {
788 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
789 } else {
790 /* base image inherits from "parent" */
791 bs->backing_hd->keep_read_only = bs->keep_read_only;
792 }
793 }
794
795 if (!bdrv_key_required(bs)) {
796 bdrv_dev_change_media_cb(bs, true);
797 }
798
799 /* throttling disk I/O limits */
800 if (bs->io_limits_enabled) {
801 bdrv_io_limits_enable(bs);
802 }
803
804 return 0;
805
806 unlink_and_fail:
807 if (bs->is_temporary) {
808 unlink(filename);
809 }
810 return ret;
811 }
812
813 void bdrv_close(BlockDriverState *bs)
814 {
815 if (bs->drv) {
816 if (bs->job) {
817 block_job_cancel_sync(bs->job);
818 }
819 if (bs == bs_snapshots) {
820 bs_snapshots = NULL;
821 }
822 if (bs->backing_hd) {
823 bdrv_delete(bs->backing_hd);
824 bs->backing_hd = NULL;
825 }
826 bs->drv->bdrv_close(bs);
827 g_free(bs->opaque);
828 #ifdef _WIN32
829 if (bs->is_temporary) {
830 unlink(bs->filename);
831 }
832 #endif
833 bs->opaque = NULL;
834 bs->drv = NULL;
835 bs->copy_on_read = 0;
836
837 if (bs->file != NULL) {
838 bdrv_close(bs->file);
839 }
840
841 bdrv_dev_change_media_cb(bs, false);
842 }
843
844 /* throttling disk I/O limits */
845 if (bs->io_limits_enabled) {
846 bdrv_io_limits_disable(bs);
847 }
848 }
849
850 void bdrv_close_all(void)
851 {
852 BlockDriverState *bs;
853
854 QTAILQ_FOREACH(bs, &bdrv_states, list) {
855 bdrv_close(bs);
856 }
857 }
858
859 /*
860 * Wait for pending requests to complete across all BlockDriverStates
861 *
862 * This function does not flush data to disk, use bdrv_flush_all() for that
863 * after calling this function.
864 */
865 void bdrv_drain_all(void)
866 {
867 BlockDriverState *bs;
868
869 qemu_aio_flush();
870
871 /* If requests are still pending there is a bug somewhere */
872 QTAILQ_FOREACH(bs, &bdrv_states, list) {
873 assert(QLIST_EMPTY(&bs->tracked_requests));
874 assert(qemu_co_queue_empty(&bs->throttled_reqs));
875 }
876 }
877
878 /* make a BlockDriverState anonymous by removing it from the bdrv_states
879 list. Also, empty device_name to prevent a double remove */
880 void bdrv_make_anon(BlockDriverState *bs)
881 {
882 if (bs->device_name[0] != '\0') {
883 QTAILQ_REMOVE(&bdrv_states, bs, list);
884 }
885 bs->device_name[0] = '\0';
886 }
887
888 /*
889 * Add the new bs contents at the top of an image chain while the chain
890 * is live, keeping the required fields on the top layer.
891 *
892 * This will modify the BlockDriverState fields, and swap contents
893 * between bs_new and bs_top. Both bs_new and bs_top are modified.
894 *
895 * This function does not create any image files.
896 */
897 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
898 {
899 BlockDriverState tmp;
900
901 /* the new bs must not be in bdrv_states */
902 bdrv_make_anon(bs_new);
903
904 tmp = *bs_new;
905
906 /* there are some fields that need to stay on the top layer: */
907
908 /* dev info */
909 tmp.dev_ops = bs_top->dev_ops;
910 tmp.dev_opaque = bs_top->dev_opaque;
911 tmp.dev = bs_top->dev;
912 tmp.buffer_alignment = bs_top->buffer_alignment;
913 tmp.copy_on_read = bs_top->copy_on_read;
914
915 /* i/o timing parameters */
916 tmp.slice_time = bs_top->slice_time;
917 tmp.slice_start = bs_top->slice_start;
918 tmp.slice_end = bs_top->slice_end;
919 tmp.io_limits = bs_top->io_limits;
920 tmp.io_base = bs_top->io_base;
921 tmp.throttled_reqs = bs_top->throttled_reqs;
922 tmp.block_timer = bs_top->block_timer;
923 tmp.io_limits_enabled = bs_top->io_limits_enabled;
924
925 /* geometry */
926 tmp.cyls = bs_top->cyls;
927 tmp.heads = bs_top->heads;
928 tmp.secs = bs_top->secs;
929 tmp.translation = bs_top->translation;
930
931 /* r/w error */
932 tmp.on_read_error = bs_top->on_read_error;
933 tmp.on_write_error = bs_top->on_write_error;
934
935 /* i/o status */
936 tmp.iostatus_enabled = bs_top->iostatus_enabled;
937 tmp.iostatus = bs_top->iostatus;
938
939 /* keep the same entry in bdrv_states */
940 pstrcpy(tmp.device_name, sizeof(tmp.device_name), bs_top->device_name);
941 tmp.list = bs_top->list;
942
943 /* The contents of 'tmp' will become bs_top, as we are
944 * swapping bs_new and bs_top contents. */
945 tmp.backing_hd = bs_new;
946 pstrcpy(tmp.backing_file, sizeof(tmp.backing_file), bs_top->filename);
947
948 /* swap contents of the fixed new bs and the current top */
949 *bs_new = *bs_top;
950 *bs_top = tmp;
951
952 /* clear the copied fields in the new backing file */
953 bdrv_detach_dev(bs_new, bs_new->dev);
954
955 qemu_co_queue_init(&bs_new->throttled_reqs);
956 memset(&bs_new->io_base, 0, sizeof(bs_new->io_base));
957 memset(&bs_new->io_limits, 0, sizeof(bs_new->io_limits));
958 bdrv_iostatus_disable(bs_new);
959
960 /* we don't use bdrv_io_limits_disable() for this, because we don't want
961 * to affect or delete the block_timer, as it has been moved to bs_top */
962 bs_new->io_limits_enabled = false;
963 bs_new->block_timer = NULL;
964 bs_new->slice_time = 0;
965 bs_new->slice_start = 0;
966 bs_new->slice_end = 0;
967 }
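
/* Illustrative sketch (not part of the original file): the effect of
 * bdrv_append() on an image chain. The guest keeps talking to the same
 * bs_top pointer; only the contents are swapped.
 *
 *   before bdrv_append(bs_new, bs_top):  bs_top -> [backing chain]
 *   after:                               bs_top -> bs_new -> [backing chain]
 *
 * where bs_new now holds the former top image and acts as bs_top's
 * backing file.
 */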
968
969 void bdrv_delete(BlockDriverState *bs)
970 {
971 assert(!bs->dev);
972 assert(!bs->job);
973 assert(!bs->in_use);
974
975 /* remove from list, if necessary */
976 bdrv_make_anon(bs);
977
978 bdrv_close(bs);
979 if (bs->file != NULL) {
980 bdrv_delete(bs->file);
981 }
982
983 assert(bs != bs_snapshots);
984 g_free(bs);
985 }
986
987 int bdrv_attach_dev(BlockDriverState *bs, void *dev)
988 /* TODO change to DeviceState *dev when all users are qdevified */
989 {
990 if (bs->dev) {
991 return -EBUSY;
992 }
993 bs->dev = dev;
994 bdrv_iostatus_reset(bs);
995 return 0;
996 }
997
998 /* TODO qdevified devices don't use this, remove when devices are qdevified */
999 void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
1000 {
1001 if (bdrv_attach_dev(bs, dev) < 0) {
1002 abort();
1003 }
1004 }
1005
1006 void bdrv_detach_dev(BlockDriverState *bs, void *dev)
1007 /* TODO change to DeviceState *dev when all users are qdevified */
1008 {
1009 assert(bs->dev == dev);
1010 bs->dev = NULL;
1011 bs->dev_ops = NULL;
1012 bs->dev_opaque = NULL;
1013 bs->buffer_alignment = 512;
1014 }
1015
1016 /* TODO change to return DeviceState * when all users are qdevified */
1017 void *bdrv_get_attached_dev(BlockDriverState *bs)
1018 {
1019 return bs->dev;
1020 }
1021
1022 void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
1023 void *opaque)
1024 {
1025 bs->dev_ops = ops;
1026 bs->dev_opaque = opaque;
1027 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
1028 bs_snapshots = NULL;
1029 }
1030 }
1031
1032 void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
1033 BlockQMPEventAction action, int is_read)
1034 {
1035 QObject *data;
1036 const char *action_str;
1037
1038 switch (action) {
1039 case BDRV_ACTION_REPORT:
1040 action_str = "report";
1041 break;
1042 case BDRV_ACTION_IGNORE:
1043 action_str = "ignore";
1044 break;
1045 case BDRV_ACTION_STOP:
1046 action_str = "stop";
1047 break;
1048 default:
1049 abort();
1050 }
1051
1052 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1053 bdrv->device_name,
1054 action_str,
1055 is_read ? "read" : "write");
1056 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1057
1058 qobject_decref(data);
1059 }
1060
1061 static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
1062 {
1063 QObject *data;
1064
1065 data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
1066 bdrv_get_device_name(bs), ejected);
1067 monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data);
1068
1069 qobject_decref(data);
1070 }
1071
1072 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
1073 {
1074 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
1075 bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
1076 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
1077 if (tray_was_closed) {
1078 /* tray open */
1079 bdrv_emit_qmp_eject_event(bs, true);
1080 }
1081 if (load) {
1082 /* tray close */
1083 bdrv_emit_qmp_eject_event(bs, false);
1084 }
1085 }
1086 }
1087
1088 bool bdrv_dev_has_removable_media(BlockDriverState *bs)
1089 {
1090 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
1091 }
1092
1093 void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
1094 {
1095 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
1096 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
1097 }
1098 }
1099
1100 bool bdrv_dev_is_tray_open(BlockDriverState *bs)
1101 {
1102 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
1103 return bs->dev_ops->is_tray_open(bs->dev_opaque);
1104 }
1105 return false;
1106 }
1107
1108 static void bdrv_dev_resize_cb(BlockDriverState *bs)
1109 {
1110 if (bs->dev_ops && bs->dev_ops->resize_cb) {
1111 bs->dev_ops->resize_cb(bs->dev_opaque);
1112 }
1113 }
1114
1115 bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
1116 {
1117 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
1118 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
1119 }
1120 return false;
1121 }
1122
1123 /*
1124 * Run consistency checks on an image
1125 *
1126 * Returns 0 if the check could be completed (it doesn't mean that the image is
1127 * free of errors) or -errno when an internal error occurred. The results of the
1128 * check are stored in res.
1129 */
1130 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
1131 {
1132 if (bs->drv->bdrv_check == NULL) {
1133 return -ENOTSUP;
1134 }
1135
1136 memset(res, 0, sizeof(*res));
1137 return bs->drv->bdrv_check(bs, res);
1138 }
1139
1140 #define COMMIT_BUF_SECTORS 2048
1141
1142 /* commit COW file into the raw image */
1143 int bdrv_commit(BlockDriverState *bs)
1144 {
1145 BlockDriver *drv = bs->drv;
1146 BlockDriver *backing_drv;
1147 int64_t sector, total_sectors;
1148 int n, ro, open_flags;
1149 int ret = 0, rw_ret = 0;
1150 uint8_t *buf;
1151 char filename[1024];
1152 BlockDriverState *bs_rw, *bs_ro;
1153
1154 if (!drv)
1155 return -ENOMEDIUM;
1156
1157 if (!bs->backing_hd) {
1158 return -ENOTSUP;
1159 }
1160
1161 if (bs->backing_hd->keep_read_only) {
1162 return -EACCES;
1163 }
1164
1165 if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
1166 return -EBUSY;
1167 }
1168
1169 backing_drv = bs->backing_hd->drv;
1170 ro = bs->backing_hd->read_only;
1171 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
1172 open_flags = bs->backing_hd->open_flags;
1173
1174 if (ro) {
1175 /* re-open as RW */
1176 bdrv_delete(bs->backing_hd);
1177 bs->backing_hd = NULL;
1178 bs_rw = bdrv_new("");
1179 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
1180 backing_drv);
1181 if (rw_ret < 0) {
1182 bdrv_delete(bs_rw);
1183 /* try to re-open read-only */
1184 bs_ro = bdrv_new("");
1185 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1186 backing_drv);
1187 if (ret < 0) {
1188 bdrv_delete(bs_ro);
1189 /* drive not functional anymore */
1190 bs->drv = NULL;
1191 return ret;
1192 }
1193 bs->backing_hd = bs_ro;
1194 return rw_ret;
1195 }
1196 bs->backing_hd = bs_rw;
1197 }
1198
1199 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
1200 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
1201
1202 for (sector = 0; sector < total_sectors; sector += n) {
1203 if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
1204
1205 if (bdrv_read(bs, sector, buf, n) != 0) {
1206 ret = -EIO;
1207 goto ro_cleanup;
1208 }
1209
1210 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
1211 ret = -EIO;
1212 goto ro_cleanup;
1213 }
1214 }
1215 }
1216
1217 if (drv->bdrv_make_empty) {
1218 ret = drv->bdrv_make_empty(bs);
1219 bdrv_flush(bs);
1220 }
1221
1222 /*
1223 * Make sure all data we wrote to the backing device is actually
1224 * stable on disk.
1225 */
1226 if (bs->backing_hd)
1227 bdrv_flush(bs->backing_hd);
1228
1229 ro_cleanup:
1230 g_free(buf);
1231
1232 if (ro) {
1233 /* re-open as RO */
1234 bdrv_delete(bs->backing_hd);
1235 bs->backing_hd = NULL;
1236 bs_ro = bdrv_new("");
1237 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1238 backing_drv);
1239 if (ret < 0) {
1240 bdrv_delete(bs_ro);
1241 /* drive not functional anymore */
1242 bs->drv = NULL;
1243 return ret;
1244 }
1245 bs->backing_hd = bs_ro;
1246 bs->backing_hd->keep_read_only = 0;
1247 }
1248
1249 return ret;
1250 }
1251
1252 int bdrv_commit_all(void)
1253 {
1254 BlockDriverState *bs;
1255
1256 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1257 int ret = bdrv_commit(bs);
1258 if (ret < 0) {
1259 return ret;
1260 }
1261 }
1262 return 0;
1263 }
1264
1265 struct BdrvTrackedRequest {
1266 BlockDriverState *bs;
1267 int64_t sector_num;
1268 int nb_sectors;
1269 bool is_write;
1270 QLIST_ENTRY(BdrvTrackedRequest) list;
1271 Coroutine *co; /* owner, used for deadlock detection */
1272 CoQueue wait_queue; /* coroutines blocked on this request */
1273 };
1274
1275 /**
1276 * Remove an active request from the tracked requests list
1277 *
1278 * This function should be called when a tracked request is completing.
1279 */
1280 static void tracked_request_end(BdrvTrackedRequest *req)
1281 {
1282 QLIST_REMOVE(req, list);
1283 qemu_co_queue_restart_all(&req->wait_queue);
1284 }
1285
1286 /**
1287 * Add an active request to the tracked requests list
1288 */
1289 static void tracked_request_begin(BdrvTrackedRequest *req,
1290 BlockDriverState *bs,
1291 int64_t sector_num,
1292 int nb_sectors, bool is_write)
1293 {
1294 *req = (BdrvTrackedRequest){
1295 .bs = bs,
1296 .sector_num = sector_num,
1297 .nb_sectors = nb_sectors,
1298 .is_write = is_write,
1299 .co = qemu_coroutine_self(),
1300 };
1301
1302 qemu_co_queue_init(&req->wait_queue);
1303
1304 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
1305 }
1306
1307 /**
1308 * Round a region to cluster boundaries
1309 */
1310 static void round_to_clusters(BlockDriverState *bs,
1311 int64_t sector_num, int nb_sectors,
1312 int64_t *cluster_sector_num,
1313 int *cluster_nb_sectors)
1314 {
1315 BlockDriverInfo bdi;
1316
1317 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
1318 *cluster_sector_num = sector_num;
1319 *cluster_nb_sectors = nb_sectors;
1320 } else {
1321 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
1322 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
1323 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
1324 nb_sectors, c);
1325 }
1326 }
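
/* Worked example (not part of the original file), assuming a hypothetical
 * 64 KB cluster size: c = 65536 / 512 = 128 sectors, so a request for
 * sectors [200, 300) is widened to the cluster-aligned range [128, 384):
 *
 *   cluster_sector_num = ALIGN_DOWN(200, 128)           = 128
 *   cluster_nb_sectors = ALIGN_UP(200 - 128 + 100, 128) = 256
 */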
1327
1328 static bool tracked_request_overlaps(BdrvTrackedRequest *req,
1329 int64_t sector_num, int nb_sectors) {
1330 /* aaaa bbbb */
1331 if (sector_num >= req->sector_num + req->nb_sectors) {
1332 return false;
1333 }
1334 /* bbbb aaaa */
1335 if (req->sector_num >= sector_num + nb_sectors) {
1336 return false;
1337 }
1338 return true;
1339 }
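
/* Illustrative sketch (not part of the original file): with a tracked
 * request covering sectors [8, 16), a request for [12, 20) overlaps while
 * [16, 24) does not -- both interval ends are exclusive.
 */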
1340
1341 static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
1342 int64_t sector_num, int nb_sectors)
1343 {
1344 BdrvTrackedRequest *req;
1345 int64_t cluster_sector_num;
1346 int cluster_nb_sectors;
1347 bool retry;
1348
1349 /* If we touch the same cluster it counts as an overlap. This guarantees
1350 * that allocating writes will be serialized and not race with each other
1351 * for the same cluster. For example, in copy-on-read it ensures that the
1352 * CoR read and write operations are atomic and guest writes cannot
1353 * interleave between them.
1354 */
1355 round_to_clusters(bs, sector_num, nb_sectors,
1356 &cluster_sector_num, &cluster_nb_sectors);
1357
1358 do {
1359 retry = false;
1360 QLIST_FOREACH(req, &bs->tracked_requests, list) {
1361 if (tracked_request_overlaps(req, cluster_sector_num,
1362 cluster_nb_sectors)) {
1363 /* Hitting this means there was a reentrant request, for
1364 * example, a block driver issuing nested requests. This must
1365 * never happen since it means deadlock.
1366 */
1367 assert(qemu_coroutine_self() != req->co);
1368
1369 qemu_co_queue_wait(&req->wait_queue);
1370 retry = true;
1371 break;
1372 }
1373 }
1374 } while (retry);
1375 }
1376
1377 /*
1378 * Return values:
1379 * 0 - success
1380 * -EINVAL - backing format specified, but no file
1381 * -ENOSPC - can't update the backing file because no space is left in the
1382 * image file header
1383 * -ENOTSUP - format driver doesn't support changing the backing file
1384 */
1385 int bdrv_change_backing_file(BlockDriverState *bs,
1386 const char *backing_file, const char *backing_fmt)
1387 {
1388 BlockDriver *drv = bs->drv;
1389
1390 if (drv->bdrv_change_backing_file != NULL) {
1391 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
1392 } else {
1393 return -ENOTSUP;
1394 }
1395 }
1396
1397 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1398 size_t size)
1399 {
1400 int64_t len;
1401
1402 if (!bdrv_is_inserted(bs))
1403 return -ENOMEDIUM;
1404
1405 if (bs->growable)
1406 return 0;
1407
1408 len = bdrv_getlength(bs);
1409
1410 if (offset < 0)
1411 return -EIO;
1412
1413 if ((offset > len) || (len - offset < size))
1414 return -EIO;
1415
1416 return 0;
1417 }
1418
1419 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1420 int nb_sectors)
1421 {
1422 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1423 nb_sectors * BDRV_SECTOR_SIZE);
1424 }
1425
1426 typedef struct RwCo {
1427 BlockDriverState *bs;
1428 int64_t sector_num;
1429 int nb_sectors;
1430 QEMUIOVector *qiov;
1431 bool is_write;
1432 int ret;
1433 } RwCo;
1434
1435 static void coroutine_fn bdrv_rw_co_entry(void *opaque)
1436 {
1437 RwCo *rwco = opaque;
1438
1439 if (!rwco->is_write) {
1440 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
1441 rwco->nb_sectors, rwco->qiov, 0);
1442 } else {
1443 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
1444 rwco->nb_sectors, rwco->qiov, 0);
1445 }
1446 }
1447
1448 /*
1449 * Process a synchronous request using coroutines
1450 */
1451 static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1452 int nb_sectors, bool is_write)
1453 {
1454 QEMUIOVector qiov;
1455 struct iovec iov = {
1456 .iov_base = (void *)buf,
1457 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1458 };
1459 Coroutine *co;
1460 RwCo rwco = {
1461 .bs = bs,
1462 .sector_num = sector_num,
1463 .nb_sectors = nb_sectors,
1464 .qiov = &qiov,
1465 .is_write = is_write,
1466 .ret = NOT_DONE,
1467 };
1468
1469 qemu_iovec_init_external(&qiov, &iov, 1);
1470
1471 /**
1472 * In a synchronous call context the vcpu is blocked, so this throttling
1473 * timer will never fire; therefore I/O throttling has to be disabled
1474 * here if it has been enabled.
1475 */
1476 if (bs->io_limits_enabled) {
1477 fprintf(stderr, "Disabling I/O throttling on '%s' due "
1478 "to synchronous I/O.\n", bdrv_get_device_name(bs));
1479 bdrv_io_limits_disable(bs);
1480 }
1481
1482 if (qemu_in_coroutine()) {
1483 /* Fast-path if already in coroutine context */
1484 bdrv_rw_co_entry(&rwco);
1485 } else {
1486 co = qemu_coroutine_create(bdrv_rw_co_entry);
1487 qemu_coroutine_enter(co, &rwco);
1488 while (rwco.ret == NOT_DONE) {
1489 qemu_aio_wait();
1490 }
1491 }
1492 return rwco.ret;
1493 }
1494
1495 /* return < 0 if error. See bdrv_write() for the return codes */
1496 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
1497 uint8_t *buf, int nb_sectors)
1498 {
1499 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
1500 }
1501
1502 static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
1503 int nb_sectors, int dirty)
1504 {
1505 int64_t start, end;
1506 unsigned long val, idx, bit;
1507
1508 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
1509 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
1510
1511 for (; start <= end; start++) {
1512 idx = start / (sizeof(unsigned long) * 8);
1513 bit = start % (sizeof(unsigned long) * 8);
1514 val = bs->dirty_bitmap[idx];
1515 if (dirty) {
1516 if (!(val & (1UL << bit))) {
1517 bs->dirty_count++;
1518 val |= 1UL << bit;
1519 }
1520 } else {
1521 if (val & (1UL << bit)) {
1522 bs->dirty_count--;
1523 val &= ~(1UL << bit);
1524 }
1525 }
1526 bs->dirty_bitmap[idx] = val;
1527 }
1528 }
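
/* Worked example (not part of the original file), assuming
 * BDRV_SECTORS_PER_DIRTY_CHUNK is 2048 and a 64-bit unsigned long:
 * dirtying sectors [4096, 4100) touches chunk start = end = 4096 / 2048
 * = 2, i.e. word idx = 2 / 64 = 0 at bit = 2, so one bit is set and
 * bs->dirty_count is incremented at most once.
 */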
1529
1530 /* Return < 0 if error. Important errors are:
1531 -EIO generic I/O error (may happen for all errors)
1532 -ENOMEDIUM No media inserted.
1533 -EINVAL Invalid sector number or nb_sectors
1534 -EACCES Trying to write a read-only device
1535 */
1536 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
1537 const uint8_t *buf, int nb_sectors)
1538 {
1539 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
1540 }
1541
1542 int bdrv_pread(BlockDriverState *bs, int64_t offset,
1543 void *buf, int count1)
1544 {
1545 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
1546 int len, nb_sectors, count;
1547 int64_t sector_num;
1548 int ret;
1549
1550 count = count1;
1551 /* first read to align to sector start */
1552 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1553 if (len > count)
1554 len = count;
1555 sector_num = offset >> BDRV_SECTOR_BITS;
1556 if (len > 0) {
1557 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1558 return ret;
1559 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
1560 count -= len;
1561 if (count == 0)
1562 return count1;
1563 sector_num++;
1564 buf += len;
1565 }
1566
1567 /* read the sectors "in place" */
1568 nb_sectors = count >> BDRV_SECTOR_BITS;
1569 if (nb_sectors > 0) {
1570 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1571 return ret;
1572 sector_num += nb_sectors;
1573 len = nb_sectors << BDRV_SECTOR_BITS;
1574 buf += len;
1575 count -= len;
1576 }
1577
1578 /* add data from the last sector */
1579 if (count > 0) {
1580 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1581 return ret;
1582 memcpy(buf, tmp_buf, count);
1583 }
1584 return count1;
1585 }
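
/* Worked example (not part of the original file): a 1200-byte read at
 * offset 300 is served in three steps: a 212-byte head copied out of
 * sector 0 (to reach the sector boundary at byte 512), one full sector
 * read "in place" (sector 1, 512 bytes), and a 476-byte tail copied out
 * of sector 2.
 */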
1586
1587 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1588 const void *buf, int count1)
1589 {
1590 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
1591 int len, nb_sectors, count;
1592 int64_t sector_num;
1593 int ret;
1594
1595 count = count1;
1596 /* first write to align to sector start */
1597 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1598 if (len > count)
1599 len = count;
1600 sector_num = offset >> BDRV_SECTOR_BITS;
1601 if (len > 0) {
1602 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1603 return ret;
1604 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
1605 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1606 return ret;
1607 count -= len;
1608 if (count == 0)
1609 return count1;
1610 sector_num++;
1611 buf += len;
1612 }
1613
1614 /* write the sectors "in place" */
1615 nb_sectors = count >> BDRV_SECTOR_BITS;
1616 if (nb_sectors > 0) {
1617 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1618 return ret;
1619 sector_num += nb_sectors;
1620 len = nb_sectors << BDRV_SECTOR_BITS;
1621 buf += len;
1622 count -= len;
1623 }
1624
1625 /* add data from the last sector */
1626 if (count > 0) {
1627 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1628 return ret;
1629 memcpy(tmp_buf, buf, count);
1630 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1631 return ret;
1632 }
1633 return count1;
1634 }
1635
1636 /*
1637 * Writes to the file and ensures that no writes are reordered across this
1638 * request (acts as a barrier)
1639 *
1640 * Returns 0 on success, -errno in error cases.
1641 */
1642 int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1643 const void *buf, int count)
1644 {
1645 int ret;
1646
1647 ret = bdrv_pwrite(bs, offset, buf, count);
1648 if (ret < 0) {
1649 return ret;
1650 }
1651
1652 /* No flush needed for cache modes that use O_DSYNC */
1653 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
1654 bdrv_flush(bs);
1655 }
1656
1657 return 0;
1658 }
1659
1660 static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
1661 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1662 {
1663 /* Perform I/O through a temporary buffer so that users who scribble over
1664 * their read buffer while the operation is in progress do not end up
1665 * modifying the image file. This is critical for zero-copy guest I/O
1666 * where anything might happen inside guest memory.
1667 */
1668 void *bounce_buffer;
1669
1670 BlockDriver *drv = bs->drv;
1671 struct iovec iov;
1672 QEMUIOVector bounce_qiov;
1673 int64_t cluster_sector_num;
1674 int cluster_nb_sectors;
1675 size_t skip_bytes;
1676 int ret;
1677
1678 /* Cover the entire cluster so no additional backing file I/O is required
1679 * when allocating a cluster in the image file.
1680 */
1681 round_to_clusters(bs, sector_num, nb_sectors,
1682 &cluster_sector_num, &cluster_nb_sectors);
1683
1684 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
1685 cluster_sector_num, cluster_nb_sectors);
1686
1687 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
1688 iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
1689 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
1690
1691 ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
1692 &bounce_qiov);
1693 if (ret < 0) {
1694 goto err;
1695 }
1696
1697 if (drv->bdrv_co_write_zeroes &&
1698 buffer_is_zero(bounce_buffer, iov.iov_len)) {
1699 ret = drv->bdrv_co_write_zeroes(bs, cluster_sector_num,
1700 cluster_nb_sectors);
1701 } else {
1702 ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
1703 &bounce_qiov);
1704 }
1705
1706 if (ret < 0) {
1707 /* It might be okay to ignore write errors for guest requests. If this
1708 * is a deliberate copy-on-read then we don't want to ignore the error.
1709 * Simply report it in all cases.
1710 */
1711 goto err;
1712 }
1713
1714 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
1715 qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
1716 nb_sectors * BDRV_SECTOR_SIZE);
1717
1718 err:
1719 qemu_vfree(bounce_buffer);
1720 return ret;
1721 }
1722
1723 /*
1724 * Handle a read request in coroutine context
1725 */
1726 static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
1727 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
1728 BdrvRequestFlags flags)
1729 {
1730 BlockDriver *drv = bs->drv;
1731 BdrvTrackedRequest req;
1732 int ret;
1733
1734 if (!drv) {
1735 return -ENOMEDIUM;
1736 }
1737 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1738 return -EIO;
1739 }
1740
1741 /* throttling disk read I/O */
1742 if (bs->io_limits_enabled) {
1743 bdrv_io_limits_intercept(bs, false, nb_sectors);
1744 }
1745
1746 if (bs->copy_on_read) {
1747 flags |= BDRV_REQ_COPY_ON_READ;
1748 }
1749 if (flags & BDRV_REQ_COPY_ON_READ) {
1750 bs->copy_on_read_in_flight++;
1751 }
1752
1753 if (bs->copy_on_read_in_flight) {
1754 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1755 }
1756
1757 tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
1758
1759 if (flags & BDRV_REQ_COPY_ON_READ) {
1760 int pnum;
1761
1762 ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
1763 if (ret < 0) {
1764 goto out;
1765 }
1766
1767 if (!ret || pnum != nb_sectors) {
1768 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
1769 goto out;
1770 }
1771 }
1772
1773 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1774
1775 out:
1776 tracked_request_end(&req);
1777
1778 if (flags & BDRV_REQ_COPY_ON_READ) {
1779 bs->copy_on_read_in_flight--;
1780 }
1781
1782 return ret;
1783 }
1784
1785 int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
1786 int nb_sectors, QEMUIOVector *qiov)
1787 {
1788 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
1789
1790 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
1791 }
1792
1793 int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
1794 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1795 {
1796 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
1797
1798 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
1799 BDRV_REQ_COPY_ON_READ);
1800 }
1801
1802 static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
1803 int64_t sector_num, int nb_sectors)
1804 {
1805 BlockDriver *drv = bs->drv;
1806 QEMUIOVector qiov;
1807 struct iovec iov;
1808 int ret;
1809
1810 /* First try the efficient write zeroes operation */
1811 if (drv->bdrv_co_write_zeroes) {
1812 return drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
1813 }
1814
1815 /* Fall back to bounce buffer if write zeroes is unsupported */
1816 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
1817 iov.iov_base = qemu_blockalign(bs, iov.iov_len);
1818 memset(iov.iov_base, 0, iov.iov_len);
1819 qemu_iovec_init_external(&qiov, &iov, 1);
1820
1821 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
1822
1823 qemu_vfree(iov.iov_base);
1824 return ret;
1825 }
1826
1827 /*
1828 * Handle a write request in coroutine context
1829 */
1830 static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
1831 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
1832 BdrvRequestFlags flags)
1833 {
1834 BlockDriver *drv = bs->drv;
1835 BdrvTrackedRequest req;
1836 int ret;
1837
1838 if (!bs->drv) {
1839 return -ENOMEDIUM;
1840 }
1841 if (bs->read_only) {
1842 return -EACCES;
1843 }
1844 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1845 return -EIO;
1846 }
1847
1848 /* throttling disk write I/O */
1849 if (bs->io_limits_enabled) {
1850 bdrv_io_limits_intercept(bs, true, nb_sectors);
1851 }
1852
1853 if (bs->copy_on_read_in_flight) {
1854 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1855 }
1856
1857 tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
1858
1859 if (flags & BDRV_REQ_ZERO_WRITE) {
1860 ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
1861 } else {
1862 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1863 }
1864
1865 if (bs->dirty_bitmap) {
1866 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1867 }
1868
1869 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1870 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1871 }
1872
1873 tracked_request_end(&req);
1874
1875 return ret;
1876 }
1877
1878 int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1879 int nb_sectors, QEMUIOVector *qiov)
1880 {
1881 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1882
1883 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
1884 }
1885
1886 int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
1887 int64_t sector_num, int nb_sectors)
1888 {
1889 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
1890
1891 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
1892 BDRV_REQ_ZERO_WRITE);
1893 }
1894
1895 /**
1896 * Truncate file to 'offset' bytes (needed only for file protocols)
1897 */
1898 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1899 {
1900 BlockDriver *drv = bs->drv;
1901 int ret;
1902 if (!drv)
1903 return -ENOMEDIUM;
1904 if (!drv->bdrv_truncate)
1905 return -ENOTSUP;
1906 if (bs->read_only)
1907 return -EACCES;
1908 if (bdrv_in_use(bs))
1909 return -EBUSY;
1910 ret = drv->bdrv_truncate(bs, offset);
1911 if (ret == 0) {
1912 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
1913 bdrv_dev_resize_cb(bs);
1914 }
1915 return ret;
1916 }
1917
1918 /**
1919 * Length of an allocated file in bytes. Sparse files are counted by actual
1920 * allocated space. Return < 0 if error or unknown.
1921 */
1922 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1923 {
1924 BlockDriver *drv = bs->drv;
1925 if (!drv) {
1926 return -ENOMEDIUM;
1927 }
1928 if (drv->bdrv_get_allocated_file_size) {
1929 return drv->bdrv_get_allocated_file_size(bs);
1930 }
1931 if (bs->file) {
1932 return bdrv_get_allocated_file_size(bs->file);
1933 }
1934 return -ENOTSUP;
1935 }
1936
1937 /**
1938 * Length of a file in bytes. Return < 0 if error or unknown.
1939 */
1940 int64_t bdrv_getlength(BlockDriverState *bs)
1941 {
1942 BlockDriver *drv = bs->drv;
1943 if (!drv)
1944 return -ENOMEDIUM;
1945
1946 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
1947 if (drv->bdrv_getlength) {
1948 return drv->bdrv_getlength(bs);
1949 }
1950 }
1951 return bs->total_sectors * BDRV_SECTOR_SIZE;
1952 }
1953
1954 /* return 0 as the number of sectors if no device is present or on error */
1955 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
1956 {
1957 int64_t length;
1958 length = bdrv_getlength(bs);
1959 if (length < 0)
1960 length = 0;
1961 else
1962 length = length >> BDRV_SECTOR_BITS;
1963 *nb_sectors_ptr = length;
1964 }
1965
1966 struct partition {
1967 uint8_t boot_ind; /* 0x80 - active */
1968 uint8_t head; /* starting head */
1969 uint8_t sector; /* starting sector */
1970 uint8_t cyl; /* starting cylinder */
1971 uint8_t sys_ind; /* What partition type */
1972 uint8_t end_head; /* end head */
1973 uint8_t end_sector; /* end sector */
1974 uint8_t end_cyl; /* end cylinder */
1975 uint32_t start_sect; /* starting sector counting from 0 */
1976 uint32_t nr_sects; /* nr of sectors in partition */
1977 } QEMU_PACKED;
1978
1979 /* try to guess the disk logical geometry from the MS-DOS partition table. Return 0 if OK, -1 if it could not be guessed */
1980 static int guess_disk_lchs(BlockDriverState *bs,
1981 int *pcylinders, int *pheads, int *psectors)
1982 {
1983 uint8_t buf[BDRV_SECTOR_SIZE];
1984 int ret, i, heads, sectors, cylinders;
1985 struct partition *p;
1986 uint32_t nr_sects;
1987 uint64_t nb_sectors;
1988 bool enabled;
1989
1990 bdrv_get_geometry(bs, &nb_sectors);
1991
1992 /**
1993 * This function is invoked during startup not only in sync I/O mode
1994 * but also in async I/O mode, so the I/O throttling function has to
1995 * be disabled temporarily here, not permanently.
1996 */
1997 enabled = bs->io_limits_enabled;
1998 bs->io_limits_enabled = false;
1999 ret = bdrv_read(bs, 0, buf, 1);
2000 bs->io_limits_enabled = enabled;
2001 if (ret < 0)
2002 return -1;
2003 /* test msdos magic */
2004 if (buf[510] != 0x55 || buf[511] != 0xaa)
2005 return -1;
2006 for(i = 0; i < 4; i++) {
2007 p = ((struct partition *)(buf + 0x1be)) + i;
2008 nr_sects = le32_to_cpu(p->nr_sects);
2009 if (nr_sects && p->end_head) {
2010 /* We make the assumption that the partition terminates on
2011 a cylinder boundary */
2012 heads = p->end_head + 1;
2013 sectors = p->end_sector & 63;
2014 if (sectors == 0)
2015 continue;
2016 cylinders = nb_sectors / (heads * sectors);
2017 if (cylinders < 1 || cylinders > 16383)
2018 continue;
2019 *pheads = heads;
2020 *psectors = sectors;
2021 *pcylinders = cylinders;
2022 #if 0
2023 printf("guessed geometry: LCHS=%d %d %d\n",
2024 cylinders, heads, sectors);
2025 #endif
2026 return 0;
2027 }
2028 }
2029 return -1;
2030 }
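
/* Worked example (not part of the original file), with hypothetical
 * numbers: a partition entry with end_head = 15 and end_sector = 63
 * gives heads = 16 and sectors = 63; on a 1048576-sector disk,
 * cylinders = 1048576 / (16 * 63) = 1040, which falls inside 1..16383,
 * so the guess succeeds.
 */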
2031
2032 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
2033 {
2034 int translation, lba_detected = 0;
2035 int cylinders, heads, secs;
2036 uint64_t nb_sectors;
2037
2038 /* if a geometry hint is available, use it */
2039 bdrv_get_geometry(bs, &nb_sectors);
2040 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
2041 translation = bdrv_get_translation_hint(bs);
2042 if (cylinders != 0) {
2043 *pcyls = cylinders;
2044 *pheads = heads;
2045 *psecs = secs;
2046 } else {
2047 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
2048 if (heads > 16) {
2049 /* if heads > 16, it means that a BIOS LBA
2050 translation was active, so the default
2051 hardware geometry is OK */
2052 lba_detected = 1;
2053 goto default_geometry;
2054 } else {
2055 *pcyls = cylinders;
2056 *pheads = heads;
2057 *psecs = secs;
2058 /* disable any translation to be in sync with
2059 the logical geometry */
2060 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
2061 bdrv_set_translation_hint(bs,
2062 BIOS_ATA_TRANSLATION_NONE);
2063 }
2064 }
2065 } else {
2066 default_geometry:
2067 /* if no geometry, use a standard physical disk geometry */
2068 cylinders = nb_sectors / (16 * 63);
2069
2070 if (cylinders > 16383)
2071 cylinders = 16383;
2072 else if (cylinders < 2)
2073 cylinders = 2;
2074 *pcyls = cylinders;
2075 *pheads = 16;
2076 *psecs = 63;
2077 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
2078 if ((*pcyls * *pheads) <= 131072) {
2079 bdrv_set_translation_hint(bs,
2080 BIOS_ATA_TRANSLATION_LARGE);
2081 } else {
2082 bdrv_set_translation_hint(bs,
2083 BIOS_ATA_TRANSLATION_LBA);
2084 }
2085 }
2086 }
2087 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
2088 }
2089 }
2090
2091 void bdrv_set_geometry_hint(BlockDriverState *bs,
2092 int cyls, int heads, int secs)
2093 {
2094 bs->cyls = cyls;
2095 bs->heads = heads;
2096 bs->secs = secs;
2097 }
2098
2099 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
2100 {
2101 bs->translation = translation;
2102 }
2103
2104 void bdrv_get_geometry_hint(BlockDriverState *bs,
2105 int *pcyls, int *pheads, int *psecs)
2106 {
2107 *pcyls = bs->cyls;
2108 *pheads = bs->heads;
2109 *psecs = bs->secs;
2110 }
2111
2112 /* throttling disk io limits */
2113 void bdrv_set_io_limits(BlockDriverState *bs,
2114 BlockIOLimit *io_limits)
2115 {
2116 bs->io_limits = *io_limits;
2117 bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
2118 }
2119
2120 /* Recognize floppy formats */
2121 typedef struct FDFormat {
2122 FDriveType drive;
2123 uint8_t last_sect;
2124 uint8_t max_track;
2125 uint8_t max_head;
2126 FDriveRate rate;
2127 } FDFormat;
2128
2129 static const FDFormat fd_formats[] = {
2130 /* First entry is default format */
2131 /* 1.44 MB 3.5" floppy disks */
2132 { FDRIVE_DRV_144, 18, 80, 1, FDRIVE_RATE_500K, },
2133 { FDRIVE_DRV_144, 20, 80, 1, FDRIVE_RATE_500K, },
2134 { FDRIVE_DRV_144, 21, 80, 1, FDRIVE_RATE_500K, },
2135 { FDRIVE_DRV_144, 21, 82, 1, FDRIVE_RATE_500K, },
2136 { FDRIVE_DRV_144, 21, 83, 1, FDRIVE_RATE_500K, },
2137 { FDRIVE_DRV_144, 22, 80, 1, FDRIVE_RATE_500K, },
2138 { FDRIVE_DRV_144, 23, 80, 1, FDRIVE_RATE_500K, },
2139 { FDRIVE_DRV_144, 24, 80, 1, FDRIVE_RATE_500K, },
2140 /* 2.88 MB 3.5" floppy disks */
2141 { FDRIVE_DRV_288, 36, 80, 1, FDRIVE_RATE_1M, },
2142 { FDRIVE_DRV_288, 39, 80, 1, FDRIVE_RATE_1M, },
2143 { FDRIVE_DRV_288, 40, 80, 1, FDRIVE_RATE_1M, },
2144 { FDRIVE_DRV_288, 44, 80, 1, FDRIVE_RATE_1M, },
2145 { FDRIVE_DRV_288, 48, 80, 1, FDRIVE_RATE_1M, },
2146 /* 720 kB 3.5" floppy disks */
2147 { FDRIVE_DRV_144, 9, 80, 1, FDRIVE_RATE_250K, },
2148 { FDRIVE_DRV_144, 10, 80, 1, FDRIVE_RATE_250K, },
2149 { FDRIVE_DRV_144, 10, 82, 1, FDRIVE_RATE_250K, },
2150 { FDRIVE_DRV_144, 10, 83, 1, FDRIVE_RATE_250K, },
2151 { FDRIVE_DRV_144, 13, 80, 1, FDRIVE_RATE_250K, },
2152 { FDRIVE_DRV_144, 14, 80, 1, FDRIVE_RATE_250K, },
2153 /* 1.2 MB 5.25" floppy disks */
2154 { FDRIVE_DRV_120, 15, 80, 1, FDRIVE_RATE_500K, },
2155 { FDRIVE_DRV_120, 18, 80, 1, FDRIVE_RATE_500K, },
2156 { FDRIVE_DRV_120, 18, 82, 1, FDRIVE_RATE_500K, },
2157 { FDRIVE_DRV_120, 18, 83, 1, FDRIVE_RATE_500K, },
2158 { FDRIVE_DRV_120, 20, 80, 1, FDRIVE_RATE_500K, },
2159 /* 720 kB 5.25" floppy disks */
2160 { FDRIVE_DRV_120, 9, 80, 1, FDRIVE_RATE_250K, },
2161 { FDRIVE_DRV_120, 11, 80, 1, FDRIVE_RATE_250K, },
2162 /* 360 kB 5.25" floppy disks */
2163 { FDRIVE_DRV_120, 9, 40, 1, FDRIVE_RATE_300K, },
2164 { FDRIVE_DRV_120, 9, 40, 0, FDRIVE_RATE_300K, },
2165 { FDRIVE_DRV_120, 10, 41, 1, FDRIVE_RATE_300K, },
2166 { FDRIVE_DRV_120, 10, 42, 1, FDRIVE_RATE_300K, },
2167 /* 320 kB 5.25" floppy disks */
2168 { FDRIVE_DRV_120, 8, 40, 1, FDRIVE_RATE_250K, },
2169 { FDRIVE_DRV_120, 8, 40, 0, FDRIVE_RATE_250K, },
2170 /* 360 kB must match 5.25" better than 3.5"... */
2171 { FDRIVE_DRV_144, 9, 80, 0, FDRIVE_RATE_250K, },
2172 /* end */
2173 { FDRIVE_DRV_NONE, -1, -1, 0, 0, },
2174 };
2175
2176 void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
2177 int *max_track, int *last_sect,
2178 FDriveType drive_in, FDriveType *drive,
2179 FDriveRate *rate)
2180 {
2181 const FDFormat *parse;
2182 uint64_t nb_sectors, size;
2183 int i, first_match, match;
2184
2185 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
2186 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
2187 /* User defined disk */
2188 *rate = FDRIVE_RATE_500K;
2189 } else {
2190 bdrv_get_geometry(bs, &nb_sectors);
2191 match = -1;
2192 first_match = -1;
2193 for (i = 0; ; i++) {
2194 parse = &fd_formats[i];
2195 if (parse->drive == FDRIVE_DRV_NONE) {
2196 break;
2197 }
2198 if (drive_in == parse->drive ||
2199 drive_in == FDRIVE_DRV_NONE) {
2200 size = (parse->max_head + 1) * parse->max_track *
2201 parse->last_sect;
2202 if (nb_sectors == size) {
2203 match = i;
2204 break;
2205 }
2206 if (first_match == -1) {
2207 first_match = i;
2208 }
2209 }
2210 }
2211 if (match == -1) {
2212 if (first_match == -1) {
2213 match = 1;
2214 } else {
2215 match = first_match;
2216 }
2217 parse = &fd_formats[match];
2218 }
2219 *nb_heads = parse->max_head + 1;
2220 *max_track = parse->max_track;
2221 *last_sect = parse->last_sect;
2222 *drive = parse->drive;
2223 *rate = parse->rate;
2224 }
2225 }
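
/* Worked example (illustrative): a raw 1.44 MB image is 2880 sectors
 * long, which matches the first fd_formats entry exactly:
 * (max_head + 1) * max_track * last_sect = 2 * 80 * 18 = 2880.
 * An image whose size matches no entry falls back to the first entry
 * compatible with drive_in (first_match), so a best-effort geometry is
 * always returned.
 */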
2226
2227 int bdrv_get_translation_hint(BlockDriverState *bs)
2228 {
2229 return bs->translation;
2230 }
2231
2232 void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
2233 BlockErrorAction on_write_error)
2234 {
2235 bs->on_read_error = on_read_error;
2236 bs->on_write_error = on_write_error;
2237 }
2238
2239 BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
2240 {
2241 return is_read ? bs->on_read_error : bs->on_write_error;
2242 }
2243
2244 int bdrv_is_read_only(BlockDriverState *bs)
2245 {
2246 return bs->read_only;
2247 }
2248
2249 int bdrv_is_sg(BlockDriverState *bs)
2250 {
2251 return bs->sg;
2252 }
2253
2254 int bdrv_enable_write_cache(BlockDriverState *bs)
2255 {
2256 return bs->enable_write_cache;
2257 }
2258
2259 int bdrv_is_encrypted(BlockDriverState *bs)
2260 {
2261 if (bs->backing_hd && bs->backing_hd->encrypted)
2262 return 1;
2263 return bs->encrypted;
2264 }
2265
2266 int bdrv_key_required(BlockDriverState *bs)
2267 {
2268 BlockDriverState *backing_hd = bs->backing_hd;
2269
2270 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2271 return 1;
2272 return (bs->encrypted && !bs->valid_key);
2273 }
2274
2275 int bdrv_set_key(BlockDriverState *bs, const char *key)
2276 {
2277 int ret;
2278 if (bs->backing_hd && bs->backing_hd->encrypted) {
2279 ret = bdrv_set_key(bs->backing_hd, key);
2280 if (ret < 0)
2281 return ret;
2282 if (!bs->encrypted)
2283 return 0;
2284 }
2285 if (!bs->encrypted) {
2286 return -EINVAL;
2287 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2288 return -ENOMEDIUM;
2289 }
2290 ret = bs->drv->bdrv_set_key(bs, key);
2291 if (ret < 0) {
2292 bs->valid_key = 0;
2293 } else if (!bs->valid_key) {
2294 bs->valid_key = 1;
2295 /* call the change callback now, we skipped it on open */
2296 bdrv_dev_change_media_cb(bs, true);
2297 }
2298 return ret;
2299 }
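
/* Usage sketch (hypothetical caller; 'passphrase' is assumed to come
 * from the monitor or the user):
 */
#if 0
    if (bdrv_key_required(bs)) {
        if (bdrv_set_key(bs, passphrase) < 0) {
            /* wrong key: bs->valid_key stays 0 and I/O keeps failing */
        }
    }
#endif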
2300
2301 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
2302 {
2303 if (!bs->drv) {
2304 buf[0] = '\0';
2305 } else {
2306 pstrcpy(buf, buf_size, bs->drv->format_name);
2307 }
2308 }
2309
2310 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2311 void *opaque)
2312 {
2313 BlockDriver *drv;
2314
2315 QLIST_FOREACH(drv, &bdrv_drivers, list) {
2316 it(opaque, drv->format_name);
2317 }
2318 }
2319
2320 BlockDriverState *bdrv_find(const char *name)
2321 {
2322 BlockDriverState *bs;
2323
2324 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2325 if (!strcmp(name, bs->device_name)) {
2326 return bs;
2327 }
2328 }
2329 return NULL;
2330 }
2331
2332 BlockDriverState *bdrv_next(BlockDriverState *bs)
2333 {
2334 if (!bs) {
2335 return QTAILQ_FIRST(&bdrv_states);
2336 }
2337 return QTAILQ_NEXT(bs, list);
2338 }
2339
2340 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
2341 {
2342 BlockDriverState *bs;
2343
2344 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2345 it(opaque, bs);
2346 }
2347 }
2348
2349 const char *bdrv_get_device_name(BlockDriverState *bs)
2350 {
2351 return bs->device_name;
2352 }
2353
2354 void bdrv_flush_all(void)
2355 {
2356 BlockDriverState *bs;
2357
2358 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2359 bdrv_flush(bs);
2360 }
2361 }
2362
2363 int bdrv_has_zero_init(BlockDriverState *bs)
2364 {
2365 assert(bs->drv);
2366
2367 if (bs->drv->bdrv_has_zero_init) {
2368 return bs->drv->bdrv_has_zero_init(bs);
2369 }
2370
2371 return 1;
2372 }
2373
2374 typedef struct BdrvCoIsAllocatedData {
2375 BlockDriverState *bs;
2376 int64_t sector_num;
2377 int nb_sectors;
2378 int *pnum;
2379 int ret;
2380 bool done;
2381 } BdrvCoIsAllocatedData;
2382
2383 /*
2384 * Returns true iff the specified sector is present in the disk image. Drivers
2385 * not implementing the functionality are assumed to not support backing files,
2386 * hence all their sectors are reported as allocated.
2387 *
2388 * If 'sector_num' is beyond the end of the disk image the return value is 0
2389 * and 'pnum' is set to 0.
2390 *
2391 * 'pnum' is set to the number of sectors (including and immediately following
2392 * the specified sector) that are known to be in the same
2393 * allocated/unallocated state.
2394 *
2395 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
2396 * beyond the end of the disk image it will be clamped.
2397 */
2398 int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
2399 int nb_sectors, int *pnum)
2400 {
2401 int64_t n;
2402
2403 if (sector_num >= bs->total_sectors) {
2404 *pnum = 0;
2405 return 0;
2406 }
2407
2408 n = bs->total_sectors - sector_num;
2409 if (n < nb_sectors) {
2410 nb_sectors = n;
2411 }
2412
2413 if (!bs->drv->bdrv_co_is_allocated) {
2414 *pnum = nb_sectors;
2415 return 1;
2416 }
2417
2418 return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
2419 }
2420
2421 /* Coroutine wrapper for bdrv_is_allocated() */
2422 static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
2423 {
2424 BdrvCoIsAllocatedData *data = opaque;
2425 BlockDriverState *bs = data->bs;
2426
2427 data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
2428 data->pnum);
2429 data->done = true;
2430 }
2431
2432 /*
2433 * Synchronous wrapper around bdrv_co_is_allocated().
2434 *
2435 * See bdrv_co_is_allocated() for details.
2436 */
2437 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
2438 int *pnum)
2439 {
2440 Coroutine *co;
2441 BdrvCoIsAllocatedData data = {
2442 .bs = bs,
2443 .sector_num = sector_num,
2444 .nb_sectors = nb_sectors,
2445 .pnum = pnum,
2446 .done = false,
2447 };
2448
2449 co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
2450 qemu_coroutine_enter(co, &data);
2451 while (!data.done) {
2452 qemu_aio_wait();
2453 }
2454 return data.ret;
2455 }
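
/* Usage sketch (hypothetical caller): walk an image and print its
 * allocated/unallocated extents, relying on the clamping described in
 * the bdrv_co_is_allocated() comment above.
 */
#if 0
    uint64_t total;
    int64_t sector = 0;

    bdrv_get_geometry(bs, &total);
    while (sector < (int64_t)total) {
        int num;
        int ret = bdrv_is_allocated(bs, sector, INT_MAX, &num);
        printf("%" PRId64 "+%d: %s\n", sector, num,
               ret ? "allocated" : "unallocated");
        sector += num;
    }
#endif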
2456
2457 BlockInfoList *qmp_query_block(Error **errp)
2458 {
2459 BlockInfoList *head = NULL, *cur_item = NULL;
2460 BlockDriverState *bs;
2461
2462 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2463 BlockInfoList *info = g_malloc0(sizeof(*info));
2464
2465 info->value = g_malloc0(sizeof(*info->value));
2466 info->value->device = g_strdup(bs->device_name);
2467 info->value->type = g_strdup("unknown");
2468 info->value->locked = bdrv_dev_is_medium_locked(bs);
2469 info->value->removable = bdrv_dev_has_removable_media(bs);
2470
2471 if (bdrv_dev_has_removable_media(bs)) {
2472 info->value->has_tray_open = true;
2473 info->value->tray_open = bdrv_dev_is_tray_open(bs);
2474 }
2475
2476 if (bdrv_iostatus_is_enabled(bs)) {
2477 info->value->has_io_status = true;
2478 info->value->io_status = bs->iostatus;
2479 }
2480
2481 if (bs->drv) {
2482 info->value->has_inserted = true;
2483 info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
2484 info->value->inserted->file = g_strdup(bs->filename);
2485 info->value->inserted->ro = bs->read_only;
2486 info->value->inserted->drv = g_strdup(bs->drv->format_name);
2487 info->value->inserted->encrypted = bs->encrypted;
2488 if (bs->backing_file[0]) {
2489 info->value->inserted->has_backing_file = true;
2490 info->value->inserted->backing_file = g_strdup(bs->backing_file);
2491 }
2492
2493 if (bs->io_limits_enabled) {
2494 info->value->inserted->bps =
2495 bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2496 info->value->inserted->bps_rd =
2497 bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
2498 info->value->inserted->bps_wr =
2499 bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
2500 info->value->inserted->iops =
2501 bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2502 info->value->inserted->iops_rd =
2503 bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
2504 info->value->inserted->iops_wr =
2505 bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
2506 }
2507 }
2508
2509 /* XXX: waiting for the qapi to support GSList */
2510 if (!cur_item) {
2511 head = cur_item = info;
2512 } else {
2513 cur_item->next = info;
2514 cur_item = info;
2515 }
2516 }
2517
2518 return head;
2519 }
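
/* Shape of one element of the query-block reply (illustrative,
 * abbreviated):
 *   { "device": "ide0-hd0", "type": "unknown", "removable": false,
 *     "locked": false,
 *     "inserted": { "file": "disk.qcow2", "ro": false,
 *                   "drv": "qcow2", "encrypted": false } }
 */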
2520
2521 /* Consider exposing this as a full-fledged QMP command */
2522 static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
2523 {
2524 BlockStats *s;
2525
2526 s = g_malloc0(sizeof(*s));
2527
2528 if (bs->device_name[0]) {
2529 s->has_device = true;
2530 s->device = g_strdup(bs->device_name);
2531 }
2532
2533 s->stats = g_malloc0(sizeof(*s->stats));
2534 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
2535 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
2536 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
2537 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
2538 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
2539 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
2540 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
2541 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
2542 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
2543
2544 if (bs->file) {
2545 s->has_parent = true;
2546 s->parent = qmp_query_blockstat(bs->file, NULL);
2547 }
2548
2549 return s;
2550 }
2551
2552 BlockStatsList *qmp_query_blockstats(Error **errp)
2553 {
2554 BlockStatsList *head = NULL, *cur_item = NULL;
2555 BlockDriverState *bs;
2556
2557 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2558 BlockStatsList *info = g_malloc0(sizeof(*info));
2559 info->value = qmp_query_blockstat(bs, NULL);
2560
2561 /* XXX: waiting for the qapi to support GSList */
2562 if (!cur_item) {
2563 head = cur_item = info;
2564 } else {
2565 cur_item->next = info;
2566 cur_item = info;
2567 }
2568 }
2569
2570 return head;
2571 }
2572
2573 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2574 {
2575 if (bs->backing_hd && bs->backing_hd->encrypted)
2576 return bs->backing_file;
2577 else if (bs->encrypted)
2578 return bs->filename;
2579 else
2580 return NULL;
2581 }
2582
2583 void bdrv_get_backing_filename(BlockDriverState *bs,
2584 char *filename, int filename_size)
2585 {
2586 pstrcpy(filename, filename_size, bs->backing_file);
2587 }
2588
2589 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
2590 const uint8_t *buf, int nb_sectors)
2591 {
2592 BlockDriver *drv = bs->drv;
2593 if (!drv)
2594 return -ENOMEDIUM;
2595 if (!drv->bdrv_write_compressed)
2596 return -ENOTSUP;
2597 if (bdrv_check_request(bs, sector_num, nb_sectors))
2598 return -EIO;
2599
2600 if (bs->dirty_bitmap) {
2601 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2602 }
2603
2604 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2605 }
2606
2607 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2608 {
2609 BlockDriver *drv = bs->drv;
2610 if (!drv)
2611 return -ENOMEDIUM;
2612 if (!drv->bdrv_get_info)
2613 return -ENOTSUP;
2614 memset(bdi, 0, sizeof(*bdi));
2615 return drv->bdrv_get_info(bs, bdi);
2616 }
2617
2618 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2619 int64_t pos, int size)
2620 {
2621 BlockDriver *drv = bs->drv;
2622 if (!drv)
2623 return -ENOMEDIUM;
2624 if (drv->bdrv_save_vmstate)
2625 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2626 if (bs->file)
2627 return bdrv_save_vmstate(bs->file, buf, pos, size);
2628 return -ENOTSUP;
2629 }
2630
2631 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2632 int64_t pos, int size)
2633 {
2634 BlockDriver *drv = bs->drv;
2635 if (!drv)
2636 return -ENOMEDIUM;
2637 if (drv->bdrv_load_vmstate)
2638 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2639 if (bs->file)
2640 return bdrv_load_vmstate(bs->file, buf, pos, size);
2641 return -ENOTSUP;
2642 }
2643
2644 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2645 {
2646 BlockDriver *drv = bs->drv;
2647
2648 if (!drv || !drv->bdrv_debug_event) {
2649 return;
2650 }
2651
2652 drv->bdrv_debug_event(bs, event);
2654 }
2655
2656 /**************************************************************/
2657 /* handling of snapshots */
2658
2659 int bdrv_can_snapshot(BlockDriverState *bs)
2660 {
2661 BlockDriver *drv = bs->drv;
2662 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
2663 return 0;
2664 }
2665
2666 if (!drv->bdrv_snapshot_create) {
2667 if (bs->file != NULL) {
2668 return bdrv_can_snapshot(bs->file);
2669 }
2670 return 0;
2671 }
2672
2673 return 1;
2674 }
2675
2676 int bdrv_is_snapshot(BlockDriverState *bs)
2677 {
2678 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2679 }
2680
2681 BlockDriverState *bdrv_snapshots(void)
2682 {
2683 BlockDriverState *bs;
2684
2685 if (bs_snapshots) {
2686 return bs_snapshots;
2687 }
2688
2689 bs = NULL;
2690 while ((bs = bdrv_next(bs))) {
2691 if (bdrv_can_snapshot(bs)) {
2692 bs_snapshots = bs;
2693 return bs;
2694 }
2695 }
2696 return NULL;
2697 }
2698
2699 int bdrv_snapshot_create(BlockDriverState *bs,
2700 QEMUSnapshotInfo *sn_info)
2701 {
2702 BlockDriver *drv = bs->drv;
2703 if (!drv)
2704 return -ENOMEDIUM;
2705 if (drv->bdrv_snapshot_create)
2706 return drv->bdrv_snapshot_create(bs, sn_info);
2707 if (bs->file)
2708 return bdrv_snapshot_create(bs->file, sn_info);
2709 return -ENOTSUP;
2710 }
2711
2712 int bdrv_snapshot_goto(BlockDriverState *bs,
2713 const char *snapshot_id)
2714 {
2715 BlockDriver *drv = bs->drv;
2716 int ret, open_ret;
2717
2718 if (!drv)
2719 return -ENOMEDIUM;
2720 if (drv->bdrv_snapshot_goto)
2721 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2722
2723 if (bs->file) {
2724 drv->bdrv_close(bs);
2725 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2726 open_ret = drv->bdrv_open(bs, bs->open_flags);
2727 if (open_ret < 0) {
2728 bdrv_delete(bs->file);
2729 bs->drv = NULL;
2730 return open_ret;
2731 }
2732 return ret;
2733 }
2734
2735 return -ENOTSUP;
2736 }
2737
2738 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2739 {
2740 BlockDriver *drv = bs->drv;
2741 if (!drv)
2742 return -ENOMEDIUM;
2743 if (drv->bdrv_snapshot_delete)
2744 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2745 if (bs->file)
2746 return bdrv_snapshot_delete(bs->file, snapshot_id);
2747 return -ENOTSUP;
2748 }
2749
2750 int bdrv_snapshot_list(BlockDriverState *bs,
2751 QEMUSnapshotInfo **psn_info)
2752 {
2753 BlockDriver *drv = bs->drv;
2754 if (!drv)
2755 return -ENOMEDIUM;
2756 if (drv->bdrv_snapshot_list)
2757 return drv->bdrv_snapshot_list(bs, psn_info);
2758 if (bs->file)
2759 return bdrv_snapshot_list(bs->file, psn_info);
2760 return -ENOTSUP;
2761 }
2762
2763 int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2764 const char *snapshot_name)
2765 {
2766 BlockDriver *drv = bs->drv;
2767 if (!drv) {
2768 return -ENOMEDIUM;
2769 }
2770 if (!bs->read_only) {
2771 return -EINVAL;
2772 }
2773 if (drv->bdrv_snapshot_load_tmp) {
2774 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2775 }
2776 return -ENOTSUP;
2777 }
2778
2779 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2780 const char *backing_file)
2781 {
2782 if (!bs->drv) {
2783 return NULL;
2784 }
2785
2786 if (bs->backing_hd) {
2787 if (strcmp(bs->backing_file, backing_file) == 0) {
2788 return bs->backing_hd;
2789 } else {
2790 return bdrv_find_backing_image(bs->backing_hd, backing_file);
2791 }
2792 }
2793
2794 return NULL;
2795 }
2796
2797 #define NB_SUFFIXES 4
2798
2799 char *get_human_readable_size(char *buf, int buf_size, int64_t size)
2800 {
2801 static const char suffixes[NB_SUFFIXES] = "KMGT";
2802 int64_t base;
2803 int i;
2804
2805 if (size <= 999) {
2806 snprintf(buf, buf_size, "%" PRId64, size);
2807 } else {
2808 base = 1024;
2809 for(i = 0; i < NB_SUFFIXES; i++) {
2810 if (size < (10 * base)) {
2811 snprintf(buf, buf_size, "%0.1f%c",
2812 (double)size / base,
2813 suffixes[i]);
2814 break;
2815 } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
2816 snprintf(buf, buf_size, "%" PRId64 "%c",
2817 ((size + (base >> 1)) / base),
2818 suffixes[i]);
2819 break;
2820 }
2821 base = base * 1024;
2822 }
2823 }
2824 return buf;
2825 }
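
/* Example outputs (the 10 * base threshold decides between one decimal
 * place and a rounded integer):
 *   999     -> "999"      (no suffix below 1000)
 *   8192    -> "8.0K"     (8192 < 10 * 1024)
 *   512000  -> "500K"     ((512000 + 512) / 1024, rounded)
 *   1048576 -> "1.0M"
 */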
2826
2827 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2828 {
2829 char buf1[128], date_buf[128], clock_buf[128];
2830 #ifdef _WIN32
2831 struct tm *ptm;
2832 #else
2833 struct tm tm;
2834 #endif
2835 time_t ti;
2836 int64_t secs;
2837
2838 if (!sn) {
2839 snprintf(buf, buf_size,
2840 "%-10s%-20s%7s%20s%15s",
2841 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2842 } else {
2843 ti = sn->date_sec;
2844 #ifdef _WIN32
2845 ptm = localtime(&ti);
2846 strftime(date_buf, sizeof(date_buf),
2847 "%Y-%m-%d %H:%M:%S", ptm);
2848 #else
2849 localtime_r(&ti, &tm);
2850 strftime(date_buf, sizeof(date_buf),
2851 "%Y-%m-%d %H:%M:%S", &tm);
2852 #endif
2853 secs = sn->vm_clock_nsec / 1000000000;
2854 snprintf(clock_buf, sizeof(clock_buf),
2855 "%02d:%02d:%02d.%03d",
2856 (int)(secs / 3600),
2857 (int)((secs / 60) % 60),
2858 (int)(secs % 60),
2859 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2860 snprintf(buf, buf_size,
2861 "%-10s%-20s%7s%20s%15s",
2862 sn->id_str, sn->name,
2863 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2864 date_buf,
2865 clock_buf);
2866 }
2867 return buf;
2868 }
2869
2870 /**************************************************************/
2871 /* async I/Os */
2872
2873 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
2874 QEMUIOVector *qiov, int nb_sectors,
2875 BlockDriverCompletionFunc *cb, void *opaque)
2876 {
2877 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2878
2879 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
2880 cb, opaque, false);
2881 }
2882
2883 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2884 QEMUIOVector *qiov, int nb_sectors,
2885 BlockDriverCompletionFunc *cb, void *opaque)
2886 {
2887 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2888
2889 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
2890 cb, opaque, true);
2891 }
2892
2893
2894 typedef struct MultiwriteCB {
2895 int error;
2896 int num_requests;
2897 int num_callbacks;
2898 struct {
2899 BlockDriverCompletionFunc *cb;
2900 void *opaque;
2901 QEMUIOVector *free_qiov;
2902 } callbacks[];
2903 } MultiwriteCB;
2904
2905 static void multiwrite_user_cb(MultiwriteCB *mcb)
2906 {
2907 int i;
2908
2909 for (i = 0; i < mcb->num_callbacks; i++) {
2910 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
2911 if (mcb->callbacks[i].free_qiov) {
2912 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2913 }
2914 g_free(mcb->callbacks[i].free_qiov);
2915 }
2916 }
2917
2918 static void multiwrite_cb(void *opaque, int ret)
2919 {
2920 MultiwriteCB *mcb = opaque;
2921
2922 trace_multiwrite_cb(mcb, ret);
2923
2924 if (ret < 0 && !mcb->error) {
2925 mcb->error = ret;
2926 }
2927
2928 mcb->num_requests--;
2929 if (mcb->num_requests == 0) {
2930 multiwrite_user_cb(mcb);
2931 g_free(mcb);
2932 }
2933 }
2934
2935 static int multiwrite_req_compare(const void *a, const void *b)
2936 {
2937 const BlockRequest *req1 = a, *req2 = b;
2938
2939 /*
2940 * Note that we can't simply subtract req2->sector from req1->sector
2941 * here as that could overflow the return value.
2942 */
2943 if (req1->sector > req2->sector) {
2944 return 1;
2945 } else if (req1->sector < req2->sector) {
2946 return -1;
2947 } else {
2948 return 0;
2949 }
2950 }
2951
2952 /*
2953 * Takes a bunch of requests and tries to merge them. Returns the number of
2954 * requests that remain after merging.
2955 */
2956 static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2957 int num_reqs, MultiwriteCB *mcb)
2958 {
2959 int i, outidx;
2960
2961 // Sort requests by start sector
2962 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2963
2964 // Check whether adjacent requests are sequential or overlapping. Only
2965 // touching requests are combined, so no gap ever has to be zero-filled.
2966 outidx = 0;
2967 for (i = 1; i < num_reqs; i++) {
2968 int merge = 0;
2969 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2970
2971 // Handle exactly sequential writes and overlapping writes.
2972 if (reqs[i].sector <= oldreq_last) {
2973 merge = 1;
2974 }
2975
2976 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2977 merge = 0;
2978 }
2979
2980 if (merge) {
2981 size_t size;
2982 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
2983 qemu_iovec_init(qiov,
2984 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2985
2986 // Add the first request to the merged one. If the requests are
2987 // overlapping, drop the last sectors of the first request.
2988 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2989 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2990
2991 // We should never need to add any zeros between the two requests
2992 assert (reqs[i].sector <= oldreq_last);
2993
2994 // Add the second request
2995 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2996
2997 reqs[outidx].nb_sectors = qiov->size >> 9;
2998 reqs[outidx].qiov = qiov;
2999
3000 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
3001 } else {
3002 outidx++;
3003 reqs[outidx].sector = reqs[i].sector;
3004 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
3005 reqs[outidx].qiov = reqs[i].qiov;
3006 }
3007 }
3008
3009 return outidx + 1;
3010 }
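
/* Worked example (illustrative): requests {sector 0, 8 sectors} and
 * {sector 8, 8 sectors} satisfy reqs[i].sector <= oldreq_last (8 <= 8)
 * and merge into a single 16-sector request.  For overlapping requests
 * {sector 0, 8 sectors} and {sector 4, 8 sectors}, only the first
 * (4 - 0) << 9 bytes of the first qiov are kept, so the second request
 * wins for sectors 4..7 and the merged request covers sectors 0..11.
 */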
3011
3012 /*
3013 * Submit multiple AIO write requests at once.
3014 *
3015 * On success, the function returns 0 and all requests in the reqs array have
3016 * been submitted. On error it returns -1, and any number of the requests may
3017 * already have been submitted; that is, the callback will be invoked for some
3018 * of the requests but not for others. The caller must check the error field
3019 * of each BlockRequest to know which callbacks to wait for (if error != 0,
3020 * no callback will be invoked for that request).
3021 *
3022 * The implementation may modify the contents of the reqs array, e.g. to merge
3023 * requests. However, the fields opaque and error are left unmodified as they
3024 * are used to signal failure for a single request to the caller.
3025 */
3026 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
3027 {
3028 MultiwriteCB *mcb;
3029 int i;
3030
3031 /* don't submit writes if we don't have a medium */
3032 if (bs->drv == NULL) {
3033 for (i = 0; i < num_reqs; i++) {
3034 reqs[i].error = -ENOMEDIUM;
3035 }
3036 return -1;
3037 }
3038
3039 if (num_reqs == 0) {
3040 return 0;
3041 }
3042
3043 // Create MultiwriteCB structure
3044 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
3045 mcb->num_requests = 0;
3046 mcb->num_callbacks = num_reqs;
3047
3048 for (i = 0; i < num_reqs; i++) {
3049 mcb->callbacks[i].cb = reqs[i].cb;
3050 mcb->callbacks[i].opaque = reqs[i].opaque;
3051 }
3052
3053 // Check for mergeable requests
3054 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
3055
3056 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
3057
3058 /* Run the aio requests. */
3059 mcb->num_requests = num_reqs;
3060 for (i = 0; i < num_reqs; i++) {
3061 bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
3062 reqs[i].nb_sectors, multiwrite_cb, mcb);
3063 }
3064
3065 return 0;
3066 }
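
/* Usage sketch (hypothetical caller; qiov1/qiov2 and the completion
 * callback are assumed to be set up elsewhere):
 */
#if 0
    BlockRequest reqs[2] = {
        { .sector = 0, .nb_sectors = 8, .qiov = qiov1,
          .cb = my_write_cb, .opaque = ctx1 },
        { .sector = 8, .nb_sectors = 8, .qiov = qiov2,
          .cb = my_write_cb, .opaque = ctx2 },
    };

    if (bdrv_aio_multiwrite(bs, reqs, 2) < 0) {
        /* only wait for callbacks of requests with reqs[i].error == 0 */
    }
#endif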
3067
3068 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
3069 {
3070 acb->pool->cancel(acb);
3071 }
3072
3073 /* block I/O throttling */
3074 static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
3075 bool is_write, double elapsed_time, uint64_t *wait)
3076 {
3077 uint64_t bps_limit = 0;
3078 double bytes_limit, bytes_base, bytes_res;
3079 double slice_time, wait_time;
3080
3081 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
3082 bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
3083 } else if (bs->io_limits.bps[is_write]) {
3084 bps_limit = bs->io_limits.bps[is_write];
3085 } else {
3086 if (wait) {
3087 *wait = 0;
3088 }
3089
3090 return false;
3091 }
3092
3093 slice_time = bs->slice_end - bs->slice_start;
3094 slice_time /= (NANOSECONDS_PER_SECOND);
3095 bytes_limit = bps_limit * slice_time;
3096 bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
3097 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
3098 bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
3099 }
3100
3101 /* bytes_base: the number of bytes already read/written, obtained from
3102  * the accumulated statistics of the current slice.
3103  * bytes_res: the remaining bytes of data that need to be read/written.
3104  * (bytes_base + bytes_res) / bps_limit: used to calculate the total
3105  * time needed to complete reading/writing all of the data.
3106  */
3107 bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
3108
3109 if (bytes_base + bytes_res <= bytes_limit) {
3110 if (wait) {
3111 *wait = 0;
3112 }
3113
3114 return false;
3115 }
3116
3117 /* Calc approx time to dispatch */
3118 wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
3119
3120 /* When the I/O rate at runtime exceeds the limit,
3121  * bs->slice_end needs to be extended so that the current statistics
3122  * are kept until the timer fires; the amount of the extension was
3123  * tuned experimentally.
3124  */
3125 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
3126 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
3127 if (wait) {
3128 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
3129 }
3130
3131 return true;
3132 }
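
/* Worked example (illustrative): with bps_limit = 1048576 (1 MiB/s) and
 * a 100 ms slice, bytes_limit = 104857.6.  A pending 128 KiB write with
 * bytes_base = 0 exceeds that, so the request is delayed by roughly
 * (0 + 131072) / 1048576 - elapsed_time = 0.125s - elapsed_time.
 */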
3133
3134 static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
3135 double elapsed_time, uint64_t *wait)
3136 {
3137 uint64_t iops_limit = 0;
3138 double ios_limit, ios_base;
3139 double slice_time, wait_time;
3140
3141 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
3142 iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
3143 } else if (bs->io_limits.iops[is_write]) {
3144 iops_limit = bs->io_limits.iops[is_write];
3145 } else {
3146 if (wait) {
3147 *wait = 0;
3148 }
3149
3150 return false;
3151 }
3152
3153 slice_time = bs->slice_end - bs->slice_start;
3154 slice_time /= (NANOSECONDS_PER_SECOND);
3155 ios_limit = iops_limit * slice_time;
3156 ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
3157 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
3158 ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
3159 }
3160
3161 if (ios_base + 1 <= ios_limit) {
3162 if (wait) {
3163 *wait = 0;
3164 }
3165
3166 return false;
3167 }
3168
3169 /* Calc approx time to dispatch */
3170 wait_time = (ios_base + 1) / iops_limit;
3171 if (wait_time > elapsed_time) {
3172 wait_time = wait_time - elapsed_time;
3173 } else {
3174 wait_time = 0;
3175 }
3176
3177 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
3178 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
3179 if (wait) {
3180 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
3181 }
3182
3183 return true;
3184 }
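
/* Worked example (illustrative): with iops_limit = 100 and a 100 ms
 * slice, ios_limit = 10; the 11th request of a slice is delayed by
 * about (10 + 1) / 100 = 0.11s minus the time already elapsed.
 */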
3185
3186 static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
3187 bool is_write, int64_t *wait)
3188 {
3189 int64_t now, max_wait;
3190 uint64_t bps_wait = 0, iops_wait = 0;
3191 double elapsed_time;
3192 int bps_ret, iops_ret;
3193
3194 now = qemu_get_clock_ns(vm_clock);
3195 if ((bs->slice_start < now)
3196 && (bs->slice_end > now)) {
3197 bs->slice_end = now + bs->slice_time;
3198 } else {
3199 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
3200 bs->slice_start = now;
3201 bs->slice_end = now + bs->slice_time;
3202
3203 bs->io_base.bytes[is_write] = bs->nr_bytes[is_write];
3204 bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
3205
3206 bs->io_base.ios[is_write] = bs->nr_ops[is_write];
3207 bs->io_base.ios[!is_write] = bs->nr_ops[!is_write];
3208 }
3209
3210 elapsed_time = now - bs->slice_start;
3211 elapsed_time /= (NANOSECONDS_PER_SECOND);
3212
3213 bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
3214 is_write, elapsed_time, &bps_wait);
3215 iops_ret = bdrv_exceed_iops_limits(bs, is_write,
3216 elapsed_time, &iops_wait);
3217 if (bps_ret || iops_ret) {
3218 max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
3219 if (wait) {
3220 *wait = max_wait;
3221 }
3222
3223 now = qemu_get_clock_ns(vm_clock);
3224 if (bs->slice_end < now + max_wait) {
3225 bs->slice_end = now + max_wait;
3226 }
3227
3228 return true;
3229 }
3230
3231 if (wait) {
3232 *wait = 0;
3233 }
3234
3235 return false;
3236 }
3237
3238 /**************************************************************/
3239 /* async block device emulation */
3240
3241 typedef struct BlockDriverAIOCBSync {
3242 BlockDriverAIOCB common;
3243 QEMUBH *bh;
3244 int ret;
3245 /* vector translation state */
3246 QEMUIOVector *qiov;
3247 uint8_t *bounce;
3248 int is_write;
3249 } BlockDriverAIOCBSync;
3250
3251 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
3252 {
3253 BlockDriverAIOCBSync *acb =
3254 container_of(blockacb, BlockDriverAIOCBSync, common);
3255 qemu_bh_delete(acb->bh);
3256 acb->bh = NULL;
3257 qemu_aio_release(acb);
3258 }
3259
3260 static AIOPool bdrv_em_aio_pool = {
3261 .aiocb_size = sizeof(BlockDriverAIOCBSync),
3262 .cancel = bdrv_aio_cancel_em,
3263 };
3264
3265 static void bdrv_aio_bh_cb(void *opaque)
3266 {
3267 BlockDriverAIOCBSync *acb = opaque;
3268
3269 if (!acb->is_write)
3270 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
3271 qemu_vfree(acb->bounce);
3272 acb->common.cb(acb->common.opaque, acb->ret);
3273 qemu_bh_delete(acb->bh);
3274 acb->bh = NULL;
3275 qemu_aio_release(acb);
3276 }
3277
3278 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
3279 int64_t sector_num,
3280 QEMUIOVector *qiov,
3281 int nb_sectors,
3282 BlockDriverCompletionFunc *cb,
3283 void *opaque,
3284 int is_write)
3286 {
3287 BlockDriverAIOCBSync *acb;
3288
3289 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
3290 acb->is_write = is_write;
3291 acb->qiov = qiov;
3292 acb->bounce = qemu_blockalign(bs, qiov->size);
3293 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
3294
3295 if (is_write) {
3296 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
3297 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
3298 } else {
3299 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
3300 }
3301
3302 qemu_bh_schedule(acb->bh);
3303
3304 return &acb->common;
3305 }
3306
3307 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
3308 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
3309 BlockDriverCompletionFunc *cb, void *opaque)
3310 {
3311 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
3312 }
3313
3314 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
3315 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
3316 BlockDriverCompletionFunc *cb, void *opaque)
3317 {
3318 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
3319 }
3320
3321
3322 typedef struct BlockDriverAIOCBCoroutine {
3323 BlockDriverAIOCB common;
3324 BlockRequest req;
3325 bool is_write;
3326 QEMUBH* bh;
3327 } BlockDriverAIOCBCoroutine;
3328
3329 static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
3330 {
3331 qemu_aio_flush();
3332 }
3333
3334 static AIOPool bdrv_em_co_aio_pool = {
3335 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
3336 .cancel = bdrv_aio_co_cancel_em,
3337 };
3338
3339 static void bdrv_co_em_bh(void *opaque)
3340 {
3341 BlockDriverAIOCBCoroutine *acb = opaque;
3342
3343 acb->common.cb(acb->common.opaque, acb->req.error);
3344 qemu_bh_delete(acb->bh);
3345 qemu_aio_release(acb);
3346 }
3347
3348 /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
3349 static void coroutine_fn bdrv_co_do_rw(void *opaque)
3350 {
3351 BlockDriverAIOCBCoroutine *acb = opaque;
3352 BlockDriverState *bs = acb->common.bs;
3353
3354 if (!acb->is_write) {
3355 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
3356 acb->req.nb_sectors, acb->req.qiov, 0);
3357 } else {
3358 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
3359 acb->req.nb_sectors, acb->req.qiov, 0);
3360 }
3361
3362 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3363 qemu_bh_schedule(acb->bh);
3364 }
3365
3366 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
3367 int64_t sector_num,
3368 QEMUIOVector *qiov,
3369 int nb_sectors,
3370 BlockDriverCompletionFunc *cb,
3371 void *opaque,
3372 bool is_write)
3373 {
3374 Coroutine *co;
3375 BlockDriverAIOCBCoroutine *acb;
3376
3377 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3378 acb->req.sector = sector_num;
3379 acb->req.nb_sectors = nb_sectors;
3380 acb->req.qiov = qiov;
3381 acb->is_write = is_write;
3382
3383 co = qemu_coroutine_create(bdrv_co_do_rw);
3384 qemu_coroutine_enter(co, acb);
3385
3386 return &acb->common;
3387 }
3388
3389 static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
3390 {
3391 BlockDriverAIOCBCoroutine *acb = opaque;
3392 BlockDriverState *bs = acb->common.bs;
3393
3394 acb->req.error = bdrv_co_flush(bs);
3395 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3396 qemu_bh_schedule(acb->bh);
3397 }
3398
3399 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
3400 BlockDriverCompletionFunc *cb, void *opaque)
3401 {
3402 trace_bdrv_aio_flush(bs, opaque);
3403
3404 Coroutine *co;
3405 BlockDriverAIOCBCoroutine *acb;
3406
3407 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3408 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
3409 qemu_coroutine_enter(co, acb);
3410
3411 return &acb->common;
3412 }
3413
3414 static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
3415 {
3416 BlockDriverAIOCBCoroutine *acb = opaque;
3417 BlockDriverState *bs = acb->common.bs;
3418
3419 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
3420 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3421 qemu_bh_schedule(acb->bh);
3422 }
3423
3424 BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
3425 int64_t sector_num, int nb_sectors,
3426 BlockDriverCompletionFunc *cb, void *opaque)
3427 {
3428 Coroutine *co;
3429 BlockDriverAIOCBCoroutine *acb;
3430
3431 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
3432
3433 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3434 acb->req.sector = sector_num;
3435 acb->req.nb_sectors = nb_sectors;
3436 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
3437 qemu_coroutine_enter(co, acb);
3438
3439 return &acb->common;
3440 }
3441
3442 void bdrv_init(void)
3443 {
3444 module_call_init(MODULE_INIT_BLOCK);
3445 }
3446
3447 void bdrv_init_with_whitelist(void)
3448 {
3449 use_bdrv_whitelist = 1;
3450 bdrv_init();
3451 }
3452
3453 void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
3454 BlockDriverCompletionFunc *cb, void *opaque)
3455 {
3456 BlockDriverAIOCB *acb;
3457
3458 if (pool->free_aiocb) {
3459 acb = pool->free_aiocb;
3460 pool->free_aiocb = acb->next;
3461 } else {
3462 acb = g_malloc0(pool->aiocb_size);
3463 acb->pool = pool;
3464 }
3465 acb->bs = bs;
3466 acb->cb = cb;
3467 acb->opaque = opaque;
3468 return acb;
3469 }
3470
3471 void qemu_aio_release(void *p)
3472 {
3473 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
3474 AIOPool *pool = acb->pool;
3475 acb->next = pool->free_aiocb;
3476 pool->free_aiocb = acb;
3477 }
3478
3479 /**************************************************************/
3480 /* Coroutine block device emulation */
3481
3482 typedef struct CoroutineIOCompletion {
3483 Coroutine *coroutine;
3484 int ret;
3485 } CoroutineIOCompletion;
3486
3487 static void bdrv_co_io_em_complete(void *opaque, int ret)
3488 {
3489 CoroutineIOCompletion *co = opaque;
3490
3491 co->ret = ret;
3492 qemu_coroutine_enter(co->coroutine, NULL);
3493 }
3494
3495 static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
3496 int nb_sectors, QEMUIOVector *iov,
3497 bool is_write)
3498 {
3499 CoroutineIOCompletion co = {
3500 .coroutine = qemu_coroutine_self(),
3501 };
3502 BlockDriverAIOCB *acb;
3503
3504 if (is_write) {
3505 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
3506 bdrv_co_io_em_complete, &co);
3507 } else {
3508 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3509 bdrv_co_io_em_complete, &co);
3510 }
3511
3512 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
3513 if (!acb) {
3514 return -EIO;
3515 }
3516 qemu_coroutine_yield();
3517
3518 return co.ret;
3519 }
3520
3521 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3522 int64_t sector_num, int nb_sectors,
3523 QEMUIOVector *iov)
3524 {
3525 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3526 }
3527
3528 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3529 int64_t sector_num, int nb_sectors,
3530 QEMUIOVector *iov)
3531 {
3532 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3533 }
3534
3535 static void coroutine_fn bdrv_flush_co_entry(void *opaque)
3536 {
3537 RwCo *rwco = opaque;
3538
3539 rwco->ret = bdrv_co_flush(rwco->bs);
3540 }
3541
3542 int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
3543 {
3544 int ret;
3545
3546 if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
3547 return 0;
3548 }
3549
3550 /* Write back cached data to the OS even with cache=unsafe */
3551 if (bs->drv->bdrv_co_flush_to_os) {
3552 ret = bs->drv->bdrv_co_flush_to_os(bs);
3553 if (ret < 0) {
3554 return ret;
3555 }
3556 }
3557
3558 /* But don't actually force it to the disk with cache=unsafe */
3559 if (bs->open_flags & BDRV_O_NO_FLUSH) {
3560 return 0;
3561 }
3562
3563 if (bs->drv->bdrv_co_flush_to_disk) {
3564 ret = bs->drv->bdrv_co_flush_to_disk(bs);
3565 } else if (bs->drv->bdrv_aio_flush) {
3566 BlockDriverAIOCB *acb;
3567 CoroutineIOCompletion co = {
3568 .coroutine = qemu_coroutine_self(),
3569 };
3570
3571 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3572 if (acb == NULL) {
3573 ret = -EIO;
3574 } else {
3575 qemu_coroutine_yield();
3576 ret = co.ret;
3577 }
3578 } else {
3579 /*
3580  * Some block drivers always operate in either writethrough or unsafe
3581  * mode and therefore don't support bdrv_flush. Usually qemu doesn't
3582  * know how the backing server works (because the behaviour is
3583  * hardcoded or depends on server-side configuration), so we can't
3584  * ensure that everything is safe on disk. Returning an error doesn't
3585  * work either, because that would break guests even if the server
3586  * operates in writethrough mode.
3587  *
3588  * Let's hope the user knows what he's doing.
3589  */
3590 ret = 0;
3591 }
3592 if (ret < 0) {
3593 return ret;
3594 }
3595
3596 /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
3597 * in the case of cache=unsafe, so there are no useless flushes.
3598 */
3599 return bdrv_co_flush(bs->file);
3600 }
3601
3602 void bdrv_invalidate_cache(BlockDriverState *bs)
3603 {
3604 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3605 bs->drv->bdrv_invalidate_cache(bs);
3606 }
3607 }
3608
3609 void bdrv_invalidate_cache_all(void)
3610 {
3611 BlockDriverState *bs;
3612
3613 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3614 bdrv_invalidate_cache(bs);
3615 }
3616 }
3617
3618 int bdrv_flush(BlockDriverState *bs)
3619 {
3620 Coroutine *co;
3621 RwCo rwco = {
3622 .bs = bs,
3623 .ret = NOT_DONE,
3624 };
3625
3626 if (qemu_in_coroutine()) {
3627 /* Fast-path if already in coroutine context */
3628 bdrv_flush_co_entry(&rwco);
3629 } else {
3630 co = qemu_coroutine_create(bdrv_flush_co_entry);
3631 qemu_coroutine_enter(co, &rwco);
3632 while (rwco.ret == NOT_DONE) {
3633 qemu_aio_wait();
3634 }
3635 }
3636
3637 return rwco.ret;
3638 }
3639
3640 static void coroutine_fn bdrv_discard_co_entry(void *opaque)
3641 {
3642 RwCo *rwco = opaque;
3643
3644 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
3645 }
3646
3647 int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
3648 int nb_sectors)
3649 {
3650 if (!bs->drv) {
3651 return -ENOMEDIUM;
3652 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
3653 return -EIO;
3654 } else if (bs->read_only) {
3655 return -EROFS;
3656 } else if (bs->drv->bdrv_co_discard) {
3657 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
3658 } else if (bs->drv->bdrv_aio_discard) {
3659 BlockDriverAIOCB *acb;
3660 CoroutineIOCompletion co = {
3661 .coroutine = qemu_coroutine_self(),
3662 };
3663
3664 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
3665 bdrv_co_io_em_complete, &co);
3666 if (acb == NULL) {
3667 return -EIO;
3668 } else {
3669 qemu_coroutine_yield();
3670 return co.ret;
3671 }
3672 } else {
3673 return 0;
3674 }
3675 }
3676
3677 int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
3678 {
3679 Coroutine *co;
3680 RwCo rwco = {
3681 .bs = bs,
3682 .sector_num = sector_num,
3683 .nb_sectors = nb_sectors,
3684 .ret = NOT_DONE,
3685 };
3686
3687 if (qemu_in_coroutine()) {
3688 /* Fast-path if already in coroutine context */
3689 bdrv_discard_co_entry(&rwco);
3690 } else {
3691 co = qemu_coroutine_create(bdrv_discard_co_entry);
3692 qemu_coroutine_enter(co, &rwco);
3693 while (rwco.ret == NOT_DONE) {
3694 qemu_aio_wait();
3695 }
3696 }
3697
3698 return rwco.ret;
3699 }
3700
3701 /**************************************************************/
3702 /* removable device support */
3703
3704 /**
3705 * Return TRUE if the media is present
3706 */
3707 int bdrv_is_inserted(BlockDriverState *bs)
3708 {
3709 BlockDriver *drv = bs->drv;
3710
3711 if (!drv)
3712 return 0;
3713 if (!drv->bdrv_is_inserted)
3714 return 1;
3715 return drv->bdrv_is_inserted(bs);
3716 }
3717
3718 /**
3719 * Return whether the media changed since the last call to this
3720 * function, or -ENOTSUP if we don't know. Most drivers don't know.
3721 */
3722 int bdrv_media_changed(BlockDriverState *bs)
3723 {
3724 BlockDriver *drv = bs->drv;
3725
3726 if (drv && drv->bdrv_media_changed) {
3727 return drv->bdrv_media_changed(bs);
3728 }
3729 return -ENOTSUP;
3730 }
3731
3732 /**
3733 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3734 */
3735 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3736 {
3737 BlockDriver *drv = bs->drv;
3738
3739 if (drv && drv->bdrv_eject) {
3740 drv->bdrv_eject(bs, eject_flag);
3741 }
3742
3743 if (bs->device_name[0] != '\0') {
3744 bdrv_emit_qmp_eject_event(bs, eject_flag);
3745 }
3746 }
3747
3748 /**
3749 * Lock or unlock the media (if it is locked, the user won't be able
3750 * to eject it manually).
3751 */
3752 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3753 {
3754 BlockDriver *drv = bs->drv;
3755
3756 trace_bdrv_lock_medium(bs, locked);
3757
3758 if (drv && drv->bdrv_lock_medium) {
3759 drv->bdrv_lock_medium(bs, locked);
3760 }
3761 }
3762
3763 /* needed for generic scsi interface */
3764
3765 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3766 {
3767 BlockDriver *drv = bs->drv;
3768
3769 if (drv && drv->bdrv_ioctl)
3770 return drv->bdrv_ioctl(bs, req, buf);
3771 return -ENOTSUP;
3772 }
3773
3774 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3775 unsigned long int req, void *buf,
3776 BlockDriverCompletionFunc *cb, void *opaque)
3777 {
3778 BlockDriver *drv = bs->drv;
3779
3780 if (drv && drv->bdrv_aio_ioctl)
3781 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3782 return NULL;
3783 }
3784
3785 void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3786 {
3787 bs->buffer_alignment = align;
3788 }
3789
3790 void *qemu_blockalign(BlockDriverState *bs, size_t size)
3791 {
3792 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3793 }
3794
3795 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3796 {
3797 int64_t bitmap_size;
3798
3799 bs->dirty_count = 0;
3800 if (enable) {
3801 if (!bs->dirty_bitmap) {
3802 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3803 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3804 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
3805
3806 bs->dirty_bitmap = g_malloc0(bitmap_size);
3807 }
3808 } else {
3809 if (bs->dirty_bitmap) {
3810 g_free(bs->dirty_bitmap);
3811 bs->dirty_bitmap = NULL;
3812 }
3813 }
3814 }
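
/* Worked example (assuming BDRV_SECTORS_PER_DIRTY_CHUNK is 2048, i.e.
 * 1 MiB chunks): a 1 GiB image has 2097152 sectors = 1024 chunks, and
 * at one bit per chunk the rounded-up bitmap is 128 bytes long.
 */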
3815
3816 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3817 {
3818 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
3819
3820 if (bs->dirty_bitmap &&
3821 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
3822 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3823 (1UL << (chunk % (sizeof(unsigned long) * 8))));
3824 } else {
3825 return 0;
3826 }
3827 }
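
/* Example of the bit arithmetic (again assuming 2048 sectors per chunk,
 * with 64-bit unsigned longs): sector 4096 lives in chunk 4096 / 2048 = 2,
 * so the test reads dirty_bitmap[2 / 64] & (1UL << (2 % 64)), i.e. bit 2
 * of word 0.
 */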
3828
3829 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3830 int nr_sectors)
3831 {
3832 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3833 }
3834
3835 int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3836 {
3837 return bs->dirty_count;
3838 }
3839
3840 void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3841 {
3842 assert(bs->in_use != in_use);
3843 bs->in_use = in_use;
3844 }
3845
3846 int bdrv_in_use(BlockDriverState *bs)
3847 {
3848 return bs->in_use;
3849 }
3850
3851 void bdrv_iostatus_enable(BlockDriverState *bs)
3852 {
3853 bs->iostatus_enabled = true;
3854 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3855 }
3856
3857 /* The I/O status is only enabled if the drive explicitly
3858 * enables it _and_ the VM is configured to stop on errors */
3859 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3860 {
3861 return (bs->iostatus_enabled &&
3862 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3863 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3864 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3865 }
3866
3867 void bdrv_iostatus_disable(BlockDriverState *bs)
3868 {
3869 bs->iostatus_enabled = false;
3870 }
3871
3872 void bdrv_iostatus_reset(BlockDriverState *bs)
3873 {
3874 if (bdrv_iostatus_is_enabled(bs)) {
3875 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3876 }
3877 }
3878
3879 /* XXX: Today this is set by device models because it makes the implementation
3880 quite simple. However, the block layer knows about the error, so it's
3881 possible to implement this without device models being involved */
3882 void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3883 {
3884 if (bdrv_iostatus_is_enabled(bs) &&
3885 bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
3886 assert(error >= 0);
3887 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3888 BLOCK_DEVICE_IO_STATUS_FAILED;
3889 }
3890 }
3891
3892 void
3893 bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3894 enum BlockAcctType type)
3895 {
3896 assert(type < BDRV_MAX_IOTYPE);
3897
3898 cookie->bytes = bytes;
3899 cookie->start_time_ns = get_clock();
3900 cookie->type = type;
3901 }
3902
3903 void
3904 bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3905 {
3906 assert(cookie->type < BDRV_MAX_IOTYPE);
3907
3908 bs->nr_bytes[cookie->type] += cookie->bytes;
3909 bs->nr_ops[cookie->type]++;
3910 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
3911 }
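
/* Usage sketch (hypothetical device model): bracket each request with a
 * cookie so bytes, ops and service time are all attributed to one type.
 */
#if 0
    BlockAcctCookie cookie;

    bdrv_acct_start(bs, &cookie, nb_sectors * BDRV_SECTOR_SIZE,
                    BDRV_ACCT_READ);
    /* ... submit the read and, from its completion callback ... */
    bdrv_acct_done(bs, &cookie);
#endif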
3912
3913 int bdrv_img_create(const char *filename, const char *fmt,
3914 const char *base_filename, const char *base_fmt,
3915 char *options, uint64_t img_size, int flags)
3916 {
3917 QEMUOptionParameter *param = NULL, *create_options = NULL;
3918 QEMUOptionParameter *backing_fmt, *backing_file, *size;
3919 BlockDriverState *bs = NULL;
3920 BlockDriver *drv, *proto_drv;
3921 BlockDriver *backing_drv = NULL;
3922 int ret = 0;
3923
3924 /* Find driver and parse its options */
3925 drv = bdrv_find_format(fmt);
3926 if (!drv) {
3927 error_report("Unknown file format '%s'", fmt);
3928 ret = -EINVAL;
3929 goto out;
3930 }
3931
3932 proto_drv = bdrv_find_protocol(filename);
3933 if (!proto_drv) {
3934 error_report("Unknown protocol '%s'", filename);
3935 ret = -EINVAL;
3936 goto out;
3937 }
3938
3939 create_options = append_option_parameters(create_options,
3940 drv->create_options);
3941 create_options = append_option_parameters(create_options,
3942 proto_drv->create_options);
3943
3944 /* Create parameter list with default values */
3945 param = parse_option_parameters("", create_options, param);
3946
3947 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3948
3949 /* Parse -o options */
3950 if (options) {
3951 param = parse_option_parameters(options, create_options, param);
3952 if (param == NULL) {
3953 error_report("Invalid options for file format '%s'.", fmt);
3954 ret = -EINVAL;
3955 goto out;
3956 }
3957 }
3958
3959 if (base_filename) {
3960 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3961 base_filename)) {
3962 error_report("Backing file not supported for file format '%s'",
3963 fmt);
3964 ret = -EINVAL;
3965 goto out;
3966 }
3967 }
3968
3969 if (base_fmt) {
3970 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3971 error_report("Backing file format not supported for file "
3972 "format '%s'", fmt);
3973 ret = -EINVAL;
3974 goto out;
3975 }
3976 }
3977
3978 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3979 if (backing_file && backing_file->value.s) {
3980 if (!strcmp(filename, backing_file->value.s)) {
3981 error_report("Error: Trying to create an image with the "
3982 "same filename as the backing file");
3983 ret = -EINVAL;
3984 goto out;
3985 }
3986 }
3987
3988 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3989 if (backing_fmt && backing_fmt->value.s) {
3990 backing_drv = bdrv_find_format(backing_fmt->value.s);
3991 if (!backing_drv) {
3992 error_report("Unknown backing file format '%s'",
3993 backing_fmt->value.s);
3994 ret = -EINVAL;
3995 goto out;
3996 }
3997 }
3998
3999 // The size for the image must always be specified, with one exception:
4000 // If we are using a backing file, we can obtain the size from there
4001 size = get_option_parameter(param, BLOCK_OPT_SIZE);
4002 if (size && size->value.n == -1) {
4003 if (backing_file && backing_file->value.s) {
4004 uint64_t size;
4005 char buf[32];
4006
4007 bs = bdrv_new("");
4008
4009 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
4010 if (ret < 0) {
4011 error_report("Could not open '%s'", backing_file->value.s);
4012 goto out;
4013 }
4014 bdrv_get_geometry(bs, &size);
4015 size *= 512;
4016
4017 snprintf(buf, sizeof(buf), "%" PRId64, size);
4018 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
4019 } else {
4020 error_report("Image creation needs a size parameter");
4021 ret = -EINVAL;
4022 goto out;
4023 }
4024 }
4025
4026 printf("Formatting '%s', fmt=%s ", filename, fmt);
4027 print_option_parameters(param);
4028 puts("");
4029
4030 ret = bdrv_create(drv, filename, param);
4031
4032 if (ret < 0) {
4033 if (ret == -ENOTSUP) {
4034 error_report("Formatting or formatting option not supported for "
4035 "file format '%s'", fmt);
4036 } else if (ret == -EFBIG) {
4037 error_report("The image size is too large for file format '%s'",
4038 fmt);
4039 } else {
4040 error_report("%s: error while creating %s: %s", filename, fmt,
4041 strerror(-ret));
4042 }
4043 }
4044
4045 out:
4046 free_option_parameters(create_options);
4047 free_option_parameters(param);
4048
4049 if (bs) {
4050 bdrv_delete(bs);
4051 }
4052
4053 return ret;
4054 }
4055
4056 void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
4057 BlockDriverCompletionFunc *cb, void *opaque)
4058 {
4059 BlockJob *job;
4060
4061 if (bs->job || bdrv_in_use(bs)) {
4062 return NULL;
4063 }
4064 bdrv_set_in_use(bs, 1);
4065
4066 job = g_malloc0(job_type->instance_size);
4067 job->job_type = job_type;
4068 job->bs = bs;
4069 job->cb = cb;
4070 job->opaque = opaque;
4071 bs->job = job;
4072 return job;
4073 }
4074
4075 void block_job_complete(BlockJob *job, int ret)
4076 {
4077 BlockDriverState *bs = job->bs;
4078
4079 assert(bs->job == job);
4080 job->cb(job->opaque, ret);
4081 bs->job = NULL;
4082 g_free(job);
4083 bdrv_set_in_use(bs, 0);
4084 }
4085
4086 int block_job_set_speed(BlockJob *job, int64_t value)
4087 {
4088 if (!job->job_type->set_speed) {
4089 return -ENOTSUP;
4090 }
4091 return job->job_type->set_speed(job, value);
4092 }
4093
4094 void block_job_cancel(BlockJob *job)
4095 {
4096 job->cancelled = true;
4097 }
4098
4099 bool block_job_is_cancelled(BlockJob *job)
4100 {
4101 return job->cancelled;
4102 }
4103
4104 void block_job_cancel_sync(BlockJob *job)
4105 {
4106 BlockDriverState *bs = job->bs;
4107
4108 assert(bs->job == job);
4109 block_job_cancel(job);
4110 while (bs->job != NULL && bs->job->busy) {
4111 qemu_aio_wait();
4112 }
4113 }
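
/* Sketch of how a job type is expected to plug into this API
 * (hypothetical names; MyJob, my_job_set_speed and my_cb are not part
 * of this file):
 */
#if 0
    typedef struct MyJob {
        BlockJob common;       /* must be the first member */
        /* job-specific state ... */
    } MyJob;

    static const BlockJobType my_job_type = {
        .instance_size = sizeof(MyJob),
        .job_type      = "my-job",
        .set_speed     = my_job_set_speed,   /* optional */
    };

    MyJob *s = block_job_create(&my_job_type, bs, my_cb, opaque);
    if (!s) {
        /* bs->job was already set, or the device is in use */
    }
#endif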