/*
 * NOTE(review): the original paste carried gitweb blame-view residue here
 * (page title and column headers); replaced with this provenance comment.
 * File: block.c from the QEMU source tree.
 */
fc01f7e7
FB
1/*
2 * QEMU System Emulator block driver
5fafdf24 3 *
fc01f7e7 4 * Copyright (c) 2003 Fabrice Bellard
5fafdf24 5 *
fc01f7e7
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
3990d09a 24#include "config-host.h"
faf07963 25#include "qemu-common.h"
6d519a5f 26#include "trace.h"
376253ec 27#include "monitor.h"
ea2384d3 28#include "block_int.h"
5efa9d5a 29#include "module.h"
f795e743 30#include "qjson.h"
68485420 31#include "qemu-coroutine.h"
b2023818 32#include "qmp-commands.h"
0563e191 33#include "qemu-timer.h"
fc01f7e7 34
71e72a19 35#ifdef CONFIG_BSD
7674e7bf
FB
36#include <sys/types.h>
37#include <sys/stat.h>
38#include <sys/ioctl.h>
72cf2d4f 39#include <sys/queue.h>
c5e97233 40#ifndef __DragonFly__
7674e7bf
FB
41#include <sys/disk.h>
42#endif
c5e97233 43#endif
7674e7bf 44
49dc768d
AL
45#ifdef _WIN32
46#include <windows.h>
47#endif
48
1c9805a3
SH
/* Sentinel stored in an emulation context's 'ret' field while the emulated
 * synchronous operation is still in flight (see bdrv_create()). */
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

/* Per-request flags threaded through the internal read/write paths. */
typedef enum {
    BDRV_REQ_COPY_ON_READ = 0x1,
    BDRV_REQ_ZERO_WRITE = 0x2,
} BdrvRequestFlags;

static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);

/* AIO emulation entry points, installed by bdrv_register() for drivers
 * that do not provide their own bdrv_aio_readv/writev. */
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);

/* Coroutine emulation entry points, likewise installed by bdrv_register(). */
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);

/* Core coroutine read/write paths. */
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
                                               bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors);

/* I/O throttling helpers (definitions elsewhere in this file). */
static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
        double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, int64_t *wait);

/* All named (non-anonymous) BlockDriverStates, in creation order. */
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

/* All registered block drivers (see bdrv_register()). */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* The device to use for VM snapshots */
static BlockDriverState *bs_snapshots;

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
104
9e0b22f4
SH
105#ifdef _WIN32
/* Return non-zero if 'filename' begins with a drive letter and a colon,
 * e.g. "c:" or "D:\foo". */
static int is_windows_drive_prefix(const char *filename)
{
    int first_is_letter = (filename[0] >= 'a' && filename[0] <= 'z') ||
                          (filename[0] >= 'A' && filename[0] <= 'Z');

    return first_is_letter && filename[1] == ':';
}
112
113int is_windows_drive(const char *filename)
114{
115 if (is_windows_drive_prefix(filename) &&
116 filename[2] == '\0')
117 return 1;
118 if (strstart(filename, "\\\\.\\", NULL) ||
119 strstart(filename, "//./", NULL))
120 return 1;
121 return 0;
122}
123#endif
124
0563e191 125/* throttling disk I/O limits */
98f90dba
ZYW
/*
 * Disable I/O throttling on 'bs': wake every queued request, tear down the
 * throttle timer and reset the accounting slice.
 */
void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    /* Drain the throttle queue; with io_limits_enabled already false the
     * released requests will not re-queue themselves. */
    while (qemu_co_queue_next(&bs->throttled_reqs));

    if (bs->block_timer) {
        qemu_del_timer(bs->block_timer);
        qemu_free_timer(bs->block_timer);
        bs->block_timer = NULL;
    }

    /* Reset the accounting window so a later re-enable starts fresh. */
    bs->slice_start = 0;
    bs->slice_end = 0;
    bs->slice_time = 0;
    memset(&bs->io_base, 0, sizeof(bs->io_base));
}
143
0563e191
ZYW
/* Throttle timer callback: wake the next request waiting in the
 * throttled_reqs queue so it can retry against the limits. */
static void bdrv_block_timer(void *opaque)
{
    BlockDriverState *bs = opaque;

    qemu_co_queue_next(&bs->throttled_reqs);
}
150
/*
 * Enable I/O throttling on 'bs': initialise the wait queue, the throttle
 * timer and the first accounting slice.
 */
void bdrv_io_limits_enable(BlockDriverState *bs)
{
    qemu_co_queue_init(&bs->throttled_reqs);
    bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
    bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
    bs->slice_start = qemu_get_clock_ns(vm_clock);
    bs->slice_end = bs->slice_start + bs->slice_time;
    memset(&bs->io_base, 0, sizeof(bs->io_base));
    bs->io_limits_enabled = true;
}
161
162bool bdrv_io_limits_enabled(BlockDriverState *bs)
163{
164 BlockIOLimit *io_limits = &bs->io_limits;
165 return io_limits->bps[BLOCK_IO_LIMIT_READ]
166 || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
167 || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
168 || io_limits->iops[BLOCK_IO_LIMIT_READ]
169 || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
170 || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
171}
172
98f90dba
ZYW
/*
 * Block the calling coroutine until this request fits within the configured
 * I/O limits.  Requests are serviced strictly in FIFO order.
 */
static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     bool is_write, int nb_sectors)
{
    int64_t wait_time = -1;

    /* Queue behind any requests that are already waiting. */
    if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
        qemu_co_queue_wait(&bs->throttled_reqs);
    }

    /* In fact, we hope to keep each request's timing, in FIFO mode. The next
     * throttled requests will not be dequeued until the current request is
     * allowed to be serviced. So if the current request still exceeds the
     * limits, it will be inserted to the head. All requests followed it will
     * be still in throttled_reqs queue.
     */

    while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
        /* Re-arm the timer for the computed wait and sleep at the queue
         * head so FIFO ordering is preserved. */
        qemu_mod_timer(bs->block_timer,
                       wait_time + qemu_get_clock_ns(vm_clock));
        qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
    }

    /* Let the next waiter re-check the limits. */
    qemu_co_queue_next(&bs->throttled_reqs);
}
197
9e0b22f4
SH
198/* check if the path starts with "<protocol>:" */
199static int path_has_protocol(const char *path)
200{
201#ifdef _WIN32
202 if (is_windows_drive(path) ||
203 is_windows_drive_prefix(path)) {
204 return 0;
205 }
206#endif
207
208 return strchr(path, ':') != NULL;
209}
210
83f64091 211int path_is_absolute(const char *path)
3b0d4f61 212{
83f64091 213 const char *p;
21664424
FB
214#ifdef _WIN32
215 /* specific case for names like: "\\.\d:" */
216 if (*path == '/' || *path == '\\')
217 return 1;
218#endif
83f64091
FB
219 p = strchr(path, ':');
220 if (p)
221 p++;
222 else
223 p = path;
3b9f94e1
FB
224#ifdef _WIN32
225 return (*p == '/' || *p == '\\');
226#else
227 return (*p == '/');
228#endif
3b0d4f61
FB
229}
230
83f64091
FB
/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URL are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        /* p: first char after any "<protocol>:" prefix in base_path */
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        /* p1: first char after the last directory separator */
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            /* On Windows the last separator may be a backslash. */
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        /* Keep whichever cut point is furthest right, so the protocol
         * prefix is preserved when there is no directory component. */
        if (p1 > p)
            p = p1;
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        /* Copy the directory part of base_path, then append filename. */
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}
274
5efa9d5a 275void bdrv_register(BlockDriver *bdrv)
ea2384d3 276{
8c5873d6
SH
277 /* Block drivers without coroutine functions need emulation */
278 if (!bdrv->bdrv_co_readv) {
f9f05dc5
KW
279 bdrv->bdrv_co_readv = bdrv_co_readv_em;
280 bdrv->bdrv_co_writev = bdrv_co_writev_em;
281
f8c35c1d
SH
282 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
283 * the block driver lacks aio we need to emulate that too.
284 */
f9f05dc5
KW
285 if (!bdrv->bdrv_aio_readv) {
286 /* add AIO emulation layer */
287 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
288 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
f9f05dc5 289 }
83f64091 290 }
b2e12bc6 291
8a22f02a 292 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
ea2384d3 293}
b338082b
FB
294
295/* create a new block device (by default it is empty) */
296BlockDriverState *bdrv_new(const char *device_name)
297{
1b7bdbc1 298 BlockDriverState *bs;
b338082b 299
7267c094 300 bs = g_malloc0(sizeof(BlockDriverState));
b338082b 301 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
ea2384d3 302 if (device_name[0] != '\0') {
1b7bdbc1 303 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
ea2384d3 304 }
28a7282a 305 bdrv_iostatus_disable(bs);
b338082b
FB
306 return bs;
307}
308
ea2384d3
FB
309BlockDriver *bdrv_find_format(const char *format_name)
310{
311 BlockDriver *drv1;
8a22f02a
SH
312 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
313 if (!strcmp(drv1->format_name, format_name)) {
ea2384d3 314 return drv1;
8a22f02a 315 }
ea2384d3
FB
316 }
317 return NULL;
318}
319
eb852011
MA
320static int bdrv_is_whitelisted(BlockDriver *drv)
321{
322 static const char *whitelist[] = {
323 CONFIG_BDRV_WHITELIST
324 };
325 const char **p;
326
327 if (!whitelist[0])
328 return 1; /* no whitelist, anything goes */
329
330 for (p = whitelist; *p; p++) {
331 if (!strcmp(drv->format_name, *p)) {
332 return 1;
333 }
334 }
335 return 0;
336}
337
338BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
339{
340 BlockDriver *drv = bdrv_find_format(format_name);
341 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
342}
343
5b7e1542
ZYW
/* Context for running a driver's bdrv_create() inside a coroutine;
 * 'ret' stays NOT_DONE until bdrv_create_co_entry() completes. */
typedef struct CreateCo {
    BlockDriver *drv;
    char *filename;   /* owned copy, freed by bdrv_create() */
    QEMUOptionParameter *options;
    int ret;
} CreateCo;

/* Coroutine entry point: perform the actual image creation. */
static void coroutine_fn bdrv_create_co_entry(void *opaque)
{
    CreateCo *cco = opaque;
    assert(cco->drv);

    cco->ret = cco->drv->bdrv_create(cco->filename, cco->options);
}
358
0e7e1989
KW
359int bdrv_create(BlockDriver *drv, const char* filename,
360 QEMUOptionParameter *options)
ea2384d3 361{
5b7e1542
ZYW
362 int ret;
363
364 Coroutine *co;
365 CreateCo cco = {
366 .drv = drv,
367 .filename = g_strdup(filename),
368 .options = options,
369 .ret = NOT_DONE,
370 };
371
372 if (!drv->bdrv_create) {
ea2384d3 373 return -ENOTSUP;
5b7e1542
ZYW
374 }
375
376 if (qemu_in_coroutine()) {
377 /* Fast-path if already in coroutine context */
378 bdrv_create_co_entry(&cco);
379 } else {
380 co = qemu_coroutine_create(bdrv_create_co_entry);
381 qemu_coroutine_enter(co, &cco);
382 while (cco.ret == NOT_DONE) {
383 qemu_aio_wait();
384 }
385 }
386
387 ret = cco.ret;
388 g_free(cco.filename);
0e7e1989 389
5b7e1542 390 return ret;
ea2384d3
FB
391}
392
84a12e66
CH
393int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
394{
395 BlockDriver *drv;
396
b50cbabc 397 drv = bdrv_find_protocol(filename);
84a12e66 398 if (drv == NULL) {
16905d71 399 return -ENOENT;
84a12e66
CH
400 }
401
402 return bdrv_create(drv, filename, options);
403}
404
#ifdef _WIN32
/* Fill 'filename' with a fresh temporary file name (Windows: the OS
 * picks a unique name inside the temp directory). */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
/* Fill 'filename' with a fresh temporary file name under $TMPDIR
 * (default /tmp).  The file is created to reserve the name. */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;
    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    /* mkstemp() returns -1 on failure; the original close(fd) would then
     * operate on an invalid descriptor. */
    if (fd >= 0) {
        close(fd);
    }
}
#endif
fc01f7e7 427
84a12e66
CH
428/*
429 * Detect host devices. By convention, /dev/cdrom[N] is always
430 * recognized as a host CDROM.
431 */
432static BlockDriver *find_hdev_driver(const char *filename)
433{
434 int score_max = 0, score;
435 BlockDriver *drv = NULL, *d;
436
437 QLIST_FOREACH(d, &bdrv_drivers, list) {
438 if (d->bdrv_probe_device) {
439 score = d->bdrv_probe_device(filename);
440 if (score > score_max) {
441 score_max = score;
442 drv = d;
443 }
444 }
445 }
446
447 return drv;
448}
449
b50cbabc 450BlockDriver *bdrv_find_protocol(const char *filename)
83f64091
FB
451{
452 BlockDriver *drv1;
453 char protocol[128];
1cec71e3 454 int len;
83f64091 455 const char *p;
19cb3738 456
66f82cee
KW
457 /* TODO Drivers without bdrv_file_open must be specified explicitly */
458
39508e7a
CH
459 /*
460 * XXX(hch): we really should not let host device detection
461 * override an explicit protocol specification, but moving this
462 * later breaks access to device names with colons in them.
463 * Thanks to the brain-dead persistent naming schemes on udev-
464 * based Linux systems those actually are quite common.
465 */
466 drv1 = find_hdev_driver(filename);
467 if (drv1) {
468 return drv1;
469 }
470
9e0b22f4 471 if (!path_has_protocol(filename)) {
39508e7a 472 return bdrv_find_format("file");
84a12e66 473 }
9e0b22f4
SH
474 p = strchr(filename, ':');
475 assert(p != NULL);
1cec71e3
AL
476 len = p - filename;
477 if (len > sizeof(protocol) - 1)
478 len = sizeof(protocol) - 1;
479 memcpy(protocol, filename, len);
480 protocol[len] = '\0';
8a22f02a 481 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
5fafdf24 482 if (drv1->protocol_name &&
8a22f02a 483 !strcmp(drv1->protocol_name, protocol)) {
83f64091 484 return drv1;
8a22f02a 485 }
83f64091
FB
486 }
487 return NULL;
488}
489
c98ac35d 490static int find_image_format(const char *filename, BlockDriver **pdrv)
f3a5d3f8
CH
491{
492 int ret, score, score_max;
493 BlockDriver *drv1, *drv;
494 uint8_t buf[2048];
495 BlockDriverState *bs;
496
f5edb014 497 ret = bdrv_file_open(&bs, filename, 0);
c98ac35d
SW
498 if (ret < 0) {
499 *pdrv = NULL;
500 return ret;
501 }
f8ea0b00 502
08a00559
KW
503 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
504 if (bs->sg || !bdrv_is_inserted(bs)) {
1a396859 505 bdrv_delete(bs);
c98ac35d
SW
506 drv = bdrv_find_format("raw");
507 if (!drv) {
508 ret = -ENOENT;
509 }
510 *pdrv = drv;
511 return ret;
1a396859 512 }
f8ea0b00 513
83f64091
FB
514 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
515 bdrv_delete(bs);
516 if (ret < 0) {
c98ac35d
SW
517 *pdrv = NULL;
518 return ret;
83f64091
FB
519 }
520
ea2384d3 521 score_max = 0;
84a12e66 522 drv = NULL;
8a22f02a 523 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
83f64091
FB
524 if (drv1->bdrv_probe) {
525 score = drv1->bdrv_probe(buf, ret, filename);
526 if (score > score_max) {
527 score_max = score;
528 drv = drv1;
529 }
0849bf08 530 }
fc01f7e7 531 }
c98ac35d
SW
532 if (!drv) {
533 ret = -ENOENT;
534 }
535 *pdrv = drv;
536 return ret;
ea2384d3
FB
537}
538
51762288
SH
539/**
540 * Set the current 'total_sectors' value
541 */
542static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
543{
544 BlockDriver *drv = bs->drv;
545
396759ad
NB
546 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
547 if (bs->sg)
548 return 0;
549
51762288
SH
550 /* query actual device if possible, otherwise just trust the hint */
551 if (drv->bdrv_getlength) {
552 int64_t length = drv->bdrv_getlength(bs);
553 if (length < 0) {
554 return length;
555 }
556 hint = length >> BDRV_SECTOR_BITS;
557 }
558
559 bs->total_sectors = hint;
560 return 0;
561}
562
c3993cdc
SH
563/**
564 * Set open flags for a given cache mode
565 *
566 * Return 0 on success, -1 if the cache mode was invalid.
567 */
568int bdrv_parse_cache_flags(const char *mode, int *flags)
569{
570 *flags &= ~BDRV_O_CACHE_MASK;
571
572 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
573 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
92196b2f
SH
574 } else if (!strcmp(mode, "directsync")) {
575 *flags |= BDRV_O_NOCACHE;
c3993cdc
SH
576 } else if (!strcmp(mode, "writeback")) {
577 *flags |= BDRV_O_CACHE_WB;
578 } else if (!strcmp(mode, "unsafe")) {
579 *flags |= BDRV_O_CACHE_WB;
580 *flags |= BDRV_O_NO_FLUSH;
581 } else if (!strcmp(mode, "writethrough")) {
582 /* this is the default */
583 } else {
584 return -1;
585 }
586
587 return 0;
588}
589
53fec9d3
SH
590/**
591 * The copy-on-read flag is actually a reference count so multiple users may
592 * use the feature without worrying about clobbering its previous state.
593 * Copy-on-read stays enabled until all users have called to disable it.
594 */
595void bdrv_enable_copy_on_read(BlockDriverState *bs)
596{
597 bs->copy_on_read++;
598}
599
600void bdrv_disable_copy_on_read(BlockDriverState *bs)
601{
602 assert(bs->copy_on_read > 0);
603 bs->copy_on_read--;
604}
605
57915332
KW
/*
 * Common part for opening disk images and files
 *
 * Resets per-open state on 'bs', allocates the driver's opaque data and
 * opens the image either directly (protocol drivers) or through a nested
 * protocol-level BlockDriverState in bs->file.  On failure everything
 * allocated here is torn down again.
 */
static int bdrv_open_common(BlockDriverState *bs, const char *filename,
    int flags, BlockDriver *drv)
{
    int ret, open_flags;

    assert(drv != NULL);

    trace_bdrv_open_common(bs, filename, flags, drv->format_name);

    /* Reset state that a previous open may have left behind. */
    bs->file = NULL;
    bs->total_sectors = 0;
    bs->encrypted = 0;
    bs->valid_key = 0;
    bs->sg = 0;
    bs->open_flags = flags;
    bs->growable = 0;
    bs->buffer_alignment = 512;

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
        bdrv_enable_copy_on_read(bs);
    }

    pstrcpy(bs->filename, sizeof(bs->filename), filename);
    bs->backing_file[0] = '\0';

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
        return -ENOTSUP;
    }

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

    /*
     * Snapshots should be writable.
     */
    if (bs->is_temporary) {
        open_flags |= BDRV_O_RDWR;
    }

    bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        ret = drv->bdrv_file_open(bs, filename, open_flags);
    } else {
        /* Format driver: open the underlying file first, then the format. */
        ret = bdrv_file_open(&bs->file, filename, open_flags);
        if (ret >= 0) {
            ret = drv->bdrv_open(bs, open_flags);
        }
    }

    if (ret < 0) {
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        goto free_and_fail;
    }

#ifndef _WIN32
    /* Temporary images are unlinked immediately; the open fd keeps them
     * alive until close. */
    if (bs->is_temporary) {
        unlink(filename);
    }
#endif
    return 0;

free_and_fail:
    if (bs->file) {
        bdrv_delete(bs->file);
        bs->file = NULL;
    }
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}
695
b6ce07aa
KW
696/*
697 * Opens a file using a protocol (file, host_device, nbd, ...)
698 */
83f64091 699int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
ea2384d3 700{
83f64091 701 BlockDriverState *bs;
6db95603 702 BlockDriver *drv;
83f64091
FB
703 int ret;
704
b50cbabc 705 drv = bdrv_find_protocol(filename);
6db95603
CH
706 if (!drv) {
707 return -ENOENT;
708 }
709
83f64091 710 bs = bdrv_new("");
b6ce07aa 711 ret = bdrv_open_common(bs, filename, flags, drv);
83f64091
FB
712 if (ret < 0) {
713 bdrv_delete(bs);
714 return ret;
3b0d4f61 715 }
71d0770c 716 bs->growable = 1;
83f64091
FB
717 *pbs = bs;
718 return 0;
719}
720
b6ce07aa
KW
/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
 *
 * With BDRV_O_SNAPSHOT, a temporary qcow2 image backed by 'filename' is
 * created and opened instead, so all writes go to the throwaway file.
 * When 'drv' is NULL the format is probed.  Any backing file recorded in
 * the image is opened recursively (read-only) unless BDRV_O_NO_BACKING
 * is set.  Returns 0 on success, negative errno on failure.
 */
int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
    BlockDriver *drv)
{
    int ret;
    char tmp_filename[PATH_MAX];

    if (flags & BDRV_O_SNAPSHOT) {
        BlockDriverState *bs1;
        int64_t total_size;
        int is_protocol = 0;
        BlockDriver *bdrv_qcow2;
        QEMUOptionParameter *options;
        char backing_filename[PATH_MAX];

        /* if snapshot, we create a temporary backing file and open it
           instead of opening 'filename' directly */

        /* Open the real image once just to learn its size and whether it
         * is protocol-addressed. */
        bs1 = bdrv_new("");
        ret = bdrv_open(bs1, filename, 0, drv);
        if (ret < 0) {
            bdrv_delete(bs1);
            return ret;
        }
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;

        if (bs1->drv && bs1->drv->protocol_name)
            is_protocol = 1;

        bdrv_delete(bs1);

        get_tmp_filename(tmp_filename, sizeof(tmp_filename));

        /* Real path is meaningless for protocols */
        if (is_protocol)
            snprintf(backing_filename, sizeof(backing_filename),
                     "%s", filename);
        else if (!realpath(filename, backing_filename))
            return -errno;

        /* Create the throwaway qcow2 overlay backed by 'filename'. */
        bdrv_qcow2 = bdrv_find_format("qcow2");
        options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);

        set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
        set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
        if (drv) {
            set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
                drv->format_name);
        }

        ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
        free_option_parameters(options);
        if (ret < 0) {
            return ret;
        }

        /* From here on, open the overlay instead of the real image. */
        filename = tmp_filename;
        drv = bdrv_qcow2;
        bs->is_temporary = 1;
    }

    /* Find the right image format driver */
    if (!drv) {
        ret = find_image_format(filename, &drv);
    }

    if (!drv) {
        goto unlink_and_fail;
    }

    /* Open the image */
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        goto unlink_and_fail;
    }

    /* If there is a backing file, use it */
    if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
        char backing_filename[PATH_MAX];
        int back_flags;
        BlockDriver *back_drv = NULL;

        bs->backing_hd = bdrv_new("");

        /* Protocol-addressed backing files are used verbatim; plain paths
         * are resolved relative to the image. */
        if (path_has_protocol(bs->backing_file)) {
            pstrcpy(backing_filename, sizeof(backing_filename),
                    bs->backing_file);
        } else {
            path_combine(backing_filename, sizeof(backing_filename),
                         filename, bs->backing_file);
        }

        if (bs->backing_format[0] != '\0') {
            back_drv = bdrv_find_format(bs->backing_format);
        }

        /* backing files always opened read-only */
        back_flags =
            flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

        ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
        if (ret < 0) {
            bdrv_close(bs);
            return ret;
        }
        if (bs->is_temporary) {
            bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
        } else {
            /* base image inherits from "parent" */
            bs->backing_hd->keep_read_only = bs->keep_read_only;
        }
    }

    if (!bdrv_key_required(bs)) {
        bdrv_dev_change_media_cb(bs, true);
    }

    /* throttling disk I/O limits */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_enable(bs);
    }

    return 0;

unlink_and_fail:
    /* A temporary overlay created above must not outlive a failed open. */
    if (bs->is_temporary) {
        unlink(filename);
    }
    return ret;
}
854
fc01f7e7
FB
/*
 * Close 'bs': flush pending data, cancel any block job, delete the backing
 * chain and release the driver state.  The BlockDriverState itself stays
 * allocated (see bdrv_delete()).
 */
void bdrv_close(BlockDriverState *bs)
{
    bdrv_flush(bs);
    if (bs->drv) {
        if (bs->job) {
            block_job_cancel_sync(bs->job);
        }
        /* No request may still be in flight when the driver is closed. */
        bdrv_drain_all();

        if (bs == bs_snapshots) {
            bs_snapshots = NULL;
        }
        if (bs->backing_hd) {
            bdrv_delete(bs->backing_hd);
            bs->backing_hd = NULL;
        }
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
#ifdef _WIN32
        /* On Windows the temporary image could not be unlinked while open
         * (see bdrv_open_common()), so remove it now. */
        if (bs->is_temporary) {
            unlink(bs->filename);
        }
#endif
        bs->opaque = NULL;
        bs->drv = NULL;
        bs->copy_on_read = 0;

        if (bs->file != NULL) {
            bdrv_close(bs->file);
        }

        bdrv_dev_change_media_cb(bs, false);
    }

    /*throttling disk I/O limits*/
    if (bs->io_limits_enabled) {
        bdrv_io_limits_disable(bs);
    }
}
894
2bc93fed
MK
/* Close every named BlockDriverState on the global list. */
void bdrv_close_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_close(bs);
    }
}
903
922453bc
SH
/*
 * Wait for pending requests to complete across all BlockDriverStates
 *
 * This function does not flush data to disk, use bdrv_flush_all() for that
 * after calling this function.
 */
void bdrv_drain_all(void)
{
    BlockDriverState *bs;

    qemu_aio_flush();

    /* If requests are still pending there is a bug somewhere */
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        assert(QLIST_EMPTY(&bs->tracked_requests));
        assert(qemu_co_queue_empty(&bs->throttled_reqs));
    }
}
922
d22b2f41
RH
/* make a BlockDriverState anonymous by removing from bdrv_state list.
   Also, NULL terminate the device_name to prevent double remove */
void bdrv_make_anon(BlockDriverState *bs)
{
    /* Only named states are on the list (see bdrv_new()). */
    if (bs->device_name[0] != '\0') {
        QTAILQ_REMOVE(&bdrv_states, bs, list);
    }
    bs->device_name[0] = '\0';
}
932
8802d1fd
JC
/*
 * Add new bs contents at the top of an image chain while the chain is
 * live, while keeping required fields on the top layer.
 *
 * This will modify the BlockDriverState fields, and swap contents
 * between bs_new and bs_top. Both bs_new and bs_top are modified.
 *
 * bs_new is required to be anonymous.
 *
 * This function does not create any image files.
 */
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
{
    BlockDriverState tmp;

    /* bs_new must be anonymous */
    assert(bs_new->device_name[0] == '\0');

    tmp = *bs_new;

    /* there are some fields that need to stay on the top layer: */

    /* dev info */
    tmp.dev_ops = bs_top->dev_ops;
    tmp.dev_opaque = bs_top->dev_opaque;
    tmp.dev = bs_top->dev;
    tmp.buffer_alignment = bs_top->buffer_alignment;
    tmp.copy_on_read = bs_top->copy_on_read;

    /* i/o timing parameters */
    tmp.slice_time = bs_top->slice_time;
    tmp.slice_start = bs_top->slice_start;
    tmp.slice_end = bs_top->slice_end;
    tmp.io_limits = bs_top->io_limits;
    tmp.io_base = bs_top->io_base;
    tmp.throttled_reqs = bs_top->throttled_reqs;
    tmp.block_timer = bs_top->block_timer;
    tmp.io_limits_enabled = bs_top->io_limits_enabled;

    /* geometry */
    tmp.cyls = bs_top->cyls;
    tmp.heads = bs_top->heads;
    tmp.secs = bs_top->secs;
    tmp.translation = bs_top->translation;

    /* r/w error */
    tmp.on_read_error = bs_top->on_read_error;
    tmp.on_write_error = bs_top->on_write_error;

    /* i/o status */
    tmp.iostatus_enabled = bs_top->iostatus_enabled;
    tmp.iostatus = bs_top->iostatus;

    /* keep the same entry in bdrv_states */
    pstrcpy(tmp.device_name, sizeof(tmp.device_name), bs_top->device_name);
    tmp.list = bs_top->list;

    /* The contents of 'tmp' will become bs_top, as we are
     * swapping bs_new and bs_top contents. */
    tmp.backing_hd = bs_new;
    pstrcpy(tmp.backing_file, sizeof(tmp.backing_file), bs_top->filename);
    bdrv_get_format(bs_top, tmp.backing_format, sizeof(tmp.backing_format));

    /* swap contents of the fixed new bs and the current top */
    *bs_new = *bs_top;
    *bs_top = tmp;

    /* device_name[] was carried over from the old bs_top. bs_new
     * shouldn't be in bdrv_states, so we need to make device_name[]
     * reflect the anonymity of bs_new
     */
    bs_new->device_name[0] = '\0';

    /* clear the copied fields in the new backing file */
    bdrv_detach_dev(bs_new, bs_new->dev);

    qemu_co_queue_init(&bs_new->throttled_reqs);
    memset(&bs_new->io_base, 0, sizeof(bs_new->io_base));
    memset(&bs_new->io_limits, 0, sizeof(bs_new->io_limits));
    bdrv_iostatus_disable(bs_new);

    /* we don't use bdrv_io_limits_disable() for this, because we don't want
     * to affect or delete the block_timer, as it has been moved to bs_top */
    bs_new->io_limits_enabled = false;
    bs_new->block_timer = NULL;
    bs_new->slice_time = 0;
    bs_new->slice_start = 0;
    bs_new->slice_end = 0;
}
1022
b338082b
FB
/*
 * Free a BlockDriverState.  The caller must have detached any device,
 * cancelled any job and released any in-use reference first (asserted
 * below).  Closes the state and its protocol-level bs->file.
 */
void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->dev);
    assert(!bs->job);
    assert(!bs->in_use);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    bdrv_close(bs);
    if (bs->file != NULL) {
        bdrv_delete(bs->file);
    }

    /* The snapshot device must have been re-chosen before deletion. */
    assert(bs != bs_snapshots);
    g_free(bs);
}
1040
fa879d62
MA
1041int bdrv_attach_dev(BlockDriverState *bs, void *dev)
1042/* TODO change to DeviceState *dev when all users are qdevified */
18846dee 1043{
fa879d62 1044 if (bs->dev) {
18846dee
MA
1045 return -EBUSY;
1046 }
fa879d62 1047 bs->dev = dev;
28a7282a 1048 bdrv_iostatus_reset(bs);
18846dee
MA
1049 return 0;
1050}
1051
fa879d62
MA
1052/* TODO qdevified devices don't use this, remove when devices are qdevified */
1053void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
18846dee 1054{
fa879d62
MA
1055 if (bdrv_attach_dev(bs, dev) < 0) {
1056 abort();
1057 }
1058}
1059
/* Detach the guest device from 'bs' and reset device-related state. */
void bdrv_detach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    assert(bs->dev == dev);
    bs->dev = NULL;
    bs->dev_ops = NULL;
    bs->dev_opaque = NULL;
    bs->buffer_alignment = 512;  /* back to the bdrv_open_common() default */
}
1069
fa879d62
MA
/* TODO change to return DeviceState * when all users are qdevified */
void *bdrv_get_attached_dev(BlockDriverState *bs)
{
    /* May be NULL when no device owns this BlockDriverState */
    return bs->dev;
}
1075
0e49de52
MA
1076void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
1077 void *opaque)
1078{
1079 bs->dev_ops = ops;
1080 bs->dev_opaque = opaque;
2c6942fa
MA
1081 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
1082 bs_snapshots = NULL;
1083 }
0e49de52
MA
1084}
1085
329c0a48
LC
1086void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
1087 BlockQMPEventAction action, int is_read)
1088{
1089 QObject *data;
1090 const char *action_str;
1091
1092 switch (action) {
1093 case BDRV_ACTION_REPORT:
1094 action_str = "report";
1095 break;
1096 case BDRV_ACTION_IGNORE:
1097 action_str = "ignore";
1098 break;
1099 case BDRV_ACTION_STOP:
1100 action_str = "stop";
1101 break;
1102 default:
1103 abort();
1104 }
1105
1106 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1107 bdrv->device_name,
1108 action_str,
1109 is_read ? "read" : "write");
1110 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1111
1112 qobject_decref(data);
1113}
1114
6f382ed2
LC
/* Emit a DEVICE_TRAY_MOVED QMP event; 'tray-open' reports the new tray
 * state (true = opened, false = closed). */
static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
{
    QObject *data;

    data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
                              bdrv_get_device_name(bs), ejected);
    monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data);

    qobject_decref(data);
}
1125
7d4b4ba5 1126static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
0e49de52 1127{
145feb17 1128 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
6f382ed2 1129 bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
7d4b4ba5 1130 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
6f382ed2
LC
1131 if (tray_was_closed) {
1132 /* tray open */
1133 bdrv_emit_qmp_eject_event(bs, true);
1134 }
1135 if (load) {
1136 /* tray close */
1137 bdrv_emit_qmp_eject_event(bs, false);
1138 }
145feb17
MA
1139 }
1140}
1141
2c6942fa
MA
1142bool bdrv_dev_has_removable_media(BlockDriverState *bs)
1143{
1144 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
1145}
1146
025ccaa7
PB
1147void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
1148{
1149 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
1150 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
1151 }
1152}
1153
e4def80b
MA
1154bool bdrv_dev_is_tray_open(BlockDriverState *bs)
1155{
1156 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
1157 return bs->dev_ops->is_tray_open(bs->dev_opaque);
1158 }
1159 return false;
1160}
1161
145feb17
MA
1162static void bdrv_dev_resize_cb(BlockDriverState *bs)
1163{
1164 if (bs->dev_ops && bs->dev_ops->resize_cb) {
1165 bs->dev_ops->resize_cb(bs->dev_opaque);
0e49de52
MA
1166 }
1167}
1168
f107639a
MA
1169bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
1170{
1171 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
1172 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
1173 }
1174 return false;
1175}
1176
e97fc193
AL
1177/*
1178 * Run consistency checks on an image
1179 *
e076f338 1180 * Returns 0 if the check could be completed (it doesn't mean that the image is
a1c7273b 1181 * free of errors) or -errno when an internal error occurred. The results of the
e076f338 1182 * check are stored in res.
e97fc193 1183 */
e076f338 1184int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
e97fc193
AL
1185{
1186 if (bs->drv->bdrv_check == NULL) {
1187 return -ENOTSUP;
1188 }
1189
e076f338 1190 memset(res, 0, sizeof(*res));
9ac228e0 1191 return bs->drv->bdrv_check(bs, res);
e97fc193
AL
1192}
1193
8a426614
KW
1194#define COMMIT_BUF_SECTORS 2048
1195
33e3963e
FB
1196/* commit COW file into the raw image */
1197int bdrv_commit(BlockDriverState *bs)
1198{
19cb3738 1199 BlockDriver *drv = bs->drv;
ee181196 1200 BlockDriver *backing_drv;
8a426614
KW
1201 int64_t sector, total_sectors;
1202 int n, ro, open_flags;
4dca4b63 1203 int ret = 0, rw_ret = 0;
8a426614 1204 uint8_t *buf;
4dca4b63
NS
1205 char filename[1024];
1206 BlockDriverState *bs_rw, *bs_ro;
33e3963e 1207
19cb3738
FB
1208 if (!drv)
1209 return -ENOMEDIUM;
4dca4b63
NS
1210
1211 if (!bs->backing_hd) {
1212 return -ENOTSUP;
33e3963e
FB
1213 }
1214
4dca4b63
NS
1215 if (bs->backing_hd->keep_read_only) {
1216 return -EACCES;
1217 }
ee181196 1218
2d3735d3
SH
1219 if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
1220 return -EBUSY;
1221 }
1222
ee181196 1223 backing_drv = bs->backing_hd->drv;
4dca4b63
NS
1224 ro = bs->backing_hd->read_only;
1225 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
1226 open_flags = bs->backing_hd->open_flags;
1227
1228 if (ro) {
1229 /* re-open as RW */
1230 bdrv_delete(bs->backing_hd);
1231 bs->backing_hd = NULL;
1232 bs_rw = bdrv_new("");
ee181196
KW
1233 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
1234 backing_drv);
4dca4b63
NS
1235 if (rw_ret < 0) {
1236 bdrv_delete(bs_rw);
1237 /* try to re-open read-only */
1238 bs_ro = bdrv_new("");
ee181196
KW
1239 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1240 backing_drv);
4dca4b63
NS
1241 if (ret < 0) {
1242 bdrv_delete(bs_ro);
1243 /* drive not functional anymore */
1244 bs->drv = NULL;
1245 return ret;
1246 }
1247 bs->backing_hd = bs_ro;
1248 return rw_ret;
1249 }
1250 bs->backing_hd = bs_rw;
ea2384d3 1251 }
33e3963e 1252
6ea44308 1253 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
7267c094 1254 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
8a426614
KW
1255
1256 for (sector = 0; sector < total_sectors; sector += n) {
05c4af54 1257 if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
8a426614
KW
1258
1259 if (bdrv_read(bs, sector, buf, n) != 0) {
1260 ret = -EIO;
1261 goto ro_cleanup;
1262 }
1263
1264 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
1265 ret = -EIO;
1266 goto ro_cleanup;
1267 }
ea2384d3 1268 }
33e3963e 1269 }
95389c86 1270
1d44952f
CH
1271 if (drv->bdrv_make_empty) {
1272 ret = drv->bdrv_make_empty(bs);
1273 bdrv_flush(bs);
1274 }
95389c86 1275
3f5075ae
CH
1276 /*
1277 * Make sure all data we wrote to the backing device is actually
1278 * stable on disk.
1279 */
1280 if (bs->backing_hd)
1281 bdrv_flush(bs->backing_hd);
4dca4b63
NS
1282
1283ro_cleanup:
7267c094 1284 g_free(buf);
4dca4b63
NS
1285
1286 if (ro) {
1287 /* re-open as RO */
1288 bdrv_delete(bs->backing_hd);
1289 bs->backing_hd = NULL;
1290 bs_ro = bdrv_new("");
ee181196
KW
1291 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1292 backing_drv);
4dca4b63
NS
1293 if (ret < 0) {
1294 bdrv_delete(bs_ro);
1295 /* drive not functional anymore */
1296 bs->drv = NULL;
1297 return ret;
1298 }
1299 bs->backing_hd = bs_ro;
1300 bs->backing_hd->keep_read_only = 0;
1301 }
1302
1d44952f 1303 return ret;
33e3963e
FB
1304}
1305
/* Commit every open image into its backing file; stops and returns the
 * first error encountered, 0 if all commits succeed. */
int bdrv_commit_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        int ret = bdrv_commit(bs);
        if (ret < 0) {
            return ret;
        }
    }
    return 0;
}
1318
dbffbdcf
SH
/* One in-flight guest I/O request, kept on bs->tracked_requests so that
 * overlapping requests can be detected and serialized. */
struct BdrvTrackedRequest {
    BlockDriverState *bs;
    int64_t sector_num;         /* first sector of the request */
    int nb_sectors;             /* length of the request in sectors */
    bool is_write;
    QLIST_ENTRY(BdrvTrackedRequest) list;
    Coroutine *co; /* owner, used for deadlock detection */
    CoQueue wait_queue; /* coroutines blocked on this request */
};
1328
/**
 * Remove an active request from the tracked requests list
 *
 * This function should be called when a tracked request is completing.
 */
static void tracked_request_end(BdrvTrackedRequest *req)
{
    /* Unlink first, then wake every coroutine waiting on this request */
    QLIST_REMOVE(req, list);
    qemu_co_queue_restart_all(&req->wait_queue);
}
1339
/**
 * Add an active request to the tracked requests list
 */
static void tracked_request_begin(BdrvTrackedRequest *req,
                                  BlockDriverState *bs,
                                  int64_t sector_num,
                                  int nb_sectors, bool is_write)
{
    /* Compound literal zero-initializes the remaining fields (list links,
     * wait_queue storage) before they are set up below. */
    *req = (BdrvTrackedRequest){
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .is_write = is_write,
        .co = qemu_coroutine_self(),
    };

    qemu_co_queue_init(&req->wait_queue);

    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
}
1360
d83947ac
SH
1361/**
1362 * Round a region to cluster boundaries
1363 */
1364static void round_to_clusters(BlockDriverState *bs,
1365 int64_t sector_num, int nb_sectors,
1366 int64_t *cluster_sector_num,
1367 int *cluster_nb_sectors)
1368{
1369 BlockDriverInfo bdi;
1370
1371 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
1372 *cluster_sector_num = sector_num;
1373 *cluster_nb_sectors = nb_sectors;
1374 } else {
1375 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
1376 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
1377 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
1378 nb_sectors, c);
1379 }
1380}
1381
f4658285
SH
1382static bool tracked_request_overlaps(BdrvTrackedRequest *req,
1383 int64_t sector_num, int nb_sectors) {
d83947ac
SH
1384 /* aaaa bbbb */
1385 if (sector_num >= req->sector_num + req->nb_sectors) {
1386 return false;
1387 }
1388 /* bbbb aaaa */
1389 if (req->sector_num >= sector_num + nb_sectors) {
1390 return false;
1391 }
1392 return true;
f4658285
SH
1393}
1394
/* Block the calling coroutine until no tracked request overlaps the given
 * region (expanded to cluster boundaries). Must be re-scanned from the top
 * after each wakeup because the list may have changed while sleeping. */
static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors)
{
    BdrvTrackedRequest *req;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    bool retry;

    /* If we touch the same cluster it counts as an overlap. This guarantees
     * that allocating writes will be serialized and not race with each other
     * for the same cluster. For example, in copy-on-read it ensures that the
     * CoR read and write operations are atomic and guest writes cannot
     * interleave between them.
     */
    round_to_clusters(bs, sector_num, nb_sectors,
                      &cluster_sector_num, &cluster_nb_sectors);

    do {
        retry = false;
        QLIST_FOREACH(req, &bs->tracked_requests, list) {
            if (tracked_request_overlaps(req, cluster_sector_num,
                                         cluster_nb_sectors)) {
                /* Hitting this means there was a reentrant request, for
                 * example, a block driver issuing nested requests. This must
                 * never happen since it means deadlock.
                 */
                assert(qemu_coroutine_self() != req->co);

                qemu_co_queue_wait(&req->wait_queue);
                retry = true;
                break;
            }
        }
    } while (retry);
}
1430
756e6736
KW
1431/*
1432 * Return values:
1433 * 0 - success
1434 * -EINVAL - backing format specified, but no file
1435 * -ENOSPC - can't update the backing file because no space is left in the
1436 * image file header
1437 * -ENOTSUP - format driver doesn't support changing the backing file
1438 */
1439int bdrv_change_backing_file(BlockDriverState *bs,
1440 const char *backing_file, const char *backing_fmt)
1441{
1442 BlockDriver *drv = bs->drv;
1443
1444 if (drv->bdrv_change_backing_file != NULL) {
1445 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
1446 } else {
1447 return -ENOTSUP;
1448 }
1449}
1450
71d0770c
AL
1451static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1452 size_t size)
1453{
1454 int64_t len;
1455
1456 if (!bdrv_is_inserted(bs))
1457 return -ENOMEDIUM;
1458
1459 if (bs->growable)
1460 return 0;
1461
1462 len = bdrv_getlength(bs);
1463
fbb7b4e0
KW
1464 if (offset < 0)
1465 return -EIO;
1466
1467 if ((offset > len) || (len - offset < size))
71d0770c
AL
1468 return -EIO;
1469
1470 return 0;
1471}
1472
1473static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1474 int nb_sectors)
1475{
eb5a3165
JS
1476 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1477 nb_sectors * BDRV_SECTOR_SIZE);
71d0770c
AL
1478}
1479
1c9805a3
SH
/* Parameter/result bundle for running a synchronous read or write through
 * the coroutine path (see bdrv_rw_co_entry/bdrv_rw_co). */
typedef struct RwCo {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    QEMUIOVector *qiov;
    bool is_write;
    int ret;            /* completion status; NOT_DONE while in flight */
} RwCo;
1488
1489static void coroutine_fn bdrv_rw_co_entry(void *opaque)
fc01f7e7 1490{
1c9805a3 1491 RwCo *rwco = opaque;
ea2384d3 1492
1c9805a3
SH
1493 if (!rwco->is_write) {
1494 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
470c0504 1495 rwco->nb_sectors, rwco->qiov, 0);
1c9805a3
SH
1496 } else {
1497 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
f08f2dda 1498 rwco->nb_sectors, rwco->qiov, 0);
1c9805a3
SH
1499 }
1500}
e7a8a783 1501
1c9805a3
SH
/*
 * Process a synchronous request using coroutines
 */
static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
                      int nb_sectors, bool is_write)
{
    QEMUIOVector qiov;
    /* Wrap the flat buffer in a single-element iovec */
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
    };
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .qiov = &qiov,
        .is_write = is_write,
        .ret = NOT_DONE,    /* sentinel: completion sets a real value */
    };

    qemu_iovec_init_external(&qiov, &iov, 1);

    /**
     * In sync call context, when the vcpu is blocked, this throttling timer
     * will not fire; so the I/O throttling function has to be disabled here
     * if it has been enabled.
     */
    if (bs->io_limits_enabled) {
        fprintf(stderr, "Disabling I/O throttling on '%s' due "
                        "to synchronous I/O.\n", bdrv_get_device_name(bs));
        bdrv_io_limits_disable(bs);
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_rw_co_entry(&rwco);
    } else {
        /* Spawn a coroutine and pump the AIO loop until it completes */
        co = qemu_coroutine_create(bdrv_rw_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }
    return rwco.ret;
}
b338082b 1548
1c9805a3
SH
/* return < 0 if error. See bdrv_write() for the return codes */
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
              uint8_t *buf, int nb_sectors)
{
    /* Synchronous read via the shared coroutine helper */
    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
}
1555
7cd1e32a 1556static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
a55eb92c 1557 int nb_sectors, int dirty)
7cd1e32a
LS
1558{
1559 int64_t start, end;
c6d22830 1560 unsigned long val, idx, bit;
a55eb92c 1561
6ea44308 1562 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
c6d22830 1563 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c
JK
1564
1565 for (; start <= end; start++) {
c6d22830
JK
1566 idx = start / (sizeof(unsigned long) * 8);
1567 bit = start % (sizeof(unsigned long) * 8);
1568 val = bs->dirty_bitmap[idx];
1569 if (dirty) {
6d59fec1 1570 if (!(val & (1UL << bit))) {
aaa0eb75 1571 bs->dirty_count++;
6d59fec1 1572 val |= 1UL << bit;
aaa0eb75 1573 }
c6d22830 1574 } else {
6d59fec1 1575 if (val & (1UL << bit)) {
aaa0eb75 1576 bs->dirty_count--;
6d59fec1 1577 val &= ~(1UL << bit);
aaa0eb75 1578 }
c6d22830
JK
1579 }
1580 bs->dirty_bitmap[idx] = val;
7cd1e32a
LS
1581 }
1582}
1583
/* Return < 0 if error. Important errors are:
  -EIO         generic I/O error (may happen for all errors)
  -ENOMEDIUM   No media inserted.
  -EINVAL      Invalid sector number or nb_sectors
  -EACCES      Trying to write a read-only device
*/
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
               const uint8_t *buf, int nb_sectors)
{
    /* Synchronous write via the shared coroutine helper; the cast drops
     * const because the helper takes one buffer pointer for both paths. */
    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
}
1595
eda578e5
AL
/* Byte-granularity read built on sector-granularity bdrv_read(): an
 * unaligned head and tail go through a one-sector bounce buffer, the
 * aligned middle is read directly into 'buf'. Returns count1 on success,
 * negative errno on failure. */
int bdrv_pread(BlockDriverState *bs, int64_t offset,
               void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first read to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* read the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf, count);
    }
    return count1;
}
1640
eda578e5
AL
/* Byte-granularity write built on sector-granularity bdrv_read()/
 * bdrv_write(): unaligned head and tail sectors are read-modify-written
 * through a bounce buffer, the aligned middle is written directly.
 * Returns count1 on success, negative errno on failure. */
int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
                const void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first write to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        /* read-modify-write the partial head sector */
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* write the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        /* read-modify-write the partial tail sector */
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf, buf, count);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
    }
    return count1;
}
83f64091 1689
f08145fe
KW
1690/*
1691 * Writes to the file and ensures that no writes are reordered across this
1692 * request (acts as a barrier)
1693 *
1694 * Returns 0 on success, -errno in error cases.
1695 */
1696int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1697 const void *buf, int count)
1698{
1699 int ret;
1700
1701 ret = bdrv_pwrite(bs, offset, buf, count);
1702 if (ret < 0) {
1703 return ret;
1704 }
1705
92196b2f
SH
1706 /* No flush needed for cache modes that use O_DSYNC */
1707 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
f08145fe
KW
1708 bdrv_flush(bs);
1709 }
1710
1711 return 0;
1712}
1713
/* Copy-on-read helper: read a whole cluster through a bounce buffer, write
 * it into this image (so the backing file need not be consulted again),
 * then hand the requested slice back to the caller's qiov. */
static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    /* Perform I/O through a temporary buffer so that users who scribble over
     * their read buffer while the operation is in progress do not end up
     * modifying the image file.  This is critical for zero-copy guest I/O
     * where anything might happen inside guest memory.
     */
    void *bounce_buffer;

    BlockDriver *drv = bs->drv;
    struct iovec iov;
    QEMUIOVector bounce_qiov;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    size_t skip_bytes;
    int ret;

    /* Cover entire cluster so no additional backing file I/O is required when
     * allocating cluster in the image file.
     */
    round_to_clusters(bs, sector_num, nb_sectors,
                      &cluster_sector_num, &cluster_nb_sectors);

    trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
                                   cluster_sector_num, cluster_nb_sectors);

    iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
    iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
    qemu_iovec_init_external(&bounce_qiov, &iov, 1);

    ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
                             &bounce_qiov);
    if (ret < 0) {
        goto err;
    }

    /* Prefer the driver's efficient zero-write when the cluster is all
     * zeroes; otherwise write the bounce buffer back verbatim. */
    if (drv->bdrv_co_write_zeroes &&
        buffer_is_zero(bounce_buffer, iov.iov_len)) {
        ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
                                      cluster_nb_sectors);
    } else {
        ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
                                  &bounce_qiov);
    }

    if (ret < 0) {
        /* It might be okay to ignore write errors for guest requests.  If this
         * is a deliberate copy-on-read then we don't want to ignore the error.
         * Simply report it in all cases.
         */
        goto err;
    }

    /* Hand back only the slice the caller actually asked for */
    skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
    qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
                           nb_sectors * BDRV_SECTOR_SIZE);

err:
    qemu_vfree(bounce_buffer);
    return ret;
}
1776
c5fbe571
SH
/*
 * Handle a read request in coroutine context
 */
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    int ret;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }

    /* throttling disk read I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, false, nb_sectors);
    }

    /* A device-wide copy_on_read setting upgrades every read */
    if (bs->copy_on_read) {
        flags |= BDRV_REQ_COPY_ON_READ;
    }
    if (flags & BDRV_REQ_COPY_ON_READ) {
        bs->copy_on_read_in_flight++;
    }

    /* While any CoR is in flight, reads must serialize against
     * overlapping requests to keep the read+write pair atomic. */
    if (bs->copy_on_read_in_flight) {
        wait_for_overlapping_requests(bs, sector_num, nb_sectors);
    }

    tracked_request_begin(&req, bs, sector_num, nb_sectors, false);

    if (flags & BDRV_REQ_COPY_ON_READ) {
        int pnum;

        ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
        if (ret < 0) {
            goto out;
        }

        /* Any unallocated part forces the copy-on-read slow path */
        if (!ret || pnum != nb_sectors) {
            ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
            goto out;
        }
    }

    ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);

out:
    tracked_request_end(&req);

    if (flags & BDRV_REQ_COPY_ON_READ) {
        bs->copy_on_read_in_flight--;
    }

    return ret;
}
1838
c5fbe571 1839int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
da1fa91d
KW
1840 int nb_sectors, QEMUIOVector *qiov)
1841{
c5fbe571 1842 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
da1fa91d 1843
470c0504
SH
1844 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
1845}
1846
1847int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
1848 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1849{
1850 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
1851
1852 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
1853 BDRV_REQ_COPY_ON_READ);
c5fbe571
SH
1854}
1855
f08f2dda
SH
1856static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
1857 int64_t sector_num, int nb_sectors)
1858{
1859 BlockDriver *drv = bs->drv;
1860 QEMUIOVector qiov;
1861 struct iovec iov;
1862 int ret;
1863
621f0589
KW
1864 /* TODO Emulate only part of misaligned requests instead of letting block
1865 * drivers return -ENOTSUP and emulate everything */
1866
f08f2dda
SH
1867 /* First try the efficient write zeroes operation */
1868 if (drv->bdrv_co_write_zeroes) {
621f0589
KW
1869 ret = drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
1870 if (ret != -ENOTSUP) {
1871 return ret;
1872 }
f08f2dda
SH
1873 }
1874
1875 /* Fall back to bounce buffer if write zeroes is unsupported */
1876 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
1877 iov.iov_base = qemu_blockalign(bs, iov.iov_len);
1878 memset(iov.iov_base, 0, iov.iov_len);
1879 qemu_iovec_init_external(&qiov, &iov, 1);
1880
1881 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
1882
1883 qemu_vfree(iov.iov_base);
1884 return ret;
1885}
1886
c5fbe571
SH
/*
 * Handle a write request in coroutine context
 */
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    int ret;

    if (!bs->drv) {
        return -ENOMEDIUM;
    }
    if (bs->read_only) {
        return -EACCES;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }

    /* throttling disk write I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, true, nb_sectors);
    }

    /* Writes must not race with an in-flight copy-on-read of the
     * same cluster (see wait_for_overlapping_requests). */
    if (bs->copy_on_read_in_flight) {
        wait_for_overlapping_requests(bs, sector_num, nb_sectors);
    }

    tracked_request_begin(&req, bs, sector_num, nb_sectors, true);

    if (flags & BDRV_REQ_ZERO_WRITE) {
        /* qiov is NULL on this path; zeroes are generated instead */
        ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
    } else {
        ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
    }

    /* Track dirty sectors for block migration */
    if (bs->dirty_bitmap) {
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
    }

    /* Remember the highest sector ever written (image growth watermark) */
    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
        bs->wr_highest_sector = sector_num + nb_sectors - 1;
    }

    tracked_request_end(&req);

    return ret;
}
1937
c5fbe571
SH
1938int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1939 int nb_sectors, QEMUIOVector *qiov)
1940{
1941 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1942
f08f2dda
SH
1943 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
1944}
1945
1946int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
1947 int64_t sector_num, int nb_sectors)
1948{
1949 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
1950
1951 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
1952 BDRV_REQ_ZERO_WRITE);
c5fbe571
SH
1953}
1954
83f64091
FB
1955/**
1956 * Truncate file to 'offset' bytes (needed only for file protocols)
1957 */
1958int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1959{
1960 BlockDriver *drv = bs->drv;
51762288 1961 int ret;
83f64091 1962 if (!drv)
19cb3738 1963 return -ENOMEDIUM;
83f64091
FB
1964 if (!drv->bdrv_truncate)
1965 return -ENOTSUP;
59f2689d
NS
1966 if (bs->read_only)
1967 return -EACCES;
8591675f
MT
1968 if (bdrv_in_use(bs))
1969 return -EBUSY;
51762288
SH
1970 ret = drv->bdrv_truncate(bs, offset);
1971 if (ret == 0) {
1972 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
145feb17 1973 bdrv_dev_resize_cb(bs);
51762288
SH
1974 }
1975 return ret;
83f64091
FB
1976}
1977
4a1d5e1f
FZ
1978/**
1979 * Length of a allocated file in bytes. Sparse files are counted by actual
1980 * allocated space. Return < 0 if error or unknown.
1981 */
1982int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1983{
1984 BlockDriver *drv = bs->drv;
1985 if (!drv) {
1986 return -ENOMEDIUM;
1987 }
1988 if (drv->bdrv_get_allocated_file_size) {
1989 return drv->bdrv_get_allocated_file_size(bs);
1990 }
1991 if (bs->file) {
1992 return bdrv_get_allocated_file_size(bs->file);
1993 }
1994 return -ENOTSUP;
1995}
1996
83f64091
FB
1997/**
1998 * Length of a file in bytes. Return < 0 if error or unknown.
1999 */
2000int64_t bdrv_getlength(BlockDriverState *bs)
2001{
2002 BlockDriver *drv = bs->drv;
2003 if (!drv)
19cb3738 2004 return -ENOMEDIUM;
51762288 2005
2c6942fa 2006 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
46a4e4e6
SH
2007 if (drv->bdrv_getlength) {
2008 return drv->bdrv_getlength(bs);
2009 }
83f64091 2010 }
46a4e4e6 2011 return bs->total_sectors * BDRV_SECTOR_SIZE;
fc01f7e7
FB
2012}
2013
19cb3738 2014/* return 0 as number of sectors if no device present or error */
96b8f136 2015void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
fc01f7e7 2016{
19cb3738
FB
2017 int64_t length;
2018 length = bdrv_getlength(bs);
2019 if (length < 0)
2020 length = 0;
2021 else
6ea44308 2022 length = length >> BDRV_SECTOR_BITS;
19cb3738 2023 *nb_sectors_ptr = length;
fc01f7e7 2024}
cf98951b 2025
f3d54fc4
AL
/* On-disk layout of one MBR partition table entry (all fields
 * little-endian; must stay packed to match the disk format). */
struct partition {
        uint8_t boot_ind;           /* 0x80 - active */
        uint8_t head;               /* starting head */
        uint8_t sector;             /* starting sector */
        uint8_t cyl;                /* starting cylinder */
        uint8_t sys_ind;            /* What partition type */
        uint8_t end_head;           /* end head */
        uint8_t end_sector;         /* end sector */
        uint8_t end_cyl;            /* end cylinder */
        uint32_t start_sect;        /* starting sector counting from 0 */
        uint32_t nr_sects;          /* nr of sectors in partition */
} QEMU_PACKED;
f3d54fc4
AL
2038
/* try to guess the disk logical geometry from the MSDOS partition table.
   Return 0 if OK, -1 if could not guess */
static int guess_disk_lchs(BlockDriverState *bs,
                           int *pcylinders, int *pheads, int *psectors)
{
    uint8_t buf[BDRV_SECTOR_SIZE];
    int ret, i, heads, sectors, cylinders;
    struct partition *p;
    uint32_t nr_sects;
    uint64_t nb_sectors;
    bool enabled;

    bdrv_get_geometry(bs, &nb_sectors);

    /**
     * The function will be invoked during startup not only in sync I/O mode,
     * but also in async I/O mode. So the I/O throttling function has to
     * be disabled temporarily here, not permanently.
     */
    enabled = bs->io_limits_enabled;
    bs->io_limits_enabled = false;
    ret = bdrv_read(bs, 0, buf, 1);
    bs->io_limits_enabled = enabled;
    if (ret < 0)
        return -1;
    /* test msdos magic */
    if (buf[510] != 0x55 || buf[511] != 0xaa)
        return -1;
    for(i = 0; i < 4; i++) {
        /* MBR partition entries start at offset 0x1be */
        p = ((struct partition *)(buf + 0x1be)) + i;
        nr_sects = le32_to_cpu(p->nr_sects);
        if (nr_sects && p->end_head) {
            /* We make the assumption that the partition terminates on
               a cylinder boundary */
            heads = p->end_head + 1;
            sectors = p->end_sector & 63;   /* low 6 bits hold the sector */
            if (sectors == 0)
                continue;
            cylinders = nb_sectors / (heads * sectors);
            if (cylinders < 1 || cylinders > 16383)
                continue;
            *pheads = heads;
            *psectors = sectors;
            *pcylinders = cylinders;
#if 0
            printf("guessed geometry: LCHS=%d %d %d\n",
                   cylinders, heads, sectors);
#endif
            return 0;
        }
    }
    return -1;
}
2091
/* Pick a CHS geometry for the drive: prefer an explicit hint, then the
 * geometry implied by the MBR partition table, else a standard 16/63
 * physical layout with a BIOS translation hint. */
void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
{
    int translation, lba_detected = 0;
    int cylinders, heads, secs;
    uint64_t nb_sectors;

    /* if a geometry hint is available, use it */
    bdrv_get_geometry(bs, &nb_sectors);
    bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
    translation = bdrv_get_translation_hint(bs);
    if (cylinders != 0) {
        *pcyls = cylinders;
        *pheads = heads;
        *psecs = secs;
    } else {
        if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
            if (heads > 16) {
                /* if heads > 16, it means that a BIOS LBA
                   translation was active, so the default
                   hardware geometry is OK */
                lba_detected = 1;
                goto default_geometry;
            } else {
                *pcyls = cylinders;
                *pheads = heads;
                *psecs = secs;
                /* disable any translation to be in sync with
                   the logical geometry */
                if (translation == BIOS_ATA_TRANSLATION_AUTO) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_NONE);
                }
            }
        } else {
        default_geometry:
            /* if no geometry, use a standard physical disk geometry */
            cylinders = nb_sectors / (16 * 63);

            /* Clamp to the 2..16383 range BIOSes accept */
            if (cylinders > 16383)
                cylinders = 16383;
            else if (cylinders < 2)
                cylinders = 2;
            *pcyls = cylinders;
            *pheads = 16;
            *psecs = 63;
            if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
                /* 1024 cyl * 16 heads * 8 multiplier boundary */
                if ((*pcyls * *pheads) <= 131072) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LARGE);
                } else {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LBA);
                }
            }
        }
        /* Cache the result so later callers get the same answer */
        bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
    }
}
2150
5fafdf24 2151void bdrv_set_geometry_hint(BlockDriverState *bs,
b338082b
FB
2152 int cyls, int heads, int secs)
2153{
2154 bs->cyls = cyls;
2155 bs->heads = heads;
2156 bs->secs = secs;
2157}
2158
46d4767d
FB
2159void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
2160{
2161 bs->translation = translation;
2162}
2163
5fafdf24 2164void bdrv_get_geometry_hint(BlockDriverState *bs,
b338082b
FB
2165 int *pcyls, int *pheads, int *psecs)
2166{
2167 *pcyls = bs->cyls;
2168 *pheads = bs->heads;
2169 *psecs = bs->secs;
2170}
2171
0563e191
ZYW
2172/* throttling disk io limits */
2173void bdrv_set_io_limits(BlockDriverState *bs,
2174 BlockIOLimit *io_limits)
2175{
2176 bs->io_limits = *io_limits;
2177 bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
2178}
2179
5bbdbb46
BS
2180/* Recognize floppy formats */
2181typedef struct FDFormat {
2182 FDriveType drive;
2183 uint8_t last_sect;
2184 uint8_t max_track;
2185 uint8_t max_head;
f8d3d128 2186 FDriveRate rate;
5bbdbb46
BS
2187} FDFormat;
2188
2189static const FDFormat fd_formats[] = {
2190 /* First entry is default format */
2191 /* 1.44 MB 3"1/2 floppy disks */
f8d3d128
HP
2192 { FDRIVE_DRV_144, 18, 80, 1, FDRIVE_RATE_500K, },
2193 { FDRIVE_DRV_144, 20, 80, 1, FDRIVE_RATE_500K, },
2194 { FDRIVE_DRV_144, 21, 80, 1, FDRIVE_RATE_500K, },
2195 { FDRIVE_DRV_144, 21, 82, 1, FDRIVE_RATE_500K, },
2196 { FDRIVE_DRV_144, 21, 83, 1, FDRIVE_RATE_500K, },
2197 { FDRIVE_DRV_144, 22, 80, 1, FDRIVE_RATE_500K, },
2198 { FDRIVE_DRV_144, 23, 80, 1, FDRIVE_RATE_500K, },
2199 { FDRIVE_DRV_144, 24, 80, 1, FDRIVE_RATE_500K, },
5bbdbb46 2200 /* 2.88 MB 3"1/2 floppy disks */
f8d3d128
HP
2201 { FDRIVE_DRV_288, 36, 80, 1, FDRIVE_RATE_1M, },
2202 { FDRIVE_DRV_288, 39, 80, 1, FDRIVE_RATE_1M, },
2203 { FDRIVE_DRV_288, 40, 80, 1, FDRIVE_RATE_1M, },
2204 { FDRIVE_DRV_288, 44, 80, 1, FDRIVE_RATE_1M, },
2205 { FDRIVE_DRV_288, 48, 80, 1, FDRIVE_RATE_1M, },
5bbdbb46 2206 /* 720 kB 3"1/2 floppy disks */
f8d3d128
HP
2207 { FDRIVE_DRV_144, 9, 80, 1, FDRIVE_RATE_250K, },
2208 { FDRIVE_DRV_144, 10, 80, 1, FDRIVE_RATE_250K, },
2209 { FDRIVE_DRV_144, 10, 82, 1, FDRIVE_RATE_250K, },
2210 { FDRIVE_DRV_144, 10, 83, 1, FDRIVE_RATE_250K, },
2211 { FDRIVE_DRV_144, 13, 80, 1, FDRIVE_RATE_250K, },
2212 { FDRIVE_DRV_144, 14, 80, 1, FDRIVE_RATE_250K, },
5bbdbb46 2213 /* 1.2 MB 5"1/4 floppy disks */
f8d3d128
HP
2214 { FDRIVE_DRV_120, 15, 80, 1, FDRIVE_RATE_500K, },
2215 { FDRIVE_DRV_120, 18, 80, 1, FDRIVE_RATE_500K, },
2216 { FDRIVE_DRV_120, 18, 82, 1, FDRIVE_RATE_500K, },
2217 { FDRIVE_DRV_120, 18, 83, 1, FDRIVE_RATE_500K, },
2218 { FDRIVE_DRV_120, 20, 80, 1, FDRIVE_RATE_500K, },
5bbdbb46 2219 /* 720 kB 5"1/4 floppy disks */
f8d3d128
HP
2220 { FDRIVE_DRV_120, 9, 80, 1, FDRIVE_RATE_250K, },
2221 { FDRIVE_DRV_120, 11, 80, 1, FDRIVE_RATE_250K, },
5bbdbb46 2222 /* 360 kB 5"1/4 floppy disks */
f8d3d128
HP
2223 { FDRIVE_DRV_120, 9, 40, 1, FDRIVE_RATE_300K, },
2224 { FDRIVE_DRV_120, 9, 40, 0, FDRIVE_RATE_300K, },
2225 { FDRIVE_DRV_120, 10, 41, 1, FDRIVE_RATE_300K, },
2226 { FDRIVE_DRV_120, 10, 42, 1, FDRIVE_RATE_300K, },
5bbdbb46 2227 /* 320 kB 5"1/4 floppy disks */
f8d3d128
HP
2228 { FDRIVE_DRV_120, 8, 40, 1, FDRIVE_RATE_250K, },
2229 { FDRIVE_DRV_120, 8, 40, 0, FDRIVE_RATE_250K, },
5bbdbb46 2230 /* 360 kB must match 5"1/4 better than 3"1/2... */
f8d3d128 2231 { FDRIVE_DRV_144, 9, 80, 0, FDRIVE_RATE_250K, },
5bbdbb46 2232 /* end */
f8d3d128 2233 { FDRIVE_DRV_NONE, -1, -1, 0, 0, },
5bbdbb46
BS
2234};
2235
2236void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
2237 int *max_track, int *last_sect,
f8d3d128
HP
2238 FDriveType drive_in, FDriveType *drive,
2239 FDriveRate *rate)
5bbdbb46
BS
2240{
2241 const FDFormat *parse;
2242 uint64_t nb_sectors, size;
2243 int i, first_match, match;
2244
2245 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
2246 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
2247 /* User defined disk */
f8d3d128 2248 *rate = FDRIVE_RATE_500K;
5bbdbb46
BS
2249 } else {
2250 bdrv_get_geometry(bs, &nb_sectors);
2251 match = -1;
2252 first_match = -1;
2253 for (i = 0; ; i++) {
2254 parse = &fd_formats[i];
2255 if (parse->drive == FDRIVE_DRV_NONE) {
2256 break;
2257 }
2258 if (drive_in == parse->drive ||
2259 drive_in == FDRIVE_DRV_NONE) {
2260 size = (parse->max_head + 1) * parse->max_track *
2261 parse->last_sect;
2262 if (nb_sectors == size) {
2263 match = i;
2264 break;
2265 }
2266 if (first_match == -1) {
2267 first_match = i;
2268 }
2269 }
2270 }
2271 if (match == -1) {
2272 if (first_match == -1) {
2273 match = 1;
2274 } else {
2275 match = first_match;
2276 }
2277 parse = &fd_formats[match];
2278 }
2279 *nb_heads = parse->max_head + 1;
2280 *max_track = parse->max_track;
2281 *last_sect = parse->last_sect;
2282 *drive = parse->drive;
f8d3d128 2283 *rate = parse->rate;
5bbdbb46
BS
2284 }
2285}
2286
46d4767d
FB
2287int bdrv_get_translation_hint(BlockDriverState *bs)
2288{
2289 return bs->translation;
2290}
2291
abd7f68d
MA
2292void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
2293 BlockErrorAction on_write_error)
2294{
2295 bs->on_read_error = on_read_error;
2296 bs->on_write_error = on_write_error;
2297}
2298
2299BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
2300{
2301 return is_read ? bs->on_read_error : bs->on_write_error;
2302}
2303
b338082b
FB
2304int bdrv_is_read_only(BlockDriverState *bs)
2305{
2306 return bs->read_only;
2307}
2308
985a03b0
TS
2309int bdrv_is_sg(BlockDriverState *bs)
2310{
2311 return bs->sg;
2312}
2313
e900a7b7
CH
2314int bdrv_enable_write_cache(BlockDriverState *bs)
2315{
2316 return bs->enable_write_cache;
2317}
2318
ea2384d3
FB
2319int bdrv_is_encrypted(BlockDriverState *bs)
2320{
2321 if (bs->backing_hd && bs->backing_hd->encrypted)
2322 return 1;
2323 return bs->encrypted;
2324}
2325
c0f4ce77
AL
2326int bdrv_key_required(BlockDriverState *bs)
2327{
2328 BlockDriverState *backing_hd = bs->backing_hd;
2329
2330 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2331 return 1;
2332 return (bs->encrypted && !bs->valid_key);
2333}
2334
ea2384d3
FB
2335int bdrv_set_key(BlockDriverState *bs, const char *key)
2336{
2337 int ret;
2338 if (bs->backing_hd && bs->backing_hd->encrypted) {
2339 ret = bdrv_set_key(bs->backing_hd, key);
2340 if (ret < 0)
2341 return ret;
2342 if (!bs->encrypted)
2343 return 0;
2344 }
fd04a2ae
SH
2345 if (!bs->encrypted) {
2346 return -EINVAL;
2347 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2348 return -ENOMEDIUM;
2349 }
c0f4ce77 2350 ret = bs->drv->bdrv_set_key(bs, key);
bb5fc20f
AL
2351 if (ret < 0) {
2352 bs->valid_key = 0;
2353 } else if (!bs->valid_key) {
2354 bs->valid_key = 1;
2355 /* call the change callback now, we skipped it on open */
7d4b4ba5 2356 bdrv_dev_change_media_cb(bs, true);
bb5fc20f 2357 }
c0f4ce77 2358 return ret;
ea2384d3
FB
2359}
2360
2361void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
2362{
19cb3738 2363 if (!bs->drv) {
ea2384d3
FB
2364 buf[0] = '\0';
2365 } else {
2366 pstrcpy(buf, buf_size, bs->drv->format_name);
2367 }
2368}
2369
5fafdf24 2370void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
ea2384d3
FB
2371 void *opaque)
2372{
2373 BlockDriver *drv;
2374
8a22f02a 2375 QLIST_FOREACH(drv, &bdrv_drivers, list) {
ea2384d3
FB
2376 it(opaque, drv->format_name);
2377 }
2378}
2379
b338082b
FB
2380BlockDriverState *bdrv_find(const char *name)
2381{
2382 BlockDriverState *bs;
2383
1b7bdbc1
SH
2384 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2385 if (!strcmp(name, bs->device_name)) {
b338082b 2386 return bs;
1b7bdbc1 2387 }
b338082b
FB
2388 }
2389 return NULL;
2390}
2391
2f399b0a
MA
2392BlockDriverState *bdrv_next(BlockDriverState *bs)
2393{
2394 if (!bs) {
2395 return QTAILQ_FIRST(&bdrv_states);
2396 }
2397 return QTAILQ_NEXT(bs, list);
2398}
2399
51de9760 2400void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
81d0912d
FB
2401{
2402 BlockDriverState *bs;
2403
1b7bdbc1 2404 QTAILQ_FOREACH(bs, &bdrv_states, list) {
51de9760 2405 it(opaque, bs);
81d0912d
FB
2406 }
2407}
2408
ea2384d3
FB
2409const char *bdrv_get_device_name(BlockDriverState *bs)
2410{
2411 return bs->device_name;
2412}
2413
c6ca28d6
AL
2414void bdrv_flush_all(void)
2415{
2416 BlockDriverState *bs;
2417
1b7bdbc1 2418 QTAILQ_FOREACH(bs, &bdrv_states, list) {
29cdb251 2419 bdrv_flush(bs);
1b7bdbc1 2420 }
c6ca28d6
AL
2421}
2422
f2feebbd
KW
2423int bdrv_has_zero_init(BlockDriverState *bs)
2424{
2425 assert(bs->drv);
2426
336c1c12
KW
2427 if (bs->drv->bdrv_has_zero_init) {
2428 return bs->drv->bdrv_has_zero_init(bs);
f2feebbd
KW
2429 }
2430
2431 return 1;
2432}
2433
376ae3f1
SH
2434typedef struct BdrvCoIsAllocatedData {
2435 BlockDriverState *bs;
2436 int64_t sector_num;
2437 int nb_sectors;
2438 int *pnum;
2439 int ret;
2440 bool done;
2441} BdrvCoIsAllocatedData;
2442
f58c7b35
TS
2443/*
2444 * Returns true iff the specified sector is present in the disk image. Drivers
2445 * not implementing the functionality are assumed to not support backing files,
2446 * hence all their sectors are reported as allocated.
2447 *
bd9533e3
SH
2448 * If 'sector_num' is beyond the end of the disk image the return value is 0
2449 * and 'pnum' is set to 0.
2450 *
f58c7b35
TS
2451 * 'pnum' is set to the number of sectors (including and immediately following
2452 * the specified sector) that are known to be in the same
2453 * allocated/unallocated state.
2454 *
bd9533e3
SH
2455 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
2456 * beyond the end of the disk image it will be clamped.
f58c7b35 2457 */
060f51c9
SH
2458int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
2459 int nb_sectors, int *pnum)
f58c7b35 2460{
bd9533e3
SH
2461 int64_t n;
2462
2463 if (sector_num >= bs->total_sectors) {
2464 *pnum = 0;
2465 return 0;
2466 }
2467
2468 n = bs->total_sectors - sector_num;
2469 if (n < nb_sectors) {
2470 nb_sectors = n;
2471 }
2472
6aebab14 2473 if (!bs->drv->bdrv_co_is_allocated) {
bd9533e3 2474 *pnum = nb_sectors;
f58c7b35
TS
2475 return 1;
2476 }
6aebab14 2477
060f51c9
SH
2478 return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
2479}
2480
2481/* Coroutine wrapper for bdrv_is_allocated() */
2482static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
2483{
2484 BdrvCoIsAllocatedData *data = opaque;
2485 BlockDriverState *bs = data->bs;
2486
2487 data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
2488 data->pnum);
2489 data->done = true;
2490}
2491
2492/*
2493 * Synchronous wrapper around bdrv_co_is_allocated().
2494 *
2495 * See bdrv_co_is_allocated() for details.
2496 */
2497int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
2498 int *pnum)
2499{
6aebab14
SH
2500 Coroutine *co;
2501 BdrvCoIsAllocatedData data = {
2502 .bs = bs,
2503 .sector_num = sector_num,
2504 .nb_sectors = nb_sectors,
2505 .pnum = pnum,
2506 .done = false,
2507 };
2508
2509 co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
2510 qemu_coroutine_enter(co, &data);
2511 while (!data.done) {
2512 qemu_aio_wait();
2513 }
2514 return data.ret;
f58c7b35
TS
2515}
2516
b2023818 2517BlockInfoList *qmp_query_block(Error **errp)
b338082b 2518{
b2023818 2519 BlockInfoList *head = NULL, *cur_item = NULL;
b338082b
FB
2520 BlockDriverState *bs;
2521
1b7bdbc1 2522 QTAILQ_FOREACH(bs, &bdrv_states, list) {
b2023818 2523 BlockInfoList *info = g_malloc0(sizeof(*info));
d15e5465 2524
b2023818
LC
2525 info->value = g_malloc0(sizeof(*info->value));
2526 info->value->device = g_strdup(bs->device_name);
2527 info->value->type = g_strdup("unknown");
2528 info->value->locked = bdrv_dev_is_medium_locked(bs);
2529 info->value->removable = bdrv_dev_has_removable_media(bs);
d15e5465 2530
e4def80b 2531 if (bdrv_dev_has_removable_media(bs)) {
b2023818
LC
2532 info->value->has_tray_open = true;
2533 info->value->tray_open = bdrv_dev_is_tray_open(bs);
e4def80b 2534 }
f04ef601
LC
2535
2536 if (bdrv_iostatus_is_enabled(bs)) {
b2023818
LC
2537 info->value->has_io_status = true;
2538 info->value->io_status = bs->iostatus;
f04ef601
LC
2539 }
2540
19cb3738 2541 if (bs->drv) {
b2023818
LC
2542 info->value->has_inserted = true;
2543 info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
2544 info->value->inserted->file = g_strdup(bs->filename);
2545 info->value->inserted->ro = bs->read_only;
2546 info->value->inserted->drv = g_strdup(bs->drv->format_name);
2547 info->value->inserted->encrypted = bs->encrypted;
2548 if (bs->backing_file[0]) {
2549 info->value->inserted->has_backing_file = true;
2550 info->value->inserted->backing_file = g_strdup(bs->backing_file);
376253ec 2551 }
727f005e
ZYW
2552
2553 if (bs->io_limits_enabled) {
2554 info->value->inserted->bps =
2555 bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2556 info->value->inserted->bps_rd =
2557 bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
2558 info->value->inserted->bps_wr =
2559 bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
2560 info->value->inserted->iops =
2561 bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2562 info->value->inserted->iops_rd =
2563 bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
2564 info->value->inserted->iops_wr =
2565 bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
2566 }
b2023818 2567 }
d15e5465 2568
b2023818
LC
2569 /* XXX: waiting for the qapi to support GSList */
2570 if (!cur_item) {
2571 head = cur_item = info;
2572 } else {
2573 cur_item->next = info;
2574 cur_item = info;
b338082b 2575 }
b338082b 2576 }
d15e5465 2577
b2023818 2578 return head;
b338082b 2579}
a36e69dd 2580
f11f57e4
LC
2581/* Consider exposing this as a full fledged QMP command */
2582static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
2583{
2584 BlockStats *s;
2585
2586 s = g_malloc0(sizeof(*s));
2587
2588 if (bs->device_name[0]) {
2589 s->has_device = true;
2590 s->device = g_strdup(bs->device_name);
294cc35f
KW
2591 }
2592
f11f57e4
LC
2593 s->stats = g_malloc0(sizeof(*s->stats));
2594 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
2595 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
2596 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
2597 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
2598 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
2599 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
2600 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
2601 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
2602 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
2603
294cc35f 2604 if (bs->file) {
f11f57e4
LC
2605 s->has_parent = true;
2606 s->parent = qmp_query_blockstat(bs->file, NULL);
294cc35f
KW
2607 }
2608
f11f57e4 2609 return s;
294cc35f
KW
2610}
2611
f11f57e4 2612BlockStatsList *qmp_query_blockstats(Error **errp)
218a536a 2613{
f11f57e4 2614 BlockStatsList *head = NULL, *cur_item = NULL;
a36e69dd
TS
2615 BlockDriverState *bs;
2616
1b7bdbc1 2617 QTAILQ_FOREACH(bs, &bdrv_states, list) {
f11f57e4
LC
2618 BlockStatsList *info = g_malloc0(sizeof(*info));
2619 info->value = qmp_query_blockstat(bs, NULL);
2620
2621 /* XXX: waiting for the qapi to support GSList */
2622 if (!cur_item) {
2623 head = cur_item = info;
2624 } else {
2625 cur_item->next = info;
2626 cur_item = info;
2627 }
a36e69dd 2628 }
218a536a 2629
f11f57e4 2630 return head;
a36e69dd 2631}
ea2384d3 2632
045df330
AL
2633const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2634{
2635 if (bs->backing_hd && bs->backing_hd->encrypted)
2636 return bs->backing_file;
2637 else if (bs->encrypted)
2638 return bs->filename;
2639 else
2640 return NULL;
2641}
2642
5fafdf24 2643void bdrv_get_backing_filename(BlockDriverState *bs,
83f64091
FB
2644 char *filename, int filename_size)
2645{
3574c608 2646 pstrcpy(filename, filename_size, bs->backing_file);
83f64091
FB
2647}
2648
5fafdf24 2649int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
faea38e7
FB
2650 const uint8_t *buf, int nb_sectors)
2651{
2652 BlockDriver *drv = bs->drv;
2653 if (!drv)
19cb3738 2654 return -ENOMEDIUM;
faea38e7
FB
2655 if (!drv->bdrv_write_compressed)
2656 return -ENOTSUP;
fbb7b4e0
KW
2657 if (bdrv_check_request(bs, sector_num, nb_sectors))
2658 return -EIO;
a55eb92c 2659
c6d22830 2660 if (bs->dirty_bitmap) {
7cd1e32a
LS
2661 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2662 }
a55eb92c 2663
faea38e7
FB
2664 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2665}
3b46e624 2666
faea38e7
FB
2667int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2668{
2669 BlockDriver *drv = bs->drv;
2670 if (!drv)
19cb3738 2671 return -ENOMEDIUM;
faea38e7
FB
2672 if (!drv->bdrv_get_info)
2673 return -ENOTSUP;
2674 memset(bdi, 0, sizeof(*bdi));
2675 return drv->bdrv_get_info(bs, bdi);
2676}
2677
45566e9c
CH
2678int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2679 int64_t pos, int size)
178e08a5
AL
2680{
2681 BlockDriver *drv = bs->drv;
2682 if (!drv)
2683 return -ENOMEDIUM;
7cdb1f6d
MK
2684 if (drv->bdrv_save_vmstate)
2685 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2686 if (bs->file)
2687 return bdrv_save_vmstate(bs->file, buf, pos, size);
2688 return -ENOTSUP;
178e08a5
AL
2689}
2690
45566e9c
CH
2691int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2692 int64_t pos, int size)
178e08a5
AL
2693{
2694 BlockDriver *drv = bs->drv;
2695 if (!drv)
2696 return -ENOMEDIUM;
7cdb1f6d
MK
2697 if (drv->bdrv_load_vmstate)
2698 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2699 if (bs->file)
2700 return bdrv_load_vmstate(bs->file, buf, pos, size);
2701 return -ENOTSUP;
178e08a5
AL
2702}
2703
8b9b0cc2
KW
2704void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2705{
2706 BlockDriver *drv = bs->drv;
2707
2708 if (!drv || !drv->bdrv_debug_event) {
2709 return;
2710 }
2711
2712 return drv->bdrv_debug_event(bs, event);
2713
2714}
2715
faea38e7
FB
2716/**************************************************************/
2717/* handling of snapshots */
2718
feeee5ac
MDCF
2719int bdrv_can_snapshot(BlockDriverState *bs)
2720{
2721 BlockDriver *drv = bs->drv;
07b70bfb 2722 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
feeee5ac
MDCF
2723 return 0;
2724 }
2725
2726 if (!drv->bdrv_snapshot_create) {
2727 if (bs->file != NULL) {
2728 return bdrv_can_snapshot(bs->file);
2729 }
2730 return 0;
2731 }
2732
2733 return 1;
2734}
2735
199630b6
BS
2736int bdrv_is_snapshot(BlockDriverState *bs)
2737{
2738 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2739}
2740
f9092b10
MA
2741BlockDriverState *bdrv_snapshots(void)
2742{
2743 BlockDriverState *bs;
2744
3ac906f7 2745 if (bs_snapshots) {
f9092b10 2746 return bs_snapshots;
3ac906f7 2747 }
f9092b10
MA
2748
2749 bs = NULL;
2750 while ((bs = bdrv_next(bs))) {
2751 if (bdrv_can_snapshot(bs)) {
3ac906f7
MA
2752 bs_snapshots = bs;
2753 return bs;
f9092b10
MA
2754 }
2755 }
2756 return NULL;
f9092b10
MA
2757}
2758
5fafdf24 2759int bdrv_snapshot_create(BlockDriverState *bs,
faea38e7
FB
2760 QEMUSnapshotInfo *sn_info)
2761{
2762 BlockDriver *drv = bs->drv;
2763 if (!drv)
19cb3738 2764 return -ENOMEDIUM;
7cdb1f6d
MK
2765 if (drv->bdrv_snapshot_create)
2766 return drv->bdrv_snapshot_create(bs, sn_info);
2767 if (bs->file)
2768 return bdrv_snapshot_create(bs->file, sn_info);
2769 return -ENOTSUP;
faea38e7
FB
2770}
2771
5fafdf24 2772int bdrv_snapshot_goto(BlockDriverState *bs,
faea38e7
FB
2773 const char *snapshot_id)
2774{
2775 BlockDriver *drv = bs->drv;
7cdb1f6d
MK
2776 int ret, open_ret;
2777
faea38e7 2778 if (!drv)
19cb3738 2779 return -ENOMEDIUM;
7cdb1f6d
MK
2780 if (drv->bdrv_snapshot_goto)
2781 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2782
2783 if (bs->file) {
2784 drv->bdrv_close(bs);
2785 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2786 open_ret = drv->bdrv_open(bs, bs->open_flags);
2787 if (open_ret < 0) {
2788 bdrv_delete(bs->file);
2789 bs->drv = NULL;
2790 return open_ret;
2791 }
2792 return ret;
2793 }
2794
2795 return -ENOTSUP;
faea38e7
FB
2796}
2797
2798int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2799{
2800 BlockDriver *drv = bs->drv;
2801 if (!drv)
19cb3738 2802 return -ENOMEDIUM;
7cdb1f6d
MK
2803 if (drv->bdrv_snapshot_delete)
2804 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2805 if (bs->file)
2806 return bdrv_snapshot_delete(bs->file, snapshot_id);
2807 return -ENOTSUP;
faea38e7
FB
2808}
2809
5fafdf24 2810int bdrv_snapshot_list(BlockDriverState *bs,
faea38e7
FB
2811 QEMUSnapshotInfo **psn_info)
2812{
2813 BlockDriver *drv = bs->drv;
2814 if (!drv)
19cb3738 2815 return -ENOMEDIUM;
7cdb1f6d
MK
2816 if (drv->bdrv_snapshot_list)
2817 return drv->bdrv_snapshot_list(bs, psn_info);
2818 if (bs->file)
2819 return bdrv_snapshot_list(bs->file, psn_info);
2820 return -ENOTSUP;
faea38e7
FB
2821}
2822
51ef6727 2823int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2824 const char *snapshot_name)
2825{
2826 BlockDriver *drv = bs->drv;
2827 if (!drv) {
2828 return -ENOMEDIUM;
2829 }
2830 if (!bs->read_only) {
2831 return -EINVAL;
2832 }
2833 if (drv->bdrv_snapshot_load_tmp) {
2834 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2835 }
2836 return -ENOTSUP;
2837}
2838
e8a6bb9c
MT
2839BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2840 const char *backing_file)
2841{
2842 if (!bs->drv) {
2843 return NULL;
2844 }
2845
2846 if (bs->backing_hd) {
2847 if (strcmp(bs->backing_file, backing_file) == 0) {
2848 return bs->backing_hd;
2849 } else {
2850 return bdrv_find_backing_image(bs->backing_hd, backing_file);
2851 }
2852 }
2853
2854 return NULL;
2855}
2856
faea38e7
FB
#define NB_SUFFIXES 4

/* Format @size into @buf in human-readable form ("999", "2.0K", "977K",
 * ...), using 1024-based K/M/G/T suffixes.  Returns @buf. */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t base = 1024;
    int i;

    if (size <= 999) {
        /* small enough to print verbatim */
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (i = 0; i < NB_SUFFIXES; i++) {
        if (size < (10 * base)) {
            /* one decimal place for values below 10 units */
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / base,
                     suffixes[i]);
            break;
        } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
            /* rounded integer number of units */
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     ((size + (base >> 1)) / base),
                     suffixes[i]);
            break;
        }
        base = base * 1024;
    }
    return buf;
}
2886
2887char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2888{
2889 char buf1[128], date_buf[128], clock_buf[128];
3b9f94e1
FB
2890#ifdef _WIN32
2891 struct tm *ptm;
2892#else
faea38e7 2893 struct tm tm;
3b9f94e1 2894#endif
faea38e7
FB
2895 time_t ti;
2896 int64_t secs;
2897
2898 if (!sn) {
5fafdf24
TS
2899 snprintf(buf, buf_size,
2900 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
2901 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2902 } else {
2903 ti = sn->date_sec;
3b9f94e1
FB
2904#ifdef _WIN32
2905 ptm = localtime(&ti);
2906 strftime(date_buf, sizeof(date_buf),
2907 "%Y-%m-%d %H:%M:%S", ptm);
2908#else
faea38e7
FB
2909 localtime_r(&ti, &tm);
2910 strftime(date_buf, sizeof(date_buf),
2911 "%Y-%m-%d %H:%M:%S", &tm);
3b9f94e1 2912#endif
faea38e7
FB
2913 secs = sn->vm_clock_nsec / 1000000000;
2914 snprintf(clock_buf, sizeof(clock_buf),
2915 "%02d:%02d:%02d.%03d",
2916 (int)(secs / 3600),
2917 (int)((secs / 60) % 60),
5fafdf24 2918 (int)(secs % 60),
faea38e7
FB
2919 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2920 snprintf(buf, buf_size,
5fafdf24 2921 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
2922 sn->id_str, sn->name,
2923 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2924 date_buf,
2925 clock_buf);
2926 }
2927 return buf;
2928}
2929
ea2384d3 2930/**************************************************************/
83f64091 2931/* async I/Os */
ea2384d3 2932
3b69e4b9 2933BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
f141eafe 2934 QEMUIOVector *qiov, int nb_sectors,
3b69e4b9 2935 BlockDriverCompletionFunc *cb, void *opaque)
83f64091 2936{
bbf0a440
SH
2937 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2938
b2a61371 2939 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
8c5873d6 2940 cb, opaque, false);
ea2384d3
FB
2941}
2942
f141eafe
AL
2943BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2944 QEMUIOVector *qiov, int nb_sectors,
2945 BlockDriverCompletionFunc *cb, void *opaque)
ea2384d3 2946{
bbf0a440
SH
2947 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2948
1a6e115b 2949 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
8c5873d6 2950 cb, opaque, true);
83f64091
FB
2951}
2952
40b4f539
KW
2953
2954typedef struct MultiwriteCB {
2955 int error;
2956 int num_requests;
2957 int num_callbacks;
2958 struct {
2959 BlockDriverCompletionFunc *cb;
2960 void *opaque;
2961 QEMUIOVector *free_qiov;
40b4f539
KW
2962 } callbacks[];
2963} MultiwriteCB;
2964
2965static void multiwrite_user_cb(MultiwriteCB *mcb)
2966{
2967 int i;
2968
2969 for (i = 0; i < mcb->num_callbacks; i++) {
2970 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1e1ea48d
SH
2971 if (mcb->callbacks[i].free_qiov) {
2972 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2973 }
7267c094 2974 g_free(mcb->callbacks[i].free_qiov);
40b4f539
KW
2975 }
2976}
2977
2978static void multiwrite_cb(void *opaque, int ret)
2979{
2980 MultiwriteCB *mcb = opaque;
2981
6d519a5f
SH
2982 trace_multiwrite_cb(mcb, ret);
2983
cb6d3ca0 2984 if (ret < 0 && !mcb->error) {
40b4f539 2985 mcb->error = ret;
40b4f539
KW
2986 }
2987
2988 mcb->num_requests--;
2989 if (mcb->num_requests == 0) {
de189a1b 2990 multiwrite_user_cb(mcb);
7267c094 2991 g_free(mcb);
40b4f539
KW
2992 }
2993}
2994
2995static int multiwrite_req_compare(const void *a, const void *b)
2996{
77be4366
CH
2997 const BlockRequest *req1 = a, *req2 = b;
2998
2999 /*
3000 * Note that we can't simply subtract req2->sector from req1->sector
3001 * here as that could overflow the return value.
3002 */
3003 if (req1->sector > req2->sector) {
3004 return 1;
3005 } else if (req1->sector < req2->sector) {
3006 return -1;
3007 } else {
3008 return 0;
3009 }
40b4f539
KW
3010}
3011
3012/*
3013 * Takes a bunch of requests and tries to merge them. Returns the number of
3014 * requests that remain after merging.
3015 */
3016static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
3017 int num_reqs, MultiwriteCB *mcb)
3018{
3019 int i, outidx;
3020
3021 // Sort requests by start sector
3022 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
3023
3024 // Check if adjacent requests touch the same clusters. If so, combine them,
3025 // filling up gaps with zero sectors.
3026 outidx = 0;
3027 for (i = 1; i < num_reqs; i++) {
3028 int merge = 0;
3029 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
3030
b6a127a1 3031 // Handle exactly sequential writes and overlapping writes.
40b4f539
KW
3032 if (reqs[i].sector <= oldreq_last) {
3033 merge = 1;
3034 }
3035
e2a305fb
CH
3036 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
3037 merge = 0;
3038 }
3039
40b4f539
KW
3040 if (merge) {
3041 size_t size;
7267c094 3042 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
40b4f539
KW
3043 qemu_iovec_init(qiov,
3044 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
3045
3046 // Add the first request to the merged one. If the requests are
3047 // overlapping, drop the last sectors of the first request.
3048 size = (reqs[i].sector - reqs[outidx].sector) << 9;
3049 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
3050
b6a127a1
PB
3051 // We should need to add any zeros between the two requests
3052 assert (reqs[i].sector <= oldreq_last);
40b4f539
KW
3053
3054 // Add the second request
3055 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
3056
cbf1dff2 3057 reqs[outidx].nb_sectors = qiov->size >> 9;
40b4f539
KW
3058 reqs[outidx].qiov = qiov;
3059
3060 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
3061 } else {
3062 outidx++;
3063 reqs[outidx].sector = reqs[i].sector;
3064 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
3065 reqs[outidx].qiov = reqs[i].qiov;
3066 }
3067 }
3068
3069 return outidx + 1;
3070}
3071
3072/*
3073 * Submit multiple AIO write requests at once.
3074 *
3075 * On success, the function returns 0 and all requests in the reqs array have
3076 * been submitted. In error case this function returns -1, and any of the
3077 * requests may or may not be submitted yet. In particular, this means that the
3078 * callback will be called for some of the requests, for others it won't. The
3079 * caller must check the error field of the BlockRequest to wait for the right
3080 * callbacks (if error != 0, no callback will be called).
3081 *
3082 * The implementation may modify the contents of the reqs array, e.g. to merge
3083 * requests. However, the fields opaque and error are left unmodified as they
3084 * are used to signal failure for a single request to the caller.
3085 */
3086int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
3087{
40b4f539
KW
3088 MultiwriteCB *mcb;
3089 int i;
3090
301db7c2
RH
3091 /* don't submit writes if we don't have a medium */
3092 if (bs->drv == NULL) {
3093 for (i = 0; i < num_reqs; i++) {
3094 reqs[i].error = -ENOMEDIUM;
3095 }
3096 return -1;
3097 }
3098
40b4f539
KW
3099 if (num_reqs == 0) {
3100 return 0;
3101 }
3102
3103 // Create MultiwriteCB structure
7267c094 3104 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
40b4f539
KW
3105 mcb->num_requests = 0;
3106 mcb->num_callbacks = num_reqs;
3107
3108 for (i = 0; i < num_reqs; i++) {
3109 mcb->callbacks[i].cb = reqs[i].cb;
3110 mcb->callbacks[i].opaque = reqs[i].opaque;
3111 }
3112
3113 // Check for mergable requests
3114 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
3115
6d519a5f
SH
3116 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
3117
df9309fb
PB
3118 /* Run the aio requests. */
3119 mcb->num_requests = num_reqs;
40b4f539 3120 for (i = 0; i < num_reqs; i++) {
ad54ae80 3121 bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
40b4f539 3122 reqs[i].nb_sectors, multiwrite_cb, mcb);
40b4f539
KW
3123 }
3124
3125 return 0;
40b4f539
KW
3126}
3127
83f64091 3128void bdrv_aio_cancel(BlockDriverAIOCB *acb)
83f64091 3129{
6bbff9a0 3130 acb->pool->cancel(acb);
83f64091
FB
3131}
3132
98f90dba
ZYW
3133/* block I/O throttling */
/* Decide whether dispatching nb_sectors now would exceed the configured
 * bytes-per-second limit for the current throttling slice.  Returns false
 * (and zeroes *wait if given) when the request fits the budget; returns
 * true and stores an estimated dispatch delay in *wait otherwise.
 * NOTE(review): *wait is wait_time (seconds) scaled by
 * BLOCK_IO_SLICE_TIME * 10 — confirm units against the caller. */
static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
                 bool is_write, double elapsed_time, uint64_t *wait)
{
    uint64_t bps_limit = 0;
    double bytes_limit, bytes_base, bytes_res;
    double slice_time, wait_time;

    /* A total limit takes precedence over the per-direction limit;
     * no limit configured means nothing to throttle. */
    if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
        bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
    } else if (bs->io_limits.bps[is_write]) {
        bps_limit = bs->io_limits.bps[is_write];
    } else {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    slice_time = bs->slice_end - bs->slice_start;
    slice_time /= (NANOSECONDS_PER_SECOND);
    bytes_limit = bps_limit * slice_time;
    bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
    /* With a total limit, both directions draw from the same budget. */
    if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
        bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
    }

    /* bytes_base: the bytes of data which have been read/written; and
     * it is obtained from the history statistic info.
     * bytes_res: the remaining bytes of data which need to be read/written.
     * (bytes_base + bytes_res) / bps_limit: used to calcuate
     * the total time for completing reading/writting all data.
     */
    bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;

    if (bytes_base + bytes_res <= bytes_limit) {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    /* Calc approx time to dispatch */
    wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;

    /* When the I/O rate at runtime exceeds the limits,
     * bs->slice_end need to be extended in order that the current statistic
     * info can be kept until the timer fire, so it is increased and tuned
     * based on the result of experiment.
     */
    bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
    bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
    if (wait) {
        *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
    }

    return true;
}
3193
/* Decide whether dispatching one more operation now would exceed the
 * configured I/O-operations-per-second limit for the current slice.
 * Mirrors bdrv_exceed_bps_limits() but counts operations, not bytes. */
static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
                             double elapsed_time, uint64_t *wait)
{
    uint64_t iops_limit = 0;
    double ios_limit, ios_base;
    double slice_time, wait_time;

    /* Total limit first, then per-direction; otherwise no throttling. */
    if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
        iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
    } else if (bs->io_limits.iops[is_write]) {
        iops_limit = bs->io_limits.iops[is_write];
    } else {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    slice_time = bs->slice_end - bs->slice_start;
    slice_time /= (NANOSECONDS_PER_SECOND);
    ios_limit = iops_limit * slice_time;
    ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
    /* With a total limit, both directions draw from the same budget. */
    if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
        ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
    }

    /* The "+ 1" is this request itself. */
    if (ios_base + 1 <= ios_limit) {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    /* Calc approx time to dispatch */
    wait_time = (ios_base + 1) / iops_limit;
    if (wait_time > elapsed_time) {
        wait_time = wait_time - elapsed_time;
    } else {
        wait_time = 0;
    }

    /* Extend the slice so the statistics survive until the timer fires;
     * scaling factors tuned experimentally (see bps variant above). */
    bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
    bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
    if (wait) {
        *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
    }

    return true;
}
3245
/* Combined throttling check: starts/extends the accounting slice, then runs
 * both the bps and iops checks.  Returns true (and the larger of the two
 * delays in *wait) when the request must be postponed. */
static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
                           bool is_write, int64_t *wait)
{
    int64_t  now, max_wait;
    uint64_t bps_wait = 0, iops_wait = 0;
    double   elapsed_time;
    int      bps_ret, iops_ret;

    now = qemu_get_clock_ns(vm_clock);
    if ((bs->slice_start < now)
        && (bs->slice_end > now)) {
        /* Still inside the current slice: just push its end out. */
        bs->slice_end = now + bs->slice_time;
    } else {
        /* Slice expired: open a new one and snapshot the byte/op counters
         * that this slice's usage will be measured against. */
        bs->slice_time  =  5 * BLOCK_IO_SLICE_TIME;
        bs->slice_start = now;
        bs->slice_end   = now + bs->slice_time;

        bs->io_base.bytes[is_write] = bs->nr_bytes[is_write];
        bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];

        bs->io_base.ios[is_write] = bs->nr_ops[is_write];
        bs->io_base.ios[!is_write] = bs->nr_ops[!is_write];
    }

    elapsed_time = now - bs->slice_start;
    elapsed_time /= (NANOSECONDS_PER_SECOND);

    bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
                                     is_write, elapsed_time, &bps_wait);
    iops_ret = bdrv_exceed_iops_limits(bs, is_write,
                                       elapsed_time, &iops_wait);
    if (bps_ret || iops_ret) {
        max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
        if (wait) {
            *wait = max_wait;
        }

        /* Make sure the slice lasts at least until the delayed dispatch. */
        now = qemu_get_clock_ns(vm_clock);
        if (bs->slice_end < now + max_wait) {
            bs->slice_end = now + max_wait;
        }

        return true;
    }

    if (wait) {
        *wait = 0;
    }

    return false;
}
ce1a14dc 3297
83f64091
FB
3298/**************************************************************/
3299/* async block device emulation */
3300
c16b5a2c
CH
/* AIOCB for emulating AIO on top of a driver's synchronous read/write,
 * completed from a bottom half scheduled by bdrv_aio_rw_vector(). */
typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;
    QEMUBH *bh;          /* completion bottom half */
    int ret;             /* result of the synchronous bdrv_read/bdrv_write */
    /* vector translation state */
    QEMUIOVector *qiov;  /* caller's scatter/gather list */
    uint8_t *bounce;     /* linear bounce buffer of qiov->size bytes */
    int is_write;        /* nonzero for the write direction */
} BlockDriverAIOCBSync;
3310
/* Cancel hook for the sync-emulation pool: the work already ran
 * synchronously, so cancelling just drops the pending completion BH
 * and releases the AIOCB without invoking the user callback. */
static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
{
    BlockDriverAIOCBSync *acb =
        container_of(blockacb, BlockDriverAIOCBSync, common);
    qemu_bh_delete(acb->bh);
    acb->bh = NULL;
    qemu_aio_release(acb);
}
3319
/* AIOCB pool for the synchronous-emulation path above. */
static AIOPool bdrv_em_aio_pool = {
    .aiocb_size = sizeof(BlockDriverAIOCBSync),
    .cancel = bdrv_aio_cancel_em,
};
3324
ce1a14dc 3325static void bdrv_aio_bh_cb(void *opaque)
83f64091 3326{
ce1a14dc 3327 BlockDriverAIOCBSync *acb = opaque;
f141eafe 3328
f141eafe
AL
3329 if (!acb->is_write)
3330 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
ceb42de8 3331 qemu_vfree(acb->bounce);
ce1a14dc 3332 acb->common.cb(acb->common.opaque, acb->ret);
6a7ad299 3333 qemu_bh_delete(acb->bh);
36afc451 3334 acb->bh = NULL;
ce1a14dc 3335 qemu_aio_release(acb);
83f64091 3336}
beac80cd 3337
f141eafe
AL
/* Emulate an asynchronous vectored read/write using the driver's
 * synchronous bdrv_read/bdrv_write: the I/O happens here, synchronously,
 * through a bounce buffer; only the completion callback is deferred to a
 * bottom half so callers still see AIO semantics. */
static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
                                            int64_t sector_num,
                                            QEMUIOVector *qiov,
                                            int nb_sectors,
                                            BlockDriverCompletionFunc *cb,
                                            void *opaque,
                                            int is_write)

{
    BlockDriverAIOCBSync *acb;

    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
    acb->is_write = is_write;
    acb->qiov = qiov;
    acb->bounce = qemu_blockalign(bs, qiov->size);
    acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);

    if (is_write) {
        /* Linearize the iovec into the bounce buffer before writing. */
        qemu_iovec_to_buffer(acb->qiov, acb->bounce);
        acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
    } else {
        /* Read into the bounce buffer; the BH copies it back to the iovec. */
        acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
    }

    qemu_bh_schedule(acb->bh);

    return &acb->common;
}
3366
f141eafe
AL
/* Read direction of the sync-emulated AIO path. */
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
}
83f64091 3373
f141eafe
AL
/* Write direction of the sync-emulated AIO path. */
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
}
beac80cd 3380
68485420
KW
3381
/* AIOCB for emulating AIO on top of the coroutine read/write/flush/discard
 * paths; completion is signalled through a bottom half (bdrv_co_em_bh). */
typedef struct BlockDriverAIOCBCoroutine {
    BlockDriverAIOCB common;
    BlockRequest req;    /* sector/nb_sectors/qiov plus the error result */
    bool is_write;       /* selects readv vs writev in bdrv_co_do_rw() */
    QEMUBH* bh;          /* completion bottom half */
} BlockDriverAIOCBCoroutine;
3388
/* Cancel hook for the coroutine-emulation pool: drain all pending AIO so
 * the request (and every other one) has completed before returning. */
static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
{
    qemu_aio_flush();
}
3393
/* AIOCB pool for the coroutine-emulation path. */
static AIOPool bdrv_em_co_aio_pool = {
    .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
    .cancel = bdrv_aio_co_cancel_em,
};
3398
/* Bottom half run after a coroutine-emulated request finishes: deliver the
 * result to the user callback, then destroy the BH and release the AIOCB. */
static void bdrv_co_em_bh(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;

    acb->common.cb(acb->common.opaque, acb->req.error);
    qemu_bh_delete(acb->bh);
    qemu_aio_release(acb);
}
3407
b2a61371
SH
/* Invoke bdrv_co_do_readv/bdrv_co_do_writev (coroutine entry point); stores
 * the result in acb->req.error and schedules the completion BH. */
static void coroutine_fn bdrv_co_do_rw(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    if (!acb->is_write) {
        acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov, 0);
    } else {
        acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov, 0);
    }

    /* Completion must happen outside coroutine context: defer to a BH. */
    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}
3425
68485420
KW
/* Emulate AIO by packaging the request into an AIOCB and running it in a
 * freshly created coroutine (bdrv_co_do_rw). */
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
                                               bool is_write)
{
    Coroutine *co;
    BlockDriverAIOCBCoroutine *acb;

    acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
    acb->req.sector = sector_num;
    acb->req.nb_sectors = nb_sectors;
    acb->req.qiov = qiov;
    acb->is_write = is_write;

    co = qemu_coroutine_create(bdrv_co_do_rw);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
}
3448
/* Coroutine entry for bdrv_aio_flush(): run the flush, record the result,
 * and schedule the completion BH. */
static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    acb->req.error = bdrv_co_flush(bs);
    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}
3458
07f07615 3459BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
016f5cf6
AG
3460 BlockDriverCompletionFunc *cb, void *opaque)
3461{
07f07615 3462 trace_bdrv_aio_flush(bs, opaque);
016f5cf6 3463
07f07615
PB
3464 Coroutine *co;
3465 BlockDriverAIOCBCoroutine *acb;
016f5cf6 3466
07f07615
PB
3467 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3468 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
3469 qemu_coroutine_enter(co, acb);
016f5cf6 3470
016f5cf6
AG
3471 return &acb->common;
3472}
3473
4265d620
PB
/* Coroutine entry for bdrv_aio_discard(): run the discard, record the
 * result, and schedule the completion BH. */
static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}
3483
/* Asynchronous discard of nb_sectors starting at sector_num; completion is
 * reported through cb(opaque, error) from a bottom half. */
BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    Coroutine *co;
    BlockDriverAIOCBCoroutine *acb;

    trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);

    acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
    acb->req.sector = sector_num;
    acb->req.nb_sectors = nb_sectors;
    co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
}
3501
ea2384d3
FB
/* Register all built-in block drivers (module init hook). */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}
ce1a14dc 3506
eb852011
MA
/* Like bdrv_init(), but restrict format probing to the whitelist. */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
3512
c16b5a2c
CH
/* Allocate an AIOCB from the pool's free list, or malloc a zeroed one of
 * the pool's configured size if the list is empty.  The returned AIOCB has
 * bs/cb/opaque filled in; release it with qemu_aio_release(). */
void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
                   BlockDriverCompletionFunc *cb, void *opaque)
{
    BlockDriverAIOCB *acb;

    if (pool->free_aiocb) {
        /* Reuse a previously released AIOCB. */
        acb = pool->free_aiocb;
        pool->free_aiocb = acb->next;
    } else {
        acb = g_malloc0(pool->aiocb_size);
        acb->pool = pool;
    }
    acb->bs = bs;
    acb->cb = cb;
    acb->opaque = opaque;
    return acb;
}
3530
/* Return an AIOCB to its pool's free list (memory is never freed, only
 * recycled by qemu_aio_get()). */
void qemu_aio_release(void *p)
{
    BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
    AIOPool *pool = acb->pool;
    acb->next = pool->free_aiocb;
    pool->free_aiocb = acb;
}
19cb3738 3538
f9f05dc5
KW
3539/**************************************************************/
3540/* Coroutine block device emulation */
3541
/* Rendezvous between an AIO completion callback and the coroutine that
 * issued the request and is yielding until bdrv_co_io_em_complete() runs. */
typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;  /* coroutine to re-enter on completion */
    int ret;               /* result delivered by the AIO callback */
} CoroutineIOCompletion;
3546
3547static void bdrv_co_io_em_complete(void *opaque, int ret)
3548{
3549 CoroutineIOCompletion *co = opaque;
3550
3551 co->ret = ret;
3552 qemu_coroutine_enter(co->coroutine, NULL);
3553}
3554
/* Emulate a coroutine read/write on top of the driver's AIO interface:
 * issue the AIO request with bdrv_co_io_em_complete() as callback, yield,
 * and return the result once the callback re-enters us.
 * Returns -EIO if the driver fails to even create the request. */
static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *iov,
                                      bool is_write)
{
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockDriverAIOCB *acb;

    if (is_write) {
        acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
                                       bdrv_co_io_em_complete, &co);
    } else {
        acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
                                      bdrv_co_io_em_complete, &co);
    }

    trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
    if (!acb) {
        return -EIO;
    }
    /* Sleep until bdrv_co_io_em_complete() wakes us with the result. */
    qemu_coroutine_yield();

    return co.ret;
}
3580
/* Read direction of the AIO-emulated coroutine path. */
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
}
3587
/* Write direction of the AIO-emulated coroutine path. */
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
}
3594
/* Coroutine trampoline for bdrv_flush(): stores bdrv_co_flush()'s result
 * into the shared RwCo so the synchronous caller can pick it up. */
static void coroutine_fn bdrv_flush_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    rwco->ret = bdrv_co_flush(rwco->bs);
}
3601
/* Flush a device: write cached data to the OS, then (unless cache=unsafe)
 * force it to stable storage, then recurse into the protocol layer below.
 * Returns 0 on success or when there is nothing to flush, else -errno. */
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    int ret;

    /* Nothing to do without a medium, and read-only devices have no dirty
     * data of their own. */
    if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
        return 0;
    }

    /* Write back cached data to the OS even with cache=unsafe */
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            return ret;
        }
    }

    /* But don't actually force it to the disk with cache=unsafe */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        return 0;
    }

    /* Prefer the coroutine hook, fall back to the AIO hook (yielding until
     * bdrv_co_io_em_complete() delivers the result). */
    if (bs->drv->bdrv_co_flush_to_disk) {
        ret = bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        BlockDriverAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            ret = -EIO;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
        }
    } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work because
         * that would break guests even if the server operates in writethrough
         * mode.
         *
         * Let's hope the user knows what he's doing.
         */
        ret = 0;
    }
    if (ret < 0) {
        return ret;
    }

    /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
     * in the case of cache=unsafe, so there are no useless flushes.
     */
    return bdrv_co_flush(bs->file);
}
3661
0f15423c
AL
3662void bdrv_invalidate_cache(BlockDriverState *bs)
3663{
3664 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3665 bs->drv->bdrv_invalidate_cache(bs);
3666 }
3667}
3668
/* Invalidate caches on every registered BlockDriverState. */
void bdrv_invalidate_cache_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_invalidate_cache(bs);
    }
}
3677
07789269
BC
/* Clear the BDRV_O_INCOMING flag on all devices (incoming migration done). */
void bdrv_clear_incoming_migration_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
    }
}
3686
07f07615
PB
/* Synchronous wrapper around bdrv_co_flush(): runs it directly when already
 * in coroutine context, otherwise spawns a coroutine and waits for AIO
 * events until it finishes.  Returns the flush result. */
int bdrv_flush(BlockDriverState *bs)
{
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .ret = NOT_DONE,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_flush_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_flush_co_entry);
        qemu_coroutine_enter(co, &rwco);
        /* Pump the event loop until the coroutine stores its result. */
        while (rwco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }

    return rwco.ret;
}
3708
4265d620
PB
/* Coroutine trampoline for bdrv_discard(): runs the discard and stores the
 * result into the shared RwCo. */
static void coroutine_fn bdrv_discard_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
}
3715
/* Discard (unmap) nb_sectors starting at sector_num.  Uses the driver's
 * coroutine hook when available, falls back to the AIO hook, and silently
 * succeeds when the driver supports neither (discard is advisory). */
int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
                                 int nb_sectors)
{
    if (!bs->drv) {
        return -ENOMEDIUM;
    } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    } else if (bs->read_only) {
        return -EROFS;
    } else if (bs->drv->bdrv_co_discard) {
        return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
    } else if (bs->drv->bdrv_aio_discard) {
        BlockDriverAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
                                        bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            return -EIO;
        } else {
            /* Yield until bdrv_co_io_em_complete() delivers the result. */
            qemu_coroutine_yield();
            return co.ret;
        }
    } else {
        return 0;
    }
}
3745
/* Synchronous wrapper around bdrv_co_discard(); same coroutine fast-path /
 * spawn-and-wait pattern as bdrv_flush(). */
int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
{
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .ret = NOT_DONE,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_discard_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_discard_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }

    return rwco.ret;
}
3769
19cb3738
FB
3770/**************************************************************/
3771/* removable device support */
3772
3773/**
3774 * Return TRUE if the media is present
3775 */
3776int bdrv_is_inserted(BlockDriverState *bs)
3777{
3778 BlockDriver *drv = bs->drv;
a1aff5bf 3779
19cb3738
FB
3780 if (!drv)
3781 return 0;
3782 if (!drv->bdrv_is_inserted)
a1aff5bf
MA
3783 return 1;
3784 return drv->bdrv_is_inserted(bs);
19cb3738
FB
3785}
3786
3787/**
8e49ca46
MA
3788 * Return whether the media changed since the last call to this
3789 * function, or -ENOTSUP if we don't know. Most drivers don't know.
19cb3738
FB
3790 */
int bdrv_media_changed(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    /* Most drivers cannot tell; they report -ENOTSUP (see comment above). */
    if (drv && drv->bdrv_media_changed) {
        return drv->bdrv_media_changed(bs);
    }
    return -ENOTSUP;
}
3800
3801/**
3802 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3803 */
void bdrv_eject(BlockDriverState *bs, bool eject_flag)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_eject) {
        drv->bdrv_eject(bs, eject_flag);
    }

    /* Only named (monitor-visible) devices emit the QMP eject event. */
    if (bs->device_name[0] != '\0') {
        bdrv_emit_qmp_eject_event(bs, eject_flag);
    }
}
3816
19cb3738
FB
3817/**
3818 * Lock or unlock the media (if it is locked, the user won't be able
3819 * to eject it manually).
3820 */
void bdrv_lock_medium(BlockDriverState *bs, bool locked)
{
    BlockDriver *drv = bs->drv;

    trace_bdrv_lock_medium(bs, locked);

    /* Silently ignored when the driver has no medium-lock support. */
    if (drv && drv->bdrv_lock_medium) {
        drv->bdrv_lock_medium(bs, locked);
    }
}
985a03b0
TS
3831
3832/* needed for generic scsi interface */
3833
3834int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3835{
3836 BlockDriver *drv = bs->drv;
3837
3838 if (drv && drv->bdrv_ioctl)
3839 return drv->bdrv_ioctl(bs, req, buf);
3840 return -ENOTSUP;
3841}
7d780669 3842
221f715d
AL
3843BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3844 unsigned long int req, void *buf,
3845 BlockDriverCompletionFunc *cb, void *opaque)
7d780669 3846{
221f715d 3847 BlockDriver *drv = bs->drv;
7d780669 3848
221f715d
AL
3849 if (drv && drv->bdrv_aio_ioctl)
3850 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3851 return NULL;
7d780669 3852}
e268ca52 3853
7b6f9300
MA
/* Record the alignment (in bytes) required for this device's I/O buffers;
 * consumed by qemu_blockalign(). */
void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
{
    bs->buffer_alignment = align;
}
7cd1e32a 3858
e268ca52
AL
3859void *qemu_blockalign(BlockDriverState *bs, size_t size)
3860{
3861 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3862}
7cd1e32a
LS
3863
/* Enable or disable dirty-sector tracking.  Enabling allocates a zeroed
 * bitmap with one bit per BDRV_SECTORS_PER_DIRTY_CHUNK sectors; disabling
 * frees it.  The dirty counter is reset either way. */
void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
{
    int64_t bitmap_size;

    bs->dirty_count = 0;
    if (enable) {
        if (!bs->dirty_bitmap) {
            /* Round the chunk count up to a whole number of bytes. */
            bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
                    BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
            bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;

            bs->dirty_bitmap = g_malloc0(bitmap_size);
        }
    } else {
        if (bs->dirty_bitmap) {
            g_free(bs->dirty_bitmap);
            bs->dirty_bitmap = NULL;
        }
    }
}
3884
/* Return 1 if the chunk containing 'sector' is marked dirty, 0 otherwise
 * (also 0 when tracking is off or the sector lies beyond the device). */
int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
{
    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;

    if (bs->dirty_bitmap &&
        (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
        /* Index into the unsigned-long array, then test the chunk's bit. */
        return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
            (1UL << (chunk % (sizeof(unsigned long) * 8))));
    } else {
        return 0;
    }
}
3897
a55eb92c
JK
/* Clear the dirty bits covering [cur_sector, cur_sector + nr_sectors). */
void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                      int nr_sectors)
{
    set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
}
aaa0eb75
LS
3903
/* Return the number of dirty chunks currently tracked for bs. */
int64_t bdrv_get_dirty_count(BlockDriverState *bs)
{
    return bs->dirty_count;
}
f88e1a42 3908
db593f25
MT
/* Mark/unmark bs as exclusively in use (e.g. by a block job); asserts the
 * flag actually changes, catching double acquire/release. */
void bdrv_set_in_use(BlockDriverState *bs, int in_use)
{
    assert(bs->in_use != in_use);
    bs->in_use = in_use;
}
3914
/* Return nonzero if bs is currently marked as exclusively in use. */
int bdrv_in_use(BlockDriverState *bs)
{
    return bs->in_use;
}
3919
28a7282a
LC
/* Enable I/O status reporting for bs and reset it to OK. */
void bdrv_iostatus_enable(BlockDriverState *bs)
{
    bs->iostatus_enabled = true;
    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}
3925
/* The I/O status is only enabled if the drive explicitly
 * enables it _and_ the VM is configured to stop on errors */
bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
{
    return (bs->iostatus_enabled &&
           (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
            bs->on_write_error == BLOCK_ERR_STOP_ANY    ||
            bs->on_read_error == BLOCK_ERR_STOP_ANY));
}
3935
/* Disable I/O status reporting for bs. */
void bdrv_iostatus_disable(BlockDriverState *bs)
{
    bs->iostatus_enabled = false;
}
3940
/* Reset the I/O status back to OK, but only when reporting is enabled. */
void bdrv_iostatus_reset(BlockDriverState *bs)
{
    if (bdrv_iostatus_is_enabled(bs)) {
        bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
    }
}
3947
/* XXX: Today this is set by device models because it makes the implementation
   quite simple. However, the block layer knows about the error, so it's
   possible to implement this without device models being involved */
void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
{
    /* Only the first error is recorded; later errors don't overwrite it. */
    if (bdrv_iostatus_is_enabled(bs) &&
        bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
        assert(error >= 0);
        bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
                                         BLOCK_DEVICE_IO_STATUS_FAILED;
    }
}
3960
a597e79c
CH
/* Begin accounting one I/O: record its size, type, and start timestamp in
 * the caller-provided cookie; pair with bdrv_acct_done(). */
void
bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
        enum BlockAcctType type)
{
    assert(type < BDRV_MAX_IOTYPE);

    cookie->bytes = bytes;
    cookie->start_time_ns = get_clock();
    cookie->type = type;
}
3971
/* Finish accounting one I/O: fold the cookie's byte count, op count, and
 * elapsed time into the per-type device statistics. */
void
bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
{
    assert(cookie->type < BDRV_MAX_IOTYPE);

    bs->nr_bytes[cookie->type] += cookie->bytes;
    bs->nr_ops[cookie->type]++;
    bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
}
3981
f88e1a42
JS
3982int bdrv_img_create(const char *filename, const char *fmt,
3983 const char *base_filename, const char *base_fmt,
3984 char *options, uint64_t img_size, int flags)
3985{
3986 QEMUOptionParameter *param = NULL, *create_options = NULL;
d220894e 3987 QEMUOptionParameter *backing_fmt, *backing_file, *size;
f88e1a42
JS
3988 BlockDriverState *bs = NULL;
3989 BlockDriver *drv, *proto_drv;
96df67d1 3990 BlockDriver *backing_drv = NULL;
f88e1a42
JS
3991 int ret = 0;
3992
3993 /* Find driver and parse its options */
3994 drv = bdrv_find_format(fmt);
3995 if (!drv) {
3996 error_report("Unknown file format '%s'", fmt);
4f70f249 3997 ret = -EINVAL;
f88e1a42
JS
3998 goto out;
3999 }
4000
4001 proto_drv = bdrv_find_protocol(filename);
4002 if (!proto_drv) {
4003 error_report("Unknown protocol '%s'", filename);
4f70f249 4004 ret = -EINVAL;
f88e1a42
JS
4005 goto out;
4006 }
4007
4008 create_options = append_option_parameters(create_options,
4009 drv->create_options);
4010 create_options = append_option_parameters(create_options,
4011 proto_drv->create_options);
4012
4013 /* Create parameter list with default values */
4014 param = parse_option_parameters("", create_options, param);
4015
4016 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
4017
4018 /* Parse -o options */
4019 if (options) {
4020 param = parse_option_parameters(options, create_options, param);
4021 if (param == NULL) {
4022 error_report("Invalid options for file format '%s'.", fmt);
4f70f249 4023 ret = -EINVAL;
f88e1a42
JS
4024 goto out;
4025 }
4026 }
4027
4028 if (base_filename) {
4029 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
4030 base_filename)) {
4031 error_report("Backing file not supported for file format '%s'",
4032 fmt);
4f70f249 4033 ret = -EINVAL;
f88e1a42
JS
4034 goto out;
4035 }
4036 }
4037
4038 if (base_fmt) {
4039 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
4040 error_report("Backing file format not supported for file "
4041 "format '%s'", fmt);
4f70f249 4042 ret = -EINVAL;
f88e1a42
JS
4043 goto out;
4044 }
4045 }
4046
792da93a
JS
4047 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
4048 if (backing_file && backing_file->value.s) {
4049 if (!strcmp(filename, backing_file->value.s)) {
4050 error_report("Error: Trying to create an image with the "
4051 "same filename as the backing file");
4f70f249 4052 ret = -EINVAL;
792da93a
JS
4053 goto out;
4054 }
4055 }
4056
f88e1a42
JS
4057 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
4058 if (backing_fmt && backing_fmt->value.s) {
96df67d1
SH
4059 backing_drv = bdrv_find_format(backing_fmt->value.s);
4060 if (!backing_drv) {
f88e1a42
JS
4061 error_report("Unknown backing file format '%s'",
4062 backing_fmt->value.s);
4f70f249 4063 ret = -EINVAL;
f88e1a42
JS
4064 goto out;
4065 }
4066 }
4067
4068 // The size for the image must always be specified, with one exception:
4069 // If we are using a backing file, we can obtain the size from there
d220894e
KW
4070 size = get_option_parameter(param, BLOCK_OPT_SIZE);
4071 if (size && size->value.n == -1) {
f88e1a42
JS
4072 if (backing_file && backing_file->value.s) {
4073 uint64_t size;
f88e1a42
JS
4074 char buf[32];
4075
f88e1a42
JS
4076 bs = bdrv_new("");
4077
96df67d1 4078 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
f88e1a42 4079 if (ret < 0) {
96df67d1 4080 error_report("Could not open '%s'", backing_file->value.s);
f88e1a42
JS
4081 goto out;
4082 }
4083 bdrv_get_geometry(bs, &size);
4084 size *= 512;
4085
4086 snprintf(buf, sizeof(buf), "%" PRId64, size);
4087 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
4088 } else {
4089 error_report("Image creation needs a size parameter");
4f70f249 4090 ret = -EINVAL;
f88e1a42
JS
4091 goto out;
4092 }
4093 }
4094
4095 printf("Formatting '%s', fmt=%s ", filename, fmt);
4096 print_option_parameters(param);
4097 puts("");
4098
4099 ret = bdrv_create(drv, filename, param);
4100
4101 if (ret < 0) {
4102 if (ret == -ENOTSUP) {
4103 error_report("Formatting or formatting option not supported for "
4104 "file format '%s'", fmt);
4105 } else if (ret == -EFBIG) {
4106 error_report("The image size is too large for file format '%s'",
4107 fmt);
4108 } else {
4109 error_report("%s: error while creating %s: %s", filename, fmt,
4110 strerror(-ret));
4111 }
4112 }
4113
4114out:
4115 free_option_parameters(create_options);
4116 free_option_parameters(param);
4117
4118 if (bs) {
4119 bdrv_delete(bs);
4120 }
4f70f249
JS
4121
4122 return ret;
f88e1a42 4123}
eeec61f2
SH
4124
/* Allocate and attach a block job of the given type to bs.  Fails with
 * QERR_DEVICE_IN_USE when bs already has a job or is otherwise in use.
 * Returns the job (of job_type->instance_size bytes) or NULL on error,
 * propagating the cause through errp. */
void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
                       int64_t speed, BlockDriverCompletionFunc *cb,
                       void *opaque, Error **errp)
{
    BlockJob *job;

    if (bs->job || bdrv_in_use(bs)) {
        error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
        return NULL;
    }
    bdrv_set_in_use(bs, 1);

    job = g_malloc0(job_type->instance_size);
    job->job_type = job_type;
    job->bs = bs;
    job->cb = cb;
    job->opaque = opaque;
    bs->job = job;

    /* Only set speed when necessary to avoid NotSupported error */
    if (speed != 0) {
        Error *local_err = NULL;

        block_job_set_speed(job, speed, &local_err);
        if (error_is_set(&local_err)) {
            /* Roll back everything done above before reporting failure. */
            bs->job = NULL;
            g_free(job);
            bdrv_set_in_use(bs, 0);
            error_propagate(errp, local_err);
            return NULL;
        }
    }
    return job;
}
4159
/* Finish a block job: deliver ret to its completion callback, then detach
 * it from the device, free it, and release the in-use flag. */
void block_job_complete(BlockJob *job, int ret)
{
    BlockDriverState *bs = job->bs;

    assert(bs->job == job);
    job->cb(job->opaque, ret);
    bs->job = NULL;
    g_free(job);
    bdrv_set_in_use(bs, 0);
}
4170
/* Set the job's speed limit via its type-specific hook.  Reports
 * QERR_NOT_SUPPORTED when the job type has no hook; job->speed is only
 * updated after the hook succeeds. */
void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
{
    Error *local_err = NULL;

    if (!job->job_type->set_speed) {
        error_set(errp, QERR_NOT_SUPPORTED);
        return;
    }
    job->job_type->set_speed(job, speed, &local_err);
    if (error_is_set(&local_err)) {
        error_propagate(errp, local_err);
        return;
    }

    job->speed = speed;
}
4187
/* Request cancellation; the job notices the flag at its next check. */
void block_job_cancel(BlockJob *job)
{
    job->cancelled = true;
}
4192
/* Return whether cancellation has been requested for this job. */
bool block_job_is_cancelled(BlockJob *job)
{
    return job->cancelled;
}
3e914655
PB
4197
/* Cancel the job and block, pumping AIO events, until it is no longer
 * busy (i.e. cancellation has actually taken effect). */
void block_job_cancel_sync(BlockJob *job)
{
    BlockDriverState *bs = job->bs;

    assert(bs->job == job);
    block_job_cancel(job);
    while (bs->job != NULL && bs->job->busy) {
        qemu_aio_wait();
    }
}