]> git.proxmox.com Git - mirror_qemu.git/blame - block.c
qtest: Add floppy test
[mirror_qemu.git] / block.c
CommitLineData
fc01f7e7
FB
1/*
2 * QEMU System Emulator block driver
5fafdf24 3 *
fc01f7e7 4 * Copyright (c) 2003 Fabrice Bellard
5fafdf24 5 *
fc01f7e7
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
3990d09a 24#include "config-host.h"
faf07963 25#include "qemu-common.h"
6d519a5f 26#include "trace.h"
376253ec 27#include "monitor.h"
ea2384d3 28#include "block_int.h"
5efa9d5a 29#include "module.h"
f795e743 30#include "qjson.h"
68485420 31#include "qemu-coroutine.h"
b2023818 32#include "qmp-commands.h"
0563e191 33#include "qemu-timer.h"
fc01f7e7 34
71e72a19 35#ifdef CONFIG_BSD
7674e7bf
FB
36#include <sys/types.h>
37#include <sys/stat.h>
38#include <sys/ioctl.h>
72cf2d4f 39#include <sys/queue.h>
c5e97233 40#ifndef __DragonFly__
7674e7bf
FB
41#include <sys/disk.h>
42#endif
c5e97233 43#endif
7674e7bf 44
49dc768d
AL
45#ifdef _WIN32
46#include <windows.h>
47#endif
48
1c9805a3
SH
49#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
50
/* Per-request modifier flags, passed down the internal read/write paths. */
typedef enum {
    BDRV_REQ_COPY_ON_READ = 0x1,
    BDRV_REQ_ZERO_WRITE   = 0x2,
} BdrvRequestFlags;
55
7d4b4ba5 56static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
f141eafe
AL
57static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
58 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
c87c0672 59 BlockDriverCompletionFunc *cb, void *opaque);
f141eafe
AL
60static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
61 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 62 BlockDriverCompletionFunc *cb, void *opaque);
f9f05dc5
KW
63static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
64 int64_t sector_num, int nb_sectors,
65 QEMUIOVector *iov);
66static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
67 int64_t sector_num, int nb_sectors,
68 QEMUIOVector *iov);
c5fbe571 69static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
470c0504
SH
70 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
71 BdrvRequestFlags flags);
1c9805a3 72static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
f08f2dda
SH
73 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
74 BdrvRequestFlags flags);
b2a61371
SH
75static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
76 int64_t sector_num,
77 QEMUIOVector *qiov,
78 int nb_sectors,
79 BlockDriverCompletionFunc *cb,
80 void *opaque,
8c5873d6 81 bool is_write);
b2a61371 82static void coroutine_fn bdrv_co_do_rw(void *opaque);
621f0589
KW
83static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
84 int64_t sector_num, int nb_sectors);
ec530c81 85
98f90dba
ZYW
86static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
87 bool is_write, double elapsed_time, uint64_t *wait);
88static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
89 double elapsed_time, uint64_t *wait);
90static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
91 bool is_write, int64_t *wait);
92
1b7bdbc1
SH
93static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
94 QTAILQ_HEAD_INITIALIZER(bdrv_states);
7ee930d0 95
8a22f02a
SH
96static QLIST_HEAD(, BlockDriver) bdrv_drivers =
97 QLIST_HEAD_INITIALIZER(bdrv_drivers);
ea2384d3 98
f9092b10
MA
99/* The device to use for VM snapshots */
100static BlockDriverState *bs_snapshots;
101
eb852011
MA
102/* If non-zero, use only whitelisted block drivers */
103static int use_bdrv_whitelist;
104
#ifdef _WIN32
/* True when 'filename' begins with a drive letter followed by ':'. */
static int is_windows_drive_prefix(const char *filename)
{
    char c = filename[0];

    return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) &&
           filename[1] == ':';
}

/* True for a bare drive ("c:") or a device namespace path ("\\.\", "//./"). */
int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL)) {
        return 1;
    }
    return 0;
}
#endif
124
0563e191 125/* throttling disk I/O limits */
98f90dba
ZYW
126void bdrv_io_limits_disable(BlockDriverState *bs)
127{
128 bs->io_limits_enabled = false;
129
130 while (qemu_co_queue_next(&bs->throttled_reqs));
131
132 if (bs->block_timer) {
133 qemu_del_timer(bs->block_timer);
134 qemu_free_timer(bs->block_timer);
135 bs->block_timer = NULL;
136 }
137
138 bs->slice_start = 0;
139 bs->slice_end = 0;
140 bs->slice_time = 0;
141 memset(&bs->io_base, 0, sizeof(bs->io_base));
142}
143
0563e191
ZYW
144static void bdrv_block_timer(void *opaque)
145{
146 BlockDriverState *bs = opaque;
147
148 qemu_co_queue_next(&bs->throttled_reqs);
149}
150
151void bdrv_io_limits_enable(BlockDriverState *bs)
152{
153 qemu_co_queue_init(&bs->throttled_reqs);
154 bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
155 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
156 bs->slice_start = qemu_get_clock_ns(vm_clock);
157 bs->slice_end = bs->slice_start + bs->slice_time;
158 memset(&bs->io_base, 0, sizeof(bs->io_base));
159 bs->io_limits_enabled = true;
160}
161
162bool bdrv_io_limits_enabled(BlockDriverState *bs)
163{
164 BlockIOLimit *io_limits = &bs->io_limits;
165 return io_limits->bps[BLOCK_IO_LIMIT_READ]
166 || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
167 || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
168 || io_limits->iops[BLOCK_IO_LIMIT_READ]
169 || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
170 || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
171}
172
98f90dba
ZYW
173static void bdrv_io_limits_intercept(BlockDriverState *bs,
174 bool is_write, int nb_sectors)
175{
176 int64_t wait_time = -1;
177
178 if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
179 qemu_co_queue_wait(&bs->throttled_reqs);
180 }
181
182 /* In fact, we hope to keep each request's timing, in FIFO mode. The next
183 * throttled requests will not be dequeued until the current request is
184 * allowed to be serviced. So if the current request still exceeds the
185 * limits, it will be inserted to the head. All requests followed it will
186 * be still in throttled_reqs queue.
187 */
188
189 while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
190 qemu_mod_timer(bs->block_timer,
191 wait_time + qemu_get_clock_ns(vm_clock));
192 qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
193 }
194
195 qemu_co_queue_next(&bs->throttled_reqs);
196}
197
/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    /* Windows drive names ("c:", "c:\foo") look like protocols but aren't. */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#endif
    return strchr(path, ':') != NULL;
}
210
/* Return non-zero when 'path' is absolute, after skipping any
 * "<protocol>:" prefix. */
int path_is_absolute(const char *path)
{
    const char *rest;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif
    rest = strchr(path, ':');
    rest = rest ? rest + 1 : path;
#ifdef _WIN32
    return (*rest == '/' || *rest == '\\');
#else
    return (*rest == '/');
#endif
}
230
/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URL are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *proto_end, *dir_end;
    int len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Skip a "<protocol>:" prefix in base_path, if present. */
    proto_end = strchr(base_path, ':');
    proto_end = proto_end ? proto_end + 1 : base_path;

    /* Find the last directory separator in base_path. */
    dir_end = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *bslash = strrchr(base_path, '\\');
        if (!dir_end || bslash > dir_end) {
            dir_end = bslash;
        }
    }
#endif
    dir_end = dir_end ? dir_end + 1 : base_path;

    /* Keep the longer of the two prefixes (protocol vs directory). */
    if (dir_end > proto_end) {
        proto_end = dir_end;
    }
    len = proto_end - base_path;
    if (len > dest_size - 1) {
        len = dest_size - 1;
    }
    memcpy(dest, base_path, len);
    dest[len] = '\0';
    pstrcat(dest, dest_size, filename);
}
274
5efa9d5a 275void bdrv_register(BlockDriver *bdrv)
ea2384d3 276{
8c5873d6
SH
277 /* Block drivers without coroutine functions need emulation */
278 if (!bdrv->bdrv_co_readv) {
f9f05dc5
KW
279 bdrv->bdrv_co_readv = bdrv_co_readv_em;
280 bdrv->bdrv_co_writev = bdrv_co_writev_em;
281
f8c35c1d
SH
282 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
283 * the block driver lacks aio we need to emulate that too.
284 */
f9f05dc5
KW
285 if (!bdrv->bdrv_aio_readv) {
286 /* add AIO emulation layer */
287 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
288 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
f9f05dc5 289 }
83f64091 290 }
b2e12bc6 291
8a22f02a 292 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
ea2384d3 293}
b338082b
FB
294
295/* create a new block device (by default it is empty) */
296BlockDriverState *bdrv_new(const char *device_name)
297{
1b7bdbc1 298 BlockDriverState *bs;
b338082b 299
7267c094 300 bs = g_malloc0(sizeof(BlockDriverState));
b338082b 301 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
ea2384d3 302 if (device_name[0] != '\0') {
1b7bdbc1 303 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
ea2384d3 304 }
28a7282a 305 bdrv_iostatus_disable(bs);
b338082b
FB
306 return bs;
307}
308
ea2384d3
FB
309BlockDriver *bdrv_find_format(const char *format_name)
310{
311 BlockDriver *drv1;
8a22f02a
SH
312 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
313 if (!strcmp(drv1->format_name, format_name)) {
ea2384d3 314 return drv1;
8a22f02a 315 }
ea2384d3
FB
316 }
317 return NULL;
318}
319
eb852011
MA
320static int bdrv_is_whitelisted(BlockDriver *drv)
321{
322 static const char *whitelist[] = {
323 CONFIG_BDRV_WHITELIST
324 };
325 const char **p;
326
327 if (!whitelist[0])
328 return 1; /* no whitelist, anything goes */
329
330 for (p = whitelist; *p; p++) {
331 if (!strcmp(drv->format_name, *p)) {
332 return 1;
333 }
334 }
335 return 0;
336}
337
338BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
339{
340 BlockDriver *drv = bdrv_find_format(format_name);
341 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
342}
343
5b7e1542
ZYW
344typedef struct CreateCo {
345 BlockDriver *drv;
346 char *filename;
347 QEMUOptionParameter *options;
348 int ret;
349} CreateCo;
350
351static void coroutine_fn bdrv_create_co_entry(void *opaque)
352{
353 CreateCo *cco = opaque;
354 assert(cco->drv);
355
356 cco->ret = cco->drv->bdrv_create(cco->filename, cco->options);
357}
358
0e7e1989
KW
359int bdrv_create(BlockDriver *drv, const char* filename,
360 QEMUOptionParameter *options)
ea2384d3 361{
5b7e1542
ZYW
362 int ret;
363
364 Coroutine *co;
365 CreateCo cco = {
366 .drv = drv,
367 .filename = g_strdup(filename),
368 .options = options,
369 .ret = NOT_DONE,
370 };
371
372 if (!drv->bdrv_create) {
ea2384d3 373 return -ENOTSUP;
5b7e1542
ZYW
374 }
375
376 if (qemu_in_coroutine()) {
377 /* Fast-path if already in coroutine context */
378 bdrv_create_co_entry(&cco);
379 } else {
380 co = qemu_coroutine_create(bdrv_create_co_entry);
381 qemu_coroutine_enter(co, &cco);
382 while (cco.ret == NOT_DONE) {
383 qemu_aio_wait();
384 }
385 }
386
387 ret = cco.ret;
388 g_free(cco.filename);
0e7e1989 389
5b7e1542 390 return ret;
ea2384d3
FB
391}
392
84a12e66
CH
393int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
394{
395 BlockDriver *drv;
396
b50cbabc 397 drv = bdrv_find_protocol(filename);
84a12e66 398 if (drv == NULL) {
16905d71 399 return -ENOENT;
84a12e66
CH
400 }
401
402 return bdrv_create(drv, filename, options);
403}
404
#ifdef _WIN32
/* Fill 'filename' with the name of a freshly created temporary file. */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
/* Fill 'filename' with the name of a freshly created temporary file,
 * honouring $TMPDIR (default /tmp). */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;
    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    /* Previously close(fd) ran unconditionally, passing -1 to close()
     * when mkstemp() failed. */
    if (fd >= 0) {
        close(fd);
    }
}
#endif
fc01f7e7 427
84a12e66
CH
428/*
429 * Detect host devices. By convention, /dev/cdrom[N] is always
430 * recognized as a host CDROM.
431 */
432static BlockDriver *find_hdev_driver(const char *filename)
433{
434 int score_max = 0, score;
435 BlockDriver *drv = NULL, *d;
436
437 QLIST_FOREACH(d, &bdrv_drivers, list) {
438 if (d->bdrv_probe_device) {
439 score = d->bdrv_probe_device(filename);
440 if (score > score_max) {
441 score_max = score;
442 drv = d;
443 }
444 }
445 }
446
447 return drv;
448}
449
b50cbabc 450BlockDriver *bdrv_find_protocol(const char *filename)
83f64091
FB
451{
452 BlockDriver *drv1;
453 char protocol[128];
1cec71e3 454 int len;
83f64091 455 const char *p;
19cb3738 456
66f82cee
KW
457 /* TODO Drivers without bdrv_file_open must be specified explicitly */
458
39508e7a
CH
459 /*
460 * XXX(hch): we really should not let host device detection
461 * override an explicit protocol specification, but moving this
462 * later breaks access to device names with colons in them.
463 * Thanks to the brain-dead persistent naming schemes on udev-
464 * based Linux systems those actually are quite common.
465 */
466 drv1 = find_hdev_driver(filename);
467 if (drv1) {
468 return drv1;
469 }
470
9e0b22f4 471 if (!path_has_protocol(filename)) {
39508e7a 472 return bdrv_find_format("file");
84a12e66 473 }
9e0b22f4
SH
474 p = strchr(filename, ':');
475 assert(p != NULL);
1cec71e3
AL
476 len = p - filename;
477 if (len > sizeof(protocol) - 1)
478 len = sizeof(protocol) - 1;
479 memcpy(protocol, filename, len);
480 protocol[len] = '\0';
8a22f02a 481 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
5fafdf24 482 if (drv1->protocol_name &&
8a22f02a 483 !strcmp(drv1->protocol_name, protocol)) {
83f64091 484 return drv1;
8a22f02a 485 }
83f64091
FB
486 }
487 return NULL;
488}
489
c98ac35d 490static int find_image_format(const char *filename, BlockDriver **pdrv)
f3a5d3f8
CH
491{
492 int ret, score, score_max;
493 BlockDriver *drv1, *drv;
494 uint8_t buf[2048];
495 BlockDriverState *bs;
496
f5edb014 497 ret = bdrv_file_open(&bs, filename, 0);
c98ac35d
SW
498 if (ret < 0) {
499 *pdrv = NULL;
500 return ret;
501 }
f8ea0b00 502
08a00559
KW
503 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
504 if (bs->sg || !bdrv_is_inserted(bs)) {
1a396859 505 bdrv_delete(bs);
c98ac35d
SW
506 drv = bdrv_find_format("raw");
507 if (!drv) {
508 ret = -ENOENT;
509 }
510 *pdrv = drv;
511 return ret;
1a396859 512 }
f8ea0b00 513
83f64091
FB
514 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
515 bdrv_delete(bs);
516 if (ret < 0) {
c98ac35d
SW
517 *pdrv = NULL;
518 return ret;
83f64091
FB
519 }
520
ea2384d3 521 score_max = 0;
84a12e66 522 drv = NULL;
8a22f02a 523 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
83f64091
FB
524 if (drv1->bdrv_probe) {
525 score = drv1->bdrv_probe(buf, ret, filename);
526 if (score > score_max) {
527 score_max = score;
528 drv = drv1;
529 }
0849bf08 530 }
fc01f7e7 531 }
c98ac35d
SW
532 if (!drv) {
533 ret = -ENOENT;
534 }
535 *pdrv = drv;
536 return ret;
ea2384d3
FB
537}
538
51762288
SH
539/**
540 * Set the current 'total_sectors' value
541 */
542static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
543{
544 BlockDriver *drv = bs->drv;
545
396759ad
NB
546 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
547 if (bs->sg)
548 return 0;
549
51762288
SH
550 /* query actual device if possible, otherwise just trust the hint */
551 if (drv->bdrv_getlength) {
552 int64_t length = drv->bdrv_getlength(bs);
553 if (length < 0) {
554 return length;
555 }
556 hint = length >> BDRV_SECTOR_BITS;
557 }
558
559 bs->total_sectors = hint;
560 return 0;
561}
562
c3993cdc
SH
563/**
564 * Set open flags for a given cache mode
565 *
566 * Return 0 on success, -1 if the cache mode was invalid.
567 */
568int bdrv_parse_cache_flags(const char *mode, int *flags)
569{
570 *flags &= ~BDRV_O_CACHE_MASK;
571
572 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
573 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
92196b2f
SH
574 } else if (!strcmp(mode, "directsync")) {
575 *flags |= BDRV_O_NOCACHE;
c3993cdc
SH
576 } else if (!strcmp(mode, "writeback")) {
577 *flags |= BDRV_O_CACHE_WB;
578 } else if (!strcmp(mode, "unsafe")) {
579 *flags |= BDRV_O_CACHE_WB;
580 *flags |= BDRV_O_NO_FLUSH;
581 } else if (!strcmp(mode, "writethrough")) {
582 /* this is the default */
583 } else {
584 return -1;
585 }
586
587 return 0;
588}
589
53fec9d3
SH
590/**
591 * The copy-on-read flag is actually a reference count so multiple users may
592 * use the feature without worrying about clobbering its previous state.
593 * Copy-on-read stays enabled until all users have called to disable it.
594 */
595void bdrv_enable_copy_on_read(BlockDriverState *bs)
596{
597 bs->copy_on_read++;
598}
599
600void bdrv_disable_copy_on_read(BlockDriverState *bs)
601{
602 assert(bs->copy_on_read > 0);
603 bs->copy_on_read--;
604}
605
57915332
KW
606/*
607 * Common part for opening disk images and files
608 */
609static int bdrv_open_common(BlockDriverState *bs, const char *filename,
610 int flags, BlockDriver *drv)
611{
612 int ret, open_flags;
613
614 assert(drv != NULL);
615
28dcee10
SH
616 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
617
66f82cee 618 bs->file = NULL;
51762288 619 bs->total_sectors = 0;
57915332
KW
620 bs->encrypted = 0;
621 bs->valid_key = 0;
03f541bd 622 bs->sg = 0;
57915332 623 bs->open_flags = flags;
03f541bd 624 bs->growable = 0;
57915332
KW
625 bs->buffer_alignment = 512;
626
53fec9d3
SH
627 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
628 if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
629 bdrv_enable_copy_on_read(bs);
630 }
631
57915332 632 pstrcpy(bs->filename, sizeof(bs->filename), filename);
03f541bd 633 bs->backing_file[0] = '\0';
57915332
KW
634
635 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
636 return -ENOTSUP;
637 }
638
639 bs->drv = drv;
7267c094 640 bs->opaque = g_malloc0(drv->instance_size);
57915332 641
03f541bd 642 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
57915332
KW
643
644 /*
645 * Clear flags that are internal to the block layer before opening the
646 * image.
647 */
648 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
649
650 /*
ebabb67a 651 * Snapshots should be writable.
57915332
KW
652 */
653 if (bs->is_temporary) {
654 open_flags |= BDRV_O_RDWR;
655 }
656
e7c63796
SH
657 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
658
66f82cee
KW
659 /* Open the image, either directly or using a protocol */
660 if (drv->bdrv_file_open) {
661 ret = drv->bdrv_file_open(bs, filename, open_flags);
662 } else {
663 ret = bdrv_file_open(&bs->file, filename, open_flags);
664 if (ret >= 0) {
665 ret = drv->bdrv_open(bs, open_flags);
666 }
667 }
668
57915332
KW
669 if (ret < 0) {
670 goto free_and_fail;
671 }
672
51762288
SH
673 ret = refresh_total_sectors(bs, bs->total_sectors);
674 if (ret < 0) {
675 goto free_and_fail;
57915332 676 }
51762288 677
57915332
KW
678#ifndef _WIN32
679 if (bs->is_temporary) {
680 unlink(filename);
681 }
682#endif
683 return 0;
684
685free_and_fail:
66f82cee
KW
686 if (bs->file) {
687 bdrv_delete(bs->file);
688 bs->file = NULL;
689 }
7267c094 690 g_free(bs->opaque);
57915332
KW
691 bs->opaque = NULL;
692 bs->drv = NULL;
693 return ret;
694}
695
b6ce07aa
KW
696/*
697 * Opens a file using a protocol (file, host_device, nbd, ...)
698 */
83f64091 699int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
ea2384d3 700{
83f64091 701 BlockDriverState *bs;
6db95603 702 BlockDriver *drv;
83f64091
FB
703 int ret;
704
b50cbabc 705 drv = bdrv_find_protocol(filename);
6db95603
CH
706 if (!drv) {
707 return -ENOENT;
708 }
709
83f64091 710 bs = bdrv_new("");
b6ce07aa 711 ret = bdrv_open_common(bs, filename, flags, drv);
83f64091
FB
712 if (ret < 0) {
713 bdrv_delete(bs);
714 return ret;
3b0d4f61 715 }
71d0770c 716 bs->growable = 1;
83f64091
FB
717 *pbs = bs;
718 return 0;
719}
720
b6ce07aa
KW
721/*
722 * Opens a disk image (raw, qcow2, vmdk, ...)
723 */
d6e9098e
KW
724int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
725 BlockDriver *drv)
ea2384d3 726{
b6ce07aa 727 int ret;
2b572816 728 char tmp_filename[PATH_MAX];
712e7874 729
83f64091 730 if (flags & BDRV_O_SNAPSHOT) {
ea2384d3
FB
731 BlockDriverState *bs1;
732 int64_t total_size;
7c96d46e 733 int is_protocol = 0;
91a073a9
KW
734 BlockDriver *bdrv_qcow2;
735 QEMUOptionParameter *options;
b6ce07aa 736 char backing_filename[PATH_MAX];
3b46e624 737
ea2384d3
FB
738 /* if snapshot, we create a temporary backing file and open it
739 instead of opening 'filename' directly */
33e3963e 740
ea2384d3
FB
741 /* if there is a backing file, use it */
742 bs1 = bdrv_new("");
d6e9098e 743 ret = bdrv_open(bs1, filename, 0, drv);
51d7c00c 744 if (ret < 0) {
ea2384d3 745 bdrv_delete(bs1);
51d7c00c 746 return ret;
ea2384d3 747 }
3e82990b 748 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
7c96d46e
AL
749
750 if (bs1->drv && bs1->drv->protocol_name)
751 is_protocol = 1;
752
ea2384d3 753 bdrv_delete(bs1);
3b46e624 754
ea2384d3 755 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
7c96d46e
AL
756
757 /* Real path is meaningless for protocols */
758 if (is_protocol)
759 snprintf(backing_filename, sizeof(backing_filename),
760 "%s", filename);
114cdfa9
KS
761 else if (!realpath(filename, backing_filename))
762 return -errno;
7c96d46e 763
91a073a9
KW
764 bdrv_qcow2 = bdrv_find_format("qcow2");
765 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
766
3e82990b 767 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
91a073a9
KW
768 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
769 if (drv) {
770 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
771 drv->format_name);
772 }
773
774 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
d748768c 775 free_option_parameters(options);
51d7c00c
AL
776 if (ret < 0) {
777 return ret;
ea2384d3 778 }
91a073a9 779
ea2384d3 780 filename = tmp_filename;
91a073a9 781 drv = bdrv_qcow2;
ea2384d3
FB
782 bs->is_temporary = 1;
783 }
712e7874 784
b6ce07aa 785 /* Find the right image format driver */
6db95603 786 if (!drv) {
c98ac35d 787 ret = find_image_format(filename, &drv);
51d7c00c 788 }
6987307c 789
51d7c00c 790 if (!drv) {
51d7c00c 791 goto unlink_and_fail;
ea2384d3 792 }
b6ce07aa
KW
793
794 /* Open the image */
795 ret = bdrv_open_common(bs, filename, flags, drv);
796 if (ret < 0) {
6987307c
CH
797 goto unlink_and_fail;
798 }
799
b6ce07aa
KW
800 /* If there is a backing file, use it */
801 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
802 char backing_filename[PATH_MAX];
803 int back_flags;
804 BlockDriver *back_drv = NULL;
805
806 bs->backing_hd = bdrv_new("");
df2dbb4a
SH
807
808 if (path_has_protocol(bs->backing_file)) {
809 pstrcpy(backing_filename, sizeof(backing_filename),
810 bs->backing_file);
811 } else {
812 path_combine(backing_filename, sizeof(backing_filename),
813 filename, bs->backing_file);
814 }
815
816 if (bs->backing_format[0] != '\0') {
b6ce07aa 817 back_drv = bdrv_find_format(bs->backing_format);
df2dbb4a 818 }
b6ce07aa
KW
819
820 /* backing files always opened read-only */
821 back_flags =
822 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
823
824 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
825 if (ret < 0) {
826 bdrv_close(bs);
827 return ret;
828 }
829 if (bs->is_temporary) {
830 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
831 } else {
832 /* base image inherits from "parent" */
833 bs->backing_hd->keep_read_only = bs->keep_read_only;
834 }
835 }
836
837 if (!bdrv_key_required(bs)) {
7d4b4ba5 838 bdrv_dev_change_media_cb(bs, true);
b6ce07aa
KW
839 }
840
98f90dba
ZYW
841 /* throttling disk I/O limits */
842 if (bs->io_limits_enabled) {
843 bdrv_io_limits_enable(bs);
844 }
845
b6ce07aa
KW
846 return 0;
847
848unlink_and_fail:
849 if (bs->is_temporary) {
850 unlink(filename);
851 }
852 return ret;
853}
854
fc01f7e7
FB
855void bdrv_close(BlockDriverState *bs)
856{
80ccf93b 857 bdrv_flush(bs);
19cb3738 858 if (bs->drv) {
3e914655
PB
859 if (bs->job) {
860 block_job_cancel_sync(bs->job);
861 }
7094f12f
KW
862 bdrv_drain_all();
863
f9092b10
MA
864 if (bs == bs_snapshots) {
865 bs_snapshots = NULL;
866 }
557df6ac 867 if (bs->backing_hd) {
ea2384d3 868 bdrv_delete(bs->backing_hd);
557df6ac
SH
869 bs->backing_hd = NULL;
870 }
ea2384d3 871 bs->drv->bdrv_close(bs);
7267c094 872 g_free(bs->opaque);
ea2384d3
FB
873#ifdef _WIN32
874 if (bs->is_temporary) {
875 unlink(bs->filename);
876 }
67b915a5 877#endif
ea2384d3
FB
878 bs->opaque = NULL;
879 bs->drv = NULL;
53fec9d3 880 bs->copy_on_read = 0;
b338082b 881
66f82cee
KW
882 if (bs->file != NULL) {
883 bdrv_close(bs->file);
884 }
885
7d4b4ba5 886 bdrv_dev_change_media_cb(bs, false);
b338082b 887 }
98f90dba
ZYW
888
889 /*throttling disk I/O limits*/
890 if (bs->io_limits_enabled) {
891 bdrv_io_limits_disable(bs);
892 }
b338082b
FB
893}
894
2bc93fed
MK
895void bdrv_close_all(void)
896{
897 BlockDriverState *bs;
898
899 QTAILQ_FOREACH(bs, &bdrv_states, list) {
900 bdrv_close(bs);
901 }
902}
903
922453bc
SH
904/*
905 * Wait for pending requests to complete across all BlockDriverStates
906 *
907 * This function does not flush data to disk, use bdrv_flush_all() for that
908 * after calling this function.
4c355d53
ZYW
909 *
910 * Note that completion of an asynchronous I/O operation can trigger any
911 * number of other I/O operations on other devices---for example a coroutine
912 * can be arbitrarily complex and a constant flow of I/O can come until the
913 * coroutine is complete. Because of this, it is not possible to have a
914 * function to drain a single device's I/O queue.
922453bc
SH
915 */
916void bdrv_drain_all(void)
917{
918 BlockDriverState *bs;
4c355d53
ZYW
919 bool busy;
920
921 do {
922 busy = qemu_aio_wait();
922453bc 923
4c355d53
ZYW
924 /* FIXME: We do not have timer support here, so this is effectively
925 * a busy wait.
926 */
927 QTAILQ_FOREACH(bs, &bdrv_states, list) {
928 if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
929 qemu_co_queue_restart_all(&bs->throttled_reqs);
930 busy = true;
931 }
932 }
933 } while (busy);
922453bc
SH
934
935 /* If requests are still pending there is a bug somewhere */
936 QTAILQ_FOREACH(bs, &bdrv_states, list) {
937 assert(QLIST_EMPTY(&bs->tracked_requests));
938 assert(qemu_co_queue_empty(&bs->throttled_reqs));
939 }
940}
941
d22b2f41
RH
942/* make a BlockDriverState anonymous by removing from bdrv_state list.
943 Also, NULL terminate the device_name to prevent double remove */
944void bdrv_make_anon(BlockDriverState *bs)
945{
946 if (bs->device_name[0] != '\0') {
947 QTAILQ_REMOVE(&bdrv_states, bs, list);
948 }
949 bs->device_name[0] = '\0';
950}
951
8802d1fd
JC
952/*
953 * Add new bs contents at the top of an image chain while the chain is
954 * live, while keeping required fields on the top layer.
955 *
956 * This will modify the BlockDriverState fields, and swap contents
957 * between bs_new and bs_top. Both bs_new and bs_top are modified.
958 *
f6801b83
JC
959 * bs_new is required to be anonymous.
960 *
8802d1fd
JC
961 * This function does not create any image files.
962 */
963void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
964{
965 BlockDriverState tmp;
966
f6801b83
JC
967 /* bs_new must be anonymous */
968 assert(bs_new->device_name[0] == '\0');
8802d1fd
JC
969
970 tmp = *bs_new;
971
972 /* there are some fields that need to stay on the top layer: */
973
974 /* dev info */
975 tmp.dev_ops = bs_top->dev_ops;
976 tmp.dev_opaque = bs_top->dev_opaque;
977 tmp.dev = bs_top->dev;
978 tmp.buffer_alignment = bs_top->buffer_alignment;
979 tmp.copy_on_read = bs_top->copy_on_read;
980
981 /* i/o timing parameters */
982 tmp.slice_time = bs_top->slice_time;
983 tmp.slice_start = bs_top->slice_start;
984 tmp.slice_end = bs_top->slice_end;
985 tmp.io_limits = bs_top->io_limits;
986 tmp.io_base = bs_top->io_base;
987 tmp.throttled_reqs = bs_top->throttled_reqs;
988 tmp.block_timer = bs_top->block_timer;
989 tmp.io_limits_enabled = bs_top->io_limits_enabled;
990
991 /* geometry */
992 tmp.cyls = bs_top->cyls;
993 tmp.heads = bs_top->heads;
994 tmp.secs = bs_top->secs;
995 tmp.translation = bs_top->translation;
996
997 /* r/w error */
998 tmp.on_read_error = bs_top->on_read_error;
999 tmp.on_write_error = bs_top->on_write_error;
1000
1001 /* i/o status */
1002 tmp.iostatus_enabled = bs_top->iostatus_enabled;
1003 tmp.iostatus = bs_top->iostatus;
1004
1005 /* keep the same entry in bdrv_states */
1006 pstrcpy(tmp.device_name, sizeof(tmp.device_name), bs_top->device_name);
1007 tmp.list = bs_top->list;
1008
1009 /* The contents of 'tmp' will become bs_top, as we are
1010 * swapping bs_new and bs_top contents. */
1011 tmp.backing_hd = bs_new;
1012 pstrcpy(tmp.backing_file, sizeof(tmp.backing_file), bs_top->filename);
f6801b83 1013 bdrv_get_format(bs_top, tmp.backing_format, sizeof(tmp.backing_format));
8802d1fd
JC
1014
1015 /* swap contents of the fixed new bs and the current top */
1016 *bs_new = *bs_top;
1017 *bs_top = tmp;
1018
f6801b83
JC
1019 /* device_name[] was carried over from the old bs_top. bs_new
1020 * shouldn't be in bdrv_states, so we need to make device_name[]
1021 * reflect the anonymity of bs_new
1022 */
1023 bs_new->device_name[0] = '\0';
1024
8802d1fd
JC
1025 /* clear the copied fields in the new backing file */
1026 bdrv_detach_dev(bs_new, bs_new->dev);
1027
1028 qemu_co_queue_init(&bs_new->throttled_reqs);
1029 memset(&bs_new->io_base, 0, sizeof(bs_new->io_base));
1030 memset(&bs_new->io_limits, 0, sizeof(bs_new->io_limits));
1031 bdrv_iostatus_disable(bs_new);
1032
1033 /* we don't use bdrv_io_limits_disable() for this, because we don't want
1034 * to affect or delete the block_timer, as it has been moved to bs_top */
1035 bs_new->io_limits_enabled = false;
1036 bs_new->block_timer = NULL;
1037 bs_new->slice_time = 0;
1038 bs_new->slice_start = 0;
1039 bs_new->slice_end = 0;
1040}
1041
b338082b
FB
1042void bdrv_delete(BlockDriverState *bs)
1043{
fa879d62 1044 assert(!bs->dev);
3e914655
PB
1045 assert(!bs->job);
1046 assert(!bs->in_use);
18846dee 1047
1b7bdbc1 1048 /* remove from list, if necessary */
d22b2f41 1049 bdrv_make_anon(bs);
34c6f050 1050
b338082b 1051 bdrv_close(bs);
66f82cee
KW
1052 if (bs->file != NULL) {
1053 bdrv_delete(bs->file);
1054 }
1055
f9092b10 1056 assert(bs != bs_snapshots);
7267c094 1057 g_free(bs);
fc01f7e7
FB
1058}
1059
fa879d62
MA
1060int bdrv_attach_dev(BlockDriverState *bs, void *dev)
1061/* TODO change to DeviceState *dev when all users are qdevified */
18846dee 1062{
fa879d62 1063 if (bs->dev) {
18846dee
MA
1064 return -EBUSY;
1065 }
fa879d62 1066 bs->dev = dev;
28a7282a 1067 bdrv_iostatus_reset(bs);
18846dee
MA
1068 return 0;
1069}
1070
fa879d62
MA
1071/* TODO qdevified devices don't use this, remove when devices are qdevified */
1072void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
18846dee 1073{
fa879d62
MA
1074 if (bdrv_attach_dev(bs, dev) < 0) {
1075 abort();
1076 }
1077}
1078
/* Detach the guest device from @bs and drop all device-supplied state. */
void bdrv_detach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    /* Only the device that attached itself may detach */
    assert(bs->dev == dev);
    bs->dev = NULL;
    bs->dev_ops = NULL;
    bs->dev_opaque = NULL;
    /* Reset to the conservative default; presumably the attached device had
     * raised the alignment for its own DMA needs — confirm against callers. */
    bs->buffer_alignment = 512;
}
1088
fa879d62
MA
/* TODO change to return DeviceState * when all users are qdevified */
void *bdrv_get_attached_dev(BlockDriverState *bs)
{
    /* Opaque pointer stored by bdrv_attach_dev(), or NULL if unattached */
    return bs->dev;
}
1094
0e49de52
MA
1095void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
1096 void *opaque)
1097{
1098 bs->dev_ops = ops;
1099 bs->dev_opaque = opaque;
2c6942fa
MA
1100 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
1101 bs_snapshots = NULL;
1102 }
0e49de52
MA
1103}
1104
329c0a48
LC
1105void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
1106 BlockQMPEventAction action, int is_read)
1107{
1108 QObject *data;
1109 const char *action_str;
1110
1111 switch (action) {
1112 case BDRV_ACTION_REPORT:
1113 action_str = "report";
1114 break;
1115 case BDRV_ACTION_IGNORE:
1116 action_str = "ignore";
1117 break;
1118 case BDRV_ACTION_STOP:
1119 action_str = "stop";
1120 break;
1121 default:
1122 abort();
1123 }
1124
1125 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1126 bdrv->device_name,
1127 action_str,
1128 is_read ? "read" : "write");
1129 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1130
1131 qobject_decref(data);
1132}
1133
6f382ed2
LC
1134static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
1135{
1136 QObject *data;
1137
1138 data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
1139 bdrv_get_device_name(bs), ejected);
1140 monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data);
1141
1142 qobject_decref(data);
1143}
1144
7d4b4ba5 1145static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
0e49de52 1146{
145feb17 1147 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
6f382ed2 1148 bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
7d4b4ba5 1149 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
6f382ed2
LC
1150 if (tray_was_closed) {
1151 /* tray open */
1152 bdrv_emit_qmp_eject_event(bs, true);
1153 }
1154 if (load) {
1155 /* tray close */
1156 bdrv_emit_qmp_eject_event(bs, false);
1157 }
145feb17
MA
1158 }
1159}
1160
2c6942fa
MA
1161bool bdrv_dev_has_removable_media(BlockDriverState *bs)
1162{
1163 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
1164}
1165
025ccaa7
PB
1166void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
1167{
1168 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
1169 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
1170 }
1171}
1172
e4def80b
MA
1173bool bdrv_dev_is_tray_open(BlockDriverState *bs)
1174{
1175 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
1176 return bs->dev_ops->is_tray_open(bs->dev_opaque);
1177 }
1178 return false;
1179}
1180
145feb17
MA
1181static void bdrv_dev_resize_cb(BlockDriverState *bs)
1182{
1183 if (bs->dev_ops && bs->dev_ops->resize_cb) {
1184 bs->dev_ops->resize_cb(bs->dev_opaque);
0e49de52
MA
1185 }
1186}
1187
f107639a
MA
1188bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
1189{
1190 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
1191 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
1192 }
1193 return false;
1194}
1195
e97fc193
AL
1196/*
1197 * Run consistency checks on an image
1198 *
e076f338 1199 * Returns 0 if the check could be completed (it doesn't mean that the image is
a1c7273b 1200 * free of errors) or -errno when an internal error occurred. The results of the
e076f338 1201 * check are stored in res.
e97fc193 1202 */
e076f338 1203int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
e97fc193
AL
1204{
1205 if (bs->drv->bdrv_check == NULL) {
1206 return -ENOTSUP;
1207 }
1208
e076f338 1209 memset(res, 0, sizeof(*res));
9ac228e0 1210 return bs->drv->bdrv_check(bs, res);
e97fc193
AL
1211}
1212
8a426614
KW
1213#define COMMIT_BUF_SECTORS 2048
1214
33e3963e
FB
1215/* commit COW file into the raw image */
1216int bdrv_commit(BlockDriverState *bs)
1217{
19cb3738 1218 BlockDriver *drv = bs->drv;
ee181196 1219 BlockDriver *backing_drv;
8a426614
KW
1220 int64_t sector, total_sectors;
1221 int n, ro, open_flags;
4dca4b63 1222 int ret = 0, rw_ret = 0;
8a426614 1223 uint8_t *buf;
4dca4b63
NS
1224 char filename[1024];
1225 BlockDriverState *bs_rw, *bs_ro;
33e3963e 1226
19cb3738
FB
1227 if (!drv)
1228 return -ENOMEDIUM;
4dca4b63
NS
1229
1230 if (!bs->backing_hd) {
1231 return -ENOTSUP;
33e3963e
FB
1232 }
1233
4dca4b63
NS
1234 if (bs->backing_hd->keep_read_only) {
1235 return -EACCES;
1236 }
ee181196 1237
2d3735d3
SH
1238 if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
1239 return -EBUSY;
1240 }
1241
ee181196 1242 backing_drv = bs->backing_hd->drv;
4dca4b63
NS
1243 ro = bs->backing_hd->read_only;
1244 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
1245 open_flags = bs->backing_hd->open_flags;
1246
1247 if (ro) {
1248 /* re-open as RW */
1249 bdrv_delete(bs->backing_hd);
1250 bs->backing_hd = NULL;
1251 bs_rw = bdrv_new("");
ee181196
KW
1252 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
1253 backing_drv);
4dca4b63
NS
1254 if (rw_ret < 0) {
1255 bdrv_delete(bs_rw);
1256 /* try to re-open read-only */
1257 bs_ro = bdrv_new("");
ee181196
KW
1258 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1259 backing_drv);
4dca4b63
NS
1260 if (ret < 0) {
1261 bdrv_delete(bs_ro);
1262 /* drive not functional anymore */
1263 bs->drv = NULL;
1264 return ret;
1265 }
1266 bs->backing_hd = bs_ro;
1267 return rw_ret;
1268 }
1269 bs->backing_hd = bs_rw;
ea2384d3 1270 }
33e3963e 1271
6ea44308 1272 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
7267c094 1273 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
8a426614
KW
1274
1275 for (sector = 0; sector < total_sectors; sector += n) {
05c4af54 1276 if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
8a426614
KW
1277
1278 if (bdrv_read(bs, sector, buf, n) != 0) {
1279 ret = -EIO;
1280 goto ro_cleanup;
1281 }
1282
1283 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
1284 ret = -EIO;
1285 goto ro_cleanup;
1286 }
ea2384d3 1287 }
33e3963e 1288 }
95389c86 1289
1d44952f
CH
1290 if (drv->bdrv_make_empty) {
1291 ret = drv->bdrv_make_empty(bs);
1292 bdrv_flush(bs);
1293 }
95389c86 1294
3f5075ae
CH
1295 /*
1296 * Make sure all data we wrote to the backing device is actually
1297 * stable on disk.
1298 */
1299 if (bs->backing_hd)
1300 bdrv_flush(bs->backing_hd);
4dca4b63
NS
1301
1302ro_cleanup:
7267c094 1303 g_free(buf);
4dca4b63
NS
1304
1305 if (ro) {
1306 /* re-open as RO */
1307 bdrv_delete(bs->backing_hd);
1308 bs->backing_hd = NULL;
1309 bs_ro = bdrv_new("");
ee181196
KW
1310 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1311 backing_drv);
4dca4b63
NS
1312 if (ret < 0) {
1313 bdrv_delete(bs_ro);
1314 /* drive not functional anymore */
1315 bs->drv = NULL;
1316 return ret;
1317 }
1318 bs->backing_hd = bs_ro;
1319 bs->backing_hd->keep_read_only = 0;
1320 }
1321
1d44952f 1322 return ret;
33e3963e
FB
1323}
1324
e8877497 1325int bdrv_commit_all(void)
6ab4b5ab
MA
1326{
1327 BlockDriverState *bs;
1328
1329 QTAILQ_FOREACH(bs, &bdrv_states, list) {
e8877497
SH
1330 int ret = bdrv_commit(bs);
1331 if (ret < 0) {
1332 return ret;
1333 }
6ab4b5ab 1334 }
e8877497 1335 return 0;
6ab4b5ab
MA
1336}
1337
dbffbdcf
SH
/*
 * One in-flight guest request. Requests are kept on bs->tracked_requests so
 * overlapping requests can be serialized (see wait_for_overlapping_requests).
 */
struct BdrvTrackedRequest {
    BlockDriverState *bs;               /* device this request runs on */
    int64_t sector_num;                 /* first sector of the request */
    int nb_sectors;                     /* request length in sectors */
    bool is_write;                      /* true for writes, false for reads */
    QLIST_ENTRY(BdrvTrackedRequest) list;   /* entry in bs->tracked_requests */
    Coroutine *co; /* owner, used for deadlock detection */
    CoQueue wait_queue; /* coroutines blocked on this request */
};
1347
1348/**
1349 * Remove an active request from the tracked requests list
1350 *
1351 * This function should be called when a tracked request is completing.
1352 */
1353static void tracked_request_end(BdrvTrackedRequest *req)
1354{
1355 QLIST_REMOVE(req, list);
f4658285 1356 qemu_co_queue_restart_all(&req->wait_queue);
dbffbdcf
SH
1357}
1358
1359/**
1360 * Add an active request to the tracked requests list
1361 */
1362static void tracked_request_begin(BdrvTrackedRequest *req,
1363 BlockDriverState *bs,
1364 int64_t sector_num,
1365 int nb_sectors, bool is_write)
1366{
1367 *req = (BdrvTrackedRequest){
1368 .bs = bs,
1369 .sector_num = sector_num,
1370 .nb_sectors = nb_sectors,
1371 .is_write = is_write,
5f8b6491 1372 .co = qemu_coroutine_self(),
dbffbdcf
SH
1373 };
1374
f4658285
SH
1375 qemu_co_queue_init(&req->wait_queue);
1376
dbffbdcf
SH
1377 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
1378}
1379
d83947ac
SH
1380/**
1381 * Round a region to cluster boundaries
1382 */
1383static void round_to_clusters(BlockDriverState *bs,
1384 int64_t sector_num, int nb_sectors,
1385 int64_t *cluster_sector_num,
1386 int *cluster_nb_sectors)
1387{
1388 BlockDriverInfo bdi;
1389
1390 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
1391 *cluster_sector_num = sector_num;
1392 *cluster_nb_sectors = nb_sectors;
1393 } else {
1394 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
1395 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
1396 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
1397 nb_sectors, c);
1398 }
1399}
1400
f4658285
SH
1401static bool tracked_request_overlaps(BdrvTrackedRequest *req,
1402 int64_t sector_num, int nb_sectors) {
d83947ac
SH
1403 /* aaaa bbbb */
1404 if (sector_num >= req->sector_num + req->nb_sectors) {
1405 return false;
1406 }
1407 /* bbbb aaaa */
1408 if (req->sector_num >= sector_num + nb_sectors) {
1409 return false;
1410 }
1411 return true;
f4658285
SH
1412}
1413
/*
 * Block the calling coroutine until no tracked request overlaps the
 * cluster-aligned expansion of [sector_num, sector_num + nb_sectors).
 * Must be called before tracked_request_begin() for the new request.
 */
static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors)
{
    BdrvTrackedRequest *req;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    bool retry;

    /* If we touch the same cluster it counts as an overlap.  This guarantees
     * that allocating writes will be serialized and not race with each other
     * for the same cluster.  For example, in copy-on-read it ensures that the
     * CoR read and write operations are atomic and guest writes cannot
     * interleave between them.
     */
    round_to_clusters(bs, sector_num, nb_sectors,
                      &cluster_sector_num, &cluster_nb_sectors);

    do {
        retry = false;
        QLIST_FOREACH(req, &bs->tracked_requests, list) {
            if (tracked_request_overlaps(req, cluster_sector_num,
                                         cluster_nb_sectors)) {
                /* Hitting this means there was a reentrant request, for
                 * example, a block driver issuing nested requests.  This must
                 * never happen since it means deadlock.
                 */
                assert(qemu_coroutine_self() != req->co);

                /* Sleep until the blocking request completes; the list may
                 * have changed while we slept, so rescan from the start. */
                qemu_co_queue_wait(&req->wait_queue);
                retry = true;
                break;
            }
        }
    } while (retry);
}
1449
756e6736
KW
1450/*
1451 * Return values:
1452 * 0 - success
1453 * -EINVAL - backing format specified, but no file
1454 * -ENOSPC - can't update the backing file because no space is left in the
1455 * image file header
1456 * -ENOTSUP - format driver doesn't support changing the backing file
1457 */
1458int bdrv_change_backing_file(BlockDriverState *bs,
1459 const char *backing_file, const char *backing_fmt)
1460{
1461 BlockDriver *drv = bs->drv;
469ef350 1462 int ret;
756e6736 1463
5f377794
PB
1464 /* Backing file format doesn't make sense without a backing file */
1465 if (backing_fmt && !backing_file) {
1466 return -EINVAL;
1467 }
1468
756e6736 1469 if (drv->bdrv_change_backing_file != NULL) {
469ef350 1470 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
756e6736 1471 } else {
469ef350 1472 ret = -ENOTSUP;
756e6736 1473 }
469ef350
PB
1474
1475 if (ret == 0) {
1476 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
1477 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
1478 }
1479 return ret;
756e6736
KW
1480}
1481
71d0770c
AL
1482static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1483 size_t size)
1484{
1485 int64_t len;
1486
1487 if (!bdrv_is_inserted(bs))
1488 return -ENOMEDIUM;
1489
1490 if (bs->growable)
1491 return 0;
1492
1493 len = bdrv_getlength(bs);
1494
fbb7b4e0
KW
1495 if (offset < 0)
1496 return -EIO;
1497
1498 if ((offset > len) || (len - offset < size))
71d0770c
AL
1499 return -EIO;
1500
1501 return 0;
1502}
1503
1504static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1505 int nb_sectors)
1506{
eb5a3165
JS
1507 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1508 nb_sectors * BDRV_SECTOR_SIZE);
71d0770c
AL
1509}
1510
1c9805a3
SH
/* Parameter block passed to bdrv_rw_co_entry() so a synchronous read/write
 * can be executed inside a coroutine (see bdrv_rw_co()). */
typedef struct RwCo {
    BlockDriverState *bs;   /* target device */
    int64_t sector_num;     /* first sector */
    int nb_sectors;         /* request length in sectors */
    QEMUIOVector *qiov;     /* data buffers */
    bool is_write;          /* write (true) or read (false) */
    int ret;                /* result; NOT_DONE until the coroutine finishes */
} RwCo;
1519
1520static void coroutine_fn bdrv_rw_co_entry(void *opaque)
fc01f7e7 1521{
1c9805a3 1522 RwCo *rwco = opaque;
ea2384d3 1523
1c9805a3
SH
1524 if (!rwco->is_write) {
1525 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
470c0504 1526 rwco->nb_sectors, rwco->qiov, 0);
1c9805a3
SH
1527 } else {
1528 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
f08f2dda 1529 rwco->nb_sectors, rwco->qiov, 0);
1c9805a3
SH
1530 }
1531}
e7a8a783 1532
1c9805a3
SH
1533/*
1534 * Process a synchronous request using coroutines
1535 */
1536static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1537 int nb_sectors, bool is_write)
1538{
1539 QEMUIOVector qiov;
1540 struct iovec iov = {
1541 .iov_base = (void *)buf,
1542 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1543 };
1544 Coroutine *co;
1545 RwCo rwco = {
1546 .bs = bs,
1547 .sector_num = sector_num,
1548 .nb_sectors = nb_sectors,
1549 .qiov = &qiov,
1550 .is_write = is_write,
1551 .ret = NOT_DONE,
1552 };
e7a8a783 1553
1c9805a3 1554 qemu_iovec_init_external(&qiov, &iov, 1);
e7a8a783 1555
498e386c
ZYW
1556 /**
1557 * In sync call context, when the vcpu is blocked, this throttling timer
1558 * will not fire; so the I/O throttling function has to be disabled here
1559 * if it has been enabled.
1560 */
1561 if (bs->io_limits_enabled) {
1562 fprintf(stderr, "Disabling I/O throttling on '%s' due "
1563 "to synchronous I/O.\n", bdrv_get_device_name(bs));
1564 bdrv_io_limits_disable(bs);
1565 }
1566
1c9805a3
SH
1567 if (qemu_in_coroutine()) {
1568 /* Fast-path if already in coroutine context */
1569 bdrv_rw_co_entry(&rwco);
1570 } else {
1571 co = qemu_coroutine_create(bdrv_rw_co_entry);
1572 qemu_coroutine_enter(co, &rwco);
1573 while (rwco.ret == NOT_DONE) {
1574 qemu_aio_wait();
1575 }
1576 }
1577 return rwco.ret;
1578}
b338082b 1579
1c9805a3
SH
1580/* return < 0 if error. See bdrv_write() for the return codes */
1581int bdrv_read(BlockDriverState *bs, int64_t sector_num,
1582 uint8_t *buf, int nb_sectors)
1583{
1584 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
fc01f7e7
FB
1585}
1586
71df14fc
PB
#define BITS_PER_LONG  (sizeof(unsigned long) * 8)

/*
 * Mark or clear the dirty-bitmap bits covering
 * [sector_num, sector_num + nb_sectors). One bit covers
 * BDRV_SECTORS_PER_DIRTY_CHUNK sectors; bs->dirty_count tracks the number
 * of set bits and is kept in sync here.
 */
static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
                             int nb_sectors, int dirty)
{
    int64_t start, end;
    unsigned long val, idx, bit;

    /* First and last dirty-chunk index touched by the range */
    start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
    end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;

    for (; start <= end; start++) {
        idx = start / BITS_PER_LONG;    /* word within the bitmap */
        bit = start % BITS_PER_LONG;    /* bit within that word */
        val = bs->dirty_bitmap[idx];
        if (dirty) {
            /* Only count a transition 0 -> 1 */
            if (!(val & (1UL << bit))) {
                bs->dirty_count++;
                val |= 1UL << bit;
            }
        } else {
            /* Only count a transition 1 -> 0 */
            if (val & (1UL << bit)) {
                bs->dirty_count--;
                val &= ~(1UL << bit);
            }
        }
        bs->dirty_bitmap[idx] = val;
    }
}
1616
5fafdf24 1617/* Return < 0 if error. Important errors are:
19cb3738
FB
1618 -EIO generic I/O error (may happen for all errors)
1619 -ENOMEDIUM No media inserted.
1620 -EINVAL Invalid sector number or nb_sectors
1621 -EACCES Trying to write a read-only device
1622*/
5fafdf24 1623int bdrv_write(BlockDriverState *bs, int64_t sector_num,
fc01f7e7
FB
1624 const uint8_t *buf, int nb_sectors)
1625{
1c9805a3 1626 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
83f64091
FB
1627}
1628
eda578e5
AL
/*
 * Read @count1 bytes at byte offset @offset into @buf.
 *
 * Handles unaligned head and tail through a one-sector bounce buffer and
 * reads the aligned middle directly into @buf.
 * Returns @count1 on success or a negative errno from bdrv_read().
 */
int bdrv_pread(BlockDriverState *bs, int64_t offset,
               void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first read to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        /* Copy just the tail of the bounce sector that the caller wants */
        memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* read the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf, count);
    }
    return count1;
}
1673
eda578e5
AL
/*
 * Write @count1 bytes from @buf at byte offset @offset.
 *
 * Unaligned head and tail sectors are handled read-modify-write through a
 * one-sector bounce buffer; the aligned middle is written directly.
 * Returns @count1 on success or a negative errno from bdrv_read/bdrv_write.
 */
int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
                const void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first write to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        /* read-modify-write of the partial head sector */
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* write the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        /* read-modify-write of the partial tail sector */
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf, buf, count);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
    }
    return count1;
}
83f64091 1722
f08145fe
KW
1723/*
1724 * Writes to the file and ensures that no writes are reordered across this
1725 * request (acts as a barrier)
1726 *
1727 * Returns 0 on success, -errno in error cases.
1728 */
1729int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1730 const void *buf, int count)
1731{
1732 int ret;
1733
1734 ret = bdrv_pwrite(bs, offset, buf, count);
1735 if (ret < 0) {
1736 return ret;
1737 }
1738
92196b2f
SH
1739 /* No flush needed for cache modes that use O_DSYNC */
1740 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
f08145fe
KW
1741 bdrv_flush(bs);
1742 }
1743
1744 return 0;
1745}
1746
470c0504 1747static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
ab185921
SH
1748 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1749{
1750 /* Perform I/O through a temporary buffer so that users who scribble over
1751 * their read buffer while the operation is in progress do not end up
1752 * modifying the image file. This is critical for zero-copy guest I/O
1753 * where anything might happen inside guest memory.
1754 */
1755 void *bounce_buffer;
1756
79c053bd 1757 BlockDriver *drv = bs->drv;
ab185921
SH
1758 struct iovec iov;
1759 QEMUIOVector bounce_qiov;
1760 int64_t cluster_sector_num;
1761 int cluster_nb_sectors;
1762 size_t skip_bytes;
1763 int ret;
1764
1765 /* Cover entire cluster so no additional backing file I/O is required when
1766 * allocating cluster in the image file.
1767 */
1768 round_to_clusters(bs, sector_num, nb_sectors,
1769 &cluster_sector_num, &cluster_nb_sectors);
1770
470c0504
SH
1771 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
1772 cluster_sector_num, cluster_nb_sectors);
ab185921
SH
1773
1774 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
1775 iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
1776 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
1777
79c053bd
SH
1778 ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
1779 &bounce_qiov);
ab185921
SH
1780 if (ret < 0) {
1781 goto err;
1782 }
1783
79c053bd
SH
1784 if (drv->bdrv_co_write_zeroes &&
1785 buffer_is_zero(bounce_buffer, iov.iov_len)) {
621f0589
KW
1786 ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
1787 cluster_nb_sectors);
79c053bd
SH
1788 } else {
1789 ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
ab185921 1790 &bounce_qiov);
79c053bd
SH
1791 }
1792
ab185921
SH
1793 if (ret < 0) {
1794 /* It might be okay to ignore write errors for guest requests. If this
1795 * is a deliberate copy-on-read then we don't want to ignore the error.
1796 * Simply report it in all cases.
1797 */
1798 goto err;
1799 }
1800
1801 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
1802 qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
1803 nb_sectors * BDRV_SECTOR_SIZE);
1804
1805err:
1806 qemu_vfree(bounce_buffer);
1807 return ret;
1808}
1809
c5fbe571
SH
1810/*
1811 * Handle a read request in coroutine context
1812 */
1813static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
470c0504
SH
1814 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
1815 BdrvRequestFlags flags)
da1fa91d
KW
1816{
1817 BlockDriver *drv = bs->drv;
dbffbdcf
SH
1818 BdrvTrackedRequest req;
1819 int ret;
da1fa91d 1820
da1fa91d
KW
1821 if (!drv) {
1822 return -ENOMEDIUM;
1823 }
1824 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1825 return -EIO;
1826 }
1827
98f90dba
ZYW
1828 /* throttling disk read I/O */
1829 if (bs->io_limits_enabled) {
1830 bdrv_io_limits_intercept(bs, false, nb_sectors);
1831 }
1832
f4658285 1833 if (bs->copy_on_read) {
470c0504
SH
1834 flags |= BDRV_REQ_COPY_ON_READ;
1835 }
1836 if (flags & BDRV_REQ_COPY_ON_READ) {
1837 bs->copy_on_read_in_flight++;
1838 }
1839
1840 if (bs->copy_on_read_in_flight) {
f4658285
SH
1841 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1842 }
1843
dbffbdcf 1844 tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
ab185921 1845
470c0504 1846 if (flags & BDRV_REQ_COPY_ON_READ) {
ab185921
SH
1847 int pnum;
1848
1849 ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
1850 if (ret < 0) {
1851 goto out;
1852 }
1853
1854 if (!ret || pnum != nb_sectors) {
470c0504 1855 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
ab185921
SH
1856 goto out;
1857 }
1858 }
1859
dbffbdcf 1860 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
ab185921
SH
1861
1862out:
dbffbdcf 1863 tracked_request_end(&req);
470c0504
SH
1864
1865 if (flags & BDRV_REQ_COPY_ON_READ) {
1866 bs->copy_on_read_in_flight--;
1867 }
1868
dbffbdcf 1869 return ret;
da1fa91d
KW
1870}
1871
c5fbe571 1872int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
da1fa91d
KW
1873 int nb_sectors, QEMUIOVector *qiov)
1874{
c5fbe571 1875 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
da1fa91d 1876
470c0504
SH
1877 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
1878}
1879
/* Coroutine read that forces copy-on-read regardless of bs->copy_on_read
 * (used e.g. for deliberate image streaming). */
int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);

    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
                            BDRV_REQ_COPY_ON_READ);
}
1888
f08f2dda
SH
1889static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
1890 int64_t sector_num, int nb_sectors)
1891{
1892 BlockDriver *drv = bs->drv;
1893 QEMUIOVector qiov;
1894 struct iovec iov;
1895 int ret;
1896
621f0589
KW
1897 /* TODO Emulate only part of misaligned requests instead of letting block
1898 * drivers return -ENOTSUP and emulate everything */
1899
f08f2dda
SH
1900 /* First try the efficient write zeroes operation */
1901 if (drv->bdrv_co_write_zeroes) {
621f0589
KW
1902 ret = drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
1903 if (ret != -ENOTSUP) {
1904 return ret;
1905 }
f08f2dda
SH
1906 }
1907
1908 /* Fall back to bounce buffer if write zeroes is unsupported */
1909 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
1910 iov.iov_base = qemu_blockalign(bs, iov.iov_len);
1911 memset(iov.iov_base, 0, iov.iov_len);
1912 qemu_iovec_init_external(&qiov, &iov, 1);
1913
1914 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
1915
1916 qemu_vfree(iov.iov_base);
1917 return ret;
1918}
1919
c5fbe571
SH
1920/*
1921 * Handle a write request in coroutine context
1922 */
1923static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
f08f2dda
SH
1924 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
1925 BdrvRequestFlags flags)
c5fbe571
SH
1926{
1927 BlockDriver *drv = bs->drv;
dbffbdcf 1928 BdrvTrackedRequest req;
6b7cb247 1929 int ret;
da1fa91d
KW
1930
1931 if (!bs->drv) {
1932 return -ENOMEDIUM;
1933 }
1934 if (bs->read_only) {
1935 return -EACCES;
1936 }
1937 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1938 return -EIO;
1939 }
1940
98f90dba
ZYW
1941 /* throttling disk write I/O */
1942 if (bs->io_limits_enabled) {
1943 bdrv_io_limits_intercept(bs, true, nb_sectors);
1944 }
1945
470c0504 1946 if (bs->copy_on_read_in_flight) {
f4658285
SH
1947 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1948 }
1949
dbffbdcf
SH
1950 tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
1951
f08f2dda
SH
1952 if (flags & BDRV_REQ_ZERO_WRITE) {
1953 ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
1954 } else {
1955 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1956 }
6b7cb247 1957
da1fa91d
KW
1958 if (bs->dirty_bitmap) {
1959 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1960 }
1961
1962 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1963 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1964 }
1965
dbffbdcf
SH
1966 tracked_request_end(&req);
1967
6b7cb247 1968 return ret;
da1fa91d
KW
1969}
1970
c5fbe571
SH
/* Public coroutine write entry point: trace and delegate with no flags. */
int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_writev(bs, sector_num, nb_sectors);

    return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
}
1978
/* Public coroutine zero-write entry point; no data qiov is needed, the
 * ZERO_WRITE flag makes bdrv_co_do_writev() synthesize the zeroes. */
int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors)
{
    trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);

    return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
                             BDRV_REQ_ZERO_WRITE);
}
1987
83f64091
FB
1988/**
1989 * Truncate file to 'offset' bytes (needed only for file protocols)
1990 */
1991int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1992{
1993 BlockDriver *drv = bs->drv;
51762288 1994 int ret;
83f64091 1995 if (!drv)
19cb3738 1996 return -ENOMEDIUM;
83f64091
FB
1997 if (!drv->bdrv_truncate)
1998 return -ENOTSUP;
59f2689d
NS
1999 if (bs->read_only)
2000 return -EACCES;
8591675f
MT
2001 if (bdrv_in_use(bs))
2002 return -EBUSY;
51762288
SH
2003 ret = drv->bdrv_truncate(bs, offset);
2004 if (ret == 0) {
2005 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
145feb17 2006 bdrv_dev_resize_cb(bs);
51762288
SH
2007 }
2008 return ret;
83f64091
FB
2009}
2010
4a1d5e1f
FZ
2011/**
2012 * Length of a allocated file in bytes. Sparse files are counted by actual
2013 * allocated space. Return < 0 if error or unknown.
2014 */
2015int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2016{
2017 BlockDriver *drv = bs->drv;
2018 if (!drv) {
2019 return -ENOMEDIUM;
2020 }
2021 if (drv->bdrv_get_allocated_file_size) {
2022 return drv->bdrv_get_allocated_file_size(bs);
2023 }
2024 if (bs->file) {
2025 return bdrv_get_allocated_file_size(bs->file);
2026 }
2027 return -ENOTSUP;
2028}
2029
83f64091
FB
2030/**
2031 * Length of a file in bytes. Return < 0 if error or unknown.
2032 */
2033int64_t bdrv_getlength(BlockDriverState *bs)
2034{
2035 BlockDriver *drv = bs->drv;
2036 if (!drv)
19cb3738 2037 return -ENOMEDIUM;
51762288 2038
2c6942fa 2039 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
46a4e4e6
SH
2040 if (drv->bdrv_getlength) {
2041 return drv->bdrv_getlength(bs);
2042 }
83f64091 2043 }
46a4e4e6 2044 return bs->total_sectors * BDRV_SECTOR_SIZE;
fc01f7e7
FB
2045}
2046
19cb3738 2047/* return 0 as number of sectors if no device present or error */
96b8f136 2048void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
fc01f7e7 2049{
19cb3738
FB
2050 int64_t length;
2051 length = bdrv_getlength(bs);
2052 if (length < 0)
2053 length = 0;
2054 else
6ea44308 2055 length = length >> BDRV_SECTOR_BITS;
19cb3738 2056 *nb_sectors_ptr = length;
fc01f7e7 2057}
cf98951b 2058
f3d54fc4
AL
/* One entry of the legacy MSDOS (MBR) partition table, as laid out on disk
 * at offset 0x1be of sector 0; packed because the on-disk fields are
 * unaligned. Multi-byte fields are little-endian. */
struct partition {
        uint8_t boot_ind;           /* 0x80 - active */
        uint8_t head;               /* starting head */
        uint8_t sector;             /* starting sector */
        uint8_t cyl;                /* starting cylinder */
        uint8_t sys_ind;            /* What partition type */
        uint8_t end_head;           /* end head */
        uint8_t end_sector;         /* end sector */
        uint8_t end_cyl;            /* end cylinder */
        uint32_t start_sect;        /* starting sector counting from 0 */
        uint32_t nr_sects;          /* nr of sectors in partition */
} QEMU_PACKED;
f3d54fc4
AL
2071
/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
static int guess_disk_lchs(BlockDriverState *bs,
                           int *pcylinders, int *pheads, int *psectors)
{
    uint8_t buf[BDRV_SECTOR_SIZE];
    int ret, i, heads, sectors, cylinders;
    struct partition *p;
    uint32_t nr_sects;
    uint64_t nb_sectors;
    bool enabled;

    bdrv_get_geometry(bs, &nb_sectors);

    /**
     * The function will be invoked during startup not only in sync I/O mode,
     * but also in async I/O mode. So the I/O throttling function has to
     * be disabled temporarily here, not permanently.
     */
    enabled = bs->io_limits_enabled;
    bs->io_limits_enabled = false;
    ret = bdrv_read(bs, 0, buf, 1);
    bs->io_limits_enabled = enabled;
    if (ret < 0)
        return -1;
    /* test msdos magic */
    if (buf[510] != 0x55 || buf[511] != 0xaa)
        return -1;
    /* Scan the four primary partition entries at offset 0x1be */
    for(i = 0; i < 4; i++) {
        p = ((struct partition *)(buf + 0x1be)) + i;
        nr_sects = le32_to_cpu(p->nr_sects);
        if (nr_sects && p->end_head) {
            /* We make the assumption that the partition terminates on
               a cylinder boundary */
            heads = p->end_head + 1;
            /* Low 6 bits of the CHS end-sector byte hold the sector count */
            sectors = p->end_sector & 63;
            if (sectors == 0)
                continue;
            cylinders = nb_sectors / (heads * sectors);
            /* 16383 is the classic ATA cylinder limit; reject nonsense */
            if (cylinders < 1 || cylinders > 16383)
                continue;
            *pheads = heads;
            *psectors = sectors;
            *pcylinders = cylinders;
#if 0
            printf("guessed geometry: LCHS=%d %d %d\n",
                   cylinders, heads, sectors);
#endif
            return 0;
        }
    }
    return -1;
}
2124
2125void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
2126{
2127 int translation, lba_detected = 0;
2128 int cylinders, heads, secs;
a38131b6 2129 uint64_t nb_sectors;
f3d54fc4
AL
2130
2131 /* if a geometry hint is available, use it */
2132 bdrv_get_geometry(bs, &nb_sectors);
2133 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
2134 translation = bdrv_get_translation_hint(bs);
2135 if (cylinders != 0) {
2136 *pcyls = cylinders;
2137 *pheads = heads;
2138 *psecs = secs;
2139 } else {
2140 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
2141 if (heads > 16) {
2142 /* if heads > 16, it means that a BIOS LBA
2143 translation was active, so the default
2144 hardware geometry is OK */
2145 lba_detected = 1;
2146 goto default_geometry;
2147 } else {
2148 *pcyls = cylinders;
2149 *pheads = heads;
2150 *psecs = secs;
2151 /* disable any translation to be in sync with
2152 the logical geometry */
2153 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
2154 bdrv_set_translation_hint(bs,
2155 BIOS_ATA_TRANSLATION_NONE);
2156 }
2157 }
2158 } else {
2159 default_geometry:
2160 /* if no geometry, use a standard physical disk geometry */
2161 cylinders = nb_sectors / (16 * 63);
2162
2163 if (cylinders > 16383)
2164 cylinders = 16383;
2165 else if (cylinders < 2)
2166 cylinders = 2;
2167 *pcyls = cylinders;
2168 *pheads = 16;
2169 *psecs = 63;
2170 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
2171 if ((*pcyls * *pheads) <= 131072) {
2172 bdrv_set_translation_hint(bs,
2173 BIOS_ATA_TRANSLATION_LARGE);
2174 } else {
2175 bdrv_set_translation_hint(bs,
2176 BIOS_ATA_TRANSLATION_LBA);
2177 }
2178 }
2179 }
2180 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
2181 }
2182}
2183
5fafdf24 2184void bdrv_set_geometry_hint(BlockDriverState *bs,
b338082b
FB
2185 int cyls, int heads, int secs)
2186{
2187 bs->cyls = cyls;
2188 bs->heads = heads;
2189 bs->secs = secs;
2190}
2191
46d4767d
FB
2192void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
2193{
2194 bs->translation = translation;
2195}
2196
5fafdf24 2197void bdrv_get_geometry_hint(BlockDriverState *bs,
b338082b
FB
2198 int *pcyls, int *pheads, int *psecs)
2199{
2200 *pcyls = bs->cyls;
2201 *pheads = bs->heads;
2202 *psecs = bs->secs;
2203}
2204
0563e191
ZYW
2205/* throttling disk io limits */
2206void bdrv_set_io_limits(BlockDriverState *bs,
2207 BlockIOLimit *io_limits)
2208{
2209 bs->io_limits = *io_limits;
2210 bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
2211}
2212
5bbdbb46
BS
2213/* Recognize floppy formats */
2214typedef struct FDFormat {
2215 FDriveType drive;
2216 uint8_t last_sect;
2217 uint8_t max_track;
2218 uint8_t max_head;
f8d3d128 2219 FDriveRate rate;
5bbdbb46
BS
2220} FDFormat;
2221
2222static const FDFormat fd_formats[] = {
2223 /* First entry is default format */
2224 /* 1.44 MB 3"1/2 floppy disks */
f8d3d128
HP
2225 { FDRIVE_DRV_144, 18, 80, 1, FDRIVE_RATE_500K, },
2226 { FDRIVE_DRV_144, 20, 80, 1, FDRIVE_RATE_500K, },
2227 { FDRIVE_DRV_144, 21, 80, 1, FDRIVE_RATE_500K, },
2228 { FDRIVE_DRV_144, 21, 82, 1, FDRIVE_RATE_500K, },
2229 { FDRIVE_DRV_144, 21, 83, 1, FDRIVE_RATE_500K, },
2230 { FDRIVE_DRV_144, 22, 80, 1, FDRIVE_RATE_500K, },
2231 { FDRIVE_DRV_144, 23, 80, 1, FDRIVE_RATE_500K, },
2232 { FDRIVE_DRV_144, 24, 80, 1, FDRIVE_RATE_500K, },
5bbdbb46 2233 /* 2.88 MB 3"1/2 floppy disks */
f8d3d128
HP
2234 { FDRIVE_DRV_288, 36, 80, 1, FDRIVE_RATE_1M, },
2235 { FDRIVE_DRV_288, 39, 80, 1, FDRIVE_RATE_1M, },
2236 { FDRIVE_DRV_288, 40, 80, 1, FDRIVE_RATE_1M, },
2237 { FDRIVE_DRV_288, 44, 80, 1, FDRIVE_RATE_1M, },
2238 { FDRIVE_DRV_288, 48, 80, 1, FDRIVE_RATE_1M, },
5bbdbb46 2239 /* 720 kB 3"1/2 floppy disks */
f8d3d128
HP
2240 { FDRIVE_DRV_144, 9, 80, 1, FDRIVE_RATE_250K, },
2241 { FDRIVE_DRV_144, 10, 80, 1, FDRIVE_RATE_250K, },
2242 { FDRIVE_DRV_144, 10, 82, 1, FDRIVE_RATE_250K, },
2243 { FDRIVE_DRV_144, 10, 83, 1, FDRIVE_RATE_250K, },
2244 { FDRIVE_DRV_144, 13, 80, 1, FDRIVE_RATE_250K, },
2245 { FDRIVE_DRV_144, 14, 80, 1, FDRIVE_RATE_250K, },
5bbdbb46 2246 /* 1.2 MB 5"1/4 floppy disks */
f8d3d128
HP
2247 { FDRIVE_DRV_120, 15, 80, 1, FDRIVE_RATE_500K, },
2248 { FDRIVE_DRV_120, 18, 80, 1, FDRIVE_RATE_500K, },
2249 { FDRIVE_DRV_120, 18, 82, 1, FDRIVE_RATE_500K, },
2250 { FDRIVE_DRV_120, 18, 83, 1, FDRIVE_RATE_500K, },
2251 { FDRIVE_DRV_120, 20, 80, 1, FDRIVE_RATE_500K, },
5bbdbb46 2252 /* 720 kB 5"1/4 floppy disks */
f8d3d128
HP
2253 { FDRIVE_DRV_120, 9, 80, 1, FDRIVE_RATE_250K, },
2254 { FDRIVE_DRV_120, 11, 80, 1, FDRIVE_RATE_250K, },
5bbdbb46 2255 /* 360 kB 5"1/4 floppy disks */
f8d3d128
HP
2256 { FDRIVE_DRV_120, 9, 40, 1, FDRIVE_RATE_300K, },
2257 { FDRIVE_DRV_120, 9, 40, 0, FDRIVE_RATE_300K, },
2258 { FDRIVE_DRV_120, 10, 41, 1, FDRIVE_RATE_300K, },
2259 { FDRIVE_DRV_120, 10, 42, 1, FDRIVE_RATE_300K, },
5bbdbb46 2260 /* 320 kB 5"1/4 floppy disks */
f8d3d128
HP
2261 { FDRIVE_DRV_120, 8, 40, 1, FDRIVE_RATE_250K, },
2262 { FDRIVE_DRV_120, 8, 40, 0, FDRIVE_RATE_250K, },
5bbdbb46 2263 /* 360 kB must match 5"1/4 better than 3"1/2... */
f8d3d128 2264 { FDRIVE_DRV_144, 9, 80, 0, FDRIVE_RATE_250K, },
5bbdbb46 2265 /* end */
f8d3d128 2266 { FDRIVE_DRV_NONE, -1, -1, 0, 0, },
5bbdbb46
BS
2267};
2268
2269void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
2270 int *max_track, int *last_sect,
f8d3d128
HP
2271 FDriveType drive_in, FDriveType *drive,
2272 FDriveRate *rate)
5bbdbb46
BS
2273{
2274 const FDFormat *parse;
2275 uint64_t nb_sectors, size;
2276 int i, first_match, match;
2277
2278 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
2279 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
2280 /* User defined disk */
f8d3d128 2281 *rate = FDRIVE_RATE_500K;
5bbdbb46
BS
2282 } else {
2283 bdrv_get_geometry(bs, &nb_sectors);
2284 match = -1;
2285 first_match = -1;
2286 for (i = 0; ; i++) {
2287 parse = &fd_formats[i];
2288 if (parse->drive == FDRIVE_DRV_NONE) {
2289 break;
2290 }
2291 if (drive_in == parse->drive ||
2292 drive_in == FDRIVE_DRV_NONE) {
2293 size = (parse->max_head + 1) * parse->max_track *
2294 parse->last_sect;
2295 if (nb_sectors == size) {
2296 match = i;
2297 break;
2298 }
2299 if (first_match == -1) {
2300 first_match = i;
2301 }
2302 }
2303 }
2304 if (match == -1) {
2305 if (first_match == -1) {
2306 match = 1;
2307 } else {
2308 match = first_match;
2309 }
2310 parse = &fd_formats[match];
2311 }
2312 *nb_heads = parse->max_head + 1;
2313 *max_track = parse->max_track;
2314 *last_sect = parse->last_sect;
2315 *drive = parse->drive;
f8d3d128 2316 *rate = parse->rate;
5bbdbb46
BS
2317 }
2318}
2319
46d4767d
FB
2320int bdrv_get_translation_hint(BlockDriverState *bs)
2321{
2322 return bs->translation;
2323}
2324
abd7f68d
MA
2325void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
2326 BlockErrorAction on_write_error)
2327{
2328 bs->on_read_error = on_read_error;
2329 bs->on_write_error = on_write_error;
2330}
2331
2332BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
2333{
2334 return is_read ? bs->on_read_error : bs->on_write_error;
2335}
2336
b338082b
FB
2337int bdrv_is_read_only(BlockDriverState *bs)
2338{
2339 return bs->read_only;
2340}
2341
985a03b0
TS
2342int bdrv_is_sg(BlockDriverState *bs)
2343{
2344 return bs->sg;
2345}
2346
e900a7b7
CH
2347int bdrv_enable_write_cache(BlockDriverState *bs)
2348{
2349 return bs->enable_write_cache;
2350}
2351
ea2384d3
FB
2352int bdrv_is_encrypted(BlockDriverState *bs)
2353{
2354 if (bs->backing_hd && bs->backing_hd->encrypted)
2355 return 1;
2356 return bs->encrypted;
2357}
2358
c0f4ce77
AL
2359int bdrv_key_required(BlockDriverState *bs)
2360{
2361 BlockDriverState *backing_hd = bs->backing_hd;
2362
2363 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2364 return 1;
2365 return (bs->encrypted && !bs->valid_key);
2366}
2367
ea2384d3
FB
2368int bdrv_set_key(BlockDriverState *bs, const char *key)
2369{
2370 int ret;
2371 if (bs->backing_hd && bs->backing_hd->encrypted) {
2372 ret = bdrv_set_key(bs->backing_hd, key);
2373 if (ret < 0)
2374 return ret;
2375 if (!bs->encrypted)
2376 return 0;
2377 }
fd04a2ae
SH
2378 if (!bs->encrypted) {
2379 return -EINVAL;
2380 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2381 return -ENOMEDIUM;
2382 }
c0f4ce77 2383 ret = bs->drv->bdrv_set_key(bs, key);
bb5fc20f
AL
2384 if (ret < 0) {
2385 bs->valid_key = 0;
2386 } else if (!bs->valid_key) {
2387 bs->valid_key = 1;
2388 /* call the change callback now, we skipped it on open */
7d4b4ba5 2389 bdrv_dev_change_media_cb(bs, true);
bb5fc20f 2390 }
c0f4ce77 2391 return ret;
ea2384d3
FB
2392}
2393
2394void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
2395{
19cb3738 2396 if (!bs->drv) {
ea2384d3
FB
2397 buf[0] = '\0';
2398 } else {
2399 pstrcpy(buf, buf_size, bs->drv->format_name);
2400 }
2401}
2402
5fafdf24 2403void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
ea2384d3
FB
2404 void *opaque)
2405{
2406 BlockDriver *drv;
2407
8a22f02a 2408 QLIST_FOREACH(drv, &bdrv_drivers, list) {
ea2384d3
FB
2409 it(opaque, drv->format_name);
2410 }
2411}
2412
b338082b
FB
2413BlockDriverState *bdrv_find(const char *name)
2414{
2415 BlockDriverState *bs;
2416
1b7bdbc1
SH
2417 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2418 if (!strcmp(name, bs->device_name)) {
b338082b 2419 return bs;
1b7bdbc1 2420 }
b338082b
FB
2421 }
2422 return NULL;
2423}
2424
2f399b0a
MA
2425BlockDriverState *bdrv_next(BlockDriverState *bs)
2426{
2427 if (!bs) {
2428 return QTAILQ_FIRST(&bdrv_states);
2429 }
2430 return QTAILQ_NEXT(bs, list);
2431}
2432
51de9760 2433void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
81d0912d
FB
2434{
2435 BlockDriverState *bs;
2436
1b7bdbc1 2437 QTAILQ_FOREACH(bs, &bdrv_states, list) {
51de9760 2438 it(opaque, bs);
81d0912d
FB
2439 }
2440}
2441
ea2384d3
FB
2442const char *bdrv_get_device_name(BlockDriverState *bs)
2443{
2444 return bs->device_name;
2445}
2446
c6ca28d6
AL
2447void bdrv_flush_all(void)
2448{
2449 BlockDriverState *bs;
2450
1b7bdbc1 2451 QTAILQ_FOREACH(bs, &bdrv_states, list) {
29cdb251 2452 bdrv_flush(bs);
1b7bdbc1 2453 }
c6ca28d6
AL
2454}
2455
f2feebbd
KW
2456int bdrv_has_zero_init(BlockDriverState *bs)
2457{
2458 assert(bs->drv);
2459
336c1c12
KW
2460 if (bs->drv->bdrv_has_zero_init) {
2461 return bs->drv->bdrv_has_zero_init(bs);
f2feebbd
KW
2462 }
2463
2464 return 1;
2465}
2466
376ae3f1
SH
2467typedef struct BdrvCoIsAllocatedData {
2468 BlockDriverState *bs;
2469 int64_t sector_num;
2470 int nb_sectors;
2471 int *pnum;
2472 int ret;
2473 bool done;
2474} BdrvCoIsAllocatedData;
2475
f58c7b35
TS
2476/*
2477 * Returns true iff the specified sector is present in the disk image. Drivers
2478 * not implementing the functionality are assumed to not support backing files,
2479 * hence all their sectors are reported as allocated.
2480 *
bd9533e3
SH
2481 * If 'sector_num' is beyond the end of the disk image the return value is 0
2482 * and 'pnum' is set to 0.
2483 *
f58c7b35
TS
2484 * 'pnum' is set to the number of sectors (including and immediately following
2485 * the specified sector) that are known to be in the same
2486 * allocated/unallocated state.
2487 *
bd9533e3
SH
2488 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
2489 * beyond the end of the disk image it will be clamped.
f58c7b35 2490 */
060f51c9
SH
2491int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
2492 int nb_sectors, int *pnum)
f58c7b35 2493{
bd9533e3
SH
2494 int64_t n;
2495
2496 if (sector_num >= bs->total_sectors) {
2497 *pnum = 0;
2498 return 0;
2499 }
2500
2501 n = bs->total_sectors - sector_num;
2502 if (n < nb_sectors) {
2503 nb_sectors = n;
2504 }
2505
6aebab14 2506 if (!bs->drv->bdrv_co_is_allocated) {
bd9533e3 2507 *pnum = nb_sectors;
f58c7b35
TS
2508 return 1;
2509 }
6aebab14 2510
060f51c9
SH
2511 return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
2512}
2513
2514/* Coroutine wrapper for bdrv_is_allocated() */
2515static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
2516{
2517 BdrvCoIsAllocatedData *data = opaque;
2518 BlockDriverState *bs = data->bs;
2519
2520 data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
2521 data->pnum);
2522 data->done = true;
2523}
2524
2525/*
2526 * Synchronous wrapper around bdrv_co_is_allocated().
2527 *
2528 * See bdrv_co_is_allocated() for details.
2529 */
2530int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
2531 int *pnum)
2532{
6aebab14
SH
2533 Coroutine *co;
2534 BdrvCoIsAllocatedData data = {
2535 .bs = bs,
2536 .sector_num = sector_num,
2537 .nb_sectors = nb_sectors,
2538 .pnum = pnum,
2539 .done = false,
2540 };
2541
2542 co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
2543 qemu_coroutine_enter(co, &data);
2544 while (!data.done) {
2545 qemu_aio_wait();
2546 }
2547 return data.ret;
f58c7b35
TS
2548}
2549
b2023818 2550BlockInfoList *qmp_query_block(Error **errp)
b338082b 2551{
b2023818 2552 BlockInfoList *head = NULL, *cur_item = NULL;
b338082b
FB
2553 BlockDriverState *bs;
2554
1b7bdbc1 2555 QTAILQ_FOREACH(bs, &bdrv_states, list) {
b2023818 2556 BlockInfoList *info = g_malloc0(sizeof(*info));
d15e5465 2557
b2023818
LC
2558 info->value = g_malloc0(sizeof(*info->value));
2559 info->value->device = g_strdup(bs->device_name);
2560 info->value->type = g_strdup("unknown");
2561 info->value->locked = bdrv_dev_is_medium_locked(bs);
2562 info->value->removable = bdrv_dev_has_removable_media(bs);
d15e5465 2563
e4def80b 2564 if (bdrv_dev_has_removable_media(bs)) {
b2023818
LC
2565 info->value->has_tray_open = true;
2566 info->value->tray_open = bdrv_dev_is_tray_open(bs);
e4def80b 2567 }
f04ef601
LC
2568
2569 if (bdrv_iostatus_is_enabled(bs)) {
b2023818
LC
2570 info->value->has_io_status = true;
2571 info->value->io_status = bs->iostatus;
f04ef601
LC
2572 }
2573
19cb3738 2574 if (bs->drv) {
b2023818
LC
2575 info->value->has_inserted = true;
2576 info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
2577 info->value->inserted->file = g_strdup(bs->filename);
2578 info->value->inserted->ro = bs->read_only;
2579 info->value->inserted->drv = g_strdup(bs->drv->format_name);
2580 info->value->inserted->encrypted = bs->encrypted;
2581 if (bs->backing_file[0]) {
2582 info->value->inserted->has_backing_file = true;
2583 info->value->inserted->backing_file = g_strdup(bs->backing_file);
376253ec 2584 }
727f005e
ZYW
2585
2586 if (bs->io_limits_enabled) {
2587 info->value->inserted->bps =
2588 bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2589 info->value->inserted->bps_rd =
2590 bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
2591 info->value->inserted->bps_wr =
2592 bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
2593 info->value->inserted->iops =
2594 bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2595 info->value->inserted->iops_rd =
2596 bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
2597 info->value->inserted->iops_wr =
2598 bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
2599 }
b2023818 2600 }
d15e5465 2601
b2023818
LC
2602 /* XXX: waiting for the qapi to support GSList */
2603 if (!cur_item) {
2604 head = cur_item = info;
2605 } else {
2606 cur_item->next = info;
2607 cur_item = info;
b338082b 2608 }
b338082b 2609 }
d15e5465 2610
b2023818 2611 return head;
b338082b 2612}
a36e69dd 2613
f11f57e4
LC
2614/* Consider exposing this as a full fledged QMP command */
2615static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
2616{
2617 BlockStats *s;
2618
2619 s = g_malloc0(sizeof(*s));
2620
2621 if (bs->device_name[0]) {
2622 s->has_device = true;
2623 s->device = g_strdup(bs->device_name);
294cc35f
KW
2624 }
2625
f11f57e4
LC
2626 s->stats = g_malloc0(sizeof(*s->stats));
2627 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
2628 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
2629 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
2630 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
2631 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
2632 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
2633 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
2634 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
2635 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
2636
294cc35f 2637 if (bs->file) {
f11f57e4
LC
2638 s->has_parent = true;
2639 s->parent = qmp_query_blockstat(bs->file, NULL);
294cc35f
KW
2640 }
2641
f11f57e4 2642 return s;
294cc35f
KW
2643}
2644
f11f57e4 2645BlockStatsList *qmp_query_blockstats(Error **errp)
218a536a 2646{
f11f57e4 2647 BlockStatsList *head = NULL, *cur_item = NULL;
a36e69dd
TS
2648 BlockDriverState *bs;
2649
1b7bdbc1 2650 QTAILQ_FOREACH(bs, &bdrv_states, list) {
f11f57e4
LC
2651 BlockStatsList *info = g_malloc0(sizeof(*info));
2652 info->value = qmp_query_blockstat(bs, NULL);
2653
2654 /* XXX: waiting for the qapi to support GSList */
2655 if (!cur_item) {
2656 head = cur_item = info;
2657 } else {
2658 cur_item->next = info;
2659 cur_item = info;
2660 }
a36e69dd 2661 }
218a536a 2662
f11f57e4 2663 return head;
a36e69dd 2664}
ea2384d3 2665
045df330
AL
2666const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2667{
2668 if (bs->backing_hd && bs->backing_hd->encrypted)
2669 return bs->backing_file;
2670 else if (bs->encrypted)
2671 return bs->filename;
2672 else
2673 return NULL;
2674}
2675
5fafdf24 2676void bdrv_get_backing_filename(BlockDriverState *bs,
83f64091
FB
2677 char *filename, int filename_size)
2678{
3574c608 2679 pstrcpy(filename, filename_size, bs->backing_file);
83f64091
FB
2680}
2681
5fafdf24 2682int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
faea38e7
FB
2683 const uint8_t *buf, int nb_sectors)
2684{
2685 BlockDriver *drv = bs->drv;
2686 if (!drv)
19cb3738 2687 return -ENOMEDIUM;
faea38e7
FB
2688 if (!drv->bdrv_write_compressed)
2689 return -ENOTSUP;
fbb7b4e0
KW
2690 if (bdrv_check_request(bs, sector_num, nb_sectors))
2691 return -EIO;
a55eb92c 2692
c6d22830 2693 if (bs->dirty_bitmap) {
7cd1e32a
LS
2694 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2695 }
a55eb92c 2696
faea38e7
FB
2697 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2698}
3b46e624 2699
faea38e7
FB
2700int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2701{
2702 BlockDriver *drv = bs->drv;
2703 if (!drv)
19cb3738 2704 return -ENOMEDIUM;
faea38e7
FB
2705 if (!drv->bdrv_get_info)
2706 return -ENOTSUP;
2707 memset(bdi, 0, sizeof(*bdi));
2708 return drv->bdrv_get_info(bs, bdi);
2709}
2710
45566e9c
CH
2711int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2712 int64_t pos, int size)
178e08a5
AL
2713{
2714 BlockDriver *drv = bs->drv;
2715 if (!drv)
2716 return -ENOMEDIUM;
7cdb1f6d
MK
2717 if (drv->bdrv_save_vmstate)
2718 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2719 if (bs->file)
2720 return bdrv_save_vmstate(bs->file, buf, pos, size);
2721 return -ENOTSUP;
178e08a5
AL
2722}
2723
45566e9c
CH
2724int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2725 int64_t pos, int size)
178e08a5
AL
2726{
2727 BlockDriver *drv = bs->drv;
2728 if (!drv)
2729 return -ENOMEDIUM;
7cdb1f6d
MK
2730 if (drv->bdrv_load_vmstate)
2731 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2732 if (bs->file)
2733 return bdrv_load_vmstate(bs->file, buf, pos, size);
2734 return -ENOTSUP;
178e08a5
AL
2735}
2736
8b9b0cc2
KW
2737void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2738{
2739 BlockDriver *drv = bs->drv;
2740
2741 if (!drv || !drv->bdrv_debug_event) {
2742 return;
2743 }
2744
2745 return drv->bdrv_debug_event(bs, event);
2746
2747}
2748
faea38e7
FB
2749/**************************************************************/
2750/* handling of snapshots */
2751
feeee5ac
MDCF
2752int bdrv_can_snapshot(BlockDriverState *bs)
2753{
2754 BlockDriver *drv = bs->drv;
07b70bfb 2755 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
feeee5ac
MDCF
2756 return 0;
2757 }
2758
2759 if (!drv->bdrv_snapshot_create) {
2760 if (bs->file != NULL) {
2761 return bdrv_can_snapshot(bs->file);
2762 }
2763 return 0;
2764 }
2765
2766 return 1;
2767}
2768
199630b6
BS
2769int bdrv_is_snapshot(BlockDriverState *bs)
2770{
2771 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2772}
2773
f9092b10
MA
2774BlockDriverState *bdrv_snapshots(void)
2775{
2776 BlockDriverState *bs;
2777
3ac906f7 2778 if (bs_snapshots) {
f9092b10 2779 return bs_snapshots;
3ac906f7 2780 }
f9092b10
MA
2781
2782 bs = NULL;
2783 while ((bs = bdrv_next(bs))) {
2784 if (bdrv_can_snapshot(bs)) {
3ac906f7
MA
2785 bs_snapshots = bs;
2786 return bs;
f9092b10
MA
2787 }
2788 }
2789 return NULL;
f9092b10
MA
2790}
2791
5fafdf24 2792int bdrv_snapshot_create(BlockDriverState *bs,
faea38e7
FB
2793 QEMUSnapshotInfo *sn_info)
2794{
2795 BlockDriver *drv = bs->drv;
2796 if (!drv)
19cb3738 2797 return -ENOMEDIUM;
7cdb1f6d
MK
2798 if (drv->bdrv_snapshot_create)
2799 return drv->bdrv_snapshot_create(bs, sn_info);
2800 if (bs->file)
2801 return bdrv_snapshot_create(bs->file, sn_info);
2802 return -ENOTSUP;
faea38e7
FB
2803}
2804
5fafdf24 2805int bdrv_snapshot_goto(BlockDriverState *bs,
faea38e7
FB
2806 const char *snapshot_id)
2807{
2808 BlockDriver *drv = bs->drv;
7cdb1f6d
MK
2809 int ret, open_ret;
2810
faea38e7 2811 if (!drv)
19cb3738 2812 return -ENOMEDIUM;
7cdb1f6d
MK
2813 if (drv->bdrv_snapshot_goto)
2814 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2815
2816 if (bs->file) {
2817 drv->bdrv_close(bs);
2818 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2819 open_ret = drv->bdrv_open(bs, bs->open_flags);
2820 if (open_ret < 0) {
2821 bdrv_delete(bs->file);
2822 bs->drv = NULL;
2823 return open_ret;
2824 }
2825 return ret;
2826 }
2827
2828 return -ENOTSUP;
faea38e7
FB
2829}
2830
2831int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2832{
2833 BlockDriver *drv = bs->drv;
2834 if (!drv)
19cb3738 2835 return -ENOMEDIUM;
7cdb1f6d
MK
2836 if (drv->bdrv_snapshot_delete)
2837 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2838 if (bs->file)
2839 return bdrv_snapshot_delete(bs->file, snapshot_id);
2840 return -ENOTSUP;
faea38e7
FB
2841}
2842
5fafdf24 2843int bdrv_snapshot_list(BlockDriverState *bs,
faea38e7
FB
2844 QEMUSnapshotInfo **psn_info)
2845{
2846 BlockDriver *drv = bs->drv;
2847 if (!drv)
19cb3738 2848 return -ENOMEDIUM;
7cdb1f6d
MK
2849 if (drv->bdrv_snapshot_list)
2850 return drv->bdrv_snapshot_list(bs, psn_info);
2851 if (bs->file)
2852 return bdrv_snapshot_list(bs->file, psn_info);
2853 return -ENOTSUP;
faea38e7
FB
2854}
2855
51ef6727 2856int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2857 const char *snapshot_name)
2858{
2859 BlockDriver *drv = bs->drv;
2860 if (!drv) {
2861 return -ENOMEDIUM;
2862 }
2863 if (!bs->read_only) {
2864 return -EINVAL;
2865 }
2866 if (drv->bdrv_snapshot_load_tmp) {
2867 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2868 }
2869 return -ENOTSUP;
2870}
2871
e8a6bb9c
MT
2872BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2873 const char *backing_file)
2874{
2875 if (!bs->drv) {
2876 return NULL;
2877 }
2878
2879 if (bs->backing_hd) {
2880 if (strcmp(bs->backing_file, backing_file) == 0) {
2881 return bs->backing_hd;
2882 } else {
2883 return bdrv_find_backing_image(bs->backing_hd, backing_file);
2884 }
2885 }
2886
2887 return NULL;
2888}
2889
#define NB_SUFFIXES 4

/* Format @size into @buf as a human-readable string ("999", "1.5K",
 * "10K", "3.0M", ...).  Values up to 999 are printed exactly; larger
 * values use K/M/G/T with one decimal below ten units.  Returns @buf. */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t base = 1024;
    int i;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (i = 0; i < NB_SUFFIXES; i++, base *= 1024) {
        if (size < (10 * base)) {
            /* fewer than two integral digits: keep one decimal place */
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / base,
                     suffixes[i]);
            return buf;
        }
        if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
            /* round to the nearest whole unit */
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     ((size + (base >> 1)) / base),
                     suffixes[i]);
            return buf;
        }
    }
    return buf;
}
2919
2920char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2921{
2922 char buf1[128], date_buf[128], clock_buf[128];
3b9f94e1
FB
2923#ifdef _WIN32
2924 struct tm *ptm;
2925#else
faea38e7 2926 struct tm tm;
3b9f94e1 2927#endif
faea38e7
FB
2928 time_t ti;
2929 int64_t secs;
2930
2931 if (!sn) {
5fafdf24
TS
2932 snprintf(buf, buf_size,
2933 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
2934 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2935 } else {
2936 ti = sn->date_sec;
3b9f94e1
FB
2937#ifdef _WIN32
2938 ptm = localtime(&ti);
2939 strftime(date_buf, sizeof(date_buf),
2940 "%Y-%m-%d %H:%M:%S", ptm);
2941#else
faea38e7
FB
2942 localtime_r(&ti, &tm);
2943 strftime(date_buf, sizeof(date_buf),
2944 "%Y-%m-%d %H:%M:%S", &tm);
3b9f94e1 2945#endif
faea38e7
FB
2946 secs = sn->vm_clock_nsec / 1000000000;
2947 snprintf(clock_buf, sizeof(clock_buf),
2948 "%02d:%02d:%02d.%03d",
2949 (int)(secs / 3600),
2950 (int)((secs / 60) % 60),
5fafdf24 2951 (int)(secs % 60),
faea38e7
FB
2952 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2953 snprintf(buf, buf_size,
5fafdf24 2954 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
2955 sn->id_str, sn->name,
2956 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2957 date_buf,
2958 clock_buf);
2959 }
2960 return buf;
2961}
2962
ea2384d3 2963/**************************************************************/
83f64091 2964/* async I/Os */
ea2384d3 2965
3b69e4b9 2966BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
f141eafe 2967 QEMUIOVector *qiov, int nb_sectors,
3b69e4b9 2968 BlockDriverCompletionFunc *cb, void *opaque)
83f64091 2969{
bbf0a440
SH
2970 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2971
b2a61371 2972 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
8c5873d6 2973 cb, opaque, false);
ea2384d3
FB
2974}
2975
f141eafe
AL
2976BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2977 QEMUIOVector *qiov, int nb_sectors,
2978 BlockDriverCompletionFunc *cb, void *opaque)
ea2384d3 2979{
bbf0a440
SH
2980 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2981
1a6e115b 2982 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
8c5873d6 2983 cb, opaque, true);
83f64091
FB
2984}
2985
40b4f539
KW
2986
2987typedef struct MultiwriteCB {
2988 int error;
2989 int num_requests;
2990 int num_callbacks;
2991 struct {
2992 BlockDriverCompletionFunc *cb;
2993 void *opaque;
2994 QEMUIOVector *free_qiov;
40b4f539
KW
2995 } callbacks[];
2996} MultiwriteCB;
2997
2998static void multiwrite_user_cb(MultiwriteCB *mcb)
2999{
3000 int i;
3001
3002 for (i = 0; i < mcb->num_callbacks; i++) {
3003 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1e1ea48d
SH
3004 if (mcb->callbacks[i].free_qiov) {
3005 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
3006 }
7267c094 3007 g_free(mcb->callbacks[i].free_qiov);
40b4f539
KW
3008 }
3009}
3010
3011static void multiwrite_cb(void *opaque, int ret)
3012{
3013 MultiwriteCB *mcb = opaque;
3014
6d519a5f
SH
3015 trace_multiwrite_cb(mcb, ret);
3016
cb6d3ca0 3017 if (ret < 0 && !mcb->error) {
40b4f539 3018 mcb->error = ret;
40b4f539
KW
3019 }
3020
3021 mcb->num_requests--;
3022 if (mcb->num_requests == 0) {
de189a1b 3023 multiwrite_user_cb(mcb);
7267c094 3024 g_free(mcb);
40b4f539
KW
3025 }
3026}
3027
3028static int multiwrite_req_compare(const void *a, const void *b)
3029{
77be4366
CH
3030 const BlockRequest *req1 = a, *req2 = b;
3031
3032 /*
3033 * Note that we can't simply subtract req2->sector from req1->sector
3034 * here as that could overflow the return value.
3035 */
3036 if (req1->sector > req2->sector) {
3037 return 1;
3038 } else if (req1->sector < req2->sector) {
3039 return -1;
3040 } else {
3041 return 0;
3042 }
40b4f539
KW
3043}
3044
3045/*
3046 * Takes a bunch of requests and tries to merge them. Returns the number of
3047 * requests that remain after merging.
3048 */
3049static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
3050 int num_reqs, MultiwriteCB *mcb)
3051{
3052 int i, outidx;
3053
3054 // Sort requests by start sector
3055 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
3056
3057 // Check if adjacent requests touch the same clusters. If so, combine them,
3058 // filling up gaps with zero sectors.
3059 outidx = 0;
3060 for (i = 1; i < num_reqs; i++) {
3061 int merge = 0;
3062 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
3063
b6a127a1 3064 // Handle exactly sequential writes and overlapping writes.
40b4f539
KW
3065 if (reqs[i].sector <= oldreq_last) {
3066 merge = 1;
3067 }
3068
e2a305fb
CH
3069 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
3070 merge = 0;
3071 }
3072
40b4f539
KW
3073 if (merge) {
3074 size_t size;
7267c094 3075 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
40b4f539
KW
3076 qemu_iovec_init(qiov,
3077 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
3078
3079 // Add the first request to the merged one. If the requests are
3080 // overlapping, drop the last sectors of the first request.
3081 size = (reqs[i].sector - reqs[outidx].sector) << 9;
3082 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
3083
b6a127a1
PB
3084 // We should need to add any zeros between the two requests
3085 assert (reqs[i].sector <= oldreq_last);
40b4f539
KW
3086
3087 // Add the second request
3088 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
3089
cbf1dff2 3090 reqs[outidx].nb_sectors = qiov->size >> 9;
40b4f539
KW
3091 reqs[outidx].qiov = qiov;
3092
3093 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
3094 } else {
3095 outidx++;
3096 reqs[outidx].sector = reqs[i].sector;
3097 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
3098 reqs[outidx].qiov = reqs[i].qiov;
3099 }
3100 }
3101
3102 return outidx + 1;
3103}
3104
/*
 * Submit multiple AIO write requests at once.
 *
 * On success, the function returns 0 and all requests in the reqs array have
 * been submitted. In error case this function returns -1, and any of the
 * requests may or may not be submitted yet. In particular, this means that the
 * callback will be called for some of the requests, for others it won't. The
 * caller must check the error field of the BlockRequest to wait for the right
 * callbacks (if error != 0, no callback will be called).
 *
 * The implementation may modify the contents of the reqs array, e.g. to merge
 * requests. However, the fields opaque and error are left unmodified as they
 * are used to signal failure for a single request to the caller.
 */
int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
{
    MultiwriteCB *mcb;
    int i;

    /* don't submit writes if we don't have a medium */
    if (bs->drv == NULL) {
        for (i = 0; i < num_reqs; i++) {
            reqs[i].error = -ENOMEDIUM;
        }
        return -1;
    }

    if (num_reqs == 0) {
        return 0;
    }

    /* Create the MultiwriteCB; it tracks one callback slot per original
     * request, even after merging reduces the number of actual AIOs. */
    mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
    mcb->num_requests = 0;
    mcb->num_callbacks = num_reqs;

    for (i = 0; i < num_reqs; i++) {
        mcb->callbacks[i].cb = reqs[i].cb;
        mcb->callbacks[i].opaque = reqs[i].opaque;
    }

    /* Check for mergable requests; may shrink num_reqs in place */
    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);

    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);

    /* Run the aio requests. */
    mcb->num_requests = num_reqs;
    for (i = 0; i < num_reqs; i++) {
        bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
                        reqs[i].nb_sectors, multiwrite_cb, mcb);
    }

    return 0;
}
3160
83f64091 3161void bdrv_aio_cancel(BlockDriverAIOCB *acb)
83f64091 3162{
6bbff9a0 3163 acb->pool->cancel(acb);
83f64091
FB
3164}
3165
/* block I/O throttling */

/*
 * Check whether issuing @nb_sectors of I/O would exceed the configured
 * bytes-per-second limit for the current slice.  Returns true (and stores the
 * suggested delay in ns into *wait, if non-NULL) when the request must be
 * throttled; false when it may proceed immediately.
 */
static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
                 bool is_write, double elapsed_time, uint64_t *wait)
{
    uint64_t bps_limit = 0;
    double   bytes_limit, bytes_base, bytes_res;
    double   slice_time, wait_time;

    /* A total limit takes precedence over a per-direction limit */
    if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
        bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
    } else if (bs->io_limits.bps[is_write]) {
        bps_limit = bs->io_limits.bps[is_write];
    } else {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    slice_time = bs->slice_end - bs->slice_start;
    slice_time /= (NANOSECONDS_PER_SECOND);
    bytes_limit = bps_limit * slice_time;
    bytes_base  = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
    if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
        /* total limit counts both directions */
        bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
    }

    /* bytes_base: the bytes of data which have been read/written; and
     * it is obtained from the history statistic info.
     * bytes_res: the remaining bytes of data which need to be read/written.
     * (bytes_base + bytes_res) / bps_limit: used to calculate
     * the total time for completing reading/writing all data.
     */
    bytes_res   = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;

    if (bytes_base + bytes_res <= bytes_limit) {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    /* Calc approx time to dispatch */
    wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;

    /* When the I/O rate at runtime exceeds the limits,
     * bs->slice_end need to be extended in order that the current statistic
     * info can be kept until the timer fire, so it is increased and tuned
     * based on the result of experiment.
     */
    bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
    bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
    if (wait) {
        *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
    }

    return true;
}
3226
/*
 * Check whether issuing one more I/O operation would exceed the configured
 * iops limit for the current slice.  Mirrors bdrv_exceed_bps_limits() but
 * counts operations instead of bytes.  Returns true (with the suggested
 * delay in *wait, if non-NULL) when the request must be throttled.
 */
static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
                             double elapsed_time, uint64_t *wait)
{
    uint64_t iops_limit = 0;
    double   ios_limit, ios_base;
    double   slice_time, wait_time;

    /* A total limit takes precedence over a per-direction limit */
    if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
        iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
    } else if (bs->io_limits.iops[is_write]) {
        iops_limit = bs->io_limits.iops[is_write];
    } else {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    slice_time = bs->slice_end - bs->slice_start;
    slice_time /= (NANOSECONDS_PER_SECOND);
    ios_limit  = iops_limit * slice_time;
    ios_base   = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
    if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
        /* total limit counts both directions */
        ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
    }

    /* +1 accounts for the operation about to be issued */
    if (ios_base + 1 <= ios_limit) {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    /* Calc approx time to dispatch */
    wait_time = (ios_base + 1) / iops_limit;
    if (wait_time > elapsed_time) {
        wait_time = wait_time - elapsed_time;
    } else {
        wait_time = 0;
    }

    /* Extend the slice so the statistics survive until the timer fires;
     * scaling factor tuned experimentally (see bps variant above). */
    bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
    bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
    if (wait) {
        *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
    }

    return true;
}
3278
/*
 * Combined throttling check: returns true when either the bps or the iops
 * limit would be exceeded, storing the larger of the two suggested delays
 * into *wait (if non-NULL).  Also (re)initializes the accounting slice when
 * the current time falls outside the previous slice.
 */
static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
                           bool is_write, int64_t *wait)
{
    int64_t  now, max_wait;
    uint64_t bps_wait = 0, iops_wait = 0;
    double   elapsed_time;
    int      bps_ret, iops_ret;

    now = qemu_get_clock_ns(vm_clock);
    if ((bs->slice_start < now)
        && (bs->slice_end > now)) {
        /* still inside the current slice: just push its end out */
        bs->slice_end = now + bs->slice_time;
    } else {
        /* start a fresh slice and snapshot the I/O counters as its base */
        bs->slice_time  =  5 * BLOCK_IO_SLICE_TIME;
        bs->slice_start = now;
        bs->slice_end   = now + bs->slice_time;

        bs->io_base.bytes[is_write]  = bs->nr_bytes[is_write];
        bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];

        bs->io_base.ios[is_write]    = bs->nr_ops[is_write];
        bs->io_base.ios[!is_write]   = bs->nr_ops[!is_write];
    }

    elapsed_time  = now - bs->slice_start;
    elapsed_time  /= (NANOSECONDS_PER_SECOND);

    bps_ret  = bdrv_exceed_bps_limits(bs, nb_sectors,
                                      is_write, elapsed_time, &bps_wait);
    iops_ret = bdrv_exceed_iops_limits(bs, is_write,
                                      elapsed_time, &iops_wait);
    if (bps_ret || iops_ret) {
        /* throttled: wait for the stricter (longer) of the two delays */
        max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
        if (wait) {
            *wait = max_wait;
        }

        now = qemu_get_clock_ns(vm_clock);
        if (bs->slice_end < now + max_wait) {
            bs->slice_end = now + max_wait;
        }

        return true;
    }

    if (wait) {
        *wait = 0;
    }

    return false;
}
ce1a14dc 3330
83f64091
FB
/**************************************************************/
/* async block device emulation */

/* AIOCB used to emulate async I/O on top of a driver's synchronous
 * bdrv_read/bdrv_write via a bottom half and a bounce buffer. */
typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;
    QEMUBH *bh;          /* completion bottom half */
    int ret;             /* result of the synchronous read/write */
    /* vector translation state */
    QEMUIOVector *qiov;  /* caller's scatter/gather list */
    uint8_t *bounce;     /* linear bounce buffer for the sync driver call */
    int is_write;
} BlockDriverAIOCBSync;
3343
/* Cancel a synchronous-emulation AIOCB: drop its pending bottom half and
 * release it.  The sync driver call has already completed at this point. */
static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
{
    BlockDriverAIOCBSync *acb =
        container_of(blockacb, BlockDriverAIOCBSync, common);
    qemu_bh_delete(acb->bh);
    acb->bh = NULL;
    qemu_aio_release(acb);
}

/* Pool for the sync-emulation AIOCBs above */
static AIOPool bdrv_em_aio_pool = {
    .aiocb_size         = sizeof(BlockDriverAIOCBSync),
    .cancel             = bdrv_aio_cancel_em,
};
3357
ce1a14dc 3358static void bdrv_aio_bh_cb(void *opaque)
83f64091 3359{
ce1a14dc 3360 BlockDriverAIOCBSync *acb = opaque;
f141eafe 3361
f141eafe
AL
3362 if (!acb->is_write)
3363 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
ceb42de8 3364 qemu_vfree(acb->bounce);
ce1a14dc 3365 acb->common.cb(acb->common.opaque, acb->ret);
6a7ad299 3366 qemu_bh_delete(acb->bh);
36afc451 3367 acb->bh = NULL;
ce1a14dc 3368 qemu_aio_release(acb);
83f64091 3369}
beac80cd 3370
/*
 * Emulate an async read/write using the driver's synchronous bdrv_read /
 * bdrv_write.  The sync call happens immediately; completion is deferred to
 * a bottom half so the caller still observes async semantics.
 */
static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
                                            int64_t sector_num,
                                            QEMUIOVector *qiov,
                                            int nb_sectors,
                                            BlockDriverCompletionFunc *cb,
                                            void *opaque,
                                            int is_write)

{
    BlockDriverAIOCBSync *acb;

    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
    acb->is_write = is_write;
    acb->qiov = qiov;
    /* linearize the caller's iovec into a single aligned buffer */
    acb->bounce = qemu_blockalign(bs, qiov->size);
    acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);

    if (is_write) {
        qemu_iovec_to_buffer(acb->qiov, acb->bounce);
        acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
    } else {
        acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
    }

    /* deliver the (already known) result from a bottom half */
    qemu_bh_schedule(acb->bh);

    return &acb->common;
}
3399
/* Async read emulated on top of the driver's synchronous read. */
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
}

/* Async write emulated on top of the driver's synchronous write. */
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
}
beac80cd 3413
/* AIOCB used to run an AIO request inside a coroutine; completion is
 * signalled through a bottom half (bdrv_co_em_bh). */
typedef struct BlockDriverAIOCBCoroutine {
    BlockDriverAIOCB common;
    BlockRequest req;   /* sector/qiov/nb_sectors plus the error result */
    bool is_write;
    QEMUBH* bh;
} BlockDriverAIOCBCoroutine;
3421
/* Cancel for coroutine-based AIOCBs: we cannot cancel a running coroutine,
 * so drain all outstanding AIO instead and let the request complete. */
static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
{
    qemu_aio_flush();
}

/* Pool for coroutine-based AIOCBs */
static AIOPool bdrv_em_co_aio_pool = {
    .aiocb_size         = sizeof(BlockDriverAIOCBCoroutine),
    .cancel             = bdrv_aio_co_cancel_em,
};
3431
/* Bottom half run after the coroutine finishes: deliver the result to the
 * caller's callback and release the AIOCB. */
static void bdrv_co_em_bh(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;

    acb->common.cb(acb->common.opaque, acb->req.error);
    qemu_bh_delete(acb->bh);
    qemu_aio_release(acb);
}
3440
/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
static void coroutine_fn bdrv_co_do_rw(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    if (!acb->is_write) {
        acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov, 0);
    } else {
        acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov, 0);
    }

    /* completion must happen outside coroutine context -> bottom half */
    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}
3458
/*
 * Start an AIO read or write by spawning a coroutine running
 * bdrv_co_do_rw() on the given request.
 */
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
                                               bool is_write)
{
    Coroutine *co;
    BlockDriverAIOCBCoroutine *acb;

    acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
    acb->req.sector = sector_num;
    acb->req.nb_sectors = nb_sectors;
    acb->req.qiov = qiov;
    acb->is_write = is_write;

    co = qemu_coroutine_create(bdrv_co_do_rw);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
}
3481
/* Coroutine entry point for bdrv_aio_flush(): run the flush, then complete
 * via bottom half. */
static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    acb->req.error = bdrv_co_flush(bs);
    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}
3491
07f07615 3492BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
016f5cf6
AG
3493 BlockDriverCompletionFunc *cb, void *opaque)
3494{
07f07615 3495 trace_bdrv_aio_flush(bs, opaque);
016f5cf6 3496
07f07615
PB
3497 Coroutine *co;
3498 BlockDriverAIOCBCoroutine *acb;
016f5cf6 3499
07f07615
PB
3500 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3501 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
3502 qemu_coroutine_enter(co, acb);
016f5cf6 3503
016f5cf6
AG
3504 return &acb->common;
3505}
3506
/* Coroutine entry point for bdrv_aio_discard(). */
static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}

/* Start an asynchronous discard of the given sector range. */
BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    Coroutine *co;
    BlockDriverAIOCBCoroutine *acb;

    trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);

    acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
    acb->req.sector = sector_num;
    acb->req.nb_sectors = nb_sectors;
    co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
}
3534
/* Register all built-in block drivers. */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}

/* Like bdrv_init(), but restrict format probing to whitelisted drivers. */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
3545
/*
 * Allocate an AIOCB from @pool, reusing a free-listed one if available.
 * The AIOCB is initialized with the given bs/cb/opaque.
 */
void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
                   BlockDriverCompletionFunc *cb, void *opaque)
{
    BlockDriverAIOCB *acb;

    if (pool->free_aiocb) {
        /* pop from the pool's free list */
        acb = pool->free_aiocb;
        pool->free_aiocb = acb->next;
    } else {
        acb = g_malloc0(pool->aiocb_size);
        acb->pool = pool;
    }
    acb->bs = bs;
    acb->cb = cb;
    acb->opaque = opaque;
    return acb;
}

/* Return an AIOCB to its pool's free list (never actually freed). */
void qemu_aio_release(void *p)
{
    BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
    AIOPool *pool = acb->pool;
    acb->next = pool->free_aiocb;
    pool->free_aiocb = acb;
}
19cb3738 3571
f9f05dc5
KW
3572/**************************************************************/
3573/* Coroutine block device emulation */
3574
3575typedef struct CoroutineIOCompletion {
3576 Coroutine *coroutine;
3577 int ret;
3578} CoroutineIOCompletion;
3579
3580static void bdrv_co_io_em_complete(void *opaque, int ret)
3581{
3582 CoroutineIOCompletion *co = opaque;
3583
3584 co->ret = ret;
3585 qemu_coroutine_enter(co->coroutine, NULL);
3586}
3587
/*
 * Emulate a coroutine read/write on top of the driver's AIO interface:
 * submit the AIO request, yield, and return the result once
 * bdrv_co_io_em_complete() resumes us.  Returns -EIO if submission fails.
 */
static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *iov,
                                      bool is_write)
{
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockDriverAIOCB *acb;

    if (is_write) {
        acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
                                       bdrv_co_io_em_complete, &co);
    } else {
        acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
                                      bdrv_co_io_em_complete, &co);
    }

    trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
    if (!acb) {
        return -EIO;
    }
    /* sleep until the AIO completion callback wakes us */
    qemu_coroutine_yield();

    return co.ret;
}
3613
/* Coroutine read emulated over the driver's AIO readv. */
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
}

/* Coroutine write emulated over the driver's AIO writev. */
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
}
3627
/* Coroutine entry point for the synchronous bdrv_flush() wrapper. */
static void coroutine_fn bdrv_flush_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    rwco->ret = bdrv_co_flush(rwco->bs);
}
3634
/*
 * Flush @bs from coroutine context.  Ordering matters:
 *  1. flush format-layer data to the OS (even with cache=unsafe),
 *  2. stop there for cache=unsafe (BDRV_O_NO_FLUSH),
 *  3. force data to disk via the best available driver hook,
 *  4. recurse into bs->file to flush the underlying protocol.
 * Returns 0 on success or a negative errno.
 */
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    int ret;

    /* nothing to do for missing/ejected/read-only media */
    if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
        return 0;
    }

    /* Write back cached data to the OS even with cache=unsafe */
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            return ret;
        }
    }

    /* But don't actually force it to the disk with cache=unsafe */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        return 0;
    }

    if (bs->drv->bdrv_co_flush_to_disk) {
        ret = bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        /* driver only has an AIO flush: submit it and yield */
        BlockDriverAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            ret = -EIO;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
        }
    } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work because
         * that would break guests even if the server operates in writethrough
         * mode.
         *
         * Let's hope the user knows what he's doing.
         */
        ret = 0;
    }
    if (ret < 0) {
        return ret;
    }

    /* Now flush the underlying protocol.  It will also have BDRV_O_NO_FLUSH
     * in the case of cache=unsafe, so there are no useless flushes.
     */
    return bdrv_co_flush(bs->file);
}
3694
0f15423c
AL
3695void bdrv_invalidate_cache(BlockDriverState *bs)
3696{
3697 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3698 bs->drv->bdrv_invalidate_cache(bs);
3699 }
3700}
3701
/* Invalidate cached data on every registered BlockDriverState. */
void bdrv_invalidate_cache_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_invalidate_cache(bs);
    }
}
3710
/* Clear the BDRV_O_INCOMING flag on all devices once incoming migration
 * has finished. */
void bdrv_clear_incoming_migration_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
    }
}
3719
/*
 * Synchronous flush wrapper: runs bdrv_co_flush() either directly (when
 * already in a coroutine) or in a new coroutine, waiting for completion.
 */
int bdrv_flush(BlockDriverState *bs)
{
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .ret = NOT_DONE,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_flush_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_flush_co_entry);
        qemu_coroutine_enter(co, &rwco);
        /* pump AIO completions until the coroutine publishes a result */
        while (rwco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }

    return rwco.ret;
}
3741
/* Coroutine entry point for the synchronous bdrv_discard() wrapper. */
static void coroutine_fn bdrv_discard_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
}
3748
/*
 * Discard a sector range from coroutine context.  Prefers the driver's
 * coroutine hook, falls back to its AIO hook (submit + yield), and silently
 * succeeds when the driver supports neither (discard is advisory).
 */
int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
                                 int nb_sectors)
{
    if (!bs->drv) {
        return -ENOMEDIUM;
    } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    } else if (bs->read_only) {
        return -EROFS;
    } else if (bs->drv->bdrv_co_discard) {
        return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
    } else if (bs->drv->bdrv_aio_discard) {
        BlockDriverAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
                                        bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            return -EIO;
        } else {
            qemu_coroutine_yield();
            return co.ret;
        }
    } else {
        /* no driver support: discard is only a hint, so succeed */
        return 0;
    }
}
3778
3779int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
3780{
3781 Coroutine *co;
3782 RwCo rwco = {
3783 .bs = bs,
3784 .sector_num = sector_num,
3785 .nb_sectors = nb_sectors,
3786 .ret = NOT_DONE,
3787 };
3788
3789 if (qemu_in_coroutine()) {
3790 /* Fast-path if already in coroutine context */
3791 bdrv_discard_co_entry(&rwco);
3792 } else {
3793 co = qemu_coroutine_create(bdrv_discard_co_entry);
3794 qemu_coroutine_enter(co, &rwco);
3795 while (rwco.ret == NOT_DONE) {
3796 qemu_aio_wait();
3797 }
3798 }
3799
3800 return rwco.ret;
3801}
3802
19cb3738
FB
3803/**************************************************************/
3804/* removable device support */
3805
3806/**
3807 * Return TRUE if the media is present
3808 */
3809int bdrv_is_inserted(BlockDriverState *bs)
3810{
3811 BlockDriver *drv = bs->drv;
a1aff5bf 3812
19cb3738
FB
3813 if (!drv)
3814 return 0;
3815 if (!drv->bdrv_is_inserted)
a1aff5bf
MA
3816 return 1;
3817 return drv->bdrv_is_inserted(bs);
19cb3738
FB
3818}
3819
3820/**
8e49ca46
MA
3821 * Return whether the media changed since the last call to this
3822 * function, or -ENOTSUP if we don't know. Most drivers don't know.
19cb3738
FB
3823 */
3824int bdrv_media_changed(BlockDriverState *bs)
3825{
3826 BlockDriver *drv = bs->drv;
19cb3738 3827
8e49ca46
MA
3828 if (drv && drv->bdrv_media_changed) {
3829 return drv->bdrv_media_changed(bs);
3830 }
3831 return -ENOTSUP;
19cb3738
FB
3832}
3833
/**
 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
 */
void bdrv_eject(BlockDriverState *bs, bool eject_flag)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_eject) {
        drv->bdrv_eject(bs, eject_flag);
    }

    /* only named (guest-visible) devices emit the QMP event */
    if (bs->device_name[0] != '\0') {
        bdrv_emit_qmp_eject_event(bs, eject_flag);
    }
}
3849
/**
 * Lock or unlock the media (if it is locked, the user won't be able
 * to eject it manually).
 */
void bdrv_lock_medium(BlockDriverState *bs, bool locked)
{
    BlockDriver *drv = bs->drv;

    trace_bdrv_lock_medium(bs, locked);

    if (drv && drv->bdrv_lock_medium) {
        drv->bdrv_lock_medium(bs, locked);
    }
}
985a03b0
TS
3864
3865/* needed for generic scsi interface */
3866
3867int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3868{
3869 BlockDriver *drv = bs->drv;
3870
3871 if (drv && drv->bdrv_ioctl)
3872 return drv->bdrv_ioctl(bs, req, buf);
3873 return -ENOTSUP;
3874}
7d780669 3875
221f715d
AL
3876BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3877 unsigned long int req, void *buf,
3878 BlockDriverCompletionFunc *cb, void *opaque)
7d780669 3879{
221f715d 3880 BlockDriver *drv = bs->drv;
7d780669 3881
221f715d
AL
3882 if (drv && drv->bdrv_aio_ioctl)
3883 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3884 return NULL;
7d780669 3885}
e268ca52 3886
/* Record the buffer alignment required by the underlying device. */
void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
{
    bs->buffer_alignment = align;
}

/* Allocate a buffer aligned for I/O on @bs (512-byte fallback when no
 * device-specific alignment is known). */
void *qemu_blockalign(BlockDriverState *bs, size_t size)
{
    return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
}
/*
 * Enable or disable dirty-block tracking.  Enabling allocates a bitmap with
 * one bit per BDRV_SECTORS_PER_DIRTY_CHUNK sectors; disabling frees it.
 * Either way the dirty counter is reset.
 */
void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
{
    int64_t bitmap_size;

    bs->dirty_count = 0;
    if (enable) {
        if (!bs->dirty_bitmap) {
            /* round sector count up to a whole number of bitmap words */
            bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
                    BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG - 1;
            bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG;

            bs->dirty_bitmap = g_new0(unsigned long, bitmap_size);
        }
    } else {
        if (bs->dirty_bitmap) {
            g_free(bs->dirty_bitmap);
            bs->dirty_bitmap = NULL;
        }
    }
}
/* Return 1 if the chunk containing @sector is marked dirty, else 0.
 * Out-of-range sectors and disabled tracking both read as clean. */
int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
{
    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;

    if (bs->dirty_bitmap &&
        (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
        return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
            (1UL << (chunk % (sizeof(unsigned long) * 8))));
    } else {
        return 0;
    }
}
3930
/* Clear the dirty bits covering [cur_sector, cur_sector + nr_sectors). */
void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                      int nr_sectors)
{
    set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
}

/* Number of dirty chunks currently set in the bitmap. */
int64_t bdrv_get_dirty_count(BlockDriverState *bs)
{
    return bs->dirty_count;
}
f88e1a42 3941
/* Mark/unmark @bs as exclusively in use (e.g. by a block job).
 * Asserts the flag actually changes to catch unbalanced callers. */
void bdrv_set_in_use(BlockDriverState *bs, int in_use)
{
    assert(bs->in_use != in_use);
    bs->in_use = in_use;
}

/* Query the in-use flag set by bdrv_set_in_use(). */
int bdrv_in_use(BlockDriverState *bs)
{
    return bs->in_use;
}
3952
/* Enable I/O status reporting for @bs and reset it to OK. */
void bdrv_iostatus_enable(BlockDriverState *bs)
{
    bs->iostatus_enabled = true;
    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}

/* The I/O status is only enabled if the drive explicitly
 * enables it _and_ the VM is configured to stop on errors */
bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
{
    return (bs->iostatus_enabled &&
           (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
            bs->on_write_error == BLOCK_ERR_STOP_ANY    ||
            bs->on_read_error == BLOCK_ERR_STOP_ANY));
}

/* Turn off I/O status reporting for @bs. */
void bdrv_iostatus_disable(BlockDriverState *bs)
{
    bs->iostatus_enabled = false;
}

/* Reset the I/O status to OK (only if reporting is effectively enabled). */
void bdrv_iostatus_reset(BlockDriverState *bs)
{
    if (bdrv_iostatus_is_enabled(bs)) {
        bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
    }
}
3980
/* XXX: Today this is set by device models because it makes the implementation
   quite simple. However, the block layer knows about the error, so it's
   possible to implement this without device models being involved */
void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
{
    /* only the first error is recorded; later ones don't overwrite it */
    if (bdrv_iostatus_is_enabled(bs) &&
        bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
        assert(error >= 0);
        bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
                                         BLOCK_DEVICE_IO_STATUS_FAILED;
    }
}
3993
/* Begin accounting one I/O: remember its size, type and start time in the
 * caller-provided cookie. */
void
bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
        enum BlockAcctType type)
{
    assert(type < BDRV_MAX_IOTYPE);

    cookie->bytes = bytes;
    cookie->start_time_ns = get_clock();
    cookie->type = type;
}

/* Finish accounting the I/O described by @cookie: fold its byte count,
 * operation count and elapsed time into the per-type device statistics. */
void
bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
{
    assert(cookie->type < BDRV_MAX_IOTYPE);

    bs->nr_bytes[cookie->type] += cookie->bytes;
    bs->nr_ops[cookie->type]++;
    bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
}
4014
f88e1a42
JS
4015int bdrv_img_create(const char *filename, const char *fmt,
4016 const char *base_filename, const char *base_fmt,
4017 char *options, uint64_t img_size, int flags)
4018{
4019 QEMUOptionParameter *param = NULL, *create_options = NULL;
d220894e 4020 QEMUOptionParameter *backing_fmt, *backing_file, *size;
f88e1a42
JS
4021 BlockDriverState *bs = NULL;
4022 BlockDriver *drv, *proto_drv;
96df67d1 4023 BlockDriver *backing_drv = NULL;
f88e1a42
JS
4024 int ret = 0;
4025
4026 /* Find driver and parse its options */
4027 drv = bdrv_find_format(fmt);
4028 if (!drv) {
4029 error_report("Unknown file format '%s'", fmt);
4f70f249 4030 ret = -EINVAL;
f88e1a42
JS
4031 goto out;
4032 }
4033
4034 proto_drv = bdrv_find_protocol(filename);
4035 if (!proto_drv) {
4036 error_report("Unknown protocol '%s'", filename);
4f70f249 4037 ret = -EINVAL;
f88e1a42
JS
4038 goto out;
4039 }
4040
4041 create_options = append_option_parameters(create_options,
4042 drv->create_options);
4043 create_options = append_option_parameters(create_options,
4044 proto_drv->create_options);
4045
4046 /* Create parameter list with default values */
4047 param = parse_option_parameters("", create_options, param);
4048
4049 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
4050
4051 /* Parse -o options */
4052 if (options) {
4053 param = parse_option_parameters(options, create_options, param);
4054 if (param == NULL) {
4055 error_report("Invalid options for file format '%s'.", fmt);
4f70f249 4056 ret = -EINVAL;
f88e1a42
JS
4057 goto out;
4058 }
4059 }
4060
4061 if (base_filename) {
4062 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
4063 base_filename)) {
4064 error_report("Backing file not supported for file format '%s'",
4065 fmt);
4f70f249 4066 ret = -EINVAL;
f88e1a42
JS
4067 goto out;
4068 }
4069 }
4070
4071 if (base_fmt) {
4072 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
4073 error_report("Backing file format not supported for file "
4074 "format '%s'", fmt);
4f70f249 4075 ret = -EINVAL;
f88e1a42
JS
4076 goto out;
4077 }
4078 }
4079
792da93a
JS
4080 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
4081 if (backing_file && backing_file->value.s) {
4082 if (!strcmp(filename, backing_file->value.s)) {
4083 error_report("Error: Trying to create an image with the "
4084 "same filename as the backing file");
4f70f249 4085 ret = -EINVAL;
792da93a
JS
4086 goto out;
4087 }
4088 }
4089
f88e1a42
JS
4090 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
4091 if (backing_fmt && backing_fmt->value.s) {
96df67d1
SH
4092 backing_drv = bdrv_find_format(backing_fmt->value.s);
4093 if (!backing_drv) {
f88e1a42
JS
4094 error_report("Unknown backing file format '%s'",
4095 backing_fmt->value.s);
4f70f249 4096 ret = -EINVAL;
f88e1a42
JS
4097 goto out;
4098 }
4099 }
4100
4101 // The size for the image must always be specified, with one exception:
4102 // If we are using a backing file, we can obtain the size from there
d220894e
KW
4103 size = get_option_parameter(param, BLOCK_OPT_SIZE);
4104 if (size && size->value.n == -1) {
f88e1a42
JS
4105 if (backing_file && backing_file->value.s) {
4106 uint64_t size;
f88e1a42 4107 char buf[32];
63090dac
PB
4108 int back_flags;
4109
4110 /* backing files always opened read-only */
4111 back_flags =
4112 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
f88e1a42 4113
f88e1a42
JS
4114 bs = bdrv_new("");
4115
63090dac 4116 ret = bdrv_open(bs, backing_file->value.s, back_flags, backing_drv);
f88e1a42 4117 if (ret < 0) {
96df67d1 4118 error_report("Could not open '%s'", backing_file->value.s);
f88e1a42
JS
4119 goto out;
4120 }
4121 bdrv_get_geometry(bs, &size);
4122 size *= 512;
4123
4124 snprintf(buf, sizeof(buf), "%" PRId64, size);
4125 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
4126 } else {
4127 error_report("Image creation needs a size parameter");
4f70f249 4128 ret = -EINVAL;
f88e1a42
JS
4129 goto out;
4130 }
4131 }
4132
4133 printf("Formatting '%s', fmt=%s ", filename, fmt);
4134 print_option_parameters(param);
4135 puts("");
4136
4137 ret = bdrv_create(drv, filename, param);
4138
4139 if (ret < 0) {
4140 if (ret == -ENOTSUP) {
4141 error_report("Formatting or formatting option not supported for "
4142 "file format '%s'", fmt);
4143 } else if (ret == -EFBIG) {
4144 error_report("The image size is too large for file format '%s'",
4145 fmt);
4146 } else {
4147 error_report("%s: error while creating %s: %s", filename, fmt,
4148 strerror(-ret));
4149 }
4150 }
4151
4152out:
4153 free_option_parameters(create_options);
4154 free_option_parameters(param);
4155
4156 if (bs) {
4157 bdrv_delete(bs);
4158 }
4f70f249
JS
4159
4160 return ret;
f88e1a42 4161}
/*
 * Allocate and start a block job of @job_type on @bs.  Fails (setting
 * @errp) when the device already has a job or is otherwise in use, or when
 * the initial speed cannot be applied.  On success the device is marked
 * in-use and the job is returned; callers cast to their concrete job type.
 */
void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
                       int64_t speed, BlockDriverCompletionFunc *cb,
                       void *opaque, Error **errp)
{
    BlockJob *job;

    if (bs->job || bdrv_in_use(bs)) {
        error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
        return NULL;
    }
    bdrv_set_in_use(bs, 1);

    job = g_malloc0(job_type->instance_size);
    job->job_type = job_type;
    job->bs = bs;
    job->cb = cb;
    job->opaque = opaque;
    bs->job = job;

    /* Only set speed when necessary to avoid NotSupported error */
    if (speed != 0) {
        Error *local_err = NULL;

        block_job_set_speed(job, speed, &local_err);
        if (error_is_set(&local_err)) {
            /* roll back everything done above */
            bs->job = NULL;
            g_free(job);
            bdrv_set_in_use(bs, 0);
            error_propagate(errp, local_err);
            return NULL;
        }
    }
    return job;
}
4197
/* Finish @job with result @ret: invoke its completion callback, detach it
 * from the device, free it and release the in-use flag. */
void block_job_complete(BlockJob *job, int ret)
{
    BlockDriverState *bs = job->bs;

    assert(bs->job == job);
    job->cb(job->opaque, ret);
    bs->job = NULL;
    g_free(job);
    bdrv_set_in_use(bs, 0);
}
4208
/* Set the rate limit of @job to @speed; sets @errp when the job type does
 * not support speed adjustment or the job rejects the value. */
void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
{
    Error *local_err = NULL;

    if (!job->job_type->set_speed) {
        error_set(errp, QERR_NOT_SUPPORTED);
        return;
    }
    job->job_type->set_speed(job, speed, &local_err);
    if (error_is_set(&local_err)) {
        error_propagate(errp, local_err);
        return;
    }

    /* record the accepted value only after the job agreed to it */
    job->speed = speed;
}
4225
/* Request asynchronous cancellation; the job observes the flag at its next
 * cancellation point. */
void block_job_cancel(BlockJob *job)
{
    job->cancelled = true;
}

/* Has cancellation been requested for @job? */
bool block_job_is_cancelled(BlockJob *job)
{
    return job->cancelled;
}
/* Cancel @job and block until it has actually stopped, pumping AIO
 * completions while the job is still busy. */
void block_job_cancel_sync(BlockJob *job)
{
    BlockDriverState *bs = job->bs;

    assert(bs->job == job);
    block_job_cancel(job);
    while (bs->job != NULL && bs->job->busy) {
        qemu_aio_wait();
    }
}