]> git.proxmox.com Git - qemu.git/blame - block.c
block: copy over job and dirty bitmap fields in bdrv_append
[qemu.git] / block.c
CommitLineData
fc01f7e7
FB
1/*
2 * QEMU System Emulator block driver
5fafdf24 3 *
fc01f7e7 4 * Copyright (c) 2003 Fabrice Bellard
5fafdf24 5 *
fc01f7e7
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
3990d09a 24#include "config-host.h"
faf07963 25#include "qemu-common.h"
6d519a5f 26#include "trace.h"
376253ec 27#include "monitor.h"
ea2384d3 28#include "block_int.h"
5efa9d5a 29#include "module.h"
f795e743 30#include "qjson.h"
68485420 31#include "qemu-coroutine.h"
b2023818 32#include "qmp-commands.h"
0563e191 33#include "qemu-timer.h"
fc01f7e7 34
71e72a19 35#ifdef CONFIG_BSD
7674e7bf
FB
36#include <sys/types.h>
37#include <sys/stat.h>
38#include <sys/ioctl.h>
72cf2d4f 39#include <sys/queue.h>
c5e97233 40#ifndef __DragonFly__
7674e7bf
FB
41#include <sys/disk.h>
42#endif
c5e97233 43#endif
7674e7bf 44
49dc768d
AL
45#ifdef _WIN32
46#include <windows.h>
47#endif
48
1c9805a3
SH
49#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
50
470c0504
SH
51typedef enum {
52 BDRV_REQ_COPY_ON_READ = 0x1,
f08f2dda 53 BDRV_REQ_ZERO_WRITE = 0x2,
470c0504
SH
54} BdrvRequestFlags;
55
7d4b4ba5 56static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
f141eafe
AL
57static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
58 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
c87c0672 59 BlockDriverCompletionFunc *cb, void *opaque);
f141eafe
AL
60static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
61 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 62 BlockDriverCompletionFunc *cb, void *opaque);
f9f05dc5
KW
63static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
64 int64_t sector_num, int nb_sectors,
65 QEMUIOVector *iov);
66static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
67 int64_t sector_num, int nb_sectors,
68 QEMUIOVector *iov);
c5fbe571 69static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
470c0504
SH
70 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
71 BdrvRequestFlags flags);
1c9805a3 72static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
f08f2dda
SH
73 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
74 BdrvRequestFlags flags);
b2a61371
SH
75static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
76 int64_t sector_num,
77 QEMUIOVector *qiov,
78 int nb_sectors,
79 BlockDriverCompletionFunc *cb,
80 void *opaque,
8c5873d6 81 bool is_write);
b2a61371 82static void coroutine_fn bdrv_co_do_rw(void *opaque);
621f0589
KW
83static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
84 int64_t sector_num, int nb_sectors);
ec530c81 85
98f90dba
ZYW
86static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
87 bool is_write, double elapsed_time, uint64_t *wait);
88static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
89 double elapsed_time, uint64_t *wait);
90static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
91 bool is_write, int64_t *wait);
92
1b7bdbc1
SH
93static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
94 QTAILQ_HEAD_INITIALIZER(bdrv_states);
7ee930d0 95
8a22f02a
SH
96static QLIST_HEAD(, BlockDriver) bdrv_drivers =
97 QLIST_HEAD_INITIALIZER(bdrv_drivers);
ea2384d3 98
f9092b10
MA
99/* The device to use for VM snapshots */
100static BlockDriverState *bs_snapshots;
101
eb852011
MA
102/* If non-zero, use only whitelisted block drivers */
103static int use_bdrv_whitelist;
104
9e0b22f4
SH
105#ifdef _WIN32
/* Return 1 when 'filename' begins with a DOS drive letter and ':'. */
static int is_windows_drive_prefix(const char *filename)
{
    char head = filename[0];

    if ((head < 'a' || head > 'z') && (head < 'A' || head > 'Z')) {
        return 0;
    }
    return filename[1] == ':';
}
112
113int is_windows_drive(const char *filename)
114{
115 if (is_windows_drive_prefix(filename) &&
116 filename[2] == '\0')
117 return 1;
118 if (strstart(filename, "\\\\.\\", NULL) ||
119 strstart(filename, "//./", NULL))
120 return 1;
121 return 0;
122}
123#endif
124
0563e191 125/* throttling disk I/O limits */
98f90dba
ZYW
126void bdrv_io_limits_disable(BlockDriverState *bs)
127{
128 bs->io_limits_enabled = false;
129
130 while (qemu_co_queue_next(&bs->throttled_reqs));
131
132 if (bs->block_timer) {
133 qemu_del_timer(bs->block_timer);
134 qemu_free_timer(bs->block_timer);
135 bs->block_timer = NULL;
136 }
137
138 bs->slice_start = 0;
139 bs->slice_end = 0;
140 bs->slice_time = 0;
141 memset(&bs->io_base, 0, sizeof(bs->io_base));
142}
143
0563e191
ZYW
144static void bdrv_block_timer(void *opaque)
145{
146 BlockDriverState *bs = opaque;
147
148 qemu_co_queue_next(&bs->throttled_reqs);
149}
150
151void bdrv_io_limits_enable(BlockDriverState *bs)
152{
153 qemu_co_queue_init(&bs->throttled_reqs);
154 bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
155 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
156 bs->slice_start = qemu_get_clock_ns(vm_clock);
157 bs->slice_end = bs->slice_start + bs->slice_time;
158 memset(&bs->io_base, 0, sizeof(bs->io_base));
159 bs->io_limits_enabled = true;
160}
161
162bool bdrv_io_limits_enabled(BlockDriverState *bs)
163{
164 BlockIOLimit *io_limits = &bs->io_limits;
165 return io_limits->bps[BLOCK_IO_LIMIT_READ]
166 || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
167 || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
168 || io_limits->iops[BLOCK_IO_LIMIT_READ]
169 || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
170 || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
171}
172
98f90dba
ZYW
173static void bdrv_io_limits_intercept(BlockDriverState *bs,
174 bool is_write, int nb_sectors)
175{
176 int64_t wait_time = -1;
177
178 if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
179 qemu_co_queue_wait(&bs->throttled_reqs);
180 }
181
182 /* In fact, we hope to keep each request's timing, in FIFO mode. The next
183 * throttled requests will not be dequeued until the current request is
184 * allowed to be serviced. So if the current request still exceeds the
185 * limits, it will be inserted to the head. All requests followed it will
186 * be still in throttled_reqs queue.
187 */
188
189 while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
190 qemu_mod_timer(bs->block_timer,
191 wait_time + qemu_get_clock_ns(vm_clock));
192 qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
193 }
194
195 qemu_co_queue_next(&bs->throttled_reqs);
196}
197
9e0b22f4
SH
/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
    const char *sep;

#ifdef _WIN32
    /* drive letters look like protocols but are not */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
    sep = path + strcspn(path, ":/\\");
#else
    sep = path + strcspn(path, ":/");
#endif

    /* a protocol prefix means ':' appears before any path separator */
    return *sep == ':';
}
215
83f64091 216int path_is_absolute(const char *path)
3b0d4f61 217{
21664424
FB
218#ifdef _WIN32
219 /* specific case for names like: "\\.\d:" */
f53f4da9 220 if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
21664424 221 return 1;
f53f4da9
PB
222 }
223 return (*path == '/' || *path == '\\');
3b9f94e1 224#else
f53f4da9 225 return (*path == '/');
3b9f94e1 226#endif
3b0d4f61
FB
227}
228
83f64091
FB
/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URL are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *prefix_end, *last_sep;
    int len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* skip over a "<protocol>:" prefix in base_path, if present */
    prefix_end = strchr(base_path, ':');
    prefix_end = prefix_end ? prefix_end + 1 : base_path;

    /* locate the start of the final path component of base_path */
    last_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *bslash = strrchr(base_path, '\\');
        if (!last_sep || bslash > last_sep) {
            last_sep = bslash;
        }
    }
#endif
    last_sep = last_sep ? last_sep + 1 : base_path;
    if (last_sep > prefix_end) {
        prefix_end = last_sep;
    }

    /* copy the directory part of base_path, then append filename */
    len = prefix_end - base_path;
    if (len > dest_size - 1) {
        len = dest_size - 1;
    }
    memcpy(dest, base_path, len);
    dest[len] = '\0';
    pstrcat(dest, dest_size, filename);
}
272
dc5a1371
PB
273void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
274{
275 if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
276 pstrcpy(dest, sz, bs->backing_file);
277 } else {
278 path_combine(dest, sz, bs->filename, bs->backing_file);
279 }
280}
281
5efa9d5a 282void bdrv_register(BlockDriver *bdrv)
ea2384d3 283{
8c5873d6
SH
284 /* Block drivers without coroutine functions need emulation */
285 if (!bdrv->bdrv_co_readv) {
f9f05dc5
KW
286 bdrv->bdrv_co_readv = bdrv_co_readv_em;
287 bdrv->bdrv_co_writev = bdrv_co_writev_em;
288
f8c35c1d
SH
289 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
290 * the block driver lacks aio we need to emulate that too.
291 */
f9f05dc5
KW
292 if (!bdrv->bdrv_aio_readv) {
293 /* add AIO emulation layer */
294 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
295 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
f9f05dc5 296 }
83f64091 297 }
b2e12bc6 298
8a22f02a 299 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
ea2384d3 300}
b338082b
FB
301
302/* create a new block device (by default it is empty) */
303BlockDriverState *bdrv_new(const char *device_name)
304{
1b7bdbc1 305 BlockDriverState *bs;
b338082b 306
7267c094 307 bs = g_malloc0(sizeof(BlockDriverState));
b338082b 308 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
ea2384d3 309 if (device_name[0] != '\0') {
1b7bdbc1 310 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
ea2384d3 311 }
28a7282a 312 bdrv_iostatus_disable(bs);
b338082b
FB
313 return bs;
314}
315
ea2384d3
FB
316BlockDriver *bdrv_find_format(const char *format_name)
317{
318 BlockDriver *drv1;
8a22f02a
SH
319 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
320 if (!strcmp(drv1->format_name, format_name)) {
ea2384d3 321 return drv1;
8a22f02a 322 }
ea2384d3
FB
323 }
324 return NULL;
325}
326
eb852011
MA
327static int bdrv_is_whitelisted(BlockDriver *drv)
328{
329 static const char *whitelist[] = {
330 CONFIG_BDRV_WHITELIST
331 };
332 const char **p;
333
334 if (!whitelist[0])
335 return 1; /* no whitelist, anything goes */
336
337 for (p = whitelist; *p; p++) {
338 if (!strcmp(drv->format_name, *p)) {
339 return 1;
340 }
341 }
342 return 0;
343}
344
345BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
346{
347 BlockDriver *drv = bdrv_find_format(format_name);
348 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
349}
350
5b7e1542
ZYW
351typedef struct CreateCo {
352 BlockDriver *drv;
353 char *filename;
354 QEMUOptionParameter *options;
355 int ret;
356} CreateCo;
357
358static void coroutine_fn bdrv_create_co_entry(void *opaque)
359{
360 CreateCo *cco = opaque;
361 assert(cco->drv);
362
363 cco->ret = cco->drv->bdrv_create(cco->filename, cco->options);
364}
365
0e7e1989
KW
366int bdrv_create(BlockDriver *drv, const char* filename,
367 QEMUOptionParameter *options)
ea2384d3 368{
5b7e1542
ZYW
369 int ret;
370
371 Coroutine *co;
372 CreateCo cco = {
373 .drv = drv,
374 .filename = g_strdup(filename),
375 .options = options,
376 .ret = NOT_DONE,
377 };
378
379 if (!drv->bdrv_create) {
ea2384d3 380 return -ENOTSUP;
5b7e1542
ZYW
381 }
382
383 if (qemu_in_coroutine()) {
384 /* Fast-path if already in coroutine context */
385 bdrv_create_co_entry(&cco);
386 } else {
387 co = qemu_coroutine_create(bdrv_create_co_entry);
388 qemu_coroutine_enter(co, &cco);
389 while (cco.ret == NOT_DONE) {
390 qemu_aio_wait();
391 }
392 }
393
394 ret = cco.ret;
395 g_free(cco.filename);
0e7e1989 396
5b7e1542 397 return ret;
ea2384d3
FB
398}
399
84a12e66
CH
400int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
401{
402 BlockDriver *drv;
403
b50cbabc 404 drv = bdrv_find_protocol(filename);
84a12e66 405 if (drv == NULL) {
16905d71 406 return -ENOENT;
84a12e66
CH
407 }
408
409 return bdrv_create(drv, filename, options);
410}
411
eba25057
JM
/*
 * Create a uniquely-named empty temporary file.
 * Return 0 upon success, otherwise a negative errno value.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater. */
    assert(size >= MAX_PATH);
    if (GetTempPath(MAX_PATH, temp_dir) &&
        GetTempFileName(temp_dir, "qem", 0, filename)) {
        return 0;
    }
    return -GetLastError();
#else
    const char *tmpdir = getenv("TMPDIR");
    int fd;

    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    /* mkstemp both reserves the name and creates the (empty) file */
    fd = mkstemp(filename);
    if (fd < 0 || close(fd)) {
        return -errno;
    }
    return 0;
#endif
}
fc01f7e7 442
84a12e66
CH
443/*
444 * Detect host devices. By convention, /dev/cdrom[N] is always
445 * recognized as a host CDROM.
446 */
447static BlockDriver *find_hdev_driver(const char *filename)
448{
449 int score_max = 0, score;
450 BlockDriver *drv = NULL, *d;
451
452 QLIST_FOREACH(d, &bdrv_drivers, list) {
453 if (d->bdrv_probe_device) {
454 score = d->bdrv_probe_device(filename);
455 if (score > score_max) {
456 score_max = score;
457 drv = d;
458 }
459 }
460 }
461
462 return drv;
463}
464
b50cbabc 465BlockDriver *bdrv_find_protocol(const char *filename)
83f64091
FB
466{
467 BlockDriver *drv1;
468 char protocol[128];
1cec71e3 469 int len;
83f64091 470 const char *p;
19cb3738 471
66f82cee
KW
472 /* TODO Drivers without bdrv_file_open must be specified explicitly */
473
39508e7a
CH
474 /*
475 * XXX(hch): we really should not let host device detection
476 * override an explicit protocol specification, but moving this
477 * later breaks access to device names with colons in them.
478 * Thanks to the brain-dead persistent naming schemes on udev-
479 * based Linux systems those actually are quite common.
480 */
481 drv1 = find_hdev_driver(filename);
482 if (drv1) {
483 return drv1;
484 }
485
9e0b22f4 486 if (!path_has_protocol(filename)) {
39508e7a 487 return bdrv_find_format("file");
84a12e66 488 }
9e0b22f4
SH
489 p = strchr(filename, ':');
490 assert(p != NULL);
1cec71e3
AL
491 len = p - filename;
492 if (len > sizeof(protocol) - 1)
493 len = sizeof(protocol) - 1;
494 memcpy(protocol, filename, len);
495 protocol[len] = '\0';
8a22f02a 496 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
5fafdf24 497 if (drv1->protocol_name &&
8a22f02a 498 !strcmp(drv1->protocol_name, protocol)) {
83f64091 499 return drv1;
8a22f02a 500 }
83f64091
FB
501 }
502 return NULL;
503}
504
c98ac35d 505static int find_image_format(const char *filename, BlockDriver **pdrv)
f3a5d3f8
CH
506{
507 int ret, score, score_max;
508 BlockDriver *drv1, *drv;
509 uint8_t buf[2048];
510 BlockDriverState *bs;
511
f5edb014 512 ret = bdrv_file_open(&bs, filename, 0);
c98ac35d
SW
513 if (ret < 0) {
514 *pdrv = NULL;
515 return ret;
516 }
f8ea0b00 517
08a00559
KW
518 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
519 if (bs->sg || !bdrv_is_inserted(bs)) {
1a396859 520 bdrv_delete(bs);
c98ac35d
SW
521 drv = bdrv_find_format("raw");
522 if (!drv) {
523 ret = -ENOENT;
524 }
525 *pdrv = drv;
526 return ret;
1a396859 527 }
f8ea0b00 528
83f64091
FB
529 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
530 bdrv_delete(bs);
531 if (ret < 0) {
c98ac35d
SW
532 *pdrv = NULL;
533 return ret;
83f64091
FB
534 }
535
ea2384d3 536 score_max = 0;
84a12e66 537 drv = NULL;
8a22f02a 538 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
83f64091
FB
539 if (drv1->bdrv_probe) {
540 score = drv1->bdrv_probe(buf, ret, filename);
541 if (score > score_max) {
542 score_max = score;
543 drv = drv1;
544 }
0849bf08 545 }
fc01f7e7 546 }
c98ac35d
SW
547 if (!drv) {
548 ret = -ENOENT;
549 }
550 *pdrv = drv;
551 return ret;
ea2384d3
FB
552}
553
51762288
SH
554/**
555 * Set the current 'total_sectors' value
556 */
557static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
558{
559 BlockDriver *drv = bs->drv;
560
396759ad
NB
561 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
562 if (bs->sg)
563 return 0;
564
51762288
SH
565 /* query actual device if possible, otherwise just trust the hint */
566 if (drv->bdrv_getlength) {
567 int64_t length = drv->bdrv_getlength(bs);
568 if (length < 0) {
569 return length;
570 }
571 hint = length >> BDRV_SECTOR_BITS;
572 }
573
574 bs->total_sectors = hint;
575 return 0;
576}
577
c3993cdc
SH
578/**
579 * Set open flags for a given cache mode
580 *
581 * Return 0 on success, -1 if the cache mode was invalid.
582 */
583int bdrv_parse_cache_flags(const char *mode, int *flags)
584{
585 *flags &= ~BDRV_O_CACHE_MASK;
586
587 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
588 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
92196b2f
SH
589 } else if (!strcmp(mode, "directsync")) {
590 *flags |= BDRV_O_NOCACHE;
c3993cdc
SH
591 } else if (!strcmp(mode, "writeback")) {
592 *flags |= BDRV_O_CACHE_WB;
593 } else if (!strcmp(mode, "unsafe")) {
594 *flags |= BDRV_O_CACHE_WB;
595 *flags |= BDRV_O_NO_FLUSH;
596 } else if (!strcmp(mode, "writethrough")) {
597 /* this is the default */
598 } else {
599 return -1;
600 }
601
602 return 0;
603}
604
53fec9d3
SH
605/**
606 * The copy-on-read flag is actually a reference count so multiple users may
607 * use the feature without worrying about clobbering its previous state.
608 * Copy-on-read stays enabled until all users have called to disable it.
609 */
610void bdrv_enable_copy_on_read(BlockDriverState *bs)
611{
612 bs->copy_on_read++;
613}
614
615void bdrv_disable_copy_on_read(BlockDriverState *bs)
616{
617 assert(bs->copy_on_read > 0);
618 bs->copy_on_read--;
619}
620
57915332
KW
621/*
622 * Common part for opening disk images and files
623 */
624static int bdrv_open_common(BlockDriverState *bs, const char *filename,
625 int flags, BlockDriver *drv)
626{
627 int ret, open_flags;
628
629 assert(drv != NULL);
6405875c 630 assert(bs->file == NULL);
57915332 631
28dcee10
SH
632 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
633
57915332 634 bs->open_flags = flags;
57915332
KW
635 bs->buffer_alignment = 512;
636
53fec9d3
SH
637 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
638 if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
639 bdrv_enable_copy_on_read(bs);
640 }
641
57915332
KW
642 pstrcpy(bs->filename, sizeof(bs->filename), filename);
643
644 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
645 return -ENOTSUP;
646 }
647
648 bs->drv = drv;
7267c094 649 bs->opaque = g_malloc0(drv->instance_size);
57915332 650
03f541bd 651 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
e1e9b0ac 652 open_flags = flags | BDRV_O_CACHE_WB;
57915332
KW
653
654 /*
655 * Clear flags that are internal to the block layer before opening the
656 * image.
657 */
e1e9b0ac 658 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
57915332
KW
659
660 /*
ebabb67a 661 * Snapshots should be writable.
57915332
KW
662 */
663 if (bs->is_temporary) {
664 open_flags |= BDRV_O_RDWR;
665 }
666
e7c63796
SH
667 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
668
66f82cee
KW
669 /* Open the image, either directly or using a protocol */
670 if (drv->bdrv_file_open) {
671 ret = drv->bdrv_file_open(bs, filename, open_flags);
672 } else {
673 ret = bdrv_file_open(&bs->file, filename, open_flags);
674 if (ret >= 0) {
675 ret = drv->bdrv_open(bs, open_flags);
676 }
677 }
678
57915332
KW
679 if (ret < 0) {
680 goto free_and_fail;
681 }
682
51762288
SH
683 ret = refresh_total_sectors(bs, bs->total_sectors);
684 if (ret < 0) {
685 goto free_and_fail;
57915332 686 }
51762288 687
57915332
KW
688#ifndef _WIN32
689 if (bs->is_temporary) {
690 unlink(filename);
691 }
692#endif
693 return 0;
694
695free_and_fail:
66f82cee
KW
696 if (bs->file) {
697 bdrv_delete(bs->file);
698 bs->file = NULL;
699 }
7267c094 700 g_free(bs->opaque);
57915332
KW
701 bs->opaque = NULL;
702 bs->drv = NULL;
703 return ret;
704}
705
b6ce07aa
KW
706/*
707 * Opens a file using a protocol (file, host_device, nbd, ...)
708 */
83f64091 709int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
ea2384d3 710{
83f64091 711 BlockDriverState *bs;
6db95603 712 BlockDriver *drv;
83f64091
FB
713 int ret;
714
b50cbabc 715 drv = bdrv_find_protocol(filename);
6db95603
CH
716 if (!drv) {
717 return -ENOENT;
718 }
719
83f64091 720 bs = bdrv_new("");
b6ce07aa 721 ret = bdrv_open_common(bs, filename, flags, drv);
83f64091
FB
722 if (ret < 0) {
723 bdrv_delete(bs);
724 return ret;
3b0d4f61 725 }
71d0770c 726 bs->growable = 1;
83f64091
FB
727 *pbs = bs;
728 return 0;
729}
730
b6ce07aa
KW
731/*
732 * Opens a disk image (raw, qcow2, vmdk, ...)
733 */
d6e9098e
KW
734int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
735 BlockDriver *drv)
ea2384d3 736{
b6ce07aa 737 int ret;
2b572816 738 char tmp_filename[PATH_MAX];
712e7874 739
83f64091 740 if (flags & BDRV_O_SNAPSHOT) {
ea2384d3
FB
741 BlockDriverState *bs1;
742 int64_t total_size;
7c96d46e 743 int is_protocol = 0;
91a073a9
KW
744 BlockDriver *bdrv_qcow2;
745 QEMUOptionParameter *options;
b6ce07aa 746 char backing_filename[PATH_MAX];
3b46e624 747
ea2384d3
FB
748 /* if snapshot, we create a temporary backing file and open it
749 instead of opening 'filename' directly */
33e3963e 750
ea2384d3
FB
751 /* if there is a backing file, use it */
752 bs1 = bdrv_new("");
d6e9098e 753 ret = bdrv_open(bs1, filename, 0, drv);
51d7c00c 754 if (ret < 0) {
ea2384d3 755 bdrv_delete(bs1);
51d7c00c 756 return ret;
ea2384d3 757 }
3e82990b 758 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
7c96d46e
AL
759
760 if (bs1->drv && bs1->drv->protocol_name)
761 is_protocol = 1;
762
ea2384d3 763 bdrv_delete(bs1);
3b46e624 764
eba25057
JM
765 ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename));
766 if (ret < 0) {
767 return ret;
768 }
7c96d46e
AL
769
770 /* Real path is meaningless for protocols */
771 if (is_protocol)
772 snprintf(backing_filename, sizeof(backing_filename),
773 "%s", filename);
114cdfa9
KS
774 else if (!realpath(filename, backing_filename))
775 return -errno;
7c96d46e 776
91a073a9
KW
777 bdrv_qcow2 = bdrv_find_format("qcow2");
778 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
779
3e82990b 780 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
91a073a9
KW
781 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
782 if (drv) {
783 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
784 drv->format_name);
785 }
786
787 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
d748768c 788 free_option_parameters(options);
51d7c00c
AL
789 if (ret < 0) {
790 return ret;
ea2384d3 791 }
91a073a9 792
ea2384d3 793 filename = tmp_filename;
91a073a9 794 drv = bdrv_qcow2;
ea2384d3
FB
795 bs->is_temporary = 1;
796 }
712e7874 797
b6ce07aa 798 /* Find the right image format driver */
6db95603 799 if (!drv) {
c98ac35d 800 ret = find_image_format(filename, &drv);
51d7c00c 801 }
6987307c 802
51d7c00c 803 if (!drv) {
51d7c00c 804 goto unlink_and_fail;
ea2384d3 805 }
b6ce07aa
KW
806
807 /* Open the image */
808 ret = bdrv_open_common(bs, filename, flags, drv);
809 if (ret < 0) {
6987307c
CH
810 goto unlink_and_fail;
811 }
812
b6ce07aa
KW
813 /* If there is a backing file, use it */
814 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
815 char backing_filename[PATH_MAX];
816 int back_flags;
817 BlockDriver *back_drv = NULL;
818
819 bs->backing_hd = bdrv_new("");
dc5a1371
PB
820 bdrv_get_full_backing_filename(bs, backing_filename,
821 sizeof(backing_filename));
df2dbb4a
SH
822
823 if (bs->backing_format[0] != '\0') {
b6ce07aa 824 back_drv = bdrv_find_format(bs->backing_format);
df2dbb4a 825 }
b6ce07aa
KW
826
827 /* backing files always opened read-only */
828 back_flags =
829 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
830
831 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
832 if (ret < 0) {
833 bdrv_close(bs);
834 return ret;
835 }
836 if (bs->is_temporary) {
837 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
838 } else {
839 /* base image inherits from "parent" */
840 bs->backing_hd->keep_read_only = bs->keep_read_only;
841 }
842 }
843
844 if (!bdrv_key_required(bs)) {
7d4b4ba5 845 bdrv_dev_change_media_cb(bs, true);
b6ce07aa
KW
846 }
847
98f90dba
ZYW
848 /* throttling disk I/O limits */
849 if (bs->io_limits_enabled) {
850 bdrv_io_limits_enable(bs);
851 }
852
b6ce07aa
KW
853 return 0;
854
855unlink_and_fail:
856 if (bs->is_temporary) {
857 unlink(filename);
858 }
859 return ret;
860}
861
fc01f7e7
FB
862void bdrv_close(BlockDriverState *bs)
863{
80ccf93b 864 bdrv_flush(bs);
19cb3738 865 if (bs->drv) {
3e914655
PB
866 if (bs->job) {
867 block_job_cancel_sync(bs->job);
868 }
7094f12f
KW
869 bdrv_drain_all();
870
f9092b10
MA
871 if (bs == bs_snapshots) {
872 bs_snapshots = NULL;
873 }
557df6ac 874 if (bs->backing_hd) {
ea2384d3 875 bdrv_delete(bs->backing_hd);
557df6ac
SH
876 bs->backing_hd = NULL;
877 }
ea2384d3 878 bs->drv->bdrv_close(bs);
7267c094 879 g_free(bs->opaque);
ea2384d3
FB
880#ifdef _WIN32
881 if (bs->is_temporary) {
882 unlink(bs->filename);
883 }
67b915a5 884#endif
ea2384d3
FB
885 bs->opaque = NULL;
886 bs->drv = NULL;
53fec9d3 887 bs->copy_on_read = 0;
a275fa42
PB
888 bs->backing_file[0] = '\0';
889 bs->backing_format[0] = '\0';
6405875c
PB
890 bs->total_sectors = 0;
891 bs->encrypted = 0;
892 bs->valid_key = 0;
893 bs->sg = 0;
894 bs->growable = 0;
b338082b 895
66f82cee 896 if (bs->file != NULL) {
0ac9377d
PB
897 bdrv_delete(bs->file);
898 bs->file = NULL;
66f82cee
KW
899 }
900
7d4b4ba5 901 bdrv_dev_change_media_cb(bs, false);
b338082b 902 }
98f90dba
ZYW
903
904 /*throttling disk I/O limits*/
905 if (bs->io_limits_enabled) {
906 bdrv_io_limits_disable(bs);
907 }
b338082b
FB
908}
909
2bc93fed
MK
910void bdrv_close_all(void)
911{
912 BlockDriverState *bs;
913
914 QTAILQ_FOREACH(bs, &bdrv_states, list) {
915 bdrv_close(bs);
916 }
917}
918
922453bc
SH
919/*
920 * Wait for pending requests to complete across all BlockDriverStates
921 *
922 * This function does not flush data to disk, use bdrv_flush_all() for that
923 * after calling this function.
4c355d53
ZYW
924 *
925 * Note that completion of an asynchronous I/O operation can trigger any
926 * number of other I/O operations on other devices---for example a coroutine
927 * can be arbitrarily complex and a constant flow of I/O can come until the
928 * coroutine is complete. Because of this, it is not possible to have a
929 * function to drain a single device's I/O queue.
922453bc
SH
930 */
931void bdrv_drain_all(void)
932{
933 BlockDriverState *bs;
4c355d53
ZYW
934 bool busy;
935
936 do {
937 busy = qemu_aio_wait();
922453bc 938
4c355d53
ZYW
939 /* FIXME: We do not have timer support here, so this is effectively
940 * a busy wait.
941 */
942 QTAILQ_FOREACH(bs, &bdrv_states, list) {
943 if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
944 qemu_co_queue_restart_all(&bs->throttled_reqs);
945 busy = true;
946 }
947 }
948 } while (busy);
922453bc
SH
949
950 /* If requests are still pending there is a bug somewhere */
951 QTAILQ_FOREACH(bs, &bdrv_states, list) {
952 assert(QLIST_EMPTY(&bs->tracked_requests));
953 assert(qemu_co_queue_empty(&bs->throttled_reqs));
954 }
955}
956
d22b2f41
RH
957/* make a BlockDriverState anonymous by removing from bdrv_state list.
958 Also, NULL terminate the device_name to prevent double remove */
959void bdrv_make_anon(BlockDriverState *bs)
960{
961 if (bs->device_name[0] != '\0') {
962 QTAILQ_REMOVE(&bdrv_states, bs, list);
963 }
964 bs->device_name[0] = '\0';
965}
966
e023b2e2
PB
967static void bdrv_rebind(BlockDriverState *bs)
968{
969 if (bs->drv && bs->drv->bdrv_rebind) {
970 bs->drv->bdrv_rebind(bs);
971 }
972}
973
8802d1fd
JC
974/*
975 * Add new bs contents at the top of an image chain while the chain is
976 * live, while keeping required fields on the top layer.
977 *
978 * This will modify the BlockDriverState fields, and swap contents
979 * between bs_new and bs_top. Both bs_new and bs_top are modified.
980 *
f6801b83
JC
981 * bs_new is required to be anonymous.
982 *
8802d1fd
JC
983 * This function does not create any image files.
984 */
985void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
986{
987 BlockDriverState tmp;
988
f6801b83
JC
989 /* bs_new must be anonymous */
990 assert(bs_new->device_name[0] == '\0');
8802d1fd
JC
991
992 tmp = *bs_new;
993
994 /* there are some fields that need to stay on the top layer: */
3a389e79 995 tmp.open_flags = bs_top->open_flags;
8802d1fd
JC
996
997 /* dev info */
998 tmp.dev_ops = bs_top->dev_ops;
999 tmp.dev_opaque = bs_top->dev_opaque;
1000 tmp.dev = bs_top->dev;
1001 tmp.buffer_alignment = bs_top->buffer_alignment;
1002 tmp.copy_on_read = bs_top->copy_on_read;
1003
c4a248a1
PB
1004 tmp.enable_write_cache = bs_top->enable_write_cache;
1005
8802d1fd
JC
1006 /* i/o timing parameters */
1007 tmp.slice_time = bs_top->slice_time;
1008 tmp.slice_start = bs_top->slice_start;
1009 tmp.slice_end = bs_top->slice_end;
1010 tmp.io_limits = bs_top->io_limits;
1011 tmp.io_base = bs_top->io_base;
1012 tmp.throttled_reqs = bs_top->throttled_reqs;
1013 tmp.block_timer = bs_top->block_timer;
1014 tmp.io_limits_enabled = bs_top->io_limits_enabled;
1015
1016 /* geometry */
1017 tmp.cyls = bs_top->cyls;
1018 tmp.heads = bs_top->heads;
1019 tmp.secs = bs_top->secs;
1020 tmp.translation = bs_top->translation;
1021
1022 /* r/w error */
1023 tmp.on_read_error = bs_top->on_read_error;
1024 tmp.on_write_error = bs_top->on_write_error;
1025
1026 /* i/o status */
1027 tmp.iostatus_enabled = bs_top->iostatus_enabled;
1028 tmp.iostatus = bs_top->iostatus;
1029
a9fc4408
PB
1030 /* dirty bitmap */
1031 tmp.dirty_count = bs_top->dirty_count;
1032 tmp.dirty_bitmap = bs_top->dirty_bitmap;
1033 assert(bs_new->dirty_bitmap == NULL);
1034
1035 /* job */
1036 tmp.in_use = bs_top->in_use;
1037 tmp.job = bs_top->job;
1038 assert(bs_new->job == NULL);
1039
8802d1fd
JC
1040 /* keep the same entry in bdrv_states */
1041 pstrcpy(tmp.device_name, sizeof(tmp.device_name), bs_top->device_name);
1042 tmp.list = bs_top->list;
1043
1044 /* The contents of 'tmp' will become bs_top, as we are
1045 * swapping bs_new and bs_top contents. */
1046 tmp.backing_hd = bs_new;
1047 pstrcpy(tmp.backing_file, sizeof(tmp.backing_file), bs_top->filename);
f8d6bba1
MA
1048 pstrcpy(tmp.backing_format, sizeof(tmp.backing_format),
1049 bs_top->drv ? bs_top->drv->format_name : "");
8802d1fd
JC
1050
1051 /* swap contents of the fixed new bs and the current top */
1052 *bs_new = *bs_top;
1053 *bs_top = tmp;
1054
f6801b83
JC
1055 /* device_name[] was carried over from the old bs_top. bs_new
1056 * shouldn't be in bdrv_states, so we need to make device_name[]
1057 * reflect the anonymity of bs_new
1058 */
1059 bs_new->device_name[0] = '\0';
1060
8802d1fd
JC
1061 /* clear the copied fields in the new backing file */
1062 bdrv_detach_dev(bs_new, bs_new->dev);
1063
a9fc4408
PB
1064 bs_new->job = NULL;
1065 bs_new->in_use = 0;
1066 bs_new->dirty_bitmap = NULL;
1067 bs_new->dirty_count = 0;
1068
8802d1fd
JC
1069 qemu_co_queue_init(&bs_new->throttled_reqs);
1070 memset(&bs_new->io_base, 0, sizeof(bs_new->io_base));
1071 memset(&bs_new->io_limits, 0, sizeof(bs_new->io_limits));
1072 bdrv_iostatus_disable(bs_new);
1073
1074 /* we don't use bdrv_io_limits_disable() for this, because we don't want
1075 * to affect or delete the block_timer, as it has been moved to bs_top */
1076 bs_new->io_limits_enabled = false;
1077 bs_new->block_timer = NULL;
1078 bs_new->slice_time = 0;
1079 bs_new->slice_start = 0;
1080 bs_new->slice_end = 0;
e023b2e2
PB
1081
1082 bdrv_rebind(bs_new);
1083 bdrv_rebind(bs_top);
8802d1fd
JC
1084}
1085
/* Destroy @bs: remove it from the global list, close it and free it.
 * The caller must already have detached the device model, cancelled any
 * block job and released the in-use reference (enforced by the asserts). */
void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->dev);
    assert(!bs->job);
    assert(!bs->in_use);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    bdrv_close(bs);

    /* the default snapshot target must not point at a freed BDS */
    assert(bs != bs_snapshots);
    g_free(bs);
}
1100
/* Attach a device model to @bs.
 * Returns 0 on success, -EBUSY if a device is already attached. */
int bdrv_attach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    if (bs->dev) {
        return -EBUSY;
    }
    bs->dev = dev;
    bdrv_iostatus_reset(bs);
    return 0;
}
1111
/* TODO qdevified devices don't use this, remove when devices are qdevified */
void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
{
    /* attaching can only fail if a device is already present, which is a
     * programming error for callers of the _nofail variant */
    if (bdrv_attach_dev(bs, dev) < 0) {
        abort();
    }
}
1119
/* Detach the device model @dev from @bs and reset the per-device
 * callbacks and buffer alignment to their defaults. */
void bdrv_detach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    assert(bs->dev == dev);
    bs->dev = NULL;
    bs->dev_ops = NULL;
    bs->dev_opaque = NULL;
    bs->buffer_alignment = 512;   /* default sector alignment */
}
1129
/* TODO change to return DeviceState * when all users are qdevified */
void *bdrv_get_attached_dev(BlockDriverState *bs)
{
    return bs->dev;
}
1135
/* Register the device-model callback table for @bs. */
void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
                      void *opaque)
{
    bs->dev_ops = ops;
    bs->dev_opaque = opaque;
    /* a device with removable media must not stay cached as the default
     * snapshot target */
    if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
        bs_snapshots = NULL;
    }
}
1145
329c0a48
LC
1146void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
1147 BlockQMPEventAction action, int is_read)
1148{
1149 QObject *data;
1150 const char *action_str;
1151
1152 switch (action) {
1153 case BDRV_ACTION_REPORT:
1154 action_str = "report";
1155 break;
1156 case BDRV_ACTION_IGNORE:
1157 action_str = "ignore";
1158 break;
1159 case BDRV_ACTION_STOP:
1160 action_str = "stop";
1161 break;
1162 default:
1163 abort();
1164 }
1165
1166 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1167 bdrv->device_name,
1168 action_str,
1169 is_read ? "read" : "write");
1170 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1171
1172 qobject_decref(data);
1173}
1174
/* Emit a QMP DEVICE_TRAY_MOVED event; @ejected is the new tray state. */
static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
{
    QObject *data;

    data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
                              bdrv_get_device_name(bs), ejected);
    monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data);

    qobject_decref(data);
}
1185
/* Notify the attached device model of a medium change (@load true when a
 * medium is inserted) and emit the matching tray-moved QMP events. */
static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
{
    if (bs->dev_ops && bs->dev_ops->change_media_cb) {
        bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
        bs->dev_ops->change_media_cb(bs->dev_opaque, load);
        /* both events may fire for one call: open first, then close */
        if (tray_was_closed) {
            /* tray open */
            bdrv_emit_qmp_eject_event(bs, true);
        }
        if (load) {
            /* tray close */
            bdrv_emit_qmp_eject_event(bs, false);
        }
    }
}
1201
/* A BDS counts as removable when no device is attached yet, or when the
 * attached device model registered a medium-change callback. */
bool bdrv_dev_has_removable_media(BlockDriverState *bs)
{
    return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
}
1206
/* Forward an eject request to the device model, if it handles one. */
void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
{
    if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
        bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
    }
}
1213
/* Query the device model's tray state; false when no callback exists. */
bool bdrv_dev_is_tray_open(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->is_tray_open) {
        return bs->dev_ops->is_tray_open(bs->dev_opaque);
    }
    return false;
}
1221
/* Notify the device model that the backing image was resized. */
static void bdrv_dev_resize_cb(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->resize_cb) {
        bs->dev_ops->resize_cb(bs->dev_opaque);
    }
}
1228
/* Query whether the device model has locked the medium in place;
 * false when no callback is registered. */
bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
        return bs->dev_ops->is_medium_locked(bs->dev_opaque);
    }
    return false;
}
1236
e97fc193
AL
1237/*
1238 * Run consistency checks on an image
1239 *
e076f338 1240 * Returns 0 if the check could be completed (it doesn't mean that the image is
a1c7273b 1241 * free of errors) or -errno when an internal error occurred. The results of the
e076f338 1242 * check are stored in res.
e97fc193 1243 */
4534ff54 1244int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
e97fc193
AL
1245{
1246 if (bs->drv->bdrv_check == NULL) {
1247 return -ENOTSUP;
1248 }
1249
e076f338 1250 memset(res, 0, sizeof(*res));
4534ff54 1251 return bs->drv->bdrv_check(bs, res, fix);
e97fc193
AL
1252}
1253
8a426614
KW
1254#define COMMIT_BUF_SECTORS 2048
1255
33e3963e
FB
1256/* commit COW file into the raw image */
1257int bdrv_commit(BlockDriverState *bs)
1258{
19cb3738 1259 BlockDriver *drv = bs->drv;
ee181196 1260 BlockDriver *backing_drv;
8a426614
KW
1261 int64_t sector, total_sectors;
1262 int n, ro, open_flags;
4dca4b63 1263 int ret = 0, rw_ret = 0;
8a426614 1264 uint8_t *buf;
4dca4b63
NS
1265 char filename[1024];
1266 BlockDriverState *bs_rw, *bs_ro;
33e3963e 1267
19cb3738
FB
1268 if (!drv)
1269 return -ENOMEDIUM;
4dca4b63
NS
1270
1271 if (!bs->backing_hd) {
1272 return -ENOTSUP;
33e3963e
FB
1273 }
1274
4dca4b63
NS
1275 if (bs->backing_hd->keep_read_only) {
1276 return -EACCES;
1277 }
ee181196 1278
2d3735d3
SH
1279 if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
1280 return -EBUSY;
1281 }
1282
ee181196 1283 backing_drv = bs->backing_hd->drv;
4dca4b63
NS
1284 ro = bs->backing_hd->read_only;
1285 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
1286 open_flags = bs->backing_hd->open_flags;
1287
1288 if (ro) {
1289 /* re-open as RW */
1290 bdrv_delete(bs->backing_hd);
1291 bs->backing_hd = NULL;
1292 bs_rw = bdrv_new("");
ee181196
KW
1293 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
1294 backing_drv);
4dca4b63
NS
1295 if (rw_ret < 0) {
1296 bdrv_delete(bs_rw);
1297 /* try to re-open read-only */
1298 bs_ro = bdrv_new("");
ee181196
KW
1299 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1300 backing_drv);
4dca4b63
NS
1301 if (ret < 0) {
1302 bdrv_delete(bs_ro);
1303 /* drive not functional anymore */
1304 bs->drv = NULL;
1305 return ret;
1306 }
1307 bs->backing_hd = bs_ro;
1308 return rw_ret;
1309 }
1310 bs->backing_hd = bs_rw;
ea2384d3 1311 }
33e3963e 1312
6ea44308 1313 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
7267c094 1314 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
8a426614
KW
1315
1316 for (sector = 0; sector < total_sectors; sector += n) {
05c4af54 1317 if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
8a426614
KW
1318
1319 if (bdrv_read(bs, sector, buf, n) != 0) {
1320 ret = -EIO;
1321 goto ro_cleanup;
1322 }
1323
1324 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
1325 ret = -EIO;
1326 goto ro_cleanup;
1327 }
ea2384d3 1328 }
33e3963e 1329 }
95389c86 1330
1d44952f
CH
1331 if (drv->bdrv_make_empty) {
1332 ret = drv->bdrv_make_empty(bs);
1333 bdrv_flush(bs);
1334 }
95389c86 1335
3f5075ae
CH
1336 /*
1337 * Make sure all data we wrote to the backing device is actually
1338 * stable on disk.
1339 */
1340 if (bs->backing_hd)
1341 bdrv_flush(bs->backing_hd);
4dca4b63
NS
1342
1343ro_cleanup:
7267c094 1344 g_free(buf);
4dca4b63
NS
1345
1346 if (ro) {
1347 /* re-open as RO */
1348 bdrv_delete(bs->backing_hd);
1349 bs->backing_hd = NULL;
1350 bs_ro = bdrv_new("");
ee181196
KW
1351 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1352 backing_drv);
4dca4b63
NS
1353 if (ret < 0) {
1354 bdrv_delete(bs_ro);
1355 /* drive not functional anymore */
1356 bs->drv = NULL;
1357 return ret;
1358 }
1359 bs->backing_hd = bs_ro;
1360 bs->backing_hd->keep_read_only = 0;
1361 }
1362
1d44952f 1363 return ret;
33e3963e
FB
1364}
1365
/* Commit every open BlockDriverState in turn; stops at the first failure
 * and returns its error code, 0 when all commits succeed. */
int bdrv_commit_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        int ret = bdrv_commit(bs);
        if (ret < 0) {
            return ret;
        }
    }
    return 0;
}
1378
/* Book-keeping record for one in-flight request, used to detect and
 * serialize overlapping requests (see wait_for_overlapping_requests()). */
struct BdrvTrackedRequest {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    bool is_write;
    QLIST_ENTRY(BdrvTrackedRequest) list;
    Coroutine *co; /* owner, used for deadlock detection */
    CoQueue wait_queue; /* coroutines blocked on this request */
};
1388
/**
 * Remove an active request from the tracked requests list
 *
 * This function should be called when a tracked request is completing.
 */
static void tracked_request_end(BdrvTrackedRequest *req)
{
    QLIST_REMOVE(req, list);
    /* wake every coroutine that blocked on this request */
    qemu_co_queue_restart_all(&req->wait_queue);
}
1399
1400/**
1401 * Add an active request to the tracked requests list
1402 */
1403static void tracked_request_begin(BdrvTrackedRequest *req,
1404 BlockDriverState *bs,
1405 int64_t sector_num,
1406 int nb_sectors, bool is_write)
1407{
1408 *req = (BdrvTrackedRequest){
1409 .bs = bs,
1410 .sector_num = sector_num,
1411 .nb_sectors = nb_sectors,
1412 .is_write = is_write,
5f8b6491 1413 .co = qemu_coroutine_self(),
dbffbdcf
SH
1414 };
1415
f4658285
SH
1416 qemu_co_queue_init(&req->wait_queue);
1417
dbffbdcf
SH
1418 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
1419}
1420
d83947ac
SH
1421/**
1422 * Round a region to cluster boundaries
1423 */
1424static void round_to_clusters(BlockDriverState *bs,
1425 int64_t sector_num, int nb_sectors,
1426 int64_t *cluster_sector_num,
1427 int *cluster_nb_sectors)
1428{
1429 BlockDriverInfo bdi;
1430
1431 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
1432 *cluster_sector_num = sector_num;
1433 *cluster_nb_sectors = nb_sectors;
1434 } else {
1435 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
1436 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
1437 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
1438 nb_sectors, c);
1439 }
1440}
1441
f4658285
SH
1442static bool tracked_request_overlaps(BdrvTrackedRequest *req,
1443 int64_t sector_num, int nb_sectors) {
d83947ac
SH
1444 /* aaaa bbbb */
1445 if (sector_num >= req->sector_num + req->nb_sectors) {
1446 return false;
1447 }
1448 /* bbbb aaaa */
1449 if (req->sector_num >= sector_num + nb_sectors) {
1450 return false;
1451 }
1452 return true;
f4658285
SH
1453}
1454
/* Block the calling coroutine until no tracked request overlaps the
 * cluster-aligned extension of the given region.  The list is rescanned
 * from the start after every wait because it may have changed. */
static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors)
{
    BdrvTrackedRequest *req;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    bool retry;

    /* If we touch the same cluster it counts as an overlap.  This guarantees
     * that allocating writes will be serialized and not race with each other
     * for the same cluster.  For example, in copy-on-read it ensures that the
     * CoR read and write operations are atomic and guest writes cannot
     * interleave between them.
     */
    round_to_clusters(bs, sector_num, nb_sectors,
                      &cluster_sector_num, &cluster_nb_sectors);

    do {
        retry = false;
        QLIST_FOREACH(req, &bs->tracked_requests, list) {
            if (tracked_request_overlaps(req, cluster_sector_num,
                                         cluster_nb_sectors)) {
                /* Hitting this means there was a reentrant request, for
                 * example, a block driver issuing nested requests.  This must
                 * never happen since it means deadlock.
                 */
                assert(qemu_coroutine_self() != req->co);

                qemu_co_queue_wait(&req->wait_queue);
                retry = true;
                break;
            }
        }
    } while (retry);
}
1490
756e6736
KW
1491/*
1492 * Return values:
1493 * 0 - success
1494 * -EINVAL - backing format specified, but no file
1495 * -ENOSPC - can't update the backing file because no space is left in the
1496 * image file header
1497 * -ENOTSUP - format driver doesn't support changing the backing file
1498 */
1499int bdrv_change_backing_file(BlockDriverState *bs,
1500 const char *backing_file, const char *backing_fmt)
1501{
1502 BlockDriver *drv = bs->drv;
469ef350 1503 int ret;
756e6736 1504
5f377794
PB
1505 /* Backing file format doesn't make sense without a backing file */
1506 if (backing_fmt && !backing_file) {
1507 return -EINVAL;
1508 }
1509
756e6736 1510 if (drv->bdrv_change_backing_file != NULL) {
469ef350 1511 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
756e6736 1512 } else {
469ef350 1513 ret = -ENOTSUP;
756e6736 1514 }
469ef350
PB
1515
1516 if (ret == 0) {
1517 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
1518 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
1519 }
1520 return ret;
756e6736
KW
1521}
1522
71d0770c
AL
1523static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1524 size_t size)
1525{
1526 int64_t len;
1527
1528 if (!bdrv_is_inserted(bs))
1529 return -ENOMEDIUM;
1530
1531 if (bs->growable)
1532 return 0;
1533
1534 len = bdrv_getlength(bs);
1535
fbb7b4e0
KW
1536 if (offset < 0)
1537 return -EIO;
1538
1539 if ((offset > len) || (len - offset < size))
71d0770c
AL
1540 return -EIO;
1541
1542 return 0;
1543}
1544
/* Like bdrv_check_byte_request(), but for a sector-based range. */
static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors)
{
    return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
                                   nb_sectors * BDRV_SECTOR_SIZE);
}
1551
/* Context passed from bdrv_rw_co() to its coroutine entry point. */
typedef struct RwCo {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    QEMUIOVector *qiov;
    bool is_write;
    int ret;        /* completion status; NOT_DONE while still pending */
} RwCo;

/* Coroutine entry point for bdrv_rw_co(): dispatch to the coroutine
 * read or write implementation and record the result. */
static void coroutine_fn bdrv_rw_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    if (!rwco->is_write) {
        rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
                                     rwco->nb_sectors, rwco->qiov, 0);
    } else {
        rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
                                      rwco->nb_sectors, rwco->qiov, 0);
    }
}
e7a8a783 1573
/*
 * Process a synchronous request using coroutines
 *
 * Wraps @buf in a single-element QEMUIOVector and runs the request in a
 * coroutine, busy-waiting on qemu_aio_wait() until it completes when not
 * already called from coroutine context.
 */
static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
                      int nb_sectors, bool is_write)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
    };
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .qiov = &qiov,
        .is_write = is_write,
        .ret = NOT_DONE,
    };

    qemu_iovec_init_external(&qiov, &iov, 1);

    /**
     * In sync call context, when the vcpu is blocked, this throttling timer
     * will not fire; so the I/O throttling function has to be disabled here
     * if it has been enabled.
     */
    if (bs->io_limits_enabled) {
        fprintf(stderr, "Disabling I/O throttling on '%s' due "
                        "to synchronous I/O.\n", bdrv_get_device_name(bs));
        bdrv_io_limits_disable(bs);
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_rw_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_rw_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }
    return rwco.ret;
}
b338082b 1620
/* return < 0 if error. See bdrv_write() for the return codes */
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
              uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
}
1627
71df14fc
PB
1628#define BITS_PER_LONG (sizeof(unsigned long) * 8)
1629
7cd1e32a 1630static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
a55eb92c 1631 int nb_sectors, int dirty)
7cd1e32a 1632{
1633 int64_t start, end;
c6d22830 1634 unsigned long val, idx, bit;
a55eb92c 1635
6ea44308 1636 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
c6d22830 1637 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c
JK
1638
1639 for (; start <= end; start++) {
71df14fc
PB
1640 idx = start / BITS_PER_LONG;
1641 bit = start % BITS_PER_LONG;
c6d22830
JK
1642 val = bs->dirty_bitmap[idx];
1643 if (dirty) {
6d59fec1 1644 if (!(val & (1UL << bit))) {
aaa0eb75 1645 bs->dirty_count++;
6d59fec1 1646 val |= 1UL << bit;
aaa0eb75 1647 }
c6d22830 1648 } else {
6d59fec1 1649 if (val & (1UL << bit)) {
aaa0eb75 1650 bs->dirty_count--;
6d59fec1 1651 val &= ~(1UL << bit);
aaa0eb75 1652 }
c6d22830
JK
1653 }
1654 bs->dirty_bitmap[idx] = val;
7cd1e32a 1655 }
1656}
1657
/* Return < 0 if error. Important errors are:
  -EIO         generic I/O error (may happen for all errors)
  -ENOMEDIUM   No media inserted.
  -EINVAL      Invalid sector number or nb_sectors
  -EACCES      Trying to write a read-only device
*/
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
               const uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
}
1669
/* Read @count1 bytes starting at byte @offset into @buf.  Unaligned head
 * and tail are serviced through a one-sector bounce buffer; the aligned
 * middle is read directly into @buf.  Returns @count1 on success or a
 * negative errno propagated from bdrv_read(). */
int bdrv_pread(BlockDriverState *bs, int64_t offset,
               void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first read to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* read the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf, count);
    }
    return count1;
}
1714
/* Write @count1 bytes from @buf at byte @offset.  Unaligned head and
 * tail sectors are handled read-modify-write via a one-sector bounce
 * buffer.  Returns @count1 on success or a negative errno from
 * bdrv_read()/bdrv_write(). */
int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
                const void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first write to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        /* read-modify-write for the partial leading sector */
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* write the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        /* read-modify-write for the partial trailing sector */
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf, buf, count);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
    }
    return count1;
}
83f64091 1763
f08145fe
KW
1764/*
1765 * Writes to the file and ensures that no writes are reordered across this
1766 * request (acts as a barrier)
1767 *
1768 * Returns 0 on success, -errno in error cases.
1769 */
1770int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1771 const void *buf, int count)
1772{
1773 int ret;
1774
1775 ret = bdrv_pwrite(bs, offset, buf, count);
1776 if (ret < 0) {
1777 return ret;
1778 }
1779
f05fa4ad
PB
1780 /* No flush needed for cache modes that already do it */
1781 if (bs->enable_write_cache) {
f08145fe
KW
1782 bdrv_flush(bs);
1783 }
1784
1785 return 0;
1786}
1787
/* Copy-on-read worker: read a whole cluster through a bounce buffer,
 * write it back into this image so the data becomes locally allocated,
 * then copy the originally requested sub-range into @qiov. */
static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    /* Perform I/O through a temporary buffer so that users who scribble over
     * their read buffer while the operation is in progress do not end up
     * modifying the image file.  This is critical for zero-copy guest I/O
     * where anything might happen inside guest memory.
     */
    void *bounce_buffer;

    BlockDriver *drv = bs->drv;
    struct iovec iov;
    QEMUIOVector bounce_qiov;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    size_t skip_bytes;
    int ret;

    /* Cover entire cluster so no additional backing file I/O is required when
     * allocating cluster in the image file.
     */
    round_to_clusters(bs, sector_num, nb_sectors,
                      &cluster_sector_num, &cluster_nb_sectors);

    trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
                                   cluster_sector_num, cluster_nb_sectors);

    iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
    iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
    qemu_iovec_init_external(&bounce_qiov, &iov, 1);

    ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
                             &bounce_qiov);
    if (ret < 0) {
        goto err;
    }

    /* write all-zero clusters through the efficient zero-write path when
     * the driver supports it */
    if (drv->bdrv_co_write_zeroes &&
        buffer_is_zero(bounce_buffer, iov.iov_len)) {
        ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
                                      cluster_nb_sectors);
    } else {
        /* This does not change the data on the disk, it is not necessary
         * to flush even in cache=writethrough mode.
         */
        ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
                                  &bounce_qiov);
    }

    if (ret < 0) {
        /* It might be okay to ignore write errors for guest requests.  If this
         * is a deliberate copy-on-read then we don't want to ignore the error.
         * Simply report it in all cases.
         */
        goto err;
    }

    /* hand only the requested sub-range of the cluster back to the caller */
    skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
    qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
                           nb_sectors * BDRV_SECTOR_SIZE);

err:
    qemu_vfree(bounce_buffer);
    return ret;
}
1853
/*
 * Handle a read request in coroutine context
 */
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    int ret;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }

    /* throttling disk read I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, false, nb_sectors);
    }

    /* a device-wide copy-on-read setting applies to every request */
    if (bs->copy_on_read) {
        flags |= BDRV_REQ_COPY_ON_READ;
    }
    if (flags & BDRV_REQ_COPY_ON_READ) {
        bs->copy_on_read_in_flight++;
    }

    /* serialize against overlapping requests while any copy-on-read is in
     * flight, so the CoR read+write pair stays atomic */
    if (bs->copy_on_read_in_flight) {
        wait_for_overlapping_requests(bs, sector_num, nb_sectors);
    }

    tracked_request_begin(&req, bs, sector_num, nb_sectors, false);

    if (flags & BDRV_REQ_COPY_ON_READ) {
        int pnum;

        ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
        if (ret < 0) {
            goto out;
        }

        /* take the copy-on-read path if any part is still unallocated */
        if (!ret || pnum != nb_sectors) {
            ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
            goto out;
        }
    }

    ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);

out:
    tracked_request_end(&req);

    if (flags & BDRV_REQ_COPY_ON_READ) {
        bs->copy_on_read_in_flight--;
    }

    return ret;
}
1915
/* Public coroutine read entry point: trace and forward with no flags. */
int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_readv(bs, sector_num, nb_sectors);

    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
}
1923
/* Coroutine read that forces copy-on-read for the covered range. */
int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);

    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
                            BDRV_REQ_COPY_ON_READ);
}
1932
/* Write zeroes to a sector range, preferring the driver's dedicated
 * zero-write callback and falling back to a zeroed bounce buffer. */
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors)
{
    BlockDriver *drv = bs->drv;
    QEMUIOVector qiov;
    struct iovec iov;
    int ret;

    /* TODO Emulate only part of misaligned requests instead of letting block
     * drivers return -ENOTSUP and emulate everything */

    /* First try the efficient write zeroes operation */
    if (drv->bdrv_co_write_zeroes) {
        ret = drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
        if (ret != -ENOTSUP) {
            return ret;
        }
    }

    /* Fall back to bounce buffer if write zeroes is unsupported */
    iov.iov_len  = nb_sectors * BDRV_SECTOR_SIZE;
    iov.iov_base = qemu_blockalign(bs, iov.iov_len);
    memset(iov.iov_base, 0, iov.iov_len);
    qemu_iovec_init_external(&qiov, &iov, 1);

    ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);

    qemu_vfree(iov.iov_base);
    return ret;
}
1963
/*
 * Handle a write request in coroutine context
 */
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    int ret;

    if (!bs->drv) {
        return -ENOMEDIUM;
    }
    if (bs->read_only) {
        return -EACCES;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }

    /* throttling disk write I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, true, nb_sectors);
    }

    /* writes must not race with an in-flight copy-on-read bounce write */
    if (bs->copy_on_read_in_flight) {
        wait_for_overlapping_requests(bs, sector_num, nb_sectors);
    }

    tracked_request_begin(&req, bs, sector_num, nb_sectors, true);

    if (flags & BDRV_REQ_ZERO_WRITE) {
        ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
    } else {
        ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
    }

    /* without write cache enabled, flush after every successful write */
    if (ret == 0 && !bs->enable_write_cache) {
        ret = bdrv_co_flush(bs);
    }

    if (bs->dirty_bitmap) {
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
    }

    /* remember the highest sector written so far */
    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
        bs->wr_highest_sector = sector_num + nb_sectors - 1;
    }

    tracked_request_end(&req);

    return ret;
}
2018
/* Public coroutine write entry point: trace and forward with no flags. */
int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_writev(bs, sector_num, nb_sectors);

    return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
}
2026
/* Public coroutine zero-write entry point; no data buffer is needed. */
int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
                                      int64_t sector_num, int nb_sectors)
{
    trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);

    return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
                             BDRV_REQ_ZERO_WRITE);
}
2035
83f64091
FB
2036/**
2037 * Truncate file to 'offset' bytes (needed only for file protocols)
2038 */
2039int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2040{
2041 BlockDriver *drv = bs->drv;
51762288 2042 int ret;
83f64091 2043 if (!drv)
19cb3738 2044 return -ENOMEDIUM;
83f64091
FB
2045 if (!drv->bdrv_truncate)
2046 return -ENOTSUP;
59f2689d
NS
2047 if (bs->read_only)
2048 return -EACCES;
8591675f
MT
2049 if (bdrv_in_use(bs))
2050 return -EBUSY;
51762288
SH
2051 ret = drv->bdrv_truncate(bs, offset);
2052 if (ret == 0) {
2053 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
145feb17 2054 bdrv_dev_resize_cb(bs);
51762288
SH
2055 }
2056 return ret;
83f64091
FB
2057}
2058
4a1d5e1f
FZ
2059/**
2060 * Length of a allocated file in bytes. Sparse files are counted by actual
2061 * allocated space. Return < 0 if error or unknown.
2062 */
2063int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2064{
2065 BlockDriver *drv = bs->drv;
2066 if (!drv) {
2067 return -ENOMEDIUM;
2068 }
2069 if (drv->bdrv_get_allocated_file_size) {
2070 return drv->bdrv_get_allocated_file_size(bs);
2071 }
2072 if (bs->file) {
2073 return bdrv_get_allocated_file_size(bs->file);
2074 }
2075 return -ENOTSUP;
2076}
2077
83f64091
FB
2078/**
2079 * Length of a file in bytes. Return < 0 if error or unknown.
2080 */
2081int64_t bdrv_getlength(BlockDriverState *bs)
2082{
2083 BlockDriver *drv = bs->drv;
2084 if (!drv)
19cb3738 2085 return -ENOMEDIUM;
51762288 2086
2c6942fa 2087 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
46a4e4e6
SH
2088 if (drv->bdrv_getlength) {
2089 return drv->bdrv_getlength(bs);
2090 }
83f64091 2091 }
46a4e4e6 2092 return bs->total_sectors * BDRV_SECTOR_SIZE;
fc01f7e7
FB
2093}
2094
19cb3738 2095/* return 0 as number of sectors if no device present or error */
96b8f136 2096void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
fc01f7e7 2097{
19cb3738
FB
2098 int64_t length;
2099 length = bdrv_getlength(bs);
2100 if (length < 0)
2101 length = 0;
2102 else
6ea44308 2103 length = length >> BDRV_SECTOR_BITS;
19cb3738 2104 *nb_sectors_ptr = length;
fc01f7e7 2105}
cf98951b 2106
/* On-disk layout of one MSDOS (MBR) partition table entry. */
struct partition {
    uint8_t boot_ind;           /* 0x80 - active */
    uint8_t head;               /* starting head */
    uint8_t sector;             /* starting sector */
    uint8_t cyl;                /* starting cylinder */
    uint8_t sys_ind;            /* What partition type */
    uint8_t end_head;           /* end head */
    uint8_t end_sector;         /* end sector */
    uint8_t end_cyl;            /* end cylinder */
    uint32_t start_sect;        /* starting sector counting from 0 */
    uint32_t nr_sects;          /* nr of sectors in partition */
} QEMU_PACKED;
2120/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
2121static int guess_disk_lchs(BlockDriverState *bs,
2122 int *pcylinders, int *pheads, int *psectors)
2123{
eb5a3165 2124 uint8_t buf[BDRV_SECTOR_SIZE];
f3d54fc4
AL
2125 int ret, i, heads, sectors, cylinders;
2126 struct partition *p;
2127 uint32_t nr_sects;
a38131b6 2128 uint64_t nb_sectors;
498e386c 2129 bool enabled;
f3d54fc4
AL
2130
2131 bdrv_get_geometry(bs, &nb_sectors);
2132
498e386c
ZYW
2133 /**
2134 * The function will be invoked during startup not only in sync I/O mode,
2135 * but also in async I/O mode. So the I/O throttling function has to
2136 * be disabled temporarily here, not permanently.
2137 */
2138 enabled = bs->io_limits_enabled;
2139 bs->io_limits_enabled = false;
f3d54fc4 2140 ret = bdrv_read(bs, 0, buf, 1);
498e386c 2141 bs->io_limits_enabled = enabled;
f3d54fc4
AL
2142 if (ret < 0)
2143 return -1;
2144 /* test msdos magic */
2145 if (buf[510] != 0x55 || buf[511] != 0xaa)
2146 return -1;
2147 for(i = 0; i < 4; i++) {
2148 p = ((struct partition *)(buf + 0x1be)) + i;
2149 nr_sects = le32_to_cpu(p->nr_sects);
2150 if (nr_sects && p->end_head) {
2151 /* We make the assumption that the partition terminates on
2152 a cylinder boundary */
2153 heads = p->end_head + 1;
2154 sectors = p->end_sector & 63;
2155 if (sectors == 0)
2156 continue;
2157 cylinders = nb_sectors / (heads * sectors);
2158 if (cylinders < 1 || cylinders > 16383)
2159 continue;
2160 *pheads = heads;
2161 *psectors = sectors;
2162 *pcylinders = cylinders;
2163#if 0
2164 printf("guessed geometry: LCHS=%d %d %d\n",
2165 cylinders, heads, sectors);
2166#endif
2167 return 0;
2168 }
2169 }
2170 return -1;
2171}
2172
2173void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
2174{
2175 int translation, lba_detected = 0;
2176 int cylinders, heads, secs;
a38131b6 2177 uint64_t nb_sectors;
f3d54fc4
AL
2178
2179 /* if a geometry hint is available, use it */
2180 bdrv_get_geometry(bs, &nb_sectors);
2181 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
2182 translation = bdrv_get_translation_hint(bs);
2183 if (cylinders != 0) {
2184 *pcyls = cylinders;
2185 *pheads = heads;
2186 *psecs = secs;
2187 } else {
2188 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
2189 if (heads > 16) {
2190 /* if heads > 16, it means that a BIOS LBA
2191 translation was active, so the default
2192 hardware geometry is OK */
2193 lba_detected = 1;
2194 goto default_geometry;
2195 } else {
2196 *pcyls = cylinders;
2197 *pheads = heads;
2198 *psecs = secs;
2199 /* disable any translation to be in sync with
2200 the logical geometry */
2201 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
2202 bdrv_set_translation_hint(bs,
2203 BIOS_ATA_TRANSLATION_NONE);
2204 }
2205 }
2206 } else {
2207 default_geometry:
2208 /* if no geometry, use a standard physical disk geometry */
2209 cylinders = nb_sectors / (16 * 63);
2210
2211 if (cylinders > 16383)
2212 cylinders = 16383;
2213 else if (cylinders < 2)
2214 cylinders = 2;
2215 *pcyls = cylinders;
2216 *pheads = 16;
2217 *psecs = 63;
2218 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
2219 if ((*pcyls * *pheads) <= 131072) {
2220 bdrv_set_translation_hint(bs,
2221 BIOS_ATA_TRANSLATION_LARGE);
2222 } else {
2223 bdrv_set_translation_hint(bs,
2224 BIOS_ATA_TRANSLATION_LBA);
2225 }
2226 }
2227 }
2228 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
2229 }
2230}
2231
5fafdf24 2232void bdrv_set_geometry_hint(BlockDriverState *bs,
b338082b
FB
2233 int cyls, int heads, int secs)
2234{
2235 bs->cyls = cyls;
2236 bs->heads = heads;
2237 bs->secs = secs;
2238}
2239
46d4767d
FB
2240void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
2241{
2242 bs->translation = translation;
2243}
2244
5fafdf24 2245void bdrv_get_geometry_hint(BlockDriverState *bs,
b338082b
FB
2246 int *pcyls, int *pheads, int *psecs)
2247{
2248 *pcyls = bs->cyls;
2249 *pheads = bs->heads;
2250 *psecs = bs->secs;
2251}
2252
0563e191
ZYW
2253/* throttling disk io limits */
2254void bdrv_set_io_limits(BlockDriverState *bs,
2255 BlockIOLimit *io_limits)
2256{
2257 bs->io_limits = *io_limits;
2258 bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
2259}
2260
5bbdbb46
BS
2261/* Recognize floppy formats */
2262typedef struct FDFormat {
2263 FDriveType drive;
2264 uint8_t last_sect;
2265 uint8_t max_track;
2266 uint8_t max_head;
f8d3d128 2267 FDriveRate rate;
5bbdbb46
BS
2268} FDFormat;
2269
2270static const FDFormat fd_formats[] = {
2271 /* First entry is default format */
2272 /* 1.44 MB 3"1/2 floppy disks */
f8d3d128
HP
2273 { FDRIVE_DRV_144, 18, 80, 1, FDRIVE_RATE_500K, },
2274 { FDRIVE_DRV_144, 20, 80, 1, FDRIVE_RATE_500K, },
2275 { FDRIVE_DRV_144, 21, 80, 1, FDRIVE_RATE_500K, },
2276 { FDRIVE_DRV_144, 21, 82, 1, FDRIVE_RATE_500K, },
2277 { FDRIVE_DRV_144, 21, 83, 1, FDRIVE_RATE_500K, },
2278 { FDRIVE_DRV_144, 22, 80, 1, FDRIVE_RATE_500K, },
2279 { FDRIVE_DRV_144, 23, 80, 1, FDRIVE_RATE_500K, },
2280 { FDRIVE_DRV_144, 24, 80, 1, FDRIVE_RATE_500K, },
5bbdbb46 2281 /* 2.88 MB 3"1/2 floppy disks */
f8d3d128
HP
2282 { FDRIVE_DRV_288, 36, 80, 1, FDRIVE_RATE_1M, },
2283 { FDRIVE_DRV_288, 39, 80, 1, FDRIVE_RATE_1M, },
2284 { FDRIVE_DRV_288, 40, 80, 1, FDRIVE_RATE_1M, },
2285 { FDRIVE_DRV_288, 44, 80, 1, FDRIVE_RATE_1M, },
2286 { FDRIVE_DRV_288, 48, 80, 1, FDRIVE_RATE_1M, },
5bbdbb46 2287 /* 720 kB 3"1/2 floppy disks */
f8d3d128
HP
2288 { FDRIVE_DRV_144, 9, 80, 1, FDRIVE_RATE_250K, },
2289 { FDRIVE_DRV_144, 10, 80, 1, FDRIVE_RATE_250K, },
2290 { FDRIVE_DRV_144, 10, 82, 1, FDRIVE_RATE_250K, },
2291 { FDRIVE_DRV_144, 10, 83, 1, FDRIVE_RATE_250K, },
2292 { FDRIVE_DRV_144, 13, 80, 1, FDRIVE_RATE_250K, },
2293 { FDRIVE_DRV_144, 14, 80, 1, FDRIVE_RATE_250K, },
5bbdbb46 2294 /* 1.2 MB 5"1/4 floppy disks */
f8d3d128
HP
2295 { FDRIVE_DRV_120, 15, 80, 1, FDRIVE_RATE_500K, },
2296 { FDRIVE_DRV_120, 18, 80, 1, FDRIVE_RATE_500K, },
2297 { FDRIVE_DRV_120, 18, 82, 1, FDRIVE_RATE_500K, },
2298 { FDRIVE_DRV_120, 18, 83, 1, FDRIVE_RATE_500K, },
2299 { FDRIVE_DRV_120, 20, 80, 1, FDRIVE_RATE_500K, },
5bbdbb46 2300 /* 720 kB 5"1/4 floppy disks */
f8d3d128
HP
2301 { FDRIVE_DRV_120, 9, 80, 1, FDRIVE_RATE_250K, },
2302 { FDRIVE_DRV_120, 11, 80, 1, FDRIVE_RATE_250K, },
5bbdbb46 2303 /* 360 kB 5"1/4 floppy disks */
f8d3d128
HP
2304 { FDRIVE_DRV_120, 9, 40, 1, FDRIVE_RATE_300K, },
2305 { FDRIVE_DRV_120, 9, 40, 0, FDRIVE_RATE_300K, },
2306 { FDRIVE_DRV_120, 10, 41, 1, FDRIVE_RATE_300K, },
2307 { FDRIVE_DRV_120, 10, 42, 1, FDRIVE_RATE_300K, },
5bbdbb46 2308 /* 320 kB 5"1/4 floppy disks */
f8d3d128
HP
2309 { FDRIVE_DRV_120, 8, 40, 1, FDRIVE_RATE_250K, },
2310 { FDRIVE_DRV_120, 8, 40, 0, FDRIVE_RATE_250K, },
5bbdbb46 2311 /* 360 kB must match 5"1/4 better than 3"1/2... */
f8d3d128 2312 { FDRIVE_DRV_144, 9, 80, 0, FDRIVE_RATE_250K, },
5bbdbb46 2313 /* end */
f8d3d128 2314 { FDRIVE_DRV_NONE, -1, -1, 0, 0, },
5bbdbb46
BS
2315};
2316
2317void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
2318 int *max_track, int *last_sect,
f8d3d128
HP
2319 FDriveType drive_in, FDriveType *drive,
2320 FDriveRate *rate)
5bbdbb46
BS
2321{
2322 const FDFormat *parse;
2323 uint64_t nb_sectors, size;
2324 int i, first_match, match;
2325
2326 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
2327 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
2328 /* User defined disk */
f8d3d128 2329 *rate = FDRIVE_RATE_500K;
5bbdbb46
BS
2330 } else {
2331 bdrv_get_geometry(bs, &nb_sectors);
2332 match = -1;
2333 first_match = -1;
2334 for (i = 0; ; i++) {
2335 parse = &fd_formats[i];
2336 if (parse->drive == FDRIVE_DRV_NONE) {
2337 break;
2338 }
2339 if (drive_in == parse->drive ||
2340 drive_in == FDRIVE_DRV_NONE) {
2341 size = (parse->max_head + 1) * parse->max_track *
2342 parse->last_sect;
2343 if (nb_sectors == size) {
2344 match = i;
2345 break;
2346 }
2347 if (first_match == -1) {
2348 first_match = i;
2349 }
2350 }
2351 }
2352 if (match == -1) {
2353 if (first_match == -1) {
2354 match = 1;
2355 } else {
2356 match = first_match;
2357 }
2358 parse = &fd_formats[match];
2359 }
2360 *nb_heads = parse->max_head + 1;
2361 *max_track = parse->max_track;
2362 *last_sect = parse->last_sect;
2363 *drive = parse->drive;
f8d3d128 2364 *rate = parse->rate;
5bbdbb46
BS
2365 }
2366}
2367
46d4767d
FB
2368int bdrv_get_translation_hint(BlockDriverState *bs)
2369{
2370 return bs->translation;
2371}
2372
abd7f68d
MA
2373void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
2374 BlockErrorAction on_write_error)
2375{
2376 bs->on_read_error = on_read_error;
2377 bs->on_write_error = on_write_error;
2378}
2379
2380BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
2381{
2382 return is_read ? bs->on_read_error : bs->on_write_error;
2383}
2384
b338082b
FB
2385int bdrv_is_read_only(BlockDriverState *bs)
2386{
2387 return bs->read_only;
2388}
2389
985a03b0
TS
2390int bdrv_is_sg(BlockDriverState *bs)
2391{
2392 return bs->sg;
2393}
2394
e900a7b7
CH
2395int bdrv_enable_write_cache(BlockDriverState *bs)
2396{
2397 return bs->enable_write_cache;
2398}
2399
425b0148
PB
2400void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2401{
2402 bs->enable_write_cache = wce;
2403}
2404
ea2384d3
FB
2405int bdrv_is_encrypted(BlockDriverState *bs)
2406{
2407 if (bs->backing_hd && bs->backing_hd->encrypted)
2408 return 1;
2409 return bs->encrypted;
2410}
2411
c0f4ce77
AL
2412int bdrv_key_required(BlockDriverState *bs)
2413{
2414 BlockDriverState *backing_hd = bs->backing_hd;
2415
2416 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2417 return 1;
2418 return (bs->encrypted && !bs->valid_key);
2419}
2420
ea2384d3
FB
2421int bdrv_set_key(BlockDriverState *bs, const char *key)
2422{
2423 int ret;
2424 if (bs->backing_hd && bs->backing_hd->encrypted) {
2425 ret = bdrv_set_key(bs->backing_hd, key);
2426 if (ret < 0)
2427 return ret;
2428 if (!bs->encrypted)
2429 return 0;
2430 }
fd04a2ae
SH
2431 if (!bs->encrypted) {
2432 return -EINVAL;
2433 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2434 return -ENOMEDIUM;
2435 }
c0f4ce77 2436 ret = bs->drv->bdrv_set_key(bs, key);
bb5fc20f
AL
2437 if (ret < 0) {
2438 bs->valid_key = 0;
2439 } else if (!bs->valid_key) {
2440 bs->valid_key = 1;
2441 /* call the change callback now, we skipped it on open */
7d4b4ba5 2442 bdrv_dev_change_media_cb(bs, true);
bb5fc20f 2443 }
c0f4ce77 2444 return ret;
ea2384d3
FB
2445}
2446
f8d6bba1 2447const char *bdrv_get_format_name(BlockDriverState *bs)
ea2384d3 2448{
f8d6bba1 2449 return bs->drv ? bs->drv->format_name : NULL;
ea2384d3
FB
2450}
2451
5fafdf24 2452void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
ea2384d3
FB
2453 void *opaque)
2454{
2455 BlockDriver *drv;
2456
8a22f02a 2457 QLIST_FOREACH(drv, &bdrv_drivers, list) {
ea2384d3
FB
2458 it(opaque, drv->format_name);
2459 }
2460}
2461
b338082b
FB
2462BlockDriverState *bdrv_find(const char *name)
2463{
2464 BlockDriverState *bs;
2465
1b7bdbc1
SH
2466 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2467 if (!strcmp(name, bs->device_name)) {
b338082b 2468 return bs;
1b7bdbc1 2469 }
b338082b
FB
2470 }
2471 return NULL;
2472}
2473
2f399b0a
MA
2474BlockDriverState *bdrv_next(BlockDriverState *bs)
2475{
2476 if (!bs) {
2477 return QTAILQ_FIRST(&bdrv_states);
2478 }
2479 return QTAILQ_NEXT(bs, list);
2480}
2481
51de9760 2482void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
81d0912d
FB
2483{
2484 BlockDriverState *bs;
2485
1b7bdbc1 2486 QTAILQ_FOREACH(bs, &bdrv_states, list) {
51de9760 2487 it(opaque, bs);
81d0912d
FB
2488 }
2489}
2490
ea2384d3
FB
2491const char *bdrv_get_device_name(BlockDriverState *bs)
2492{
2493 return bs->device_name;
2494}
2495
c8433287
MA
2496int bdrv_get_flags(BlockDriverState *bs)
2497{
2498 return bs->open_flags;
2499}
2500
c6ca28d6
AL
2501void bdrv_flush_all(void)
2502{
2503 BlockDriverState *bs;
2504
1b7bdbc1 2505 QTAILQ_FOREACH(bs, &bdrv_states, list) {
29cdb251 2506 bdrv_flush(bs);
1b7bdbc1 2507 }
c6ca28d6
AL
2508}
2509
f2feebbd
KW
2510int bdrv_has_zero_init(BlockDriverState *bs)
2511{
2512 assert(bs->drv);
2513
336c1c12
KW
2514 if (bs->drv->bdrv_has_zero_init) {
2515 return bs->drv->bdrv_has_zero_init(bs);
f2feebbd
KW
2516 }
2517
2518 return 1;
2519}
2520
376ae3f1
SH
2521typedef struct BdrvCoIsAllocatedData {
2522 BlockDriverState *bs;
2523 int64_t sector_num;
2524 int nb_sectors;
2525 int *pnum;
2526 int ret;
2527 bool done;
2528} BdrvCoIsAllocatedData;
2529
f58c7b35
TS
2530/*
2531 * Returns true iff the specified sector is present in the disk image. Drivers
2532 * not implementing the functionality are assumed to not support backing files,
2533 * hence all their sectors are reported as allocated.
2534 *
bd9533e3
SH
2535 * If 'sector_num' is beyond the end of the disk image the return value is 0
2536 * and 'pnum' is set to 0.
2537 *
f58c7b35
TS
2538 * 'pnum' is set to the number of sectors (including and immediately following
2539 * the specified sector) that are known to be in the same
2540 * allocated/unallocated state.
2541 *
bd9533e3
SH
2542 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
2543 * beyond the end of the disk image it will be clamped.
f58c7b35 2544 */
060f51c9
SH
2545int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
2546 int nb_sectors, int *pnum)
f58c7b35 2547{
bd9533e3
SH
2548 int64_t n;
2549
2550 if (sector_num >= bs->total_sectors) {
2551 *pnum = 0;
2552 return 0;
2553 }
2554
2555 n = bs->total_sectors - sector_num;
2556 if (n < nb_sectors) {
2557 nb_sectors = n;
2558 }
2559
6aebab14 2560 if (!bs->drv->bdrv_co_is_allocated) {
bd9533e3 2561 *pnum = nb_sectors;
f58c7b35
TS
2562 return 1;
2563 }
6aebab14 2564
060f51c9
SH
2565 return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
2566}
2567
2568/* Coroutine wrapper for bdrv_is_allocated() */
2569static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
2570{
2571 BdrvCoIsAllocatedData *data = opaque;
2572 BlockDriverState *bs = data->bs;
2573
2574 data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
2575 data->pnum);
2576 data->done = true;
2577}
2578
2579/*
2580 * Synchronous wrapper around bdrv_co_is_allocated().
2581 *
2582 * See bdrv_co_is_allocated() for details.
2583 */
2584int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
2585 int *pnum)
2586{
6aebab14
SH
2587 Coroutine *co;
2588 BdrvCoIsAllocatedData data = {
2589 .bs = bs,
2590 .sector_num = sector_num,
2591 .nb_sectors = nb_sectors,
2592 .pnum = pnum,
2593 .done = false,
2594 };
2595
2596 co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
2597 qemu_coroutine_enter(co, &data);
2598 while (!data.done) {
2599 qemu_aio_wait();
2600 }
2601 return data.ret;
f58c7b35
TS
2602}
2603
188a7bbf
PB
2604/*
2605 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
2606 *
2607 * Return true if the given sector is allocated in any image between
2608 * BASE and TOP (inclusive). BASE can be NULL to check if the given
2609 * sector is allocated in any image of the chain. Return false otherwise.
2610 *
2611 * 'pnum' is set to the number of sectors (including and immediately following
2612 * the specified sector) that are known to be in the same
2613 * allocated/unallocated state.
2614 *
2615 */
2616int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
2617 BlockDriverState *base,
2618 int64_t sector_num,
2619 int nb_sectors, int *pnum)
2620{
2621 BlockDriverState *intermediate;
2622 int ret, n = nb_sectors;
2623
2624 intermediate = top;
2625 while (intermediate && intermediate != base) {
2626 int pnum_inter;
2627 ret = bdrv_co_is_allocated(intermediate, sector_num, nb_sectors,
2628 &pnum_inter);
2629 if (ret < 0) {
2630 return ret;
2631 } else if (ret) {
2632 *pnum = pnum_inter;
2633 return 1;
2634 }
2635
2636 /*
2637 * [sector_num, nb_sectors] is unallocated on top but intermediate
2638 * might have
2639 *
2640 * [sector_num+x, nr_sectors] allocated.
2641 */
2642 if (n > pnum_inter) {
2643 n = pnum_inter;
2644 }
2645
2646 intermediate = intermediate->backing_hd;
2647 }
2648
2649 *pnum = n;
2650 return 0;
2651}
2652
b2023818 2653BlockInfoList *qmp_query_block(Error **errp)
b338082b 2654{
b2023818 2655 BlockInfoList *head = NULL, *cur_item = NULL;
b338082b
FB
2656 BlockDriverState *bs;
2657
1b7bdbc1 2658 QTAILQ_FOREACH(bs, &bdrv_states, list) {
b2023818 2659 BlockInfoList *info = g_malloc0(sizeof(*info));
d15e5465 2660
b2023818
LC
2661 info->value = g_malloc0(sizeof(*info->value));
2662 info->value->device = g_strdup(bs->device_name);
2663 info->value->type = g_strdup("unknown");
2664 info->value->locked = bdrv_dev_is_medium_locked(bs);
2665 info->value->removable = bdrv_dev_has_removable_media(bs);
d15e5465 2666
e4def80b 2667 if (bdrv_dev_has_removable_media(bs)) {
b2023818
LC
2668 info->value->has_tray_open = true;
2669 info->value->tray_open = bdrv_dev_is_tray_open(bs);
e4def80b 2670 }
f04ef601
LC
2671
2672 if (bdrv_iostatus_is_enabled(bs)) {
b2023818
LC
2673 info->value->has_io_status = true;
2674 info->value->io_status = bs->iostatus;
f04ef601
LC
2675 }
2676
19cb3738 2677 if (bs->drv) {
b2023818
LC
2678 info->value->has_inserted = true;
2679 info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
2680 info->value->inserted->file = g_strdup(bs->filename);
2681 info->value->inserted->ro = bs->read_only;
2682 info->value->inserted->drv = g_strdup(bs->drv->format_name);
2683 info->value->inserted->encrypted = bs->encrypted;
2684 if (bs->backing_file[0]) {
2685 info->value->inserted->has_backing_file = true;
2686 info->value->inserted->backing_file = g_strdup(bs->backing_file);
376253ec 2687 }
727f005e
ZYW
2688
2689 if (bs->io_limits_enabled) {
2690 info->value->inserted->bps =
2691 bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2692 info->value->inserted->bps_rd =
2693 bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
2694 info->value->inserted->bps_wr =
2695 bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
2696 info->value->inserted->iops =
2697 bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2698 info->value->inserted->iops_rd =
2699 bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
2700 info->value->inserted->iops_wr =
2701 bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
2702 }
b2023818 2703 }
d15e5465 2704
b2023818
LC
2705 /* XXX: waiting for the qapi to support GSList */
2706 if (!cur_item) {
2707 head = cur_item = info;
2708 } else {
2709 cur_item->next = info;
2710 cur_item = info;
b338082b 2711 }
b338082b 2712 }
d15e5465 2713
b2023818 2714 return head;
b338082b 2715}
a36e69dd 2716
f11f57e4
LC
2717/* Consider exposing this as a full fledged QMP command */
2718static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
2719{
2720 BlockStats *s;
2721
2722 s = g_malloc0(sizeof(*s));
2723
2724 if (bs->device_name[0]) {
2725 s->has_device = true;
2726 s->device = g_strdup(bs->device_name);
294cc35f
KW
2727 }
2728
f11f57e4
LC
2729 s->stats = g_malloc0(sizeof(*s->stats));
2730 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
2731 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
2732 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
2733 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
2734 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
2735 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
2736 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
2737 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
2738 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
2739
294cc35f 2740 if (bs->file) {
f11f57e4
LC
2741 s->has_parent = true;
2742 s->parent = qmp_query_blockstat(bs->file, NULL);
294cc35f
KW
2743 }
2744
f11f57e4 2745 return s;
294cc35f
KW
2746}
2747
f11f57e4 2748BlockStatsList *qmp_query_blockstats(Error **errp)
218a536a 2749{
f11f57e4 2750 BlockStatsList *head = NULL, *cur_item = NULL;
a36e69dd
TS
2751 BlockDriverState *bs;
2752
1b7bdbc1 2753 QTAILQ_FOREACH(bs, &bdrv_states, list) {
f11f57e4
LC
2754 BlockStatsList *info = g_malloc0(sizeof(*info));
2755 info->value = qmp_query_blockstat(bs, NULL);
2756
2757 /* XXX: waiting for the qapi to support GSList */
2758 if (!cur_item) {
2759 head = cur_item = info;
2760 } else {
2761 cur_item->next = info;
2762 cur_item = info;
2763 }
a36e69dd 2764 }
218a536a 2765
f11f57e4 2766 return head;
a36e69dd 2767}
ea2384d3 2768
045df330
AL
2769const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2770{
2771 if (bs->backing_hd && bs->backing_hd->encrypted)
2772 return bs->backing_file;
2773 else if (bs->encrypted)
2774 return bs->filename;
2775 else
2776 return NULL;
2777}
2778
5fafdf24 2779void bdrv_get_backing_filename(BlockDriverState *bs,
83f64091
FB
2780 char *filename, int filename_size)
2781{
3574c608 2782 pstrcpy(filename, filename_size, bs->backing_file);
83f64091
FB
2783}
2784
5fafdf24 2785int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
faea38e7
FB
2786 const uint8_t *buf, int nb_sectors)
2787{
2788 BlockDriver *drv = bs->drv;
2789 if (!drv)
19cb3738 2790 return -ENOMEDIUM;
faea38e7
FB
2791 if (!drv->bdrv_write_compressed)
2792 return -ENOTSUP;
fbb7b4e0
KW
2793 if (bdrv_check_request(bs, sector_num, nb_sectors))
2794 return -EIO;
a55eb92c 2795
c6d22830 2796 if (bs->dirty_bitmap) {
7cd1e32a 2797 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2798 }
a55eb92c 2799
faea38e7
FB
2800 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2801}
3b46e624 2802
faea38e7
FB
2803int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2804{
2805 BlockDriver *drv = bs->drv;
2806 if (!drv)
19cb3738 2807 return -ENOMEDIUM;
faea38e7
FB
2808 if (!drv->bdrv_get_info)
2809 return -ENOTSUP;
2810 memset(bdi, 0, sizeof(*bdi));
2811 return drv->bdrv_get_info(bs, bdi);
2812}
2813
45566e9c
CH
2814int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2815 int64_t pos, int size)
178e08a5
AL
2816{
2817 BlockDriver *drv = bs->drv;
2818 if (!drv)
2819 return -ENOMEDIUM;
7cdb1f6d
MK
2820 if (drv->bdrv_save_vmstate)
2821 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2822 if (bs->file)
2823 return bdrv_save_vmstate(bs->file, buf, pos, size);
2824 return -ENOTSUP;
178e08a5
AL
2825}
2826
45566e9c
CH
2827int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2828 int64_t pos, int size)
178e08a5
AL
2829{
2830 BlockDriver *drv = bs->drv;
2831 if (!drv)
2832 return -ENOMEDIUM;
7cdb1f6d
MK
2833 if (drv->bdrv_load_vmstate)
2834 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2835 if (bs->file)
2836 return bdrv_load_vmstate(bs->file, buf, pos, size);
2837 return -ENOTSUP;
178e08a5
AL
2838}
2839
8b9b0cc2
KW
2840void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2841{
2842 BlockDriver *drv = bs->drv;
2843
2844 if (!drv || !drv->bdrv_debug_event) {
2845 return;
2846 }
2847
2848 return drv->bdrv_debug_event(bs, event);
2849
2850}
2851
faea38e7
FB
2852/**************************************************************/
2853/* handling of snapshots */
2854
feeee5ac
MDCF
2855int bdrv_can_snapshot(BlockDriverState *bs)
2856{
2857 BlockDriver *drv = bs->drv;
07b70bfb 2858 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
feeee5ac
MDCF
2859 return 0;
2860 }
2861
2862 if (!drv->bdrv_snapshot_create) {
2863 if (bs->file != NULL) {
2864 return bdrv_can_snapshot(bs->file);
2865 }
2866 return 0;
2867 }
2868
2869 return 1;
2870}
2871
199630b6
BS
2872int bdrv_is_snapshot(BlockDriverState *bs)
2873{
2874 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2875}
2876
f9092b10
MA
2877BlockDriverState *bdrv_snapshots(void)
2878{
2879 BlockDriverState *bs;
2880
3ac906f7 2881 if (bs_snapshots) {
f9092b10 2882 return bs_snapshots;
3ac906f7 2883 }
f9092b10
MA
2884
2885 bs = NULL;
2886 while ((bs = bdrv_next(bs))) {
2887 if (bdrv_can_snapshot(bs)) {
3ac906f7
MA
2888 bs_snapshots = bs;
2889 return bs;
f9092b10
MA
2890 }
2891 }
2892 return NULL;
f9092b10
MA
2893}
2894
5fafdf24 2895int bdrv_snapshot_create(BlockDriverState *bs,
faea38e7
FB
2896 QEMUSnapshotInfo *sn_info)
2897{
2898 BlockDriver *drv = bs->drv;
2899 if (!drv)
19cb3738 2900 return -ENOMEDIUM;
7cdb1f6d
MK
2901 if (drv->bdrv_snapshot_create)
2902 return drv->bdrv_snapshot_create(bs, sn_info);
2903 if (bs->file)
2904 return bdrv_snapshot_create(bs->file, sn_info);
2905 return -ENOTSUP;
faea38e7
FB
2906}
2907
5fafdf24 2908int bdrv_snapshot_goto(BlockDriverState *bs,
faea38e7
FB
2909 const char *snapshot_id)
2910{
2911 BlockDriver *drv = bs->drv;
7cdb1f6d
MK
2912 int ret, open_ret;
2913
faea38e7 2914 if (!drv)
19cb3738 2915 return -ENOMEDIUM;
7cdb1f6d
MK
2916 if (drv->bdrv_snapshot_goto)
2917 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2918
2919 if (bs->file) {
2920 drv->bdrv_close(bs);
2921 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2922 open_ret = drv->bdrv_open(bs, bs->open_flags);
2923 if (open_ret < 0) {
2924 bdrv_delete(bs->file);
2925 bs->drv = NULL;
2926 return open_ret;
2927 }
2928 return ret;
2929 }
2930
2931 return -ENOTSUP;
faea38e7
FB
2932}
2933
2934int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2935{
2936 BlockDriver *drv = bs->drv;
2937 if (!drv)
19cb3738 2938 return -ENOMEDIUM;
7cdb1f6d
MK
2939 if (drv->bdrv_snapshot_delete)
2940 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2941 if (bs->file)
2942 return bdrv_snapshot_delete(bs->file, snapshot_id);
2943 return -ENOTSUP;
faea38e7
FB
2944}
2945
5fafdf24 2946int bdrv_snapshot_list(BlockDriverState *bs,
faea38e7
FB
2947 QEMUSnapshotInfo **psn_info)
2948{
2949 BlockDriver *drv = bs->drv;
2950 if (!drv)
19cb3738 2951 return -ENOMEDIUM;
7cdb1f6d
MK
2952 if (drv->bdrv_snapshot_list)
2953 return drv->bdrv_snapshot_list(bs, psn_info);
2954 if (bs->file)
2955 return bdrv_snapshot_list(bs->file, psn_info);
2956 return -ENOTSUP;
faea38e7
FB
2957}
2958
51ef6727 2959int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2960 const char *snapshot_name)
2961{
2962 BlockDriver *drv = bs->drv;
2963 if (!drv) {
2964 return -ENOMEDIUM;
2965 }
2966 if (!bs->read_only) {
2967 return -EINVAL;
2968 }
2969 if (drv->bdrv_snapshot_load_tmp) {
2970 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2971 }
2972 return -ENOTSUP;
2973}
2974
e8a6bb9c
MT
2975BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2976 const char *backing_file)
2977{
2978 if (!bs->drv) {
2979 return NULL;
2980 }
2981
2982 if (bs->backing_hd) {
2983 if (strcmp(bs->backing_file, backing_file) == 0) {
2984 return bs->backing_hd;
2985 } else {
2986 return bdrv_find_backing_image(bs->backing_hd, backing_file);
2987 }
2988 }
2989
2990 return NULL;
2991}
2992
#define NB_SUFFIXES 4

/*
 * Render @size into @buf as a human-readable string with a binary-scaled
 * K/M/G/T suffix (e.g. "1.5K", "10M"); plain decimal below 1000 bytes.
 * Returns @buf.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t base = 1024;
    int i;

    if (size <= 999) {
        /* Small values are printed exactly, without a suffix. */
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }
    for (i = 0; i < NB_SUFFIXES; i++) {
        if (size < (10 * base)) {
            /* One decimal place for values below ten units. */
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / base, suffixes[i]);
            break;
        } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
            /* Round to the nearest whole unit. */
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     (size + (base >> 1)) / base, suffixes[i]);
            break;
        }
        base = base * 1024;
    }
    return buf;
}
3022
3023char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
3024{
3025 char buf1[128], date_buf[128], clock_buf[128];
3b9f94e1
FB
3026#ifdef _WIN32
3027 struct tm *ptm;
3028#else
faea38e7 3029 struct tm tm;
3b9f94e1 3030#endif
faea38e7
FB
3031 time_t ti;
3032 int64_t secs;
3033
3034 if (!sn) {
5fafdf24
TS
3035 snprintf(buf, buf_size,
3036 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
3037 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
3038 } else {
3039 ti = sn->date_sec;
3b9f94e1
FB
3040#ifdef _WIN32
3041 ptm = localtime(&ti);
3042 strftime(date_buf, sizeof(date_buf),
3043 "%Y-%m-%d %H:%M:%S", ptm);
3044#else
faea38e7
FB
3045 localtime_r(&ti, &tm);
3046 strftime(date_buf, sizeof(date_buf),
3047 "%Y-%m-%d %H:%M:%S", &tm);
3b9f94e1 3048#endif
faea38e7
FB
3049 secs = sn->vm_clock_nsec / 1000000000;
3050 snprintf(clock_buf, sizeof(clock_buf),
3051 "%02d:%02d:%02d.%03d",
3052 (int)(secs / 3600),
3053 (int)((secs / 60) % 60),
5fafdf24 3054 (int)(secs % 60),
faea38e7
FB
3055 (int)((sn->vm_clock_nsec / 1000000) % 1000));
3056 snprintf(buf, buf_size,
5fafdf24 3057 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
3058 sn->id_str, sn->name,
3059 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
3060 date_buf,
3061 clock_buf);
3062 }
3063 return buf;
3064}
3065
ea2384d3 3066/**************************************************************/
83f64091 3067/* async I/Os */
ea2384d3 3068
3b69e4b9 3069BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
f141eafe 3070 QEMUIOVector *qiov, int nb_sectors,
3b69e4b9 3071 BlockDriverCompletionFunc *cb, void *opaque)
83f64091 3072{
bbf0a440
SH
3073 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
3074
b2a61371 3075 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
8c5873d6 3076 cb, opaque, false);
ea2384d3
FB
3077}
3078
f141eafe
AL
3079BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
3080 QEMUIOVector *qiov, int nb_sectors,
3081 BlockDriverCompletionFunc *cb, void *opaque)
ea2384d3 3082{
bbf0a440
SH
3083 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
3084
1a6e115b 3085 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
8c5873d6 3086 cb, opaque, true);
83f64091
FB
3087}
3088
40b4f539
KW
3089
3090typedef struct MultiwriteCB {
3091 int error;
3092 int num_requests;
3093 int num_callbacks;
3094 struct {
3095 BlockDriverCompletionFunc *cb;
3096 void *opaque;
3097 QEMUIOVector *free_qiov;
40b4f539
KW
3098 } callbacks[];
3099} MultiwriteCB;
3100
3101static void multiwrite_user_cb(MultiwriteCB *mcb)
3102{
3103 int i;
3104
3105 for (i = 0; i < mcb->num_callbacks; i++) {
3106 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1e1ea48d
SH
3107 if (mcb->callbacks[i].free_qiov) {
3108 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
3109 }
7267c094 3110 g_free(mcb->callbacks[i].free_qiov);
40b4f539
KW
3111 }
3112}
3113
3114static void multiwrite_cb(void *opaque, int ret)
3115{
3116 MultiwriteCB *mcb = opaque;
3117
6d519a5f
SH
3118 trace_multiwrite_cb(mcb, ret);
3119
cb6d3ca0 3120 if (ret < 0 && !mcb->error) {
40b4f539 3121 mcb->error = ret;
40b4f539
KW
3122 }
3123
3124 mcb->num_requests--;
3125 if (mcb->num_requests == 0) {
de189a1b 3126 multiwrite_user_cb(mcb);
7267c094 3127 g_free(mcb);
40b4f539
KW
3128 }
3129}
3130
3131static int multiwrite_req_compare(const void *a, const void *b)
3132{
77be4366
CH
3133 const BlockRequest *req1 = a, *req2 = b;
3134
3135 /*
3136 * Note that we can't simply subtract req2->sector from req1->sector
3137 * here as that could overflow the return value.
3138 */
3139 if (req1->sector > req2->sector) {
3140 return 1;
3141 } else if (req1->sector < req2->sector) {
3142 return -1;
3143 } else {
3144 return 0;
3145 }
40b4f539
KW
3146}
3147
/*
 * Takes a bunch of requests and tries to merge them. Returns the number of
 * requests that remain after merging.
 *
 * Requests are first sorted by start sector, then adjacent requests that are
 * exactly sequential or overlapping are coalesced into a single request with
 * a freshly allocated QEMUIOVector.  The merged qiov is recorded in
 * mcb->callbacks[i].free_qiov so multiwrite_user_cb() can free it later.
 * The reqs array is compacted in place.
 */
static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
    int num_reqs, MultiwriteCB *mcb)
{
    int i, outidx;

    // Sort requests by start sector
    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);

    // Check if adjacent requests touch the same clusters. If so, combine them,
    // filling up gaps with zero sectors.
    outidx = 0;
    for (i = 1; i < num_reqs; i++) {
        int merge = 0;
        // End sector (exclusive) of the current output request.
        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;

        // Handle exactly sequential writes and overlapping writes.
        if (reqs[i].sector <= oldreq_last) {
            merge = 1;
        }

        // Don't merge if the combined iovec would exceed the OS limit.
        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
            merge = 0;
        }

        if (merge) {
            size_t size;
            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
            qemu_iovec_init(qiov,
                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);

            // Add the first request to the merged one. If the requests are
            // overlapping, drop the last sectors of the first request.
            size = (reqs[i].sector - reqs[outidx].sector) << 9;
            qemu_iovec_concat(qiov, reqs[outidx].qiov, size);

            // We shouldn't need to insert any zeros between the two requests:
            // the merge condition above guarantees they touch or overlap.
            assert (reqs[i].sector <= oldreq_last);

            // Add the second request
            qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);

            // Recompute the sector count from the merged iovec size.
            reqs[outidx].nb_sectors = qiov->size >> 9;
            reqs[outidx].qiov = qiov;

            // Remember the merged qiov so it can be freed on completion.
            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
        } else {
            // No merge possible: start a new output slot.
            outidx++;
            reqs[outidx].sector = reqs[i].sector;
            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
            reqs[outidx].qiov = reqs[i].qiov;
        }
    }

    return outidx + 1;
}
3207
/*
 * Submit multiple AIO write requests at once.
 *
 * On success, the function returns 0 and all requests in the reqs array have
 * been submitted. In error case this function returns -1, and any of the
 * requests may or may not be submitted yet. In particular, this means that the
 * callback will be called for some of the requests, for others it won't. The
 * caller must check the error field of the BlockRequest to wait for the right
 * callbacks (if error != 0, no callback will be called).
 *
 * The implementation may modify the contents of the reqs array, e.g. to merge
 * requests. However, the fields opaque and error are left unmodified as they
 * are used to signal failure for a single request to the caller.
 */
int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
{
    MultiwriteCB *mcb;
    int i;

    /* don't submit writes if we don't have a medium */
    if (bs->drv == NULL) {
        for (i = 0; i < num_reqs; i++) {
            reqs[i].error = -ENOMEDIUM;
        }
        return -1;
    }

    if (num_reqs == 0) {
        return 0;
    }

    // Create MultiwriteCB structure
    // (flexible array member: one callback slot per original request)
    mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
    mcb->num_requests = 0;
    mcb->num_callbacks = num_reqs;

    // Capture the per-request callbacks before merging rewrites reqs[].
    for (i = 0; i < num_reqs; i++) {
        mcb->callbacks[i].cb = reqs[i].cb;
        mcb->callbacks[i].opaque = reqs[i].opaque;
    }

    // Check for mergable requests
    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);

    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);

    /* Run the aio requests. */
    // num_requests is set to the full count *before* submitting, so an
    // early completion cannot see a zero count and free mcb prematurely.
    mcb->num_requests = num_reqs;
    for (i = 0; i < num_reqs; i++) {
        bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
            reqs[i].nb_sectors, multiwrite_cb, mcb);
    }

    return 0;
}
3263
83f64091 3264void bdrv_aio_cancel(BlockDriverAIOCB *acb)
83f64091 3265{
6bbff9a0 3266 acb->pool->cancel(acb);
83f64091
FB
3267}
3268
98f90dba
ZYW
/* block I/O throttling */

/*
 * Check whether adding nb_sectors of I/O would exceed the configured
 * bytes-per-second limit for the current accounting slice.
 *
 * Returns false (and *wait = 0) if the request fits within the limit.
 * Returns true if it would exceed the limit; *wait is then set to an
 * estimated delay (in slice-time units) before the request may proceed,
 * and the slice end is pushed out so the statistics stay valid.
 */
static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
    bool is_write, double elapsed_time, uint64_t *wait)
{
    uint64_t bps_limit = 0;
    double bytes_limit, bytes_base, bytes_res;
    double slice_time, wait_time;

    /* A total limit takes precedence over the per-direction limit. */
    if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
        bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
    } else if (bs->io_limits.bps[is_write]) {
        bps_limit = bs->io_limits.bps[is_write];
    } else {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    slice_time = bs->slice_end - bs->slice_start;
    slice_time /= (NANOSECONDS_PER_SECOND);
    bytes_limit = bps_limit * slice_time;
    bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
    if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
        /* Total limit: count traffic in both directions. */
        bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
    }

    /* bytes_base: the bytes of data which have been read/written; and
     * it is obtained from the history statistic info.
     * bytes_res: the remaining bytes of data which need to be read/written.
     * (bytes_base + bytes_res) / bps_limit: used to calculate
     * the total time for completing reading/writing all data.
     */
    bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;

    if (bytes_base + bytes_res <= bytes_limit) {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    /* Calc approx time to dispatch */
    wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;

    /* When the I/O rate at runtime exceeds the limits,
     * bs->slice_end need to be extended in order that the current statistic
     * info can be kept until the timer fire, so it is increased and tuned
     * based on the result of experiment.
     */
    bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
    bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
    if (wait) {
        *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
    }

    return true;
}
3329
/*
 * Check whether one more operation would exceed the configured
 * operations-per-second limit for the current accounting slice.
 *
 * Mirror image of bdrv_exceed_bps_limits(), but counting operations
 * instead of bytes.  Returns true and sets *wait (estimated delay) when
 * the limit would be exceeded; returns false (*wait = 0) otherwise.
 */
static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
    double elapsed_time, uint64_t *wait)
{
    uint64_t iops_limit = 0;
    double ios_limit, ios_base;
    double slice_time, wait_time;

    /* A total limit takes precedence over the per-direction limit. */
    if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
        iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
    } else if (bs->io_limits.iops[is_write]) {
        iops_limit = bs->io_limits.iops[is_write];
    } else {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    slice_time = bs->slice_end - bs->slice_start;
    slice_time /= (NANOSECONDS_PER_SECOND);
    ios_limit = iops_limit * slice_time;
    ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
    if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
        /* Total limit: count operations in both directions. */
        ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
    }

    /* The +1 accounts for the operation we are about to submit. */
    if (ios_base + 1 <= ios_limit) {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    /* Calc approx time to dispatch */
    wait_time = (ios_base + 1) / iops_limit;
    if (wait_time > elapsed_time) {
        wait_time = wait_time - elapsed_time;
    } else {
        wait_time = 0;
    }

    /* Extend the slice so current statistics survive until the timer fires
     * (same empirically-tuned factors as bdrv_exceed_bps_limits). */
    bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
    bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
    if (wait) {
        *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
    }

    return true;
}
3381
/*
 * Combined throttling check: returns true if either the bps or the iops
 * limit would be exceeded by this request, with *wait set to the larger of
 * the two estimated delays.  Also (re)initializes the accounting slice when
 * the current time falls outside it.
 */
static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
    bool is_write, int64_t *wait)
{
    int64_t now, max_wait;
    uint64_t bps_wait = 0, iops_wait = 0;
    double elapsed_time;
    int bps_ret, iops_ret;

    now = qemu_get_clock_ns(vm_clock);
    if ((bs->slice_start < now)
        && (bs->slice_end > now)) {
        /* Still inside the current slice: just extend its end. */
        bs->slice_end = now + bs->slice_time;
    } else {
        /* Start a new slice and snapshot the counters as its baseline. */
        bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
        bs->slice_start = now;
        bs->slice_end = now + bs->slice_time;

        bs->io_base.bytes[is_write] = bs->nr_bytes[is_write];
        bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];

        bs->io_base.ios[is_write] = bs->nr_ops[is_write];
        bs->io_base.ios[!is_write] = bs->nr_ops[!is_write];
    }

    elapsed_time = now - bs->slice_start;
    elapsed_time /= (NANOSECONDS_PER_SECOND);

    bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
                                     is_write, elapsed_time, &bps_wait);
    iops_ret = bdrv_exceed_iops_limits(bs, is_write,
                                       elapsed_time, &iops_wait);
    if (bps_ret || iops_ret) {
        /* Throttled: wait for whichever limit demands the longer delay. */
        max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
        if (wait) {
            *wait = max_wait;
        }

        /* Make sure the slice lasts at least until the wait expires. */
        now = qemu_get_clock_ns(vm_clock);
        if (bs->slice_end < now + max_wait) {
            bs->slice_end = now + max_wait;
        }

        return true;
    }

    if (wait) {
        *wait = 0;
    }

    return false;
}
ce1a14dc 3433
83f64091
FB
3434/**************************************************************/
3435/* async block device emulation */
3436
/*
 * AIOCB for emulating asynchronous I/O on top of a driver's synchronous
 * bdrv_read/bdrv_write hooks (see bdrv_aio_rw_vector).  The I/O happens
 * synchronously at submission time; completion is deferred via a BH.
 */
typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;  /* must be first: embedded base AIOCB */
    QEMUBH *bh;               /* bottom half that signals completion */
    int ret;                  /* result of the synchronous read/write */
    /* vector translation state */
    QEMUIOVector *qiov;       /* caller's scatter/gather list */
    uint8_t *bounce;          /* linear bounce buffer for the sync I/O */
    int is_write;             /* nonzero for writes */
} BlockDriverAIOCBSync;
3446
3447static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
3448{
b666d239
KW
3449 BlockDriverAIOCBSync *acb =
3450 container_of(blockacb, BlockDriverAIOCBSync, common);
6a7ad299 3451 qemu_bh_delete(acb->bh);
36afc451 3452 acb->bh = NULL;
c16b5a2c
CH
3453 qemu_aio_release(acb);
3454}
3455
/* Pool for the synchronous-emulation AIOCBs above. */
static AIOPool bdrv_em_aio_pool = {
    .aiocb_size = sizeof(BlockDriverAIOCBSync),
    .cancel = bdrv_aio_cancel_em,
};
3460
ce1a14dc 3461static void bdrv_aio_bh_cb(void *opaque)
83f64091 3462{
ce1a14dc 3463 BlockDriverAIOCBSync *acb = opaque;
f141eafe 3464
f141eafe
AL
3465 if (!acb->is_write)
3466 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
ceb42de8 3467 qemu_vfree(acb->bounce);
ce1a14dc 3468 acb->common.cb(acb->common.opaque, acb->ret);
6a7ad299 3469 qemu_bh_delete(acb->bh);
36afc451 3470 acb->bh = NULL;
ce1a14dc 3471 qemu_aio_release(acb);
83f64091 3472}
beac80cd 3473
/*
 * Emulate asynchronous vectored I/O using the driver's synchronous
 * bdrv_read/bdrv_write hooks.  The transfer is performed synchronously
 * through a linear bounce buffer; completion is reported later from a
 * bottom half (bdrv_aio_bh_cb) to preserve AIO calling conventions.
 */
static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
                                            int64_t sector_num,
                                            QEMUIOVector *qiov,
                                            int nb_sectors,
                                            BlockDriverCompletionFunc *cb,
                                            void *opaque,
                                            int is_write)

{
    BlockDriverAIOCBSync *acb;

    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
    acb->is_write = is_write;
    acb->qiov = qiov;
    /* Bounce buffer must satisfy the device's alignment requirements. */
    acb->bounce = qemu_blockalign(bs, qiov->size);
    acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);

    if (is_write) {
        /* Linearize the caller's scatter/gather data for the sync write. */
        qemu_iovec_to_buffer(acb->qiov, acb->bounce);
        acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
    } else {
        acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
    }

    /* Defer the callback to a BH; the BH copies read data back out. */
    qemu_bh_schedule(acb->bh);

    return &acb->common;
}
3502
f141eafe
AL
3503static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
3504 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 3505 BlockDriverCompletionFunc *cb, void *opaque)
beac80cd 3506{
f141eafe
AL
3507 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
3508}
83f64091 3509
f141eafe
AL
3510static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
3511 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
3512 BlockDriverCompletionFunc *cb, void *opaque)
3513{
3514 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
beac80cd 3515}
beac80cd 3516
68485420
KW
3517
/* AIOCB backing the coroutine-based AIO emulation (bdrv_co_aio_rw_vector,
 * bdrv_aio_flush, bdrv_aio_discard). */
typedef struct BlockDriverAIOCBCoroutine {
    BlockDriverAIOCB common;  /* must be first: embedded base AIOCB */
    BlockRequest req;         /* request parameters and result (req.error) */
    bool is_write;            /* direction for read/write requests */
    QEMUBH* bh;               /* BH that delivers the completion callback */
} BlockDriverAIOCBCoroutine;
3524
static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
{
    /* Coarse-grained cancel: drain *all* pending AIO so this request is
     * guaranteed to have completed by the time we return. */
    qemu_aio_flush();
}
3529
/* Pool for the coroutine-emulation AIOCBs above. */
static AIOPool bdrv_em_co_aio_pool = {
    .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
    .cancel = bdrv_aio_co_cancel_em,
};
3534
35246a68 3535static void bdrv_co_em_bh(void *opaque)
68485420
KW
3536{
3537 BlockDriverAIOCBCoroutine *acb = opaque;
3538
3539 acb->common.cb(acb->common.opaque, acb->req.error);
3540 qemu_bh_delete(acb->bh);
3541 qemu_aio_release(acb);
3542}
3543
b2a61371
SH
/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
/*
 * Coroutine entry point for bdrv_co_aio_rw_vector().  Performs the actual
 * transfer, stores the result in acb->req.error, and schedules a BH that
 * invokes the user's completion callback.
 */
static void coroutine_fn bdrv_co_do_rw(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    if (!acb->is_write) {
        acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov, 0);
    } else {
        acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov, 0);
    }

    /* Deliver completion from a BH, never directly from the coroutine. */
    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}
3561
68485420
KW
3562static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
3563 int64_t sector_num,
3564 QEMUIOVector *qiov,
3565 int nb_sectors,
3566 BlockDriverCompletionFunc *cb,
3567 void *opaque,
8c5873d6 3568 bool is_write)
68485420
KW
3569{
3570 Coroutine *co;
3571 BlockDriverAIOCBCoroutine *acb;
3572
3573 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3574 acb->req.sector = sector_num;
3575 acb->req.nb_sectors = nb_sectors;
3576 acb->req.qiov = qiov;
3577 acb->is_write = is_write;
3578
8c5873d6 3579 co = qemu_coroutine_create(bdrv_co_do_rw);
68485420
KW
3580 qemu_coroutine_enter(co, acb);
3581
3582 return &acb->common;
3583}
3584
07f07615 3585static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
b2e12bc6 3586{
07f07615
PB
3587 BlockDriverAIOCBCoroutine *acb = opaque;
3588 BlockDriverState *bs = acb->common.bs;
b2e12bc6 3589
07f07615
PB
3590 acb->req.error = bdrv_co_flush(bs);
3591 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
b2e12bc6 3592 qemu_bh_schedule(acb->bh);
b2e12bc6
CH
3593}
3594
07f07615 3595BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
016f5cf6
AG
3596 BlockDriverCompletionFunc *cb, void *opaque)
3597{
07f07615 3598 trace_bdrv_aio_flush(bs, opaque);
016f5cf6 3599
07f07615
PB
3600 Coroutine *co;
3601 BlockDriverAIOCBCoroutine *acb;
016f5cf6 3602
07f07615
PB
3603 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3604 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
3605 qemu_coroutine_enter(co, acb);
016f5cf6 3606
016f5cf6
AG
3607 return &acb->common;
3608}
3609
4265d620
PB
3610static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
3611{
3612 BlockDriverAIOCBCoroutine *acb = opaque;
3613 BlockDriverState *bs = acb->common.bs;
3614
3615 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
3616 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3617 qemu_bh_schedule(acb->bh);
3618}
3619
3620BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
3621 int64_t sector_num, int nb_sectors,
3622 BlockDriverCompletionFunc *cb, void *opaque)
3623{
3624 Coroutine *co;
3625 BlockDriverAIOCBCoroutine *acb;
3626
3627 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
3628
3629 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3630 acb->req.sector = sector_num;
3631 acb->req.nb_sectors = nb_sectors;
3632 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
3633 qemu_coroutine_enter(co, acb);
3634
3635 return &acb->common;
3636}
3637
ea2384d3
FB
/* Register all built-in block drivers (runs their module init hooks). */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}
ce1a14dc 3642
eb852011
MA
/* Like bdrv_init(), but restrict format probing/opening to the
 * compile-time driver whitelist. */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
3648
c16b5a2c
CH
/*
 * Allocate an AIOCB from the given pool.
 *
 * Reuses an entry from the pool's free list when available; otherwise
 * allocates a fresh zeroed AIOCB of pool->aiocb_size bytes and ties it to
 * the pool (its 'pool' field is only set once, on first allocation).
 * Returns the AIOCB with bs/cb/opaque filled in; ownership returns to the
 * pool via qemu_aio_release().
 */
void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
                   BlockDriverCompletionFunc *cb, void *opaque)
{
    BlockDriverAIOCB *acb;

    if (pool->free_aiocb) {
        /* Pop the head of the free list. */
        acb = pool->free_aiocb;
        pool->free_aiocb = acb->next;
    } else {
        acb = g_malloc0(pool->aiocb_size);
        acb->pool = pool;
    }
    acb->bs = bs;
    acb->cb = cb;
    acb->opaque = opaque;
    return acb;
}
3666
3667void qemu_aio_release(void *p)
3668{
6bbff9a0
AL
3669 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
3670 AIOPool *pool = acb->pool;
3671 acb->next = pool->free_aiocb;
3672 pool->free_aiocb = acb;
ce1a14dc 3673}
19cb3738 3674
f9f05dc5
KW
3675/**************************************************************/
3676/* Coroutine block device emulation */
3677
/* Rendezvous between an AIO completion callback and a waiting coroutine:
 * the callback stores the result and re-enters the coroutine. */
typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;  /* coroutine to wake on completion */
    int ret;               /* result passed to the completion callback */
} CoroutineIOCompletion;
3682
3683static void bdrv_co_io_em_complete(void *opaque, int ret)
3684{
3685 CoroutineIOCompletion *co = opaque;
3686
3687 co->ret = ret;
3688 qemu_coroutine_enter(co->coroutine, NULL);
3689}
3690
/*
 * Run a driver's AIO readv/writev hook from coroutine context and wait for
 * it: submit the request with bdrv_co_io_em_complete() as callback, yield,
 * and return the result once the callback re-enters us.
 * Returns -EIO if the driver refuses the request (NULL AIOCB).
 */
static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *iov,
                                      bool is_write)
{
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockDriverAIOCB *acb;

    if (is_write) {
        acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
                                       bdrv_co_io_em_complete, &co);
    } else {
        acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
                                      bdrv_co_io_em_complete, &co);
    }

    trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
    if (!acb) {
        return -EIO;
    }
    /* Sleep until bdrv_co_io_em_complete() wakes us with the result. */
    qemu_coroutine_yield();

    return co.ret;
}
3716
3717static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3718 int64_t sector_num, int nb_sectors,
3719 QEMUIOVector *iov)
3720{
3721 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3722}
3723
3724static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3725 int64_t sector_num, int nb_sectors,
3726 QEMUIOVector *iov)
3727{
3728 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3729}
3730
07f07615 3731static void coroutine_fn bdrv_flush_co_entry(void *opaque)
e7a8a783 3732{
07f07615
PB
3733 RwCo *rwco = opaque;
3734
3735 rwco->ret = bdrv_co_flush(rwco->bs);
3736}
3737
/*
 * Flush the device from coroutine context.
 *
 * Order matters: first flush format-layer caches to the OS (always, even
 * with cache=unsafe), then — unless BDRV_O_NO_FLUSH — force data to disk
 * via the best available driver hook, and finally recurse into bs->file so
 * the underlying protocol flushes too.
 * Returns 0 on success or if there is nothing to flush; negative errno on
 * failure.
 */
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    int ret;

    /* Nothing to do for a missing, empty, or read-only device. */
    if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
        return 0;
    }

    /* Write back cached data to the OS even with cache=unsafe */
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            return ret;
        }
    }

    /* But don't actually force it to the disk with cache=unsafe */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        return 0;
    }

    /* Prefer the coroutine hook, fall back to the AIO hook. */
    if (bs->drv->bdrv_co_flush_to_disk) {
        ret = bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        BlockDriverAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            ret = -EIO;
        } else {
            /* Wait for bdrv_co_io_em_complete() to wake us. */
            qemu_coroutine_yield();
            ret = co.ret;
        }
    } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work because
         * that would break guests even if the server operates in writethrough
         * mode.
         *
         * Let's hope the user knows what he's doing.
         */
        ret = 0;
    }
    if (ret < 0) {
        return ret;
    }

    /* Now flush the underlying protocol.  It will also have BDRV_O_NO_FLUSH
     * in the case of cache=unsafe, so there are no useless flushes.
     */
    return bdrv_co_flush(bs->file);
}
3797
0f15423c
AL
3798void bdrv_invalidate_cache(BlockDriverState *bs)
3799{
3800 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3801 bs->drv->bdrv_invalidate_cache(bs);
3802 }
3803}
3804
3805void bdrv_invalidate_cache_all(void)
3806{
3807 BlockDriverState *bs;
3808
3809 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3810 bdrv_invalidate_cache(bs);
3811 }
3812}
3813
07789269
BC
3814void bdrv_clear_incoming_migration_all(void)
3815{
3816 BlockDriverState *bs;
3817
3818 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3819 bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
3820 }
3821}
3822
07f07615
PB
3823int bdrv_flush(BlockDriverState *bs)
3824{
3825 Coroutine *co;
3826 RwCo rwco = {
3827 .bs = bs,
3828 .ret = NOT_DONE,
e7a8a783 3829 };
e7a8a783 3830
07f07615
PB
3831 if (qemu_in_coroutine()) {
3832 /* Fast-path if already in coroutine context */
3833 bdrv_flush_co_entry(&rwco);
3834 } else {
3835 co = qemu_coroutine_create(bdrv_flush_co_entry);
3836 qemu_coroutine_enter(co, &rwco);
3837 while (rwco.ret == NOT_DONE) {
3838 qemu_aio_wait();
3839 }
e7a8a783 3840 }
07f07615
PB
3841
3842 return rwco.ret;
e7a8a783
KW
3843}
3844
4265d620
PB
3845static void coroutine_fn bdrv_discard_co_entry(void *opaque)
3846{
3847 RwCo *rwco = opaque;
3848
3849 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
3850}
3851
/*
 * Discard (unmap) a sector range from coroutine context.
 *
 * Validates the request, then uses the driver's coroutine discard hook if
 * present, else its AIO discard hook (waiting via bdrv_co_io_em_complete),
 * else silently succeeds — discard is only an optimization hint.
 * Returns 0 on success, -ENOMEDIUM / -EIO / -EROFS on invalid requests.
 */
int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
                                 int nb_sectors)
{
    if (!bs->drv) {
        return -ENOMEDIUM;
    } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    } else if (bs->read_only) {
        return -EROFS;
    } else if (bs->drv->bdrv_co_discard) {
        return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
    } else if (bs->drv->bdrv_aio_discard) {
        BlockDriverAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
                                        bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            return -EIO;
        } else {
            /* Wait for the AIO completion to wake us with the result. */
            qemu_coroutine_yield();
            return co.ret;
        }
    } else {
        /* Driver has no discard support: treat as a successful no-op. */
        return 0;
    }
}
3881
3882int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
3883{
3884 Coroutine *co;
3885 RwCo rwco = {
3886 .bs = bs,
3887 .sector_num = sector_num,
3888 .nb_sectors = nb_sectors,
3889 .ret = NOT_DONE,
3890 };
3891
3892 if (qemu_in_coroutine()) {
3893 /* Fast-path if already in coroutine context */
3894 bdrv_discard_co_entry(&rwco);
3895 } else {
3896 co = qemu_coroutine_create(bdrv_discard_co_entry);
3897 qemu_coroutine_enter(co, &rwco);
3898 while (rwco.ret == NOT_DONE) {
3899 qemu_aio_wait();
3900 }
3901 }
3902
3903 return rwco.ret;
3904}
3905
19cb3738
FB
3906/**************************************************************/
3907/* removable device support */
3908
3909/**
3910 * Return TRUE if the media is present
3911 */
3912int bdrv_is_inserted(BlockDriverState *bs)
3913{
3914 BlockDriver *drv = bs->drv;
a1aff5bf 3915
19cb3738
FB
3916 if (!drv)
3917 return 0;
3918 if (!drv->bdrv_is_inserted)
a1aff5bf
MA
3919 return 1;
3920 return drv->bdrv_is_inserted(bs);
19cb3738
FB
3921}
3922
3923/**
8e49ca46
MA
3924 * Return whether the media changed since the last call to this
3925 * function, or -ENOTSUP if we don't know. Most drivers don't know.
19cb3738
FB
3926 */
3927int bdrv_media_changed(BlockDriverState *bs)
3928{
3929 BlockDriver *drv = bs->drv;
19cb3738 3930
8e49ca46
MA
3931 if (drv && drv->bdrv_media_changed) {
3932 return drv->bdrv_media_changed(bs);
3933 }
3934 return -ENOTSUP;
19cb3738
FB
3935}
3936
3937/**
3938 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3939 */
f36f3949 3940void bdrv_eject(BlockDriverState *bs, bool eject_flag)
19cb3738
FB
3941{
3942 BlockDriver *drv = bs->drv;
19cb3738 3943
822e1cd1
MA
3944 if (drv && drv->bdrv_eject) {
3945 drv->bdrv_eject(bs, eject_flag);
19cb3738 3946 }
6f382ed2
LC
3947
3948 if (bs->device_name[0] != '\0') {
3949 bdrv_emit_qmp_eject_event(bs, eject_flag);
3950 }
19cb3738
FB
3951}
3952
19cb3738
FB
3953/**
3954 * Lock or unlock the media (if it is locked, the user won't be able
3955 * to eject it manually).
3956 */
025e849a 3957void bdrv_lock_medium(BlockDriverState *bs, bool locked)
19cb3738
FB
3958{
3959 BlockDriver *drv = bs->drv;
3960
025e849a 3961 trace_bdrv_lock_medium(bs, locked);
b8c6d095 3962
025e849a
MA
3963 if (drv && drv->bdrv_lock_medium) {
3964 drv->bdrv_lock_medium(bs, locked);
19cb3738
FB
3965 }
3966}
985a03b0
TS
3967
3968/* needed for generic scsi interface */
3969
3970int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3971{
3972 BlockDriver *drv = bs->drv;
3973
3974 if (drv && drv->bdrv_ioctl)
3975 return drv->bdrv_ioctl(bs, req, buf);
3976 return -ENOTSUP;
3977}
7d780669 3978
221f715d
AL
3979BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3980 unsigned long int req, void *buf,
3981 BlockDriverCompletionFunc *cb, void *opaque)
7d780669 3982{
221f715d 3983 BlockDriver *drv = bs->drv;
7d780669 3984
221f715d
AL
3985 if (drv && drv->bdrv_aio_ioctl)
3986 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3987 return NULL;
7d780669 3988}
e268ca52 3989
7b6f9300
MA
/* Set the buffer alignment (in bytes) that qemu_blockalign() will honour
 * for this device. */
void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
{
    bs->buffer_alignment = align;
}
7cd1e32a 3994
e268ca52
AL
3995void *qemu_blockalign(BlockDriverState *bs, size_t size)
3996{
3997 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3998}
7cd1e32a 3999
/*
 * Enable or disable dirty-sector tracking for the device.
 *
 * Enabling allocates a zeroed bitmap sized for the whole device (one bit
 * per BDRV_SECTORS_PER_DIRTY_CHUNK-sector chunk), unless one already
 * exists.  Disabling frees the bitmap.  The dirty counter is reset in
 * both cases.
 */
void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
{
    int64_t bitmap_size;

    bs->dirty_count = 0;
    if (enable) {
        if (!bs->dirty_bitmap) {
            /* Round up so partial chunks and partial words are covered. */
            bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
                    BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG - 1;
            bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG;

            bs->dirty_bitmap = g_new0(unsigned long, bitmap_size);
        }
    } else {
        if (bs->dirty_bitmap) {
            g_free(bs->dirty_bitmap);
            bs->dirty_bitmap = NULL;
        }
    }
}
4020
/*
 * Return 1 if the chunk containing 'sector' is marked dirty, 0 otherwise.
 * Also returns 0 when tracking is disabled or the sector lies beyond the
 * end of the device.
 */
int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
{
    /* Index of the tracking chunk this sector belongs to. */
    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;

    if (bs->dirty_bitmap &&
        (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
        /* Test the chunk's bit inside its unsigned long word. */
        return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
            (1UL << (chunk % (sizeof(unsigned long) * 8))));
    } else {
        return 0;
    }
}
4033
a55eb92c
JK
/* Clear the dirty marking for the given sector range. */
void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                      int nr_sectors)
{
    set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
}
aaa0eb75
LS
4039
4040int64_t bdrv_get_dirty_count(BlockDriverState *bs)
4041{
4042 return bs->dirty_count;
4043}
f88e1a42 4044
db593f25
MT
/* Mark the device as in use (or release it).  Asserts on redundant
 * transitions, i.e. the flag must actually change. */
void bdrv_set_in_use(BlockDriverState *bs, int in_use)
{
    assert(bs->in_use != in_use);
    bs->in_use = in_use;
}
4050
4051int bdrv_in_use(BlockDriverState *bs)
4052{
4053 return bs->in_use;
4054}
4055
28a7282a
LC
4056void bdrv_iostatus_enable(BlockDriverState *bs)
4057{
d6bf279e 4058 bs->iostatus_enabled = true;
58e21ef5 4059 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
28a7282a
LC
4060}
4061
4062/* The I/O status is only enabled if the drive explicitly
4063 * enables it _and_ the VM is configured to stop on errors */
4064bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
4065{
d6bf279e 4066 return (bs->iostatus_enabled &&
28a7282a
LC
4067 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
4068 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
4069 bs->on_read_error == BLOCK_ERR_STOP_ANY));
4070}
4071
4072void bdrv_iostatus_disable(BlockDriverState *bs)
4073{
d6bf279e 4074 bs->iostatus_enabled = false;
28a7282a
LC
4075}
4076
4077void bdrv_iostatus_reset(BlockDriverState *bs)
4078{
4079 if (bdrv_iostatus_is_enabled(bs)) {
58e21ef5 4080 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
28a7282a
LC
4081 }
4082}
4083
/* XXX: Today this is set by device models because it makes the implementation
   quite simple. However, the block layer knows about the error, so it's
   possible to implement this without device models being involved */
/*
 * Record an I/O error in the device's status.  Only the first error is
 * kept (status must still be OK); ENOSPC is distinguished from all other
 * errors.  'error' must be a positive errno value.
 */
void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
{
    if (bdrv_iostatus_is_enabled(bs) &&
        bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
        assert(error >= 0);
        bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
                                         BLOCK_DEVICE_IO_STATUS_FAILED;
    }
}
4096
a597e79c
CH
4097void
4098bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
4099 enum BlockAcctType type)
4100{
4101 assert(type < BDRV_MAX_IOTYPE);
4102
4103 cookie->bytes = bytes;
c488c7f6 4104 cookie->start_time_ns = get_clock();
a597e79c
CH
4105 cookie->type = type;
4106}
4107
4108void
4109bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
4110{
4111 assert(cookie->type < BDRV_MAX_IOTYPE);
4112
4113 bs->nr_bytes[cookie->type] += cookie->bytes;
4114 bs->nr_ops[cookie->type]++;
c488c7f6 4115 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
a597e79c
CH
4116}
4117
f88e1a42
JS
4118int bdrv_img_create(const char *filename, const char *fmt,
4119 const char *base_filename, const char *base_fmt,
4120 char *options, uint64_t img_size, int flags)
4121{
4122 QEMUOptionParameter *param = NULL, *create_options = NULL;
d220894e 4123 QEMUOptionParameter *backing_fmt, *backing_file, *size;
f88e1a42
JS
4124 BlockDriverState *bs = NULL;
4125 BlockDriver *drv, *proto_drv;
96df67d1 4126 BlockDriver *backing_drv = NULL;
f88e1a42
JS
4127 int ret = 0;
4128
4129 /* Find driver and parse its options */
4130 drv = bdrv_find_format(fmt);
4131 if (!drv) {
4132 error_report("Unknown file format '%s'", fmt);
4f70f249 4133 ret = -EINVAL;
f88e1a42
JS
4134 goto out;
4135 }
4136
4137 proto_drv = bdrv_find_protocol(filename);
4138 if (!proto_drv) {
4139 error_report("Unknown protocol '%s'", filename);
4f70f249 4140 ret = -EINVAL;
f88e1a42
JS
4141 goto out;
4142 }
4143
4144 create_options = append_option_parameters(create_options,
4145 drv->create_options);
4146 create_options = append_option_parameters(create_options,
4147 proto_drv->create_options);
4148
4149 /* Create parameter list with default values */
4150 param = parse_option_parameters("", create_options, param);
4151
4152 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
4153
4154 /* Parse -o options */
4155 if (options) {
4156 param = parse_option_parameters(options, create_options, param);
4157 if (param == NULL) {
4158 error_report("Invalid options for file format '%s'.", fmt);
4f70f249 4159 ret = -EINVAL;
f88e1a42
JS
4160 goto out;
4161 }
4162 }
4163
4164 if (base_filename) {
4165 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
4166 base_filename)) {
4167 error_report("Backing file not supported for file format '%s'",
4168 fmt);
4f70f249 4169 ret = -EINVAL;
f88e1a42
JS
4170 goto out;
4171 }
4172 }
4173
4174 if (base_fmt) {
4175 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
4176 error_report("Backing file format not supported for file "
4177 "format '%s'", fmt);
4f70f249 4178 ret = -EINVAL;
f88e1a42
JS
4179 goto out;
4180 }
4181 }
4182
792da93a
JS
4183 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
4184 if (backing_file && backing_file->value.s) {
4185 if (!strcmp(filename, backing_file->value.s)) {
4186 error_report("Error: Trying to create an image with the "
4187 "same filename as the backing file");
4f70f249 4188 ret = -EINVAL;
792da93a
JS
4189 goto out;
4190 }
4191 }
4192
f88e1a42
JS
4193 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
4194 if (backing_fmt && backing_fmt->value.s) {
96df67d1
SH
4195 backing_drv = bdrv_find_format(backing_fmt->value.s);
4196 if (!backing_drv) {
f88e1a42
JS
4197 error_report("Unknown backing file format '%s'",
4198 backing_fmt->value.s);
4f70f249 4199 ret = -EINVAL;
f88e1a42
JS
4200 goto out;
4201 }
4202 }
4203
4204 // The size for the image must always be specified, with one exception:
4205 // If we are using a backing file, we can obtain the size from there
d220894e
KW
4206 size = get_option_parameter(param, BLOCK_OPT_SIZE);
4207 if (size && size->value.n == -1) {
f88e1a42
JS
4208 if (backing_file && backing_file->value.s) {
4209 uint64_t size;
f88e1a42 4210 char buf[32];
63090dac
PB
4211 int back_flags;
4212
4213 /* backing files always opened read-only */
4214 back_flags =
4215 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
f88e1a42 4216
f88e1a42
JS
4217 bs = bdrv_new("");
4218
63090dac 4219 ret = bdrv_open(bs, backing_file->value.s, back_flags, backing_drv);
f88e1a42 4220 if (ret < 0) {
96df67d1 4221 error_report("Could not open '%s'", backing_file->value.s);
f88e1a42
JS
4222 goto out;
4223 }
4224 bdrv_get_geometry(bs, &size);
4225 size *= 512;
4226
4227 snprintf(buf, sizeof(buf), "%" PRId64, size);
4228 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
4229 } else {
4230 error_report("Image creation needs a size parameter");
4f70f249 4231 ret = -EINVAL;
f88e1a42
JS
4232 goto out;
4233 }
4234 }
4235
4236 printf("Formatting '%s', fmt=%s ", filename, fmt);
4237 print_option_parameters(param);
4238 puts("");
4239
4240 ret = bdrv_create(drv, filename, param);
4241
4242 if (ret < 0) {
4243 if (ret == -ENOTSUP) {
4244 error_report("Formatting or formatting option not supported for "
4245 "file format '%s'", fmt);
4246 } else if (ret == -EFBIG) {
4247 error_report("The image size is too large for file format '%s'",
4248 fmt);
4249 } else {
4250 error_report("%s: error while creating %s: %s", filename, fmt,
4251 strerror(-ret));
4252 }
4253 }
4254
4255out:
4256 free_option_parameters(create_options);
4257 free_option_parameters(param);
4258
4259 if (bs) {
4260 bdrv_delete(bs);
4261 }
4f70f249
JS
4262
4263 return ret;
f88e1a42 4264}
eeec61f2
SH
4265
4266void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
c83c66c3
SH
4267 int64_t speed, BlockDriverCompletionFunc *cb,
4268 void *opaque, Error **errp)
eeec61f2
SH
4269{
4270 BlockJob *job;
4271
4272 if (bs->job || bdrv_in_use(bs)) {
fd7f8c65 4273 error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
eeec61f2
SH
4274 return NULL;
4275 }
4276 bdrv_set_in_use(bs, 1);
4277
4278 job = g_malloc0(job_type->instance_size);
4279 job->job_type = job_type;
4280 job->bs = bs;
4281 job->cb = cb;
4282 job->opaque = opaque;
4513eafe 4283 job->busy = true;
eeec61f2 4284 bs->job = job;
c83c66c3
SH
4285
4286 /* Only set speed when necessary to avoid NotSupported error */
4287 if (speed != 0) {
4288 Error *local_err = NULL;
4289
4290 block_job_set_speed(job, speed, &local_err);
4291 if (error_is_set(&local_err)) {
4292 bs->job = NULL;
4293 g_free(job);
4294 bdrv_set_in_use(bs, 0);
4295 error_propagate(errp, local_err);
4296 return NULL;
4297 }
4298 }
eeec61f2
SH
4299 return job;
4300}
4301
4302void block_job_complete(BlockJob *job, int ret)
4303{
4304 BlockDriverState *bs = job->bs;
4305
4306 assert(bs->job == job);
4307 job->cb(job->opaque, ret);
4308 bs->job = NULL;
4309 g_free(job);
4310 bdrv_set_in_use(bs, 0);
4311}
4312
882ec7ce 4313void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
eeec61f2 4314{
9e6636c7 4315 Error *local_err = NULL;
9f25eccc 4316
eeec61f2 4317 if (!job->job_type->set_speed) {
9e6636c7
SH
4318 error_set(errp, QERR_NOT_SUPPORTED);
4319 return;
eeec61f2 4320 }
882ec7ce 4321 job->job_type->set_speed(job, speed, &local_err);
9e6636c7
SH
4322 if (error_is_set(&local_err)) {
4323 error_propagate(errp, local_err);
4324 return;
9f25eccc 4325 }
9e6636c7 4326
882ec7ce 4327 job->speed = speed;
eeec61f2
SH
4328}
4329
4330void block_job_cancel(BlockJob *job)
4331{
4332 job->cancelled = true;
fa4478d5
PB
4333 if (job->co && !job->busy) {
4334 qemu_coroutine_enter(job->co, NULL);
4335 }
eeec61f2
SH
4336}
4337
4338bool block_job_is_cancelled(BlockJob *job)
4339{
4340 return job->cancelled;
4341}
3e914655 4342
fa4478d5
PB
4343struct BlockCancelData {
4344 BlockJob *job;
4345 BlockDriverCompletionFunc *cb;
4346 void *opaque;
4347 bool cancelled;
4348 int ret;
4349};
4350
4351static void block_job_cancel_cb(void *opaque, int ret)
3e914655 4352{
fa4478d5
PB
4353 struct BlockCancelData *data = opaque;
4354
4355 data->cancelled = block_job_is_cancelled(data->job);
4356 data->ret = ret;
4357 data->cb(data->opaque, ret);
4358}
4359
4360int block_job_cancel_sync(BlockJob *job)
4361{
4362 struct BlockCancelData data;
3e914655
PB
4363 BlockDriverState *bs = job->bs;
4364
4365 assert(bs->job == job);
fa4478d5
PB
4366
4367 /* Set up our own callback to store the result and chain to
4368 * the original callback.
4369 */
4370 data.job = job;
4371 data.cb = job->cb;
4372 data.opaque = job->opaque;
4373 data.ret = -EINPROGRESS;
4374 job->cb = block_job_cancel_cb;
4375 job->opaque = &data;
3e914655 4376 block_job_cancel(job);
fa4478d5 4377 while (data.ret == -EINPROGRESS) {
3e914655
PB
4378 qemu_aio_wait();
4379 }
fa4478d5 4380 return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret;
3e914655 4381}
4513eafe
PB
4382
4383void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns)
4384{
4385 /* Check cancellation *before* setting busy = false, too! */
4386 if (!block_job_is_cancelled(job)) {
4387 job->busy = false;
4388 co_sleep_ns(clock, ns);
4389 job->busy = true;
4390 }
4391}