]> git.proxmox.com Git - mirror_qemu.git/blame - block.c
block: add bdrv_set_enable_write_cache
[mirror_qemu.git] / block.c
CommitLineData
fc01f7e7
FB
1/*
2 * QEMU System Emulator block driver
5fafdf24 3 *
fc01f7e7 4 * Copyright (c) 2003 Fabrice Bellard
5fafdf24 5 *
fc01f7e7
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
3990d09a 24#include "config-host.h"
faf07963 25#include "qemu-common.h"
6d519a5f 26#include "trace.h"
376253ec 27#include "monitor.h"
ea2384d3 28#include "block_int.h"
5efa9d5a 29#include "module.h"
f795e743 30#include "qjson.h"
68485420 31#include "qemu-coroutine.h"
b2023818 32#include "qmp-commands.h"
0563e191 33#include "qemu-timer.h"
fc01f7e7 34
71e72a19 35#ifdef CONFIG_BSD
7674e7bf
FB
36#include <sys/types.h>
37#include <sys/stat.h>
38#include <sys/ioctl.h>
72cf2d4f 39#include <sys/queue.h>
c5e97233 40#ifndef __DragonFly__
7674e7bf
FB
41#include <sys/disk.h>
42#endif
c5e97233 43#endif
7674e7bf 44
49dc768d
AL
45#ifdef _WIN32
46#include <windows.h>
47#endif
48
1c9805a3
SH
49#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
50
470c0504
SH
/* Per-request modifier flags passed down the bdrv_co_do_readv/writev path. */
typedef enum {
    BDRV_REQ_COPY_ON_READ = 0x1,  /* populate the image from backing file on read */
    BDRV_REQ_ZERO_WRITE   = 0x2,  /* request is a write of zeroes */
} BdrvRequestFlags;
55
7d4b4ba5 56static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
f141eafe
AL
57static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
58 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
c87c0672 59 BlockDriverCompletionFunc *cb, void *opaque);
f141eafe
AL
60static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
61 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 62 BlockDriverCompletionFunc *cb, void *opaque);
f9f05dc5
KW
63static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
64 int64_t sector_num, int nb_sectors,
65 QEMUIOVector *iov);
66static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
67 int64_t sector_num, int nb_sectors,
68 QEMUIOVector *iov);
c5fbe571 69static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
470c0504
SH
70 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
71 BdrvRequestFlags flags);
1c9805a3 72static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
f08f2dda
SH
73 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
74 BdrvRequestFlags flags);
b2a61371
SH
75static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
76 int64_t sector_num,
77 QEMUIOVector *qiov,
78 int nb_sectors,
79 BlockDriverCompletionFunc *cb,
80 void *opaque,
8c5873d6 81 bool is_write);
b2a61371 82static void coroutine_fn bdrv_co_do_rw(void *opaque);
621f0589
KW
83static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
84 int64_t sector_num, int nb_sectors);
ec530c81 85
98f90dba
ZYW
86static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
87 bool is_write, double elapsed_time, uint64_t *wait);
88static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
89 double elapsed_time, uint64_t *wait);
90static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
91 bool is_write, int64_t *wait);
92
1b7bdbc1
SH
93static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
94 QTAILQ_HEAD_INITIALIZER(bdrv_states);
7ee930d0 95
8a22f02a
SH
96static QLIST_HEAD(, BlockDriver) bdrv_drivers =
97 QLIST_HEAD_INITIALIZER(bdrv_drivers);
ea2384d3 98
f9092b10
MA
99/* The device to use for VM snapshots */
100static BlockDriverState *bs_snapshots;
101
eb852011
MA
102/* If non-zero, use only whitelisted block drivers */
103static int use_bdrv_whitelist;
104
9e0b22f4
SH
105#ifdef _WIN32
/* Return non-zero if @filename begins with a DOS drive letter followed
 * by a colon, e.g. "c:" or "Z:\path". */
static int is_windows_drive_prefix(const char *filename)
{
    char c = filename[0];

    if ((c < 'a' || c > 'z') && (c < 'A' || c > 'Z')) {
        return 0;
    }
    return filename[1] == ':';
}
113int is_windows_drive(const char *filename)
114{
115 if (is_windows_drive_prefix(filename) &&
116 filename[2] == '\0')
117 return 1;
118 if (strstart(filename, "\\\\.\\", NULL) ||
119 strstart(filename, "//./", NULL))
120 return 1;
121 return 0;
122}
123#endif
124
0563e191 125/* throttling disk I/O limits */
98f90dba
ZYW
126void bdrv_io_limits_disable(BlockDriverState *bs)
127{
128 bs->io_limits_enabled = false;
129
130 while (qemu_co_queue_next(&bs->throttled_reqs));
131
132 if (bs->block_timer) {
133 qemu_del_timer(bs->block_timer);
134 qemu_free_timer(bs->block_timer);
135 bs->block_timer = NULL;
136 }
137
138 bs->slice_start = 0;
139 bs->slice_end = 0;
140 bs->slice_time = 0;
141 memset(&bs->io_base, 0, sizeof(bs->io_base));
142}
143
0563e191
ZYW
144static void bdrv_block_timer(void *opaque)
145{
146 BlockDriverState *bs = opaque;
147
148 qemu_co_queue_next(&bs->throttled_reqs);
149}
150
151void bdrv_io_limits_enable(BlockDriverState *bs)
152{
153 qemu_co_queue_init(&bs->throttled_reqs);
154 bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
155 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
156 bs->slice_start = qemu_get_clock_ns(vm_clock);
157 bs->slice_end = bs->slice_start + bs->slice_time;
158 memset(&bs->io_base, 0, sizeof(bs->io_base));
159 bs->io_limits_enabled = true;
160}
161
162bool bdrv_io_limits_enabled(BlockDriverState *bs)
163{
164 BlockIOLimit *io_limits = &bs->io_limits;
165 return io_limits->bps[BLOCK_IO_LIMIT_READ]
166 || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
167 || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
168 || io_limits->iops[BLOCK_IO_LIMIT_READ]
169 || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
170 || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
171}
172
98f90dba
ZYW
173static void bdrv_io_limits_intercept(BlockDriverState *bs,
174 bool is_write, int nb_sectors)
175{
176 int64_t wait_time = -1;
177
178 if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
179 qemu_co_queue_wait(&bs->throttled_reqs);
180 }
181
182 /* In fact, we hope to keep each request's timing, in FIFO mode. The next
183 * throttled requests will not be dequeued until the current request is
184 * allowed to be serviced. So if the current request still exceeds the
185 * limits, it will be inserted to the head. All requests followed it will
186 * be still in throttled_reqs queue.
187 */
188
189 while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
190 qemu_mod_timer(bs->block_timer,
191 wait_time + qemu_get_clock_ns(vm_clock));
192 qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
193 }
194
195 qemu_co_queue_next(&bs->throttled_reqs);
196}
197
9e0b22f4
SH
/* Return non-zero if @path starts with a "<protocol>:" prefix (i.e. a
 * colon appears before any path separator). */
static int path_has_protocol(const char *path)
{
    const char *sep;

#ifdef _WIN32
    /* Drive letters ("c:", "c:\...") are not protocols. */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
    sep = path + strcspn(path, ":/\\");
#else
    sep = path + strcspn(path, ":/");
#endif

    return *sep == ':';
}
215
83f64091 216int path_is_absolute(const char *path)
3b0d4f61 217{
21664424
FB
218#ifdef _WIN32
219 /* specific case for names like: "\\.\d:" */
f53f4da9 220 if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
21664424 221 return 1;
f53f4da9
PB
222 }
223 return (*path == '/' || *path == '\\');
3b9f94e1 224#else
f53f4da9 225 return (*path == '/');
3b9f94e1 226#endif
3b0d4f61
FB
227}
228
83f64091
FB
/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URL are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Skip a "<protocol>:" prefix of base_path, if present. */
    const char *prefix_end = strchr(base_path, ':');
    prefix_end = prefix_end ? prefix_end + 1 : base_path;

    /* Find the character just past the last path separator. */
    const char *last_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *bslash = strrchr(base_path, '\\');
        if (!last_sep || bslash > last_sep) {
            last_sep = bslash;
        }
    }
#endif
    last_sep = last_sep ? last_sep + 1 : base_path;

    /* Keep whichever boundary lies further into base_path. */
    if (last_sep > prefix_end) {
        prefix_end = last_sep;
    }

    int len = prefix_end - base_path;
    if (len > dest_size - 1) {
        len = dest_size - 1;
    }
    memcpy(dest, base_path, len);
    dest[len] = '\0';
    pstrcat(dest, dest_size, filename);
}
272
dc5a1371
PB
273void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
274{
275 if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
276 pstrcpy(dest, sz, bs->backing_file);
277 } else {
278 path_combine(dest, sz, bs->filename, bs->backing_file);
279 }
280}
281
5efa9d5a 282void bdrv_register(BlockDriver *bdrv)
ea2384d3 283{
8c5873d6
SH
284 /* Block drivers without coroutine functions need emulation */
285 if (!bdrv->bdrv_co_readv) {
f9f05dc5
KW
286 bdrv->bdrv_co_readv = bdrv_co_readv_em;
287 bdrv->bdrv_co_writev = bdrv_co_writev_em;
288
f8c35c1d
SH
289 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
290 * the block driver lacks aio we need to emulate that too.
291 */
f9f05dc5
KW
292 if (!bdrv->bdrv_aio_readv) {
293 /* add AIO emulation layer */
294 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
295 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
f9f05dc5 296 }
83f64091 297 }
b2e12bc6 298
8a22f02a 299 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
ea2384d3 300}
b338082b
FB
301
302/* create a new block device (by default it is empty) */
303BlockDriverState *bdrv_new(const char *device_name)
304{
1b7bdbc1 305 BlockDriverState *bs;
b338082b 306
7267c094 307 bs = g_malloc0(sizeof(BlockDriverState));
b338082b 308 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
ea2384d3 309 if (device_name[0] != '\0') {
1b7bdbc1 310 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
ea2384d3 311 }
28a7282a 312 bdrv_iostatus_disable(bs);
b338082b
FB
313 return bs;
314}
315
ea2384d3
FB
316BlockDriver *bdrv_find_format(const char *format_name)
317{
318 BlockDriver *drv1;
8a22f02a
SH
319 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
320 if (!strcmp(drv1->format_name, format_name)) {
ea2384d3 321 return drv1;
8a22f02a 322 }
ea2384d3
FB
323 }
324 return NULL;
325}
326
eb852011
MA
327static int bdrv_is_whitelisted(BlockDriver *drv)
328{
329 static const char *whitelist[] = {
330 CONFIG_BDRV_WHITELIST
331 };
332 const char **p;
333
334 if (!whitelist[0])
335 return 1; /* no whitelist, anything goes */
336
337 for (p = whitelist; *p; p++) {
338 if (!strcmp(drv->format_name, *p)) {
339 return 1;
340 }
341 }
342 return 0;
343}
344
345BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
346{
347 BlockDriver *drv = bdrv_find_format(format_name);
348 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
349}
350
5b7e1542
ZYW
351typedef struct CreateCo {
352 BlockDriver *drv;
353 char *filename;
354 QEMUOptionParameter *options;
355 int ret;
356} CreateCo;
357
358static void coroutine_fn bdrv_create_co_entry(void *opaque)
359{
360 CreateCo *cco = opaque;
361 assert(cco->drv);
362
363 cco->ret = cco->drv->bdrv_create(cco->filename, cco->options);
364}
365
0e7e1989
KW
366int bdrv_create(BlockDriver *drv, const char* filename,
367 QEMUOptionParameter *options)
ea2384d3 368{
5b7e1542
ZYW
369 int ret;
370
371 Coroutine *co;
372 CreateCo cco = {
373 .drv = drv,
374 .filename = g_strdup(filename),
375 .options = options,
376 .ret = NOT_DONE,
377 };
378
379 if (!drv->bdrv_create) {
ea2384d3 380 return -ENOTSUP;
5b7e1542
ZYW
381 }
382
383 if (qemu_in_coroutine()) {
384 /* Fast-path if already in coroutine context */
385 bdrv_create_co_entry(&cco);
386 } else {
387 co = qemu_coroutine_create(bdrv_create_co_entry);
388 qemu_coroutine_enter(co, &cco);
389 while (cco.ret == NOT_DONE) {
390 qemu_aio_wait();
391 }
392 }
393
394 ret = cco.ret;
395 g_free(cco.filename);
0e7e1989 396
5b7e1542 397 return ret;
ea2384d3
FB
398}
399
84a12e66
CH
400int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
401{
402 BlockDriver *drv;
403
b50cbabc 404 drv = bdrv_find_protocol(filename);
84a12e66 405 if (drv == NULL) {
16905d71 406 return -ENOENT;
84a12e66
CH
407 }
408
409 return bdrv_create(drv, filename, options);
410}
411
eba25057
JM
/*
 * Create a uniquely-named empty temporary file.
 * Return 0 upon success, otherwise a negative errno value.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater. */
    assert(size >= MAX_PATH);
    if (GetTempPath(MAX_PATH, temp_dir)
        && GetTempFileName(temp_dir, "qem", 0, filename)) {
        return 0;
    }
    return -GetLastError();
#else
    const char *tmpdir = getenv("TMPDIR");
    int fd;

    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* Fix: the original reported the error but left the freshly
         * created temp file behind; remove it before failing. */
        int err = -errno;
        unlink(filename);
        return err;
    }
    return 0;
#endif
}
fc01f7e7 442
84a12e66
CH
443/*
444 * Detect host devices. By convention, /dev/cdrom[N] is always
445 * recognized as a host CDROM.
446 */
447static BlockDriver *find_hdev_driver(const char *filename)
448{
449 int score_max = 0, score;
450 BlockDriver *drv = NULL, *d;
451
452 QLIST_FOREACH(d, &bdrv_drivers, list) {
453 if (d->bdrv_probe_device) {
454 score = d->bdrv_probe_device(filename);
455 if (score > score_max) {
456 score_max = score;
457 drv = d;
458 }
459 }
460 }
461
462 return drv;
463}
464
b50cbabc 465BlockDriver *bdrv_find_protocol(const char *filename)
83f64091
FB
466{
467 BlockDriver *drv1;
468 char protocol[128];
1cec71e3 469 int len;
83f64091 470 const char *p;
19cb3738 471
66f82cee
KW
472 /* TODO Drivers without bdrv_file_open must be specified explicitly */
473
39508e7a
CH
474 /*
475 * XXX(hch): we really should not let host device detection
476 * override an explicit protocol specification, but moving this
477 * later breaks access to device names with colons in them.
478 * Thanks to the brain-dead persistent naming schemes on udev-
479 * based Linux systems those actually are quite common.
480 */
481 drv1 = find_hdev_driver(filename);
482 if (drv1) {
483 return drv1;
484 }
485
9e0b22f4 486 if (!path_has_protocol(filename)) {
39508e7a 487 return bdrv_find_format("file");
84a12e66 488 }
9e0b22f4
SH
489 p = strchr(filename, ':');
490 assert(p != NULL);
1cec71e3
AL
491 len = p - filename;
492 if (len > sizeof(protocol) - 1)
493 len = sizeof(protocol) - 1;
494 memcpy(protocol, filename, len);
495 protocol[len] = '\0';
8a22f02a 496 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
5fafdf24 497 if (drv1->protocol_name &&
8a22f02a 498 !strcmp(drv1->protocol_name, protocol)) {
83f64091 499 return drv1;
8a22f02a 500 }
83f64091
FB
501 }
502 return NULL;
503}
504
c98ac35d 505static int find_image_format(const char *filename, BlockDriver **pdrv)
f3a5d3f8
CH
506{
507 int ret, score, score_max;
508 BlockDriver *drv1, *drv;
509 uint8_t buf[2048];
510 BlockDriverState *bs;
511
f5edb014 512 ret = bdrv_file_open(&bs, filename, 0);
c98ac35d
SW
513 if (ret < 0) {
514 *pdrv = NULL;
515 return ret;
516 }
f8ea0b00 517
08a00559
KW
518 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
519 if (bs->sg || !bdrv_is_inserted(bs)) {
1a396859 520 bdrv_delete(bs);
c98ac35d
SW
521 drv = bdrv_find_format("raw");
522 if (!drv) {
523 ret = -ENOENT;
524 }
525 *pdrv = drv;
526 return ret;
1a396859 527 }
f8ea0b00 528
83f64091
FB
529 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
530 bdrv_delete(bs);
531 if (ret < 0) {
c98ac35d
SW
532 *pdrv = NULL;
533 return ret;
83f64091
FB
534 }
535
ea2384d3 536 score_max = 0;
84a12e66 537 drv = NULL;
8a22f02a 538 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
83f64091
FB
539 if (drv1->bdrv_probe) {
540 score = drv1->bdrv_probe(buf, ret, filename);
541 if (score > score_max) {
542 score_max = score;
543 drv = drv1;
544 }
0849bf08 545 }
fc01f7e7 546 }
c98ac35d
SW
547 if (!drv) {
548 ret = -ENOENT;
549 }
550 *pdrv = drv;
551 return ret;
ea2384d3
FB
552}
553
51762288
SH
554/**
555 * Set the current 'total_sectors' value
556 */
557static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
558{
559 BlockDriver *drv = bs->drv;
560
396759ad
NB
561 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
562 if (bs->sg)
563 return 0;
564
51762288
SH
565 /* query actual device if possible, otherwise just trust the hint */
566 if (drv->bdrv_getlength) {
567 int64_t length = drv->bdrv_getlength(bs);
568 if (length < 0) {
569 return length;
570 }
571 hint = length >> BDRV_SECTOR_BITS;
572 }
573
574 bs->total_sectors = hint;
575 return 0;
576}
577
c3993cdc
SH
578/**
579 * Set open flags for a given cache mode
580 *
581 * Return 0 on success, -1 if the cache mode was invalid.
582 */
583int bdrv_parse_cache_flags(const char *mode, int *flags)
584{
585 *flags &= ~BDRV_O_CACHE_MASK;
586
587 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
588 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
92196b2f
SH
589 } else if (!strcmp(mode, "directsync")) {
590 *flags |= BDRV_O_NOCACHE;
c3993cdc
SH
591 } else if (!strcmp(mode, "writeback")) {
592 *flags |= BDRV_O_CACHE_WB;
593 } else if (!strcmp(mode, "unsafe")) {
594 *flags |= BDRV_O_CACHE_WB;
595 *flags |= BDRV_O_NO_FLUSH;
596 } else if (!strcmp(mode, "writethrough")) {
597 /* this is the default */
598 } else {
599 return -1;
600 }
601
602 return 0;
603}
604
53fec9d3
SH
605/**
606 * The copy-on-read flag is actually a reference count so multiple users may
607 * use the feature without worrying about clobbering its previous state.
608 * Copy-on-read stays enabled until all users have called to disable it.
609 */
610void bdrv_enable_copy_on_read(BlockDriverState *bs)
611{
612 bs->copy_on_read++;
613}
614
615void bdrv_disable_copy_on_read(BlockDriverState *bs)
616{
617 assert(bs->copy_on_read > 0);
618 bs->copy_on_read--;
619}
620
57915332
KW
621/*
622 * Common part for opening disk images and files
623 */
624static int bdrv_open_common(BlockDriverState *bs, const char *filename,
625 int flags, BlockDriver *drv)
626{
627 int ret, open_flags;
628
629 assert(drv != NULL);
6405875c 630 assert(bs->file == NULL);
57915332 631
28dcee10
SH
632 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
633
57915332 634 bs->open_flags = flags;
57915332
KW
635 bs->buffer_alignment = 512;
636
53fec9d3
SH
637 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
638 if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
639 bdrv_enable_copy_on_read(bs);
640 }
641
57915332
KW
642 pstrcpy(bs->filename, sizeof(bs->filename), filename);
643
644 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
645 return -ENOTSUP;
646 }
647
648 bs->drv = drv;
7267c094 649 bs->opaque = g_malloc0(drv->instance_size);
57915332 650
03f541bd 651 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
57915332
KW
652
653 /*
654 * Clear flags that are internal to the block layer before opening the
655 * image.
656 */
657 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
658
659 /*
ebabb67a 660 * Snapshots should be writable.
57915332
KW
661 */
662 if (bs->is_temporary) {
663 open_flags |= BDRV_O_RDWR;
664 }
665
e7c63796
SH
666 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
667
66f82cee
KW
668 /* Open the image, either directly or using a protocol */
669 if (drv->bdrv_file_open) {
670 ret = drv->bdrv_file_open(bs, filename, open_flags);
671 } else {
672 ret = bdrv_file_open(&bs->file, filename, open_flags);
673 if (ret >= 0) {
674 ret = drv->bdrv_open(bs, open_flags);
675 }
676 }
677
57915332
KW
678 if (ret < 0) {
679 goto free_and_fail;
680 }
681
51762288
SH
682 ret = refresh_total_sectors(bs, bs->total_sectors);
683 if (ret < 0) {
684 goto free_and_fail;
57915332 685 }
51762288 686
57915332
KW
687#ifndef _WIN32
688 if (bs->is_temporary) {
689 unlink(filename);
690 }
691#endif
692 return 0;
693
694free_and_fail:
66f82cee
KW
695 if (bs->file) {
696 bdrv_delete(bs->file);
697 bs->file = NULL;
698 }
7267c094 699 g_free(bs->opaque);
57915332
KW
700 bs->opaque = NULL;
701 bs->drv = NULL;
702 return ret;
703}
704
b6ce07aa
KW
705/*
706 * Opens a file using a protocol (file, host_device, nbd, ...)
707 */
83f64091 708int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
ea2384d3 709{
83f64091 710 BlockDriverState *bs;
6db95603 711 BlockDriver *drv;
83f64091
FB
712 int ret;
713
b50cbabc 714 drv = bdrv_find_protocol(filename);
6db95603
CH
715 if (!drv) {
716 return -ENOENT;
717 }
718
83f64091 719 bs = bdrv_new("");
b6ce07aa 720 ret = bdrv_open_common(bs, filename, flags, drv);
83f64091
FB
721 if (ret < 0) {
722 bdrv_delete(bs);
723 return ret;
3b0d4f61 724 }
71d0770c 725 bs->growable = 1;
83f64091
FB
726 *pbs = bs;
727 return 0;
728}
729
b6ce07aa
KW
730/*
731 * Opens a disk image (raw, qcow2, vmdk, ...)
732 */
d6e9098e
KW
733int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
734 BlockDriver *drv)
ea2384d3 735{
b6ce07aa 736 int ret;
2b572816 737 char tmp_filename[PATH_MAX];
712e7874 738
83f64091 739 if (flags & BDRV_O_SNAPSHOT) {
ea2384d3
FB
740 BlockDriverState *bs1;
741 int64_t total_size;
7c96d46e 742 int is_protocol = 0;
91a073a9
KW
743 BlockDriver *bdrv_qcow2;
744 QEMUOptionParameter *options;
b6ce07aa 745 char backing_filename[PATH_MAX];
3b46e624 746
ea2384d3
FB
747 /* if snapshot, we create a temporary backing file and open it
748 instead of opening 'filename' directly */
33e3963e 749
ea2384d3
FB
750 /* if there is a backing file, use it */
751 bs1 = bdrv_new("");
d6e9098e 752 ret = bdrv_open(bs1, filename, 0, drv);
51d7c00c 753 if (ret < 0) {
ea2384d3 754 bdrv_delete(bs1);
51d7c00c 755 return ret;
ea2384d3 756 }
3e82990b 757 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
7c96d46e
AL
758
759 if (bs1->drv && bs1->drv->protocol_name)
760 is_protocol = 1;
761
ea2384d3 762 bdrv_delete(bs1);
3b46e624 763
eba25057
JM
764 ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename));
765 if (ret < 0) {
766 return ret;
767 }
7c96d46e
AL
768
769 /* Real path is meaningless for protocols */
770 if (is_protocol)
771 snprintf(backing_filename, sizeof(backing_filename),
772 "%s", filename);
114cdfa9
KS
773 else if (!realpath(filename, backing_filename))
774 return -errno;
7c96d46e 775
91a073a9
KW
776 bdrv_qcow2 = bdrv_find_format("qcow2");
777 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
778
3e82990b 779 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
91a073a9
KW
780 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
781 if (drv) {
782 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
783 drv->format_name);
784 }
785
786 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
d748768c 787 free_option_parameters(options);
51d7c00c
AL
788 if (ret < 0) {
789 return ret;
ea2384d3 790 }
91a073a9 791
ea2384d3 792 filename = tmp_filename;
91a073a9 793 drv = bdrv_qcow2;
ea2384d3
FB
794 bs->is_temporary = 1;
795 }
712e7874 796
b6ce07aa 797 /* Find the right image format driver */
6db95603 798 if (!drv) {
c98ac35d 799 ret = find_image_format(filename, &drv);
51d7c00c 800 }
6987307c 801
51d7c00c 802 if (!drv) {
51d7c00c 803 goto unlink_and_fail;
ea2384d3 804 }
b6ce07aa
KW
805
806 /* Open the image */
807 ret = bdrv_open_common(bs, filename, flags, drv);
808 if (ret < 0) {
6987307c
CH
809 goto unlink_and_fail;
810 }
811
b6ce07aa
KW
812 /* If there is a backing file, use it */
813 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
814 char backing_filename[PATH_MAX];
815 int back_flags;
816 BlockDriver *back_drv = NULL;
817
818 bs->backing_hd = bdrv_new("");
dc5a1371
PB
819 bdrv_get_full_backing_filename(bs, backing_filename,
820 sizeof(backing_filename));
df2dbb4a
SH
821
822 if (bs->backing_format[0] != '\0') {
b6ce07aa 823 back_drv = bdrv_find_format(bs->backing_format);
df2dbb4a 824 }
b6ce07aa
KW
825
826 /* backing files always opened read-only */
827 back_flags =
828 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
829
830 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
831 if (ret < 0) {
832 bdrv_close(bs);
833 return ret;
834 }
835 if (bs->is_temporary) {
836 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
837 } else {
838 /* base image inherits from "parent" */
839 bs->backing_hd->keep_read_only = bs->keep_read_only;
840 }
841 }
842
843 if (!bdrv_key_required(bs)) {
7d4b4ba5 844 bdrv_dev_change_media_cb(bs, true);
b6ce07aa
KW
845 }
846
98f90dba
ZYW
847 /* throttling disk I/O limits */
848 if (bs->io_limits_enabled) {
849 bdrv_io_limits_enable(bs);
850 }
851
b6ce07aa
KW
852 return 0;
853
854unlink_and_fail:
855 if (bs->is_temporary) {
856 unlink(filename);
857 }
858 return ret;
859}
860
fc01f7e7
FB
861void bdrv_close(BlockDriverState *bs)
862{
80ccf93b 863 bdrv_flush(bs);
19cb3738 864 if (bs->drv) {
3e914655
PB
865 if (bs->job) {
866 block_job_cancel_sync(bs->job);
867 }
7094f12f
KW
868 bdrv_drain_all();
869
f9092b10
MA
870 if (bs == bs_snapshots) {
871 bs_snapshots = NULL;
872 }
557df6ac 873 if (bs->backing_hd) {
ea2384d3 874 bdrv_delete(bs->backing_hd);
557df6ac
SH
875 bs->backing_hd = NULL;
876 }
ea2384d3 877 bs->drv->bdrv_close(bs);
7267c094 878 g_free(bs->opaque);
ea2384d3
FB
879#ifdef _WIN32
880 if (bs->is_temporary) {
881 unlink(bs->filename);
882 }
67b915a5 883#endif
ea2384d3
FB
884 bs->opaque = NULL;
885 bs->drv = NULL;
53fec9d3 886 bs->copy_on_read = 0;
a275fa42
PB
887 bs->backing_file[0] = '\0';
888 bs->backing_format[0] = '\0';
6405875c
PB
889 bs->total_sectors = 0;
890 bs->encrypted = 0;
891 bs->valid_key = 0;
892 bs->sg = 0;
893 bs->growable = 0;
b338082b 894
66f82cee 895 if (bs->file != NULL) {
0ac9377d
PB
896 bdrv_delete(bs->file);
897 bs->file = NULL;
66f82cee
KW
898 }
899
7d4b4ba5 900 bdrv_dev_change_media_cb(bs, false);
b338082b 901 }
98f90dba
ZYW
902
903 /*throttling disk I/O limits*/
904 if (bs->io_limits_enabled) {
905 bdrv_io_limits_disable(bs);
906 }
b338082b
FB
907}
908
2bc93fed
MK
909void bdrv_close_all(void)
910{
911 BlockDriverState *bs;
912
913 QTAILQ_FOREACH(bs, &bdrv_states, list) {
914 bdrv_close(bs);
915 }
916}
917
922453bc
SH
918/*
919 * Wait for pending requests to complete across all BlockDriverStates
920 *
921 * This function does not flush data to disk, use bdrv_flush_all() for that
922 * after calling this function.
4c355d53
ZYW
923 *
924 * Note that completion of an asynchronous I/O operation can trigger any
925 * number of other I/O operations on other devices---for example a coroutine
926 * can be arbitrarily complex and a constant flow of I/O can come until the
927 * coroutine is complete. Because of this, it is not possible to have a
928 * function to drain a single device's I/O queue.
922453bc
SH
929 */
930void bdrv_drain_all(void)
931{
932 BlockDriverState *bs;
4c355d53
ZYW
933 bool busy;
934
935 do {
936 busy = qemu_aio_wait();
922453bc 937
4c355d53
ZYW
938 /* FIXME: We do not have timer support here, so this is effectively
939 * a busy wait.
940 */
941 QTAILQ_FOREACH(bs, &bdrv_states, list) {
942 if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
943 qemu_co_queue_restart_all(&bs->throttled_reqs);
944 busy = true;
945 }
946 }
947 } while (busy);
922453bc
SH
948
949 /* If requests are still pending there is a bug somewhere */
950 QTAILQ_FOREACH(bs, &bdrv_states, list) {
951 assert(QLIST_EMPTY(&bs->tracked_requests));
952 assert(qemu_co_queue_empty(&bs->throttled_reqs));
953 }
954}
955
d22b2f41
RH
956/* make a BlockDriverState anonymous by removing from bdrv_state list.
957 Also, NULL terminate the device_name to prevent double remove */
958void bdrv_make_anon(BlockDriverState *bs)
959{
960 if (bs->device_name[0] != '\0') {
961 QTAILQ_REMOVE(&bdrv_states, bs, list);
962 }
963 bs->device_name[0] = '\0';
964}
965
e023b2e2
PB
966static void bdrv_rebind(BlockDriverState *bs)
967{
968 if (bs->drv && bs->drv->bdrv_rebind) {
969 bs->drv->bdrv_rebind(bs);
970 }
971}
972
8802d1fd
JC
973/*
974 * Add new bs contents at the top of an image chain while the chain is
975 * live, while keeping required fields on the top layer.
976 *
977 * This will modify the BlockDriverState fields, and swap contents
978 * between bs_new and bs_top. Both bs_new and bs_top are modified.
979 *
f6801b83
JC
980 * bs_new is required to be anonymous.
981 *
8802d1fd
JC
982 * This function does not create any image files.
983 */
984void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
985{
986 BlockDriverState tmp;
987
f6801b83
JC
988 /* bs_new must be anonymous */
989 assert(bs_new->device_name[0] == '\0');
8802d1fd
JC
990
991 tmp = *bs_new;
992
993 /* there are some fields that need to stay on the top layer: */
3a389e79 994 tmp.open_flags = bs_top->open_flags;
8802d1fd
JC
995
996 /* dev info */
997 tmp.dev_ops = bs_top->dev_ops;
998 tmp.dev_opaque = bs_top->dev_opaque;
999 tmp.dev = bs_top->dev;
1000 tmp.buffer_alignment = bs_top->buffer_alignment;
1001 tmp.copy_on_read = bs_top->copy_on_read;
1002
c4a248a1
PB
1003 tmp.enable_write_cache = bs_top->enable_write_cache;
1004
8802d1fd
JC
1005 /* i/o timing parameters */
1006 tmp.slice_time = bs_top->slice_time;
1007 tmp.slice_start = bs_top->slice_start;
1008 tmp.slice_end = bs_top->slice_end;
1009 tmp.io_limits = bs_top->io_limits;
1010 tmp.io_base = bs_top->io_base;
1011 tmp.throttled_reqs = bs_top->throttled_reqs;
1012 tmp.block_timer = bs_top->block_timer;
1013 tmp.io_limits_enabled = bs_top->io_limits_enabled;
1014
1015 /* geometry */
1016 tmp.cyls = bs_top->cyls;
1017 tmp.heads = bs_top->heads;
1018 tmp.secs = bs_top->secs;
1019 tmp.translation = bs_top->translation;
1020
1021 /* r/w error */
1022 tmp.on_read_error = bs_top->on_read_error;
1023 tmp.on_write_error = bs_top->on_write_error;
1024
1025 /* i/o status */
1026 tmp.iostatus_enabled = bs_top->iostatus_enabled;
1027 tmp.iostatus = bs_top->iostatus;
1028
1029 /* keep the same entry in bdrv_states */
1030 pstrcpy(tmp.device_name, sizeof(tmp.device_name), bs_top->device_name);
1031 tmp.list = bs_top->list;
1032
1033 /* The contents of 'tmp' will become bs_top, as we are
1034 * swapping bs_new and bs_top contents. */
1035 tmp.backing_hd = bs_new;
1036 pstrcpy(tmp.backing_file, sizeof(tmp.backing_file), bs_top->filename);
f6801b83 1037 bdrv_get_format(bs_top, tmp.backing_format, sizeof(tmp.backing_format));
8802d1fd
JC
1038
1039 /* swap contents of the fixed new bs and the current top */
1040 *bs_new = *bs_top;
1041 *bs_top = tmp;
1042
f6801b83
JC
1043 /* device_name[] was carried over from the old bs_top. bs_new
1044 * shouldn't be in bdrv_states, so we need to make device_name[]
1045 * reflect the anonymity of bs_new
1046 */
1047 bs_new->device_name[0] = '\0';
1048
8802d1fd
JC
1049 /* clear the copied fields in the new backing file */
1050 bdrv_detach_dev(bs_new, bs_new->dev);
1051
1052 qemu_co_queue_init(&bs_new->throttled_reqs);
1053 memset(&bs_new->io_base, 0, sizeof(bs_new->io_base));
1054 memset(&bs_new->io_limits, 0, sizeof(bs_new->io_limits));
1055 bdrv_iostatus_disable(bs_new);
1056
1057 /* we don't use bdrv_io_limits_disable() for this, because we don't want
1058 * to affect or delete the block_timer, as it has been moved to bs_top */
1059 bs_new->io_limits_enabled = false;
1060 bs_new->block_timer = NULL;
1061 bs_new->slice_time = 0;
1062 bs_new->slice_start = 0;
1063 bs_new->slice_end = 0;
e023b2e2
PB
1064
1065 bdrv_rebind(bs_new);
1066 bdrv_rebind(bs_top);
8802d1fd
JC
1067}
1068
b338082b
FB
1069void bdrv_delete(BlockDriverState *bs)
1070{
fa879d62 1071 assert(!bs->dev);
3e914655
PB
1072 assert(!bs->job);
1073 assert(!bs->in_use);
18846dee 1074
1b7bdbc1 1075 /* remove from list, if necessary */
d22b2f41 1076 bdrv_make_anon(bs);
34c6f050 1077
b338082b 1078 bdrv_close(bs);
66f82cee 1079
f9092b10 1080 assert(bs != bs_snapshots);
7267c094 1081 g_free(bs);
fc01f7e7
FB
1082}
1083
fa879d62
MA
1084int bdrv_attach_dev(BlockDriverState *bs, void *dev)
1085/* TODO change to DeviceState *dev when all users are qdevified */
18846dee 1086{
fa879d62 1087 if (bs->dev) {
18846dee
MA
1088 return -EBUSY;
1089 }
fa879d62 1090 bs->dev = dev;
28a7282a 1091 bdrv_iostatus_reset(bs);
18846dee
MA
1092 return 0;
1093}
1094
fa879d62
MA
1095/* TODO qdevified devices don't use this, remove when devices are qdevified */
1096void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
18846dee 1097{
fa879d62
MA
1098 if (bdrv_attach_dev(bs, dev) < 0) {
1099 abort();
1100 }
1101}
1102
1103void bdrv_detach_dev(BlockDriverState *bs, void *dev)
1104/* TODO change to DeviceState *dev when all users are qdevified */
1105{
1106 assert(bs->dev == dev);
1107 bs->dev = NULL;
0e49de52
MA
1108 bs->dev_ops = NULL;
1109 bs->dev_opaque = NULL;
29e05f20 1110 bs->buffer_alignment = 512;
18846dee
MA
1111}
1112
fa879d62
MA
1113/* TODO change to return DeviceState * when all users are qdevified */
1114void *bdrv_get_attached_dev(BlockDriverState *bs)
18846dee 1115{
fa879d62 1116 return bs->dev;
18846dee
MA
1117}
1118
0e49de52
MA
1119void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
1120 void *opaque)
1121{
1122 bs->dev_ops = ops;
1123 bs->dev_opaque = opaque;
2c6942fa
MA
1124 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
1125 bs_snapshots = NULL;
1126 }
0e49de52
MA
1127}
1128
329c0a48
LC
1129void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
1130 BlockQMPEventAction action, int is_read)
1131{
1132 QObject *data;
1133 const char *action_str;
1134
1135 switch (action) {
1136 case BDRV_ACTION_REPORT:
1137 action_str = "report";
1138 break;
1139 case BDRV_ACTION_IGNORE:
1140 action_str = "ignore";
1141 break;
1142 case BDRV_ACTION_STOP:
1143 action_str = "stop";
1144 break;
1145 default:
1146 abort();
1147 }
1148
1149 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1150 bdrv->device_name,
1151 action_str,
1152 is_read ? "read" : "write");
1153 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1154
1155 qobject_decref(data);
1156}
1157
6f382ed2
LC
1158static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
1159{
1160 QObject *data;
1161
1162 data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
1163 bdrv_get_device_name(bs), ejected);
1164 monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data);
1165
1166 qobject_decref(data);
1167}
1168
7d4b4ba5 1169static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
0e49de52 1170{
145feb17 1171 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
6f382ed2 1172 bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
7d4b4ba5 1173 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
6f382ed2
LC
1174 if (tray_was_closed) {
1175 /* tray open */
1176 bdrv_emit_qmp_eject_event(bs, true);
1177 }
1178 if (load) {
1179 /* tray close */
1180 bdrv_emit_qmp_eject_event(bs, false);
1181 }
145feb17
MA
1182 }
1183}
1184
2c6942fa
MA
1185bool bdrv_dev_has_removable_media(BlockDriverState *bs)
1186{
1187 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
1188}
1189
025ccaa7
PB
1190void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
1191{
1192 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
1193 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
1194 }
1195}
1196
e4def80b
MA
1197bool bdrv_dev_is_tray_open(BlockDriverState *bs)
1198{
1199 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
1200 return bs->dev_ops->is_tray_open(bs->dev_opaque);
1201 }
1202 return false;
1203}
1204
145feb17
MA
1205static void bdrv_dev_resize_cb(BlockDriverState *bs)
1206{
1207 if (bs->dev_ops && bs->dev_ops->resize_cb) {
1208 bs->dev_ops->resize_cb(bs->dev_opaque);
0e49de52
MA
1209 }
1210}
1211
f107639a
MA
1212bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
1213{
1214 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
1215 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
1216 }
1217 return false;
1218}
1219
e97fc193
AL
1220/*
1221 * Run consistency checks on an image
1222 *
e076f338 1223 * Returns 0 if the check could be completed (it doesn't mean that the image is
a1c7273b 1224 * free of errors) or -errno when an internal error occurred. The results of the
e076f338 1225 * check are stored in res.
e97fc193 1226 */
4534ff54 1227int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
e97fc193
AL
1228{
1229 if (bs->drv->bdrv_check == NULL) {
1230 return -ENOTSUP;
1231 }
1232
e076f338 1233 memset(res, 0, sizeof(*res));
4534ff54 1234 return bs->drv->bdrv_check(bs, res, fix);
e97fc193
AL
1235}
1236
8a426614
KW
1237#define COMMIT_BUF_SECTORS 2048
1238
33e3963e
FB
1239/* commit COW file into the raw image */
1240int bdrv_commit(BlockDriverState *bs)
1241{
19cb3738 1242 BlockDriver *drv = bs->drv;
ee181196 1243 BlockDriver *backing_drv;
8a426614
KW
1244 int64_t sector, total_sectors;
1245 int n, ro, open_flags;
4dca4b63 1246 int ret = 0, rw_ret = 0;
8a426614 1247 uint8_t *buf;
4dca4b63
NS
1248 char filename[1024];
1249 BlockDriverState *bs_rw, *bs_ro;
33e3963e 1250
19cb3738
FB
1251 if (!drv)
1252 return -ENOMEDIUM;
4dca4b63
NS
1253
1254 if (!bs->backing_hd) {
1255 return -ENOTSUP;
33e3963e
FB
1256 }
1257
4dca4b63
NS
1258 if (bs->backing_hd->keep_read_only) {
1259 return -EACCES;
1260 }
ee181196 1261
2d3735d3
SH
1262 if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
1263 return -EBUSY;
1264 }
1265
ee181196 1266 backing_drv = bs->backing_hd->drv;
4dca4b63
NS
1267 ro = bs->backing_hd->read_only;
1268 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
1269 open_flags = bs->backing_hd->open_flags;
1270
1271 if (ro) {
1272 /* re-open as RW */
1273 bdrv_delete(bs->backing_hd);
1274 bs->backing_hd = NULL;
1275 bs_rw = bdrv_new("");
ee181196
KW
1276 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
1277 backing_drv);
4dca4b63
NS
1278 if (rw_ret < 0) {
1279 bdrv_delete(bs_rw);
1280 /* try to re-open read-only */
1281 bs_ro = bdrv_new("");
ee181196
KW
1282 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1283 backing_drv);
4dca4b63
NS
1284 if (ret < 0) {
1285 bdrv_delete(bs_ro);
1286 /* drive not functional anymore */
1287 bs->drv = NULL;
1288 return ret;
1289 }
1290 bs->backing_hd = bs_ro;
1291 return rw_ret;
1292 }
1293 bs->backing_hd = bs_rw;
ea2384d3 1294 }
33e3963e 1295
6ea44308 1296 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
7267c094 1297 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
8a426614
KW
1298
1299 for (sector = 0; sector < total_sectors; sector += n) {
05c4af54 1300 if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
8a426614
KW
1301
1302 if (bdrv_read(bs, sector, buf, n) != 0) {
1303 ret = -EIO;
1304 goto ro_cleanup;
1305 }
1306
1307 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
1308 ret = -EIO;
1309 goto ro_cleanup;
1310 }
ea2384d3 1311 }
33e3963e 1312 }
95389c86 1313
1d44952f
CH
1314 if (drv->bdrv_make_empty) {
1315 ret = drv->bdrv_make_empty(bs);
1316 bdrv_flush(bs);
1317 }
95389c86 1318
3f5075ae
CH
1319 /*
1320 * Make sure all data we wrote to the backing device is actually
1321 * stable on disk.
1322 */
1323 if (bs->backing_hd)
1324 bdrv_flush(bs->backing_hd);
4dca4b63
NS
1325
1326ro_cleanup:
7267c094 1327 g_free(buf);
4dca4b63
NS
1328
1329 if (ro) {
1330 /* re-open as RO */
1331 bdrv_delete(bs->backing_hd);
1332 bs->backing_hd = NULL;
1333 bs_ro = bdrv_new("");
ee181196
KW
1334 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1335 backing_drv);
4dca4b63
NS
1336 if (ret < 0) {
1337 bdrv_delete(bs_ro);
1338 /* drive not functional anymore */
1339 bs->drv = NULL;
1340 return ret;
1341 }
1342 bs->backing_hd = bs_ro;
1343 bs->backing_hd->keep_read_only = 0;
1344 }
1345
1d44952f 1346 return ret;
33e3963e
FB
1347}
1348
e8877497 1349int bdrv_commit_all(void)
6ab4b5ab
MA
1350{
1351 BlockDriverState *bs;
1352
1353 QTAILQ_FOREACH(bs, &bdrv_states, list) {
e8877497
SH
1354 int ret = bdrv_commit(bs);
1355 if (ret < 0) {
1356 return ret;
1357 }
6ab4b5ab 1358 }
e8877497 1359 return 0;
6ab4b5ab
MA
1360}
1361
dbffbdcf
SH
1362struct BdrvTrackedRequest {
1363 BlockDriverState *bs;
1364 int64_t sector_num;
1365 int nb_sectors;
1366 bool is_write;
1367 QLIST_ENTRY(BdrvTrackedRequest) list;
5f8b6491 1368 Coroutine *co; /* owner, used for deadlock detection */
f4658285 1369 CoQueue wait_queue; /* coroutines blocked on this request */
dbffbdcf
SH
1370};
1371
1372/**
1373 * Remove an active request from the tracked requests list
1374 *
1375 * This function should be called when a tracked request is completing.
1376 */
1377static void tracked_request_end(BdrvTrackedRequest *req)
1378{
1379 QLIST_REMOVE(req, list);
f4658285 1380 qemu_co_queue_restart_all(&req->wait_queue);
dbffbdcf
SH
1381}
1382
1383/**
1384 * Add an active request to the tracked requests list
1385 */
1386static void tracked_request_begin(BdrvTrackedRequest *req,
1387 BlockDriverState *bs,
1388 int64_t sector_num,
1389 int nb_sectors, bool is_write)
1390{
1391 *req = (BdrvTrackedRequest){
1392 .bs = bs,
1393 .sector_num = sector_num,
1394 .nb_sectors = nb_sectors,
1395 .is_write = is_write,
5f8b6491 1396 .co = qemu_coroutine_self(),
dbffbdcf
SH
1397 };
1398
f4658285
SH
1399 qemu_co_queue_init(&req->wait_queue);
1400
dbffbdcf
SH
1401 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
1402}
1403
d83947ac
SH
1404/**
1405 * Round a region to cluster boundaries
1406 */
1407static void round_to_clusters(BlockDriverState *bs,
1408 int64_t sector_num, int nb_sectors,
1409 int64_t *cluster_sector_num,
1410 int *cluster_nb_sectors)
1411{
1412 BlockDriverInfo bdi;
1413
1414 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
1415 *cluster_sector_num = sector_num;
1416 *cluster_nb_sectors = nb_sectors;
1417 } else {
1418 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
1419 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
1420 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
1421 nb_sectors, c);
1422 }
1423}
1424
f4658285
SH
1425static bool tracked_request_overlaps(BdrvTrackedRequest *req,
1426 int64_t sector_num, int nb_sectors) {
d83947ac
SH
1427 /* aaaa bbbb */
1428 if (sector_num >= req->sector_num + req->nb_sectors) {
1429 return false;
1430 }
1431 /* bbbb aaaa */
1432 if (req->sector_num >= sector_num + nb_sectors) {
1433 return false;
1434 }
1435 return true;
f4658285
SH
1436}
1437
1438static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
1439 int64_t sector_num, int nb_sectors)
1440{
1441 BdrvTrackedRequest *req;
d83947ac
SH
1442 int64_t cluster_sector_num;
1443 int cluster_nb_sectors;
f4658285
SH
1444 bool retry;
1445
d83947ac
SH
1446 /* If we touch the same cluster it counts as an overlap. This guarantees
1447 * that allocating writes will be serialized and not race with each other
1448 * for the same cluster. For example, in copy-on-read it ensures that the
1449 * CoR read and write operations are atomic and guest writes cannot
1450 * interleave between them.
1451 */
1452 round_to_clusters(bs, sector_num, nb_sectors,
1453 &cluster_sector_num, &cluster_nb_sectors);
1454
f4658285
SH
1455 do {
1456 retry = false;
1457 QLIST_FOREACH(req, &bs->tracked_requests, list) {
d83947ac
SH
1458 if (tracked_request_overlaps(req, cluster_sector_num,
1459 cluster_nb_sectors)) {
5f8b6491
SH
1460 /* Hitting this means there was a reentrant request, for
1461 * example, a block driver issuing nested requests. This must
1462 * never happen since it means deadlock.
1463 */
1464 assert(qemu_coroutine_self() != req->co);
1465
f4658285
SH
1466 qemu_co_queue_wait(&req->wait_queue);
1467 retry = true;
1468 break;
1469 }
1470 }
1471 } while (retry);
1472}
1473
756e6736
KW
1474/*
1475 * Return values:
1476 * 0 - success
1477 * -EINVAL - backing format specified, but no file
1478 * -ENOSPC - can't update the backing file because no space is left in the
1479 * image file header
1480 * -ENOTSUP - format driver doesn't support changing the backing file
1481 */
1482int bdrv_change_backing_file(BlockDriverState *bs,
1483 const char *backing_file, const char *backing_fmt)
1484{
1485 BlockDriver *drv = bs->drv;
469ef350 1486 int ret;
756e6736 1487
5f377794
PB
1488 /* Backing file format doesn't make sense without a backing file */
1489 if (backing_fmt && !backing_file) {
1490 return -EINVAL;
1491 }
1492
756e6736 1493 if (drv->bdrv_change_backing_file != NULL) {
469ef350 1494 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
756e6736 1495 } else {
469ef350 1496 ret = -ENOTSUP;
756e6736 1497 }
469ef350
PB
1498
1499 if (ret == 0) {
1500 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
1501 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
1502 }
1503 return ret;
756e6736
KW
1504}
1505
71d0770c
AL
1506static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1507 size_t size)
1508{
1509 int64_t len;
1510
1511 if (!bdrv_is_inserted(bs))
1512 return -ENOMEDIUM;
1513
1514 if (bs->growable)
1515 return 0;
1516
1517 len = bdrv_getlength(bs);
1518
fbb7b4e0
KW
1519 if (offset < 0)
1520 return -EIO;
1521
1522 if ((offset > len) || (len - offset < size))
71d0770c
AL
1523 return -EIO;
1524
1525 return 0;
1526}
1527
1528static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1529 int nb_sectors)
1530{
eb5a3165
JS
1531 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1532 nb_sectors * BDRV_SECTOR_SIZE);
71d0770c
AL
1533}
1534
1c9805a3
SH
1535typedef struct RwCo {
1536 BlockDriverState *bs;
1537 int64_t sector_num;
1538 int nb_sectors;
1539 QEMUIOVector *qiov;
1540 bool is_write;
1541 int ret;
1542} RwCo;
1543
1544static void coroutine_fn bdrv_rw_co_entry(void *opaque)
fc01f7e7 1545{
1c9805a3 1546 RwCo *rwco = opaque;
ea2384d3 1547
1c9805a3
SH
1548 if (!rwco->is_write) {
1549 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
470c0504 1550 rwco->nb_sectors, rwco->qiov, 0);
1c9805a3
SH
1551 } else {
1552 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
f08f2dda 1553 rwco->nb_sectors, rwco->qiov, 0);
1c9805a3
SH
1554 }
1555}
e7a8a783 1556
1c9805a3
SH
1557/*
1558 * Process a synchronous request using coroutines
1559 */
1560static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1561 int nb_sectors, bool is_write)
1562{
1563 QEMUIOVector qiov;
1564 struct iovec iov = {
1565 .iov_base = (void *)buf,
1566 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1567 };
1568 Coroutine *co;
1569 RwCo rwco = {
1570 .bs = bs,
1571 .sector_num = sector_num,
1572 .nb_sectors = nb_sectors,
1573 .qiov = &qiov,
1574 .is_write = is_write,
1575 .ret = NOT_DONE,
1576 };
e7a8a783 1577
1c9805a3 1578 qemu_iovec_init_external(&qiov, &iov, 1);
e7a8a783 1579
498e386c
ZYW
1580 /**
1581 * In sync call context, when the vcpu is blocked, this throttling timer
1582 * will not fire; so the I/O throttling function has to be disabled here
1583 * if it has been enabled.
1584 */
1585 if (bs->io_limits_enabled) {
1586 fprintf(stderr, "Disabling I/O throttling on '%s' due "
1587 "to synchronous I/O.\n", bdrv_get_device_name(bs));
1588 bdrv_io_limits_disable(bs);
1589 }
1590
1c9805a3
SH
1591 if (qemu_in_coroutine()) {
1592 /* Fast-path if already in coroutine context */
1593 bdrv_rw_co_entry(&rwco);
1594 } else {
1595 co = qemu_coroutine_create(bdrv_rw_co_entry);
1596 qemu_coroutine_enter(co, &rwco);
1597 while (rwco.ret == NOT_DONE) {
1598 qemu_aio_wait();
1599 }
1600 }
1601 return rwco.ret;
1602}
b338082b 1603
1c9805a3
SH
1604/* return < 0 if error. See bdrv_write() for the return codes */
1605int bdrv_read(BlockDriverState *bs, int64_t sector_num,
1606 uint8_t *buf, int nb_sectors)
1607{
1608 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
fc01f7e7
FB
1609}
1610
71df14fc
PB
1611#define BITS_PER_LONG (sizeof(unsigned long) * 8)
1612
7cd1e32a 1613static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
a55eb92c 1614 int nb_sectors, int dirty)
7cd1e32a
LS
1615{
1616 int64_t start, end;
c6d22830 1617 unsigned long val, idx, bit;
a55eb92c 1618
6ea44308 1619 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
c6d22830 1620 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c
JK
1621
1622 for (; start <= end; start++) {
71df14fc
PB
1623 idx = start / BITS_PER_LONG;
1624 bit = start % BITS_PER_LONG;
c6d22830
JK
1625 val = bs->dirty_bitmap[idx];
1626 if (dirty) {
6d59fec1 1627 if (!(val & (1UL << bit))) {
aaa0eb75 1628 bs->dirty_count++;
6d59fec1 1629 val |= 1UL << bit;
aaa0eb75 1630 }
c6d22830 1631 } else {
6d59fec1 1632 if (val & (1UL << bit)) {
aaa0eb75 1633 bs->dirty_count--;
6d59fec1 1634 val &= ~(1UL << bit);
aaa0eb75 1635 }
c6d22830
JK
1636 }
1637 bs->dirty_bitmap[idx] = val;
7cd1e32a
LS
1638 }
1639}
1640
5fafdf24 1641/* Return < 0 if error. Important errors are:
19cb3738
FB
1642 -EIO generic I/O error (may happen for all errors)
1643 -ENOMEDIUM No media inserted.
1644 -EINVAL Invalid sector number or nb_sectors
1645 -EACCES Trying to write a read-only device
1646*/
5fafdf24 1647int bdrv_write(BlockDriverState *bs, int64_t sector_num,
fc01f7e7
FB
1648 const uint8_t *buf, int nb_sectors)
1649{
1c9805a3 1650 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
83f64091
FB
1651}
1652
eda578e5
AL
1653int bdrv_pread(BlockDriverState *bs, int64_t offset,
1654 void *buf, int count1)
83f64091 1655{
6ea44308 1656 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
83f64091
FB
1657 int len, nb_sectors, count;
1658 int64_t sector_num;
9a8c4cce 1659 int ret;
83f64091
FB
1660
1661 count = count1;
1662 /* first read to align to sector start */
6ea44308 1663 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
83f64091
FB
1664 if (len > count)
1665 len = count;
6ea44308 1666 sector_num = offset >> BDRV_SECTOR_BITS;
83f64091 1667 if (len > 0) {
9a8c4cce
KW
1668 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1669 return ret;
6ea44308 1670 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
83f64091
FB
1671 count -= len;
1672 if (count == 0)
1673 return count1;
1674 sector_num++;
1675 buf += len;
1676 }
1677
1678 /* read the sectors "in place" */
6ea44308 1679 nb_sectors = count >> BDRV_SECTOR_BITS;
83f64091 1680 if (nb_sectors > 0) {
9a8c4cce
KW
1681 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1682 return ret;
83f64091 1683 sector_num += nb_sectors;
6ea44308 1684 len = nb_sectors << BDRV_SECTOR_BITS;
83f64091
FB
1685 buf += len;
1686 count -= len;
1687 }
1688
1689 /* add data from the last sector */
1690 if (count > 0) {
9a8c4cce
KW
1691 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1692 return ret;
83f64091
FB
1693 memcpy(buf, tmp_buf, count);
1694 }
1695 return count1;
1696}
1697
eda578e5
AL
1698int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1699 const void *buf, int count1)
83f64091 1700{
6ea44308 1701 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
83f64091
FB
1702 int len, nb_sectors, count;
1703 int64_t sector_num;
9a8c4cce 1704 int ret;
83f64091
FB
1705
1706 count = count1;
1707 /* first write to align to sector start */
6ea44308 1708 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
83f64091
FB
1709 if (len > count)
1710 len = count;
6ea44308 1711 sector_num = offset >> BDRV_SECTOR_BITS;
83f64091 1712 if (len > 0) {
9a8c4cce
KW
1713 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1714 return ret;
6ea44308 1715 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
9a8c4cce
KW
1716 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1717 return ret;
83f64091
FB
1718 count -= len;
1719 if (count == 0)
1720 return count1;
1721 sector_num++;
1722 buf += len;
1723 }
1724
1725 /* write the sectors "in place" */
6ea44308 1726 nb_sectors = count >> BDRV_SECTOR_BITS;
83f64091 1727 if (nb_sectors > 0) {
9a8c4cce
KW
1728 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1729 return ret;
83f64091 1730 sector_num += nb_sectors;
6ea44308 1731 len = nb_sectors << BDRV_SECTOR_BITS;
83f64091
FB
1732 buf += len;
1733 count -= len;
1734 }
1735
1736 /* add data from the last sector */
1737 if (count > 0) {
9a8c4cce
KW
1738 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1739 return ret;
83f64091 1740 memcpy(tmp_buf, buf, count);
9a8c4cce
KW
1741 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1742 return ret;
83f64091
FB
1743 }
1744 return count1;
1745}
83f64091 1746
f08145fe
KW
1747/*
1748 * Writes to the file and ensures that no writes are reordered across this
1749 * request (acts as a barrier)
1750 *
1751 * Returns 0 on success, -errno in error cases.
1752 */
1753int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1754 const void *buf, int count)
1755{
1756 int ret;
1757
1758 ret = bdrv_pwrite(bs, offset, buf, count);
1759 if (ret < 0) {
1760 return ret;
1761 }
1762
f05fa4ad
PB
1763 /* No flush needed for cache modes that already do it */
1764 if (bs->enable_write_cache) {
f08145fe
KW
1765 bdrv_flush(bs);
1766 }
1767
1768 return 0;
1769}
1770
470c0504 1771static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
ab185921
SH
1772 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1773{
1774 /* Perform I/O through a temporary buffer so that users who scribble over
1775 * their read buffer while the operation is in progress do not end up
1776 * modifying the image file. This is critical for zero-copy guest I/O
1777 * where anything might happen inside guest memory.
1778 */
1779 void *bounce_buffer;
1780
79c053bd 1781 BlockDriver *drv = bs->drv;
ab185921
SH
1782 struct iovec iov;
1783 QEMUIOVector bounce_qiov;
1784 int64_t cluster_sector_num;
1785 int cluster_nb_sectors;
1786 size_t skip_bytes;
1787 int ret;
1788
1789 /* Cover entire cluster so no additional backing file I/O is required when
1790 * allocating cluster in the image file.
1791 */
1792 round_to_clusters(bs, sector_num, nb_sectors,
1793 &cluster_sector_num, &cluster_nb_sectors);
1794
470c0504
SH
1795 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
1796 cluster_sector_num, cluster_nb_sectors);
ab185921
SH
1797
1798 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
1799 iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
1800 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
1801
79c053bd
SH
1802 ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
1803 &bounce_qiov);
ab185921
SH
1804 if (ret < 0) {
1805 goto err;
1806 }
1807
79c053bd
SH
1808 if (drv->bdrv_co_write_zeroes &&
1809 buffer_is_zero(bounce_buffer, iov.iov_len)) {
621f0589
KW
1810 ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
1811 cluster_nb_sectors);
79c053bd 1812 } else {
f05fa4ad
PB
1813 /* This does not change the data on the disk, it is not necessary
1814 * to flush even in cache=writethrough mode.
1815 */
79c053bd 1816 ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
ab185921 1817 &bounce_qiov);
79c053bd
SH
1818 }
1819
ab185921
SH
1820 if (ret < 0) {
1821 /* It might be okay to ignore write errors for guest requests. If this
1822 * is a deliberate copy-on-read then we don't want to ignore the error.
1823 * Simply report it in all cases.
1824 */
1825 goto err;
1826 }
1827
1828 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
1829 qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
1830 nb_sectors * BDRV_SECTOR_SIZE);
1831
1832err:
1833 qemu_vfree(bounce_buffer);
1834 return ret;
1835}
1836
c5fbe571
SH
1837/*
1838 * Handle a read request in coroutine context
1839 */
1840static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
470c0504
SH
1841 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
1842 BdrvRequestFlags flags)
da1fa91d
KW
1843{
1844 BlockDriver *drv = bs->drv;
dbffbdcf
SH
1845 BdrvTrackedRequest req;
1846 int ret;
da1fa91d 1847
da1fa91d
KW
1848 if (!drv) {
1849 return -ENOMEDIUM;
1850 }
1851 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1852 return -EIO;
1853 }
1854
98f90dba
ZYW
1855 /* throttling disk read I/O */
1856 if (bs->io_limits_enabled) {
1857 bdrv_io_limits_intercept(bs, false, nb_sectors);
1858 }
1859
f4658285 1860 if (bs->copy_on_read) {
470c0504
SH
1861 flags |= BDRV_REQ_COPY_ON_READ;
1862 }
1863 if (flags & BDRV_REQ_COPY_ON_READ) {
1864 bs->copy_on_read_in_flight++;
1865 }
1866
1867 if (bs->copy_on_read_in_flight) {
f4658285
SH
1868 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1869 }
1870
dbffbdcf 1871 tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
ab185921 1872
470c0504 1873 if (flags & BDRV_REQ_COPY_ON_READ) {
ab185921
SH
1874 int pnum;
1875
1876 ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
1877 if (ret < 0) {
1878 goto out;
1879 }
1880
1881 if (!ret || pnum != nb_sectors) {
470c0504 1882 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
ab185921
SH
1883 goto out;
1884 }
1885 }
1886
dbffbdcf 1887 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
ab185921
SH
1888
1889out:
dbffbdcf 1890 tracked_request_end(&req);
470c0504
SH
1891
1892 if (flags & BDRV_REQ_COPY_ON_READ) {
1893 bs->copy_on_read_in_flight--;
1894 }
1895
dbffbdcf 1896 return ret;
da1fa91d
KW
1897}
1898
c5fbe571 1899int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
da1fa91d
KW
1900 int nb_sectors, QEMUIOVector *qiov)
1901{
c5fbe571 1902 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
da1fa91d 1903
470c0504
SH
1904 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
1905}
1906
1907int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
1908 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1909{
1910 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
1911
1912 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
1913 BDRV_REQ_COPY_ON_READ);
c5fbe571
SH
1914}
1915
f08f2dda
SH
1916static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
1917 int64_t sector_num, int nb_sectors)
1918{
1919 BlockDriver *drv = bs->drv;
1920 QEMUIOVector qiov;
1921 struct iovec iov;
1922 int ret;
1923
621f0589
KW
1924 /* TODO Emulate only part of misaligned requests instead of letting block
1925 * drivers return -ENOTSUP and emulate everything */
1926
f08f2dda
SH
1927 /* First try the efficient write zeroes operation */
1928 if (drv->bdrv_co_write_zeroes) {
621f0589
KW
1929 ret = drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
1930 if (ret != -ENOTSUP) {
1931 return ret;
1932 }
f08f2dda
SH
1933 }
1934
1935 /* Fall back to bounce buffer if write zeroes is unsupported */
1936 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
1937 iov.iov_base = qemu_blockalign(bs, iov.iov_len);
1938 memset(iov.iov_base, 0, iov.iov_len);
1939 qemu_iovec_init_external(&qiov, &iov, 1);
1940
1941 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
1942
1943 qemu_vfree(iov.iov_base);
1944 return ret;
1945}
1946
c5fbe571
SH
1947/*
1948 * Handle a write request in coroutine context
1949 */
1950static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
f08f2dda
SH
1951 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
1952 BdrvRequestFlags flags)
c5fbe571
SH
1953{
1954 BlockDriver *drv = bs->drv;
dbffbdcf 1955 BdrvTrackedRequest req;
6b7cb247 1956 int ret;
da1fa91d
KW
1957
1958 if (!bs->drv) {
1959 return -ENOMEDIUM;
1960 }
1961 if (bs->read_only) {
1962 return -EACCES;
1963 }
1964 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1965 return -EIO;
1966 }
1967
98f90dba
ZYW
1968 /* throttling disk write I/O */
1969 if (bs->io_limits_enabled) {
1970 bdrv_io_limits_intercept(bs, true, nb_sectors);
1971 }
1972
470c0504 1973 if (bs->copy_on_read_in_flight) {
f4658285
SH
1974 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1975 }
1976
dbffbdcf
SH
1977 tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
1978
f08f2dda
SH
1979 if (flags & BDRV_REQ_ZERO_WRITE) {
1980 ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
1981 } else {
1982 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1983 }
6b7cb247 1984
f05fa4ad
PB
1985 if (ret == 0 && !bs->enable_write_cache) {
1986 ret = bdrv_co_flush(bs);
1987 }
1988
da1fa91d
KW
1989 if (bs->dirty_bitmap) {
1990 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1991 }
1992
1993 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1994 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1995 }
1996
dbffbdcf
SH
1997 tracked_request_end(&req);
1998
6b7cb247 1999 return ret;
da1fa91d
KW
2000}
2001
c5fbe571
SH
2002int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
2003 int nb_sectors, QEMUIOVector *qiov)
2004{
2005 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
2006
f08f2dda
SH
2007 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
2008}
2009
2010int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
2011 int64_t sector_num, int nb_sectors)
2012{
2013 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
2014
2015 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
2016 BDRV_REQ_ZERO_WRITE);
c5fbe571
SH
2017}
2018
83f64091
FB
2019/**
2020 * Truncate file to 'offset' bytes (needed only for file protocols)
2021 */
2022int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2023{
2024 BlockDriver *drv = bs->drv;
51762288 2025 int ret;
83f64091 2026 if (!drv)
19cb3738 2027 return -ENOMEDIUM;
83f64091
FB
2028 if (!drv->bdrv_truncate)
2029 return -ENOTSUP;
59f2689d
NS
2030 if (bs->read_only)
2031 return -EACCES;
8591675f
MT
2032 if (bdrv_in_use(bs))
2033 return -EBUSY;
51762288
SH
2034 ret = drv->bdrv_truncate(bs, offset);
2035 if (ret == 0) {
2036 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
145feb17 2037 bdrv_dev_resize_cb(bs);
51762288
SH
2038 }
2039 return ret;
83f64091
FB
2040}
2041
4a1d5e1f
FZ
2042/**
2043 * Length of a allocated file in bytes. Sparse files are counted by actual
2044 * allocated space. Return < 0 if error or unknown.
2045 */
2046int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2047{
2048 BlockDriver *drv = bs->drv;
2049 if (!drv) {
2050 return -ENOMEDIUM;
2051 }
2052 if (drv->bdrv_get_allocated_file_size) {
2053 return drv->bdrv_get_allocated_file_size(bs);
2054 }
2055 if (bs->file) {
2056 return bdrv_get_allocated_file_size(bs->file);
2057 }
2058 return -ENOTSUP;
2059}
2060
83f64091
FB
2061/**
2062 * Length of a file in bytes. Return < 0 if error or unknown.
2063 */
2064int64_t bdrv_getlength(BlockDriverState *bs)
2065{
2066 BlockDriver *drv = bs->drv;
2067 if (!drv)
19cb3738 2068 return -ENOMEDIUM;
51762288 2069
2c6942fa 2070 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
46a4e4e6
SH
2071 if (drv->bdrv_getlength) {
2072 return drv->bdrv_getlength(bs);
2073 }
83f64091 2074 }
46a4e4e6 2075 return bs->total_sectors * BDRV_SECTOR_SIZE;
fc01f7e7
FB
2076}
2077
19cb3738 2078/* return 0 as number of sectors if no device present or error */
96b8f136 2079void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
fc01f7e7 2080{
19cb3738
FB
2081 int64_t length;
2082 length = bdrv_getlength(bs);
2083 if (length < 0)
2084 length = 0;
2085 else
6ea44308 2086 length = length >> BDRV_SECTOR_BITS;
19cb3738 2087 *nb_sectors_ptr = length;
fc01f7e7 2088}
cf98951b 2089
f3d54fc4
AL
2090struct partition {
2091 uint8_t boot_ind; /* 0x80 - active */
2092 uint8_t head; /* starting head */
2093 uint8_t sector; /* starting sector */
2094 uint8_t cyl; /* starting cylinder */
2095 uint8_t sys_ind; /* What partition type */
2096 uint8_t end_head; /* end head */
2097 uint8_t end_sector; /* end sector */
2098 uint8_t end_cyl; /* end cylinder */
2099 uint32_t start_sect; /* starting sector counting from 0 */
2100 uint32_t nr_sects; /* nr of sectors in partition */
541dc0d4 2101} QEMU_PACKED;
f3d54fc4
AL
2102
2103/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
2104static int guess_disk_lchs(BlockDriverState *bs,
2105 int *pcylinders, int *pheads, int *psectors)
2106{
eb5a3165 2107 uint8_t buf[BDRV_SECTOR_SIZE];
f3d54fc4
AL
2108 int ret, i, heads, sectors, cylinders;
2109 struct partition *p;
2110 uint32_t nr_sects;
a38131b6 2111 uint64_t nb_sectors;
498e386c 2112 bool enabled;
f3d54fc4
AL
2113
2114 bdrv_get_geometry(bs, &nb_sectors);
2115
498e386c
ZYW
2116 /**
2117 * The function will be invoked during startup not only in sync I/O mode,
2118 * but also in async I/O mode. So the I/O throttling function has to
2119 * be disabled temporarily here, not permanently.
2120 */
2121 enabled = bs->io_limits_enabled;
2122 bs->io_limits_enabled = false;
f3d54fc4 2123 ret = bdrv_read(bs, 0, buf, 1);
498e386c 2124 bs->io_limits_enabled = enabled;
f3d54fc4
AL
2125 if (ret < 0)
2126 return -1;
2127 /* test msdos magic */
2128 if (buf[510] != 0x55 || buf[511] != 0xaa)
2129 return -1;
2130 for(i = 0; i < 4; i++) {
2131 p = ((struct partition *)(buf + 0x1be)) + i;
2132 nr_sects = le32_to_cpu(p->nr_sects);
2133 if (nr_sects && p->end_head) {
2134 /* We make the assumption that the partition terminates on
2135 a cylinder boundary */
2136 heads = p->end_head + 1;
2137 sectors = p->end_sector & 63;
2138 if (sectors == 0)
2139 continue;
2140 cylinders = nb_sectors / (heads * sectors);
2141 if (cylinders < 1 || cylinders > 16383)
2142 continue;
2143 *pheads = heads;
2144 *psectors = sectors;
2145 *pcylinders = cylinders;
2146#if 0
2147 printf("guessed geometry: LCHS=%d %d %d\n",
2148 cylinders, heads, sectors);
2149#endif
2150 return 0;
2151 }
2152 }
2153 return -1;
2154}
2155
2156void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
2157{
2158 int translation, lba_detected = 0;
2159 int cylinders, heads, secs;
a38131b6 2160 uint64_t nb_sectors;
f3d54fc4
AL
2161
2162 /* if a geometry hint is available, use it */
2163 bdrv_get_geometry(bs, &nb_sectors);
2164 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
2165 translation = bdrv_get_translation_hint(bs);
2166 if (cylinders != 0) {
2167 *pcyls = cylinders;
2168 *pheads = heads;
2169 *psecs = secs;
2170 } else {
2171 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
2172 if (heads > 16) {
2173 /* if heads > 16, it means that a BIOS LBA
2174 translation was active, so the default
2175 hardware geometry is OK */
2176 lba_detected = 1;
2177 goto default_geometry;
2178 } else {
2179 *pcyls = cylinders;
2180 *pheads = heads;
2181 *psecs = secs;
2182 /* disable any translation to be in sync with
2183 the logical geometry */
2184 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
2185 bdrv_set_translation_hint(bs,
2186 BIOS_ATA_TRANSLATION_NONE);
2187 }
2188 }
2189 } else {
2190 default_geometry:
2191 /* if no geometry, use a standard physical disk geometry */
2192 cylinders = nb_sectors / (16 * 63);
2193
2194 if (cylinders > 16383)
2195 cylinders = 16383;
2196 else if (cylinders < 2)
2197 cylinders = 2;
2198 *pcyls = cylinders;
2199 *pheads = 16;
2200 *psecs = 63;
2201 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
2202 if ((*pcyls * *pheads) <= 131072) {
2203 bdrv_set_translation_hint(bs,
2204 BIOS_ATA_TRANSLATION_LARGE);
2205 } else {
2206 bdrv_set_translation_hint(bs,
2207 BIOS_ATA_TRANSLATION_LBA);
2208 }
2209 }
2210 }
2211 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
2212 }
2213}
2214
5fafdf24 2215void bdrv_set_geometry_hint(BlockDriverState *bs,
b338082b
FB
2216 int cyls, int heads, int secs)
2217{
2218 bs->cyls = cyls;
2219 bs->heads = heads;
2220 bs->secs = secs;
2221}
2222
46d4767d
FB
2223void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
2224{
2225 bs->translation = translation;
2226}
2227
5fafdf24 2228void bdrv_get_geometry_hint(BlockDriverState *bs,
b338082b
FB
2229 int *pcyls, int *pheads, int *psecs)
2230{
2231 *pcyls = bs->cyls;
2232 *pheads = bs->heads;
2233 *psecs = bs->secs;
2234}
2235
0563e191
ZYW
2236/* throttling disk io limits */
2237void bdrv_set_io_limits(BlockDriverState *bs,
2238 BlockIOLimit *io_limits)
2239{
2240 bs->io_limits = *io_limits;
2241 bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
2242}
2243
5bbdbb46
BS
2244/* Recognize floppy formats */
2245typedef struct FDFormat {
2246 FDriveType drive;
2247 uint8_t last_sect;
2248 uint8_t max_track;
2249 uint8_t max_head;
f8d3d128 2250 FDriveRate rate;
5bbdbb46
BS
2251} FDFormat;
2252
2253static const FDFormat fd_formats[] = {
2254 /* First entry is default format */
2255 /* 1.44 MB 3"1/2 floppy disks */
f8d3d128
HP
2256 { FDRIVE_DRV_144, 18, 80, 1, FDRIVE_RATE_500K, },
2257 { FDRIVE_DRV_144, 20, 80, 1, FDRIVE_RATE_500K, },
2258 { FDRIVE_DRV_144, 21, 80, 1, FDRIVE_RATE_500K, },
2259 { FDRIVE_DRV_144, 21, 82, 1, FDRIVE_RATE_500K, },
2260 { FDRIVE_DRV_144, 21, 83, 1, FDRIVE_RATE_500K, },
2261 { FDRIVE_DRV_144, 22, 80, 1, FDRIVE_RATE_500K, },
2262 { FDRIVE_DRV_144, 23, 80, 1, FDRIVE_RATE_500K, },
2263 { FDRIVE_DRV_144, 24, 80, 1, FDRIVE_RATE_500K, },
5bbdbb46 2264 /* 2.88 MB 3"1/2 floppy disks */
f8d3d128
HP
2265 { FDRIVE_DRV_288, 36, 80, 1, FDRIVE_RATE_1M, },
2266 { FDRIVE_DRV_288, 39, 80, 1, FDRIVE_RATE_1M, },
2267 { FDRIVE_DRV_288, 40, 80, 1, FDRIVE_RATE_1M, },
2268 { FDRIVE_DRV_288, 44, 80, 1, FDRIVE_RATE_1M, },
2269 { FDRIVE_DRV_288, 48, 80, 1, FDRIVE_RATE_1M, },
5bbdbb46 2270 /* 720 kB 3"1/2 floppy disks */
f8d3d128
HP
2271 { FDRIVE_DRV_144, 9, 80, 1, FDRIVE_RATE_250K, },
2272 { FDRIVE_DRV_144, 10, 80, 1, FDRIVE_RATE_250K, },
2273 { FDRIVE_DRV_144, 10, 82, 1, FDRIVE_RATE_250K, },
2274 { FDRIVE_DRV_144, 10, 83, 1, FDRIVE_RATE_250K, },
2275 { FDRIVE_DRV_144, 13, 80, 1, FDRIVE_RATE_250K, },
2276 { FDRIVE_DRV_144, 14, 80, 1, FDRIVE_RATE_250K, },
5bbdbb46 2277 /* 1.2 MB 5"1/4 floppy disks */
f8d3d128
HP
2278 { FDRIVE_DRV_120, 15, 80, 1, FDRIVE_RATE_500K, },
2279 { FDRIVE_DRV_120, 18, 80, 1, FDRIVE_RATE_500K, },
2280 { FDRIVE_DRV_120, 18, 82, 1, FDRIVE_RATE_500K, },
2281 { FDRIVE_DRV_120, 18, 83, 1, FDRIVE_RATE_500K, },
2282 { FDRIVE_DRV_120, 20, 80, 1, FDRIVE_RATE_500K, },
5bbdbb46 2283 /* 720 kB 5"1/4 floppy disks */
f8d3d128
HP
2284 { FDRIVE_DRV_120, 9, 80, 1, FDRIVE_RATE_250K, },
2285 { FDRIVE_DRV_120, 11, 80, 1, FDRIVE_RATE_250K, },
5bbdbb46 2286 /* 360 kB 5"1/4 floppy disks */
f8d3d128
HP
2287 { FDRIVE_DRV_120, 9, 40, 1, FDRIVE_RATE_300K, },
2288 { FDRIVE_DRV_120, 9, 40, 0, FDRIVE_RATE_300K, },
2289 { FDRIVE_DRV_120, 10, 41, 1, FDRIVE_RATE_300K, },
2290 { FDRIVE_DRV_120, 10, 42, 1, FDRIVE_RATE_300K, },
5bbdbb46 2291 /* 320 kB 5"1/4 floppy disks */
f8d3d128
HP
2292 { FDRIVE_DRV_120, 8, 40, 1, FDRIVE_RATE_250K, },
2293 { FDRIVE_DRV_120, 8, 40, 0, FDRIVE_RATE_250K, },
5bbdbb46 2294 /* 360 kB must match 5"1/4 better than 3"1/2... */
f8d3d128 2295 { FDRIVE_DRV_144, 9, 80, 0, FDRIVE_RATE_250K, },
5bbdbb46 2296 /* end */
f8d3d128 2297 { FDRIVE_DRV_NONE, -1, -1, 0, 0, },
5bbdbb46
BS
2298};
2299
2300void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
2301 int *max_track, int *last_sect,
f8d3d128
HP
2302 FDriveType drive_in, FDriveType *drive,
2303 FDriveRate *rate)
5bbdbb46
BS
2304{
2305 const FDFormat *parse;
2306 uint64_t nb_sectors, size;
2307 int i, first_match, match;
2308
2309 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
2310 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
2311 /* User defined disk */
f8d3d128 2312 *rate = FDRIVE_RATE_500K;
5bbdbb46
BS
2313 } else {
2314 bdrv_get_geometry(bs, &nb_sectors);
2315 match = -1;
2316 first_match = -1;
2317 for (i = 0; ; i++) {
2318 parse = &fd_formats[i];
2319 if (parse->drive == FDRIVE_DRV_NONE) {
2320 break;
2321 }
2322 if (drive_in == parse->drive ||
2323 drive_in == FDRIVE_DRV_NONE) {
2324 size = (parse->max_head + 1) * parse->max_track *
2325 parse->last_sect;
2326 if (nb_sectors == size) {
2327 match = i;
2328 break;
2329 }
2330 if (first_match == -1) {
2331 first_match = i;
2332 }
2333 }
2334 }
2335 if (match == -1) {
2336 if (first_match == -1) {
2337 match = 1;
2338 } else {
2339 match = first_match;
2340 }
2341 parse = &fd_formats[match];
2342 }
2343 *nb_heads = parse->max_head + 1;
2344 *max_track = parse->max_track;
2345 *last_sect = parse->last_sect;
2346 *drive = parse->drive;
f8d3d128 2347 *rate = parse->rate;
5bbdbb46
BS
2348 }
2349}
2350
46d4767d
FB
2351int bdrv_get_translation_hint(BlockDriverState *bs)
2352{
2353 return bs->translation;
2354}
2355
abd7f68d
MA
2356void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
2357 BlockErrorAction on_write_error)
2358{
2359 bs->on_read_error = on_read_error;
2360 bs->on_write_error = on_write_error;
2361}
2362
2363BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
2364{
2365 return is_read ? bs->on_read_error : bs->on_write_error;
2366}
2367
b338082b
FB
2368int bdrv_is_read_only(BlockDriverState *bs)
2369{
2370 return bs->read_only;
2371}
2372
985a03b0
TS
2373int bdrv_is_sg(BlockDriverState *bs)
2374{
2375 return bs->sg;
2376}
2377
e900a7b7
CH
2378int bdrv_enable_write_cache(BlockDriverState *bs)
2379{
2380 return bs->enable_write_cache;
2381}
2382
425b0148
PB
2383void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2384{
2385 bs->enable_write_cache = wce;
2386}
2387
ea2384d3
FB
2388int bdrv_is_encrypted(BlockDriverState *bs)
2389{
2390 if (bs->backing_hd && bs->backing_hd->encrypted)
2391 return 1;
2392 return bs->encrypted;
2393}
2394
c0f4ce77
AL
2395int bdrv_key_required(BlockDriverState *bs)
2396{
2397 BlockDriverState *backing_hd = bs->backing_hd;
2398
2399 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2400 return 1;
2401 return (bs->encrypted && !bs->valid_key);
2402}
2403
ea2384d3
FB
2404int bdrv_set_key(BlockDriverState *bs, const char *key)
2405{
2406 int ret;
2407 if (bs->backing_hd && bs->backing_hd->encrypted) {
2408 ret = bdrv_set_key(bs->backing_hd, key);
2409 if (ret < 0)
2410 return ret;
2411 if (!bs->encrypted)
2412 return 0;
2413 }
fd04a2ae
SH
2414 if (!bs->encrypted) {
2415 return -EINVAL;
2416 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2417 return -ENOMEDIUM;
2418 }
c0f4ce77 2419 ret = bs->drv->bdrv_set_key(bs, key);
bb5fc20f
AL
2420 if (ret < 0) {
2421 bs->valid_key = 0;
2422 } else if (!bs->valid_key) {
2423 bs->valid_key = 1;
2424 /* call the change callback now, we skipped it on open */
7d4b4ba5 2425 bdrv_dev_change_media_cb(bs, true);
bb5fc20f 2426 }
c0f4ce77 2427 return ret;
ea2384d3
FB
2428}
2429
2430void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
2431{
19cb3738 2432 if (!bs->drv) {
ea2384d3
FB
2433 buf[0] = '\0';
2434 } else {
2435 pstrcpy(buf, buf_size, bs->drv->format_name);
2436 }
2437}
2438
5fafdf24 2439void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
ea2384d3
FB
2440 void *opaque)
2441{
2442 BlockDriver *drv;
2443
8a22f02a 2444 QLIST_FOREACH(drv, &bdrv_drivers, list) {
ea2384d3
FB
2445 it(opaque, drv->format_name);
2446 }
2447}
2448
b338082b
FB
2449BlockDriverState *bdrv_find(const char *name)
2450{
2451 BlockDriverState *bs;
2452
1b7bdbc1
SH
2453 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2454 if (!strcmp(name, bs->device_name)) {
b338082b 2455 return bs;
1b7bdbc1 2456 }
b338082b
FB
2457 }
2458 return NULL;
2459}
2460
2f399b0a
MA
2461BlockDriverState *bdrv_next(BlockDriverState *bs)
2462{
2463 if (!bs) {
2464 return QTAILQ_FIRST(&bdrv_states);
2465 }
2466 return QTAILQ_NEXT(bs, list);
2467}
2468
51de9760 2469void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
81d0912d
FB
2470{
2471 BlockDriverState *bs;
2472
1b7bdbc1 2473 QTAILQ_FOREACH(bs, &bdrv_states, list) {
51de9760 2474 it(opaque, bs);
81d0912d
FB
2475 }
2476}
2477
ea2384d3
FB
2478const char *bdrv_get_device_name(BlockDriverState *bs)
2479{
2480 return bs->device_name;
2481}
2482
c8433287
MA
2483int bdrv_get_flags(BlockDriverState *bs)
2484{
2485 return bs->open_flags;
2486}
2487
c6ca28d6
AL
2488void bdrv_flush_all(void)
2489{
2490 BlockDriverState *bs;
2491
1b7bdbc1 2492 QTAILQ_FOREACH(bs, &bdrv_states, list) {
29cdb251 2493 bdrv_flush(bs);
1b7bdbc1 2494 }
c6ca28d6
AL
2495}
2496
f2feebbd
KW
2497int bdrv_has_zero_init(BlockDriverState *bs)
2498{
2499 assert(bs->drv);
2500
336c1c12
KW
2501 if (bs->drv->bdrv_has_zero_init) {
2502 return bs->drv->bdrv_has_zero_init(bs);
f2feebbd
KW
2503 }
2504
2505 return 1;
2506}
2507
376ae3f1
SH
2508typedef struct BdrvCoIsAllocatedData {
2509 BlockDriverState *bs;
2510 int64_t sector_num;
2511 int nb_sectors;
2512 int *pnum;
2513 int ret;
2514 bool done;
2515} BdrvCoIsAllocatedData;
2516
f58c7b35
TS
2517/*
2518 * Returns true iff the specified sector is present in the disk image. Drivers
2519 * not implementing the functionality are assumed to not support backing files,
2520 * hence all their sectors are reported as allocated.
2521 *
bd9533e3
SH
2522 * If 'sector_num' is beyond the end of the disk image the return value is 0
2523 * and 'pnum' is set to 0.
2524 *
f58c7b35
TS
2525 * 'pnum' is set to the number of sectors (including and immediately following
2526 * the specified sector) that are known to be in the same
2527 * allocated/unallocated state.
2528 *
bd9533e3
SH
2529 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
2530 * beyond the end of the disk image it will be clamped.
f58c7b35 2531 */
060f51c9
SH
2532int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
2533 int nb_sectors, int *pnum)
f58c7b35 2534{
bd9533e3
SH
2535 int64_t n;
2536
2537 if (sector_num >= bs->total_sectors) {
2538 *pnum = 0;
2539 return 0;
2540 }
2541
2542 n = bs->total_sectors - sector_num;
2543 if (n < nb_sectors) {
2544 nb_sectors = n;
2545 }
2546
6aebab14 2547 if (!bs->drv->bdrv_co_is_allocated) {
bd9533e3 2548 *pnum = nb_sectors;
f58c7b35
TS
2549 return 1;
2550 }
6aebab14 2551
060f51c9
SH
2552 return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
2553}
2554
2555/* Coroutine wrapper for bdrv_is_allocated() */
2556static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
2557{
2558 BdrvCoIsAllocatedData *data = opaque;
2559 BlockDriverState *bs = data->bs;
2560
2561 data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
2562 data->pnum);
2563 data->done = true;
2564}
2565
2566/*
2567 * Synchronous wrapper around bdrv_co_is_allocated().
2568 *
2569 * See bdrv_co_is_allocated() for details.
2570 */
2571int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
2572 int *pnum)
2573{
6aebab14
SH
2574 Coroutine *co;
2575 BdrvCoIsAllocatedData data = {
2576 .bs = bs,
2577 .sector_num = sector_num,
2578 .nb_sectors = nb_sectors,
2579 .pnum = pnum,
2580 .done = false,
2581 };
2582
2583 co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
2584 qemu_coroutine_enter(co, &data);
2585 while (!data.done) {
2586 qemu_aio_wait();
2587 }
2588 return data.ret;
f58c7b35
TS
2589}
2590
188a7bbf
PB
2591/*
2592 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
2593 *
2594 * Return true if the given sector is allocated in any image between
2595 * BASE and TOP (inclusive). BASE can be NULL to check if the given
2596 * sector is allocated in any image of the chain. Return false otherwise.
2597 *
2598 * 'pnum' is set to the number of sectors (including and immediately following
2599 * the specified sector) that are known to be in the same
2600 * allocated/unallocated state.
2601 *
2602 */
2603int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
2604 BlockDriverState *base,
2605 int64_t sector_num,
2606 int nb_sectors, int *pnum)
2607{
2608 BlockDriverState *intermediate;
2609 int ret, n = nb_sectors;
2610
2611 intermediate = top;
2612 while (intermediate && intermediate != base) {
2613 int pnum_inter;
2614 ret = bdrv_co_is_allocated(intermediate, sector_num, nb_sectors,
2615 &pnum_inter);
2616 if (ret < 0) {
2617 return ret;
2618 } else if (ret) {
2619 *pnum = pnum_inter;
2620 return 1;
2621 }
2622
2623 /*
2624 * [sector_num, nb_sectors] is unallocated on top but intermediate
2625 * might have
2626 *
2627 * [sector_num+x, nr_sectors] allocated.
2628 */
2629 if (n > pnum_inter) {
2630 n = pnum_inter;
2631 }
2632
2633 intermediate = intermediate->backing_hd;
2634 }
2635
2636 *pnum = n;
2637 return 0;
2638}
2639
b2023818 2640BlockInfoList *qmp_query_block(Error **errp)
b338082b 2641{
b2023818 2642 BlockInfoList *head = NULL, *cur_item = NULL;
b338082b
FB
2643 BlockDriverState *bs;
2644
1b7bdbc1 2645 QTAILQ_FOREACH(bs, &bdrv_states, list) {
b2023818 2646 BlockInfoList *info = g_malloc0(sizeof(*info));
d15e5465 2647
b2023818
LC
2648 info->value = g_malloc0(sizeof(*info->value));
2649 info->value->device = g_strdup(bs->device_name);
2650 info->value->type = g_strdup("unknown");
2651 info->value->locked = bdrv_dev_is_medium_locked(bs);
2652 info->value->removable = bdrv_dev_has_removable_media(bs);
d15e5465 2653
e4def80b 2654 if (bdrv_dev_has_removable_media(bs)) {
b2023818
LC
2655 info->value->has_tray_open = true;
2656 info->value->tray_open = bdrv_dev_is_tray_open(bs);
e4def80b 2657 }
f04ef601
LC
2658
2659 if (bdrv_iostatus_is_enabled(bs)) {
b2023818
LC
2660 info->value->has_io_status = true;
2661 info->value->io_status = bs->iostatus;
f04ef601
LC
2662 }
2663
19cb3738 2664 if (bs->drv) {
b2023818
LC
2665 info->value->has_inserted = true;
2666 info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
2667 info->value->inserted->file = g_strdup(bs->filename);
2668 info->value->inserted->ro = bs->read_only;
2669 info->value->inserted->drv = g_strdup(bs->drv->format_name);
2670 info->value->inserted->encrypted = bs->encrypted;
2671 if (bs->backing_file[0]) {
2672 info->value->inserted->has_backing_file = true;
2673 info->value->inserted->backing_file = g_strdup(bs->backing_file);
376253ec 2674 }
727f005e
ZYW
2675
2676 if (bs->io_limits_enabled) {
2677 info->value->inserted->bps =
2678 bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2679 info->value->inserted->bps_rd =
2680 bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
2681 info->value->inserted->bps_wr =
2682 bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
2683 info->value->inserted->iops =
2684 bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2685 info->value->inserted->iops_rd =
2686 bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
2687 info->value->inserted->iops_wr =
2688 bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
2689 }
b2023818 2690 }
d15e5465 2691
b2023818
LC
2692 /* XXX: waiting for the qapi to support GSList */
2693 if (!cur_item) {
2694 head = cur_item = info;
2695 } else {
2696 cur_item->next = info;
2697 cur_item = info;
b338082b 2698 }
b338082b 2699 }
d15e5465 2700
b2023818 2701 return head;
b338082b 2702}
a36e69dd 2703
f11f57e4
LC
2704/* Consider exposing this as a full fledged QMP command */
2705static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
2706{
2707 BlockStats *s;
2708
2709 s = g_malloc0(sizeof(*s));
2710
2711 if (bs->device_name[0]) {
2712 s->has_device = true;
2713 s->device = g_strdup(bs->device_name);
294cc35f
KW
2714 }
2715
f11f57e4
LC
2716 s->stats = g_malloc0(sizeof(*s->stats));
2717 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
2718 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
2719 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
2720 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
2721 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
2722 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
2723 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
2724 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
2725 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
2726
294cc35f 2727 if (bs->file) {
f11f57e4
LC
2728 s->has_parent = true;
2729 s->parent = qmp_query_blockstat(bs->file, NULL);
294cc35f
KW
2730 }
2731
f11f57e4 2732 return s;
294cc35f
KW
2733}
2734
f11f57e4 2735BlockStatsList *qmp_query_blockstats(Error **errp)
218a536a 2736{
f11f57e4 2737 BlockStatsList *head = NULL, *cur_item = NULL;
a36e69dd
TS
2738 BlockDriverState *bs;
2739
1b7bdbc1 2740 QTAILQ_FOREACH(bs, &bdrv_states, list) {
f11f57e4
LC
2741 BlockStatsList *info = g_malloc0(sizeof(*info));
2742 info->value = qmp_query_blockstat(bs, NULL);
2743
2744 /* XXX: waiting for the qapi to support GSList */
2745 if (!cur_item) {
2746 head = cur_item = info;
2747 } else {
2748 cur_item->next = info;
2749 cur_item = info;
2750 }
a36e69dd 2751 }
218a536a 2752
f11f57e4 2753 return head;
a36e69dd 2754}
ea2384d3 2755
045df330
AL
2756const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2757{
2758 if (bs->backing_hd && bs->backing_hd->encrypted)
2759 return bs->backing_file;
2760 else if (bs->encrypted)
2761 return bs->filename;
2762 else
2763 return NULL;
2764}
2765
5fafdf24 2766void bdrv_get_backing_filename(BlockDriverState *bs,
83f64091
FB
2767 char *filename, int filename_size)
2768{
3574c608 2769 pstrcpy(filename, filename_size, bs->backing_file);
83f64091
FB
2770}
2771
5fafdf24 2772int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
faea38e7
FB
2773 const uint8_t *buf, int nb_sectors)
2774{
2775 BlockDriver *drv = bs->drv;
2776 if (!drv)
19cb3738 2777 return -ENOMEDIUM;
faea38e7
FB
2778 if (!drv->bdrv_write_compressed)
2779 return -ENOTSUP;
fbb7b4e0
KW
2780 if (bdrv_check_request(bs, sector_num, nb_sectors))
2781 return -EIO;
a55eb92c 2782
c6d22830 2783 if (bs->dirty_bitmap) {
7cd1e32a
LS
2784 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2785 }
a55eb92c 2786
faea38e7
FB
2787 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2788}
3b46e624 2789
faea38e7
FB
2790int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2791{
2792 BlockDriver *drv = bs->drv;
2793 if (!drv)
19cb3738 2794 return -ENOMEDIUM;
faea38e7
FB
2795 if (!drv->bdrv_get_info)
2796 return -ENOTSUP;
2797 memset(bdi, 0, sizeof(*bdi));
2798 return drv->bdrv_get_info(bs, bdi);
2799}
2800
45566e9c
CH
2801int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2802 int64_t pos, int size)
178e08a5
AL
2803{
2804 BlockDriver *drv = bs->drv;
2805 if (!drv)
2806 return -ENOMEDIUM;
7cdb1f6d
MK
2807 if (drv->bdrv_save_vmstate)
2808 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2809 if (bs->file)
2810 return bdrv_save_vmstate(bs->file, buf, pos, size);
2811 return -ENOTSUP;
178e08a5
AL
2812}
2813
45566e9c
CH
2814int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2815 int64_t pos, int size)
178e08a5
AL
2816{
2817 BlockDriver *drv = bs->drv;
2818 if (!drv)
2819 return -ENOMEDIUM;
7cdb1f6d
MK
2820 if (drv->bdrv_load_vmstate)
2821 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2822 if (bs->file)
2823 return bdrv_load_vmstate(bs->file, buf, pos, size);
2824 return -ENOTSUP;
178e08a5
AL
2825}
2826
8b9b0cc2
KW
2827void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2828{
2829 BlockDriver *drv = bs->drv;
2830
2831 if (!drv || !drv->bdrv_debug_event) {
2832 return;
2833 }
2834
2835 return drv->bdrv_debug_event(bs, event);
2836
2837}
2838
faea38e7
FB
2839/**************************************************************/
2840/* handling of snapshots */
2841
feeee5ac
MDCF
2842int bdrv_can_snapshot(BlockDriverState *bs)
2843{
2844 BlockDriver *drv = bs->drv;
07b70bfb 2845 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
feeee5ac
MDCF
2846 return 0;
2847 }
2848
2849 if (!drv->bdrv_snapshot_create) {
2850 if (bs->file != NULL) {
2851 return bdrv_can_snapshot(bs->file);
2852 }
2853 return 0;
2854 }
2855
2856 return 1;
2857}
2858
199630b6
BS
2859int bdrv_is_snapshot(BlockDriverState *bs)
2860{
2861 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2862}
2863
f9092b10
MA
2864BlockDriverState *bdrv_snapshots(void)
2865{
2866 BlockDriverState *bs;
2867
3ac906f7 2868 if (bs_snapshots) {
f9092b10 2869 return bs_snapshots;
3ac906f7 2870 }
f9092b10
MA
2871
2872 bs = NULL;
2873 while ((bs = bdrv_next(bs))) {
2874 if (bdrv_can_snapshot(bs)) {
3ac906f7
MA
2875 bs_snapshots = bs;
2876 return bs;
f9092b10
MA
2877 }
2878 }
2879 return NULL;
f9092b10
MA
2880}
2881
5fafdf24 2882int bdrv_snapshot_create(BlockDriverState *bs,
faea38e7
FB
2883 QEMUSnapshotInfo *sn_info)
2884{
2885 BlockDriver *drv = bs->drv;
2886 if (!drv)
19cb3738 2887 return -ENOMEDIUM;
7cdb1f6d
MK
2888 if (drv->bdrv_snapshot_create)
2889 return drv->bdrv_snapshot_create(bs, sn_info);
2890 if (bs->file)
2891 return bdrv_snapshot_create(bs->file, sn_info);
2892 return -ENOTSUP;
faea38e7
FB
2893}
2894
5fafdf24 2895int bdrv_snapshot_goto(BlockDriverState *bs,
faea38e7
FB
2896 const char *snapshot_id)
2897{
2898 BlockDriver *drv = bs->drv;
7cdb1f6d
MK
2899 int ret, open_ret;
2900
faea38e7 2901 if (!drv)
19cb3738 2902 return -ENOMEDIUM;
7cdb1f6d
MK
2903 if (drv->bdrv_snapshot_goto)
2904 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2905
2906 if (bs->file) {
2907 drv->bdrv_close(bs);
2908 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2909 open_ret = drv->bdrv_open(bs, bs->open_flags);
2910 if (open_ret < 0) {
2911 bdrv_delete(bs->file);
2912 bs->drv = NULL;
2913 return open_ret;
2914 }
2915 return ret;
2916 }
2917
2918 return -ENOTSUP;
faea38e7
FB
2919}
2920
2921int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2922{
2923 BlockDriver *drv = bs->drv;
2924 if (!drv)
19cb3738 2925 return -ENOMEDIUM;
7cdb1f6d
MK
2926 if (drv->bdrv_snapshot_delete)
2927 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2928 if (bs->file)
2929 return bdrv_snapshot_delete(bs->file, snapshot_id);
2930 return -ENOTSUP;
faea38e7
FB
2931}
2932
5fafdf24 2933int bdrv_snapshot_list(BlockDriverState *bs,
faea38e7
FB
2934 QEMUSnapshotInfo **psn_info)
2935{
2936 BlockDriver *drv = bs->drv;
2937 if (!drv)
19cb3738 2938 return -ENOMEDIUM;
7cdb1f6d
MK
2939 if (drv->bdrv_snapshot_list)
2940 return drv->bdrv_snapshot_list(bs, psn_info);
2941 if (bs->file)
2942 return bdrv_snapshot_list(bs->file, psn_info);
2943 return -ENOTSUP;
faea38e7
FB
2944}
2945
51ef6727 2946int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2947 const char *snapshot_name)
2948{
2949 BlockDriver *drv = bs->drv;
2950 if (!drv) {
2951 return -ENOMEDIUM;
2952 }
2953 if (!bs->read_only) {
2954 return -EINVAL;
2955 }
2956 if (drv->bdrv_snapshot_load_tmp) {
2957 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2958 }
2959 return -ENOTSUP;
2960}
2961
e8a6bb9c
MT
2962BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2963 const char *backing_file)
2964{
2965 if (!bs->drv) {
2966 return NULL;
2967 }
2968
2969 if (bs->backing_hd) {
2970 if (strcmp(bs->backing_file, backing_file) == 0) {
2971 return bs->backing_hd;
2972 } else {
2973 return bdrv_find_backing_image(bs->backing_hd, backing_file);
2974 }
2975 }
2976
2977 return NULL;
2978}
2979
faea38e7
FB
2980#define NB_SUFFIXES 4
2981
2982char *get_human_readable_size(char *buf, int buf_size, int64_t size)
2983{
2984 static const char suffixes[NB_SUFFIXES] = "KMGT";
2985 int64_t base;
2986 int i;
2987
2988 if (size <= 999) {
2989 snprintf(buf, buf_size, "%" PRId64, size);
2990 } else {
2991 base = 1024;
2992 for(i = 0; i < NB_SUFFIXES; i++) {
2993 if (size < (10 * base)) {
5fafdf24 2994 snprintf(buf, buf_size, "%0.1f%c",
faea38e7
FB
2995 (double)size / base,
2996 suffixes[i]);
2997 break;
2998 } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
5fafdf24 2999 snprintf(buf, buf_size, "%" PRId64 "%c",
faea38e7
FB
3000 ((size + (base >> 1)) / base),
3001 suffixes[i]);
3002 break;
3003 }
3004 base = base * 1024;
3005 }
3006 }
3007 return buf;
3008}
3009
3010char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
3011{
3012 char buf1[128], date_buf[128], clock_buf[128];
3b9f94e1
FB
3013#ifdef _WIN32
3014 struct tm *ptm;
3015#else
faea38e7 3016 struct tm tm;
3b9f94e1 3017#endif
faea38e7
FB
3018 time_t ti;
3019 int64_t secs;
3020
3021 if (!sn) {
5fafdf24
TS
3022 snprintf(buf, buf_size,
3023 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
3024 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
3025 } else {
3026 ti = sn->date_sec;
3b9f94e1
FB
3027#ifdef _WIN32
3028 ptm = localtime(&ti);
3029 strftime(date_buf, sizeof(date_buf),
3030 "%Y-%m-%d %H:%M:%S", ptm);
3031#else
faea38e7
FB
3032 localtime_r(&ti, &tm);
3033 strftime(date_buf, sizeof(date_buf),
3034 "%Y-%m-%d %H:%M:%S", &tm);
3b9f94e1 3035#endif
faea38e7
FB
3036 secs = sn->vm_clock_nsec / 1000000000;
3037 snprintf(clock_buf, sizeof(clock_buf),
3038 "%02d:%02d:%02d.%03d",
3039 (int)(secs / 3600),
3040 (int)((secs / 60) % 60),
5fafdf24 3041 (int)(secs % 60),
faea38e7
FB
3042 (int)((sn->vm_clock_nsec / 1000000) % 1000));
3043 snprintf(buf, buf_size,
5fafdf24 3044 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
3045 sn->id_str, sn->name,
3046 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
3047 date_buf,
3048 clock_buf);
3049 }
3050 return buf;
3051}
3052
ea2384d3 3053/**************************************************************/
83f64091 3054/* async I/Os */
ea2384d3 3055
3b69e4b9 3056BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
f141eafe 3057 QEMUIOVector *qiov, int nb_sectors,
3b69e4b9 3058 BlockDriverCompletionFunc *cb, void *opaque)
83f64091 3059{
bbf0a440
SH
3060 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
3061
b2a61371 3062 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
8c5873d6 3063 cb, opaque, false);
ea2384d3
FB
3064}
3065
f141eafe
AL
3066BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
3067 QEMUIOVector *qiov, int nb_sectors,
3068 BlockDriverCompletionFunc *cb, void *opaque)
ea2384d3 3069{
bbf0a440
SH
3070 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
3071
1a6e115b 3072 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
8c5873d6 3073 cb, opaque, true);
83f64091
FB
3074}
3075
40b4f539
KW
3076
3077typedef struct MultiwriteCB {
3078 int error;
3079 int num_requests;
3080 int num_callbacks;
3081 struct {
3082 BlockDriverCompletionFunc *cb;
3083 void *opaque;
3084 QEMUIOVector *free_qiov;
40b4f539
KW
3085 } callbacks[];
3086} MultiwriteCB;
3087
3088static void multiwrite_user_cb(MultiwriteCB *mcb)
3089{
3090 int i;
3091
3092 for (i = 0; i < mcb->num_callbacks; i++) {
3093 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1e1ea48d
SH
3094 if (mcb->callbacks[i].free_qiov) {
3095 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
3096 }
7267c094 3097 g_free(mcb->callbacks[i].free_qiov);
40b4f539
KW
3098 }
3099}
3100
3101static void multiwrite_cb(void *opaque, int ret)
3102{
3103 MultiwriteCB *mcb = opaque;
3104
6d519a5f
SH
3105 trace_multiwrite_cb(mcb, ret);
3106
cb6d3ca0 3107 if (ret < 0 && !mcb->error) {
40b4f539 3108 mcb->error = ret;
40b4f539
KW
3109 }
3110
3111 mcb->num_requests--;
3112 if (mcb->num_requests == 0) {
de189a1b 3113 multiwrite_user_cb(mcb);
7267c094 3114 g_free(mcb);
40b4f539
KW
3115 }
3116}
3117
3118static int multiwrite_req_compare(const void *a, const void *b)
3119{
77be4366
CH
3120 const BlockRequest *req1 = a, *req2 = b;
3121
3122 /*
3123 * Note that we can't simply subtract req2->sector from req1->sector
3124 * here as that could overflow the return value.
3125 */
3126 if (req1->sector > req2->sector) {
3127 return 1;
3128 } else if (req1->sector < req2->sector) {
3129 return -1;
3130 } else {
3131 return 0;
3132 }
40b4f539
KW
3133}
3134
/*
 * Takes a bunch of requests and tries to merge them. Returns the number of
 * requests that remain after merging.
 */
static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
                            int num_reqs, MultiwriteCB *mcb)
{
    int i, outidx;

    // Sort requests by start sector
    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);

    // Check if adjacent requests touch the same clusters. If so, combine them,
    // filling up gaps with zero sectors.
    outidx = 0;
    for (i = 1; i < num_reqs; i++) {
        int merge = 0;
        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;

        // Handle exactly sequential writes and overlapping writes.
        if (reqs[i].sector <= oldreq_last) {
            merge = 1;
        }

        // Don't merge if the combined iovec would exceed the OS limit.
        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
            merge = 0;
        }

        if (merge) {
            size_t size;
            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
            qemu_iovec_init(qiov,
                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);

            // Add the first request to the merged one. If the requests are
            // overlapping, drop the last sectors of the first request.
            size = (reqs[i].sector - reqs[outidx].sector) << 9;
            qemu_iovec_concat(qiov, reqs[outidx].qiov, size);

            // We shouldn't need to add any zeros between the two requests:
            // the sort plus the check above guarantee they touch or overlap.
            assert (reqs[i].sector <= oldreq_last);

            // Add the second request
            qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);

            reqs[outidx].nb_sectors = qiov->size >> 9;
            reqs[outidx].qiov = qiov;

            // Remember the merged qiov so multiwrite_user_cb() can free it.
            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
        } else {
            outidx++;
            reqs[outidx].sector = reqs[i].sector;
            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
            reqs[outidx].qiov = reqs[i].qiov;
        }
    }

    return outidx + 1;
}
3194
/*
 * Submit multiple AIO write requests at once.
 *
 * On success, the function returns 0 and all requests in the reqs array have
 * been submitted. In error case this function returns -1, and any of the
 * requests may or may not be submitted yet. In particular, this means that the
 * callback will be called for some of the requests, for others it won't. The
 * caller must check the error field of the BlockRequest to wait for the right
 * callbacks (if error != 0, no callback will be called).
 *
 * The implementation may modify the contents of the reqs array, e.g. to merge
 * requests. However, the fields opaque and error are left unmodified as they
 * are used to signal failure for a single request to the caller.
 */
int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
{
    MultiwriteCB *mcb;
    int i;

    /* don't submit writes if we don't have a medium */
    if (bs->drv == NULL) {
        for (i = 0; i < num_reqs; i++) {
            reqs[i].error = -ENOMEDIUM;
        }
        return -1;
    }

    if (num_reqs == 0) {
        return 0;
    }

    // Create MultiwriteCB structure sized for one callback slot per request.
    mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
    mcb->num_requests = 0;
    mcb->num_callbacks = num_reqs;

    for (i = 0; i < num_reqs; i++) {
        mcb->callbacks[i].cb = reqs[i].cb;
        mcb->callbacks[i].opaque = reqs[i].opaque;
    }

    // Check for mergable requests; num_reqs shrinks if merges happened.
    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);

    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);

    /* Run the aio requests.  num_requests is set up front so completions
     * arriving during submission can't trigger the final callback early. */
    mcb->num_requests = num_reqs;
    for (i = 0; i < num_reqs; i++) {
        bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
                        reqs[i].nb_sectors, multiwrite_cb, mcb);
    }

    return 0;
}
3250
/* Cancel an in-flight AIO request by dispatching to the cancel hook of
 * the pool the AIOCB was allocated from. */
void bdrv_aio_cancel(BlockDriverAIOCB *acb)
{
    acb->pool->cancel(acb);
}
3255
98f90dba
ZYW
/* block I/O throttling */

/* Decide whether this request would exceed the configured bytes-per-second
 * limit for the current time slice.  Returns true if it would, and stores
 * the suggested wait time (in slice-time units) into *wait when non-NULL. */
static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
                 bool is_write, double elapsed_time, uint64_t *wait)
{
    uint64_t bps_limit = 0;
    double   bytes_limit, bytes_base, bytes_res;
    double   slice_time, wait_time;

    /* A total limit takes precedence over per-direction limits. */
    if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
        bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
    } else if (bs->io_limits.bps[is_write]) {
        bps_limit = bs->io_limits.bps[is_write];
    } else {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    slice_time = bs->slice_end - bs->slice_start;
    slice_time /= (NANOSECONDS_PER_SECOND);
    bytes_limit = bps_limit * slice_time;
    bytes_base  = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
    if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
        bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
    }

    /* bytes_base: the bytes of data which have been read/written; and
     * it is obtained from the history statistic info.
     * bytes_res: the remaining bytes of data which need to be read/written.
     * (bytes_base + bytes_res) / bps_limit: used to calculate
     *   the total time for completing reading/writing all data.
     */
    bytes_res   = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;

    if (bytes_base + bytes_res <= bytes_limit) {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    /* Calc approx time to dispatch */
    wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;

    /* When the I/O rate at runtime exceeds the limits,
     * bs->slice_end need to be extended in order that the current statistic
     * info can be kept until the timer fire, so it is increased and tuned
     * based on the result of experiment.
     */
    bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
    bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
    if (wait) {
        *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
    }

    return true;
}
3316
/* Decide whether one more operation would exceed the configured IOPS limit
 * for the current time slice.  Mirrors bdrv_exceed_bps_limits() but counts
 * operations instead of bytes; stores the suggested wait into *wait. */
static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
                             double elapsed_time, uint64_t *wait)
{
    uint64_t iops_limit = 0;
    double   ios_limit, ios_base;
    double   slice_time, wait_time;

    /* A total limit takes precedence over per-direction limits. */
    if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
        iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
    } else if (bs->io_limits.iops[is_write]) {
        iops_limit = bs->io_limits.iops[is_write];
    } else {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    slice_time = bs->slice_end - bs->slice_start;
    slice_time /= (NANOSECONDS_PER_SECOND);
    ios_limit  = iops_limit * slice_time;
    ios_base   = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
    if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
        ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
    }

    /* +1 accounts for the operation being considered right now. */
    if (ios_base + 1 <= ios_limit) {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    /* Calc approx time to dispatch */
    wait_time = (ios_base + 1) / iops_limit;
    if (wait_time > elapsed_time) {
        wait_time = wait_time - elapsed_time;
    } else {
        wait_time = 0;
    }

    /* Extend the slice so the statistics survive until the timer fires;
     * the factors are empirically tuned (see bdrv_exceed_bps_limits). */
    bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
    bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
    if (wait) {
        *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
    }

    return true;
}
3368
/* Combined throttling check: returns true if either the bps or the iops
 * limit would be exceeded by this request, storing the larger of the two
 * suggested wait times into *wait.  Also (re)initializes the accounting
 * slice when the current time falls outside it. */
static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
                           bool is_write, int64_t *wait)
{
    int64_t  now, max_wait;
    uint64_t bps_wait = 0, iops_wait = 0;
    double   elapsed_time;
    int      bps_ret, iops_ret;

    now = qemu_get_clock_ns(vm_clock);
    if ((bs->slice_start < now)
        && (bs->slice_end > now)) {
        /* Still inside the current slice: just push its end out. */
        bs->slice_end = now + bs->slice_time;
    } else {
        /* Start a fresh slice and snapshot the I/O counters as its base. */
        bs->slice_time  =  5 * BLOCK_IO_SLICE_TIME;
        bs->slice_start = now;
        bs->slice_end   = now + bs->slice_time;

        bs->io_base.bytes[is_write]  = bs->nr_bytes[is_write];
        bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];

        bs->io_base.ios[is_write]    = bs->nr_ops[is_write];
        bs->io_base.ios[!is_write]   = bs->nr_ops[!is_write];
    }

    elapsed_time  = now - bs->slice_start;
    elapsed_time  /= (NANOSECONDS_PER_SECOND);

    bps_ret  = bdrv_exceed_bps_limits(bs, nb_sectors,
                                      is_write, elapsed_time, &bps_wait);
    iops_ret = bdrv_exceed_iops_limits(bs, is_write,
                                       elapsed_time, &iops_wait);
    if (bps_ret || iops_ret) {
        /* Wait for whichever limit demands the longer delay. */
        max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
        if (wait) {
            *wait = max_wait;
        }

        now = qemu_get_clock_ns(vm_clock);
        if (bs->slice_end < now + max_wait) {
            bs->slice_end = now + max_wait;
        }

        return true;
    }

    if (wait) {
        *wait = 0;
    }

    return false;
}
ce1a14dc 3420
83f64091
FB
/**************************************************************/
/* async block device emulation */

/* AIOCB used to emulate async I/O on top of a driver's synchronous
 * bdrv_read/bdrv_write: the sync call runs immediately and completion is
 * reported later from a bottom half. */
typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;
    QEMUBH *bh;         /* bottom half that delivers the completion */
    int ret;            /* result of the synchronous read/write */
    /* vector translation state */
    QEMUIOVector *qiov; /* caller's scatter/gather list */
    uint8_t *bounce;    /* linear bounce buffer backing the sync call */
    int is_write;       /* nonzero for writes, zero for reads */
} BlockDriverAIOCBSync;
3433
/* Cancel hook for the sync-emulation pool: drop the pending bottom half so
 * the completion callback never runs, then recycle the AIOCB.
 * NOTE(review): the bounce buffer is not freed here — presumably the bh has
 * not run yet so bdrv_aio_bh_cb() never releases it; confirm this is not a
 * leak on cancellation. */
static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
{
    BlockDriverAIOCBSync *acb =
        container_of(blockacb, BlockDriverAIOCBSync, common);
    qemu_bh_delete(acb->bh);
    acb->bh = NULL;
    qemu_aio_release(acb);
}

/* Pool for sync-emulated AIOCBs. */
static AIOPool bdrv_em_aio_pool = {
    .aiocb_size         = sizeof(BlockDriverAIOCBSync),
    .cancel             = bdrv_aio_cancel_em,
};
3447
ce1a14dc 3448static void bdrv_aio_bh_cb(void *opaque)
83f64091 3449{
ce1a14dc 3450 BlockDriverAIOCBSync *acb = opaque;
f141eafe 3451
f141eafe
AL
3452 if (!acb->is_write)
3453 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
ceb42de8 3454 qemu_vfree(acb->bounce);
ce1a14dc 3455 acb->common.cb(acb->common.opaque, acb->ret);
6a7ad299 3456 qemu_bh_delete(acb->bh);
36afc451 3457 acb->bh = NULL;
ce1a14dc 3458 qemu_aio_release(acb);
83f64091 3459}
beac80cd 3460
f141eafe
AL
/* Emulate an asynchronous read/write using the driver's synchronous
 * bdrv_read/bdrv_write.  The I/O is performed immediately through a bounce
 * buffer; completion is signalled later via a bottom half so the caller
 * still observes asynchronous semantics. */
static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
                                            int64_t sector_num,
                                            QEMUIOVector *qiov,
                                            int nb_sectors,
                                            BlockDriverCompletionFunc *cb,
                                            void *opaque,
                                            int is_write)

{
    BlockDriverAIOCBSync *acb;

    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
    acb->is_write = is_write;
    acb->qiov = qiov;
    /* Linearize the scatter/gather list into one aligned buffer. */
    acb->bounce = qemu_blockalign(bs, qiov->size);
    acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);

    if (is_write) {
        qemu_iovec_to_buffer(acb->qiov, acb->bounce);
        acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
    } else {
        acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
    }

    /* Defer the completion callback to the bottom half. */
    qemu_bh_schedule(acb->bh);

    return &acb->common;
}
3489
f141eafe
AL
/* Driver-facing async read built on the sync-emulation path. */
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
}
83f64091 3496
f141eafe
AL
/* Driver-facing async write built on the sync-emulation path. */
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
}
beac80cd 3503
68485420
KW
3504
/* AIOCB used to emulate AIO on top of the coroutine-based read/write/flush
 * paths; completion is delivered from a bottom half after the coroutine
 * stores its result in req.error. */
typedef struct BlockDriverAIOCBCoroutine {
    BlockDriverAIOCB common;
    BlockRequest req;   /* sector/nb_sectors/qiov in, error out */
    bool is_write;      /* selects bdrv_co_do_writev vs bdrv_co_do_readv */
    QEMUBH* bh;         /* bottom half that delivers the completion */
} BlockDriverAIOCBCoroutine;
3511
/* Cancel hook for coroutine-emulated AIO: there is no way to cancel an
 * individual coroutine request, so drain all pending AIO instead. */
static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
{
    qemu_aio_flush();
}

/* Pool for coroutine-emulated AIOCBs. */
static AIOPool bdrv_em_co_aio_pool = {
    .aiocb_size         = sizeof(BlockDriverAIOCBCoroutine),
    .cancel             = bdrv_aio_co_cancel_em,
};
3521
/* Bottom half that finishes a coroutine-emulated AIO request: invoke the
 * caller's callback with the coroutine's result, then tear down. */
static void bdrv_co_em_bh(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;

    acb->common.cb(acb->common.opaque, acb->req.error);
    qemu_bh_delete(acb->bh);
    qemu_aio_release(acb);
}
3530
b2a61371
SH
/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
static void coroutine_fn bdrv_co_do_rw(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    /* Run the request synchronously within this coroutine and stash the
     * result; the caller's callback fires later from a bottom half. */
    if (!acb->is_write) {
        acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov, 0);
    } else {
        acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov, 0);
    }

    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}
3548
68485420
KW
/* Emulate an asynchronous read/write by spawning a coroutine that runs
 * bdrv_co_do_rw().  Returns the AIOCB; completion arrives via a bh. */
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
                                               bool is_write)
{
    Coroutine *co;
    BlockDriverAIOCBCoroutine *acb;

    acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
    acb->req.sector = sector_num;
    acb->req.nb_sectors = nb_sectors;
    acb->req.qiov = qiov;
    acb->is_write = is_write;

    co = qemu_coroutine_create(bdrv_co_do_rw);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
}
3571
/* Coroutine entry point backing bdrv_aio_flush(): run the flush and hand
 * the result to the bottom-half completion path. */
static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    acb->req.error = bdrv_co_flush(bs);
    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}
3581
/* Asynchronous flush: wraps bdrv_co_flush() in a coroutine and reports
 * completion through cb/opaque. */
BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_flush(bs, opaque);

    Coroutine *co;
    BlockDriverAIOCBCoroutine *acb;

    acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
    co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
}
3596
4265d620
PB
/* Coroutine entry point backing bdrv_aio_discard(): run the discard and
 * hand the result to the bottom-half completion path. */
static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}
3606
/* Asynchronous discard: wraps bdrv_co_discard() in a coroutine and reports
 * completion through cb/opaque. */
BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    Coroutine *co;
    BlockDriverAIOCBCoroutine *acb;

    trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);

    acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
    acb->req.sector = sector_num;
    acb->req.nb_sectors = nb_sectors;
    co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
}
3624
ea2384d3
FB
/* Register all built-in block drivers (module init hook). */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}
ce1a14dc 3629
eb852011
MA
/* Like bdrv_init(), but restrict format probing/opening to the
 * compile-time driver whitelist. */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
3635
c16b5a2c
CH
/* Allocate an AIOCB from the pool's free list, or malloc a new one of the
 * pool's configured size.  Initializes the common bs/cb/opaque fields. */
void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
                   BlockDriverCompletionFunc *cb, void *opaque)
{
    BlockDriverAIOCB *acb;

    if (pool->free_aiocb) {
        /* Reuse a previously released AIOCB. */
        acb = pool->free_aiocb;
        pool->free_aiocb = acb->next;
    } else {
        acb = g_malloc0(pool->aiocb_size);
        acb->pool = pool;
    }
    acb->bs = bs;
    acb->cb = cb;
    acb->opaque = opaque;
    return acb;
}
3653
/* Return an AIOCB to its pool's free list.  AIOCBs are never actually
 * freed; they are recycled by qemu_aio_get(). */
void qemu_aio_release(void *p)
{
    BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
    AIOPool *pool = acb->pool;
    acb->next = pool->free_aiocb;
    pool->free_aiocb = acb;
}
19cb3738 3661
f9f05dc5
KW
/**************************************************************/
/* Coroutine block device emulation */

/* Rendezvous between an AIO completion callback and the coroutine that
 * submitted the request and yielded waiting for it. */
typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;  /* coroutine to re-enter on completion */
    int ret;               /* result handed back by the callback */
} CoroutineIOCompletion;
3669
/* AIO completion callback: record the result and wake the waiting
 * coroutine. */
static void bdrv_co_io_em_complete(void *opaque, int ret)
{
    CoroutineIOCompletion *co = opaque;

    co->ret = ret;
    qemu_coroutine_enter(co->coroutine, NULL);
}
3677
/* Emulate coroutine read/write on top of a driver's AIO interface: submit
 * the request with a completion callback that re-enters this coroutine,
 * then yield until it fires.  Returns the request's result, or -EIO if
 * submission itself failed. */
static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *iov,
                                      bool is_write)
{
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockDriverAIOCB *acb;

    if (is_write) {
        acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
                                       bdrv_co_io_em_complete, &co);
    } else {
        acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
                                      bdrv_co_io_em_complete, &co);
    }

    trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
    if (!acb) {
        return -EIO;
    }
    /* Sleep until bdrv_co_io_em_complete() re-enters us. */
    qemu_coroutine_yield();

    return co.ret;
}
3703
/* Coroutine read built on the driver's AIO interface. */
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
}
3710
/* Coroutine write built on the driver's AIO interface. */
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
}
3717
/* Coroutine entry point for the synchronous bdrv_flush() wrapper. */
static void coroutine_fn bdrv_flush_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    rwco->ret = bdrv_co_flush(rwco->bs);
}
3724
/* Flush cached data for bs all the way down, honoring cache=unsafe.
 * Tries bdrv_co_flush_to_disk, then bdrv_aio_flush, then gives up
 * gracefully, and finally recurses into the underlying protocol. */
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    int ret;

    /* Nothing to flush without a medium, and read-only devices have no
     * dirty data of ours. */
    if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
        return 0;
    }

    /* Write back cached data to the OS even with cache=unsafe */
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            return ret;
        }
    }

    /* But don't actually force it to the disk with cache=unsafe */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        return 0;
    }

    if (bs->drv->bdrv_co_flush_to_disk) {
        ret = bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        /* Fall back to the AIO interface, yielding until it completes. */
        BlockDriverAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            ret = -EIO;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
        }
    } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work because
         * that would break guests even if the server operates in writethrough
         * mode.
         *
         * Let's hope the user knows what he's doing.
         */
        ret = 0;
    }
    if (ret < 0) {
        return ret;
    }

    /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
     * in the case of cache=unsafe, so there are no useless flushes.
     */
    return bdrv_co_flush(bs->file);
}
3784
0f15423c
AL
3785void bdrv_invalidate_cache(BlockDriverState *bs)
3786{
3787 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3788 bs->drv->bdrv_invalidate_cache(bs);
3789 }
3790}
3791
/* Invalidate caches of every registered BlockDriverState. */
void bdrv_invalidate_cache_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_invalidate_cache(bs);
    }
}
3800
07789269
BC
3801void bdrv_clear_incoming_migration_all(void)
3802{
3803 BlockDriverState *bs;
3804
3805 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3806 bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
3807 }
3808}
3809
07f07615
PB
/* Synchronous flush wrapper around bdrv_co_flush().  Runs the coroutine
 * directly when already in coroutine context, otherwise spawns one and
 * pumps the AIO loop until it finishes. */
int bdrv_flush(BlockDriverState *bs)
{
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .ret = NOT_DONE,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_flush_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_flush_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }

    return rwco.ret;
}
3831
4265d620
PB
/* Coroutine entry point for the synchronous bdrv_discard() wrapper. */
static void coroutine_fn bdrv_discard_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
}
3838
/* Discard (trim) a sector range.  Prefers the driver's coroutine hook,
 * falls back to its AIO hook, and silently succeeds when the driver has
 * no discard support at all. */
int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
                                 int nb_sectors)
{
    if (!bs->drv) {
        return -ENOMEDIUM;
    } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    } else if (bs->read_only) {
        return -EROFS;
    } else if (bs->drv->bdrv_co_discard) {
        return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
    } else if (bs->drv->bdrv_aio_discard) {
        /* Bridge the AIO interface: submit and yield until completion. */
        BlockDriverAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
                                        bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            return -EIO;
        } else {
            qemu_coroutine_yield();
            return co.ret;
        }
    } else {
        /* Discard is advisory; lacking support is not an error. */
        return 0;
    }
}
3868
/* Synchronous discard wrapper around bdrv_co_discard(); same coroutine
 * fast-path/spawn-and-wait pattern as bdrv_flush(). */
int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
{
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .ret = NOT_DONE,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_discard_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_discard_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }

    return rwco.ret;
}
3892
19cb3738
FB
3893/**************************************************************/
3894/* removable device support */
3895
3896/**
3897 * Return TRUE if the media is present
3898 */
3899int bdrv_is_inserted(BlockDriverState *bs)
3900{
3901 BlockDriver *drv = bs->drv;
a1aff5bf 3902
19cb3738
FB
3903 if (!drv)
3904 return 0;
3905 if (!drv->bdrv_is_inserted)
a1aff5bf
MA
3906 return 1;
3907 return drv->bdrv_is_inserted(bs);
19cb3738
FB
3908}
3909
/**
 * Return whether the media changed since the last call to this
 * function, or -ENOTSUP if we don't know. Most drivers don't know.
 */
int bdrv_media_changed(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_media_changed) {
        return drv->bdrv_media_changed(bs);
    }
    return -ENOTSUP;
}
3923
/**
 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
 */
void bdrv_eject(BlockDriverState *bs, bool eject_flag)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_eject) {
        drv->bdrv_eject(bs, eject_flag);
    }

    /* Emit a QMP event only for named (user-visible) devices. */
    if (bs->device_name[0] != '\0') {
        bdrv_emit_qmp_eject_event(bs, eject_flag);
    }
}
3939
19cb3738
FB
/**
 * Lock or unlock the media (if it is locked, the user won't be able
 * to eject it manually).
 */
void bdrv_lock_medium(BlockDriverState *bs, bool locked)
{
    BlockDriver *drv = bs->drv;

    trace_bdrv_lock_medium(bs, locked);

    if (drv && drv->bdrv_lock_medium) {
        drv->bdrv_lock_medium(bs, locked);
    }
}
985a03b0
TS
3954
3955/* needed for generic scsi interface */
3956
3957int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3958{
3959 BlockDriver *drv = bs->drv;
3960
3961 if (drv && drv->bdrv_ioctl)
3962 return drv->bdrv_ioctl(bs, req, buf);
3963 return -ENOTSUP;
3964}
7d780669 3965
221f715d
AL
3966BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3967 unsigned long int req, void *buf,
3968 BlockDriverCompletionFunc *cb, void *opaque)
7d780669 3969{
221f715d 3970 BlockDriver *drv = bs->drv;
7d780669 3971
221f715d
AL
3972 if (drv && drv->bdrv_aio_ioctl)
3973 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3974 return NULL;
7d780669 3975}
e268ca52 3976
7b6f9300
MA
/* Record the buffer alignment (in bytes) qemu_blockalign() should honor
 * for this device. */
void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
{
    bs->buffer_alignment = align;
}
7cd1e32a 3981
e268ca52
AL
/* Allocate a buffer aligned for I/O on bs; falls back to 512-byte
 * alignment when bs is NULL or has no alignment requirement set. */
void *qemu_blockalign(BlockDriverState *bs, size_t size)
{
    return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
}
7cd1e32a
LS
3986
/* Enable or disable dirty-sector tracking.  Enabling allocates a zeroed
 * bitmap with one bit per dirty chunk (idempotent if already allocated);
 * disabling frees it.  The dirty count is reset either way. */
void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
{
    int64_t bitmap_size;

    bs->dirty_count = 0;
    if (enable) {
        if (!bs->dirty_bitmap) {
            /* Round up so every chunk of the device is covered. */
            bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
                    BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG - 1;
            bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG;

            bs->dirty_bitmap = g_new0(unsigned long, bitmap_size);
        }
    } else {
        if (bs->dirty_bitmap) {
            g_free(bs->dirty_bitmap);
            bs->dirty_bitmap = NULL;
        }
    }
}
4007
/* Return 1 if the chunk containing the given sector is marked dirty,
 * 0 if clean, tracking is disabled, or the sector is out of range. */
int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
{
    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;

    if (bs->dirty_bitmap &&
        (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
        /* One bit per chunk, packed into unsigned longs. */
        return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
            (1UL << (chunk % (sizeof(unsigned long) * 8))));
    } else {
        return 0;
    }
}
4020
a55eb92c
JK
/* Clear the dirty bits for a sector range (e.g. after it was copied). */
void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                      int nr_sectors)
{
    set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
}
aaa0eb75
LS
4026
/* Return the number of chunks currently marked dirty. */
int64_t bdrv_get_dirty_count(BlockDriverState *bs)
{
    return bs->dirty_count;
}
f88e1a42 4031
db593f25
MT
/* Mark/unmark the device as claimed by a job; asserts against redundant
 * transitions (claiming twice or releasing an unclaimed device). */
void bdrv_set_in_use(BlockDriverState *bs, int in_use)
{
    assert(bs->in_use != in_use);
    bs->in_use = in_use;
}
4037
/* Return nonzero if the device is currently claimed via bdrv_set_in_use(). */
int bdrv_in_use(BlockDriverState *bs)
{
    return bs->in_use;
}
4042
28a7282a
LC
/* Enable I/O status reporting for the device and reset it to OK. */
void bdrv_iostatus_enable(BlockDriverState *bs)
{
    bs->iostatus_enabled = true;
    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}
4048
/* The I/O status is only enabled if the drive explicitly
 * enables it _and_ the VM is configured to stop on errors */
bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
{
    return (bs->iostatus_enabled &&
           (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
            bs->on_write_error == BLOCK_ERR_STOP_ANY    ||
            bs->on_read_error == BLOCK_ERR_STOP_ANY));
}
4058
/* Disable I/O status reporting for the device. */
void bdrv_iostatus_disable(BlockDriverState *bs)
{
    bs->iostatus_enabled = false;
}
4063
/* Reset the I/O status back to OK, but only when reporting is enabled. */
void bdrv_iostatus_reset(BlockDriverState *bs)
{
    if (bdrv_iostatus_is_enabled(bs)) {
        bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
    }
}
4070
/* XXX: Today this is set by device models because it makes the implementation
   quite simple. However, the block layer knows about the error, so it's
   possible to implement this without device models being involved */
void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
{
    /* Only the first error is latched; further errors are ignored until
     * the status is reset. */
    if (bdrv_iostatus_is_enabled(bs) &&
        bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
        assert(error >= 0);
        bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
                                         BLOCK_DEVICE_IO_STATUS_FAILED;
    }
}
4083
a597e79c
CH
/* Begin accounting for one I/O operation: record its size, start time,
 * and type (read/write/flush) in the caller-provided cookie. */
void
bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
        enum BlockAcctType type)
{
    assert(type < BDRV_MAX_IOTYPE);

    cookie->bytes = bytes;
    cookie->start_time_ns = get_clock();
    cookie->type = type;
}
4094
/* Finish accounting for an operation started with bdrv_acct_start():
 * accumulate its byte count, operation count, and elapsed time into the
 * per-type device statistics. */
void
bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
{
    assert(cookie->type < BDRV_MAX_IOTYPE);

    bs->nr_bytes[cookie->type] += cookie->bytes;
    bs->nr_ops[cookie->type]++;
    bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
}
4104
f88e1a42
JS
4105int bdrv_img_create(const char *filename, const char *fmt,
4106 const char *base_filename, const char *base_fmt,
4107 char *options, uint64_t img_size, int flags)
4108{
4109 QEMUOptionParameter *param = NULL, *create_options = NULL;
d220894e 4110 QEMUOptionParameter *backing_fmt, *backing_file, *size;
f88e1a42
JS
4111 BlockDriverState *bs = NULL;
4112 BlockDriver *drv, *proto_drv;
96df67d1 4113 BlockDriver *backing_drv = NULL;
f88e1a42
JS
4114 int ret = 0;
4115
4116 /* Find driver and parse its options */
4117 drv = bdrv_find_format(fmt);
4118 if (!drv) {
4119 error_report("Unknown file format '%s'", fmt);
4f70f249 4120 ret = -EINVAL;
f88e1a42
JS
4121 goto out;
4122 }
4123
4124 proto_drv = bdrv_find_protocol(filename);
4125 if (!proto_drv) {
4126 error_report("Unknown protocol '%s'", filename);
4f70f249 4127 ret = -EINVAL;
f88e1a42
JS
4128 goto out;
4129 }
4130
4131 create_options = append_option_parameters(create_options,
4132 drv->create_options);
4133 create_options = append_option_parameters(create_options,
4134 proto_drv->create_options);
4135
4136 /* Create parameter list with default values */
4137 param = parse_option_parameters("", create_options, param);
4138
4139 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
4140
4141 /* Parse -o options */
4142 if (options) {
4143 param = parse_option_parameters(options, create_options, param);
4144 if (param == NULL) {
4145 error_report("Invalid options for file format '%s'.", fmt);
4f70f249 4146 ret = -EINVAL;
f88e1a42
JS
4147 goto out;
4148 }
4149 }
4150
4151 if (base_filename) {
4152 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
4153 base_filename)) {
4154 error_report("Backing file not supported for file format '%s'",
4155 fmt);
4f70f249 4156 ret = -EINVAL;
f88e1a42
JS
4157 goto out;
4158 }
4159 }
4160
4161 if (base_fmt) {
4162 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
4163 error_report("Backing file format not supported for file "
4164 "format '%s'", fmt);
4f70f249 4165 ret = -EINVAL;
f88e1a42
JS
4166 goto out;
4167 }
4168 }
4169
792da93a
JS
4170 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
4171 if (backing_file && backing_file->value.s) {
4172 if (!strcmp(filename, backing_file->value.s)) {
4173 error_report("Error: Trying to create an image with the "
4174 "same filename as the backing file");
4f70f249 4175 ret = -EINVAL;
792da93a
JS
4176 goto out;
4177 }
4178 }
4179
f88e1a42
JS
4180 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
4181 if (backing_fmt && backing_fmt->value.s) {
96df67d1
SH
4182 backing_drv = bdrv_find_format(backing_fmt->value.s);
4183 if (!backing_drv) {
f88e1a42
JS
4184 error_report("Unknown backing file format '%s'",
4185 backing_fmt->value.s);
4f70f249 4186 ret = -EINVAL;
f88e1a42
JS
4187 goto out;
4188 }
4189 }
4190
4191 // The size for the image must always be specified, with one exception:
4192 // If we are using a backing file, we can obtain the size from there
d220894e
KW
4193 size = get_option_parameter(param, BLOCK_OPT_SIZE);
4194 if (size && size->value.n == -1) {
f88e1a42
JS
4195 if (backing_file && backing_file->value.s) {
4196 uint64_t size;
f88e1a42 4197 char buf[32];
63090dac
PB
4198 int back_flags;
4199
4200 /* backing files always opened read-only */
4201 back_flags =
4202 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
f88e1a42 4203
f88e1a42
JS
4204 bs = bdrv_new("");
4205
63090dac 4206 ret = bdrv_open(bs, backing_file->value.s, back_flags, backing_drv);
f88e1a42 4207 if (ret < 0) {
96df67d1 4208 error_report("Could not open '%s'", backing_file->value.s);
f88e1a42
JS
4209 goto out;
4210 }
4211 bdrv_get_geometry(bs, &size);
4212 size *= 512;
4213
4214 snprintf(buf, sizeof(buf), "%" PRId64, size);
4215 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
4216 } else {
4217 error_report("Image creation needs a size parameter");
4f70f249 4218 ret = -EINVAL;
f88e1a42
JS
4219 goto out;
4220 }
4221 }
4222
4223 printf("Formatting '%s', fmt=%s ", filename, fmt);
4224 print_option_parameters(param);
4225 puts("");
4226
4227 ret = bdrv_create(drv, filename, param);
4228
4229 if (ret < 0) {
4230 if (ret == -ENOTSUP) {
4231 error_report("Formatting or formatting option not supported for "
4232 "file format '%s'", fmt);
4233 } else if (ret == -EFBIG) {
4234 error_report("The image size is too large for file format '%s'",
4235 fmt);
4236 } else {
4237 error_report("%s: error while creating %s: %s", filename, fmt,
4238 strerror(-ret));
4239 }
4240 }
4241
4242out:
4243 free_option_parameters(create_options);
4244 free_option_parameters(param);
4245
4246 if (bs) {
4247 bdrv_delete(bs);
4248 }
4f70f249
JS
4249
4250 return ret;
f88e1a42 4251}
eeec61f2
SH
4252
4253void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
c83c66c3
SH
4254 int64_t speed, BlockDriverCompletionFunc *cb,
4255 void *opaque, Error **errp)
eeec61f2
SH
4256{
4257 BlockJob *job;
4258
4259 if (bs->job || bdrv_in_use(bs)) {
fd7f8c65 4260 error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
eeec61f2
SH
4261 return NULL;
4262 }
4263 bdrv_set_in_use(bs, 1);
4264
4265 job = g_malloc0(job_type->instance_size);
4266 job->job_type = job_type;
4267 job->bs = bs;
4268 job->cb = cb;
4269 job->opaque = opaque;
4513eafe 4270 job->busy = true;
eeec61f2 4271 bs->job = job;
c83c66c3
SH
4272
4273 /* Only set speed when necessary to avoid NotSupported error */
4274 if (speed != 0) {
4275 Error *local_err = NULL;
4276
4277 block_job_set_speed(job, speed, &local_err);
4278 if (error_is_set(&local_err)) {
4279 bs->job = NULL;
4280 g_free(job);
4281 bdrv_set_in_use(bs, 0);
4282 error_propagate(errp, local_err);
4283 return NULL;
4284 }
4285 }
eeec61f2
SH
4286 return job;
4287}
4288
4289void block_job_complete(BlockJob *job, int ret)
4290{
4291 BlockDriverState *bs = job->bs;
4292
4293 assert(bs->job == job);
4294 job->cb(job->opaque, ret);
4295 bs->job = NULL;
4296 g_free(job);
4297 bdrv_set_in_use(bs, 0);
4298}
4299
882ec7ce 4300void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
eeec61f2 4301{
9e6636c7 4302 Error *local_err = NULL;
9f25eccc 4303
eeec61f2 4304 if (!job->job_type->set_speed) {
9e6636c7
SH
4305 error_set(errp, QERR_NOT_SUPPORTED);
4306 return;
eeec61f2 4307 }
882ec7ce 4308 job->job_type->set_speed(job, speed, &local_err);
9e6636c7
SH
4309 if (error_is_set(&local_err)) {
4310 error_propagate(errp, local_err);
4311 return;
9f25eccc 4312 }
9e6636c7 4313
882ec7ce 4314 job->speed = speed;
eeec61f2
SH
4315}
4316
4317void block_job_cancel(BlockJob *job)
4318{
4319 job->cancelled = true;
fa4478d5
PB
4320 if (job->co && !job->busy) {
4321 qemu_coroutine_enter(job->co, NULL);
4322 }
eeec61f2
SH
4323}
4324
/* Return true once block_job_cancel() has been called on @job. */
bool block_job_is_cancelled(BlockJob *job)
{
    return job->cancelled;
}
3e914655 4329
fa4478d5
PB
/* State shared between block_job_cancel_sync() and the completion callback
 * it interposes (block_job_cancel_cb). */
struct BlockCancelData {
    BlockJob *job;
    BlockDriverCompletionFunc *cb;  /* the job's original completion callback */
    void *opaque;                   /* the original callback's argument */
    bool cancelled;                 /* job's cancelled flag at completion time */
    int ret;                        /* job's completion code */
};
4337
4338static void block_job_cancel_cb(void *opaque, int ret)
3e914655 4339{
fa4478d5
PB
4340 struct BlockCancelData *data = opaque;
4341
4342 data->cancelled = block_job_is_cancelled(data->job);
4343 data->ret = ret;
4344 data->cb(data->opaque, ret);
4345}
4346
/* Cancel @job and synchronously wait for it to finish.
 *
 * Returns the job's completion code, or -ECANCELED when the job was in fact
 * cancelled even though it reported success.  Pumps the AIO event loop while
 * waiting, so this must be called from outside the job's own coroutine. */
int block_job_cancel_sync(BlockJob *job)
{
    struct BlockCancelData data;
    BlockDriverState *bs = job->bs;

    assert(bs->job == job);

    /* Set up our own callback to store the result and chain to
     * the original callback.
     */
    data.job = job;
    data.cb = job->cb;
    data.opaque = job->opaque;
    data.ret = -EINPROGRESS;    /* sentinel: rewritten when the job ends */
    job->cb = block_job_cancel_cb;
    job->opaque = &data;
    block_job_cancel(job);
    /* block_job_cancel_cb() overwrites data.ret on completion,
     * terminating this loop. */
    while (data.ret == -EINPROGRESS) {
        qemu_aio_wait();
    }
    /* Surface a cancelled-but-"successful" job to the caller as an error. */
    return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret;
}
4513eafe
PB
4369
4370void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns)
4371{
4372 /* Check cancellation *before* setting busy = false, too! */
4373 if (!block_job_is_cancelled(job)) {
4374 job->busy = false;
4375 co_sleep_ns(clock, ns);
4376 job->busy = true;
4377 }
4378}