/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "config-host.h"
#include "qemu-common.h"
#include "trace.h"
#include "monitor.h"
#include "block_int.h"
#include "module.h"
#include "qjson.h"
#include "qemu-coroutine.h"
#include "qmp-commands.h"
#include "qemu-timer.h"

#ifdef CONFIG_BSD
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/queue.h>
#ifndef __DragonFly__
#include <sys/disk.h>
#endif
#endif

#ifdef _WIN32
#include <windows.h>
#endif

#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov);
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
                                               bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);

static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
        double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, int64_t *wait);

static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* The device to use for VM snapshots */
static BlockDriverState *bs_snapshots;

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

#ifdef _WIN32
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}

int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) &&
        filename[2] == '\0')
        return 1;
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL))
        return 1;
    return 0;
}
#endif

/* throttling disk I/O limits */
void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    while (qemu_co_queue_next(&bs->throttled_reqs));

    if (bs->block_timer) {
        qemu_del_timer(bs->block_timer);
        qemu_free_timer(bs->block_timer);
        bs->block_timer = NULL;
    }

    bs->slice_start = 0;
    bs->slice_end = 0;
    bs->slice_time = 0;
    memset(&bs->io_base, 0, sizeof(bs->io_base));
}

static void bdrv_block_timer(void *opaque)
{
    BlockDriverState *bs = opaque;

    qemu_co_queue_next(&bs->throttled_reqs);
}

void bdrv_io_limits_enable(BlockDriverState *bs)
{
    qemu_co_queue_init(&bs->throttled_reqs);
    bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
    bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
    bs->slice_start = qemu_get_clock_ns(vm_clock);
    bs->slice_end = bs->slice_start + bs->slice_time;
    memset(&bs->io_base, 0, sizeof(bs->io_base));
    bs->io_limits_enabled = true;
}

bool bdrv_io_limits_enabled(BlockDriverState *bs)
{
    BlockIOLimit *io_limits = &bs->io_limits;
    return io_limits->bps[BLOCK_IO_LIMIT_READ]
        || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
        || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
        || io_limits->iops[BLOCK_IO_LIMIT_READ]
        || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
        || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
}

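/*
 * Editor's illustrative sketch (not part of the original file): a caller
 * enables throttling by filling a BlockIOLimit and handing it to
 * bdrv_set_io_limits(), defined later in this file; bdrv_io_limits_enabled()
 * then returns true because at least one limit is non-zero. The numeric
 * values below are arbitrary examples.
 *
 *     BlockIOLimit lim = { };
 *     lim.bps[BLOCK_IO_LIMIT_TOTAL]  = 10 * 1024 * 1024;  // 10 MB/s overall
 *     lim.iops[BLOCK_IO_LIMIT_WRITE] = 200;               // 200 write IOPS
 *     bdrv_set_io_limits(bs, &lim);
 *     assert(bdrv_io_limits_enabled(bs));
 */
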
static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     bool is_write, int nb_sectors)
{
    int64_t wait_time = -1;

    if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
        qemu_co_queue_wait(&bs->throttled_reqs);
    }

    /* Requests are kept in FIFO order: the next throttled request is not
     * dequeued until the current request has been allowed to proceed. If the
     * current request still exceeds the limits, it is re-inserted at the head
     * of the queue, so all requests behind it remain in throttled_reqs.
     */

    while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
        qemu_mod_timer(bs->block_timer,
                       wait_time + qemu_get_clock_ns(vm_clock));
        qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
    }

    qemu_co_queue_next(&bs->throttled_reqs);
}

/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    return strchr(path, ':') != NULL;
}

int path_is_absolute(const char *path)
{
    const char *p;
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\')
        return 1;
#endif
    p = strchr(path, ':');
    if (p)
        p++;
    else
        p = path;
#ifdef _WIN32
    return (*p == '/' || *p == '\\');
#else
    return (*p == '/');
#endif
}

/* If filename is absolute, just copy it to dest. Otherwise, build a path
   to it by treating it as relative to base_path. URLs are supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        if (p1 > p)
            p = p1;
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}

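/*
 * Editor's illustrative sketch (not part of the original file): this is how
 * path_combine() is used later in bdrv_open() to resolve a relative backing
 * file name against the image that references it. The file names below are
 * hypothetical.
 *
 *     char backing[PATH_MAX];
 *     path_combine(backing, sizeof(backing),
 *                  "/images/overlay.qcow2", "base.raw");
 *     // backing now holds "/images/base.raw"
 */
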
void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}

/* create a new block device (by default it is empty) */
BlockDriverState *bdrv_new(const char *device_name)
{
    BlockDriverState *bs;

    bs = g_malloc0(sizeof(BlockDriverState));
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
    if (device_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
    }
    bdrv_iostatus_disable(bs);
    return bs;
}

BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
            return drv1;
        }
    }
    return NULL;
}

static int bdrv_is_whitelisted(BlockDriver *drv)
{
    static const char *whitelist[] = {
        CONFIG_BDRV_WHITELIST
    };
    const char **p;

    if (!whitelist[0])
        return 1;               /* no whitelist, anything goes */

    for (p = whitelist; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
    return 0;
}

BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
{
    BlockDriver *drv = bdrv_find_format(format_name);
    return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
}

int bdrv_create(BlockDriver *drv, const char* filename,
    QEMUOptionParameter *options)
{
    if (!drv->bdrv_create)
        return -ENOTSUP;

    return drv->bdrv_create(filename, options);
}

int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
{
    BlockDriver *drv;

    drv = bdrv_find_protocol(filename);
    if (drv == NULL) {
        return -ENOENT;
    }

    return bdrv_create(drv, filename, options);
}

#ifdef _WIN32
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;
    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir)
        tmpdir = "/tmp";
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    close(fd);
}
#endif

/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    int score_max = 0, score;
    BlockDriver *drv = NULL, *d;

    QLIST_FOREACH(d, &bdrv_drivers, list) {
        if (d->bdrv_probe_device) {
            score = d->bdrv_probe_device(filename);
            if (score > score_max) {
                score_max = score;
                drv = d;
            }
        }
    }

    return drv;
}

BlockDriver *bdrv_find_protocol(const char *filename)
{
    BlockDriver *drv1;
    char protocol[128];
    int len;
    const char *p;

    /* TODO Drivers without bdrv_file_open must be specified explicitly */

    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

    if (!path_has_protocol(filename)) {
        return bdrv_find_format("file");
    }
    p = strchr(filename, ':');
    assert(p != NULL);
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->protocol_name &&
            !strcmp(drv1->protocol_name, protocol)) {
            return drv1;
        }
    }
    return NULL;
}

static int find_image_format(const char *filename, BlockDriver **pdrv)
{
    int ret, score, score_max;
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
    BlockDriverState *bs;

    ret = bdrv_file_open(&bs, filename, 0);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs)) {
        bdrv_delete(bs);
        drv = bdrv_find_format("raw");
        if (!drv) {
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
    }

    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    bdrv_delete(bs);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    score_max = 0;
    drv = NULL;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->bdrv_probe) {
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
        }
    }
    if (!drv) {
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}

/**
 * Set the current 'total_sectors' value
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg)
        return 0;

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);
        if (length < 0) {
            return length;
        }
        hint = length >> BDRV_SECTOR_BITS;
    }

    bs->total_sectors = hint;
    return 0;
}

/**
 * Set open flags for a given cache mode
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_CACHE_MASK;

    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "directsync")) {
        *flags |= BDRV_O_NOCACHE;
    } else if (!strcmp(mode, "writeback")) {
        *flags |= BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "unsafe")) {
        *flags |= BDRV_O_CACHE_WB;
        *flags |= BDRV_O_NO_FLUSH;
    } else if (!strcmp(mode, "writethrough")) {
        /* this is the default */
    } else {
        return -1;
    }

    return 0;
}

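/*
 * Editor's illustrative sketch (not part of the original file): translating a
 * user-visible cache mode string into BDRV_O_* open flags. Only the
 * BDRV_O_CACHE_MASK bits are replaced, so unrelated flags survive.
 *
 *     int flags = BDRV_O_RDWR;
 *     if (bdrv_parse_cache_flags("writeback", &flags) < 0) {
 *         // invalid cache mode string
 *     }
 *     // flags == BDRV_O_RDWR | BDRV_O_CACHE_WB
 */
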
/**
 * The copy-on-read flag is actually a reference count, so multiple users may
 * enable the feature without clobbering each other's state.
 * Copy-on-read stays enabled until every user has disabled it again.
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    bs->copy_on_read++;
}

void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    assert(bs->copy_on_read > 0);
    bs->copy_on_read--;
}

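/*
 * Editor's illustrative sketch (not part of the original file): because the
 * flag is a reference count, two independent users can request copy-on-read
 * without coordinating; it only switches off after the last disable call.
 *
 *     bdrv_enable_copy_on_read(bs);    // user A
 *     bdrv_enable_copy_on_read(bs);    // user B
 *     bdrv_disable_copy_on_read(bs);   // A is done, still enabled for B
 *     bdrv_disable_copy_on_read(bs);   // B is done, copy-on-read now off
 */
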
57915332
KW
557/*
558 * Common part for opening disk images and files
559 */
560static int bdrv_open_common(BlockDriverState *bs, const char *filename,
561 int flags, BlockDriver *drv)
562{
563 int ret, open_flags;
564
565 assert(drv != NULL);
566
28dcee10
SH
567 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
568
66f82cee 569 bs->file = NULL;
51762288 570 bs->total_sectors = 0;
57915332
KW
571 bs->encrypted = 0;
572 bs->valid_key = 0;
03f541bd 573 bs->sg = 0;
57915332 574 bs->open_flags = flags;
03f541bd 575 bs->growable = 0;
57915332
KW
576 bs->buffer_alignment = 512;
577
53fec9d3
SH
578 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
579 if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
580 bdrv_enable_copy_on_read(bs);
581 }
582
57915332 583 pstrcpy(bs->filename, sizeof(bs->filename), filename);
03f541bd 584 bs->backing_file[0] = '\0';
57915332
KW
585
586 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
587 return -ENOTSUP;
588 }
589
590 bs->drv = drv;
7267c094 591 bs->opaque = g_malloc0(drv->instance_size);
57915332 592
03f541bd 593 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
57915332
KW
594
595 /*
596 * Clear flags that are internal to the block layer before opening the
597 * image.
598 */
599 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
600
601 /*
ebabb67a 602 * Snapshots should be writable.
57915332
KW
603 */
604 if (bs->is_temporary) {
605 open_flags |= BDRV_O_RDWR;
606 }
607
e7c63796
SH
608 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
609
66f82cee
KW
610 /* Open the image, either directly or using a protocol */
611 if (drv->bdrv_file_open) {
612 ret = drv->bdrv_file_open(bs, filename, open_flags);
613 } else {
614 ret = bdrv_file_open(&bs->file, filename, open_flags);
615 if (ret >= 0) {
616 ret = drv->bdrv_open(bs, open_flags);
617 }
618 }
619
57915332
KW
620 if (ret < 0) {
621 goto free_and_fail;
622 }
623
51762288
SH
624 ret = refresh_total_sectors(bs, bs->total_sectors);
625 if (ret < 0) {
626 goto free_and_fail;
57915332 627 }
51762288 628
57915332
KW
629#ifndef _WIN32
630 if (bs->is_temporary) {
631 unlink(filename);
632 }
633#endif
634 return 0;
635
636free_and_fail:
66f82cee
KW
637 if (bs->file) {
638 bdrv_delete(bs->file);
639 bs->file = NULL;
640 }
7267c094 641 g_free(bs->opaque);
57915332
KW
642 bs->opaque = NULL;
643 bs->drv = NULL;
644 return ret;
645}
646
b6ce07aa
KW
647/*
648 * Opens a file using a protocol (file, host_device, nbd, ...)
649 */
83f64091 650int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
ea2384d3 651{
83f64091 652 BlockDriverState *bs;
6db95603 653 BlockDriver *drv;
83f64091
FB
654 int ret;
655
b50cbabc 656 drv = bdrv_find_protocol(filename);
6db95603
CH
657 if (!drv) {
658 return -ENOENT;
659 }
660
83f64091 661 bs = bdrv_new("");
b6ce07aa 662 ret = bdrv_open_common(bs, filename, flags, drv);
83f64091
FB
663 if (ret < 0) {
664 bdrv_delete(bs);
665 return ret;
3b0d4f61 666 }
71d0770c 667 bs->growable = 1;
83f64091
FB
668 *pbs = bs;
669 return 0;
670}
671
b6ce07aa
KW
672/*
673 * Opens a disk image (raw, qcow2, vmdk, ...)
674 */
d6e9098e
KW
675int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
676 BlockDriver *drv)
ea2384d3 677{
b6ce07aa 678 int ret;
2b572816 679 char tmp_filename[PATH_MAX];
712e7874 680
83f64091 681 if (flags & BDRV_O_SNAPSHOT) {
ea2384d3
FB
682 BlockDriverState *bs1;
683 int64_t total_size;
7c96d46e 684 int is_protocol = 0;
91a073a9
KW
685 BlockDriver *bdrv_qcow2;
686 QEMUOptionParameter *options;
b6ce07aa 687 char backing_filename[PATH_MAX];
3b46e624 688
ea2384d3
FB
689 /* if snapshot, we create a temporary backing file and open it
690 instead of opening 'filename' directly */
33e3963e 691
ea2384d3
FB
692 /* if there is a backing file, use it */
693 bs1 = bdrv_new("");
d6e9098e 694 ret = bdrv_open(bs1, filename, 0, drv);
51d7c00c 695 if (ret < 0) {
ea2384d3 696 bdrv_delete(bs1);
51d7c00c 697 return ret;
ea2384d3 698 }
3e82990b 699 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
7c96d46e
AL
700
701 if (bs1->drv && bs1->drv->protocol_name)
702 is_protocol = 1;
703
ea2384d3 704 bdrv_delete(bs1);
3b46e624 705
ea2384d3 706 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
7c96d46e
AL
707
708 /* Real path is meaningless for protocols */
709 if (is_protocol)
710 snprintf(backing_filename, sizeof(backing_filename),
711 "%s", filename);
114cdfa9
KS
712 else if (!realpath(filename, backing_filename))
713 return -errno;
7c96d46e 714
91a073a9
KW
715 bdrv_qcow2 = bdrv_find_format("qcow2");
716 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
717
3e82990b 718 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
91a073a9
KW
719 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
720 if (drv) {
721 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
722 drv->format_name);
723 }
724
725 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
d748768c 726 free_option_parameters(options);
51d7c00c
AL
727 if (ret < 0) {
728 return ret;
ea2384d3 729 }
91a073a9 730
ea2384d3 731 filename = tmp_filename;
91a073a9 732 drv = bdrv_qcow2;
ea2384d3
FB
733 bs->is_temporary = 1;
734 }
712e7874 735
b6ce07aa 736 /* Find the right image format driver */
6db95603 737 if (!drv) {
c98ac35d 738 ret = find_image_format(filename, &drv);
51d7c00c 739 }
6987307c 740
51d7c00c 741 if (!drv) {
51d7c00c 742 goto unlink_and_fail;
ea2384d3 743 }
b6ce07aa
KW
744
745 /* Open the image */
746 ret = bdrv_open_common(bs, filename, flags, drv);
747 if (ret < 0) {
6987307c
CH
748 goto unlink_and_fail;
749 }
750
b6ce07aa
KW
751 /* If there is a backing file, use it */
752 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
753 char backing_filename[PATH_MAX];
754 int back_flags;
755 BlockDriver *back_drv = NULL;
756
757 bs->backing_hd = bdrv_new("");
df2dbb4a
SH
758
759 if (path_has_protocol(bs->backing_file)) {
760 pstrcpy(backing_filename, sizeof(backing_filename),
761 bs->backing_file);
762 } else {
763 path_combine(backing_filename, sizeof(backing_filename),
764 filename, bs->backing_file);
765 }
766
767 if (bs->backing_format[0] != '\0') {
b6ce07aa 768 back_drv = bdrv_find_format(bs->backing_format);
df2dbb4a 769 }
b6ce07aa
KW
770
771 /* backing files always opened read-only */
772 back_flags =
773 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
774
775 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
776 if (ret < 0) {
777 bdrv_close(bs);
778 return ret;
779 }
780 if (bs->is_temporary) {
781 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
782 } else {
783 /* base image inherits from "parent" */
784 bs->backing_hd->keep_read_only = bs->keep_read_only;
785 }
786 }
787
788 if (!bdrv_key_required(bs)) {
7d4b4ba5 789 bdrv_dev_change_media_cb(bs, true);
b6ce07aa
KW
790 }
791
98f90dba
ZYW
792 /* throttling disk I/O limits */
793 if (bs->io_limits_enabled) {
794 bdrv_io_limits_enable(bs);
795 }
796
b6ce07aa
KW
797 return 0;
798
799unlink_and_fail:
800 if (bs->is_temporary) {
801 unlink(filename);
802 }
803 return ret;
804}
805
fc01f7e7
FB
806void bdrv_close(BlockDriverState *bs)
807{
19cb3738 808 if (bs->drv) {
f9092b10
MA
809 if (bs == bs_snapshots) {
810 bs_snapshots = NULL;
811 }
557df6ac 812 if (bs->backing_hd) {
ea2384d3 813 bdrv_delete(bs->backing_hd);
557df6ac
SH
814 bs->backing_hd = NULL;
815 }
ea2384d3 816 bs->drv->bdrv_close(bs);
7267c094 817 g_free(bs->opaque);
ea2384d3
FB
818#ifdef _WIN32
819 if (bs->is_temporary) {
820 unlink(bs->filename);
821 }
67b915a5 822#endif
ea2384d3
FB
823 bs->opaque = NULL;
824 bs->drv = NULL;
53fec9d3 825 bs->copy_on_read = 0;
b338082b 826
66f82cee
KW
827 if (bs->file != NULL) {
828 bdrv_close(bs->file);
829 }
830
7d4b4ba5 831 bdrv_dev_change_media_cb(bs, false);
b338082b 832 }
98f90dba
ZYW
833
834 /*throttling disk I/O limits*/
835 if (bs->io_limits_enabled) {
836 bdrv_io_limits_disable(bs);
837 }
b338082b
FB
838}
839
2bc93fed
MK
840void bdrv_close_all(void)
841{
842 BlockDriverState *bs;
843
844 QTAILQ_FOREACH(bs, &bdrv_states, list) {
845 bdrv_close(bs);
846 }
847}
848
d22b2f41
RH
849/* make a BlockDriverState anonymous by removing from bdrv_state list.
850 Also, NULL terminate the device_name to prevent double remove */
851void bdrv_make_anon(BlockDriverState *bs)
852{
853 if (bs->device_name[0] != '\0') {
854 QTAILQ_REMOVE(&bdrv_states, bs, list);
855 }
856 bs->device_name[0] = '\0';
857}
858
b338082b
FB
859void bdrv_delete(BlockDriverState *bs)
860{
fa879d62 861 assert(!bs->dev);
18846dee 862
1b7bdbc1 863 /* remove from list, if necessary */
d22b2f41 864 bdrv_make_anon(bs);
34c6f050 865
b338082b 866 bdrv_close(bs);
66f82cee
KW
867 if (bs->file != NULL) {
868 bdrv_delete(bs->file);
869 }
870
f9092b10 871 assert(bs != bs_snapshots);
7267c094 872 g_free(bs);
fc01f7e7
FB
873}
874
fa879d62
MA
875int bdrv_attach_dev(BlockDriverState *bs, void *dev)
876/* TODO change to DeviceState *dev when all users are qdevified */
18846dee 877{
fa879d62 878 if (bs->dev) {
18846dee
MA
879 return -EBUSY;
880 }
fa879d62 881 bs->dev = dev;
28a7282a 882 bdrv_iostatus_reset(bs);
18846dee
MA
883 return 0;
884}
885
fa879d62
MA
886/* TODO qdevified devices don't use this, remove when devices are qdevified */
887void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
18846dee 888{
fa879d62
MA
889 if (bdrv_attach_dev(bs, dev) < 0) {
890 abort();
891 }
892}
893
894void bdrv_detach_dev(BlockDriverState *bs, void *dev)
895/* TODO change to DeviceState *dev when all users are qdevified */
896{
897 assert(bs->dev == dev);
898 bs->dev = NULL;
0e49de52
MA
899 bs->dev_ops = NULL;
900 bs->dev_opaque = NULL;
29e05f20 901 bs->buffer_alignment = 512;
18846dee
MA
902}
903
fa879d62
MA
904/* TODO change to return DeviceState * when all users are qdevified */
905void *bdrv_get_attached_dev(BlockDriverState *bs)
18846dee 906{
fa879d62 907 return bs->dev;
18846dee
MA
908}
909
0e49de52
MA
910void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
911 void *opaque)
912{
913 bs->dev_ops = ops;
914 bs->dev_opaque = opaque;
2c6942fa
MA
915 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
916 bs_snapshots = NULL;
917 }
0e49de52
MA
918}
919
7d4b4ba5 920static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
0e49de52 921{
145feb17 922 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
7d4b4ba5 923 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
145feb17
MA
924 }
925}
926
2c6942fa
MA
927bool bdrv_dev_has_removable_media(BlockDriverState *bs)
928{
929 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
930}
931
025ccaa7
PB
932void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
933{
934 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
935 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
936 }
937}
938
e4def80b
MA
939bool bdrv_dev_is_tray_open(BlockDriverState *bs)
940{
941 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
942 return bs->dev_ops->is_tray_open(bs->dev_opaque);
943 }
944 return false;
945}
946
145feb17
MA
947static void bdrv_dev_resize_cb(BlockDriverState *bs)
948{
949 if (bs->dev_ops && bs->dev_ops->resize_cb) {
950 bs->dev_ops->resize_cb(bs->dev_opaque);
0e49de52
MA
951 }
952}
953
f107639a
MA
954bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
955{
956 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
957 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
958 }
959 return false;
960}
961
e97fc193
AL
962/*
963 * Run consistency checks on an image
964 *
e076f338 965 * Returns 0 if the check could be completed (it doesn't mean that the image is
a1c7273b 966 * free of errors) or -errno when an internal error occurred. The results of the
e076f338 967 * check are stored in res.
e97fc193 968 */
e076f338 969int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
e97fc193
AL
970{
971 if (bs->drv->bdrv_check == NULL) {
972 return -ENOTSUP;
973 }
974
e076f338 975 memset(res, 0, sizeof(*res));
9ac228e0 976 return bs->drv->bdrv_check(bs, res);
e97fc193
AL
977}
978
8a426614
KW
979#define COMMIT_BUF_SECTORS 2048
980
33e3963e
FB
981/* commit COW file into the raw image */
982int bdrv_commit(BlockDriverState *bs)
983{
19cb3738 984 BlockDriver *drv = bs->drv;
ee181196 985 BlockDriver *backing_drv;
8a426614
KW
986 int64_t sector, total_sectors;
987 int n, ro, open_flags;
4dca4b63 988 int ret = 0, rw_ret = 0;
8a426614 989 uint8_t *buf;
4dca4b63
NS
990 char filename[1024];
991 BlockDriverState *bs_rw, *bs_ro;
33e3963e 992
19cb3738
FB
993 if (!drv)
994 return -ENOMEDIUM;
4dca4b63
NS
995
996 if (!bs->backing_hd) {
997 return -ENOTSUP;
33e3963e
FB
998 }
999
4dca4b63
NS
1000 if (bs->backing_hd->keep_read_only) {
1001 return -EACCES;
1002 }
ee181196
KW
1003
1004 backing_drv = bs->backing_hd->drv;
4dca4b63
NS
1005 ro = bs->backing_hd->read_only;
1006 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
1007 open_flags = bs->backing_hd->open_flags;
1008
1009 if (ro) {
1010 /* re-open as RW */
1011 bdrv_delete(bs->backing_hd);
1012 bs->backing_hd = NULL;
1013 bs_rw = bdrv_new("");
ee181196
KW
1014 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
1015 backing_drv);
4dca4b63
NS
1016 if (rw_ret < 0) {
1017 bdrv_delete(bs_rw);
1018 /* try to re-open read-only */
1019 bs_ro = bdrv_new("");
ee181196
KW
1020 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1021 backing_drv);
4dca4b63
NS
1022 if (ret < 0) {
1023 bdrv_delete(bs_ro);
1024 /* drive not functional anymore */
1025 bs->drv = NULL;
1026 return ret;
1027 }
1028 bs->backing_hd = bs_ro;
1029 return rw_ret;
1030 }
1031 bs->backing_hd = bs_rw;
ea2384d3 1032 }
33e3963e 1033
6ea44308 1034 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
7267c094 1035 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
8a426614
KW
1036
1037 for (sector = 0; sector < total_sectors; sector += n) {
05c4af54 1038 if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
8a426614
KW
1039
1040 if (bdrv_read(bs, sector, buf, n) != 0) {
1041 ret = -EIO;
1042 goto ro_cleanup;
1043 }
1044
1045 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
1046 ret = -EIO;
1047 goto ro_cleanup;
1048 }
ea2384d3 1049 }
33e3963e 1050 }
95389c86 1051
1d44952f
CH
1052 if (drv->bdrv_make_empty) {
1053 ret = drv->bdrv_make_empty(bs);
1054 bdrv_flush(bs);
1055 }
95389c86 1056
3f5075ae
CH
1057 /*
1058 * Make sure all data we wrote to the backing device is actually
1059 * stable on disk.
1060 */
1061 if (bs->backing_hd)
1062 bdrv_flush(bs->backing_hd);
4dca4b63
NS
1063
1064ro_cleanup:
7267c094 1065 g_free(buf);
4dca4b63
NS
1066
1067 if (ro) {
1068 /* re-open as RO */
1069 bdrv_delete(bs->backing_hd);
1070 bs->backing_hd = NULL;
1071 bs_ro = bdrv_new("");
ee181196
KW
1072 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1073 backing_drv);
4dca4b63
NS
1074 if (ret < 0) {
1075 bdrv_delete(bs_ro);
1076 /* drive not functional anymore */
1077 bs->drv = NULL;
1078 return ret;
1079 }
1080 bs->backing_hd = bs_ro;
1081 bs->backing_hd->keep_read_only = 0;
1082 }
1083
1d44952f 1084 return ret;
33e3963e
FB
1085}
1086
6ab4b5ab
MA
1087void bdrv_commit_all(void)
1088{
1089 BlockDriverState *bs;
1090
1091 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1092 bdrv_commit(bs);
1093 }
1094}
1095
dbffbdcf
SH
1096struct BdrvTrackedRequest {
1097 BlockDriverState *bs;
1098 int64_t sector_num;
1099 int nb_sectors;
1100 bool is_write;
1101 QLIST_ENTRY(BdrvTrackedRequest) list;
1102};
1103
1104/**
1105 * Remove an active request from the tracked requests list
1106 *
1107 * This function should be called when a tracked request is completing.
1108 */
1109static void tracked_request_end(BdrvTrackedRequest *req)
1110{
1111 QLIST_REMOVE(req, list);
1112}
1113
1114/**
1115 * Add an active request to the tracked requests list
1116 */
1117static void tracked_request_begin(BdrvTrackedRequest *req,
1118 BlockDriverState *bs,
1119 int64_t sector_num,
1120 int nb_sectors, bool is_write)
1121{
1122 *req = (BdrvTrackedRequest){
1123 .bs = bs,
1124 .sector_num = sector_num,
1125 .nb_sectors = nb_sectors,
1126 .is_write = is_write,
1127 };
1128
1129 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
1130}
1131
756e6736
KW
1132/*
1133 * Return values:
1134 * 0 - success
1135 * -EINVAL - backing format specified, but no file
1136 * -ENOSPC - can't update the backing file because no space is left in the
1137 * image file header
1138 * -ENOTSUP - format driver doesn't support changing the backing file
1139 */
1140int bdrv_change_backing_file(BlockDriverState *bs,
1141 const char *backing_file, const char *backing_fmt)
1142{
1143 BlockDriver *drv = bs->drv;
1144
1145 if (drv->bdrv_change_backing_file != NULL) {
1146 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
1147 } else {
1148 return -ENOTSUP;
1149 }
1150}
1151
71d0770c
AL
1152static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1153 size_t size)
1154{
1155 int64_t len;
1156
1157 if (!bdrv_is_inserted(bs))
1158 return -ENOMEDIUM;
1159
1160 if (bs->growable)
1161 return 0;
1162
1163 len = bdrv_getlength(bs);
1164
fbb7b4e0
KW
1165 if (offset < 0)
1166 return -EIO;
1167
1168 if ((offset > len) || (len - offset < size))
71d0770c
AL
1169 return -EIO;
1170
1171 return 0;
1172}
1173
1174static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1175 int nb_sectors)
1176{
eb5a3165
JS
1177 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1178 nb_sectors * BDRV_SECTOR_SIZE);
71d0770c
AL
1179}
1180
1c9805a3
SH
1181typedef struct RwCo {
1182 BlockDriverState *bs;
1183 int64_t sector_num;
1184 int nb_sectors;
1185 QEMUIOVector *qiov;
1186 bool is_write;
1187 int ret;
1188} RwCo;
1189
1190static void coroutine_fn bdrv_rw_co_entry(void *opaque)
fc01f7e7 1191{
1c9805a3 1192 RwCo *rwco = opaque;
ea2384d3 1193
1c9805a3
SH
1194 if (!rwco->is_write) {
1195 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
1196 rwco->nb_sectors, rwco->qiov);
1197 } else {
1198 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
1199 rwco->nb_sectors, rwco->qiov);
1200 }
1201}
e7a8a783 1202
1c9805a3
SH
1203/*
1204 * Process a synchronous request using coroutines
1205 */
1206static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1207 int nb_sectors, bool is_write)
1208{
1209 QEMUIOVector qiov;
1210 struct iovec iov = {
1211 .iov_base = (void *)buf,
1212 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1213 };
1214 Coroutine *co;
1215 RwCo rwco = {
1216 .bs = bs,
1217 .sector_num = sector_num,
1218 .nb_sectors = nb_sectors,
1219 .qiov = &qiov,
1220 .is_write = is_write,
1221 .ret = NOT_DONE,
1222 };
e7a8a783 1223
1c9805a3 1224 qemu_iovec_init_external(&qiov, &iov, 1);
e7a8a783 1225
1c9805a3
SH
1226 if (qemu_in_coroutine()) {
1227 /* Fast-path if already in coroutine context */
1228 bdrv_rw_co_entry(&rwco);
1229 } else {
1230 co = qemu_coroutine_create(bdrv_rw_co_entry);
1231 qemu_coroutine_enter(co, &rwco);
1232 while (rwco.ret == NOT_DONE) {
1233 qemu_aio_wait();
1234 }
1235 }
1236 return rwco.ret;
1237}
b338082b 1238
1c9805a3
SH
1239/* return < 0 if error. See bdrv_write() for the return codes */
1240int bdrv_read(BlockDriverState *bs, int64_t sector_num,
1241 uint8_t *buf, int nb_sectors)
1242{
1243 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
fc01f7e7
FB
1244}
1245
7cd1e32a 1246static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
a55eb92c 1247 int nb_sectors, int dirty)
7cd1e32a 1248{
1249 int64_t start, end;
c6d22830 1250 unsigned long val, idx, bit;
a55eb92c 1251
6ea44308 1252 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
c6d22830 1253 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c
JK
1254
1255 for (; start <= end; start++) {
c6d22830
JK
1256 idx = start / (sizeof(unsigned long) * 8);
1257 bit = start % (sizeof(unsigned long) * 8);
1258 val = bs->dirty_bitmap[idx];
1259 if (dirty) {
6d59fec1 1260 if (!(val & (1UL << bit))) {
aaa0eb75 1261 bs->dirty_count++;
6d59fec1 1262 val |= 1UL << bit;
aaa0eb75 1263 }
c6d22830 1264 } else {
6d59fec1 1265 if (val & (1UL << bit)) {
aaa0eb75 1266 bs->dirty_count--;
6d59fec1 1267 val &= ~(1UL << bit);
aaa0eb75 1268 }
c6d22830
JK
1269 }
1270 bs->dirty_bitmap[idx] = val;
7cd1e32a 1271 }
1272}
1273
5fafdf24 1274/* Return < 0 if error. Important errors are:
19cb3738
FB
1275 -EIO generic I/O error (may happen for all errors)
1276 -ENOMEDIUM No media inserted.
1277 -EINVAL Invalid sector number or nb_sectors
1278 -EACCES Trying to write a read-only device
1279*/
5fafdf24 1280int bdrv_write(BlockDriverState *bs, int64_t sector_num,
fc01f7e7
FB
1281 const uint8_t *buf, int nb_sectors)
1282{
1c9805a3 1283 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
83f64091
FB
1284}
1285
eda578e5
AL
1286int bdrv_pread(BlockDriverState *bs, int64_t offset,
1287 void *buf, int count1)
83f64091 1288{
6ea44308 1289 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
83f64091
FB
1290 int len, nb_sectors, count;
1291 int64_t sector_num;
9a8c4cce 1292 int ret;
83f64091
FB
1293
1294 count = count1;
1295 /* first read to align to sector start */
6ea44308 1296 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
83f64091
FB
1297 if (len > count)
1298 len = count;
6ea44308 1299 sector_num = offset >> BDRV_SECTOR_BITS;
83f64091 1300 if (len > 0) {
9a8c4cce
KW
1301 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1302 return ret;
6ea44308 1303 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
83f64091
FB
1304 count -= len;
1305 if (count == 0)
1306 return count1;
1307 sector_num++;
1308 buf += len;
1309 }
1310
1311 /* read the sectors "in place" */
6ea44308 1312 nb_sectors = count >> BDRV_SECTOR_BITS;
83f64091 1313 if (nb_sectors > 0) {
9a8c4cce
KW
1314 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1315 return ret;
83f64091 1316 sector_num += nb_sectors;
6ea44308 1317 len = nb_sectors << BDRV_SECTOR_BITS;
83f64091
FB
1318 buf += len;
1319 count -= len;
1320 }
1321
1322 /* add data from the last sector */
1323 if (count > 0) {
9a8c4cce
KW
1324 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1325 return ret;
83f64091
FB
1326 memcpy(buf, tmp_buf, count);
1327 }
1328 return count1;
1329}
1330
eda578e5
AL
1331int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1332 const void *buf, int count1)
83f64091 1333{
6ea44308 1334 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
83f64091
FB
1335 int len, nb_sectors, count;
1336 int64_t sector_num;
9a8c4cce 1337 int ret;
83f64091
FB
1338
1339 count = count1;
1340 /* first write to align to sector start */
6ea44308 1341 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
83f64091
FB
1342 if (len > count)
1343 len = count;
6ea44308 1344 sector_num = offset >> BDRV_SECTOR_BITS;
83f64091 1345 if (len > 0) {
9a8c4cce
KW
1346 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1347 return ret;
6ea44308 1348 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
9a8c4cce
KW
1349 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1350 return ret;
83f64091
FB
1351 count -= len;
1352 if (count == 0)
1353 return count1;
1354 sector_num++;
1355 buf += len;
1356 }
1357
1358 /* write the sectors "in place" */
6ea44308 1359 nb_sectors = count >> BDRV_SECTOR_BITS;
83f64091 1360 if (nb_sectors > 0) {
9a8c4cce
KW
1361 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1362 return ret;
83f64091 1363 sector_num += nb_sectors;
6ea44308 1364 len = nb_sectors << BDRV_SECTOR_BITS;
83f64091
FB
1365 buf += len;
1366 count -= len;
1367 }
1368
1369 /* add data from the last sector */
1370 if (count > 0) {
9a8c4cce
KW
1371 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1372 return ret;
83f64091 1373 memcpy(tmp_buf, buf, count);
9a8c4cce
KW
1374 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1375 return ret;
83f64091
FB
1376 }
1377 return count1;
1378}
83f64091 1379
f08145fe
KW
1380/*
1381 * Writes to the file and ensures that no writes are reordered across this
1382 * request (acts as a barrier)
1383 *
1384 * Returns 0 on success, -errno in error cases.
1385 */
1386int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1387 const void *buf, int count)
1388{
1389 int ret;
1390
1391 ret = bdrv_pwrite(bs, offset, buf, count);
1392 if (ret < 0) {
1393 return ret;
1394 }
1395
92196b2f
SH
1396 /* No flush needed for cache modes that use O_DSYNC */
1397 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
f08145fe
KW
1398 bdrv_flush(bs);
1399 }
1400
1401 return 0;
1402}
1403
c5fbe571
SH
1404/*
1405 * Handle a read request in coroutine context
1406 */
1407static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
1408 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
da1fa91d
KW
1409{
1410 BlockDriver *drv = bs->drv;
dbffbdcf
SH
1411 BdrvTrackedRequest req;
1412 int ret;
da1fa91d 1413
da1fa91d
KW
1414 if (!drv) {
1415 return -ENOMEDIUM;
1416 }
1417 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1418 return -EIO;
1419 }
1420
98f90dba
ZYW
1421 /* throttling disk read I/O */
1422 if (bs->io_limits_enabled) {
1423 bdrv_io_limits_intercept(bs, false, nb_sectors);
1424 }
1425
dbffbdcf
SH
1426 tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
1427 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1428 tracked_request_end(&req);
1429 return ret;
da1fa91d
KW
1430}
1431
c5fbe571 1432int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
da1fa91d
KW
1433 int nb_sectors, QEMUIOVector *qiov)
1434{
c5fbe571 1435 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
da1fa91d 1436
c5fbe571
SH
1437 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
1438}
1439
1440/*
1441 * Handle a write request in coroutine context
1442 */
1443static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
1444 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1445{
1446 BlockDriver *drv = bs->drv;
dbffbdcf 1447 BdrvTrackedRequest req;
6b7cb247 1448 int ret;
da1fa91d
KW
1449
1450 if (!bs->drv) {
1451 return -ENOMEDIUM;
1452 }
1453 if (bs->read_only) {
1454 return -EACCES;
1455 }
1456 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1457 return -EIO;
1458 }
1459
98f90dba
ZYW
1460 /* throttling disk write I/O */
1461 if (bs->io_limits_enabled) {
1462 bdrv_io_limits_intercept(bs, true, nb_sectors);
1463 }
1464
dbffbdcf
SH
1465 tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
1466
6b7cb247
SH
1467 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1468
da1fa91d
KW
1469 if (bs->dirty_bitmap) {
1470 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1471 }
1472
1473 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1474 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1475 }
1476
dbffbdcf
SH
1477 tracked_request_end(&req);
1478
6b7cb247 1479 return ret;
da1fa91d
KW
1480}
1481
c5fbe571
SH
1482int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1483 int nb_sectors, QEMUIOVector *qiov)
1484{
1485 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1486
1487 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
1488}
1489
83f64091
FB
1490/**
1491 * Truncate file to 'offset' bytes (needed only for file protocols)
1492 */
1493int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1494{
1495 BlockDriver *drv = bs->drv;
51762288 1496 int ret;
83f64091 1497 if (!drv)
19cb3738 1498 return -ENOMEDIUM;
83f64091
FB
1499 if (!drv->bdrv_truncate)
1500 return -ENOTSUP;
59f2689d
NS
1501 if (bs->read_only)
1502 return -EACCES;
8591675f
MT
1503 if (bdrv_in_use(bs))
1504 return -EBUSY;
51762288
SH
1505 ret = drv->bdrv_truncate(bs, offset);
1506 if (ret == 0) {
1507 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
145feb17 1508 bdrv_dev_resize_cb(bs);
51762288
SH
1509 }
1510 return ret;
83f64091
FB
1511}
1512
4a1d5e1f
FZ
1513/**
1514 * Length of a allocated file in bytes. Sparse files are counted by actual
1515 * allocated space. Return < 0 if error or unknown.
1516 */
1517int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1518{
1519 BlockDriver *drv = bs->drv;
1520 if (!drv) {
1521 return -ENOMEDIUM;
1522 }
1523 if (drv->bdrv_get_allocated_file_size) {
1524 return drv->bdrv_get_allocated_file_size(bs);
1525 }
1526 if (bs->file) {
1527 return bdrv_get_allocated_file_size(bs->file);
1528 }
1529 return -ENOTSUP;
1530}
1531
83f64091
FB
1532/**
1533 * Length of a file in bytes. Return < 0 if error or unknown.
1534 */
1535int64_t bdrv_getlength(BlockDriverState *bs)
1536{
1537 BlockDriver *drv = bs->drv;
1538 if (!drv)
19cb3738 1539 return -ENOMEDIUM;
51762288 1540
2c6942fa 1541 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
46a4e4e6
SH
1542 if (drv->bdrv_getlength) {
1543 return drv->bdrv_getlength(bs);
1544 }
83f64091 1545 }
46a4e4e6 1546 return bs->total_sectors * BDRV_SECTOR_SIZE;
fc01f7e7
FB
1547}
1548
19cb3738 1549/* return 0 as number of sectors if no device present or error */
96b8f136 1550void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
fc01f7e7 1551{
19cb3738
FB
1552 int64_t length;
1553 length = bdrv_getlength(bs);
1554 if (length < 0)
1555 length = 0;
1556 else
6ea44308 1557 length = length >> BDRV_SECTOR_BITS;
19cb3738 1558 *nb_sectors_ptr = length;
fc01f7e7 1559}
cf98951b 1560
f3d54fc4
AL
1561struct partition {
1562 uint8_t boot_ind; /* 0x80 - active */
1563 uint8_t head; /* starting head */
1564 uint8_t sector; /* starting sector */
1565 uint8_t cyl; /* starting cylinder */
1566 uint8_t sys_ind; /* What partition type */
1567 uint8_t end_head; /* end head */
1568 uint8_t end_sector; /* end sector */
1569 uint8_t end_cyl; /* end cylinder */
1570 uint32_t start_sect; /* starting sector counting from 0 */
1571 uint32_t nr_sects; /* nr of sectors in partition */
541dc0d4 1572} QEMU_PACKED;
f3d54fc4
AL
1573
1574/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1575static int guess_disk_lchs(BlockDriverState *bs,
1576 int *pcylinders, int *pheads, int *psectors)
1577{
eb5a3165 1578 uint8_t buf[BDRV_SECTOR_SIZE];
f3d54fc4
AL
1579 int ret, i, heads, sectors, cylinders;
1580 struct partition *p;
1581 uint32_t nr_sects;
a38131b6 1582 uint64_t nb_sectors;
f3d54fc4
AL
1583
1584 bdrv_get_geometry(bs, &nb_sectors);
1585
1586 ret = bdrv_read(bs, 0, buf, 1);
1587 if (ret < 0)
1588 return -1;
1589 /* test msdos magic */
1590 if (buf[510] != 0x55 || buf[511] != 0xaa)
1591 return -1;
1592 for(i = 0; i < 4; i++) {
1593 p = ((struct partition *)(buf + 0x1be)) + i;
1594 nr_sects = le32_to_cpu(p->nr_sects);
1595 if (nr_sects && p->end_head) {
1596 /* We make the assumption that the partition terminates on
1597 a cylinder boundary */
1598 heads = p->end_head + 1;
1599 sectors = p->end_sector & 63;
1600 if (sectors == 0)
1601 continue;
1602 cylinders = nb_sectors / (heads * sectors);
1603 if (cylinders < 1 || cylinders > 16383)
1604 continue;
1605 *pheads = heads;
1606 *psectors = sectors;
1607 *pcylinders = cylinders;
1608#if 0
1609 printf("guessed geometry: LCHS=%d %d %d\n",
1610 cylinders, heads, sectors);
1611#endif
1612 return 0;
1613 }
1614 }
1615 return -1;
1616}
1617
1618void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1619{
1620 int translation, lba_detected = 0;
1621 int cylinders, heads, secs;
a38131b6 1622 uint64_t nb_sectors;
f3d54fc4
AL
1623
1624 /* if a geometry hint is available, use it */
1625 bdrv_get_geometry(bs, &nb_sectors);
1626 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1627 translation = bdrv_get_translation_hint(bs);
1628 if (cylinders != 0) {
1629 *pcyls = cylinders;
1630 *pheads = heads;
1631 *psecs = secs;
1632 } else {
1633 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1634 if (heads > 16) {
1635 /* if heads > 16, it means that a BIOS LBA
1636 translation was active, so the default
1637 hardware geometry is OK */
1638 lba_detected = 1;
1639 goto default_geometry;
1640 } else {
1641 *pcyls = cylinders;
1642 *pheads = heads;
1643 *psecs = secs;
1644 /* disable any translation to be in sync with
1645 the logical geometry */
1646 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1647 bdrv_set_translation_hint(bs,
1648 BIOS_ATA_TRANSLATION_NONE);
1649 }
1650 }
1651 } else {
1652 default_geometry:
1653 /* if no geometry, use a standard physical disk geometry */
1654 cylinders = nb_sectors / (16 * 63);
1655
1656 if (cylinders > 16383)
1657 cylinders = 16383;
1658 else if (cylinders < 2)
1659 cylinders = 2;
1660 *pcyls = cylinders;
1661 *pheads = 16;
1662 *psecs = 63;
1663 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1664 if ((*pcyls * *pheads) <= 131072) {
1665 bdrv_set_translation_hint(bs,
1666 BIOS_ATA_TRANSLATION_LARGE);
1667 } else {
1668 bdrv_set_translation_hint(bs,
1669 BIOS_ATA_TRANSLATION_LBA);
1670 }
1671 }
1672 }
1673 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1674 }
1675}
1676
5fafdf24 1677void bdrv_set_geometry_hint(BlockDriverState *bs,
b338082b
FB
1678 int cyls, int heads, int secs)
1679{
1680 bs->cyls = cyls;
1681 bs->heads = heads;
1682 bs->secs = secs;
1683}
1684
46d4767d
FB
1685void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1686{
1687 bs->translation = translation;
1688}
1689
5fafdf24 1690void bdrv_get_geometry_hint(BlockDriverState *bs,
b338082b
FB
1691 int *pcyls, int *pheads, int *psecs)
1692{
1693 *pcyls = bs->cyls;
1694 *pheads = bs->heads;
1695 *psecs = bs->secs;
1696}
1697
0563e191
ZYW
1698/* throttling disk io limits */
1699void bdrv_set_io_limits(BlockDriverState *bs,
1700 BlockIOLimit *io_limits)
1701{
1702 bs->io_limits = *io_limits;
1703 bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
1704}
1705
5bbdbb46
BS
1706/* Recognize floppy formats */
1707typedef struct FDFormat {
1708 FDriveType drive;
1709 uint8_t last_sect;
1710 uint8_t max_track;
1711 uint8_t max_head;
1712} FDFormat;
1713
1714static const FDFormat fd_formats[] = {
1715 /* First entry is default format */
1716 /* 1.44 MB 3"1/2 floppy disks */
1717 { FDRIVE_DRV_144, 18, 80, 1, },
1718 { FDRIVE_DRV_144, 20, 80, 1, },
1719 { FDRIVE_DRV_144, 21, 80, 1, },
1720 { FDRIVE_DRV_144, 21, 82, 1, },
1721 { FDRIVE_DRV_144, 21, 83, 1, },
1722 { FDRIVE_DRV_144, 22, 80, 1, },
1723 { FDRIVE_DRV_144, 23, 80, 1, },
1724 { FDRIVE_DRV_144, 24, 80, 1, },
1725 /* 2.88 MB 3"1/2 floppy disks */
1726 { FDRIVE_DRV_288, 36, 80, 1, },
1727 { FDRIVE_DRV_288, 39, 80, 1, },
1728 { FDRIVE_DRV_288, 40, 80, 1, },
1729 { FDRIVE_DRV_288, 44, 80, 1, },
1730 { FDRIVE_DRV_288, 48, 80, 1, },
1731 /* 720 kB 3"1/2 floppy disks */
1732 { FDRIVE_DRV_144, 9, 80, 1, },
1733 { FDRIVE_DRV_144, 10, 80, 1, },
1734 { FDRIVE_DRV_144, 10, 82, 1, },
1735 { FDRIVE_DRV_144, 10, 83, 1, },
1736 { FDRIVE_DRV_144, 13, 80, 1, },
1737 { FDRIVE_DRV_144, 14, 80, 1, },
1738 /* 1.2 MB 5"1/4 floppy disks */
1739 { FDRIVE_DRV_120, 15, 80, 1, },
1740 { FDRIVE_DRV_120, 18, 80, 1, },
1741 { FDRIVE_DRV_120, 18, 82, 1, },
1742 { FDRIVE_DRV_120, 18, 83, 1, },
1743 { FDRIVE_DRV_120, 20, 80, 1, },
1744 /* 720 kB 5"1/4 floppy disks */
1745 { FDRIVE_DRV_120, 9, 80, 1, },
1746 { FDRIVE_DRV_120, 11, 80, 1, },
1747 /* 360 kB 5"1/4 floppy disks */
1748 { FDRIVE_DRV_120, 9, 40, 1, },
1749 { FDRIVE_DRV_120, 9, 40, 0, },
1750 { FDRIVE_DRV_120, 10, 41, 1, },
1751 { FDRIVE_DRV_120, 10, 42, 1, },
1752 /* 320 kB 5"1/4 floppy disks */
1753 { FDRIVE_DRV_120, 8, 40, 1, },
1754 { FDRIVE_DRV_120, 8, 40, 0, },
1755 /* 360 kB must match 5"1/4 better than 3"1/2... */
1756 { FDRIVE_DRV_144, 9, 80, 0, },
1757 /* end */
1758 { FDRIVE_DRV_NONE, -1, -1, 0, },
1759};
1760
1761void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1762 int *max_track, int *last_sect,
1763 FDriveType drive_in, FDriveType *drive)
1764{
1765 const FDFormat *parse;
1766 uint64_t nb_sectors, size;
1767 int i, first_match, match;
1768
1769 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1770 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1771 /* User defined disk */
1772 } else {
1773 bdrv_get_geometry(bs, &nb_sectors);
1774 match = -1;
1775 first_match = -1;
1776 for (i = 0; ; i++) {
1777 parse = &fd_formats[i];
1778 if (parse->drive == FDRIVE_DRV_NONE) {
1779 break;
1780 }
1781 if (drive_in == parse->drive ||
1782 drive_in == FDRIVE_DRV_NONE) {
1783 size = (parse->max_head + 1) * parse->max_track *
1784 parse->last_sect;
1785 if (nb_sectors == size) {
1786 match = i;
1787 break;
1788 }
1789 if (first_match == -1) {
1790 first_match = i;
1791 }
1792 }
1793 }
1794 if (match == -1) {
1795 if (first_match == -1) {
1796 match = 1;
1797 } else {
1798 match = first_match;
1799 }
1800 parse = &fd_formats[match];
1801 }
1802 *nb_heads = parse->max_head + 1;
1803 *max_track = parse->max_track;
1804 *last_sect = parse->last_sect;
1805 *drive = parse->drive;
1806 }
1807}
1808
46d4767d
FB
1809int bdrv_get_translation_hint(BlockDriverState *bs)
1810{
1811 return bs->translation;
1812}
1813
abd7f68d
MA
1814void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1815 BlockErrorAction on_write_error)
1816{
1817 bs->on_read_error = on_read_error;
1818 bs->on_write_error = on_write_error;
1819}
1820
1821BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1822{
1823 return is_read ? bs->on_read_error : bs->on_write_error;
1824}
1825
b338082b
FB
1826int bdrv_is_read_only(BlockDriverState *bs)
1827{
1828 return bs->read_only;
1829}
1830
985a03b0
TS
1831int bdrv_is_sg(BlockDriverState *bs)
1832{
1833 return bs->sg;
1834}
1835
e900a7b7
CH
1836int bdrv_enable_write_cache(BlockDriverState *bs)
1837{
1838 return bs->enable_write_cache;
1839}
1840
ea2384d3
FB
1841int bdrv_is_encrypted(BlockDriverState *bs)
1842{
1843 if (bs->backing_hd && bs->backing_hd->encrypted)
1844 return 1;
1845 return bs->encrypted;
1846}
1847
c0f4ce77
AL
1848int bdrv_key_required(BlockDriverState *bs)
1849{
1850 BlockDriverState *backing_hd = bs->backing_hd;
1851
1852 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1853 return 1;
1854 return (bs->encrypted && !bs->valid_key);
1855}
1856
ea2384d3
FB
1857int bdrv_set_key(BlockDriverState *bs, const char *key)
1858{
1859 int ret;
1860 if (bs->backing_hd && bs->backing_hd->encrypted) {
1861 ret = bdrv_set_key(bs->backing_hd, key);
1862 if (ret < 0)
1863 return ret;
1864 if (!bs->encrypted)
1865 return 0;
1866 }
fd04a2ae
SH
1867 if (!bs->encrypted) {
1868 return -EINVAL;
1869 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1870 return -ENOMEDIUM;
1871 }
c0f4ce77 1872 ret = bs->drv->bdrv_set_key(bs, key);
bb5fc20f
AL
1873 if (ret < 0) {
1874 bs->valid_key = 0;
1875 } else if (!bs->valid_key) {
1876 bs->valid_key = 1;
1877 /* call the change callback now, we skipped it on open */
7d4b4ba5 1878 bdrv_dev_change_media_cb(bs, true);
bb5fc20f 1879 }
c0f4ce77 1880 return ret;
ea2384d3
FB
1881}
1882
1883void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1884{
19cb3738 1885 if (!bs->drv) {
ea2384d3
FB
1886 buf[0] = '\0';
1887 } else {
1888 pstrcpy(buf, buf_size, bs->drv->format_name);
1889 }
1890}
1891
5fafdf24 1892void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
ea2384d3
FB
1893 void *opaque)
1894{
1895 BlockDriver *drv;
1896
8a22f02a 1897 QLIST_FOREACH(drv, &bdrv_drivers, list) {
ea2384d3
FB
1898 it(opaque, drv->format_name);
1899 }
1900}
1901
b338082b
FB
1902BlockDriverState *bdrv_find(const char *name)
1903{
1904 BlockDriverState *bs;
1905
1b7bdbc1
SH
1906 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1907 if (!strcmp(name, bs->device_name)) {
b338082b 1908 return bs;
1b7bdbc1 1909 }
b338082b
FB
1910 }
1911 return NULL;
1912}
1913
2f399b0a
MA
1914BlockDriverState *bdrv_next(BlockDriverState *bs)
1915{
1916 if (!bs) {
1917 return QTAILQ_FIRST(&bdrv_states);
1918 }
1919 return QTAILQ_NEXT(bs, list);
1920}
1921
51de9760 1922void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
81d0912d
FB
1923{
1924 BlockDriverState *bs;
1925
1926 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1927 it(opaque, bs);
1928 }
1929}
1930
ea2384d3
FB
1931const char *bdrv_get_device_name(BlockDriverState *bs)
1932{
1933 return bs->device_name;
1934}
1935
c6ca28d6
AL
1936void bdrv_flush_all(void)
1937{
1938 BlockDriverState *bs;
1939
1940 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1941 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
1942 bdrv_flush(bs);
1943 }
1944 }
1945}
1946
f2feebbd
KW
1947int bdrv_has_zero_init(BlockDriverState *bs)
1948{
1949 assert(bs->drv);
1950
1951 if (bs->drv->bdrv_has_zero_init) {
1952 return bs->drv->bdrv_has_zero_init(bs);
1953 }
1954
1955 return 1;
1956}
1957
376ae3f1
SH
1958typedef struct BdrvCoIsAllocatedData {
1959 BlockDriverState *bs;
1960 int64_t sector_num;
1961 int nb_sectors;
1962 int *pnum;
1963 int ret;
1964 bool done;
1965} BdrvCoIsAllocatedData;
1966
f58c7b35
TS
1967/*
1968 * Returns true iff the specified sector is present in the disk image. Drivers
1969 * not implementing the functionality are assumed to not support backing files,
1970 * hence all their sectors are reported as allocated.
1971 *
1972 * 'pnum' is set to the number of sectors (including and immediately following
1973 * the specified sector) that are known to be in the same
1974 * allocated/unallocated state.
1975 *
1976 * 'nb_sectors' is the max value 'pnum' should be set to.
1977 */
1978int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
1979 int nb_sectors, int *pnum)
1980{
1981 if (!bs->drv->bdrv_co_is_allocated) {
1982 int64_t n;
1983 if (sector_num >= bs->total_sectors) {
1984 *pnum = 0;
1985 return 0;
1986 }
1987 n = bs->total_sectors - sector_num;
1988 *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1989 return 1;
1990 }
1991
1992 return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
1993}
1994
1995/* Coroutine wrapper for bdrv_is_allocated() */
1996static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
1997{
1998 BdrvCoIsAllocatedData *data = opaque;
1999 BlockDriverState *bs = data->bs;
2000
2001 data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
2002 data->pnum);
2003 data->done = true;
2004}
2005
2006/*
2007 * Synchronous wrapper around bdrv_co_is_allocated().
2008 *
2009 * See bdrv_co_is_allocated() for details.
2010 */
2011int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
2012 int *pnum)
2013{
2014 Coroutine *co;
2015 BdrvCoIsAllocatedData data = {
2016 .bs = bs,
2017 .sector_num = sector_num,
2018 .nb_sectors = nb_sectors,
2019 .pnum = pnum,
2020 .done = false,
2021 };
2022
2023 co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
2024 qemu_coroutine_enter(co, &data);
2025 while (!data.done) {
2026 qemu_aio_wait();
2027 }
2028 return data.ret;
2029}
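/* Editor's illustrative sketch (not part of the original block.c): counting
 * the allocated sectors of an image with the synchronous bdrv_is_allocated()
 * wrapper above.  The helper name and the 64k-sector query granularity are
 * arbitrary choices for the example. */
static int64_t example_count_allocated(BlockDriverState *bs)
{
    int64_t total = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
    int64_t sector = 0, allocated = 0;

    while (sector < total) {
        int64_t remaining = total - sector;
        int chunk = remaining > 65536 ? 65536 : (int)remaining;
        int num = 0;

        if (bdrv_is_allocated(bs, sector, chunk, &num)) {
            allocated += num;
        }
        if (num == 0) {
            break;              /* defensive: don't spin if a driver reports 0 */
        }
        sector += num;
    }
    return allocated;
}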
2030
2582bfed
LC
2031void bdrv_mon_event(const BlockDriverState *bdrv,
2032 BlockMonEventAction action, int is_read)
2033{
2034 QObject *data;
2035 const char *action_str;
2036
2037 switch (action) {
2038 case BDRV_ACTION_REPORT:
2039 action_str = "report";
2040 break;
2041 case BDRV_ACTION_IGNORE:
2042 action_str = "ignore";
2043 break;
2044 case BDRV_ACTION_STOP:
2045 action_str = "stop";
2046 break;
2047 default:
2048 abort();
2049 }
2050
2051 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
2052 bdrv->device_name,
2053 action_str,
2054 is_read ? "read" : "write");
2055 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
2056
2057 qobject_decref(data);
2058}
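/* Editor's illustrative sketch (not part of the original block.c): a device
 * model that stops the VM on a failed write would report it roughly like
 * this; the emitted QMP event then carries the three fields built above,
 * e.g. { "event": "BLOCK_IO_ERROR",
 *        "data": { "device": "ide0-hd0", "action": "stop",
 *                  "operation": "write" } }
 * (the exact event and device names shown here are illustrative). */
static void example_report_write_error(BlockDriverState *bs)
{
    bdrv_mon_event(bs, BDRV_ACTION_STOP, 0 /* is_read */);
}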
2059
2060BlockInfoList *qmp_query_block(Error **errp)
2061{
2062 BlockInfoList *head = NULL, *cur_item = NULL;
2063 BlockDriverState *bs;
2064
2065 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2066 BlockInfoList *info = g_malloc0(sizeof(*info));
2067
2068 info->value = g_malloc0(sizeof(*info->value));
2069 info->value->device = g_strdup(bs->device_name);
2070 info->value->type = g_strdup("unknown");
2071 info->value->locked = bdrv_dev_is_medium_locked(bs);
2072 info->value->removable = bdrv_dev_has_removable_media(bs);
2073
2074 if (bdrv_dev_has_removable_media(bs)) {
2075 info->value->has_tray_open = true;
2076 info->value->tray_open = bdrv_dev_is_tray_open(bs);
2077 }
2078
2079 if (bdrv_iostatus_is_enabled(bs)) {
2080 info->value->has_io_status = true;
2081 info->value->io_status = bs->iostatus;
2082 }
2083
2084 if (bs->drv) {
2085 info->value->has_inserted = true;
2086 info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
2087 info->value->inserted->file = g_strdup(bs->filename);
2088 info->value->inserted->ro = bs->read_only;
2089 info->value->inserted->drv = g_strdup(bs->drv->format_name);
2090 info->value->inserted->encrypted = bs->encrypted;
2091 if (bs->backing_file[0]) {
2092 info->value->inserted->has_backing_file = true;
2093 info->value->inserted->backing_file = g_strdup(bs->backing_file);
2094 }
2095
2096 if (bs->io_limits_enabled) {
2097 info->value->inserted->bps =
2098 bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2099 info->value->inserted->bps_rd =
2100 bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
2101 info->value->inserted->bps_wr =
2102 bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
2103 info->value->inserted->iops =
2104 bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2105 info->value->inserted->iops_rd =
2106 bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
2107 info->value->inserted->iops_wr =
2108 bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
2109 }
2110 }
2111
2112 /* XXX: waiting for the qapi to support GSList */
2113 if (!cur_item) {
2114 head = cur_item = info;
2115 } else {
2116 cur_item->next = info;
2117 cur_item = info;
2118 }
2119 }
2120
2121 return head;
2122}
2123
2124 /* Consider exposing this as a full-fledged QMP command */
2125 static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
2126{
2127 BlockStats *s;
2128
2129 s = g_malloc0(sizeof(*s));
2130
2131 if (bs->device_name[0]) {
2132 s->has_device = true;
2133 s->device = g_strdup(bs->device_name);
2134 }
2135
2136 s->stats = g_malloc0(sizeof(*s->stats));
2137 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
2138 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
2139 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
2140 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
2141 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
2142 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
2143 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
2144 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
2145 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
2146
2147 if (bs->file) {
2148 s->has_parent = true;
2149 s->parent = qmp_query_blockstat(bs->file, NULL);
2150 }
2151
2152 return s;
2153}
2154
2155BlockStatsList *qmp_query_blockstats(Error **errp)
2156{
2157 BlockStatsList *head = NULL, *cur_item = NULL;
2158 BlockDriverState *bs;
2159
2160 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2161 BlockStatsList *info = g_malloc0(sizeof(*info));
2162 info->value = qmp_query_blockstat(bs, NULL);
2163
2164 /* XXX: waiting for the qapi to support GSList */
2165 if (!cur_item) {
2166 head = cur_item = info;
2167 } else {
2168 cur_item->next = info;
2169 cur_item = info;
2170 }
2171 }
2172
2173 return head;
2174}
ea2384d3 2175
045df330
AL
2176const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2177{
2178 if (bs->backing_hd && bs->backing_hd->encrypted)
2179 return bs->backing_file;
2180 else if (bs->encrypted)
2181 return bs->filename;
2182 else
2183 return NULL;
2184}
2185
2186void bdrv_get_backing_filename(BlockDriverState *bs,
2187 char *filename, int filename_size)
2188{
2189 pstrcpy(filename, filename_size, bs->backing_file);
2190}
2191
2192int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
2193 const uint8_t *buf, int nb_sectors)
2194{
2195 BlockDriver *drv = bs->drv;
2196 if (!drv)
2197 return -ENOMEDIUM;
2198 if (!drv->bdrv_write_compressed)
2199 return -ENOTSUP;
2200 if (bdrv_check_request(bs, sector_num, nb_sectors))
2201 return -EIO;
2202
2203 if (bs->dirty_bitmap) {
2204 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2205 }
2206
2207 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2208}
3b46e624 2209
faea38e7
FB
2210int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2211{
2212 BlockDriver *drv = bs->drv;
2213 if (!drv)
19cb3738 2214 return -ENOMEDIUM;
faea38e7
FB
2215 if (!drv->bdrv_get_info)
2216 return -ENOTSUP;
2217 memset(bdi, 0, sizeof(*bdi));
2218 return drv->bdrv_get_info(bs, bdi);
2219}
2220
45566e9c
CH
2221int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2222 int64_t pos, int size)
178e08a5
AL
2223{
2224 BlockDriver *drv = bs->drv;
2225 if (!drv)
2226 return -ENOMEDIUM;
7cdb1f6d
MK
2227 if (drv->bdrv_save_vmstate)
2228 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2229 if (bs->file)
2230 return bdrv_save_vmstate(bs->file, buf, pos, size);
2231 return -ENOTSUP;
178e08a5
AL
2232}
2233
45566e9c
CH
2234int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2235 int64_t pos, int size)
178e08a5
AL
2236{
2237 BlockDriver *drv = bs->drv;
2238 if (!drv)
2239 return -ENOMEDIUM;
7cdb1f6d
MK
2240 if (drv->bdrv_load_vmstate)
2241 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2242 if (bs->file)
2243 return bdrv_load_vmstate(bs->file, buf, pos, size);
2244 return -ENOTSUP;
178e08a5
AL
2245}
2246
8b9b0cc2
KW
2247void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2248{
2249 BlockDriver *drv = bs->drv;
2250
2251 if (!drv || !drv->bdrv_debug_event) {
2252 return;
2253 }
2254
2255 return drv->bdrv_debug_event(bs, event);
2256
2257}
2258
faea38e7
FB
2259/**************************************************************/
2260/* handling of snapshots */
2261
feeee5ac
MDCF
2262int bdrv_can_snapshot(BlockDriverState *bs)
2263{
2264 BlockDriver *drv = bs->drv;
07b70bfb 2265 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
feeee5ac
MDCF
2266 return 0;
2267 }
2268
2269 if (!drv->bdrv_snapshot_create) {
2270 if (bs->file != NULL) {
2271 return bdrv_can_snapshot(bs->file);
2272 }
2273 return 0;
2274 }
2275
2276 return 1;
2277}
2278
199630b6
BS
2279int bdrv_is_snapshot(BlockDriverState *bs)
2280{
2281 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2282}
2283
f9092b10
MA
2284BlockDriverState *bdrv_snapshots(void)
2285{
2286 BlockDriverState *bs;
2287
3ac906f7 2288 if (bs_snapshots) {
f9092b10 2289 return bs_snapshots;
3ac906f7 2290 }
f9092b10
MA
2291
2292 bs = NULL;
2293 while ((bs = bdrv_next(bs))) {
2294 if (bdrv_can_snapshot(bs)) {
3ac906f7
MA
2295 bs_snapshots = bs;
2296 return bs;
f9092b10
MA
2297 }
2298 }
2299 return NULL;
f9092b10
MA
2300}
2301
5fafdf24 2302int bdrv_snapshot_create(BlockDriverState *bs,
faea38e7
FB
2303 QEMUSnapshotInfo *sn_info)
2304{
2305 BlockDriver *drv = bs->drv;
2306 if (!drv)
19cb3738 2307 return -ENOMEDIUM;
7cdb1f6d
MK
2308 if (drv->bdrv_snapshot_create)
2309 return drv->bdrv_snapshot_create(bs, sn_info);
2310 if (bs->file)
2311 return bdrv_snapshot_create(bs->file, sn_info);
2312 return -ENOTSUP;
faea38e7
FB
2313}
2314
5fafdf24 2315int bdrv_snapshot_goto(BlockDriverState *bs,
faea38e7
FB
2316 const char *snapshot_id)
2317{
2318 BlockDriver *drv = bs->drv;
7cdb1f6d
MK
2319 int ret, open_ret;
2320
faea38e7 2321 if (!drv)
19cb3738 2322 return -ENOMEDIUM;
7cdb1f6d
MK
2323 if (drv->bdrv_snapshot_goto)
2324 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2325
2326 if (bs->file) {
2327 drv->bdrv_close(bs);
2328 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2329 open_ret = drv->bdrv_open(bs, bs->open_flags);
2330 if (open_ret < 0) {
2331 bdrv_delete(bs->file);
2332 bs->drv = NULL;
2333 return open_ret;
2334 }
2335 return ret;
2336 }
2337
2338 return -ENOTSUP;
faea38e7
FB
2339}
2340
2341int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2342{
2343 BlockDriver *drv = bs->drv;
2344 if (!drv)
19cb3738 2345 return -ENOMEDIUM;
7cdb1f6d
MK
2346 if (drv->bdrv_snapshot_delete)
2347 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2348 if (bs->file)
2349 return bdrv_snapshot_delete(bs->file, snapshot_id);
2350 return -ENOTSUP;
faea38e7
FB
2351}
2352
5fafdf24 2353int bdrv_snapshot_list(BlockDriverState *bs,
faea38e7
FB
2354 QEMUSnapshotInfo **psn_info)
2355{
2356 BlockDriver *drv = bs->drv;
2357 if (!drv)
19cb3738 2358 return -ENOMEDIUM;
7cdb1f6d
MK
2359 if (drv->bdrv_snapshot_list)
2360 return drv->bdrv_snapshot_list(bs, psn_info);
2361 if (bs->file)
2362 return bdrv_snapshot_list(bs->file, psn_info);
2363 return -ENOTSUP;
faea38e7
FB
2364}
2365
51ef6727 2366int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2367 const char *snapshot_name)
2368{
2369 BlockDriver *drv = bs->drv;
2370 if (!drv) {
2371 return -ENOMEDIUM;
2372 }
2373 if (!bs->read_only) {
2374 return -EINVAL;
2375 }
2376 if (drv->bdrv_snapshot_load_tmp) {
2377 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2378 }
2379 return -ENOTSUP;
2380}
2381
faea38e7
FB
2382#define NB_SUFFIXES 4
2383
2384char *get_human_readable_size(char *buf, int buf_size, int64_t size)
2385{
2386 static const char suffixes[NB_SUFFIXES] = "KMGT";
2387 int64_t base;
2388 int i;
2389
2390 if (size <= 999) {
2391 snprintf(buf, buf_size, "%" PRId64, size);
2392 } else {
2393 base = 1024;
2394 for(i = 0; i < NB_SUFFIXES; i++) {
2395 if (size < (10 * base)) {
2396 snprintf(buf, buf_size, "%0.1f%c",
2397 (double)size / base,
2398 suffixes[i]);
2399 break;
2400 } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
2401 snprintf(buf, buf_size, "%" PRId64 "%c",
2402 ((size + (base >> 1)) / base),
2403 suffixes[i]);
2404 break;
2405 }
2406 base = base * 1024;
2407 }
2408 }
2409 return buf;
2410}
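/* Editor's illustrative sketch (not part of the original block.c): a few
 * sample values run through get_human_readable_size().  Sizes up to 999
 * print as plain byte counts; larger values are scaled by powers of 1024
 * and keep one decimal while below ten units of the current suffix. */
static void example_print_sizes(void)
{
    char buf[16];

    printf("%s\n", get_human_readable_size(buf, sizeof(buf), 512));          /* "512"  */
    printf("%s\n", get_human_readable_size(buf, sizeof(buf), 1000));         /* "1.0K" */
    printf("%s\n", get_human_readable_size(buf, sizeof(buf), 1536LL << 20)); /* "1.5G" */
}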
2411
2412char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2413{
2414 char buf1[128], date_buf[128], clock_buf[128];
3b9f94e1
FB
2415#ifdef _WIN32
2416 struct tm *ptm;
2417#else
faea38e7 2418 struct tm tm;
3b9f94e1 2419#endif
faea38e7
FB
2420 time_t ti;
2421 int64_t secs;
2422
2423 if (!sn) {
5fafdf24
TS
2424 snprintf(buf, buf_size,
2425 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
2426 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2427 } else {
2428 ti = sn->date_sec;
3b9f94e1
FB
2429#ifdef _WIN32
2430 ptm = localtime(&ti);
2431 strftime(date_buf, sizeof(date_buf),
2432 "%Y-%m-%d %H:%M:%S", ptm);
2433#else
faea38e7
FB
2434 localtime_r(&ti, &tm);
2435 strftime(date_buf, sizeof(date_buf),
2436 "%Y-%m-%d %H:%M:%S", &tm);
3b9f94e1 2437#endif
faea38e7
FB
2438 secs = sn->vm_clock_nsec / 1000000000;
2439 snprintf(clock_buf, sizeof(clock_buf),
2440 "%02d:%02d:%02d.%03d",
2441 (int)(secs / 3600),
2442 (int)((secs / 60) % 60),
5fafdf24 2443 (int)(secs % 60),
faea38e7
FB
2444 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2445 snprintf(buf, buf_size,
5fafdf24 2446 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
2447 sn->id_str, sn->name,
2448 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2449 date_buf,
2450 clock_buf);
2451 }
2452 return buf;
2453}
2454
ea2384d3 2455/**************************************************************/
83f64091 2456/* async I/Os */
ea2384d3 2457
3b69e4b9 2458BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
f141eafe 2459 QEMUIOVector *qiov, int nb_sectors,
3b69e4b9 2460 BlockDriverCompletionFunc *cb, void *opaque)
83f64091 2461{
bbf0a440
SH
2462 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2463
b2a61371 2464 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
8c5873d6 2465 cb, opaque, false);
ea2384d3
FB
2466}
2467
f141eafe
AL
2468BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2469 QEMUIOVector *qiov, int nb_sectors,
2470 BlockDriverCompletionFunc *cb, void *opaque)
ea2384d3 2471{
bbf0a440
SH
2472 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2473
1a6e115b 2474 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
8c5873d6 2475 cb, opaque, true);
83f64091
FB
2476}
2477
40b4f539
KW
2478
2479typedef struct MultiwriteCB {
2480 int error;
2481 int num_requests;
2482 int num_callbacks;
2483 struct {
2484 BlockDriverCompletionFunc *cb;
2485 void *opaque;
2486 QEMUIOVector *free_qiov;
2487 void *free_buf;
2488 } callbacks[];
2489} MultiwriteCB;
2490
2491static void multiwrite_user_cb(MultiwriteCB *mcb)
2492{
2493 int i;
2494
2495 for (i = 0; i < mcb->num_callbacks; i++) {
2496 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1e1ea48d
SH
2497 if (mcb->callbacks[i].free_qiov) {
2498 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2499 }
7267c094 2500 g_free(mcb->callbacks[i].free_qiov);
f8a83245 2501 qemu_vfree(mcb->callbacks[i].free_buf);
40b4f539
KW
2502 }
2503}
2504
2505static void multiwrite_cb(void *opaque, int ret)
2506{
2507 MultiwriteCB *mcb = opaque;
2508
6d519a5f
SH
2509 trace_multiwrite_cb(mcb, ret);
2510
cb6d3ca0 2511 if (ret < 0 && !mcb->error) {
40b4f539 2512 mcb->error = ret;
40b4f539
KW
2513 }
2514
2515 mcb->num_requests--;
2516 if (mcb->num_requests == 0) {
de189a1b 2517 multiwrite_user_cb(mcb);
7267c094 2518 g_free(mcb);
40b4f539
KW
2519 }
2520}
2521
2522static int multiwrite_req_compare(const void *a, const void *b)
2523{
77be4366
CH
2524 const BlockRequest *req1 = a, *req2 = b;
2525
2526 /*
2527 * Note that we can't simply subtract req2->sector from req1->sector
2528 * here as that could overflow the return value.
2529 */
2530 if (req1->sector > req2->sector) {
2531 return 1;
2532 } else if (req1->sector < req2->sector) {
2533 return -1;
2534 } else {
2535 return 0;
2536 }
40b4f539
KW
2537}
2538
2539/*
2540 * Takes a bunch of requests and tries to merge them. Returns the number of
2541 * requests that remain after merging.
2542 */
2543static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2544 int num_reqs, MultiwriteCB *mcb)
2545{
2546 int i, outidx;
2547
2548 // Sort requests by start sector
2549 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2550
2551 // Check if adjacent requests touch the same clusters. If so, combine them,
2552 // filling up gaps with zero sectors.
2553 outidx = 0;
2554 for (i = 1; i < num_reqs; i++) {
2555 int merge = 0;
2556 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2557
2558 // This handles the cases that are valid for all block drivers, namely
2559 // exactly sequential writes and overlapping writes.
2560 if (reqs[i].sector <= oldreq_last) {
2561 merge = 1;
2562 }
2563
2564 // The block driver may decide that it makes sense to combine requests
2565 // even if there is a gap of some sectors between them. In this case,
2566 // the gap is filled with zeros (hence this is only applicable to
2567 // as-yet-unused space in formats such as qcow2).
2568 if (!merge && bs->drv->bdrv_merge_requests) {
2569 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2570 }
2571
e2a305fb
CH
2572 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2573 merge = 0;
2574 }
2575
40b4f539
KW
2576 if (merge) {
2577 size_t size;
7267c094 2578 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
40b4f539
KW
2579 qemu_iovec_init(qiov,
2580 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2581
2582 // Add the first request to the merged one. If the requests are
2583 // overlapping, drop the last sectors of the first request.
2584 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2585 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2586
2587 // We might need to add some zeros between the two requests
2588 if (reqs[i].sector > oldreq_last) {
2589 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2590 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2591 memset(buf, 0, zero_bytes);
2592 qemu_iovec_add(qiov, buf, zero_bytes);
2593 mcb->callbacks[i].free_buf = buf;
2594 }
2595
2596 // Add the second request
2597 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2598
cbf1dff2 2599 reqs[outidx].nb_sectors = qiov->size >> 9;
40b4f539
KW
2600 reqs[outidx].qiov = qiov;
2601
2602 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2603 } else {
2604 outidx++;
2605 reqs[outidx].sector = reqs[i].sector;
2606 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2607 reqs[outidx].qiov = reqs[i].qiov;
2608 }
2609 }
2610
2611 return outidx + 1;
2612}
2613
2614/*
2615 * Submit multiple AIO write requests at once.
2616 *
2617 * On success, the function returns 0 and all requests in the reqs array have
2618 * been submitted. On error, this function returns -1, and any of the
2619 * requests may or may not be submitted yet. In particular, this means that the
2620 * callback will be called for some of the requests, for others it won't. The
2621 * caller must check the error field of the BlockRequest to wait for the right
2622 * callbacks (if error != 0, no callback will be called).
2623 *
2624 * The implementation may modify the contents of the reqs array, e.g. to merge
2625 * requests. However, the fields opaque and error are left unmodified as they
2626 * are used to signal failure for a single request to the caller.
2627 */
2628int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2629{
2630 BlockDriverAIOCB *acb;
2631 MultiwriteCB *mcb;
2632 int i;
2633
301db7c2
RH
2634 /* don't submit writes if we don't have a medium */
2635 if (bs->drv == NULL) {
2636 for (i = 0; i < num_reqs; i++) {
2637 reqs[i].error = -ENOMEDIUM;
2638 }
2639 return -1;
2640 }
2641
40b4f539
KW
2642 if (num_reqs == 0) {
2643 return 0;
2644 }
2645
2646 // Create MultiwriteCB structure
7267c094 2647 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
40b4f539
KW
2648 mcb->num_requests = 0;
2649 mcb->num_callbacks = num_reqs;
2650
2651 for (i = 0; i < num_reqs; i++) {
2652 mcb->callbacks[i].cb = reqs[i].cb;
2653 mcb->callbacks[i].opaque = reqs[i].opaque;
2654 }
2655
2656 // Check for mergeable requests
2657 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2658
6d519a5f
SH
2659 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2660
453f9a16
KW
2661 /*
2662 * Run the aio requests. As soon as one request can't be submitted
2663 * successfully, fail all requests that are not yet submitted (we must
2664 * return failure for all requests anyway)
2665 *
2666 * num_requests cannot be set to the right value immediately: If
2667 * bdrv_aio_writev fails for some request, num_requests would be too high
2668 * and therefore multiwrite_cb() would never recognize the multiwrite
2669 * request as completed. We also cannot use the loop variable i to set it
2670 * when the first request fails because the callback may already have been
2671 * called for previously submitted requests. Thus, num_requests must be
2672 * incremented for each request that is submitted.
2673 *
2674 * The problem that callbacks may be called early also means that we need
2675 * to take care that num_requests doesn't become 0 before all requests are
2676 * submitted - multiwrite_cb() would consider the multiwrite request
2677 * completed. A dummy request that is "completed" by a manual call to
2678 * multiwrite_cb() takes care of this.
2679 */
2680 mcb->num_requests = 1;
2681
6d519a5f 2682 // Run the aio requests
40b4f539 2683 for (i = 0; i < num_reqs; i++) {
453f9a16 2684 mcb->num_requests++;
40b4f539
KW
2685 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2686 reqs[i].nb_sectors, multiwrite_cb, mcb);
2687
2688 if (acb == NULL) {
2689 // We can only fail the whole thing if no request has been
2690 // submitted yet. Otherwise we'll wait for the submitted AIOs to
2691 // complete and report the error in the callback.
453f9a16 2692 if (i == 0) {
6d519a5f 2693 trace_bdrv_aio_multiwrite_earlyfail(mcb);
40b4f539
KW
2694 goto fail;
2695 } else {
6d519a5f 2696 trace_bdrv_aio_multiwrite_latefail(mcb, i);
7eb58a6c 2697 multiwrite_cb(mcb, -EIO);
40b4f539
KW
2698 break;
2699 }
40b4f539
KW
2700 }
2701 }
2702
453f9a16
KW
2703 /* Complete the dummy request */
2704 multiwrite_cb(mcb, 0);
2705
40b4f539
KW
2706 return 0;
2707
2708fail:
453f9a16
KW
2709 for (i = 0; i < mcb->num_callbacks; i++) {
2710 reqs[i].error = -EIO;
2711 }
7267c094 2712 g_free(mcb);
40b4f539
KW
2713 return -1;
2714}
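/* Editor's illustrative sketch (not part of the original block.c): batching
 * two writes through bdrv_aio_multiwrite().  The completion callback and the
 * QEMUIOVector setup are assumed to exist elsewhere, and the sector numbers
 * are made up.  As the comment above explains, after a -1 return the caller
 * checks each request's error field to know which callbacks will still run. */
static void example_submit_batch(BlockDriverState *bs,
                                 QEMUIOVector *qiov_a, QEMUIOVector *qiov_b,
                                 BlockDriverCompletionFunc *cb, void *opaque)
{
    BlockRequest reqs[2] = {
        { .sector = 0,  .nb_sectors = (int)(qiov_a->size >> 9), .qiov = qiov_a,
          .cb = cb, .opaque = opaque },
        { .sector = 16, .nb_sectors = (int)(qiov_b->size >> 9), .qiov = qiov_b,
          .cb = cb, .opaque = opaque },
    };

    if (bdrv_aio_multiwrite(bs, reqs, 2) < 0) {
        /* inspect reqs[i].error: requests with error != 0 get no callback */
    }
}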
2715
83f64091 2716void bdrv_aio_cancel(BlockDriverAIOCB *acb)
83f64091 2717{
6bbff9a0 2718 acb->pool->cancel(acb);
83f64091
FB
2719}
2720
98f90dba
ZYW
2721/* block I/O throttling */
2722static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
2723 bool is_write, double elapsed_time, uint64_t *wait)
2724{
2725 uint64_t bps_limit = 0;
2726 double bytes_limit, bytes_base, bytes_res;
2727 double slice_time, wait_time;
2728
2729 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
2730 bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2731 } else if (bs->io_limits.bps[is_write]) {
2732 bps_limit = bs->io_limits.bps[is_write];
2733 } else {
2734 if (wait) {
2735 *wait = 0;
2736 }
2737
2738 return false;
2739 }
2740
2741 slice_time = bs->slice_end - bs->slice_start;
2742 slice_time /= (NANOSECONDS_PER_SECOND);
2743 bytes_limit = bps_limit * slice_time;
2744 bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
2745 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
2746 bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
2747 }
2748
2749 /* bytes_base: the bytes of data which have been read/written; and
2750 * it is obtained from the history statistic info.
2751 * bytes_res: the remaining bytes of data which need to be read/written.
2752 * (bytes_base + bytes_res) / bps_limit: used to calculate
2753 * the total time for completing reading/writing all data.
2754 */
2755 bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
2756
2757 if (bytes_base + bytes_res <= bytes_limit) {
2758 if (wait) {
2759 *wait = 0;
2760 }
2761
2762 return false;
2763 }
2764
2765 /* Calc approx time to dispatch */
2766 wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
2767
2768 /* When the I/O rate at runtime exceeds the limits,
2769 * bs->slice_end needs to be extended so that the current statistics
2770 * can be kept until the timer fires; the value is increased and tuned
2771 * based on experimental results.
2772 */
2773 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
2774 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
2775 if (wait) {
2776 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
2777 }
2778
2779 return true;
2780}
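/* Editor's worked example (not part of the original block.c): plugging round
 * numbers into the formula above.  All values are invented for illustration. */
static double example_bps_wait(void)
{
    double bps_limit    = 1000000.0;  /* 1 MB/s limit                        */
    double slice_time   = 0.1;        /* seconds in the current slice        */
    double bytes_limit  = bps_limit * slice_time;   /* 100000 bytes allowed  */
    double bytes_base   = 80000.0;    /* already transferred in this slice   */
    double bytes_res    = 64000.0;    /* size of the new request             */
    double elapsed_time = 0.08;       /* seconds since the slice started     */

    if (bytes_base + bytes_res <= bytes_limit) {
        return 0.0;                   /* within budget, no throttling needed */
    }
    /* 144000 / 1000000 - 0.08 = 0.064 s until the request may be dispatched */
    return (bytes_base + bytes_res) / bps_limit - elapsed_time;
}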
2781
2782static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
2783 double elapsed_time, uint64_t *wait)
2784{
2785 uint64_t iops_limit = 0;
2786 double ios_limit, ios_base;
2787 double slice_time, wait_time;
2788
2789 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
2790 iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2791 } else if (bs->io_limits.iops[is_write]) {
2792 iops_limit = bs->io_limits.iops[is_write];
2793 } else {
2794 if (wait) {
2795 *wait = 0;
2796 }
2797
2798 return false;
2799 }
2800
2801 slice_time = bs->slice_end - bs->slice_start;
2802 slice_time /= (NANOSECONDS_PER_SECOND);
2803 ios_limit = iops_limit * slice_time;
2804 ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
2805 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
2806 ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
2807 }
2808
2809 if (ios_base + 1 <= ios_limit) {
2810 if (wait) {
2811 *wait = 0;
2812 }
2813
2814 return false;
2815 }
2816
2817 /* Calc approx time to dispatch */
2818 wait_time = (ios_base + 1) / iops_limit;
2819 if (wait_time > elapsed_time) {
2820 wait_time = wait_time - elapsed_time;
2821 } else {
2822 wait_time = 0;
2823 }
2824
2825 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
2826 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
2827 if (wait) {
2828 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
2829 }
2830
2831 return true;
2832}
2833
2834static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
2835 bool is_write, int64_t *wait)
2836{
2837 int64_t now, max_wait;
2838 uint64_t bps_wait = 0, iops_wait = 0;
2839 double elapsed_time;
2840 int bps_ret, iops_ret;
2841
2842 now = qemu_get_clock_ns(vm_clock);
2843 if ((bs->slice_start < now)
2844 && (bs->slice_end > now)) {
2845 bs->slice_end = now + bs->slice_time;
2846 } else {
2847 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
2848 bs->slice_start = now;
2849 bs->slice_end = now + bs->slice_time;
2850
2851 bs->io_base.bytes[is_write] = bs->nr_bytes[is_write];
2852 bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
2853
2854 bs->io_base.ios[is_write] = bs->nr_ops[is_write];
2855 bs->io_base.ios[!is_write] = bs->nr_ops[!is_write];
2856 }
2857
2858 elapsed_time = now - bs->slice_start;
2859 elapsed_time /= (NANOSECONDS_PER_SECOND);
2860
2861 bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
2862 is_write, elapsed_time, &bps_wait);
2863 iops_ret = bdrv_exceed_iops_limits(bs, is_write,
2864 elapsed_time, &iops_wait);
2865 if (bps_ret || iops_ret) {
2866 max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
2867 if (wait) {
2868 *wait = max_wait;
2869 }
2870
2871 now = qemu_get_clock_ns(vm_clock);
2872 if (bs->slice_end < now + max_wait) {
2873 bs->slice_end = now + max_wait;
2874 }
2875
2876 return true;
2877 }
2878
2879 if (wait) {
2880 *wait = 0;
2881 }
2882
2883 return false;
2884}
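/* Editor's illustrative sketch (not part of the original block.c): how a
 * request path might consult bdrv_exceed_io_limits() before issuing I/O.
 * The queueing and timer handling are elided; in the real throttling code
 * the request is delayed for roughly 'wait' nanoseconds and then retried. */
static bool example_must_throttle(BlockDriverState *bs, int nb_sectors,
                                  bool is_write)
{
    int64_t wait_ns = 0;

    if (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_ns)) {
        /* caller should queue the request and retry after about wait_ns */
        return true;
    }
    return false;
}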
ce1a14dc 2885
83f64091
FB
2886/**************************************************************/
2887/* async block device emulation */
2888
c16b5a2c
CH
2889typedef struct BlockDriverAIOCBSync {
2890 BlockDriverAIOCB common;
2891 QEMUBH *bh;
2892 int ret;
2893 /* vector translation state */
2894 QEMUIOVector *qiov;
2895 uint8_t *bounce;
2896 int is_write;
2897} BlockDriverAIOCBSync;
2898
2899static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2900{
b666d239
KW
2901 BlockDriverAIOCBSync *acb =
2902 container_of(blockacb, BlockDriverAIOCBSync, common);
6a7ad299 2903 qemu_bh_delete(acb->bh);
36afc451 2904 acb->bh = NULL;
c16b5a2c
CH
2905 qemu_aio_release(acb);
2906}
2907
2908static AIOPool bdrv_em_aio_pool = {
2909 .aiocb_size = sizeof(BlockDriverAIOCBSync),
2910 .cancel = bdrv_aio_cancel_em,
2911};
2912
ce1a14dc 2913static void bdrv_aio_bh_cb(void *opaque)
83f64091 2914{
ce1a14dc 2915 BlockDriverAIOCBSync *acb = opaque;
f141eafe 2916
f141eafe
AL
2917 if (!acb->is_write)
2918 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
ceb42de8 2919 qemu_vfree(acb->bounce);
ce1a14dc 2920 acb->common.cb(acb->common.opaque, acb->ret);
6a7ad299 2921 qemu_bh_delete(acb->bh);
36afc451 2922 acb->bh = NULL;
ce1a14dc 2923 qemu_aio_release(acb);
83f64091 2924}
beac80cd 2925
f141eafe
AL
2926static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2927 int64_t sector_num,
2928 QEMUIOVector *qiov,
2929 int nb_sectors,
2930 BlockDriverCompletionFunc *cb,
2931 void *opaque,
2932 int is_write)
2933
83f64091 2934{
ce1a14dc 2935 BlockDriverAIOCBSync *acb;
ce1a14dc 2936
c16b5a2c 2937 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
f141eafe
AL
2938 acb->is_write = is_write;
2939 acb->qiov = qiov;
e268ca52 2940 acb->bounce = qemu_blockalign(bs, qiov->size);
f141eafe 2941
ce1a14dc
PB
2942 if (!acb->bh)
2943 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
f141eafe
AL
2944
2945 if (is_write) {
2946 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
1ed20acf 2947 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
f141eafe 2948 } else {
1ed20acf 2949 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
f141eafe
AL
2950 }
2951
ce1a14dc 2952 qemu_bh_schedule(acb->bh);
f141eafe 2953
ce1a14dc 2954 return &acb->common;
beac80cd
FB
2955}
2956
f141eafe
AL
2957static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2958 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 2959 BlockDriverCompletionFunc *cb, void *opaque)
beac80cd 2960{
f141eafe
AL
2961 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2962}
83f64091 2963
f141eafe
AL
2964static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2965 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2966 BlockDriverCompletionFunc *cb, void *opaque)
2967{
2968 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
beac80cd 2969}
beac80cd 2970
68485420
KW
2971
2972typedef struct BlockDriverAIOCBCoroutine {
2973 BlockDriverAIOCB common;
2974 BlockRequest req;
2975 bool is_write;
2976 QEMUBH* bh;
2977} BlockDriverAIOCBCoroutine;
2978
2979static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
2980{
2981 qemu_aio_flush();
2982}
2983
2984static AIOPool bdrv_em_co_aio_pool = {
2985 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
2986 .cancel = bdrv_aio_co_cancel_em,
2987};
2988
35246a68 2989static void bdrv_co_em_bh(void *opaque)
68485420
KW
2990{
2991 BlockDriverAIOCBCoroutine *acb = opaque;
2992
2993 acb->common.cb(acb->common.opaque, acb->req.error);
2994 qemu_bh_delete(acb->bh);
2995 qemu_aio_release(acb);
2996}
2997
b2a61371
SH
2998/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
2999static void coroutine_fn bdrv_co_do_rw(void *opaque)
3000{
3001 BlockDriverAIOCBCoroutine *acb = opaque;
3002 BlockDriverState *bs = acb->common.bs;
3003
3004 if (!acb->is_write) {
3005 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
3006 acb->req.nb_sectors, acb->req.qiov);
3007 } else {
3008 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
3009 acb->req.nb_sectors, acb->req.qiov);
3010 }
3011
35246a68 3012 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
b2a61371
SH
3013 qemu_bh_schedule(acb->bh);
3014}
3015
68485420
KW
3016static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
3017 int64_t sector_num,
3018 QEMUIOVector *qiov,
3019 int nb_sectors,
3020 BlockDriverCompletionFunc *cb,
3021 void *opaque,
8c5873d6 3022 bool is_write)
68485420
KW
3023{
3024 Coroutine *co;
3025 BlockDriverAIOCBCoroutine *acb;
3026
3027 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3028 acb->req.sector = sector_num;
3029 acb->req.nb_sectors = nb_sectors;
3030 acb->req.qiov = qiov;
3031 acb->is_write = is_write;
3032
8c5873d6 3033 co = qemu_coroutine_create(bdrv_co_do_rw);
68485420
KW
3034 qemu_coroutine_enter(co, acb);
3035
3036 return &acb->common;
3037}
3038
07f07615 3039static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
b2e12bc6 3040{
07f07615
PB
3041 BlockDriverAIOCBCoroutine *acb = opaque;
3042 BlockDriverState *bs = acb->common.bs;
b2e12bc6 3043
07f07615
PB
3044 acb->req.error = bdrv_co_flush(bs);
3045 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
b2e12bc6 3046 qemu_bh_schedule(acb->bh);
b2e12bc6
CH
3047}
3048
07f07615 3049BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
016f5cf6
AG
3050 BlockDriverCompletionFunc *cb, void *opaque)
3051{
07f07615 3052 trace_bdrv_aio_flush(bs, opaque);
016f5cf6 3053
07f07615
PB
3054 Coroutine *co;
3055 BlockDriverAIOCBCoroutine *acb;
016f5cf6 3056
07f07615
PB
3057 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3058 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
3059 qemu_coroutine_enter(co, acb);
016f5cf6 3060
016f5cf6
AG
3061 return &acb->common;
3062}
3063
4265d620
PB
3064static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
3065{
3066 BlockDriverAIOCBCoroutine *acb = opaque;
3067 BlockDriverState *bs = acb->common.bs;
3068
3069 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
3070 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3071 qemu_bh_schedule(acb->bh);
3072}
3073
3074BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
3075 int64_t sector_num, int nb_sectors,
3076 BlockDriverCompletionFunc *cb, void *opaque)
3077{
3078 Coroutine *co;
3079 BlockDriverAIOCBCoroutine *acb;
3080
3081 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
3082
3083 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3084 acb->req.sector = sector_num;
3085 acb->req.nb_sectors = nb_sectors;
3086 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
3087 qemu_coroutine_enter(co, acb);
3088
3089 return &acb->common;
3090}
3091
ea2384d3
FB
3092void bdrv_init(void)
3093{
5efa9d5a 3094 module_call_init(MODULE_INIT_BLOCK);
ea2384d3 3095}
ce1a14dc 3096
eb852011
MA
3097void bdrv_init_with_whitelist(void)
3098{
3099 use_bdrv_whitelist = 1;
3100 bdrv_init();
3101}
3102
c16b5a2c
CH
3103void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
3104 BlockDriverCompletionFunc *cb, void *opaque)
ce1a14dc 3105{
ce1a14dc
PB
3106 BlockDriverAIOCB *acb;
3107
6bbff9a0
AL
3108 if (pool->free_aiocb) {
3109 acb = pool->free_aiocb;
3110 pool->free_aiocb = acb->next;
ce1a14dc 3111 } else {
7267c094 3112 acb = g_malloc0(pool->aiocb_size);
6bbff9a0 3113 acb->pool = pool;
ce1a14dc
PB
3114 }
3115 acb->bs = bs;
3116 acb->cb = cb;
3117 acb->opaque = opaque;
3118 return acb;
3119}
3120
3121void qemu_aio_release(void *p)
3122{
6bbff9a0
AL
3123 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
3124 AIOPool *pool = acb->pool;
3125 acb->next = pool->free_aiocb;
3126 pool->free_aiocb = acb;
ce1a14dc 3127}
19cb3738 3128
f9f05dc5
KW
3129/**************************************************************/
3130/* Coroutine block device emulation */
3131
3132typedef struct CoroutineIOCompletion {
3133 Coroutine *coroutine;
3134 int ret;
3135} CoroutineIOCompletion;
3136
3137static void bdrv_co_io_em_complete(void *opaque, int ret)
3138{
3139 CoroutineIOCompletion *co = opaque;
3140
3141 co->ret = ret;
3142 qemu_coroutine_enter(co->coroutine, NULL);
3143}
3144
3145static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
3146 int nb_sectors, QEMUIOVector *iov,
3147 bool is_write)
3148{
3149 CoroutineIOCompletion co = {
3150 .coroutine = qemu_coroutine_self(),
3151 };
3152 BlockDriverAIOCB *acb;
3153
3154 if (is_write) {
a652d160
SH
3155 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
3156 bdrv_co_io_em_complete, &co);
f9f05dc5 3157 } else {
a652d160
SH
3158 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3159 bdrv_co_io_em_complete, &co);
f9f05dc5
KW
3160 }
3161
59370aaa 3162 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
f9f05dc5
KW
3163 if (!acb) {
3164 return -EIO;
3165 }
3166 qemu_coroutine_yield();
3167
3168 return co.ret;
3169}
3170
3171static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3172 int64_t sector_num, int nb_sectors,
3173 QEMUIOVector *iov)
3174{
3175 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3176}
3177
3178static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3179 int64_t sector_num, int nb_sectors,
3180 QEMUIOVector *iov)
3181{
3182 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3183}
3184
07f07615 3185static void coroutine_fn bdrv_flush_co_entry(void *opaque)
e7a8a783 3186{
07f07615
PB
3187 RwCo *rwco = opaque;
3188
3189 rwco->ret = bdrv_co_flush(rwco->bs);
3190}
3191
3192int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
3193{
eb489bb1
KW
3194 int ret;
3195
ca716364 3196 if (!bs->drv) {
07f07615 3197 return 0;
eb489bb1
KW
3198 }
3199
ca716364 3200 /* Write back cached data to the OS even with cache=unsafe */
eb489bb1
KW
3201 if (bs->drv->bdrv_co_flush_to_os) {
3202 ret = bs->drv->bdrv_co_flush_to_os(bs);
3203 if (ret < 0) {
3204 return ret;
3205 }
3206 }
3207
ca716364
KW
3208 /* But don't actually force it to the disk with cache=unsafe */
3209 if (bs->open_flags & BDRV_O_NO_FLUSH) {
3210 return 0;
3211 }
3212
eb489bb1 3213 if (bs->drv->bdrv_co_flush_to_disk) {
c68b89ac 3214 return bs->drv->bdrv_co_flush_to_disk(bs);
07f07615
PB
3215 } else if (bs->drv->bdrv_aio_flush) {
3216 BlockDriverAIOCB *acb;
3217 CoroutineIOCompletion co = {
3218 .coroutine = qemu_coroutine_self(),
3219 };
3220
3221 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3222 if (acb == NULL) {
3223 return -EIO;
3224 } else {
3225 qemu_coroutine_yield();
3226 return co.ret;
3227 }
07f07615
PB
3228 } else {
3229 /*
3230 * Some block drivers always operate in either writethrough or unsafe
3231 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
3232 * know how the server works (because the behaviour is hardcoded or
3233 * depends on server-side configuration), so we can't ensure that
3234 * everything is safe on disk. Returning an error doesn't work because
3235 * that would break guests even if the server operates in writethrough
3236 * mode.
3237 *
3238 * Let's hope the user knows what he's doing.
3239 */
3240 return 0;
3241 }
3242}
3243
0f15423c
AL
3244void bdrv_invalidate_cache(BlockDriverState *bs)
3245{
3246 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3247 bs->drv->bdrv_invalidate_cache(bs);
3248 }
3249}
3250
3251void bdrv_invalidate_cache_all(void)
3252{
3253 BlockDriverState *bs;
3254
3255 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3256 bdrv_invalidate_cache(bs);
3257 }
3258}
3259
07f07615
PB
3260int bdrv_flush(BlockDriverState *bs)
3261{
3262 Coroutine *co;
3263 RwCo rwco = {
3264 .bs = bs,
3265 .ret = NOT_DONE,
e7a8a783 3266 };
e7a8a783 3267
07f07615
PB
3268 if (qemu_in_coroutine()) {
3269 /* Fast-path if already in coroutine context */
3270 bdrv_flush_co_entry(&rwco);
3271 } else {
3272 co = qemu_coroutine_create(bdrv_flush_co_entry);
3273 qemu_coroutine_enter(co, &rwco);
3274 while (rwco.ret == NOT_DONE) {
3275 qemu_aio_wait();
3276 }
e7a8a783 3277 }
07f07615
PB
3278
3279 return rwco.ret;
e7a8a783
KW
3280}
3281
4265d620
PB
3282static void coroutine_fn bdrv_discard_co_entry(void *opaque)
3283{
3284 RwCo *rwco = opaque;
3285
3286 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
3287}
3288
3289int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
3290 int nb_sectors)
3291{
3292 if (!bs->drv) {
3293 return -ENOMEDIUM;
3294 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
3295 return -EIO;
3296 } else if (bs->read_only) {
3297 return -EROFS;
3298 } else if (bs->drv->bdrv_co_discard) {
3299 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
3300 } else if (bs->drv->bdrv_aio_discard) {
3301 BlockDriverAIOCB *acb;
3302 CoroutineIOCompletion co = {
3303 .coroutine = qemu_coroutine_self(),
3304 };
3305
3306 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
3307 bdrv_co_io_em_complete, &co);
3308 if (acb == NULL) {
3309 return -EIO;
3310 } else {
3311 qemu_coroutine_yield();
3312 return co.ret;
3313 }
4265d620
PB
3314 } else {
3315 return 0;
3316 }
3317}
3318
3319int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
3320{
3321 Coroutine *co;
3322 RwCo rwco = {
3323 .bs = bs,
3324 .sector_num = sector_num,
3325 .nb_sectors = nb_sectors,
3326 .ret = NOT_DONE,
3327 };
3328
3329 if (qemu_in_coroutine()) {
3330 /* Fast-path if already in coroutine context */
3331 bdrv_discard_co_entry(&rwco);
3332 } else {
3333 co = qemu_coroutine_create(bdrv_discard_co_entry);
3334 qemu_coroutine_enter(co, &rwco);
3335 while (rwco.ret == NOT_DONE) {
3336 qemu_aio_wait();
3337 }
3338 }
3339
3340 return rwco.ret;
3341}
3342
19cb3738
FB
3343/**************************************************************/
3344/* removable device support */
3345
3346/**
3347 * Return TRUE if the media is present
3348 */
3349int bdrv_is_inserted(BlockDriverState *bs)
3350{
3351 BlockDriver *drv = bs->drv;
a1aff5bf 3352
19cb3738
FB
3353 if (!drv)
3354 return 0;
3355 if (!drv->bdrv_is_inserted)
a1aff5bf
MA
3356 return 1;
3357 return drv->bdrv_is_inserted(bs);
19cb3738
FB
3358}
3359
3360/**
8e49ca46
MA
3361 * Return whether the media changed since the last call to this
3362 * function, or -ENOTSUP if we don't know. Most drivers don't know.
19cb3738
FB
3363 */
3364int bdrv_media_changed(BlockDriverState *bs)
3365{
3366 BlockDriver *drv = bs->drv;
19cb3738 3367
8e49ca46
MA
3368 if (drv && drv->bdrv_media_changed) {
3369 return drv->bdrv_media_changed(bs);
3370 }
3371 return -ENOTSUP;
19cb3738
FB
3372}
3373
3374/**
3375 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3376 */
fdec4404 3377void bdrv_eject(BlockDriverState *bs, int eject_flag)
19cb3738
FB
3378{
3379 BlockDriver *drv = bs->drv;
19cb3738 3380
822e1cd1
MA
3381 if (drv && drv->bdrv_eject) {
3382 drv->bdrv_eject(bs, eject_flag);
19cb3738
FB
3383 }
3384}
3385
19cb3738
FB
3386/**
3387 * Lock or unlock the media (if it is locked, the user won't be able
3388 * to eject it manually).
3389 */
025e849a 3390void bdrv_lock_medium(BlockDriverState *bs, bool locked)
19cb3738
FB
3391{
3392 BlockDriver *drv = bs->drv;
3393
025e849a 3394 trace_bdrv_lock_medium(bs, locked);
b8c6d095 3395
025e849a
MA
3396 if (drv && drv->bdrv_lock_medium) {
3397 drv->bdrv_lock_medium(bs, locked);
19cb3738
FB
3398 }
3399}
985a03b0
TS
3400
3401/* needed for generic scsi interface */
3402
3403int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3404{
3405 BlockDriver *drv = bs->drv;
3406
3407 if (drv && drv->bdrv_ioctl)
3408 return drv->bdrv_ioctl(bs, req, buf);
3409 return -ENOTSUP;
3410}
7d780669 3411
221f715d
AL
3412BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3413 unsigned long int req, void *buf,
3414 BlockDriverCompletionFunc *cb, void *opaque)
7d780669 3415{
221f715d 3416 BlockDriver *drv = bs->drv;
7d780669 3417
221f715d
AL
3418 if (drv && drv->bdrv_aio_ioctl)
3419 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3420 return NULL;
7d780669 3421}
e268ca52 3422
7b6f9300
MA
3423void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3424{
3425 bs->buffer_alignment = align;
3426}
7cd1e32a 3427
e268ca52
AL
3428void *qemu_blockalign(BlockDriverState *bs, size_t size)
3429{
3430 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3431}
7cd1e32a 3432
3433void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3434{
3435 int64_t bitmap_size;
a55eb92c 3436
aaa0eb75 3437 bs->dirty_count = 0;
a55eb92c 3438 if (enable) {
c6d22830
JK
3439 if (!bs->dirty_bitmap) {
3440 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3441 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3442 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
a55eb92c 3443
7267c094 3444 bs->dirty_bitmap = g_malloc0(bitmap_size);
a55eb92c 3445 }
7cd1e32a 3446 } else {
c6d22830 3447 if (bs->dirty_bitmap) {
7267c094 3448 g_free(bs->dirty_bitmap);
c6d22830 3449 bs->dirty_bitmap = NULL;
a55eb92c 3450 }
7cd1e32a 3451 }
3452}
3453
3454int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3455{
6ea44308 3456 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c 3457
c6d22830
JK
3458 if (bs->dirty_bitmap &&
3459 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
6d59fec1
MT
3460 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3461 (1UL << (chunk % (sizeof(unsigned long) * 8))));
7cd1e32a 3462 } else {
3463 return 0;
3464 }
3465}
3466
a55eb92c
JK
3467void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3468 int nr_sectors)
7cd1e32a 3469{
3470 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3471}
aaa0eb75
LS
3472
3473int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3474{
3475 return bs->dirty_count;
3476}
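/* Editor's illustrative sketch (not part of the original block.c): a simple
 * incremental-copy pass over the dirty bitmap, the way a consumer such as
 * block migration might use it.  The actual copy step is elided and the
 * helper name is invented. */
static void example_walk_dirty(BlockDriverState *bs)
{
    int64_t total = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
    int64_t sector;

    for (sector = 0; sector < total; sector += BDRV_SECTORS_PER_DIRTY_CHUNK) {
        if (bdrv_get_dirty(bs, sector)) {
            /* ... re-copy this chunk to the destination here ... */
            bdrv_reset_dirty(bs, sector, BDRV_SECTORS_PER_DIRTY_CHUNK);
        }
    }
}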
f88e1a42 3477
db593f25
MT
3478void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3479{
3480 assert(bs->in_use != in_use);
3481 bs->in_use = in_use;
3482}
3483
3484int bdrv_in_use(BlockDriverState *bs)
3485{
3486 return bs->in_use;
3487}
3488
28a7282a
LC
3489void bdrv_iostatus_enable(BlockDriverState *bs)
3490{
d6bf279e 3491 bs->iostatus_enabled = true;
58e21ef5 3492 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
28a7282a
LC
3493}
3494
3495/* The I/O status is only enabled if the drive explicitly
3496 * enables it _and_ the VM is configured to stop on errors */
3497bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3498{
d6bf279e 3499 return (bs->iostatus_enabled &&
28a7282a
LC
3500 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3501 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3502 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3503}
3504
3505void bdrv_iostatus_disable(BlockDriverState *bs)
3506{
d6bf279e 3507 bs->iostatus_enabled = false;
28a7282a
LC
3508}
3509
3510void bdrv_iostatus_reset(BlockDriverState *bs)
3511{
3512 if (bdrv_iostatus_is_enabled(bs)) {
58e21ef5 3513 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
28a7282a
LC
3514 }
3515}
3516
3517/* XXX: Today this is set by device models because it makes the implementation
3518 quite simple. However, the block layer knows about the error, so it's
3519 possible to implement this without device models being involved */
3520void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3521{
58e21ef5
LC
3522 if (bdrv_iostatus_is_enabled(bs) &&
3523 bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
28a7282a 3524 assert(error >= 0);
58e21ef5
LC
3525 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3526 BLOCK_DEVICE_IO_STATUS_FAILED;
28a7282a
LC
3527 }
3528}
3529
a597e79c
CH
3530void
3531bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3532 enum BlockAcctType type)
3533{
3534 assert(type < BDRV_MAX_IOTYPE);
3535
3536 cookie->bytes = bytes;
c488c7f6 3537 cookie->start_time_ns = get_clock();
a597e79c
CH
3538 cookie->type = type;
3539}
3540
3541void
3542bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3543{
3544 assert(cookie->type < BDRV_MAX_IOTYPE);
3545
3546 bs->nr_bytes[cookie->type] += cookie->bytes;
3547 bs->nr_ops[cookie->type]++;
c488c7f6 3548 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
a597e79c
CH
3549}
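/* Editor's illustrative sketch (not part of the original block.c): how a
 * device model brackets a read with the accounting helpers above.  The
 * request structure and function names are invented for the example. */
typedef struct ExampleReq {
    BlockDriverState *bs;
    BlockAcctCookie acct;
} ExampleReq;

static void example_read_complete(void *opaque, int ret)
{
    ExampleReq *req = opaque;

    bdrv_acct_done(req->bs, &req->acct);
    /* ... complete the guest request and free req here ... */
}

static void example_start_read(BlockDriverState *bs, ExampleReq *req,
                               QEMUIOVector *qiov, int64_t sector_num)
{
    req->bs = bs;
    bdrv_acct_start(bs, &req->acct, qiov->size, BDRV_ACCT_READ);
    bdrv_aio_readv(bs, sector_num, qiov, (int)(qiov->size >> 9),
                   example_read_complete, req);
}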
3550
f88e1a42
JS
3551int bdrv_img_create(const char *filename, const char *fmt,
3552 const char *base_filename, const char *base_fmt,
3553 char *options, uint64_t img_size, int flags)
3554{
3555 QEMUOptionParameter *param = NULL, *create_options = NULL;
d220894e 3556 QEMUOptionParameter *backing_fmt, *backing_file, *size;
f88e1a42
JS
3557 BlockDriverState *bs = NULL;
3558 BlockDriver *drv, *proto_drv;
96df67d1 3559 BlockDriver *backing_drv = NULL;
f88e1a42
JS
3560 int ret = 0;
3561
3562 /* Find driver and parse its options */
3563 drv = bdrv_find_format(fmt);
3564 if (!drv) {
3565 error_report("Unknown file format '%s'", fmt);
4f70f249 3566 ret = -EINVAL;
f88e1a42
JS
3567 goto out;
3568 }
3569
3570 proto_drv = bdrv_find_protocol(filename);
3571 if (!proto_drv) {
3572 error_report("Unknown protocol '%s'", filename);
4f70f249 3573 ret = -EINVAL;
f88e1a42
JS
3574 goto out;
3575 }
3576
3577 create_options = append_option_parameters(create_options,
3578 drv->create_options);
3579 create_options = append_option_parameters(create_options,
3580 proto_drv->create_options);
3581
3582 /* Create parameter list with default values */
3583 param = parse_option_parameters("", create_options, param);
3584
3585 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3586
3587 /* Parse -o options */
3588 if (options) {
3589 param = parse_option_parameters(options, create_options, param);
3590 if (param == NULL) {
3591 error_report("Invalid options for file format '%s'.", fmt);
4f70f249 3592 ret = -EINVAL;
f88e1a42
JS
3593 goto out;
3594 }
3595 }
3596
3597 if (base_filename) {
3598 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3599 base_filename)) {
3600 error_report("Backing file not supported for file format '%s'",
3601 fmt);
4f70f249 3602 ret = -EINVAL;
f88e1a42
JS
3603 goto out;
3604 }
3605 }
3606
3607 if (base_fmt) {
3608 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3609 error_report("Backing file format not supported for file "
3610 "format '%s'", fmt);
4f70f249 3611 ret = -EINVAL;
f88e1a42
JS
3612 goto out;
3613 }
3614 }
3615
792da93a
JS
3616 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3617 if (backing_file && backing_file->value.s) {
3618 if (!strcmp(filename, backing_file->value.s)) {
3619 error_report("Error: Trying to create an image with the "
3620 "same filename as the backing file");
4f70f249 3621 ret = -EINVAL;
792da93a
JS
3622 goto out;
3623 }
3624 }
3625
f88e1a42
JS
3626 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3627 if (backing_fmt && backing_fmt->value.s) {
96df67d1
SH
3628 backing_drv = bdrv_find_format(backing_fmt->value.s);
3629 if (!backing_drv) {
f88e1a42
JS
3630 error_report("Unknown backing file format '%s'",
3631 backing_fmt->value.s);
4f70f249 3632 ret = -EINVAL;
f88e1a42
JS
3633 goto out;
3634 }
3635 }
3636
3637 // The size for the image must always be specified, with one exception:
3638 // If we are using a backing file, we can obtain the size from there
d220894e
KW
3639 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3640 if (size && size->value.n == -1) {
f88e1a42
JS
3641 if (backing_file && backing_file->value.s) {
3642 uint64_t size;
f88e1a42
JS
3643 char buf[32];
3644
f88e1a42
JS
3645 bs = bdrv_new("");
3646
96df67d1 3647 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
f88e1a42 3648 if (ret < 0) {
96df67d1 3649 error_report("Could not open '%s'", backing_file->value.s);
f88e1a42
JS
3650 goto out;
3651 }
3652 bdrv_get_geometry(bs, &size);
3653 size *= 512;
3654
3655 snprintf(buf, sizeof(buf), "%" PRId64, size);
3656 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3657 } else {
3658 error_report("Image creation needs a size parameter");
4f70f249 3659 ret = -EINVAL;
f88e1a42
JS
3660 goto out;
3661 }
3662 }
3663
3664 printf("Formatting '%s', fmt=%s ", filename, fmt);
3665 print_option_parameters(param);
3666 puts("");
3667
3668 ret = bdrv_create(drv, filename, param);
3669
3670 if (ret < 0) {
3671 if (ret == -ENOTSUP) {
3672 error_report("Formatting or formatting option not supported for "
3673 "file format '%s'", fmt);
3674 } else if (ret == -EFBIG) {
3675 error_report("The image size is too large for file format '%s'",
3676 fmt);
3677 } else {
3678 error_report("%s: error while creating %s: %s", filename, fmt,
3679 strerror(-ret));
3680 }
3681 }
3682
3683out:
3684 free_option_parameters(create_options);
3685 free_option_parameters(param);
3686
3687 if (bs) {
3688 bdrv_delete(bs);
3689 }
4f70f249
JS
3690
3691 return ret;
f88e1a42 3692}