]> git.proxmox.com Git - qemu.git/blame - block.c
block: wait for overlapping requests
[qemu.git] / block.c
CommitLineData
fc01f7e7
FB
1/*
2 * QEMU System Emulator block driver
5fafdf24 3 *
fc01f7e7 4 * Copyright (c) 2003 Fabrice Bellard
5fafdf24 5 *
fc01f7e7
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
3990d09a 24#include "config-host.h"
faf07963 25#include "qemu-common.h"
6d519a5f 26#include "trace.h"
376253ec 27#include "monitor.h"
ea2384d3 28#include "block_int.h"
5efa9d5a 29#include "module.h"
f795e743 30#include "qjson.h"
68485420 31#include "qemu-coroutine.h"
b2023818 32#include "qmp-commands.h"
0563e191 33#include "qemu-timer.h"
fc01f7e7 34
71e72a19 35#ifdef CONFIG_BSD
7674e7bf
FB
36#include <sys/types.h>
37#include <sys/stat.h>
38#include <sys/ioctl.h>
72cf2d4f 39#include <sys/queue.h>
c5e97233 40#ifndef __DragonFly__
7674e7bf
FB
41#include <sys/disk.h>
42#endif
c5e97233 43#endif
7674e7bf 44
49dc768d
AL
45#ifdef _WIN32
46#include <windows.h>
47#endif
48
1c9805a3
SH
49#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
50
7d4b4ba5 51static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
f141eafe
AL
52static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
53 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
c87c0672 54 BlockDriverCompletionFunc *cb, void *opaque);
f141eafe
AL
55static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
56 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 57 BlockDriverCompletionFunc *cb, void *opaque);
f9f05dc5
KW
58static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
59 int64_t sector_num, int nb_sectors,
60 QEMUIOVector *iov);
61static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
62 int64_t sector_num, int nb_sectors,
63 QEMUIOVector *iov);
c5fbe571
SH
64static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
65 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
1c9805a3
SH
66static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
67 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
b2a61371
SH
68static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
69 int64_t sector_num,
70 QEMUIOVector *qiov,
71 int nb_sectors,
72 BlockDriverCompletionFunc *cb,
73 void *opaque,
8c5873d6 74 bool is_write);
b2a61371 75static void coroutine_fn bdrv_co_do_rw(void *opaque);
ec530c81 76
98f90dba
ZYW
77static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
78 bool is_write, double elapsed_time, uint64_t *wait);
79static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
80 double elapsed_time, uint64_t *wait);
81static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
82 bool is_write, int64_t *wait);
83
1b7bdbc1
SH
84static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
85 QTAILQ_HEAD_INITIALIZER(bdrv_states);
7ee930d0 86
8a22f02a
SH
87static QLIST_HEAD(, BlockDriver) bdrv_drivers =
88 QLIST_HEAD_INITIALIZER(bdrv_drivers);
ea2384d3 89
f9092b10
MA
90/* The device to use for VM snapshots */
91static BlockDriverState *bs_snapshots;
92
eb852011
MA
93/* If non-zero, use only whitelisted block drivers */
94static int use_bdrv_whitelist;
95
9e0b22f4
SH
96#ifdef _WIN32
97static int is_windows_drive_prefix(const char *filename)
98{
99 return (((filename[0] >= 'a' && filename[0] <= 'z') ||
100 (filename[0] >= 'A' && filename[0] <= 'Z')) &&
101 filename[1] == ':');
102}
103
104int is_windows_drive(const char *filename)
105{
106 if (is_windows_drive_prefix(filename) &&
107 filename[2] == '\0')
108 return 1;
109 if (strstart(filename, "\\\\.\\", NULL) ||
110 strstart(filename, "//./", NULL))
111 return 1;
112 return 0;
113}
114#endif
115
0563e191 116/* throttling disk I/O limits */
98f90dba
ZYW
117void bdrv_io_limits_disable(BlockDriverState *bs)
118{
119 bs->io_limits_enabled = false;
120
121 while (qemu_co_queue_next(&bs->throttled_reqs));
122
123 if (bs->block_timer) {
124 qemu_del_timer(bs->block_timer);
125 qemu_free_timer(bs->block_timer);
126 bs->block_timer = NULL;
127 }
128
129 bs->slice_start = 0;
130 bs->slice_end = 0;
131 bs->slice_time = 0;
132 memset(&bs->io_base, 0, sizeof(bs->io_base));
133}
134
0563e191
ZYW
135static void bdrv_block_timer(void *opaque)
136{
137 BlockDriverState *bs = opaque;
138
139 qemu_co_queue_next(&bs->throttled_reqs);
140}
141
142void bdrv_io_limits_enable(BlockDriverState *bs)
143{
144 qemu_co_queue_init(&bs->throttled_reqs);
145 bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
146 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
147 bs->slice_start = qemu_get_clock_ns(vm_clock);
148 bs->slice_end = bs->slice_start + bs->slice_time;
149 memset(&bs->io_base, 0, sizeof(bs->io_base));
150 bs->io_limits_enabled = true;
151}
152
153bool bdrv_io_limits_enabled(BlockDriverState *bs)
154{
155 BlockIOLimit *io_limits = &bs->io_limits;
156 return io_limits->bps[BLOCK_IO_LIMIT_READ]
157 || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
158 || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
159 || io_limits->iops[BLOCK_IO_LIMIT_READ]
160 || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
161 || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
162}
163
98f90dba
ZYW
164static void bdrv_io_limits_intercept(BlockDriverState *bs,
165 bool is_write, int nb_sectors)
166{
167 int64_t wait_time = -1;
168
169 if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
170 qemu_co_queue_wait(&bs->throttled_reqs);
171 }
172
173 /* In fact, we hope to keep each request's timing, in FIFO mode. The next
174 * throttled requests will not be dequeued until the current request is
175 * allowed to be serviced. So if the current request still exceeds the
176 * limits, it will be inserted to the head. All requests followed it will
177 * be still in throttled_reqs queue.
178 */
179
180 while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
181 qemu_mod_timer(bs->block_timer,
182 wait_time + qemu_get_clock_ns(vm_clock));
183 qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
184 }
185
186 qemu_co_queue_next(&bs->throttled_reqs);
187}
188
9e0b22f4
SH
189/* check if the path starts with "<protocol>:" */
190static int path_has_protocol(const char *path)
191{
192#ifdef _WIN32
193 if (is_windows_drive(path) ||
194 is_windows_drive_prefix(path)) {
195 return 0;
196 }
197#endif
198
199 return strchr(path, ':') != NULL;
200}
201
83f64091 202int path_is_absolute(const char *path)
3b0d4f61 203{
83f64091 204 const char *p;
21664424
FB
205#ifdef _WIN32
206 /* specific case for names like: "\\.\d:" */
207 if (*path == '/' || *path == '\\')
208 return 1;
209#endif
83f64091
FB
210 p = strchr(path, ':');
211 if (p)
212 p++;
213 else
214 p = path;
3b9f94e1
FB
215#ifdef _WIN32
216 return (*p == '/' || *p == '\\');
217#else
218 return (*p == '/');
219#endif
3b0d4f61
FB
220}
221
83f64091
FB
222/* if filename is absolute, just copy it to dest. Otherwise, build a
223 path to it by considering it is relative to base_path. URL are
224 supported. */
225void path_combine(char *dest, int dest_size,
226 const char *base_path,
227 const char *filename)
3b0d4f61 228{
83f64091
FB
229 const char *p, *p1;
230 int len;
231
232 if (dest_size <= 0)
233 return;
234 if (path_is_absolute(filename)) {
235 pstrcpy(dest, dest_size, filename);
236 } else {
237 p = strchr(base_path, ':');
238 if (p)
239 p++;
240 else
241 p = base_path;
3b9f94e1
FB
242 p1 = strrchr(base_path, '/');
243#ifdef _WIN32
244 {
245 const char *p2;
246 p2 = strrchr(base_path, '\\');
247 if (!p1 || p2 > p1)
248 p1 = p2;
249 }
250#endif
83f64091
FB
251 if (p1)
252 p1++;
253 else
254 p1 = base_path;
255 if (p1 > p)
256 p = p1;
257 len = p - base_path;
258 if (len > dest_size - 1)
259 len = dest_size - 1;
260 memcpy(dest, base_path, len);
261 dest[len] = '\0';
262 pstrcat(dest, dest_size, filename);
3b0d4f61 263 }
3b0d4f61
FB
264}
265
5efa9d5a 266void bdrv_register(BlockDriver *bdrv)
ea2384d3 267{
8c5873d6
SH
268 /* Block drivers without coroutine functions need emulation */
269 if (!bdrv->bdrv_co_readv) {
f9f05dc5
KW
270 bdrv->bdrv_co_readv = bdrv_co_readv_em;
271 bdrv->bdrv_co_writev = bdrv_co_writev_em;
272
f8c35c1d
SH
273 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
274 * the block driver lacks aio we need to emulate that too.
275 */
f9f05dc5
KW
276 if (!bdrv->bdrv_aio_readv) {
277 /* add AIO emulation layer */
278 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
279 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
f9f05dc5 280 }
83f64091 281 }
b2e12bc6 282
8a22f02a 283 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
ea2384d3 284}
b338082b
FB
285
286/* create a new block device (by default it is empty) */
287BlockDriverState *bdrv_new(const char *device_name)
288{
1b7bdbc1 289 BlockDriverState *bs;
b338082b 290
7267c094 291 bs = g_malloc0(sizeof(BlockDriverState));
b338082b 292 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
ea2384d3 293 if (device_name[0] != '\0') {
1b7bdbc1 294 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
ea2384d3 295 }
28a7282a 296 bdrv_iostatus_disable(bs);
b338082b
FB
297 return bs;
298}
299
ea2384d3
FB
300BlockDriver *bdrv_find_format(const char *format_name)
301{
302 BlockDriver *drv1;
8a22f02a
SH
303 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
304 if (!strcmp(drv1->format_name, format_name)) {
ea2384d3 305 return drv1;
8a22f02a 306 }
ea2384d3
FB
307 }
308 return NULL;
309}
310
eb852011
MA
311static int bdrv_is_whitelisted(BlockDriver *drv)
312{
313 static const char *whitelist[] = {
314 CONFIG_BDRV_WHITELIST
315 };
316 const char **p;
317
318 if (!whitelist[0])
319 return 1; /* no whitelist, anything goes */
320
321 for (p = whitelist; *p; p++) {
322 if (!strcmp(drv->format_name, *p)) {
323 return 1;
324 }
325 }
326 return 0;
327}
328
329BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
330{
331 BlockDriver *drv = bdrv_find_format(format_name);
332 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
333}
334
0e7e1989
KW
335int bdrv_create(BlockDriver *drv, const char* filename,
336 QEMUOptionParameter *options)
ea2384d3
FB
337{
338 if (!drv->bdrv_create)
339 return -ENOTSUP;
0e7e1989
KW
340
341 return drv->bdrv_create(filename, options);
ea2384d3
FB
342}
343
84a12e66
CH
344int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
345{
346 BlockDriver *drv;
347
b50cbabc 348 drv = bdrv_find_protocol(filename);
84a12e66 349 if (drv == NULL) {
16905d71 350 return -ENOENT;
84a12e66
CH
351 }
352
353 return bdrv_create(drv, filename, options);
354}
355
d5249393 356#ifdef _WIN32
95389c86 357void get_tmp_filename(char *filename, int size)
d5249393 358{
3b9f94e1 359 char temp_dir[MAX_PATH];
3b46e624 360
3b9f94e1
FB
361 GetTempPath(MAX_PATH, temp_dir);
362 GetTempFileName(temp_dir, "qem", 0, filename);
d5249393
FB
363}
364#else
95389c86 365void get_tmp_filename(char *filename, int size)
fc01f7e7 366{
67b915a5 367 int fd;
7ccfb2eb 368 const char *tmpdir;
d5249393 369 /* XXX: race condition possible */
0badc1ee
AJ
370 tmpdir = getenv("TMPDIR");
371 if (!tmpdir)
372 tmpdir = "/tmp";
373 snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
ea2384d3
FB
374 fd = mkstemp(filename);
375 close(fd);
376}
d5249393 377#endif
fc01f7e7 378
84a12e66
CH
379/*
380 * Detect host devices. By convention, /dev/cdrom[N] is always
381 * recognized as a host CDROM.
382 */
383static BlockDriver *find_hdev_driver(const char *filename)
384{
385 int score_max = 0, score;
386 BlockDriver *drv = NULL, *d;
387
388 QLIST_FOREACH(d, &bdrv_drivers, list) {
389 if (d->bdrv_probe_device) {
390 score = d->bdrv_probe_device(filename);
391 if (score > score_max) {
392 score_max = score;
393 drv = d;
394 }
395 }
396 }
397
398 return drv;
399}
400
b50cbabc 401BlockDriver *bdrv_find_protocol(const char *filename)
83f64091
FB
402{
403 BlockDriver *drv1;
404 char protocol[128];
1cec71e3 405 int len;
83f64091 406 const char *p;
19cb3738 407
66f82cee
KW
408 /* TODO Drivers without bdrv_file_open must be specified explicitly */
409
39508e7a
CH
410 /*
411 * XXX(hch): we really should not let host device detection
412 * override an explicit protocol specification, but moving this
413 * later breaks access to device names with colons in them.
414 * Thanks to the brain-dead persistent naming schemes on udev-
415 * based Linux systems those actually are quite common.
416 */
417 drv1 = find_hdev_driver(filename);
418 if (drv1) {
419 return drv1;
420 }
421
9e0b22f4 422 if (!path_has_protocol(filename)) {
39508e7a 423 return bdrv_find_format("file");
84a12e66 424 }
9e0b22f4
SH
425 p = strchr(filename, ':');
426 assert(p != NULL);
1cec71e3
AL
427 len = p - filename;
428 if (len > sizeof(protocol) - 1)
429 len = sizeof(protocol) - 1;
430 memcpy(protocol, filename, len);
431 protocol[len] = '\0';
8a22f02a 432 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
5fafdf24 433 if (drv1->protocol_name &&
8a22f02a 434 !strcmp(drv1->protocol_name, protocol)) {
83f64091 435 return drv1;
8a22f02a 436 }
83f64091
FB
437 }
438 return NULL;
439}
440
c98ac35d 441static int find_image_format(const char *filename, BlockDriver **pdrv)
f3a5d3f8
CH
442{
443 int ret, score, score_max;
444 BlockDriver *drv1, *drv;
445 uint8_t buf[2048];
446 BlockDriverState *bs;
447
f5edb014 448 ret = bdrv_file_open(&bs, filename, 0);
c98ac35d
SW
449 if (ret < 0) {
450 *pdrv = NULL;
451 return ret;
452 }
f8ea0b00 453
08a00559
KW
454 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
455 if (bs->sg || !bdrv_is_inserted(bs)) {
1a396859 456 bdrv_delete(bs);
c98ac35d
SW
457 drv = bdrv_find_format("raw");
458 if (!drv) {
459 ret = -ENOENT;
460 }
461 *pdrv = drv;
462 return ret;
1a396859 463 }
f8ea0b00 464
83f64091
FB
465 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
466 bdrv_delete(bs);
467 if (ret < 0) {
c98ac35d
SW
468 *pdrv = NULL;
469 return ret;
83f64091
FB
470 }
471
ea2384d3 472 score_max = 0;
84a12e66 473 drv = NULL;
8a22f02a 474 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
83f64091
FB
475 if (drv1->bdrv_probe) {
476 score = drv1->bdrv_probe(buf, ret, filename);
477 if (score > score_max) {
478 score_max = score;
479 drv = drv1;
480 }
0849bf08 481 }
fc01f7e7 482 }
c98ac35d
SW
483 if (!drv) {
484 ret = -ENOENT;
485 }
486 *pdrv = drv;
487 return ret;
ea2384d3
FB
488}
489
51762288
SH
490/**
491 * Set the current 'total_sectors' value
492 */
493static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
494{
495 BlockDriver *drv = bs->drv;
496
396759ad
NB
497 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
498 if (bs->sg)
499 return 0;
500
51762288
SH
501 /* query actual device if possible, otherwise just trust the hint */
502 if (drv->bdrv_getlength) {
503 int64_t length = drv->bdrv_getlength(bs);
504 if (length < 0) {
505 return length;
506 }
507 hint = length >> BDRV_SECTOR_BITS;
508 }
509
510 bs->total_sectors = hint;
511 return 0;
512}
513
c3993cdc
SH
514/**
515 * Set open flags for a given cache mode
516 *
517 * Return 0 on success, -1 if the cache mode was invalid.
518 */
519int bdrv_parse_cache_flags(const char *mode, int *flags)
520{
521 *flags &= ~BDRV_O_CACHE_MASK;
522
523 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
524 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
92196b2f
SH
525 } else if (!strcmp(mode, "directsync")) {
526 *flags |= BDRV_O_NOCACHE;
c3993cdc
SH
527 } else if (!strcmp(mode, "writeback")) {
528 *flags |= BDRV_O_CACHE_WB;
529 } else if (!strcmp(mode, "unsafe")) {
530 *flags |= BDRV_O_CACHE_WB;
531 *flags |= BDRV_O_NO_FLUSH;
532 } else if (!strcmp(mode, "writethrough")) {
533 /* this is the default */
534 } else {
535 return -1;
536 }
537
538 return 0;
539}
540
53fec9d3
SH
541/**
542 * The copy-on-read flag is actually a reference count so multiple users may
543 * use the feature without worrying about clobbering its previous state.
544 * Copy-on-read stays enabled until all users have called to disable it.
545 */
546void bdrv_enable_copy_on_read(BlockDriverState *bs)
547{
548 bs->copy_on_read++;
549}
550
551void bdrv_disable_copy_on_read(BlockDriverState *bs)
552{
553 assert(bs->copy_on_read > 0);
554 bs->copy_on_read--;
555}
556
57915332
KW
557/*
558 * Common part for opening disk images and files
559 */
560static int bdrv_open_common(BlockDriverState *bs, const char *filename,
561 int flags, BlockDriver *drv)
562{
563 int ret, open_flags;
564
565 assert(drv != NULL);
566
28dcee10
SH
567 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
568
66f82cee 569 bs->file = NULL;
51762288 570 bs->total_sectors = 0;
57915332
KW
571 bs->encrypted = 0;
572 bs->valid_key = 0;
03f541bd 573 bs->sg = 0;
57915332 574 bs->open_flags = flags;
03f541bd 575 bs->growable = 0;
57915332
KW
576 bs->buffer_alignment = 512;
577
53fec9d3
SH
578 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
579 if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
580 bdrv_enable_copy_on_read(bs);
581 }
582
57915332 583 pstrcpy(bs->filename, sizeof(bs->filename), filename);
03f541bd 584 bs->backing_file[0] = '\0';
57915332
KW
585
586 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
587 return -ENOTSUP;
588 }
589
590 bs->drv = drv;
7267c094 591 bs->opaque = g_malloc0(drv->instance_size);
57915332 592
03f541bd 593 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
57915332
KW
594
595 /*
596 * Clear flags that are internal to the block layer before opening the
597 * image.
598 */
599 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
600
601 /*
ebabb67a 602 * Snapshots should be writable.
57915332
KW
603 */
604 if (bs->is_temporary) {
605 open_flags |= BDRV_O_RDWR;
606 }
607
e7c63796
SH
608 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
609
66f82cee
KW
610 /* Open the image, either directly or using a protocol */
611 if (drv->bdrv_file_open) {
612 ret = drv->bdrv_file_open(bs, filename, open_flags);
613 } else {
614 ret = bdrv_file_open(&bs->file, filename, open_flags);
615 if (ret >= 0) {
616 ret = drv->bdrv_open(bs, open_flags);
617 }
618 }
619
57915332
KW
620 if (ret < 0) {
621 goto free_and_fail;
622 }
623
51762288
SH
624 ret = refresh_total_sectors(bs, bs->total_sectors);
625 if (ret < 0) {
626 goto free_and_fail;
57915332 627 }
51762288 628
57915332
KW
629#ifndef _WIN32
630 if (bs->is_temporary) {
631 unlink(filename);
632 }
633#endif
634 return 0;
635
636free_and_fail:
66f82cee
KW
637 if (bs->file) {
638 bdrv_delete(bs->file);
639 bs->file = NULL;
640 }
7267c094 641 g_free(bs->opaque);
57915332
KW
642 bs->opaque = NULL;
643 bs->drv = NULL;
644 return ret;
645}
646
b6ce07aa
KW
647/*
648 * Opens a file using a protocol (file, host_device, nbd, ...)
649 */
83f64091 650int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
ea2384d3 651{
83f64091 652 BlockDriverState *bs;
6db95603 653 BlockDriver *drv;
83f64091
FB
654 int ret;
655
b50cbabc 656 drv = bdrv_find_protocol(filename);
6db95603
CH
657 if (!drv) {
658 return -ENOENT;
659 }
660
83f64091 661 bs = bdrv_new("");
b6ce07aa 662 ret = bdrv_open_common(bs, filename, flags, drv);
83f64091
FB
663 if (ret < 0) {
664 bdrv_delete(bs);
665 return ret;
3b0d4f61 666 }
71d0770c 667 bs->growable = 1;
83f64091
FB
668 *pbs = bs;
669 return 0;
670}
671
b6ce07aa
KW
672/*
673 * Opens a disk image (raw, qcow2, vmdk, ...)
674 */
d6e9098e
KW
675int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
676 BlockDriver *drv)
ea2384d3 677{
b6ce07aa 678 int ret;
2b572816 679 char tmp_filename[PATH_MAX];
712e7874 680
83f64091 681 if (flags & BDRV_O_SNAPSHOT) {
ea2384d3
FB
682 BlockDriverState *bs1;
683 int64_t total_size;
7c96d46e 684 int is_protocol = 0;
91a073a9
KW
685 BlockDriver *bdrv_qcow2;
686 QEMUOptionParameter *options;
b6ce07aa 687 char backing_filename[PATH_MAX];
3b46e624 688
ea2384d3
FB
689 /* if snapshot, we create a temporary backing file and open it
690 instead of opening 'filename' directly */
33e3963e 691
ea2384d3
FB
692 /* if there is a backing file, use it */
693 bs1 = bdrv_new("");
d6e9098e 694 ret = bdrv_open(bs1, filename, 0, drv);
51d7c00c 695 if (ret < 0) {
ea2384d3 696 bdrv_delete(bs1);
51d7c00c 697 return ret;
ea2384d3 698 }
3e82990b 699 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
7c96d46e
AL
700
701 if (bs1->drv && bs1->drv->protocol_name)
702 is_protocol = 1;
703
ea2384d3 704 bdrv_delete(bs1);
3b46e624 705
ea2384d3 706 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
7c96d46e
AL
707
708 /* Real path is meaningless for protocols */
709 if (is_protocol)
710 snprintf(backing_filename, sizeof(backing_filename),
711 "%s", filename);
114cdfa9
KS
712 else if (!realpath(filename, backing_filename))
713 return -errno;
7c96d46e 714
91a073a9
KW
715 bdrv_qcow2 = bdrv_find_format("qcow2");
716 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
717
3e82990b 718 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
91a073a9
KW
719 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
720 if (drv) {
721 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
722 drv->format_name);
723 }
724
725 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
d748768c 726 free_option_parameters(options);
51d7c00c
AL
727 if (ret < 0) {
728 return ret;
ea2384d3 729 }
91a073a9 730
ea2384d3 731 filename = tmp_filename;
91a073a9 732 drv = bdrv_qcow2;
ea2384d3
FB
733 bs->is_temporary = 1;
734 }
712e7874 735
b6ce07aa 736 /* Find the right image format driver */
6db95603 737 if (!drv) {
c98ac35d 738 ret = find_image_format(filename, &drv);
51d7c00c 739 }
6987307c 740
51d7c00c 741 if (!drv) {
51d7c00c 742 goto unlink_and_fail;
ea2384d3 743 }
b6ce07aa
KW
744
745 /* Open the image */
746 ret = bdrv_open_common(bs, filename, flags, drv);
747 if (ret < 0) {
6987307c
CH
748 goto unlink_and_fail;
749 }
750
b6ce07aa
KW
751 /* If there is a backing file, use it */
752 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
753 char backing_filename[PATH_MAX];
754 int back_flags;
755 BlockDriver *back_drv = NULL;
756
757 bs->backing_hd = bdrv_new("");
df2dbb4a
SH
758
759 if (path_has_protocol(bs->backing_file)) {
760 pstrcpy(backing_filename, sizeof(backing_filename),
761 bs->backing_file);
762 } else {
763 path_combine(backing_filename, sizeof(backing_filename),
764 filename, bs->backing_file);
765 }
766
767 if (bs->backing_format[0] != '\0') {
b6ce07aa 768 back_drv = bdrv_find_format(bs->backing_format);
df2dbb4a 769 }
b6ce07aa
KW
770
771 /* backing files always opened read-only */
772 back_flags =
773 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
774
775 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
776 if (ret < 0) {
777 bdrv_close(bs);
778 return ret;
779 }
780 if (bs->is_temporary) {
781 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
782 } else {
783 /* base image inherits from "parent" */
784 bs->backing_hd->keep_read_only = bs->keep_read_only;
785 }
786 }
787
788 if (!bdrv_key_required(bs)) {
7d4b4ba5 789 bdrv_dev_change_media_cb(bs, true);
b6ce07aa
KW
790 }
791
98f90dba
ZYW
792 /* throttling disk I/O limits */
793 if (bs->io_limits_enabled) {
794 bdrv_io_limits_enable(bs);
795 }
796
b6ce07aa
KW
797 return 0;
798
799unlink_and_fail:
800 if (bs->is_temporary) {
801 unlink(filename);
802 }
803 return ret;
804}
805
fc01f7e7
FB
806void bdrv_close(BlockDriverState *bs)
807{
19cb3738 808 if (bs->drv) {
f9092b10
MA
809 if (bs == bs_snapshots) {
810 bs_snapshots = NULL;
811 }
557df6ac 812 if (bs->backing_hd) {
ea2384d3 813 bdrv_delete(bs->backing_hd);
557df6ac
SH
814 bs->backing_hd = NULL;
815 }
ea2384d3 816 bs->drv->bdrv_close(bs);
7267c094 817 g_free(bs->opaque);
ea2384d3
FB
818#ifdef _WIN32
819 if (bs->is_temporary) {
820 unlink(bs->filename);
821 }
67b915a5 822#endif
ea2384d3
FB
823 bs->opaque = NULL;
824 bs->drv = NULL;
53fec9d3 825 bs->copy_on_read = 0;
b338082b 826
66f82cee
KW
827 if (bs->file != NULL) {
828 bdrv_close(bs->file);
829 }
830
7d4b4ba5 831 bdrv_dev_change_media_cb(bs, false);
b338082b 832 }
98f90dba
ZYW
833
834 /*throttling disk I/O limits*/
835 if (bs->io_limits_enabled) {
836 bdrv_io_limits_disable(bs);
837 }
b338082b
FB
838}
839
2bc93fed
MK
840void bdrv_close_all(void)
841{
842 BlockDriverState *bs;
843
844 QTAILQ_FOREACH(bs, &bdrv_states, list) {
845 bdrv_close(bs);
846 }
847}
848
d22b2f41
RH
849/* make a BlockDriverState anonymous by removing from bdrv_state list.
850 Also, NULL terminate the device_name to prevent double remove */
851void bdrv_make_anon(BlockDriverState *bs)
852{
853 if (bs->device_name[0] != '\0') {
854 QTAILQ_REMOVE(&bdrv_states, bs, list);
855 }
856 bs->device_name[0] = '\0';
857}
858
b338082b
FB
859void bdrv_delete(BlockDriverState *bs)
860{
fa879d62 861 assert(!bs->dev);
18846dee 862
1b7bdbc1 863 /* remove from list, if necessary */
d22b2f41 864 bdrv_make_anon(bs);
34c6f050 865
b338082b 866 bdrv_close(bs);
66f82cee
KW
867 if (bs->file != NULL) {
868 bdrv_delete(bs->file);
869 }
870
f9092b10 871 assert(bs != bs_snapshots);
7267c094 872 g_free(bs);
fc01f7e7
FB
873}
874
fa879d62
MA
875int bdrv_attach_dev(BlockDriverState *bs, void *dev)
876/* TODO change to DeviceState *dev when all users are qdevified */
18846dee 877{
fa879d62 878 if (bs->dev) {
18846dee
MA
879 return -EBUSY;
880 }
fa879d62 881 bs->dev = dev;
28a7282a 882 bdrv_iostatus_reset(bs);
18846dee
MA
883 return 0;
884}
885
fa879d62
MA
886/* TODO qdevified devices don't use this, remove when devices are qdevified */
887void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
18846dee 888{
fa879d62
MA
889 if (bdrv_attach_dev(bs, dev) < 0) {
890 abort();
891 }
892}
893
894void bdrv_detach_dev(BlockDriverState *bs, void *dev)
895/* TODO change to DeviceState *dev when all users are qdevified */
896{
897 assert(bs->dev == dev);
898 bs->dev = NULL;
0e49de52
MA
899 bs->dev_ops = NULL;
900 bs->dev_opaque = NULL;
29e05f20 901 bs->buffer_alignment = 512;
18846dee
MA
902}
903
fa879d62
MA
904/* TODO change to return DeviceState * when all users are qdevified */
905void *bdrv_get_attached_dev(BlockDriverState *bs)
18846dee 906{
fa879d62 907 return bs->dev;
18846dee
MA
908}
909
0e49de52
MA
910void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
911 void *opaque)
912{
913 bs->dev_ops = ops;
914 bs->dev_opaque = opaque;
2c6942fa
MA
915 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
916 bs_snapshots = NULL;
917 }
0e49de52
MA
918}
919
7d4b4ba5 920static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
0e49de52 921{
145feb17 922 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
7d4b4ba5 923 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
145feb17
MA
924 }
925}
926
2c6942fa
MA
927bool bdrv_dev_has_removable_media(BlockDriverState *bs)
928{
929 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
930}
931
025ccaa7
PB
932void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
933{
934 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
935 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
936 }
937}
938
e4def80b
MA
939bool bdrv_dev_is_tray_open(BlockDriverState *bs)
940{
941 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
942 return bs->dev_ops->is_tray_open(bs->dev_opaque);
943 }
944 return false;
945}
946
145feb17
MA
947static void bdrv_dev_resize_cb(BlockDriverState *bs)
948{
949 if (bs->dev_ops && bs->dev_ops->resize_cb) {
950 bs->dev_ops->resize_cb(bs->dev_opaque);
0e49de52
MA
951 }
952}
953
f107639a
MA
954bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
955{
956 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
957 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
958 }
959 return false;
960}
961
e97fc193
AL
962/*
963 * Run consistency checks on an image
964 *
e076f338 965 * Returns 0 if the check could be completed (it doesn't mean that the image is
a1c7273b 966 * free of errors) or -errno when an internal error occurred. The results of the
e076f338 967 * check are stored in res.
e97fc193 968 */
e076f338 969int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
e97fc193
AL
970{
971 if (bs->drv->bdrv_check == NULL) {
972 return -ENOTSUP;
973 }
974
e076f338 975 memset(res, 0, sizeof(*res));
9ac228e0 976 return bs->drv->bdrv_check(bs, res);
e97fc193
AL
977}
978
8a426614
KW
979#define COMMIT_BUF_SECTORS 2048
980
33e3963e
FB
981/* commit COW file into the raw image */
982int bdrv_commit(BlockDriverState *bs)
983{
19cb3738 984 BlockDriver *drv = bs->drv;
ee181196 985 BlockDriver *backing_drv;
8a426614
KW
986 int64_t sector, total_sectors;
987 int n, ro, open_flags;
4dca4b63 988 int ret = 0, rw_ret = 0;
8a426614 989 uint8_t *buf;
4dca4b63
NS
990 char filename[1024];
991 BlockDriverState *bs_rw, *bs_ro;
33e3963e 992
19cb3738
FB
993 if (!drv)
994 return -ENOMEDIUM;
4dca4b63
NS
995
996 if (!bs->backing_hd) {
997 return -ENOTSUP;
33e3963e
FB
998 }
999
4dca4b63
NS
1000 if (bs->backing_hd->keep_read_only) {
1001 return -EACCES;
1002 }
ee181196
KW
1003
1004 backing_drv = bs->backing_hd->drv;
4dca4b63
NS
1005 ro = bs->backing_hd->read_only;
1006 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
1007 open_flags = bs->backing_hd->open_flags;
1008
1009 if (ro) {
1010 /* re-open as RW */
1011 bdrv_delete(bs->backing_hd);
1012 bs->backing_hd = NULL;
1013 bs_rw = bdrv_new("");
ee181196
KW
1014 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
1015 backing_drv);
4dca4b63
NS
1016 if (rw_ret < 0) {
1017 bdrv_delete(bs_rw);
1018 /* try to re-open read-only */
1019 bs_ro = bdrv_new("");
ee181196
KW
1020 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1021 backing_drv);
4dca4b63
NS
1022 if (ret < 0) {
1023 bdrv_delete(bs_ro);
1024 /* drive not functional anymore */
1025 bs->drv = NULL;
1026 return ret;
1027 }
1028 bs->backing_hd = bs_ro;
1029 return rw_ret;
1030 }
1031 bs->backing_hd = bs_rw;
ea2384d3 1032 }
33e3963e 1033
6ea44308 1034 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
7267c094 1035 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
8a426614
KW
1036
1037 for (sector = 0; sector < total_sectors; sector += n) {
05c4af54 1038 if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
8a426614
KW
1039
1040 if (bdrv_read(bs, sector, buf, n) != 0) {
1041 ret = -EIO;
1042 goto ro_cleanup;
1043 }
1044
1045 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
1046 ret = -EIO;
1047 goto ro_cleanup;
1048 }
ea2384d3 1049 }
33e3963e 1050 }
95389c86 1051
1d44952f
CH
1052 if (drv->bdrv_make_empty) {
1053 ret = drv->bdrv_make_empty(bs);
1054 bdrv_flush(bs);
1055 }
95389c86 1056
3f5075ae
CH
1057 /*
1058 * Make sure all data we wrote to the backing device is actually
1059 * stable on disk.
1060 */
1061 if (bs->backing_hd)
1062 bdrv_flush(bs->backing_hd);
4dca4b63
NS
1063
1064ro_cleanup:
7267c094 1065 g_free(buf);
4dca4b63
NS
1066
1067 if (ro) {
1068 /* re-open as RO */
1069 bdrv_delete(bs->backing_hd);
1070 bs->backing_hd = NULL;
1071 bs_ro = bdrv_new("");
ee181196
KW
1072 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1073 backing_drv);
4dca4b63
NS
1074 if (ret < 0) {
1075 bdrv_delete(bs_ro);
1076 /* drive not functional anymore */
1077 bs->drv = NULL;
1078 return ret;
1079 }
1080 bs->backing_hd = bs_ro;
1081 bs->backing_hd->keep_read_only = 0;
1082 }
1083
1d44952f 1084 return ret;
33e3963e
FB
1085}
1086
6ab4b5ab
MA
1087void bdrv_commit_all(void)
1088{
1089 BlockDriverState *bs;
1090
1091 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1092 bdrv_commit(bs);
1093 }
1094}
1095
dbffbdcf
SH
/*
 * Bookkeeping for one in-flight I/O request on a BlockDriverState.
 * Each request is linked into bs->tracked_requests for its lifetime
 * (see tracked_request_begin()/tracked_request_end()).
 */
struct BdrvTrackedRequest {
    BlockDriverState *bs;
    int64_t sector_num;                     /* first sector of the request */
    int nb_sectors;                         /* length of the request in sectors */
    bool is_write;                          /* true for writes, false for reads */
    QLIST_ENTRY(BdrvTrackedRequest) list;   /* entry in bs->tracked_requests */
    CoQueue wait_queue; /* coroutines blocked on this request */
};
1104
/**
 * Remove an active request from the tracked requests list
 *
 * This function should be called when a tracked request is completing.
 */
static void tracked_request_end(BdrvTrackedRequest *req)
{
    QLIST_REMOVE(req, list);
    /* Wake every coroutine that went to sleep waiting for this request
     * (see wait_for_overlapping_requests()). */
    qemu_co_queue_restart_all(&req->wait_queue);
}
1115
1116/**
1117 * Add an active request to the tracked requests list
1118 */
1119static void tracked_request_begin(BdrvTrackedRequest *req,
1120 BlockDriverState *bs,
1121 int64_t sector_num,
1122 int nb_sectors, bool is_write)
1123{
1124 *req = (BdrvTrackedRequest){
1125 .bs = bs,
1126 .sector_num = sector_num,
1127 .nb_sectors = nb_sectors,
1128 .is_write = is_write,
1129 };
1130
f4658285
SH
1131 qemu_co_queue_init(&req->wait_queue);
1132
dbffbdcf
SH
1133 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
1134}
1135
f4658285
SH
1136static bool tracked_request_overlaps(BdrvTrackedRequest *req,
1137 int64_t sector_num, int nb_sectors) {
1138 return false; /* not yet implemented */
1139}
1140
/*
 * Block the calling coroutine until no tracked request overlaps
 * [sector_num, sector_num + nb_sectors).
 */
static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors)
{
    BdrvTrackedRequest *req;
    bool retry;

    do {
        retry = false;
        QLIST_FOREACH(req, &bs->tracked_requests, list) {
            if (tracked_request_overlaps(req, sector_num, nb_sectors)) {
                /* Sleeping in qemu_co_queue_wait() may let other requests
                 * start or finish, so the list can change under us: restart
                 * the scan from the head after every wakeup. */
                qemu_co_queue_wait(&req->wait_queue);
                retry = true;
                break;
            }
        }
    } while (retry);
}
1158
756e6736
KW
1159/*
1160 * Return values:
1161 * 0 - success
1162 * -EINVAL - backing format specified, but no file
1163 * -ENOSPC - can't update the backing file because no space is left in the
1164 * image file header
1165 * -ENOTSUP - format driver doesn't support changing the backing file
1166 */
1167int bdrv_change_backing_file(BlockDriverState *bs,
1168 const char *backing_file, const char *backing_fmt)
1169{
1170 BlockDriver *drv = bs->drv;
1171
1172 if (drv->bdrv_change_backing_file != NULL) {
1173 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
1174 } else {
1175 return -ENOTSUP;
1176 }
1177}
1178
71d0770c
AL
1179static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1180 size_t size)
1181{
1182 int64_t len;
1183
1184 if (!bdrv_is_inserted(bs))
1185 return -ENOMEDIUM;
1186
1187 if (bs->growable)
1188 return 0;
1189
1190 len = bdrv_getlength(bs);
1191
fbb7b4e0
KW
1192 if (offset < 0)
1193 return -EIO;
1194
1195 if ((offset > len) || (len - offset < size))
71d0770c
AL
1196 return -EIO;
1197
1198 return 0;
1199}
1200
1201static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1202 int nb_sectors)
1203{
eb5a3165
JS
1204 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1205 nb_sectors * BDRV_SECTOR_SIZE);
71d0770c
AL
1206}
1207
1c9805a3
SH
/* Argument/result bundle for a synchronous read/write emulated on top of
 * the coroutine request path (see bdrv_rw_co / bdrv_rw_co_entry). */
typedef struct RwCo {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    QEMUIOVector *qiov;
    bool is_write;      /* false -> bdrv_co_do_readv, true -> bdrv_co_do_writev */
    int ret;            /* request result; NOT_DONE while still in flight */
} RwCo;
1216
1217static void coroutine_fn bdrv_rw_co_entry(void *opaque)
fc01f7e7 1218{
1c9805a3 1219 RwCo *rwco = opaque;
ea2384d3 1220
1c9805a3
SH
1221 if (!rwco->is_write) {
1222 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
1223 rwco->nb_sectors, rwco->qiov);
1224 } else {
1225 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
1226 rwco->nb_sectors, rwco->qiov);
1227 }
1228}
e7a8a783 1229
1c9805a3
SH
/*
 * Process a synchronous request using coroutines.
 *
 * Wraps @buf in a single-element QEMUIOVector and runs bdrv_rw_co_entry()
 * either inline (if we are already in a coroutine) or in a new coroutine,
 * pumping the AIO event loop until the request completes.
 * Returns the request's result (< 0 on error).
 */
static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
                      int nb_sectors, bool is_write)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
    };
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .qiov = &qiov,
        .is_write = is_write,
        .ret = NOT_DONE,    /* sentinel: overwritten when the request ends */
    };

    qemu_iovec_init_external(&qiov, &iov, 1);

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_rw_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_rw_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            /* drive the event loop so the coroutine can make progress */
            qemu_aio_wait();
        }
    }
    return rwco.ret;
}
b338082b 1265
1c9805a3
SH
/* Synchronous sector read.
 * return < 0 if error. See bdrv_write() for the return codes */
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
              uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
}
1272
7cd1e32a 1273static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
a55eb92c 1274 int nb_sectors, int dirty)
7cd1e32a 1275{
1276 int64_t start, end;
c6d22830 1277 unsigned long val, idx, bit;
a55eb92c 1278
6ea44308 1279 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
c6d22830 1280 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c
JK
1281
1282 for (; start <= end; start++) {
c6d22830
JK
1283 idx = start / (sizeof(unsigned long) * 8);
1284 bit = start % (sizeof(unsigned long) * 8);
1285 val = bs->dirty_bitmap[idx];
1286 if (dirty) {
6d59fec1 1287 if (!(val & (1UL << bit))) {
aaa0eb75 1288 bs->dirty_count++;
6d59fec1 1289 val |= 1UL << bit;
aaa0eb75 1290 }
c6d22830 1291 } else {
6d59fec1 1292 if (val & (1UL << bit)) {
aaa0eb75 1293 bs->dirty_count--;
6d59fec1 1294 val &= ~(1UL << bit);
aaa0eb75 1295 }
c6d22830
JK
1296 }
1297 bs->dirty_bitmap[idx] = val;
7cd1e32a 1298 }
1299}
1300
/* Synchronous sector write.
   Return < 0 if error. Important errors are:
  -EIO         generic I/O error (may happen for all errors)
  -ENOMEDIUM   No media inserted.
  -EINVAL      Invalid sector number or nb_sectors
  -EACCES      Trying to write a read-only device
*/
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
               const uint8_t *buf, int nb_sectors)
{
    /* const is cast away because RwCo stores a single buffer pointer for
     * both directions; the write path only reads from it */
    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
}
1312
eda578e5
AL
/*
 * Read @count1 bytes at byte @offset into @buf, emulating byte access on
 * top of sector reads: an unaligned head and tail go through the bounce
 * buffer tmp_buf, the aligned middle is read directly into @buf.
 * Returns count1 on success, or a negative error from bdrv_read().
 */
int bdrv_pread(BlockDriverState *bs, int64_t offset,
               void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first read to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        /* copy only the tail of the bounce sector that overlaps [offset..) */
        memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* read the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf, count);
    }
    return count1;
}
1357
eda578e5
AL
/*
 * Write @count1 bytes at byte @offset from @buf, emulating byte access on
 * top of sector writes.  Partial head/tail sectors are handled with a
 * read-modify-write cycle through tmp_buf; the aligned middle is written
 * directly from @buf.
 * Returns count1 on success, or a negative error from bdrv_read()/bdrv_write().
 */
int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
                const void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first write to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        /* read-modify-write of the partial head sector */
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* write the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        /* read-modify-write of the partial tail sector */
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf, buf, count);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
    }
    return count1;
}
83f64091 1406
f08145fe
KW
/*
 * Writes to the file and ensures that no writes are reordered across this
 * request (acts as a barrier)
 *
 * Returns 0 on success, -errno in error cases.
 */
int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
    const void *buf, int count)
{
    int ret;

    ret = bdrv_pwrite(bs, offset, buf, count);
    if (ret < 0) {
        return ret;
    }

    /* No flush needed for cache modes that use O_DSYNC */
    if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
        bdrv_flush(bs);
    }

    return 0;
}
1430
c5fbe571
SH
/*
 * Handle a read request in coroutine context
 *
 * Validates the request, applies I/O throttling, and (when copy-on-read is
 * active) serializes against overlapping in-flight requests.  The request
 * is tracked for its whole duration so other coroutines can wait on it.
 */
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    int ret;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }

    /* throttling disk read I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, false, nb_sectors);
    }

    /* copy-on-read requests must not race with overlapping requests */
    if (bs->copy_on_read) {
        wait_for_overlapping_requests(bs, sector_num, nb_sectors);
    }

    tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
    ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
    tracked_request_end(&req);
    return ret;
}
1462
/* Public coroutine read entry point: trace, then delegate. */
int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_readv(bs, sector_num, nb_sectors);

    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
}
1470
/*
 * Handle a write request in coroutine context
 *
 * Validates the request, applies I/O throttling, serializes against
 * overlapping requests when copy-on-read is active, performs the driver
 * write, and updates dirty-bitmap / high-watermark bookkeeping.
 */
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    int ret;

    if (!bs->drv) {
        return -ENOMEDIUM;
    }
    if (bs->read_only) {
        return -EACCES;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }

    /* throttling disk write I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, true, nb_sectors);
    }

    if (bs->copy_on_read) {
        wait_for_overlapping_requests(bs, sector_num, nb_sectors);
    }

    tracked_request_begin(&req, bs, sector_num, nb_sectors, true);

    ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);

    /* NOTE(review): the dirty bitmap and wr_highest_sector are updated even
     * when the driver returned an error (ret < 0) — conservative for
     * migration's dirty tracking, but confirm this is intentional. */
    if (bs->dirty_bitmap) {
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
    }

    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
        bs->wr_highest_sector = sector_num + nb_sectors - 1;
    }

    tracked_request_end(&req);

    return ret;
}
1516
c5fbe571
SH
/* Public coroutine write entry point: trace, then delegate. */
int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_writev(bs, sector_num, nb_sectors);

    return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
}
1524
83f64091
FB
1525/**
1526 * Truncate file to 'offset' bytes (needed only for file protocols)
1527 */
1528int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1529{
1530 BlockDriver *drv = bs->drv;
51762288 1531 int ret;
83f64091 1532 if (!drv)
19cb3738 1533 return -ENOMEDIUM;
83f64091
FB
1534 if (!drv->bdrv_truncate)
1535 return -ENOTSUP;
59f2689d
NS
1536 if (bs->read_only)
1537 return -EACCES;
8591675f
MT
1538 if (bdrv_in_use(bs))
1539 return -EBUSY;
51762288
SH
1540 ret = drv->bdrv_truncate(bs, offset);
1541 if (ret == 0) {
1542 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
145feb17 1543 bdrv_dev_resize_cb(bs);
51762288
SH
1544 }
1545 return ret;
83f64091
FB
1546}
1547
4a1d5e1f
FZ
/**
 * Length of an allocated file in bytes.  Sparse files are counted by actual
 * allocated space.  Falls back to the underlying protocol file when the
 * format driver has no implementation.  Return < 0 if error or unknown.
 */
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_get_allocated_file_size) {
        return drv->bdrv_get_allocated_file_size(bs);
    }
    if (bs->file) {
        /* delegate to the protocol layer */
        return bdrv_get_allocated_file_size(bs->file);
    }
    return -ENOTSUP;
}
1566
83f64091
FB
/**
 * Length of a file in bytes. Return < 0 if error or unknown.
 */
int64_t bdrv_getlength(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;

    /* growable devices and removable media can change size at runtime,
     * so ask the driver; otherwise use the cached sector count */
    if (bs->growable || bdrv_dev_has_removable_media(bs)) {
        if (drv->bdrv_getlength) {
            return drv->bdrv_getlength(bs);
        }
    }
    return bs->total_sectors * BDRV_SECTOR_SIZE;
}
1583
19cb3738 1584/* return 0 as number of sectors if no device present or error */
96b8f136 1585void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
fc01f7e7 1586{
19cb3738
FB
1587 int64_t length;
1588 length = bdrv_getlength(bs);
1589 if (length < 0)
1590 length = 0;
1591 else
6ea44308 1592 length = length >> BDRV_SECTOR_BITS;
19cb3738 1593 *nb_sectors_ptr = length;
fc01f7e7 1594}
cf98951b 1595
f3d54fc4
AL
/* On-disk layout of one MS-DOS/MBR partition table entry (packed; the
 * 32-bit fields are little-endian and read via le32_to_cpu, see
 * guess_disk_lchs). */
struct partition {
        uint8_t boot_ind;           /* 0x80 - active */
        uint8_t head;               /* starting head */
        uint8_t sector;             /* starting sector */
        uint8_t cyl;                /* starting cylinder */
        uint8_t sys_ind;            /* What partition type */
        uint8_t end_head;           /* end head */
        uint8_t end_sector;         /* end sector */
        uint8_t end_cyl;            /* end cylinder */
        uint32_t start_sect;        /* starting sector counting from 0 */
        uint32_t nr_sects;          /* nr of sectors in partition */
} QEMU_PACKED;
f3d54fc4
AL
1608
/* try to guess the disk logical geometry from the MSDOS partition table.
   Scans the four primary entries of the MBR in sector 0 and derives
   heads/sectors from the first plausible entry's end CHS address.
   Return 0 if OK, -1 if could not guess */
static int guess_disk_lchs(BlockDriverState *bs,
                           int *pcylinders, int *pheads, int *psectors)
{
    uint8_t buf[BDRV_SECTOR_SIZE];
    int ret, i, heads, sectors, cylinders;
    struct partition *p;
    uint32_t nr_sects;
    uint64_t nb_sectors;

    bdrv_get_geometry(bs, &nb_sectors);

    ret = bdrv_read(bs, 0, buf, 1);
    if (ret < 0)
        return -1;
    /* test msdos magic */
    if (buf[510] != 0x55 || buf[511] != 0xaa)
        return -1;
    for(i = 0; i < 4; i++) {
        /* partition table starts at offset 0x1be in the MBR */
        p = ((struct partition *)(buf + 0x1be)) + i;
        nr_sects = le32_to_cpu(p->nr_sects);
        if (nr_sects && p->end_head) {
            /* We make the assumption that the partition terminates on
               a cylinder boundary */
            heads = p->end_head + 1;
            sectors = p->end_sector & 63;   /* low 6 bits hold the sector */
            if (sectors == 0)
                continue;
            cylinders = nb_sectors / (heads * sectors);
            if (cylinders < 1 || cylinders > 16383)
                continue;
            *pheads = heads;
            *psectors = sectors;
            *pcylinders = cylinders;
#if 0
            printf("guessed geometry: LCHS=%d %d %d\n",
                   cylinders, heads, sectors);
#endif
            return 0;
        }
    }
    return -1;
}
1652
/*
 * Compute a cylinders/heads/sectors geometry for @bs: use an explicit
 * geometry hint when set, otherwise try the MBR partition table, and fall
 * back to a standard 16-head/63-sector physical geometry.
 * NOTE: the "goto default_geometry" deliberately jumps into the else
 * branch below — do not restructure casually.
 */
void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
{
    int translation, lba_detected = 0;
    int cylinders, heads, secs;
    uint64_t nb_sectors;

    /* if a geometry hint is available, use it */
    bdrv_get_geometry(bs, &nb_sectors);
    bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
    translation = bdrv_get_translation_hint(bs);
    if (cylinders != 0) {
        *pcyls = cylinders;
        *pheads = heads;
        *psecs = secs;
    } else {
        if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
            if (heads > 16) {
                /* if heads > 16, it means that a BIOS LBA
                   translation was active, so the default
                   hardware geometry is OK */
                lba_detected = 1;
                goto default_geometry;
            } else {
                *pcyls = cylinders;
                *pheads = heads;
                *psecs = secs;
                /* disable any translation to be in sync with
                   the logical geometry */
                if (translation == BIOS_ATA_TRANSLATION_AUTO) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_NONE);
                }
            }
        } else {
        default_geometry:
            /* if no geometry, use a standard physical disk geometry */
            cylinders = nb_sectors / (16 * 63);

            if (cylinders > 16383)
                cylinders = 16383;
            else if (cylinders < 2)
                cylinders = 2;
            *pcyls = cylinders;
            *pheads = 16;
            *psecs = 63;
            if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
                /* pick LARGE vs LBA translation based on total C*H */
                if ((*pcyls * *pheads) <= 131072) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LARGE);
                } else {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LBA);
                }
            }
        }
        /* remember the computed geometry for subsequent calls */
        bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
    }
}
1711
/* Store a user/driver supplied CHS geometry hint on the device. */
void bdrv_set_geometry_hint(BlockDriverState *bs,
                            int cyls, int heads, int secs)
{
    bs->cyls = cyls;
    bs->heads = heads;
    bs->secs = secs;
}
1719
46d4767d
FB
/* Store the BIOS ATA translation hint (BIOS_ATA_TRANSLATION_*). */
void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
{
    bs->translation = translation;
}
1724
/* Retrieve the stored CHS geometry hint (zeros if none was set). */
void bdrv_get_geometry_hint(BlockDriverState *bs,
                            int *pcyls, int *pheads, int *psecs)
{
    *pcyls = bs->cyls;
    *pheads = bs->heads;
    *psecs = bs->secs;
}
1732
0563e191
ZYW
/* throttling disk io limits: copy the limits and recompute whether
 * throttling is active for this device. */
void bdrv_set_io_limits(BlockDriverState *bs,
                        BlockIOLimit *io_limits)
{
    bs->io_limits = *io_limits;
    bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
}
1740
5bbdbb46
BS
/* Recognize floppy formats */
typedef struct FDFormat {
    FDriveType drive;     /* drive type this format belongs to */
    uint8_t last_sect;    /* sectors per track */
    uint8_t max_track;    /* number of tracks */
    uint8_t max_head;     /* highest head number (0 = single-sided) */
} FDFormat;

/* Candidate formats matched against the image's total sector count in
 * bdrv_get_floppy_geometry_hint(); terminated by FDRIVE_DRV_NONE. */
static const FDFormat fd_formats[] = {
    /* First entry is default format */
    /* 1.44 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_144, 18, 80, 1, },
    { FDRIVE_DRV_144, 20, 80, 1, },
    { FDRIVE_DRV_144, 21, 80, 1, },
    { FDRIVE_DRV_144, 21, 82, 1, },
    { FDRIVE_DRV_144, 21, 83, 1, },
    { FDRIVE_DRV_144, 22, 80, 1, },
    { FDRIVE_DRV_144, 23, 80, 1, },
    { FDRIVE_DRV_144, 24, 80, 1, },
    /* 2.88 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_288, 36, 80, 1, },
    { FDRIVE_DRV_288, 39, 80, 1, },
    { FDRIVE_DRV_288, 40, 80, 1, },
    { FDRIVE_DRV_288, 44, 80, 1, },
    { FDRIVE_DRV_288, 48, 80, 1, },
    /* 720 kB 3"1/2 floppy disks */
    { FDRIVE_DRV_144,  9, 80, 1, },
    { FDRIVE_DRV_144, 10, 80, 1, },
    { FDRIVE_DRV_144, 10, 82, 1, },
    { FDRIVE_DRV_144, 10, 83, 1, },
    { FDRIVE_DRV_144, 13, 80, 1, },
    { FDRIVE_DRV_144, 14, 80, 1, },
    /* 1.2 MB 5"1/4 floppy disks */
    { FDRIVE_DRV_120, 15, 80, 1, },
    { FDRIVE_DRV_120, 18, 80, 1, },
    { FDRIVE_DRV_120, 18, 82, 1, },
    { FDRIVE_DRV_120, 18, 83, 1, },
    { FDRIVE_DRV_120, 20, 80, 1, },
    /* 720 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 80, 1, },
    { FDRIVE_DRV_120, 11, 80, 1, },
    /* 360 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 40, 1, },
    { FDRIVE_DRV_120,  9, 40, 0, },
    { FDRIVE_DRV_120, 10, 41, 1, },
    { FDRIVE_DRV_120, 10, 42, 1, },
    /* 320 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  8, 40, 1, },
    { FDRIVE_DRV_120,  8, 40, 0, },
    /* 360 kB must match 5"1/4 better than 3"1/2... */
    { FDRIVE_DRV_144,  9, 80, 0, },
    /* end */
    { FDRIVE_DRV_NONE, -1, -1, 0, },
};
1795
/*
 * Determine floppy geometry: use an explicit geometry hint when one is
 * set; otherwise match the image's sector count against fd_formats,
 * preferring an exact size match, then the first format compatible with
 * @drive_in.
 */
void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
                                   int *max_track, int *last_sect,
                                   FDriveType drive_in, FDriveType *drive)
{
    const FDFormat *parse;
    uint64_t nb_sectors, size;
    int i, first_match, match;

    bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
    if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
        /* User defined disk */
    } else {
        bdrv_get_geometry(bs, &nb_sectors);
        match = -1;
        first_match = -1;
        for (i = 0; ; i++) {
            parse = &fd_formats[i];
            if (parse->drive == FDRIVE_DRV_NONE) {
                break;
            }
            if (drive_in == parse->drive ||
                drive_in == FDRIVE_DRV_NONE) {
                size = (parse->max_head + 1) * parse->max_track *
                    parse->last_sect;
                if (nb_sectors == size) {
                    match = i;
                    break;
                }
                if (first_match == -1) {
                    first_match = i;
                }
            }
        }
        if (match == -1) {
            if (first_match == -1) {
                /* NOTE(review): falls back to index 1, although the table
                 * comment says entry 0 is the default — confirm whether
                 * this offset is intentional (branch looks unreachable
                 * when drive_in names a type present in the table). */
                match = 1;
            } else {
                match = first_match;
            }
            parse = &fd_formats[match];
        }
        *nb_heads = parse->max_head + 1;
        *max_track = parse->max_track;
        *last_sect = parse->last_sect;
        *drive = parse->drive;
    }
}
1843
46d4767d
FB
/* Return the stored BIOS ATA translation hint. */
int bdrv_get_translation_hint(BlockDriverState *bs)
{
    return bs->translation;
}
1848
abd7f68d
MA
/* Configure the error actions taken on failed reads and writes. */
void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
                       BlockErrorAction on_write_error)
{
    bs->on_read_error = on_read_error;
    bs->on_write_error = on_write_error;
}
1855
/* Return the configured error action for reads (is_read != 0) or writes. */
BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
{
    return is_read ? bs->on_read_error : bs->on_write_error;
}
1860
b338082b
FB
/* Nonzero if the device was opened read-only. */
int bdrv_is_read_only(BlockDriverState *bs)
{
    return bs->read_only;
}
1865
985a03b0
TS
/* Nonzero if the device is a SCSI generic (pass-through) device. */
int bdrv_is_sg(BlockDriverState *bs)
{
    return bs->sg;
}
1870
e900a7b7
CH
/* Nonzero if the guest-visible write cache is enabled. */
int bdrv_enable_write_cache(BlockDriverState *bs)
{
    return bs->enable_write_cache;
}
1875
ea2384d3
FB
/* Nonzero if this image or its backing file is encrypted. */
int bdrv_is_encrypted(BlockDriverState *bs)
{
    if (bs->backing_hd && bs->backing_hd->encrypted)
        return 1;
    return bs->encrypted;
}
1882
c0f4ce77
AL
/* Nonzero if an encryption key still has to be supplied (for this image
 * or its backing file) before I/O can proceed. */
int bdrv_key_required(BlockDriverState *bs)
{
    BlockDriverState *backing_hd = bs->backing_hd;

    if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
        return 1;
    return (bs->encrypted && !bs->valid_key);
}
1891
ea2384d3
FB
/*
 * Supply the encryption key for @bs (and, recursively, its encrypted
 * backing file first).  On the first successful key set, fires the
 * deferred media-change callback.  Returns 0 on success, -EINVAL if the
 * image is not encrypted, -ENOMEDIUM without a driver/key hook, or the
 * driver's error code.
 */
int bdrv_set_key(BlockDriverState *bs, const char *key)
{
    int ret;
    if (bs->backing_hd && bs->backing_hd->encrypted) {
        ret = bdrv_set_key(bs->backing_hd, key);
        if (ret < 0)
            return ret;
        /* if only the backing file is encrypted, we are done */
        if (!bs->encrypted)
            return 0;
    }
    if (!bs->encrypted) {
        return -EINVAL;
    } else if (!bs->drv || !bs->drv->bdrv_set_key) {
        return -ENOMEDIUM;
    }
    ret = bs->drv->bdrv_set_key(bs, key);
    if (ret < 0) {
        bs->valid_key = 0;
    } else if (!bs->valid_key) {
        bs->valid_key = 1;
        /* call the change callback now, we skipped it on open */
        bdrv_dev_change_media_cb(bs, true);
    }
    return ret;
}
1917
/* Copy the format driver's name into @buf ("" when no medium). */
void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
{
    if (!bs->drv) {
        buf[0] = '\0';
    } else {
        pstrcpy(buf, buf_size, bs->drv->format_name);
    }
}
1926
/* Invoke @it once per registered block driver with its format name. */
void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
                         void *opaque)
{
    BlockDriver *drv;

    QLIST_FOREACH(drv, &bdrv_drivers, list) {
        it(opaque, drv->format_name);
    }
}
1936
b338082b
FB
1937BlockDriverState *bdrv_find(const char *name)
1938{
1939 BlockDriverState *bs;
1940
1b7bdbc1
SH
1941 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1942 if (!strcmp(name, bs->device_name)) {
b338082b 1943 return bs;
1b7bdbc1 1944 }
b338082b
FB
1945 }
1946 return NULL;
1947}
1948
2f399b0a
MA
1949BlockDriverState *bdrv_next(BlockDriverState *bs)
1950{
1951 if (!bs) {
1952 return QTAILQ_FIRST(&bdrv_states);
1953 }
1954 return QTAILQ_NEXT(bs, list);
1955}
1956
/* Invoke @it once for every open BlockDriverState. */
void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        it(opaque, bs);
    }
}
1965
ea2384d3
FB
/* Return the device name this BDS was registered under. */
const char *bdrv_get_device_name(BlockDriverState *bs)
{
    return bs->device_name;
}
1970
c6ca28d6
AL
/* Flush every writable device that currently has a medium inserted. */
void bdrv_flush_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        /* read-only or empty drives have nothing to flush */
        if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
            bdrv_flush(bs);
        }
    }
}
1981
f2feebbd
KW
/* Nonzero if newly created images read back as zeroes.  Drivers without
 * the hook are assumed to zero-initialize. */
int bdrv_has_zero_init(BlockDriverState *bs)
{
    assert(bs->drv);

    if (bs->drv->bdrv_has_zero_init) {
        return bs->drv->bdrv_has_zero_init(bs);
    }

    return 1;
}
1992
376ae3f1
SH
/* Argument/result bundle for the synchronous bdrv_is_allocated() wrapper
 * around coroutine-based bdrv_co_is_allocated(). */
typedef struct BdrvCoIsAllocatedData {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    int *pnum;      /* out: length of the run in the same state */
    int ret;        /* result of bdrv_co_is_allocated() */
    bool done;      /* set when the coroutine has finished */
} BdrvCoIsAllocatedData;
2001
f58c7b35
TS
/*
 * Returns true iff the specified sector is present in the disk image. Drivers
 * not implementing the functionality are assumed to not support backing files,
 * hence all their sectors are reported as allocated.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 * 'nb_sectors' is the max value 'pnum' should be set to.
 */
int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, int *pnum)
{
    /* NOTE(review): bs->drv is dereferenced without a NULL check, unlike
     * bdrv_co_do_readv() — confirm callers guarantee a medium is present. */
    if (!bs->drv->bdrv_co_is_allocated) {
        int64_t n;
        /* past EOF: report nothing allocated */
        if (sector_num >= bs->total_sectors) {
            *pnum = 0;
            return 0;
        }
        /* clamp the run length to the end of the image */
        n = bs->total_sectors - sector_num;
        *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
        return 1;
    }

    return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
}
2029
/* Coroutine wrapper for bdrv_is_allocated() */
static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
{
    BdrvCoIsAllocatedData *data = opaque;
    BlockDriverState *bs = data->bs;

    data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
                                     data->pnum);
    /* signal completion to the qemu_aio_wait() loop in bdrv_is_allocated() */
    data->done = true;
}
2040
/*
 * Synchronous wrapper around bdrv_co_is_allocated().
 *
 * See bdrv_co_is_allocated() for details.
 */
int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
                      int *pnum)
{
    Coroutine *co;
    BdrvCoIsAllocatedData data = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .pnum = pnum,
        .done = false,
    };

    co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
    qemu_coroutine_enter(co, &data);
    while (!data.done) {
        /* pump the event loop until the coroutine completes */
        qemu_aio_wait();
    }
    return data.ret;
}
2065
2582bfed
LC
/* Emit a QEVENT_BLOCK_IO_ERROR monitor event describing the error action
 * taken (@action) for a failed read or write on @bdrv. */
void bdrv_mon_event(const BlockDriverState *bdrv,
                    BlockMonEventAction action, int is_read)
{
    QObject *data;
    const char *action_str;

    switch (action) {
    case BDRV_ACTION_REPORT:
        action_str = "report";
        break;
    case BDRV_ACTION_IGNORE:
        action_str = "ignore";
        break;
    case BDRV_ACTION_STOP:
        action_str = "stop";
        break;
    default:
        abort();    /* unknown action is a programming error */
    }

    data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
                              bdrv->device_name,
                              action_str,
                              is_read ? "read" : "write");
    monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);

    qobject_decref(data);
}
2094
/*
 * QMP 'query-block': build a BlockInfoList entry for every open device,
 * including medium, tray, iostatus and throttling details when applicable.
 * Ownership of the returned list passes to the caller (QMP layer frees it).
 */
BlockInfoList *qmp_query_block(Error **errp)
{
    BlockInfoList *head = NULL, *cur_item = NULL;
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        BlockInfoList *info = g_malloc0(sizeof(*info));

        info->value = g_malloc0(sizeof(*info->value));
        info->value->device = g_strdup(bs->device_name);
        info->value->type = g_strdup("unknown");
        info->value->locked = bdrv_dev_is_medium_locked(bs);
        info->value->removable = bdrv_dev_has_removable_media(bs);

        if (bdrv_dev_has_removable_media(bs)) {
            info->value->has_tray_open = true;
            info->value->tray_open = bdrv_dev_is_tray_open(bs);
        }

        if (bdrv_iostatus_is_enabled(bs)) {
            info->value->has_io_status = true;
            info->value->io_status = bs->iostatus;
        }

        /* bs->drv != NULL means a medium is inserted */
        if (bs->drv) {
            info->value->has_inserted = true;
            info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
            info->value->inserted->file = g_strdup(bs->filename);
            info->value->inserted->ro = bs->read_only;
            info->value->inserted->drv = g_strdup(bs->drv->format_name);
            info->value->inserted->encrypted = bs->encrypted;
            if (bs->backing_file[0]) {
                info->value->inserted->has_backing_file = true;
                info->value->inserted->backing_file = g_strdup(bs->backing_file);
            }

            /* report the configured throttling limits, if enabled */
            if (bs->io_limits_enabled) {
                info->value->inserted->bps =
                               bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
                info->value->inserted->bps_rd =
                               bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
                info->value->inserted->bps_wr =
                               bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
                info->value->inserted->iops =
                               bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
                info->value->inserted->iops_rd =
                               bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
                info->value->inserted->iops_wr =
                               bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
            }
        }

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}
a36e69dd 2158
f11f57e4
LC
2159/* Consider exposing this as a full fledged QMP command */
2160static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
2161{
2162 BlockStats *s;
2163
2164 s = g_malloc0(sizeof(*s));
2165
2166 if (bs->device_name[0]) {
2167 s->has_device = true;
2168 s->device = g_strdup(bs->device_name);
294cc35f
KW
2169 }
2170
f11f57e4
LC
2171 s->stats = g_malloc0(sizeof(*s->stats));
2172 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
2173 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
2174 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
2175 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
2176 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
2177 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
2178 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
2179 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
2180 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
2181
294cc35f 2182 if (bs->file) {
f11f57e4
LC
2183 s->has_parent = true;
2184 s->parent = qmp_query_blockstat(bs->file, NULL);
294cc35f
KW
2185 }
2186
f11f57e4 2187 return s;
294cc35f
KW
2188}
2189
f11f57e4 2190BlockStatsList *qmp_query_blockstats(Error **errp)
218a536a 2191{
f11f57e4 2192 BlockStatsList *head = NULL, *cur_item = NULL;
a36e69dd
TS
2193 BlockDriverState *bs;
2194
1b7bdbc1 2195 QTAILQ_FOREACH(bs, &bdrv_states, list) {
f11f57e4
LC
2196 BlockStatsList *info = g_malloc0(sizeof(*info));
2197 info->value = qmp_query_blockstat(bs, NULL);
2198
2199 /* XXX: waiting for the qapi to support GSList */
2200 if (!cur_item) {
2201 head = cur_item = info;
2202 } else {
2203 cur_item->next = info;
2204 cur_item = info;
2205 }
a36e69dd 2206 }
218a536a 2207
f11f57e4 2208 return head;
a36e69dd 2209}
ea2384d3 2210
045df330
AL
2211const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2212{
2213 if (bs->backing_hd && bs->backing_hd->encrypted)
2214 return bs->backing_file;
2215 else if (bs->encrypted)
2216 return bs->filename;
2217 else
2218 return NULL;
2219}
2220
5fafdf24 2221void bdrv_get_backing_filename(BlockDriverState *bs,
83f64091
FB
2222 char *filename, int filename_size)
2223{
3574c608 2224 pstrcpy(filename, filename_size, bs->backing_file);
83f64091
FB
2225}
2226
5fafdf24 2227int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
faea38e7
FB
2228 const uint8_t *buf, int nb_sectors)
2229{
2230 BlockDriver *drv = bs->drv;
2231 if (!drv)
19cb3738 2232 return -ENOMEDIUM;
faea38e7
FB
2233 if (!drv->bdrv_write_compressed)
2234 return -ENOTSUP;
fbb7b4e0
KW
2235 if (bdrv_check_request(bs, sector_num, nb_sectors))
2236 return -EIO;
a55eb92c 2237
c6d22830 2238 if (bs->dirty_bitmap) {
7cd1e32a 2239 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2240 }
a55eb92c 2241
faea38e7
FB
2242 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2243}
3b46e624 2244
faea38e7
FB
2245int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2246{
2247 BlockDriver *drv = bs->drv;
2248 if (!drv)
19cb3738 2249 return -ENOMEDIUM;
faea38e7
FB
2250 if (!drv->bdrv_get_info)
2251 return -ENOTSUP;
2252 memset(bdi, 0, sizeof(*bdi));
2253 return drv->bdrv_get_info(bs, bdi);
2254}
2255
45566e9c
CH
2256int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2257 int64_t pos, int size)
178e08a5
AL
2258{
2259 BlockDriver *drv = bs->drv;
2260 if (!drv)
2261 return -ENOMEDIUM;
7cdb1f6d
MK
2262 if (drv->bdrv_save_vmstate)
2263 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2264 if (bs->file)
2265 return bdrv_save_vmstate(bs->file, buf, pos, size);
2266 return -ENOTSUP;
178e08a5
AL
2267}
2268
45566e9c
CH
2269int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2270 int64_t pos, int size)
178e08a5
AL
2271{
2272 BlockDriver *drv = bs->drv;
2273 if (!drv)
2274 return -ENOMEDIUM;
7cdb1f6d
MK
2275 if (drv->bdrv_load_vmstate)
2276 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2277 if (bs->file)
2278 return bdrv_load_vmstate(bs->file, buf, pos, size);
2279 return -ENOTSUP;
178e08a5
AL
2280}
2281
8b9b0cc2
KW
2282void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2283{
2284 BlockDriver *drv = bs->drv;
2285
2286 if (!drv || !drv->bdrv_debug_event) {
2287 return;
2288 }
2289
2290 return drv->bdrv_debug_event(bs, event);
2291
2292}
2293
faea38e7
FB
2294/**************************************************************/
2295/* handling of snapshots */
2296
feeee5ac
MDCF
2297int bdrv_can_snapshot(BlockDriverState *bs)
2298{
2299 BlockDriver *drv = bs->drv;
07b70bfb 2300 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
feeee5ac
MDCF
2301 return 0;
2302 }
2303
2304 if (!drv->bdrv_snapshot_create) {
2305 if (bs->file != NULL) {
2306 return bdrv_can_snapshot(bs->file);
2307 }
2308 return 0;
2309 }
2310
2311 return 1;
2312}
2313
199630b6
BS
2314int bdrv_is_snapshot(BlockDriverState *bs)
2315{
2316 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2317}
2318
f9092b10
MA
2319BlockDriverState *bdrv_snapshots(void)
2320{
2321 BlockDriverState *bs;
2322
3ac906f7 2323 if (bs_snapshots) {
f9092b10 2324 return bs_snapshots;
3ac906f7 2325 }
f9092b10
MA
2326
2327 bs = NULL;
2328 while ((bs = bdrv_next(bs))) {
2329 if (bdrv_can_snapshot(bs)) {
3ac906f7
MA
2330 bs_snapshots = bs;
2331 return bs;
f9092b10
MA
2332 }
2333 }
2334 return NULL;
f9092b10
MA
2335}
2336
5fafdf24 2337int bdrv_snapshot_create(BlockDriverState *bs,
faea38e7
FB
2338 QEMUSnapshotInfo *sn_info)
2339{
2340 BlockDriver *drv = bs->drv;
2341 if (!drv)
19cb3738 2342 return -ENOMEDIUM;
7cdb1f6d
MK
2343 if (drv->bdrv_snapshot_create)
2344 return drv->bdrv_snapshot_create(bs, sn_info);
2345 if (bs->file)
2346 return bdrv_snapshot_create(bs->file, sn_info);
2347 return -ENOTSUP;
faea38e7
FB
2348}
2349
5fafdf24 2350int bdrv_snapshot_goto(BlockDriverState *bs,
faea38e7
FB
2351 const char *snapshot_id)
2352{
2353 BlockDriver *drv = bs->drv;
7cdb1f6d
MK
2354 int ret, open_ret;
2355
faea38e7 2356 if (!drv)
19cb3738 2357 return -ENOMEDIUM;
7cdb1f6d
MK
2358 if (drv->bdrv_snapshot_goto)
2359 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2360
2361 if (bs->file) {
2362 drv->bdrv_close(bs);
2363 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2364 open_ret = drv->bdrv_open(bs, bs->open_flags);
2365 if (open_ret < 0) {
2366 bdrv_delete(bs->file);
2367 bs->drv = NULL;
2368 return open_ret;
2369 }
2370 return ret;
2371 }
2372
2373 return -ENOTSUP;
faea38e7
FB
2374}
2375
2376int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2377{
2378 BlockDriver *drv = bs->drv;
2379 if (!drv)
19cb3738 2380 return -ENOMEDIUM;
7cdb1f6d
MK
2381 if (drv->bdrv_snapshot_delete)
2382 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2383 if (bs->file)
2384 return bdrv_snapshot_delete(bs->file, snapshot_id);
2385 return -ENOTSUP;
faea38e7
FB
2386}
2387
5fafdf24 2388int bdrv_snapshot_list(BlockDriverState *bs,
faea38e7
FB
2389 QEMUSnapshotInfo **psn_info)
2390{
2391 BlockDriver *drv = bs->drv;
2392 if (!drv)
19cb3738 2393 return -ENOMEDIUM;
7cdb1f6d
MK
2394 if (drv->bdrv_snapshot_list)
2395 return drv->bdrv_snapshot_list(bs, psn_info);
2396 if (bs->file)
2397 return bdrv_snapshot_list(bs->file, psn_info);
2398 return -ENOTSUP;
faea38e7
FB
2399}
2400
51ef6727 2401int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2402 const char *snapshot_name)
2403{
2404 BlockDriver *drv = bs->drv;
2405 if (!drv) {
2406 return -ENOMEDIUM;
2407 }
2408 if (!bs->read_only) {
2409 return -EINVAL;
2410 }
2411 if (drv->bdrv_snapshot_load_tmp) {
2412 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2413 }
2414 return -ENOTSUP;
2415}
2416
faea38e7
FB
#define NB_SUFFIXES 4

/* Format size into buf as a human-readable string: exact for values up
 * to 999, then scaled by powers of 1024 with a K/M/G/T suffix — one
 * decimal place below 10 units, a rounded integer above. Returns buf. */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t base = 1024;
    int i;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (i = 0; i < NB_SUFFIXES; i++) {
        if (size < (10 * base)) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / base, suffixes[i]);
            break;
        } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
            /* Round to nearest unit; 'T' is forced as the last resort. */
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     (size + (base >> 1)) / base, suffixes[i]);
            break;
        }
        base *= 1024;
    }
    return buf;
}
2446
/*
 * Format one snapshot as a fixed-width table row into buf (returned).
 * With sn == NULL, format the table header instead, so callers can print
 * the header and the rows with the same column layout.
 */
char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
{
    char buf1[128], date_buf[128], clock_buf[128];
#ifdef _WIN32
    /* Windows lacks localtime_r; fall back to localtime's static buffer. */
    struct tm *ptm;
#else
    struct tm tm;
#endif
    time_t ti;
    int64_t secs;

    if (!sn) {
        snprintf(buf, buf_size,
                 "%-10s%-20s%7s%20s%15s",
                 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
    } else {
        ti = sn->date_sec;
#ifdef _WIN32
        ptm = localtime(&ti);
        strftime(date_buf, sizeof(date_buf),
                 "%Y-%m-%d %H:%M:%S", ptm);
#else
        localtime_r(&ti, &tm);
        strftime(date_buf, sizeof(date_buf),
                 "%Y-%m-%d %H:%M:%S", &tm);
#endif
        /* Guest clock at snapshot time, shown as HH:MM:SS.mmm. */
        secs = sn->vm_clock_nsec / 1000000000;
        snprintf(clock_buf, sizeof(clock_buf),
                 "%02d:%02d:%02d.%03d",
                 (int)(secs / 3600),
                 (int)((secs / 60) % 60),
                 (int)(secs % 60),
                 (int)((sn->vm_clock_nsec / 1000000) % 1000));
        snprintf(buf, buf_size,
                 "%-10s%-20s%7s%20s%15s",
                 sn->id_str, sn->name,
                 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
                 date_buf,
                 clock_buf);
    }
    return buf;
}
2489
ea2384d3 2490/**************************************************************/
83f64091 2491/* async I/Os */
ea2384d3 2492
3b69e4b9 2493BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
f141eafe 2494 QEMUIOVector *qiov, int nb_sectors,
3b69e4b9 2495 BlockDriverCompletionFunc *cb, void *opaque)
83f64091 2496{
bbf0a440
SH
2497 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2498
b2a61371 2499 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
8c5873d6 2500 cb, opaque, false);
ea2384d3
FB
2501}
2502
f141eafe
AL
2503BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2504 QEMUIOVector *qiov, int nb_sectors,
2505 BlockDriverCompletionFunc *cb, void *opaque)
ea2384d3 2506{
bbf0a440
SH
2507 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2508
1a6e115b 2509 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
8c5873d6 2510 cb, opaque, true);
83f64091
FB
2511}
2512
40b4f539
KW
2513
/* Shared completion state for one bdrv_aio_multiwrite() call. */
typedef struct MultiwriteCB {
    int error;              /* first error seen among the requests, or 0 */
    int num_requests;       /* outstanding AIO requests (incl. dummy) */
    int num_callbacks;      /* number of caller-visible requests */
    struct {
        BlockDriverCompletionFunc *cb;  /* per-request user callback */
        void *opaque;
        QEMUIOVector *free_qiov;        /* merged qiov to destroy, if any */
        void *free_buf;                 /* zero-fill buffer to free, if any */
    } callbacks[];          /* flexible array, one slot per request */
} MultiwriteCB;
2525
2526static void multiwrite_user_cb(MultiwriteCB *mcb)
2527{
2528 int i;
2529
2530 for (i = 0; i < mcb->num_callbacks; i++) {
2531 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1e1ea48d
SH
2532 if (mcb->callbacks[i].free_qiov) {
2533 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2534 }
7267c094 2535 g_free(mcb->callbacks[i].free_qiov);
f8a83245 2536 qemu_vfree(mcb->callbacks[i].free_buf);
40b4f539
KW
2537 }
2538}
2539
2540static void multiwrite_cb(void *opaque, int ret)
2541{
2542 MultiwriteCB *mcb = opaque;
2543
6d519a5f
SH
2544 trace_multiwrite_cb(mcb, ret);
2545
cb6d3ca0 2546 if (ret < 0 && !mcb->error) {
40b4f539 2547 mcb->error = ret;
40b4f539
KW
2548 }
2549
2550 mcb->num_requests--;
2551 if (mcb->num_requests == 0) {
de189a1b 2552 multiwrite_user_cb(mcb);
7267c094 2553 g_free(mcb);
40b4f539
KW
2554 }
2555}
2556
2557static int multiwrite_req_compare(const void *a, const void *b)
2558{
77be4366
CH
2559 const BlockRequest *req1 = a, *req2 = b;
2560
2561 /*
2562 * Note that we can't simply subtract req2->sector from req1->sector
2563 * here as that could overflow the return value.
2564 */
2565 if (req1->sector > req2->sector) {
2566 return 1;
2567 } else if (req1->sector < req2->sector) {
2568 return -1;
2569 } else {
2570 return 0;
2571 }
40b4f539
KW
2572}
2573
2574/*
2575 * Takes a bunch of requests and tries to merge them. Returns the number of
2576 * requests that remain after merging.
2577 */
2578static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2579 int num_reqs, MultiwriteCB *mcb)
2580{
2581 int i, outidx;
2582
2583 // Sort requests by start sector
2584 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2585
2586 // Check if adjacent requests touch the same clusters. If so, combine them,
2587 // filling up gaps with zero sectors.
2588 outidx = 0;
2589 for (i = 1; i < num_reqs; i++) {
2590 int merge = 0;
2591 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2592
2593 // This handles the cases that are valid for all block drivers, namely
2594 // exactly sequential writes and overlapping writes.
2595 if (reqs[i].sector <= oldreq_last) {
2596 merge = 1;
2597 }
2598
2599 // The block driver may decide that it makes sense to combine requests
2600 // even if there is a gap of some sectors between them. In this case,
2601 // the gap is filled with zeros (therefore only applicable for yet
2602 // unused space in format like qcow2).
2603 if (!merge && bs->drv->bdrv_merge_requests) {
2604 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2605 }
2606
e2a305fb
CH
2607 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2608 merge = 0;
2609 }
2610
40b4f539
KW
2611 if (merge) {
2612 size_t size;
7267c094 2613 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
40b4f539
KW
2614 qemu_iovec_init(qiov,
2615 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2616
2617 // Add the first request to the merged one. If the requests are
2618 // overlapping, drop the last sectors of the first request.
2619 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2620 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2621
2622 // We might need to add some zeros between the two requests
2623 if (reqs[i].sector > oldreq_last) {
2624 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2625 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2626 memset(buf, 0, zero_bytes);
2627 qemu_iovec_add(qiov, buf, zero_bytes);
2628 mcb->callbacks[i].free_buf = buf;
2629 }
2630
2631 // Add the second request
2632 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2633
cbf1dff2 2634 reqs[outidx].nb_sectors = qiov->size >> 9;
40b4f539
KW
2635 reqs[outidx].qiov = qiov;
2636
2637 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2638 } else {
2639 outidx++;
2640 reqs[outidx].sector = reqs[i].sector;
2641 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2642 reqs[outidx].qiov = reqs[i].qiov;
2643 }
2644 }
2645
2646 return outidx + 1;
2647}
2648
2649/*
2650 * Submit multiple AIO write requests at once.
2651 *
2652 * On success, the function returns 0 and all requests in the reqs array have
2653 * been submitted. In error case this function returns -1, and any of the
2654 * requests may or may not be submitted yet. In particular, this means that the
2655 * callback will be called for some of the requests, for others it won't. The
2656 * caller must check the error field of the BlockRequest to wait for the right
2657 * callbacks (if error != 0, no callback will be called).
2658 *
2659 * The implementation may modify the contents of the reqs array, e.g. to merge
2660 * requests. However, the fields opaque and error are left unmodified as they
2661 * are used to signal failure for a single request to the caller.
2662 */
2663int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2664{
2665 BlockDriverAIOCB *acb;
2666 MultiwriteCB *mcb;
2667 int i;
2668
301db7c2
RH
2669 /* don't submit writes if we don't have a medium */
2670 if (bs->drv == NULL) {
2671 for (i = 0; i < num_reqs; i++) {
2672 reqs[i].error = -ENOMEDIUM;
2673 }
2674 return -1;
2675 }
2676
40b4f539
KW
2677 if (num_reqs == 0) {
2678 return 0;
2679 }
2680
2681 // Create MultiwriteCB structure
7267c094 2682 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
40b4f539
KW
2683 mcb->num_requests = 0;
2684 mcb->num_callbacks = num_reqs;
2685
2686 for (i = 0; i < num_reqs; i++) {
2687 mcb->callbacks[i].cb = reqs[i].cb;
2688 mcb->callbacks[i].opaque = reqs[i].opaque;
2689 }
2690
2691 // Check for mergable requests
2692 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2693
6d519a5f
SH
2694 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2695
453f9a16
KW
2696 /*
2697 * Run the aio requests. As soon as one request can't be submitted
2698 * successfully, fail all requests that are not yet submitted (we must
2699 * return failure for all requests anyway)
2700 *
2701 * num_requests cannot be set to the right value immediately: If
2702 * bdrv_aio_writev fails for some request, num_requests would be too high
2703 * and therefore multiwrite_cb() would never recognize the multiwrite
2704 * request as completed. We also cannot use the loop variable i to set it
2705 * when the first request fails because the callback may already have been
2706 * called for previously submitted requests. Thus, num_requests must be
2707 * incremented for each request that is submitted.
2708 *
2709 * The problem that callbacks may be called early also means that we need
2710 * to take care that num_requests doesn't become 0 before all requests are
2711 * submitted - multiwrite_cb() would consider the multiwrite request
2712 * completed. A dummy request that is "completed" by a manual call to
2713 * multiwrite_cb() takes care of this.
2714 */
2715 mcb->num_requests = 1;
2716
6d519a5f 2717 // Run the aio requests
40b4f539 2718 for (i = 0; i < num_reqs; i++) {
453f9a16 2719 mcb->num_requests++;
40b4f539
KW
2720 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2721 reqs[i].nb_sectors, multiwrite_cb, mcb);
2722
2723 if (acb == NULL) {
2724 // We can only fail the whole thing if no request has been
2725 // submitted yet. Otherwise we'll wait for the submitted AIOs to
2726 // complete and report the error in the callback.
453f9a16 2727 if (i == 0) {
6d519a5f 2728 trace_bdrv_aio_multiwrite_earlyfail(mcb);
40b4f539
KW
2729 goto fail;
2730 } else {
6d519a5f 2731 trace_bdrv_aio_multiwrite_latefail(mcb, i);
7eb58a6c 2732 multiwrite_cb(mcb, -EIO);
40b4f539
KW
2733 break;
2734 }
40b4f539
KW
2735 }
2736 }
2737
453f9a16
KW
2738 /* Complete the dummy request */
2739 multiwrite_cb(mcb, 0);
2740
40b4f539
KW
2741 return 0;
2742
2743fail:
453f9a16
KW
2744 for (i = 0; i < mcb->num_callbacks; i++) {
2745 reqs[i].error = -EIO;
2746 }
7267c094 2747 g_free(mcb);
40b4f539
KW
2748 return -1;
2749}
2750
83f64091 2751void bdrv_aio_cancel(BlockDriverAIOCB *acb)
83f64091 2752{
6bbff9a0 2753 acb->pool->cancel(acb);
83f64091
FB
2754}
2755
98f90dba
ZYW
2756/* block I/O throttling */
/*
 * Check whether this request would exceed the configured bytes/s limit
 * for the current accounting slice. Returns true (and stores an estimated
 * delay in *wait, when non-NULL) if the request must be throttled;
 * false with *wait = 0 if it may proceed.
 */
static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
    bool is_write, double elapsed_time, uint64_t *wait)
{
    uint64_t bps_limit = 0;
    double bytes_limit, bytes_base, bytes_res;
    double slice_time, wait_time;

    /* A total limit takes precedence over the per-direction limit. */
    if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
        bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
    } else if (bs->io_limits.bps[is_write]) {
        bps_limit = bs->io_limits.bps[is_write];
    } else {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    slice_time = bs->slice_end - bs->slice_start;
    slice_time /= (NANOSECONDS_PER_SECOND);
    bytes_limit = bps_limit * slice_time;
    bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
    if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
        /* Total limit counts traffic in both directions. */
        bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
    }

    /* bytes_base: the bytes of data which have been read/written; and
     * it is obtained from the history statistic info.
     * bytes_res: the remaining bytes of data which need to be read/written.
     * (bytes_base + bytes_res) / bps_limit: used to calcuate
     *  the total time for completing reading/writting all data.
     */
    bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;

    if (bytes_base + bytes_res <= bytes_limit) {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    /* Calc approx time to dispatch */
    wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;

    /* When the I/O rate at runtime exceeds the limits,
     * bs->slice_end need to be extended in order that the current statistic
     * info can be kept until the timer fire, so it is increased and tuned
     * based on the result of experiment.
     */
    bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
    bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
    if (wait) {
        *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
    }

    return true;
}
2816
/*
 * Check whether one more operation would exceed the configured IOPS
 * limit for the current slice. Same contract as bdrv_exceed_bps_limits:
 * returns true and an estimated delay in *wait when throttling is needed.
 */
static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
    double elapsed_time, uint64_t *wait)
{
    uint64_t iops_limit = 0;
    double ios_limit, ios_base;
    double slice_time, wait_time;

    /* A total limit takes precedence over the per-direction limit. */
    if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
        iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
    } else if (bs->io_limits.iops[is_write]) {
        iops_limit = bs->io_limits.iops[is_write];
    } else {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    slice_time = bs->slice_end - bs->slice_start;
    slice_time /= (NANOSECONDS_PER_SECOND);
    ios_limit = iops_limit * slice_time;
    ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
    if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
        /* Total limit counts operations in both directions. */
        ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
    }

    if (ios_base + 1 <= ios_limit) {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    /* Calc approx time to dispatch */
    wait_time = (ios_base + 1) / iops_limit;
    if (wait_time > elapsed_time) {
        wait_time = wait_time - elapsed_time;
    } else {
        wait_time = 0;
    }

    /* Extend the slice so the statistics survive until the timer fires;
     * the factor is tuned empirically (see bdrv_exceed_bps_limits). */
    bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
    bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
    if (wait) {
        *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
    }

    return true;
}
2868
/*
 * Combined throttling check: starts or renews the accounting slice, then
 * consults both the bytes/s and IOPS limits. Returns true when the request
 * must wait, storing the larger of the two estimated delays in *wait.
 */
static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
                                  bool is_write, int64_t *wait)
{
    int64_t now, max_wait;
    uint64_t bps_wait = 0, iops_wait = 0;
    double elapsed_time;
    int bps_ret, iops_ret;

    now = qemu_get_clock_ns(vm_clock);
    if ((bs->slice_start < now)
        && (bs->slice_end > now)) {
        /* Still inside the current slice: just push its end out. */
        bs->slice_end = now + bs->slice_time;
    } else {
        /* Start a fresh slice and snapshot the accounting counters as
         * the new baseline. */
        bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
        bs->slice_start = now;
        bs->slice_end = now + bs->slice_time;

        bs->io_base.bytes[is_write] = bs->nr_bytes[is_write];
        bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];

        bs->io_base.ios[is_write] = bs->nr_ops[is_write];
        bs->io_base.ios[!is_write] = bs->nr_ops[!is_write];
    }

    elapsed_time = now - bs->slice_start;
    elapsed_time /= (NANOSECONDS_PER_SECOND);

    bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
                                     is_write, elapsed_time, &bps_wait);
    iops_ret = bdrv_exceed_iops_limits(bs, is_write,
                                       elapsed_time, &iops_wait);
    if (bps_ret || iops_ret) {
        /* Wait for whichever limit imposes the longer delay. */
        max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
        if (wait) {
            *wait = max_wait;
        }

        now = qemu_get_clock_ns(vm_clock);
        if (bs->slice_end < now + max_wait) {
            bs->slice_end = now + max_wait;
        }

        return true;
    }

    if (wait) {
        *wait = 0;
    }

    return false;
}
ce1a14dc 2920
83f64091
FB
2921/**************************************************************/
2922/* async block device emulation */
2923
c16b5a2c
CH
/* ACB for the synchronous-driver emulation of AIO: the request is done
 * synchronously through a bounce buffer and completion is deferred to a BH. */
typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;
    QEMUBH *bh;         /* completion bottom half; NULL when idle */
    int ret;            /* result of the synchronous read/write */
    /* vector translation state */
    QEMUIOVector *qiov;
    uint8_t *bounce;    /* linear bounce buffer of qiov->size bytes */
    int is_write;
} BlockDriverAIOCBSync;
2933
2934static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2935{
b666d239
KW
2936 BlockDriverAIOCBSync *acb =
2937 container_of(blockacb, BlockDriverAIOCBSync, common);
6a7ad299 2938 qemu_bh_delete(acb->bh);
36afc451 2939 acb->bh = NULL;
c16b5a2c
CH
2940 qemu_aio_release(acb);
2941}
2942
/* ACB pool for the synchronous emulation path (bdrv_aio_rw_vector). */
static AIOPool bdrv_em_aio_pool = {
    .aiocb_size = sizeof(BlockDriverAIOCBSync),
    .cancel = bdrv_aio_cancel_em,
};
2947
ce1a14dc 2948static void bdrv_aio_bh_cb(void *opaque)
83f64091 2949{
ce1a14dc 2950 BlockDriverAIOCBSync *acb = opaque;
f141eafe 2951
f141eafe
AL
2952 if (!acb->is_write)
2953 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
ceb42de8 2954 qemu_vfree(acb->bounce);
ce1a14dc 2955 acb->common.cb(acb->common.opaque, acb->ret);
6a7ad299 2956 qemu_bh_delete(acb->bh);
36afc451 2957 acb->bh = NULL;
ce1a14dc 2958 qemu_aio_release(acb);
83f64091 2959}
beac80cd 2960
f141eafe
AL
2961static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2962 int64_t sector_num,
2963 QEMUIOVector *qiov,
2964 int nb_sectors,
2965 BlockDriverCompletionFunc *cb,
2966 void *opaque,
2967 int is_write)
2968
83f64091 2969{
ce1a14dc 2970 BlockDriverAIOCBSync *acb;
ce1a14dc 2971
c16b5a2c 2972 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
f141eafe
AL
2973 acb->is_write = is_write;
2974 acb->qiov = qiov;
e268ca52 2975 acb->bounce = qemu_blockalign(bs, qiov->size);
f141eafe 2976
ce1a14dc
PB
2977 if (!acb->bh)
2978 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
f141eafe
AL
2979
2980 if (is_write) {
2981 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
1ed20acf 2982 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
f141eafe 2983 } else {
1ed20acf 2984 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
f141eafe
AL
2985 }
2986
ce1a14dc 2987 qemu_bh_schedule(acb->bh);
f141eafe 2988
ce1a14dc 2989 return &acb->common;
beac80cd
FB
2990}
2991
f141eafe
AL
2992static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2993 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 2994 BlockDriverCompletionFunc *cb, void *opaque)
beac80cd 2995{
f141eafe
AL
2996 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2997}
83f64091 2998
f141eafe
AL
2999static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
3000 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
3001 BlockDriverCompletionFunc *cb, void *opaque)
3002{
3003 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
beac80cd 3004}
beac80cd 3005
68485420
KW
3006
/* ACB for AIO requests serviced by a coroutine (read/write/flush/discard). */
typedef struct BlockDriverAIOCBCoroutine {
    BlockDriverAIOCB common;
    BlockRequest req;   /* sector/nb_sectors/qiov in, error out */
    bool is_write;
    QEMUBH* bh;         /* defers completion out of coroutine context */
} BlockDriverAIOCBCoroutine;
3013
static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
{
    /* Coroutine requests are not cancelled individually: drain all
     * outstanding AIO, which completes this request as well. */
    qemu_aio_flush();
}
3018
/* ACB pool for the coroutine-based AIO emulation path. */
static AIOPool bdrv_em_co_aio_pool = {
    .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
    .cancel = bdrv_aio_co_cancel_em,
};
3023
35246a68 3024static void bdrv_co_em_bh(void *opaque)
68485420
KW
3025{
3026 BlockDriverAIOCBCoroutine *acb = opaque;
3027
3028 acb->common.cb(acb->common.opaque, acb->req.error);
3029 qemu_bh_delete(acb->bh);
3030 qemu_aio_release(acb);
3031}
3032
b2a61371
SH
3033/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
3034static void coroutine_fn bdrv_co_do_rw(void *opaque)
3035{
3036 BlockDriverAIOCBCoroutine *acb = opaque;
3037 BlockDriverState *bs = acb->common.bs;
3038
3039 if (!acb->is_write) {
3040 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
3041 acb->req.nb_sectors, acb->req.qiov);
3042 } else {
3043 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
3044 acb->req.nb_sectors, acb->req.qiov);
3045 }
3046
35246a68 3047 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
b2a61371
SH
3048 qemu_bh_schedule(acb->bh);
3049}
3050
68485420
KW
3051static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
3052 int64_t sector_num,
3053 QEMUIOVector *qiov,
3054 int nb_sectors,
3055 BlockDriverCompletionFunc *cb,
3056 void *opaque,
8c5873d6 3057 bool is_write)
68485420
KW
3058{
3059 Coroutine *co;
3060 BlockDriverAIOCBCoroutine *acb;
3061
3062 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3063 acb->req.sector = sector_num;
3064 acb->req.nb_sectors = nb_sectors;
3065 acb->req.qiov = qiov;
3066 acb->is_write = is_write;
3067
8c5873d6 3068 co = qemu_coroutine_create(bdrv_co_do_rw);
68485420
KW
3069 qemu_coroutine_enter(co, acb);
3070
3071 return &acb->common;
3072}
3073
07f07615 3074static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
b2e12bc6 3075{
07f07615
PB
3076 BlockDriverAIOCBCoroutine *acb = opaque;
3077 BlockDriverState *bs = acb->common.bs;
b2e12bc6 3078
07f07615
PB
3079 acb->req.error = bdrv_co_flush(bs);
3080 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
b2e12bc6 3081 qemu_bh_schedule(acb->bh);
b2e12bc6
CH
3082}
3083
07f07615 3084BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
016f5cf6
AG
3085 BlockDriverCompletionFunc *cb, void *opaque)
3086{
07f07615 3087 trace_bdrv_aio_flush(bs, opaque);
016f5cf6 3088
07f07615
PB
3089 Coroutine *co;
3090 BlockDriverAIOCBCoroutine *acb;
016f5cf6 3091
07f07615
PB
3092 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3093 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
3094 qemu_coroutine_enter(co, acb);
016f5cf6 3095
016f5cf6
AG
3096 return &acb->common;
3097}
3098
4265d620
PB
3099static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
3100{
3101 BlockDriverAIOCBCoroutine *acb = opaque;
3102 BlockDriverState *bs = acb->common.bs;
3103
3104 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
3105 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3106 qemu_bh_schedule(acb->bh);
3107}
3108
3109BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
3110 int64_t sector_num, int nb_sectors,
3111 BlockDriverCompletionFunc *cb, void *opaque)
3112{
3113 Coroutine *co;
3114 BlockDriverAIOCBCoroutine *acb;
3115
3116 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
3117
3118 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3119 acb->req.sector = sector_num;
3120 acb->req.nb_sectors = nb_sectors;
3121 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
3122 qemu_coroutine_enter(co, acb);
3123
3124 return &acb->common;
3125}
3126
ea2384d3
FB
/* Register all built-in block drivers via the module init machinery. */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}
ce1a14dc 3131
eb852011
MA
/* Like bdrv_init(), but restrict format probing to the whitelist. */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
3137
c16b5a2c
CH
3138void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
3139 BlockDriverCompletionFunc *cb, void *opaque)
ce1a14dc 3140{
ce1a14dc
PB
3141 BlockDriverAIOCB *acb;
3142
6bbff9a0
AL
3143 if (pool->free_aiocb) {
3144 acb = pool->free_aiocb;
3145 pool->free_aiocb = acb->next;
ce1a14dc 3146 } else {
7267c094 3147 acb = g_malloc0(pool->aiocb_size);
6bbff9a0 3148 acb->pool = pool;
ce1a14dc
PB
3149 }
3150 acb->bs = bs;
3151 acb->cb = cb;
3152 acb->opaque = opaque;
3153 return acb;
3154}
3155
3156void qemu_aio_release(void *p)
3157{
6bbff9a0
AL
3158 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
3159 AIOPool *pool = acb->pool;
3160 acb->next = pool->free_aiocb;
3161 pool->free_aiocb = acb;
ce1a14dc 3162}
19cb3738 3163
f9f05dc5
KW
3164/**************************************************************/
3165/* Coroutine block device emulation */
3166
/* Rendezvous between an AIO completion callback and the coroutine that
 * issued the request in bdrv_co_io_em(). */
typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;   /* coroutine to re-enter on completion */
    int ret;                /* request result, filled by the callback */
} CoroutineIOCompletion;
3171
3172static void bdrv_co_io_em_complete(void *opaque, int ret)
3173{
3174 CoroutineIOCompletion *co = opaque;
3175
3176 co->ret = ret;
3177 qemu_coroutine_enter(co->coroutine, NULL);
3178}
3179
3180static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
3181 int nb_sectors, QEMUIOVector *iov,
3182 bool is_write)
3183{
3184 CoroutineIOCompletion co = {
3185 .coroutine = qemu_coroutine_self(),
3186 };
3187 BlockDriverAIOCB *acb;
3188
3189 if (is_write) {
a652d160
SH
3190 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
3191 bdrv_co_io_em_complete, &co);
f9f05dc5 3192 } else {
a652d160
SH
3193 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3194 bdrv_co_io_em_complete, &co);
f9f05dc5
KW
3195 }
3196
59370aaa 3197 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
f9f05dc5
KW
3198 if (!acb) {
3199 return -EIO;
3200 }
3201 qemu_coroutine_yield();
3202
3203 return co.ret;
3204}
3205
/* Coroutine read hook, emulated on top of the driver's AIO read. */
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
}
3212
/* Coroutine write hook, emulated on top of the driver's AIO write. */
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
}
3219
/* Coroutine entry point for bdrv_flush(): unpack the RwCo request and
 * store the result of bdrv_co_flush() in rwco->ret. */
static void coroutine_fn bdrv_flush_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    rwco->ret = bdrv_co_flush(rwco->bs);
}
3226
/*
 * Flush buffered data for @bs.
 *
 * Data is first written back to the OS (even with cache=unsafe), then
 * forced to stable storage unless BDRV_O_NO_FLUSH is set.  The disk
 * flush prefers the driver's coroutine hook, falls back to its AIO
 * hook, and finally becomes a no-op for drivers without flush support.
 *
 * Returns 0 on success or a negative errno on failure.
 */
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    int ret;

    /* No medium attached: nothing to flush. */
    if (!bs->drv) {
        return 0;
    }

    /* Write back cached data to the OS even with cache=unsafe */
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            return ret;
        }
    }

    /* But don't actually force it to the disk with cache=unsafe */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        return 0;
    }

    if (bs->drv->bdrv_co_flush_to_disk) {
        return bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        /* Emulate the coroutine hook on top of the AIO interface. */
        BlockDriverAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            return -EIO;
        } else {
            qemu_coroutine_yield();
            return co.ret;
        }
    } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work because
         * that would break guests even if the server operates in writethrough
         * mode.
         *
         * Let's hope the user knows what he's doing.
         */
        return 0;
    }
}
3278
0f15423c
AL
3279void bdrv_invalidate_cache(BlockDriverState *bs)
3280{
3281 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3282 bs->drv->bdrv_invalidate_cache(bs);
3283 }
3284}
3285
/* Call bdrv_invalidate_cache() on every registered BlockDriverState. */
void bdrv_invalidate_cache_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_invalidate_cache(bs);
    }
}
3294
07f07615
PB
3295int bdrv_flush(BlockDriverState *bs)
3296{
3297 Coroutine *co;
3298 RwCo rwco = {
3299 .bs = bs,
3300 .ret = NOT_DONE,
e7a8a783 3301 };
e7a8a783 3302
07f07615
PB
3303 if (qemu_in_coroutine()) {
3304 /* Fast-path if already in coroutine context */
3305 bdrv_flush_co_entry(&rwco);
3306 } else {
3307 co = qemu_coroutine_create(bdrv_flush_co_entry);
3308 qemu_coroutine_enter(co, &rwco);
3309 while (rwco.ret == NOT_DONE) {
3310 qemu_aio_wait();
3311 }
e7a8a783 3312 }
07f07615
PB
3313
3314 return rwco.ret;
e7a8a783
KW
3315}
3316
4265d620
PB
/* Coroutine entry point for bdrv_discard(): forward the RwCo request to
 * bdrv_co_discard() and store the result in rwco->ret. */
static void coroutine_fn bdrv_discard_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
}
3323
/*
 * Discard @nb_sectors starting at @sector_num.
 *
 * Returns -ENOMEDIUM when no medium is attached, -EIO for an invalid
 * range, -EROFS on a read-only device.  Prefers the driver's coroutine
 * hook, falls back to its AIO hook, and succeeds as a no-op for drivers
 * with no discard support.
 */
int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
                                 int nb_sectors)
{
    if (!bs->drv) {
        return -ENOMEDIUM;
    } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    } else if (bs->read_only) {
        return -EROFS;
    } else if (bs->drv->bdrv_co_discard) {
        return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
    } else if (bs->drv->bdrv_aio_discard) {
        /* Emulate the coroutine hook on top of the AIO interface. */
        BlockDriverAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
                                        bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            return -EIO;
        } else {
            qemu_coroutine_yield();
            return co.ret;
        }
    } else {
        return 0;
    }
}
3353
3354int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
3355{
3356 Coroutine *co;
3357 RwCo rwco = {
3358 .bs = bs,
3359 .sector_num = sector_num,
3360 .nb_sectors = nb_sectors,
3361 .ret = NOT_DONE,
3362 };
3363
3364 if (qemu_in_coroutine()) {
3365 /* Fast-path if already in coroutine context */
3366 bdrv_discard_co_entry(&rwco);
3367 } else {
3368 co = qemu_coroutine_create(bdrv_discard_co_entry);
3369 qemu_coroutine_enter(co, &rwco);
3370 while (rwco.ret == NOT_DONE) {
3371 qemu_aio_wait();
3372 }
3373 }
3374
3375 return rwco.ret;
3376}
3377
19cb3738
FB
3378/**************************************************************/
3379/* removable device support */
3380
3381/**
3382 * Return TRUE if the media is present
3383 */
3384int bdrv_is_inserted(BlockDriverState *bs)
3385{
3386 BlockDriver *drv = bs->drv;
a1aff5bf 3387
19cb3738
FB
3388 if (!drv)
3389 return 0;
3390 if (!drv->bdrv_is_inserted)
a1aff5bf
MA
3391 return 1;
3392 return drv->bdrv_is_inserted(bs);
19cb3738
FB
3393}
3394
3395/**
8e49ca46
MA
3396 * Return whether the media changed since the last call to this
3397 * function, or -ENOTSUP if we don't know. Most drivers don't know.
19cb3738
FB
3398 */
3399int bdrv_media_changed(BlockDriverState *bs)
3400{
3401 BlockDriver *drv = bs->drv;
19cb3738 3402
8e49ca46
MA
3403 if (drv && drv->bdrv_media_changed) {
3404 return drv->bdrv_media_changed(bs);
3405 }
3406 return -ENOTSUP;
19cb3738
FB
3407}
3408
3409/**
3410 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3411 */
fdec4404 3412void bdrv_eject(BlockDriverState *bs, int eject_flag)
19cb3738
FB
3413{
3414 BlockDriver *drv = bs->drv;
19cb3738 3415
822e1cd1
MA
3416 if (drv && drv->bdrv_eject) {
3417 drv->bdrv_eject(bs, eject_flag);
19cb3738
FB
3418 }
3419}
3420
19cb3738
FB
3421/**
3422 * Lock or unlock the media (if it is locked, the user won't be able
3423 * to eject it manually).
3424 */
025e849a 3425void bdrv_lock_medium(BlockDriverState *bs, bool locked)
19cb3738
FB
3426{
3427 BlockDriver *drv = bs->drv;
3428
025e849a 3429 trace_bdrv_lock_medium(bs, locked);
b8c6d095 3430
025e849a
MA
3431 if (drv && drv->bdrv_lock_medium) {
3432 drv->bdrv_lock_medium(bs, locked);
19cb3738
FB
3433 }
3434}
985a03b0
TS
3435
3436/* needed for generic scsi interface */
3437
3438int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3439{
3440 BlockDriver *drv = bs->drv;
3441
3442 if (drv && drv->bdrv_ioctl)
3443 return drv->bdrv_ioctl(bs, req, buf);
3444 return -ENOTSUP;
3445}
7d780669 3446
221f715d
AL
3447BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3448 unsigned long int req, void *buf,
3449 BlockDriverCompletionFunc *cb, void *opaque)
7d780669 3450{
221f715d 3451 BlockDriver *drv = bs->drv;
7d780669 3452
221f715d
AL
3453 if (drv && drv->bdrv_aio_ioctl)
3454 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3455 return NULL;
7d780669 3456}
e268ca52 3457
7b6f9300
MA
/* Record the buffer alignment (in bytes) required for I/O on @bs;
 * qemu_blockalign() honours it when allocating buffers. */
void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
{
    bs->buffer_alignment = align;
}
7cd1e32a 3462
e268ca52
AL
3463void *qemu_blockalign(BlockDriverState *bs, size_t size)
3464{
3465 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3466}
7cd1e32a 3467
3468void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3469{
3470 int64_t bitmap_size;
a55eb92c 3471
aaa0eb75 3472 bs->dirty_count = 0;
a55eb92c 3473 if (enable) {
c6d22830
JK
3474 if (!bs->dirty_bitmap) {
3475 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3476 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3477 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
a55eb92c 3478
7267c094 3479 bs->dirty_bitmap = g_malloc0(bitmap_size);
a55eb92c 3480 }
7cd1e32a 3481 } else {
c6d22830 3482 if (bs->dirty_bitmap) {
7267c094 3483 g_free(bs->dirty_bitmap);
c6d22830 3484 bs->dirty_bitmap = NULL;
a55eb92c 3485 }
7cd1e32a 3486 }
3487}
3488
/* Return 1 if the chunk containing @sector is marked dirty, 0 otherwise
 * (also 0 when tracking is disabled or the sector is past end of device). */
int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
{
    /* One bitmap bit covers BDRV_SECTORS_PER_DIRTY_CHUNK sectors. */
    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;

    if (bs->dirty_bitmap &&
        (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
        /* Index into the unsigned long array, then test the bit. */
        return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
            (1UL << (chunk % (sizeof(unsigned long) * 8))));
    } else {
        return 0;
    }
}
3501
a55eb92c
JK
/* Clear the dirty bits covering @nr_sectors sectors from @cur_sector. */
void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                      int nr_sectors)
{
    set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
}
aaa0eb75
LS
3507
/* Return the current dirty counter; it is reset to 0 whenever
 * bdrv_set_dirty_tracking() is called. */
int64_t bdrv_get_dirty_count(BlockDriverState *bs)
{
    return bs->dirty_count;
}
f88e1a42 3512
db593f25
MT
/* Mark or unmark @bs as in use; the assertion catches unbalanced
 * set/clear pairs. */
void bdrv_set_in_use(BlockDriverState *bs, int in_use)
{
    assert(bs->in_use != in_use);
    bs->in_use = in_use;
}
3518
/* Return the in-use flag last set by bdrv_set_in_use(). */
int bdrv_in_use(BlockDriverState *bs)
{
    return bs->in_use;
}
3523
28a7282a
LC
/* Turn on I/O status tracking for @bs and reset the status to OK. */
void bdrv_iostatus_enable(BlockDriverState *bs)
{
    bs->iostatus_enabled = true;
    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}
3529
3530/* The I/O status is only enabled if the drive explicitly
3531 * enables it _and_ the VM is configured to stop on errors */
3532bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3533{
d6bf279e 3534 return (bs->iostatus_enabled &&
28a7282a
LC
3535 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3536 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3537 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3538}
3539
/* Turn off I/O status tracking for @bs. */
void bdrv_iostatus_disable(BlockDriverState *bs)
{
    bs->iostatus_enabled = false;
}
3544
/* Reset the I/O status back to OK, but only while tracking is active. */
void bdrv_iostatus_reset(BlockDriverState *bs)
{
    if (bdrv_iostatus_is_enabled(bs)) {
        bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
    }
}
3551
3552/* XXX: Today this is set by device models because it makes the implementation
3553 quite simple. However, the block layer knows about the error, so it's
3554 possible to implement this without device models being involved */
3555void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3556{
58e21ef5
LC
3557 if (bdrv_iostatus_is_enabled(bs) &&
3558 bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
28a7282a 3559 assert(error >= 0);
58e21ef5
LC
3560 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3561 BLOCK_DEVICE_IO_STATUS_FAILED;
28a7282a
LC
3562 }
3563}
3564
a597e79c
CH
/* Begin accounting for one I/O operation: record its byte count, start
 * timestamp and type in @cookie for bdrv_acct_done() to consume. */
void
bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
        enum BlockAcctType type)
{
    assert(type < BDRV_MAX_IOTYPE);

    cookie->bytes = bytes;
    cookie->start_time_ns = get_clock();
    cookie->type = type;
}
3575
/* Finish accounting for the I/O described by @cookie: fold its byte
 * count, one operation and the elapsed time into the per-type totals. */
void
bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
{
    assert(cookie->type < BDRV_MAX_IOTYPE);

    bs->nr_bytes[cookie->type] += cookie->bytes;
    bs->nr_ops[cookie->type]++;
    bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
}
3585
f88e1a42
JS
3586int bdrv_img_create(const char *filename, const char *fmt,
3587 const char *base_filename, const char *base_fmt,
3588 char *options, uint64_t img_size, int flags)
3589{
3590 QEMUOptionParameter *param = NULL, *create_options = NULL;
d220894e 3591 QEMUOptionParameter *backing_fmt, *backing_file, *size;
f88e1a42
JS
3592 BlockDriverState *bs = NULL;
3593 BlockDriver *drv, *proto_drv;
96df67d1 3594 BlockDriver *backing_drv = NULL;
f88e1a42
JS
3595 int ret = 0;
3596
3597 /* Find driver and parse its options */
3598 drv = bdrv_find_format(fmt);
3599 if (!drv) {
3600 error_report("Unknown file format '%s'", fmt);
4f70f249 3601 ret = -EINVAL;
f88e1a42
JS
3602 goto out;
3603 }
3604
3605 proto_drv = bdrv_find_protocol(filename);
3606 if (!proto_drv) {
3607 error_report("Unknown protocol '%s'", filename);
4f70f249 3608 ret = -EINVAL;
f88e1a42
JS
3609 goto out;
3610 }
3611
3612 create_options = append_option_parameters(create_options,
3613 drv->create_options);
3614 create_options = append_option_parameters(create_options,
3615 proto_drv->create_options);
3616
3617 /* Create parameter list with default values */
3618 param = parse_option_parameters("", create_options, param);
3619
3620 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3621
3622 /* Parse -o options */
3623 if (options) {
3624 param = parse_option_parameters(options, create_options, param);
3625 if (param == NULL) {
3626 error_report("Invalid options for file format '%s'.", fmt);
4f70f249 3627 ret = -EINVAL;
f88e1a42
JS
3628 goto out;
3629 }
3630 }
3631
3632 if (base_filename) {
3633 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3634 base_filename)) {
3635 error_report("Backing file not supported for file format '%s'",
3636 fmt);
4f70f249 3637 ret = -EINVAL;
f88e1a42
JS
3638 goto out;
3639 }
3640 }
3641
3642 if (base_fmt) {
3643 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3644 error_report("Backing file format not supported for file "
3645 "format '%s'", fmt);
4f70f249 3646 ret = -EINVAL;
f88e1a42
JS
3647 goto out;
3648 }
3649 }
3650
792da93a
JS
3651 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3652 if (backing_file && backing_file->value.s) {
3653 if (!strcmp(filename, backing_file->value.s)) {
3654 error_report("Error: Trying to create an image with the "
3655 "same filename as the backing file");
4f70f249 3656 ret = -EINVAL;
792da93a
JS
3657 goto out;
3658 }
3659 }
3660
f88e1a42
JS
3661 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3662 if (backing_fmt && backing_fmt->value.s) {
96df67d1
SH
3663 backing_drv = bdrv_find_format(backing_fmt->value.s);
3664 if (!backing_drv) {
f88e1a42
JS
3665 error_report("Unknown backing file format '%s'",
3666 backing_fmt->value.s);
4f70f249 3667 ret = -EINVAL;
f88e1a42
JS
3668 goto out;
3669 }
3670 }
3671
3672 // The size for the image must always be specified, with one exception:
3673 // If we are using a backing file, we can obtain the size from there
d220894e
KW
3674 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3675 if (size && size->value.n == -1) {
f88e1a42
JS
3676 if (backing_file && backing_file->value.s) {
3677 uint64_t size;
f88e1a42
JS
3678 char buf[32];
3679
f88e1a42
JS
3680 bs = bdrv_new("");
3681
96df67d1 3682 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
f88e1a42 3683 if (ret < 0) {
96df67d1 3684 error_report("Could not open '%s'", backing_file->value.s);
f88e1a42
JS
3685 goto out;
3686 }
3687 bdrv_get_geometry(bs, &size);
3688 size *= 512;
3689
3690 snprintf(buf, sizeof(buf), "%" PRId64, size);
3691 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3692 } else {
3693 error_report("Image creation needs a size parameter");
4f70f249 3694 ret = -EINVAL;
f88e1a42
JS
3695 goto out;
3696 }
3697 }
3698
3699 printf("Formatting '%s', fmt=%s ", filename, fmt);
3700 print_option_parameters(param);
3701 puts("");
3702
3703 ret = bdrv_create(drv, filename, param);
3704
3705 if (ret < 0) {
3706 if (ret == -ENOTSUP) {
3707 error_report("Formatting or formatting option not supported for "
3708 "file format '%s'", fmt);
3709 } else if (ret == -EFBIG) {
3710 error_report("The image size is too large for file format '%s'",
3711 fmt);
3712 } else {
3713 error_report("%s: error while creating %s: %s", filename, fmt,
3714 strerror(-ret));
3715 }
3716 }
3717
3718out:
3719 free_option_parameters(create_options);
3720 free_option_parameters(param);
3721
3722 if (bs) {
3723 bdrv_delete(bs);
3724 }
4f70f249
JS
3725
3726 return ret;
f88e1a42 3727}