]> git.proxmox.com Git - mirror_qemu.git/blame - block.c
block: check bdrv_in_use() before blockdev operations
[mirror_qemu.git] / block.c
CommitLineData
fc01f7e7
FB
1/*
2 * QEMU System Emulator block driver
5fafdf24 3 *
fc01f7e7 4 * Copyright (c) 2003 Fabrice Bellard
5fafdf24 5 *
fc01f7e7
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
3990d09a 24#include "config-host.h"
faf07963 25#include "qemu-common.h"
6d519a5f 26#include "trace.h"
376253ec 27#include "monitor.h"
ea2384d3 28#include "block_int.h"
5efa9d5a 29#include "module.h"
f795e743 30#include "qjson.h"
68485420 31#include "qemu-coroutine.h"
b2023818 32#include "qmp-commands.h"
0563e191 33#include "qemu-timer.h"
fc01f7e7 34
71e72a19 35#ifdef CONFIG_BSD
7674e7bf
FB
36#include <sys/types.h>
37#include <sys/stat.h>
38#include <sys/ioctl.h>
72cf2d4f 39#include <sys/queue.h>
c5e97233 40#ifndef __DragonFly__
7674e7bf
FB
41#include <sys/disk.h>
42#endif
c5e97233 43#endif
7674e7bf 44
49dc768d
AL
45#ifdef _WIN32
46#include <windows.h>
47#endif
48
1c9805a3
SH
49#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
50
7d4b4ba5 51static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
f141eafe
AL
52static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
53 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
c87c0672 54 BlockDriverCompletionFunc *cb, void *opaque);
f141eafe
AL
55static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
56 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 57 BlockDriverCompletionFunc *cb, void *opaque);
f9f05dc5
KW
58static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
59 int64_t sector_num, int nb_sectors,
60 QEMUIOVector *iov);
61static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
62 int64_t sector_num, int nb_sectors,
63 QEMUIOVector *iov);
c5fbe571
SH
64static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
65 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
1c9805a3
SH
66static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
67 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
b2a61371
SH
68static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
69 int64_t sector_num,
70 QEMUIOVector *qiov,
71 int nb_sectors,
72 BlockDriverCompletionFunc *cb,
73 void *opaque,
8c5873d6 74 bool is_write);
b2a61371 75static void coroutine_fn bdrv_co_do_rw(void *opaque);
ec530c81 76
98f90dba
ZYW
77static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
78 bool is_write, double elapsed_time, uint64_t *wait);
79static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
80 double elapsed_time, uint64_t *wait);
81static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
82 bool is_write, int64_t *wait);
83
1b7bdbc1
SH
84static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
85 QTAILQ_HEAD_INITIALIZER(bdrv_states);
7ee930d0 86
8a22f02a
SH
87static QLIST_HEAD(, BlockDriver) bdrv_drivers =
88 QLIST_HEAD_INITIALIZER(bdrv_drivers);
ea2384d3 89
f9092b10
MA
90/* The device to use for VM snapshots */
91static BlockDriverState *bs_snapshots;
92
eb852011
MA
93/* If non-zero, use only whitelisted block drivers */
94static int use_bdrv_whitelist;
95
9e0b22f4
SH
96#ifdef _WIN32
/* True iff filename starts with a DOS drive letter followed by ':'. */
static int is_windows_drive_prefix(const char *filename)
{
    char c = filename[0];
    int is_drive_letter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');

    return is_drive_letter && filename[1] == ':';
}
103
104int is_windows_drive(const char *filename)
105{
106 if (is_windows_drive_prefix(filename) &&
107 filename[2] == '\0')
108 return 1;
109 if (strstart(filename, "\\\\.\\", NULL) ||
110 strstart(filename, "//./", NULL))
111 return 1;
112 return 0;
113}
114#endif
115
0563e191 116/* throttling disk I/O limits */
98f90dba
ZYW
/* Turn off I/O throttling on @bs, releasing all queued requests and
 * discarding the wakeup timer and the current accounting slice. */
void bdrv_io_limits_disable(BlockDriverState *bs)
{
    /* New requests bypass the limiter from here on. */
    bs->io_limits_enabled = false;

    /* Drain the queue: wake every coroutine that was throttled. */
    while (qemu_co_queue_next(&bs->throttled_reqs));

    /* The wakeup timer is no longer needed. */
    if (bs->block_timer) {
        qemu_del_timer(bs->block_timer);
        qemu_free_timer(bs->block_timer);
        bs->block_timer = NULL;
    }

    /* Reset slice accounting so a later enable starts from scratch. */
    bs->slice_start = 0;
    bs->slice_end = 0;
    bs->slice_time = 0;
    memset(&bs->io_base, 0, sizeof(bs->io_base));
}
134
0563e191
ZYW
135static void bdrv_block_timer(void *opaque)
136{
137 BlockDriverState *bs = opaque;
138
139 qemu_co_queue_next(&bs->throttled_reqs);
140}
141
/* Turn on I/O throttling on @bs: set up the wait queue, the wakeup timer
 * and the first accounting slice. */
void bdrv_io_limits_enable(BlockDriverState *bs)
{
    qemu_co_queue_init(&bs->throttled_reqs);
    /* fires when a queued request's wait time has elapsed */
    bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
    bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
    bs->slice_start = qemu_get_clock_ns(vm_clock);
    bs->slice_end = bs->slice_start + bs->slice_time;
    memset(&bs->io_base, 0, sizeof(bs->io_base));
    bs->io_limits_enabled = true;
}
152
153bool bdrv_io_limits_enabled(BlockDriverState *bs)
154{
155 BlockIOLimit *io_limits = &bs->io_limits;
156 return io_limits->bps[BLOCK_IO_LIMIT_READ]
157 || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
158 || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
159 || io_limits->iops[BLOCK_IO_LIMIT_READ]
160 || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
161 || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
162}
163
98f90dba
ZYW
/* Block the calling coroutine until the configured I/O limits allow a
 * request of @nb_sectors to proceed.  Requests are serviced in FIFO order. */
static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     bool is_write, int nb_sectors)
{
    int64_t wait_time = -1;

    /* Join the back of the queue if others are already waiting. */
    if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
        qemu_co_queue_wait(&bs->throttled_reqs);
    }

    /* In fact, we hope to keep each request's timing, in FIFO mode. The next
     * throttled requests will not be dequeued until the current request is
     * allowed to be serviced. So if the current request still exceeds the
     * limits, it will be inserted to the head. All requests followed it will
     * be still in throttled_reqs queue.
     */

    while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
        /* re-check after the computed wait time has elapsed */
        qemu_mod_timer(bs->block_timer,
                       wait_time + qemu_get_clock_ns(vm_clock));
        qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
    }

    /* Pass the baton: let the next queued request run its own check. */
    qemu_co_queue_next(&bs->throttled_reqs);
}
188
9e0b22f4
SH
/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
    const char *colon;

#ifdef _WIN32
    /* "c:" style names are drives, not protocols */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    colon = strchr(path, ':');
    return colon != NULL;
}
201
/* Return 1 if @path is absolute, after skipping any "<protocol>:" prefix. */
int path_is_absolute(const char *path)
{
    const char *rest;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (path[0] == '/' || path[0] == '\\') {
        return 1;
    }
#endif
    /* start checking after the protocol prefix, if any */
    rest = strchr(path, ':');
    rest = rest ? rest + 1 : path;
#ifdef _WIN32
    return rest[0] == '/' || rest[0] == '\\';
#else
    return rest[0] == '/';
#endif
}
221
83f64091
FB
/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URL are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        /* p: first char after a "<protocol>:" prefix, else start of path */
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        /* p1: one past the last directory separator in base_path */
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            /* a backslash may be the rightmost separator on Windows */
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        if (p1 > p)
            p = p1;
        /* copy the directory part of base_path, then append filename */
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}
265
/* Register @bdrv on the global driver list, filling in emulated coroutine
 * and AIO entry points for drivers that only implement one I/O style. */
void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
b338082b
FB
285
286/* create a new block device (by default it is empty) */
287BlockDriverState *bdrv_new(const char *device_name)
288{
1b7bdbc1 289 BlockDriverState *bs;
b338082b 290
7267c094 291 bs = g_malloc0(sizeof(BlockDriverState));
b338082b 292 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
ea2384d3 293 if (device_name[0] != '\0') {
1b7bdbc1 294 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
ea2384d3 295 }
28a7282a 296 bdrv_iostatus_disable(bs);
b338082b
FB
297 return bs;
298}
299
ea2384d3
FB
300BlockDriver *bdrv_find_format(const char *format_name)
301{
302 BlockDriver *drv1;
8a22f02a
SH
303 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
304 if (!strcmp(drv1->format_name, format_name)) {
ea2384d3 305 return drv1;
8a22f02a 306 }
ea2384d3
FB
307 }
308 return NULL;
309}
310
eb852011
MA
311static int bdrv_is_whitelisted(BlockDriver *drv)
312{
313 static const char *whitelist[] = {
314 CONFIG_BDRV_WHITELIST
315 };
316 const char **p;
317
318 if (!whitelist[0])
319 return 1; /* no whitelist, anything goes */
320
321 for (p = whitelist; *p; p++) {
322 if (!strcmp(drv->format_name, *p)) {
323 return 1;
324 }
325 }
326 return 0;
327}
328
329BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
330{
331 BlockDriver *drv = bdrv_find_format(format_name);
332 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
333}
334
0e7e1989
KW
335int bdrv_create(BlockDriver *drv, const char* filename,
336 QEMUOptionParameter *options)
ea2384d3
FB
337{
338 if (!drv->bdrv_create)
339 return -ENOTSUP;
0e7e1989
KW
340
341 return drv->bdrv_create(filename, options);
ea2384d3
FB
342}
343
84a12e66
CH
344int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
345{
346 BlockDriver *drv;
347
b50cbabc 348 drv = bdrv_find_protocol(filename);
84a12e66 349 if (drv == NULL) {
16905d71 350 return -ENOENT;
84a12e66
CH
351 }
352
353 return bdrv_create(drv, filename, options);
354}
355
#ifdef _WIN32
/* Fill @filename with a fresh temporary file name (the file is created). */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
/* Fill @filename with a fresh temporary file name (the file is created).
 * Respects $TMPDIR, defaulting to /tmp. */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;
    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    /* BUG FIX: mkstemp() returns -1 on failure; the old code called
     * close(-1) unconditionally.  The interface cannot report the error,
     * but at least avoid the bogus close. */
    if (fd >= 0) {
        close(fd);
    }
}
#endif
fc01f7e7 378
84a12e66
CH
379/*
380 * Detect host devices. By convention, /dev/cdrom[N] is always
381 * recognized as a host CDROM.
382 */
383static BlockDriver *find_hdev_driver(const char *filename)
384{
385 int score_max = 0, score;
386 BlockDriver *drv = NULL, *d;
387
388 QLIST_FOREACH(d, &bdrv_drivers, list) {
389 if (d->bdrv_probe_device) {
390 score = d->bdrv_probe_device(filename);
391 if (score > score_max) {
392 score_max = score;
393 drv = d;
394 }
395 }
396 }
397
398 return drv;
399}
400
/* Pick the protocol driver for @filename: host-device probe first, then the
 * "file" driver for plain paths, then a "<protocol>:" prefix match. */
BlockDriver *bdrv_find_protocol(const char *filename)
{
    BlockDriver *drv1;
    char protocol[128];
    int len;
    const char *p;

    /* TODO Drivers without bdrv_file_open must be specified explicitly */

    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

    /* no "<protocol>:" prefix -> plain file on the local filesystem */
    if (!path_has_protocol(filename)) {
        return bdrv_find_format("file");
    }
    p = strchr(filename, ':');
    assert(p != NULL);
    /* bounded copy of the protocol name; an over-long prefix is truncated
     * and will simply match no driver */
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->protocol_name &&
            !strcmp(drv1->protocol_name, protocol)) {
            return drv1;
        }
    }
    return NULL;
}
440
/* Probe the image at @filename and store the best-scoring format driver in
 * *pdrv (NULL on failure).  Returns a negative errno on error; note the
 * success return value is the probe read size, not 0. */
static int find_image_format(const char *filename, BlockDriver **pdrv)
{
    int ret, score, score_max;
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
    BlockDriverState *bs;

    /* open via the protocol driver so we can read the header */
    ret = bdrv_file_open(&bs, filename, 0);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs)) {
        bdrv_delete(bs);
        drv = bdrv_find_format("raw");
        if (!drv) {
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
    }

    /* read the first 2k and let each registered driver score it */
    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    bdrv_delete(bs);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    score_max = 0;
    drv = NULL;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->bdrv_probe) {
            /* ret holds the number of header bytes actually read */
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
        }
    }
    if (!drv) {
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}
489
51762288
SH
490/**
491 * Set the current 'total_sectors' value
492 */
493static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
494{
495 BlockDriver *drv = bs->drv;
496
396759ad
NB
497 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
498 if (bs->sg)
499 return 0;
500
51762288
SH
501 /* query actual device if possible, otherwise just trust the hint */
502 if (drv->bdrv_getlength) {
503 int64_t length = drv->bdrv_getlength(bs);
504 if (length < 0) {
505 return length;
506 }
507 hint = length >> BDRV_SECTOR_BITS;
508 }
509
510 bs->total_sectors = hint;
511 return 0;
512}
513
c3993cdc
SH
514/**
515 * Set open flags for a given cache mode
516 *
517 * Return 0 on success, -1 if the cache mode was invalid.
518 */
519int bdrv_parse_cache_flags(const char *mode, int *flags)
520{
521 *flags &= ~BDRV_O_CACHE_MASK;
522
523 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
524 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
92196b2f
SH
525 } else if (!strcmp(mode, "directsync")) {
526 *flags |= BDRV_O_NOCACHE;
c3993cdc
SH
527 } else if (!strcmp(mode, "writeback")) {
528 *flags |= BDRV_O_CACHE_WB;
529 } else if (!strcmp(mode, "unsafe")) {
530 *flags |= BDRV_O_CACHE_WB;
531 *flags |= BDRV_O_NO_FLUSH;
532 } else if (!strcmp(mode, "writethrough")) {
533 /* this is the default */
534 } else {
535 return -1;
536 }
537
538 return 0;
539}
540
53fec9d3
SH
/**
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clobbering its previous state.
 * Copy-on-read stays enabled until all users have called to disable it.
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    bs->copy_on_read++;
}

void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    /* every disable must pair with a prior enable */
    assert(bs->copy_on_read > 0);
    bs->copy_on_read--;
}
556
57915332
KW
/*
 * Common part for opening disk images and files
 */
static int bdrv_open_common(BlockDriverState *bs, const char *filename,
    int flags, BlockDriver *drv)
{
    int ret, open_flags;

    assert(drv != NULL);

    trace_bdrv_open_common(bs, filename, flags, drv->format_name);

    /* reset state possibly left behind by a previous open/close cycle */
    bs->file = NULL;
    bs->total_sectors = 0;
    bs->encrypted = 0;
    bs->valid_key = 0;
    bs->sg = 0;
    bs->open_flags = flags;
    bs->growable = 0;
    bs->buffer_alignment = 512;

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
        bdrv_enable_copy_on_read(bs);
    }

    pstrcpy(bs->filename, sizeof(bs->filename), filename);
    bs->backing_file[0] = '\0';

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
        return -ENOTSUP;
    }

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

    /*
     * Snapshots should be writable.
     */
    if (bs->is_temporary) {
        open_flags |= BDRV_O_RDWR;
    }

    bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        /* protocol drivers consume the filename themselves */
        ret = drv->bdrv_file_open(bs, filename, open_flags);
    } else {
        /* format drivers sit on top of a protocol-level bs->file */
        ret = bdrv_file_open(&bs->file, filename, open_flags);
        if (ret >= 0) {
            ret = drv->bdrv_open(bs, open_flags);
        }
    }

    if (ret < 0) {
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        goto free_and_fail;
    }

#ifndef _WIN32
    /* unlink immediately; the open file descriptor keeps the temporary
     * image alive until it is closed */
    if (bs->is_temporary) {
        unlink(filename);
    }
#endif
    return 0;

free_and_fail:
    if (bs->file) {
        bdrv_delete(bs->file);
        bs->file = NULL;
    }
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}
646
b6ce07aa
KW
647/*
648 * Opens a file using a protocol (file, host_device, nbd, ...)
649 */
83f64091 650int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
ea2384d3 651{
83f64091 652 BlockDriverState *bs;
6db95603 653 BlockDriver *drv;
83f64091
FB
654 int ret;
655
b50cbabc 656 drv = bdrv_find_protocol(filename);
6db95603
CH
657 if (!drv) {
658 return -ENOENT;
659 }
660
83f64091 661 bs = bdrv_new("");
b6ce07aa 662 ret = bdrv_open_common(bs, filename, flags, drv);
83f64091
FB
663 if (ret < 0) {
664 bdrv_delete(bs);
665 return ret;
3b0d4f61 666 }
71d0770c 667 bs->growable = 1;
83f64091
FB
668 *pbs = bs;
669 return 0;
670}
671
b6ce07aa
KW
/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
 */
int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
              BlockDriver *drv)
{
    int ret;
    char tmp_filename[PATH_MAX];

    if (flags & BDRV_O_SNAPSHOT) {
        BlockDriverState *bs1;
        int64_t total_size;
        int is_protocol = 0;
        BlockDriver* bdrv_qcow2;
        QEMUOptionParameter *options;
        char backing_filename[PATH_MAX];

        /* if snapshot, we create a temporary backing file and open it
           instead of opening 'filename' directly */

        /* if there is a backing file, use it */
        bs1 = bdrv_new("");
        /* probe 'filename' first, only to learn its size and driver */
        ret = bdrv_open(bs1, filename, 0, drv);
        if (ret < 0) {
            bdrv_delete(bs1);
            return ret;
        }
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;

        if (bs1->drv && bs1->drv->protocol_name)
            is_protocol = 1;

        bdrv_delete(bs1);

        get_tmp_filename(tmp_filename, sizeof(tmp_filename));

        /* Real path is meaningless for protocols */
        if (is_protocol)
            snprintf(backing_filename, sizeof(backing_filename),
                     "%s", filename);
        else if (!realpath(filename, backing_filename))
            return -errno;

        /* the temporary snapshot overlay is always qcow2 */
        bdrv_qcow2 = bdrv_find_format("qcow2");
        options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);

        set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
        set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
        if (drv) {
            set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
                drv->format_name);
        }

        ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
        free_option_parameters(options);
        if (ret < 0) {
            return ret;
        }

        /* from here on we open the overlay instead of the original image */
        filename = tmp_filename;
        drv = bdrv_qcow2;
        bs->is_temporary = 1;
    }

    /* Find the right image format driver */
    if (!drv) {
        ret = find_image_format(filename, &drv);
    }

    if (!drv) {
        goto unlink_and_fail;
    }

    /* Open the image */
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        goto unlink_and_fail;
    }

    /* If there is a backing file, use it */
    if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
        char backing_filename[PATH_MAX];
        int back_flags;
        BlockDriver *back_drv = NULL;

        bs->backing_hd = bdrv_new("");

        /* backing_file may itself be "<protocol>:..." or relative to us */
        if (path_has_protocol(bs->backing_file)) {
            pstrcpy(backing_filename, sizeof(backing_filename),
                    bs->backing_file);
        } else {
            path_combine(backing_filename, sizeof(backing_filename),
                         filename, bs->backing_file);
        }

        if (bs->backing_format[0] != '\0') {
            back_drv = bdrv_find_format(bs->backing_format);
        }

        /* backing files always opened read-only */
        back_flags =
            flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

        ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
        if (ret < 0) {
            bdrv_close(bs);
            return ret;
        }
        if (bs->is_temporary) {
            bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
        } else {
            /* base image inherits from "parent" */
            bs->backing_hd->keep_read_only = bs->keep_read_only;
        }
    }

    /* encrypted images stay "no medium" until the key is supplied */
    if (!bdrv_key_required(bs)) {
        bdrv_dev_change_media_cb(bs, true);
    }

    /* throttling disk I/O limits */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_enable(bs);
    }

    return 0;

unlink_and_fail:
    if (bs->is_temporary) {
        unlink(filename);
    }
    return ret;
}
805
fc01f7e7
FB
/* Close @bs: delete the backing file chain, call the driver's close hook,
 * release driver state and notify the attached device. Safe to call on an
 * already-closed BDS (bs->drv == NULL). */
void bdrv_close(BlockDriverState *bs)
{
    if (bs->drv) {
        /* the VM-snapshot default device must not point at a closed BDS */
        if (bs == bs_snapshots) {
            bs_snapshots = NULL;
        }
        if (bs->backing_hd) {
            bdrv_delete(bs->backing_hd);
            bs->backing_hd = NULL;
        }
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
#ifdef _WIN32
        /* on POSIX, temporary images were unlinked right after open */
        if (bs->is_temporary) {
            unlink(bs->filename);
        }
#endif
        bs->opaque = NULL;
        bs->drv = NULL;
        bs->copy_on_read = 0;

        if (bs->file != NULL) {
            bdrv_close(bs->file);
        }

        bdrv_dev_change_media_cb(bs, false);
    }

    /*throttling disk I/O limits*/
    if (bs->io_limits_enabled) {
        bdrv_io_limits_disable(bs);
    }
}
839
2bc93fed
MK
840void bdrv_close_all(void)
841{
842 BlockDriverState *bs;
843
844 QTAILQ_FOREACH(bs, &bdrv_states, list) {
845 bdrv_close(bs);
846 }
847}
848
922453bc
SH
/*
 * Wait for pending requests to complete across all BlockDriverStates
 *
 * This function does not flush data to disk, use bdrv_flush_all() for that
 * after calling this function.
 */
void bdrv_drain_all(void)
{
    BlockDriverState *bs;

    /* completes all outstanding AIO before returning */
    qemu_aio_flush();

    /* If requests are still pending there is a bug somewhere */
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        assert(QLIST_EMPTY(&bs->tracked_requests));
        assert(qemu_co_queue_empty(&bs->throttled_reqs));
    }
}
867
d22b2f41
RH
868/* make a BlockDriverState anonymous by removing from bdrv_state list.
869 Also, NULL terminate the device_name to prevent double remove */
870void bdrv_make_anon(BlockDriverState *bs)
871{
872 if (bs->device_name[0] != '\0') {
873 QTAILQ_REMOVE(&bdrv_states, bs, list);
874 }
875 bs->device_name[0] = '\0';
876}
877
b338082b
FB
/* Destroy @bs: close it, free any protocol-level child, and free the
 * structure.  The caller must have detached the guest device first. */
void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->dev);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    bdrv_close(bs);
    /* bdrv_close() closes bs->file but does not free it */
    if (bs->file != NULL) {
        bdrv_delete(bs->file);
    }

    assert(bs != bs_snapshots);
    g_free(bs);
}
893
fa879d62
MA
894int bdrv_attach_dev(BlockDriverState *bs, void *dev)
895/* TODO change to DeviceState *dev when all users are qdevified */
18846dee 896{
fa879d62 897 if (bs->dev) {
18846dee
MA
898 return -EBUSY;
899 }
fa879d62 900 bs->dev = dev;
28a7282a 901 bdrv_iostatus_reset(bs);
18846dee
MA
902 return 0;
903}
904
fa879d62
MA
905/* TODO qdevified devices don't use this, remove when devices are qdevified */
906void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
18846dee 907{
fa879d62
MA
908 if (bdrv_attach_dev(bs, dev) < 0) {
909 abort();
910 }
911}
912
void bdrv_detach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    /* only the owning device may detach */
    assert(bs->dev == dev);
    bs->dev = NULL;
    bs->dev_ops = NULL;
    bs->dev_opaque = NULL;
    /* restore the default alignment the device may have raised */
    bs->buffer_alignment = 512;
}
922
fa879d62
MA
923/* TODO change to return DeviceState * when all users are qdevified */
924void *bdrv_get_attached_dev(BlockDriverState *bs)
18846dee 925{
fa879d62 926 return bs->dev;
18846dee
MA
927}
928
0e49de52
MA
/* Install the device callback table for @bs (media change, eject, resize...).
 * @opaque is passed back to every callback. */
void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
                      void *opaque)
{
    bs->dev_ops = ops;
    bs->dev_opaque = opaque;
    /* a removable device cannot remain the default VM-snapshot target */
    if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
        bs_snapshots = NULL;
    }
}
938
7d4b4ba5 939static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
0e49de52 940{
145feb17 941 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
7d4b4ba5 942 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
145feb17
MA
943 }
944}
945
2c6942fa
MA
946bool bdrv_dev_has_removable_media(BlockDriverState *bs)
947{
948 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
949}
950
025ccaa7
PB
951void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
952{
953 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
954 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
955 }
956}
957
e4def80b
MA
958bool bdrv_dev_is_tray_open(BlockDriverState *bs)
959{
960 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
961 return bs->dev_ops->is_tray_open(bs->dev_opaque);
962 }
963 return false;
964}
965
145feb17
MA
966static void bdrv_dev_resize_cb(BlockDriverState *bs)
967{
968 if (bs->dev_ops && bs->dev_ops->resize_cb) {
969 bs->dev_ops->resize_cb(bs->dev_opaque);
0e49de52
MA
970 }
971}
972
f107639a
MA
973bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
974{
975 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
976 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
977 }
978 return false;
979}
980
e97fc193
AL
981/*
982 * Run consistency checks on an image
983 *
e076f338 984 * Returns 0 if the check could be completed (it doesn't mean that the image is
a1c7273b 985 * free of errors) or -errno when an internal error occurred. The results of the
e076f338 986 * check are stored in res.
e97fc193 987 */
e076f338 988int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
e97fc193
AL
989{
990 if (bs->drv->bdrv_check == NULL) {
991 return -ENOTSUP;
992 }
993
e076f338 994 memset(res, 0, sizeof(*res));
9ac228e0 995 return bs->drv->bdrv_check(bs, res);
e97fc193
AL
996}
997
8a426614
KW
998#define COMMIT_BUF_SECTORS 2048
999
33e3963e
FB
1000/* commit COW file into the raw image */
1001int bdrv_commit(BlockDriverState *bs)
1002{
19cb3738 1003 BlockDriver *drv = bs->drv;
ee181196 1004 BlockDriver *backing_drv;
8a426614
KW
1005 int64_t sector, total_sectors;
1006 int n, ro, open_flags;
4dca4b63 1007 int ret = 0, rw_ret = 0;
8a426614 1008 uint8_t *buf;
4dca4b63
NS
1009 char filename[1024];
1010 BlockDriverState *bs_rw, *bs_ro;
33e3963e 1011
19cb3738
FB
1012 if (!drv)
1013 return -ENOMEDIUM;
4dca4b63
NS
1014
1015 if (!bs->backing_hd) {
1016 return -ENOTSUP;
33e3963e
FB
1017 }
1018
4dca4b63
NS
1019 if (bs->backing_hd->keep_read_only) {
1020 return -EACCES;
1021 }
ee181196 1022
2d3735d3
SH
1023 if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
1024 return -EBUSY;
1025 }
1026
ee181196 1027 backing_drv = bs->backing_hd->drv;
4dca4b63
NS
1028 ro = bs->backing_hd->read_only;
1029 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
1030 open_flags = bs->backing_hd->open_flags;
1031
1032 if (ro) {
1033 /* re-open as RW */
1034 bdrv_delete(bs->backing_hd);
1035 bs->backing_hd = NULL;
1036 bs_rw = bdrv_new("");
ee181196
KW
1037 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
1038 backing_drv);
4dca4b63
NS
1039 if (rw_ret < 0) {
1040 bdrv_delete(bs_rw);
1041 /* try to re-open read-only */
1042 bs_ro = bdrv_new("");
ee181196
KW
1043 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1044 backing_drv);
4dca4b63
NS
1045 if (ret < 0) {
1046 bdrv_delete(bs_ro);
1047 /* drive not functional anymore */
1048 bs->drv = NULL;
1049 return ret;
1050 }
1051 bs->backing_hd = bs_ro;
1052 return rw_ret;
1053 }
1054 bs->backing_hd = bs_rw;
ea2384d3 1055 }
33e3963e 1056
6ea44308 1057 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
7267c094 1058 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
8a426614
KW
1059
1060 for (sector = 0; sector < total_sectors; sector += n) {
05c4af54 1061 if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
8a426614
KW
1062
1063 if (bdrv_read(bs, sector, buf, n) != 0) {
1064 ret = -EIO;
1065 goto ro_cleanup;
1066 }
1067
1068 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
1069 ret = -EIO;
1070 goto ro_cleanup;
1071 }
ea2384d3 1072 }
33e3963e 1073 }
95389c86 1074
1d44952f
CH
1075 if (drv->bdrv_make_empty) {
1076 ret = drv->bdrv_make_empty(bs);
1077 bdrv_flush(bs);
1078 }
95389c86 1079
3f5075ae
CH
1080 /*
1081 * Make sure all data we wrote to the backing device is actually
1082 * stable on disk.
1083 */
1084 if (bs->backing_hd)
1085 bdrv_flush(bs->backing_hd);
4dca4b63
NS
1086
1087ro_cleanup:
7267c094 1088 g_free(buf);
4dca4b63
NS
1089
1090 if (ro) {
1091 /* re-open as RO */
1092 bdrv_delete(bs->backing_hd);
1093 bs->backing_hd = NULL;
1094 bs_ro = bdrv_new("");
ee181196
KW
1095 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1096 backing_drv);
4dca4b63
NS
1097 if (ret < 0) {
1098 bdrv_delete(bs_ro);
1099 /* drive not functional anymore */
1100 bs->drv = NULL;
1101 return ret;
1102 }
1103 bs->backing_hd = bs_ro;
1104 bs->backing_hd->keep_read_only = 0;
1105 }
1106
1d44952f 1107 return ret;
33e3963e
FB
1108}
1109
6ab4b5ab
MA
1110void bdrv_commit_all(void)
1111{
1112 BlockDriverState *bs;
1113
1114 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1115 bdrv_commit(bs);
1116 }
1117}
1118
/* Book-keeping for one in-flight I/O request.  Requests are linked into
 * bs->tracked_requests so that overlapping requests can be detected and
 * serialized (see wait_for_overlapping_requests()). */
struct BdrvTrackedRequest {
    BlockDriverState *bs;   /* device this request operates on */
    int64_t sector_num;     /* first sector of the request */
    int nb_sectors;         /* length of the request in sectors */
    bool is_write;          /* true for writes, false for reads */
    QLIST_ENTRY(BdrvTrackedRequest) list;
    Coroutine *co; /* owner, used for deadlock detection */
    CoQueue wait_queue; /* coroutines blocked on this request */
};
1128
/**
 * Remove an active request from the tracked requests list
 *
 * This function should be called when a tracked request is completing.
 */
static void tracked_request_end(BdrvTrackedRequest *req)
{
    QLIST_REMOVE(req, list);
    /* Wake every coroutine that queued up behind this request */
    qemu_co_queue_restart_all(&req->wait_queue);
}
1139
1140/**
1141 * Add an active request to the tracked requests list
1142 */
1143static void tracked_request_begin(BdrvTrackedRequest *req,
1144 BlockDriverState *bs,
1145 int64_t sector_num,
1146 int nb_sectors, bool is_write)
1147{
1148 *req = (BdrvTrackedRequest){
1149 .bs = bs,
1150 .sector_num = sector_num,
1151 .nb_sectors = nb_sectors,
1152 .is_write = is_write,
5f8b6491 1153 .co = qemu_coroutine_self(),
dbffbdcf
SH
1154 };
1155
f4658285
SH
1156 qemu_co_queue_init(&req->wait_queue);
1157
dbffbdcf
SH
1158 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
1159}
1160
d83947ac
SH
1161/**
1162 * Round a region to cluster boundaries
1163 */
1164static void round_to_clusters(BlockDriverState *bs,
1165 int64_t sector_num, int nb_sectors,
1166 int64_t *cluster_sector_num,
1167 int *cluster_nb_sectors)
1168{
1169 BlockDriverInfo bdi;
1170
1171 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
1172 *cluster_sector_num = sector_num;
1173 *cluster_nb_sectors = nb_sectors;
1174 } else {
1175 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
1176 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
1177 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
1178 nb_sectors, c);
1179 }
1180}
1181
f4658285
SH
1182static bool tracked_request_overlaps(BdrvTrackedRequest *req,
1183 int64_t sector_num, int nb_sectors) {
d83947ac
SH
1184 /* aaaa bbbb */
1185 if (sector_num >= req->sector_num + req->nb_sectors) {
1186 return false;
1187 }
1188 /* bbbb aaaa */
1189 if (req->sector_num >= sector_num + nb_sectors) {
1190 return false;
1191 }
1192 return true;
f4658285
SH
1193}
1194
/* Block the calling coroutine until no tracked request overlaps the given
 * region.  Must be called from coroutine context.  The region is first
 * widened to cluster boundaries before checking for overlap. */
static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors)
{
    BdrvTrackedRequest *req;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    bool retry;

    /* If we touch the same cluster it counts as an overlap.  This guarantees
     * that allocating writes will be serialized and not race with each other
     * for the same cluster.  For example, in copy-on-read it ensures that the
     * CoR read and write operations are atomic and guest writes cannot
     * interleave between them.
     */
    round_to_clusters(bs, sector_num, nb_sectors,
                      &cluster_sector_num, &cluster_nb_sectors);

    do {
        retry = false;
        QLIST_FOREACH(req, &bs->tracked_requests, list) {
            if (tracked_request_overlaps(req, cluster_sector_num,
                                         cluster_nb_sectors)) {
                /* Hitting this means there was a reentrant request, for
                 * example, a block driver issuing nested requests.  This must
                 * never happen since it means deadlock.
                 */
                assert(qemu_coroutine_self() != req->co);

                /* The list may have changed while we slept, so restart the
                 * scan from the beginning after being woken. */
                qemu_co_queue_wait(&req->wait_queue);
                retry = true;
                break;
            }
        }
    } while (retry);
}
1230
756e6736
KW
1231/*
1232 * Return values:
1233 * 0 - success
1234 * -EINVAL - backing format specified, but no file
1235 * -ENOSPC - can't update the backing file because no space is left in the
1236 * image file header
1237 * -ENOTSUP - format driver doesn't support changing the backing file
1238 */
1239int bdrv_change_backing_file(BlockDriverState *bs,
1240 const char *backing_file, const char *backing_fmt)
1241{
1242 BlockDriver *drv = bs->drv;
1243
1244 if (drv->bdrv_change_backing_file != NULL) {
1245 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
1246 } else {
1247 return -ENOTSUP;
1248 }
1249}
1250
71d0770c
AL
1251static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1252 size_t size)
1253{
1254 int64_t len;
1255
1256 if (!bdrv_is_inserted(bs))
1257 return -ENOMEDIUM;
1258
1259 if (bs->growable)
1260 return 0;
1261
1262 len = bdrv_getlength(bs);
1263
fbb7b4e0
KW
1264 if (offset < 0)
1265 return -EIO;
1266
1267 if ((offset > len) || (len - offset < size))
71d0770c
AL
1268 return -EIO;
1269
1270 return 0;
1271}
1272
1273static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1274 int nb_sectors)
1275{
eb5a3165
JS
1276 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1277 nb_sectors * BDRV_SECTOR_SIZE);
71d0770c
AL
1278}
1279
/* Parameter/result bundle passed to bdrv_rw_co_entry() so that a
 * synchronous read/write can be driven through a coroutine. */
typedef struct RwCo {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    QEMUIOVector *qiov;
    bool is_write;      /* selects bdrv_co_do_writev() vs bdrv_co_do_readv() */
    int ret;            /* completion status; NOT_DONE while in flight */
} RwCo;
1288
1289static void coroutine_fn bdrv_rw_co_entry(void *opaque)
fc01f7e7 1290{
1c9805a3 1291 RwCo *rwco = opaque;
ea2384d3 1292
1c9805a3
SH
1293 if (!rwco->is_write) {
1294 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
1295 rwco->nb_sectors, rwco->qiov);
1296 } else {
1297 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
1298 rwco->nb_sectors, rwco->qiov);
1299 }
1300}
e7a8a783 1301
/*
 * Process a synchronous request using coroutines
 *
 * Wraps @buf in a single-element QEMUIOVector and runs bdrv_rw_co_entry().
 * When already in coroutine context the entry function is called directly;
 * otherwise a new coroutine is spawned and the caller busy-waits on the
 * AIO event loop until rwco.ret leaves the NOT_DONE sentinel.
 * Returns < 0 on error; see bdrv_write() for the error codes.
 */
static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
                      int nb_sectors, bool is_write)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
    };
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .qiov = &qiov,
        .is_write = is_write,
        .ret = NOT_DONE,
    };

    qemu_iovec_init_external(&qiov, &iov, 1);

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_rw_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_rw_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            /* Pump AIO completions until the coroutine finishes */
            qemu_aio_wait();
        }
    }
    return rwco.ret;
}
b338082b 1337
/* Synchronous read of @nb_sectors starting at @sector_num into @buf.
 * return < 0 if error. See bdrv_write() for the return codes */
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
              uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
}
1344
7cd1e32a 1345static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
a55eb92c 1346 int nb_sectors, int dirty)
7cd1e32a
LS
1347{
1348 int64_t start, end;
c6d22830 1349 unsigned long val, idx, bit;
a55eb92c 1350
6ea44308 1351 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
c6d22830 1352 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c
JK
1353
1354 for (; start <= end; start++) {
c6d22830
JK
1355 idx = start / (sizeof(unsigned long) * 8);
1356 bit = start % (sizeof(unsigned long) * 8);
1357 val = bs->dirty_bitmap[idx];
1358 if (dirty) {
6d59fec1 1359 if (!(val & (1UL << bit))) {
aaa0eb75 1360 bs->dirty_count++;
6d59fec1 1361 val |= 1UL << bit;
aaa0eb75 1362 }
c6d22830 1363 } else {
6d59fec1 1364 if (val & (1UL << bit)) {
aaa0eb75 1365 bs->dirty_count--;
6d59fec1 1366 val &= ~(1UL << bit);
aaa0eb75 1367 }
c6d22830
JK
1368 }
1369 bs->dirty_bitmap[idx] = val;
7cd1e32a
LS
1370 }
1371}
1372
/* Synchronous write of @nb_sectors from @buf starting at @sector_num.
   Return < 0 if error. Important errors are:
  -EIO         generic I/O error (may happen for all errors)
  -ENOMEDIUM   No media inserted.
  -EINVAL      Invalid sector number or nb_sectors
  -EACCES      Trying to write a read-only device
*/
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
               const uint8_t *buf, int nb_sectors)
{
    /* cast is safe: the write path never modifies the buffer */
    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
}
1384
/* Byte-granularity synchronous read: reads @count1 bytes at byte @offset
 * into @buf by splitting the request into an unaligned head (read via a
 * sector-sized bounce buffer), whole sectors read in place, and an
 * unaligned tail.  Returns @count1 on success or a negative errno from
 * bdrv_read() on failure. */
int bdrv_pread(BlockDriverState *bs, int64_t offset,
               void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first read to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* read the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf, count);
    }
    return count1;
}
1429
/* Byte-granularity synchronous write: writes @count1 bytes from @buf at
 * byte @offset.  Unaligned head and tail sectors are handled with a
 * read-modify-write cycle through a sector-sized bounce buffer; the
 * aligned middle is written in place.  Returns @count1 on success or a
 * negative errno from bdrv_read()/bdrv_write() on failure. */
int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
                const void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first write to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        /* read-modify-write the partial head sector */
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* write the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        /* read-modify-write the partial tail sector */
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf, buf, count);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
    }
    return count1;
}
83f64091 1478
/*
 * Writes to the file and ensures that no writes are reordered across this
 * request (acts as a barrier)
 *
 * Returns 0 on success, -errno in error cases.
 */
int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
    const void *buf, int count)
{
    int ret;

    ret = bdrv_pwrite(bs, offset, buf, count);
    if (ret < 0) {
        return ret;
    }

    /* No flush needed for cache modes that use O_DSYNC */
    if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
        /* NOTE(review): the result of bdrv_flush() is discarded here, so a
         * failed flush still reports success — confirm whether callers rely
         * on that before changing it. */
        bdrv_flush(bs);
    }

    return 0;
}
1502
/* Copy-on-read helper: read the cluster(s) covering the request from this
 * BDS, write them back so they become allocated locally, then copy the
 * requested sub-range into @qiov.  Runs in coroutine context.
 * Returns 0 on success, negative errno on failure. */
static int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    /* Perform I/O through a temporary buffer so that users who scribble over
     * their read buffer while the operation is in progress do not end up
     * modifying the image file.  This is critical for zero-copy guest I/O
     * where anything might happen inside guest memory.
     */
    void *bounce_buffer;

    struct iovec iov;
    QEMUIOVector bounce_qiov;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    size_t skip_bytes;
    int ret;

    /* Cover entire cluster so no additional backing file I/O is required when
     * allocating cluster in the image file.
     */
    round_to_clusters(bs, sector_num, nb_sectors,
                      &cluster_sector_num, &cluster_nb_sectors);

    trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors,
                                cluster_sector_num, cluster_nb_sectors);

    iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
    iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
    qemu_iovec_init_external(&bounce_qiov, &iov, 1);

    ret = bs->drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
                                 &bounce_qiov);
    if (ret < 0) {
        goto err;
    }

    ret = bs->drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
                                  &bounce_qiov);
    if (ret < 0) {
        /* It might be okay to ignore write errors for guest requests.  If this
         * is a deliberate copy-on-read then we don't want to ignore the error.
         * Simply report it in all cases.
         */
        goto err;
    }

    /* Copy only the requested sub-range out of the bounce buffer */
    skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
    qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
                           nb_sectors * BDRV_SECTOR_SIZE);

err:
    qemu_vfree(bounce_buffer);
    return ret;
}
1557
/*
 * Handle a read request in coroutine context
 *
 * Validates the request, applies I/O throttling, serializes against
 * overlapping tracked requests when copy-on-read is active, and dispatches
 * either to the copy-on-read path or straight to the driver.
 * Returns 0 on success, negative errno on failure.
 */
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    int ret;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }

    /* throttling disk read I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, false, nb_sectors);
    }

    /* Serialize before registering our own request, so we do not wait on
     * ourselves */
    if (bs->copy_on_read) {
        wait_for_overlapping_requests(bs, sector_num, nb_sectors);
    }

    tracked_request_begin(&req, bs, sector_num, nb_sectors, false);

    if (bs->copy_on_read) {
        int pnum;

        ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
        if (ret < 0) {
            goto out;
        }

        /* Any unallocated portion triggers the copy-on-read path */
        if (!ret || pnum != nb_sectors) {
            ret = bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, qiov);
            goto out;
        }
    }

    ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);

out:
    tracked_request_end(&req);
    return ret;
}
1606
/* Public coroutine read entry point: traces the request and forwards to
 * bdrv_co_do_readv(). */
int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_readv(bs, sector_num, nb_sectors);

    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
}
1614
/*
 * Handle a write request in coroutine context
 *
 * Validates the request (medium present, not read-only, in range), applies
 * I/O throttling, serializes against overlapping requests when copy-on-read
 * is active, dispatches to the driver, and finally updates the dirty bitmap
 * and the write-high-watermark.  Returns 0 on success, negative errno on
 * failure.
 */
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    int ret;

    if (!bs->drv) {
        return -ENOMEDIUM;
    }
    if (bs->read_only) {
        return -EACCES;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }

    /* throttling disk write I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, true, nb_sectors);
    }

    /* Writes must not interleave with an in-flight copy-on-read cycle */
    if (bs->copy_on_read) {
        wait_for_overlapping_requests(bs, sector_num, nb_sectors);
    }

    tracked_request_begin(&req, bs, sector_num, nb_sectors, true);

    ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);

    /* Record the written range for block migration / mirroring */
    if (bs->dirty_bitmap) {
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
    }

    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
        bs->wr_highest_sector = sector_num + nb_sectors - 1;
    }

    tracked_request_end(&req);

    return ret;
}
1660
/* Public coroutine write entry point: traces the request and forwards to
 * bdrv_co_do_writev(). */
int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_writev(bs, sector_num, nb_sectors);

    return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
}
1668
83f64091
FB
1669/**
1670 * Truncate file to 'offset' bytes (needed only for file protocols)
1671 */
1672int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1673{
1674 BlockDriver *drv = bs->drv;
51762288 1675 int ret;
83f64091 1676 if (!drv)
19cb3738 1677 return -ENOMEDIUM;
83f64091
FB
1678 if (!drv->bdrv_truncate)
1679 return -ENOTSUP;
59f2689d
NS
1680 if (bs->read_only)
1681 return -EACCES;
8591675f
MT
1682 if (bdrv_in_use(bs))
1683 return -EBUSY;
51762288
SH
1684 ret = drv->bdrv_truncate(bs, offset);
1685 if (ret == 0) {
1686 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
145feb17 1687 bdrv_dev_resize_cb(bs);
51762288
SH
1688 }
1689 return ret;
83f64091
FB
1690}
1691
4a1d5e1f
FZ
1692/**
1693 * Length of a allocated file in bytes. Sparse files are counted by actual
1694 * allocated space. Return < 0 if error or unknown.
1695 */
1696int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1697{
1698 BlockDriver *drv = bs->drv;
1699 if (!drv) {
1700 return -ENOMEDIUM;
1701 }
1702 if (drv->bdrv_get_allocated_file_size) {
1703 return drv->bdrv_get_allocated_file_size(bs);
1704 }
1705 if (bs->file) {
1706 return bdrv_get_allocated_file_size(bs->file);
1707 }
1708 return -ENOTSUP;
1709}
1710
83f64091
FB
1711/**
1712 * Length of a file in bytes. Return < 0 if error or unknown.
1713 */
1714int64_t bdrv_getlength(BlockDriverState *bs)
1715{
1716 BlockDriver *drv = bs->drv;
1717 if (!drv)
19cb3738 1718 return -ENOMEDIUM;
51762288 1719
2c6942fa 1720 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
46a4e4e6
SH
1721 if (drv->bdrv_getlength) {
1722 return drv->bdrv_getlength(bs);
1723 }
83f64091 1724 }
46a4e4e6 1725 return bs->total_sectors * BDRV_SECTOR_SIZE;
fc01f7e7
FB
1726}
1727
19cb3738 1728/* return 0 as number of sectors if no device present or error */
96b8f136 1729void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
fc01f7e7 1730{
19cb3738
FB
1731 int64_t length;
1732 length = bdrv_getlength(bs);
1733 if (length < 0)
1734 length = 0;
1735 else
6ea44308 1736 length = length >> BDRV_SECTOR_BITS;
19cb3738 1737 *nb_sectors_ptr = length;
fc01f7e7 1738}
cf98951b 1739
/* On-disk layout of one MS-DOS (MBR) partition table entry; all
 * multi-byte fields are little-endian, hence QEMU_PACKED and the
 * le32_to_cpu() conversions at the use site. */
struct partition {
        uint8_t boot_ind;           /* 0x80 - active */
        uint8_t head;               /* starting head */
        uint8_t sector;             /* starting sector */
        uint8_t cyl;                /* starting cylinder */
        uint8_t sys_ind;            /* What partition type */
        uint8_t end_head;           /* end head */
        uint8_t end_sector;         /* end sector */
        uint8_t end_cyl;            /* end cylinder */
        uint32_t start_sect;        /* starting sector counting from 0 */
        uint32_t nr_sects;          /* nr of sectors in partition */
} QEMU_PACKED;
f3d54fc4
AL
1752
/* try to guess the disk logical geometry from the MSDOS partition table.
   Return 0 if OK, -1 if could not guess */
static int guess_disk_lchs(BlockDriverState *bs,
                           int *pcylinders, int *pheads, int *psectors)
{
    uint8_t buf[BDRV_SECTOR_SIZE];
    int ret, i, heads, sectors, cylinders;
    struct partition *p;
    uint32_t nr_sects;
    uint64_t nb_sectors;

    bdrv_get_geometry(bs, &nb_sectors);

    /* Read the MBR (sector 0) */
    ret = bdrv_read(bs, 0, buf, 1);
    if (ret < 0)
        return -1;
    /* test msdos magic */
    if (buf[510] != 0x55 || buf[511] != 0xaa)
        return -1;
    /* Scan the four primary partition entries at offset 0x1be */
    for(i = 0; i < 4; i++) {
        p = ((struct partition *)(buf + 0x1be)) + i;
        nr_sects = le32_to_cpu(p->nr_sects);
        if (nr_sects && p->end_head) {
            /* We make the assumption that the partition terminates on
               a cylinder boundary */
            heads = p->end_head + 1;
            sectors = p->end_sector & 63;   /* low 6 bits hold the sector */
            if (sectors == 0)
                continue;
            cylinders = nb_sectors / (heads * sectors);
            if (cylinders < 1 || cylinders > 16383)
                continue;
            *pheads = heads;
            *psectors = sectors;
            *pcylinders = cylinders;
#if 0
            printf("guessed geometry: LCHS=%d %d %d\n",
                   cylinders, heads, sectors);
#endif
            return 0;
        }
    }
    return -1;
}
1796
/* Pick a CHS geometry for the device: use an explicit geometry hint if one
 * was set, otherwise try the MBR partition table, and finally fall back to
 * a standard 16-head/63-sector layout.  Also updates the BIOS translation
 * hint when it is still AUTO. */
void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
{
    int translation, lba_detected = 0;
    int cylinders, heads, secs;
    uint64_t nb_sectors;

    /* if a geometry hint is available, use it */
    bdrv_get_geometry(bs, &nb_sectors);
    bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
    translation = bdrv_get_translation_hint(bs);
    if (cylinders != 0) {
        *pcyls = cylinders;
        *pheads = heads;
        *psecs = secs;
    } else {
        if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
            if (heads > 16) {
                /* if heads > 16, it means that a BIOS LBA
                   translation was active, so the default
                   hardware geometry is OK */
                lba_detected = 1;
                goto default_geometry;
            } else {
                *pcyls = cylinders;
                *pheads = heads;
                *psecs = secs;
                /* disable any translation to be in sync with
                   the logical geometry */
                if (translation == BIOS_ATA_TRANSLATION_AUTO) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_NONE);
                }
            }
        } else {
        default_geometry:
            /* if no geometry, use a standard physical disk geometry */
            cylinders = nb_sectors / (16 * 63);

            if (cylinders > 16383)
                cylinders = 16383;
            else if (cylinders < 2)
                cylinders = 2;
            *pcyls = cylinders;
            *pheads = 16;
            *psecs = 63;
            /* choose LARGE vs LBA translation based on total CHS capacity */
            if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
                if ((*pcyls * *pheads) <= 131072) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LARGE);
                } else {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LBA);
                }
            }
        }
        /* remember the guessed geometry for subsequent calls */
        bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
    }
}
1855
/* Store a user/board-supplied CHS geometry hint on the device. */
void bdrv_set_geometry_hint(BlockDriverState *bs,
                            int cyls, int heads, int secs)
{
    bs->cyls = cyls;
    bs->heads = heads;
    bs->secs = secs;
}
1863
/* Store the BIOS ATA translation hint (BIOS_ATA_TRANSLATION_*). */
void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
{
    bs->translation = translation;
}
1868
/* Retrieve the stored CHS geometry hint; all-zero means "no hint set". */
void bdrv_get_geometry_hint(BlockDriverState *bs,
                            int *pcyls, int *pheads, int *psecs)
{
    *pcyls = bs->cyls;
    *pheads = bs->heads;
    *psecs = bs->secs;
}
1876
/* throttling disk io limits */
void bdrv_set_io_limits(BlockDriverState *bs,
                        BlockIOLimit *io_limits)
{
    /* Copy the limits and recompute whether throttling is active */
    bs->io_limits = *io_limits;
    bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
}
1884
/* Recognize floppy formats */
typedef struct FDFormat {
    FDriveType drive;   /* drive type this geometry belongs to */
    uint8_t last_sect;  /* sectors per track */
    uint8_t max_track;  /* number of tracks */
    uint8_t max_head;   /* highest head index (0 = single-sided) */
} FDFormat;

/* Known floppy geometries, matched against the image's total sector count
 * in bdrv_get_floppy_geometry_hint(). */
static const FDFormat fd_formats[] = {
    /* First entry is default format */
    /* 1.44 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_144, 18, 80, 1, },
    { FDRIVE_DRV_144, 20, 80, 1, },
    { FDRIVE_DRV_144, 21, 80, 1, },
    { FDRIVE_DRV_144, 21, 82, 1, },
    { FDRIVE_DRV_144, 21, 83, 1, },
    { FDRIVE_DRV_144, 22, 80, 1, },
    { FDRIVE_DRV_144, 23, 80, 1, },
    { FDRIVE_DRV_144, 24, 80, 1, },
    /* 2.88 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_288, 36, 80, 1, },
    { FDRIVE_DRV_288, 39, 80, 1, },
    { FDRIVE_DRV_288, 40, 80, 1, },
    { FDRIVE_DRV_288, 44, 80, 1, },
    { FDRIVE_DRV_288, 48, 80, 1, },
    /* 720 kB 3"1/2 floppy disks */
    { FDRIVE_DRV_144,  9, 80, 1, },
    { FDRIVE_DRV_144, 10, 80, 1, },
    { FDRIVE_DRV_144, 10, 82, 1, },
    { FDRIVE_DRV_144, 10, 83, 1, },
    { FDRIVE_DRV_144, 13, 80, 1, },
    { FDRIVE_DRV_144, 14, 80, 1, },
    /* 1.2 MB 5"1/4 floppy disks */
    { FDRIVE_DRV_120, 15, 80, 1, },
    { FDRIVE_DRV_120, 18, 80, 1, },
    { FDRIVE_DRV_120, 18, 82, 1, },
    { FDRIVE_DRV_120, 18, 83, 1, },
    { FDRIVE_DRV_120, 20, 80, 1, },
    /* 720 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 80, 1, },
    { FDRIVE_DRV_120, 11, 80, 1, },
    /* 360 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 40, 1, },
    { FDRIVE_DRV_120,  9, 40, 0, },
    { FDRIVE_DRV_120, 10, 41, 1, },
    { FDRIVE_DRV_120, 10, 42, 1, },
    /* 320 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  8, 40, 1, },
    { FDRIVE_DRV_120,  8, 40, 0, },
    /* 360 kB must match 5"1/4 better than 3"1/2... */
    { FDRIVE_DRV_144,  9, 80, 0, },
    /* end */
    { FDRIVE_DRV_NONE, -1, -1, 0, },
};
1939
/* Determine floppy geometry: honour an explicit geometry hint if fully
 * set, otherwise match the image's sector count against the fd_formats
 * table (exact size match wins; else the first entry compatible with
 * @drive_in; else a default).  Outputs heads/tracks/sectors and the
 * detected drive type. */
void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
                                   int *max_track, int *last_sect,
                                   FDriveType drive_in, FDriveType *drive)
{
    const FDFormat *parse;
    uint64_t nb_sectors, size;
    int i, first_match, match;

    bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
    if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
        /* User defined disk */
    } else {
        bdrv_get_geometry(bs, &nb_sectors);
        match = -1;
        first_match = -1;
        for (i = 0; ; i++) {
            parse = &fd_formats[i];
            if (parse->drive == FDRIVE_DRV_NONE) {
                break;
            }
            if (drive_in == parse->drive ||
                drive_in == FDRIVE_DRV_NONE) {
                size = (parse->max_head + 1) * parse->max_track *
                    parse->last_sect;
                if (nb_sectors == size) {
                    match = i;
                    break;
                }
                /* remember the first compatible format as a fallback */
                if (first_match == -1) {
                    first_match = i;
                }
            }
        }
        if (match == -1) {
            if (first_match == -1) {
                match = 1;
            } else {
                match = first_match;
            }
            parse = &fd_formats[match];
        }
        *nb_heads = parse->max_head + 1;
        *max_track = parse->max_track;
        *last_sect = parse->last_sect;
        *drive = parse->drive;
    }
}
1987
/* Return the stored BIOS ATA translation hint (BIOS_ATA_TRANSLATION_*). */
int bdrv_get_translation_hint(BlockDriverState *bs)
{
    return bs->translation;
}
1992
/* Configure the actions taken when a read or write error occurs. */
void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
                       BlockErrorAction on_write_error)
{
    bs->on_read_error = on_read_error;
    bs->on_write_error = on_write_error;
}
1999
2000BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
2001{
2002 return is_read ? bs->on_read_error : bs->on_write_error;
2003}
2004
/* Nonzero if the device was opened read-only. */
int bdrv_is_read_only(BlockDriverState *bs)
{
    return bs->read_only;
}
2009
/* Nonzero if this is a SCSI-generic (sg) passthrough device. */
int bdrv_is_sg(BlockDriverState *bs)
{
    return bs->sg;
}
2014
/* Nonzero if the guest-visible write cache is enabled for this device. */
int bdrv_enable_write_cache(BlockDriverState *bs)
{
    return bs->enable_write_cache;
}
2019
ea2384d3
FB
2020int bdrv_is_encrypted(BlockDriverState *bs)
2021{
2022 if (bs->backing_hd && bs->backing_hd->encrypted)
2023 return 1;
2024 return bs->encrypted;
2025}
2026
c0f4ce77
AL
2027int bdrv_key_required(BlockDriverState *bs)
2028{
2029 BlockDriverState *backing_hd = bs->backing_hd;
2030
2031 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2032 return 1;
2033 return (bs->encrypted && !bs->valid_key);
2034}
2035
/* Supply the encryption key for @bs, recursing into the backing file
 * first.  Returns 0 on success, -EINVAL if the device is not encrypted,
 * -ENOMEDIUM if no driver or no set_key hook, or the driver's error.
 * On first success the deferred media-change callback is fired. */
int bdrv_set_key(BlockDriverState *bs, const char *key)
{
    int ret;
    if (bs->backing_hd && bs->backing_hd->encrypted) {
        ret = bdrv_set_key(bs->backing_hd, key);
        if (ret < 0)
            return ret;
        /* If only the backing file is encrypted, we are done */
        if (!bs->encrypted)
            return 0;
    }
    if (!bs->encrypted) {
        return -EINVAL;
    } else if (!bs->drv || !bs->drv->bdrv_set_key) {
        return -ENOMEDIUM;
    }
    ret = bs->drv->bdrv_set_key(bs, key);
    if (ret < 0) {
        bs->valid_key = 0;
    } else if (!bs->valid_key) {
        bs->valid_key = 1;
        /* call the change callback now, we skipped it on open */
        bdrv_dev_change_media_cb(bs, true);
    }
    return ret;
}
2061
2062void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
2063{
19cb3738 2064 if (!bs->drv) {
ea2384d3
FB
2065 buf[0] = '\0';
2066 } else {
2067 pstrcpy(buf, buf_size, bs->drv->format_name);
2068 }
2069}
2070
/* Invoke @it(@opaque, name) for every registered block driver format. */
void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
                         void *opaque)
{
    BlockDriver *drv;

    QLIST_FOREACH(drv, &bdrv_drivers, list) {
        it(opaque, drv->format_name);
    }
}
2080
b338082b
FB
2081BlockDriverState *bdrv_find(const char *name)
2082{
2083 BlockDriverState *bs;
2084
1b7bdbc1
SH
2085 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2086 if (!strcmp(name, bs->device_name)) {
b338082b 2087 return bs;
1b7bdbc1 2088 }
b338082b
FB
2089 }
2090 return NULL;
2091}
2092
2f399b0a
MA
2093BlockDriverState *bdrv_next(BlockDriverState *bs)
2094{
2095 if (!bs) {
2096 return QTAILQ_FIRST(&bdrv_states);
2097 }
2098 return QTAILQ_NEXT(bs, list);
2099}
2100
/* Invoke @it(@opaque, bs) for every registered block device. */
void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        it(opaque, bs);
    }
}
2109
/* Return the device name the BDS was registered under (may be ""). */
const char *bdrv_get_device_name(BlockDriverState *bs)
{
    return bs->device_name;
}
2114
c6ca28d6
AL
2115void bdrv_flush_all(void)
2116{
2117 BlockDriverState *bs;
2118
1b7bdbc1 2119 QTAILQ_FOREACH(bs, &bdrv_states, list) {
c602a489 2120 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
c6ca28d6 2121 bdrv_flush(bs);
1b7bdbc1
SH
2122 }
2123 }
c6ca28d6
AL
2124}
2125
f2feebbd
KW
2126int bdrv_has_zero_init(BlockDriverState *bs)
2127{
2128 assert(bs->drv);
2129
336c1c12
KW
2130 if (bs->drv->bdrv_has_zero_init) {
2131 return bs->drv->bdrv_has_zero_init(bs);
f2feebbd
KW
2132 }
2133
2134 return 1;
2135}
2136
/* Argument/result bundle for the synchronous bdrv_is_allocated() wrapper
 * around the coroutine-based bdrv_co_is_allocated(). */
typedef struct BdrvCoIsAllocatedData {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    int *pnum;      /* out: contiguous sectors in the same state */
    int ret;        /* out: allocation status or negative errno */
    bool done;      /* set by the coroutine when ret is valid */
} BdrvCoIsAllocatedData;
2145
/*
 * Returns true iff the specified sector is present in the disk image. Drivers
 * not implementing the functionality are assumed to not support backing files,
 * hence all their sectors are reported as allocated.
 *
 * If 'sector_num' is beyond the end of the disk image the return value is 0
 * and 'pnum' is set to 0.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
 * beyond the end of the disk image it will be clamped.
 */
int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, int *pnum)
{
    int64_t n;

    /* Past-the-end queries report "unallocated, zero sectors" */
    if (sector_num >= bs->total_sectors) {
        *pnum = 0;
        return 0;
    }

    /* Clamp the query to the image size */
    n = bs->total_sectors - sector_num;
    if (n < nb_sectors) {
        nb_sectors = n;
    }

    if (!bs->drv->bdrv_co_is_allocated) {
        /* No driver hook: treat the whole range as allocated */
        *pnum = nb_sectors;
        return 1;
    }

    return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
}
2183
2184/* Coroutine wrapper for bdrv_is_allocated() */
2185static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
2186{
2187 BdrvCoIsAllocatedData *data = opaque;
2188 BlockDriverState *bs = data->bs;
2189
2190 data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
2191 data->pnum);
2192 data->done = true;
2193}
2194
/*
 * Synchronous wrapper around bdrv_co_is_allocated().
 *
 * See bdrv_co_is_allocated() for details.
 */
int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
                      int *pnum)
{
    Coroutine *co;
    BdrvCoIsAllocatedData data = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .pnum = pnum,
        .done = false,
    };

    /* Run the query in a coroutine, then pump the AIO event loop until the
     * coroutine flags completion via data.done. */
    co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
    qemu_coroutine_enter(co, &data);
    while (!data.done) {
        qemu_aio_wait();
    }
    return data.ret;
}
2219
2582bfed
LC
2220void bdrv_mon_event(const BlockDriverState *bdrv,
2221 BlockMonEventAction action, int is_read)
2222{
2223 QObject *data;
2224 const char *action_str;
2225
2226 switch (action) {
2227 case BDRV_ACTION_REPORT:
2228 action_str = "report";
2229 break;
2230 case BDRV_ACTION_IGNORE:
2231 action_str = "ignore";
2232 break;
2233 case BDRV_ACTION_STOP:
2234 action_str = "stop";
2235 break;
2236 default:
2237 abort();
2238 }
2239
2240 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
2241 bdrv->device_name,
2242 action_str,
2243 is_read ? "read" : "write");
2244 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
2245
2246 qobject_decref(data);
2247}
2248
/* QMP 'query-block': build a BlockInfoList entry for every block device.
 * Caller owns the returned list (freed via the QAPI-generated free func). */
BlockInfoList *qmp_query_block(Error **errp)
{
    BlockInfoList *head = NULL, *cur_item = NULL;
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        BlockInfoList *info = g_malloc0(sizeof(*info));

        info->value = g_malloc0(sizeof(*info->value));
        info->value->device = g_strdup(bs->device_name);
        info->value->type = g_strdup("unknown");
        info->value->locked = bdrv_dev_is_medium_locked(bs);
        info->value->removable = bdrv_dev_has_removable_media(bs);

        /* tray state only makes sense for removable media */
        if (bdrv_dev_has_removable_media(bs)) {
            info->value->has_tray_open = true;
            info->value->tray_open = bdrv_dev_is_tray_open(bs);
        }

        if (bdrv_iostatus_is_enabled(bs)) {
            info->value->has_io_status = true;
            info->value->io_status = bs->iostatus;
        }

        /* bs->drv != NULL means a medium is inserted */
        if (bs->drv) {
            info->value->has_inserted = true;
            info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
            info->value->inserted->file = g_strdup(bs->filename);
            info->value->inserted->ro = bs->read_only;
            info->value->inserted->drv = g_strdup(bs->drv->format_name);
            info->value->inserted->encrypted = bs->encrypted;
            if (bs->backing_file[0]) {
                info->value->inserted->has_backing_file = true;
                info->value->inserted->backing_file = g_strdup(bs->backing_file);
            }

            /* report configured throttling limits, if any */
            if (bs->io_limits_enabled) {
                info->value->inserted->bps =
                               bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
                info->value->inserted->bps_rd =
                               bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
                info->value->inserted->bps_wr =
                               bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
                info->value->inserted->iops =
                               bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
                info->value->inserted->iops_rd =
                               bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
                info->value->inserted->iops_wr =
                               bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
            }
        }

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}
a36e69dd 2312
f11f57e4
LC
/* Consider exposing this as a full fledged QMP command */
/* Build a BlockStats object from the accounting counters of @bs.
 * Recurses into bs->file so the protocol layer's stats appear as 'parent'.
 * Caller owns the returned object. */
static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
{
    BlockStats *s;

    s = g_malloc0(sizeof(*s));

    /* anonymous BDSes (e.g. the protocol layer) have no device name */
    if (bs->device_name[0]) {
        s->has_device = true;
        s->device = g_strdup(bs->device_name);
    }

    s->stats = g_malloc0(sizeof(*s->stats));
    s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
    s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
    s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
    s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
    s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
    s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
    s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
    s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
    s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];

    if (bs->file) {
        s->has_parent = true;
        s->parent = qmp_query_blockstat(bs->file, NULL);
    }

    return s;
}
2343
f11f57e4 2344BlockStatsList *qmp_query_blockstats(Error **errp)
218a536a 2345{
f11f57e4 2346 BlockStatsList *head = NULL, *cur_item = NULL;
a36e69dd
TS
2347 BlockDriverState *bs;
2348
1b7bdbc1 2349 QTAILQ_FOREACH(bs, &bdrv_states, list) {
f11f57e4
LC
2350 BlockStatsList *info = g_malloc0(sizeof(*info));
2351 info->value = qmp_query_blockstat(bs, NULL);
2352
2353 /* XXX: waiting for the qapi to support GSList */
2354 if (!cur_item) {
2355 head = cur_item = info;
2356 } else {
2357 cur_item->next = info;
2358 cur_item = info;
2359 }
a36e69dd 2360 }
218a536a 2361
f11f57e4 2362 return head;
a36e69dd 2363}
ea2384d3 2364
045df330
AL
2365const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2366{
2367 if (bs->backing_hd && bs->backing_hd->encrypted)
2368 return bs->backing_file;
2369 else if (bs->encrypted)
2370 return bs->filename;
2371 else
2372 return NULL;
2373}
2374
/* Copy the backing file name of @bs into @filename (truncated and
 * NUL-terminated to fit @filename_size). */
void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size)
{
    pstrcpy(filename, filename_size, bs->backing_file);
}
2380
5fafdf24 2381int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
faea38e7
FB
2382 const uint8_t *buf, int nb_sectors)
2383{
2384 BlockDriver *drv = bs->drv;
2385 if (!drv)
19cb3738 2386 return -ENOMEDIUM;
faea38e7
FB
2387 if (!drv->bdrv_write_compressed)
2388 return -ENOTSUP;
fbb7b4e0
KW
2389 if (bdrv_check_request(bs, sector_num, nb_sectors))
2390 return -EIO;
a55eb92c 2391
c6d22830 2392 if (bs->dirty_bitmap) {
7cd1e32a
LS
2393 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2394 }
a55eb92c 2395
faea38e7
FB
2396 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2397}
3b46e624 2398
faea38e7
FB
2399int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2400{
2401 BlockDriver *drv = bs->drv;
2402 if (!drv)
19cb3738 2403 return -ENOMEDIUM;
faea38e7
FB
2404 if (!drv->bdrv_get_info)
2405 return -ENOTSUP;
2406 memset(bdi, 0, sizeof(*bdi));
2407 return drv->bdrv_get_info(bs, bdi);
2408}
2409
45566e9c
CH
2410int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2411 int64_t pos, int size)
178e08a5
AL
2412{
2413 BlockDriver *drv = bs->drv;
2414 if (!drv)
2415 return -ENOMEDIUM;
7cdb1f6d
MK
2416 if (drv->bdrv_save_vmstate)
2417 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2418 if (bs->file)
2419 return bdrv_save_vmstate(bs->file, buf, pos, size);
2420 return -ENOTSUP;
178e08a5
AL
2421}
2422
45566e9c
CH
2423int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2424 int64_t pos, int size)
178e08a5
AL
2425{
2426 BlockDriver *drv = bs->drv;
2427 if (!drv)
2428 return -ENOMEDIUM;
7cdb1f6d
MK
2429 if (drv->bdrv_load_vmstate)
2430 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2431 if (bs->file)
2432 return bdrv_load_vmstate(bs->file, buf, pos, size);
2433 return -ENOTSUP;
178e08a5
AL
2434}
2435
8b9b0cc2
KW
2436void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2437{
2438 BlockDriver *drv = bs->drv;
2439
2440 if (!drv || !drv->bdrv_debug_event) {
2441 return;
2442 }
2443
2444 return drv->bdrv_debug_event(bs, event);
2445
2446}
2447
faea38e7
FB
2448/**************************************************************/
2449/* handling of snapshots */
2450
feeee5ac
MDCF
2451int bdrv_can_snapshot(BlockDriverState *bs)
2452{
2453 BlockDriver *drv = bs->drv;
07b70bfb 2454 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
feeee5ac
MDCF
2455 return 0;
2456 }
2457
2458 if (!drv->bdrv_snapshot_create) {
2459 if (bs->file != NULL) {
2460 return bdrv_can_snapshot(bs->file);
2461 }
2462 return 0;
2463 }
2464
2465 return 1;
2466}
2467
199630b6
BS
2468int bdrv_is_snapshot(BlockDriverState *bs)
2469{
2470 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2471}
2472
f9092b10
MA
/* Return the block device used for VM snapshots, caching the result in the
 * file-scope bs_snapshots. The cache is presumably invalidated elsewhere
 * when devices change — not visible in this chunk; confirm before relying
 * on it. */
BlockDriverState *bdrv_snapshots(void)
{
    BlockDriverState *bs;

    /* fast path: previously found device */
    if (bs_snapshots) {
        return bs_snapshots;
    }

    /* otherwise pick the first device that supports snapshots */
    bs = NULL;
    while ((bs = bdrv_next(bs))) {
        if (bdrv_can_snapshot(bs)) {
            bs_snapshots = bs;
            return bs;
        }
    }
    return NULL;
}
2490
5fafdf24 2491int bdrv_snapshot_create(BlockDriverState *bs,
faea38e7
FB
2492 QEMUSnapshotInfo *sn_info)
2493{
2494 BlockDriver *drv = bs->drv;
2495 if (!drv)
19cb3738 2496 return -ENOMEDIUM;
7cdb1f6d
MK
2497 if (drv->bdrv_snapshot_create)
2498 return drv->bdrv_snapshot_create(bs, sn_info);
2499 if (bs->file)
2500 return bdrv_snapshot_create(bs->file, sn_info);
2501 return -ENOTSUP;
faea38e7
FB
2502}
2503
/* Revert @bs to the internal snapshot @snapshot_id.
 * If the format driver has no hook, the snapshot lives in the protocol
 * layer: the format driver is closed, the snapshot applied on bs->file,
 * and the format driver reopened on top. On reopen failure bs->file is
 * deleted and bs->drv cleared, leaving the device without a medium. */
int bdrv_snapshot_goto(BlockDriverState *bs,
                       const char *snapshot_id)
{
    BlockDriver *drv = bs->drv;
    int ret, open_ret;

    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_snapshot_goto)
        return drv->bdrv_snapshot_goto(bs, snapshot_id);

    if (bs->file) {
        /* Close only the format layer; apply the snapshot underneath,
         * then re-open the format driver against the reverted file. */
        drv->bdrv_close(bs);
        ret = bdrv_snapshot_goto(bs->file, snapshot_id);
        open_ret = drv->bdrv_open(bs, bs->open_flags);
        if (open_ret < 0) {
            /* reopen failed: tear down so the device reads as empty */
            bdrv_delete(bs->file);
            bs->drv = NULL;
            return open_ret;
        }
        /* report the snapshot operation's status, not the reopen's */
        return ret;
    }

    return -ENOTSUP;
}
2529
2530int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2531{
2532 BlockDriver *drv = bs->drv;
2533 if (!drv)
19cb3738 2534 return -ENOMEDIUM;
7cdb1f6d
MK
2535 if (drv->bdrv_snapshot_delete)
2536 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2537 if (bs->file)
2538 return bdrv_snapshot_delete(bs->file, snapshot_id);
2539 return -ENOTSUP;
faea38e7
FB
2540}
2541
5fafdf24 2542int bdrv_snapshot_list(BlockDriverState *bs,
faea38e7
FB
2543 QEMUSnapshotInfo **psn_info)
2544{
2545 BlockDriver *drv = bs->drv;
2546 if (!drv)
19cb3738 2547 return -ENOMEDIUM;
7cdb1f6d
MK
2548 if (drv->bdrv_snapshot_list)
2549 return drv->bdrv_snapshot_list(bs, psn_info);
2550 if (bs->file)
2551 return bdrv_snapshot_list(bs->file, psn_info);
2552 return -ENOTSUP;
faea38e7
FB
2553}
2554
51ef6727 2555int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2556 const char *snapshot_name)
2557{
2558 BlockDriver *drv = bs->drv;
2559 if (!drv) {
2560 return -ENOMEDIUM;
2561 }
2562 if (!bs->read_only) {
2563 return -EINVAL;
2564 }
2565 if (drv->bdrv_snapshot_load_tmp) {
2566 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2567 }
2568 return -ENOTSUP;
2569}
2570
faea38e7
FB
#define NB_SUFFIXES 4

/* Format @size into @buf as a human-readable byte count using binary
 * (1024-based) units: plain digits up to 999, then one decimal place
 * below 10 units ("2.0K"), then whole units ("10K"), through K/M/G/T.
 * Returns @buf for call-chaining. */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t base;
    int i;

    if (size <= 999) {
        /* small values are printed exactly, without a suffix */
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    base = 1024;
    for (i = 0; i < NB_SUFFIXES; i++, base *= 1024) {
        if (size < 10 * base) {
            /* below 10 units: one decimal place */
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / base, suffixes[i]);
            break;
        }
        if (size < 1000 * base || i == NB_SUFFIXES - 1) {
            /* whole units, rounded to nearest (base >> 1 == half a unit) */
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     (size + (base >> 1)) / base, suffixes[i]);
            break;
        }
    }
    return buf;
}
2600
/* Format one snapshot as a fixed-width table row into @buf; with sn == NULL
 * the column header row is produced instead. Returns @buf. */
char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
{
    char buf1[128], date_buf[128], clock_buf[128];
#ifdef _WIN32
    /* no localtime_r on Win32; use the (non-reentrant) localtime */
    struct tm *ptm;
#else
    struct tm tm;
#endif
    time_t ti;
    int64_t secs;

    if (!sn) {
        /* header row — column widths must match the data row below */
        snprintf(buf, buf_size,
                 "%-10s%-20s%7s%20s%15s",
                 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
    } else {
        ti = sn->date_sec;
#ifdef _WIN32
        ptm = localtime(&ti);
        strftime(date_buf, sizeof(date_buf),
                 "%Y-%m-%d %H:%M:%S", ptm);
#else
        localtime_r(&ti, &tm);
        strftime(date_buf, sizeof(date_buf),
                 "%Y-%m-%d %H:%M:%S", &tm);
#endif
        /* render the VM clock (nanoseconds) as HH:MM:SS.mmm */
        secs = sn->vm_clock_nsec / 1000000000;
        snprintf(clock_buf, sizeof(clock_buf),
                 "%02d:%02d:%02d.%03d",
                 (int)(secs / 3600),
                 (int)((secs / 60) % 60),
                 (int)(secs % 60),
                 (int)((sn->vm_clock_nsec / 1000000) % 1000));
        snprintf(buf, buf_size,
                 "%-10s%-20s%7s%20s%15s",
                 sn->id_str, sn->name,
                 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
                 date_buf,
                 clock_buf);
    }
    return buf;
}
2643
ea2384d3 2644/**************************************************************/
83f64091 2645/* async I/Os */
ea2384d3 2646
/* Submit an asynchronous vectored read; @cb is invoked on completion.
 * Thin traced wrapper over the coroutine-based common path. */
BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
                                 QEMUIOVector *qiov, int nb_sectors,
                                 BlockDriverCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);

    /* false == read */
    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
                                 cb, opaque, false);
}
2656
f141eafe
AL
/* Submit an asynchronous vectored write; counterpart of bdrv_aio_readv(). */
BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
                                  QEMUIOVector *qiov, int nb_sectors,
                                  BlockDriverCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);

    /* true == write */
    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
                                 cb, opaque, true);
}
2666
40b4f539
KW
2667
/* Completion bookkeeping for bdrv_aio_multiwrite(): one entry per original
 * request (num_callbacks), while num_requests counts the merged requests
 * still in flight. */
typedef struct MultiwriteCB {
    int error;          /* first error seen, 0 if none so far */
    int num_requests;   /* merged requests not yet completed */
    int num_callbacks;  /* original caller requests == callbacks[] length */
    struct {
        BlockDriverCompletionFunc *cb;  /* caller's completion callback */
        void *opaque;                   /* caller's callback argument */
        QEMUIOVector *free_qiov;        /* merged qiov to destroy, or NULL */
        void *free_buf;                 /* zero-fill bounce buffer, or NULL */
    } callbacks[];      /* flexible array member, sized at allocation */
} MultiwriteCB;
2679
2680static void multiwrite_user_cb(MultiwriteCB *mcb)
2681{
2682 int i;
2683
2684 for (i = 0; i < mcb->num_callbacks; i++) {
2685 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1e1ea48d
SH
2686 if (mcb->callbacks[i].free_qiov) {
2687 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2688 }
7267c094 2689 g_free(mcb->callbacks[i].free_qiov);
f8a83245 2690 qemu_vfree(mcb->callbacks[i].free_buf);
40b4f539
KW
2691 }
2692}
2693
2694static void multiwrite_cb(void *opaque, int ret)
2695{
2696 MultiwriteCB *mcb = opaque;
2697
6d519a5f
SH
2698 trace_multiwrite_cb(mcb, ret);
2699
cb6d3ca0 2700 if (ret < 0 && !mcb->error) {
40b4f539 2701 mcb->error = ret;
40b4f539
KW
2702 }
2703
2704 mcb->num_requests--;
2705 if (mcb->num_requests == 0) {
de189a1b 2706 multiwrite_user_cb(mcb);
7267c094 2707 g_free(mcb);
40b4f539
KW
2708 }
2709}
2710
2711static int multiwrite_req_compare(const void *a, const void *b)
2712{
77be4366
CH
2713 const BlockRequest *req1 = a, *req2 = b;
2714
2715 /*
2716 * Note that we can't simply subtract req2->sector from req1->sector
2717 * here as that could overflow the return value.
2718 */
2719 if (req1->sector > req2->sector) {
2720 return 1;
2721 } else if (req1->sector < req2->sector) {
2722 return -1;
2723 } else {
2724 return 0;
2725 }
40b4f539
KW
2726}
2727
/*
 * Takes a bunch of requests and tries to merge them. Returns the number of
 * requests that remain after merging.
 *
 * Merged entries record their combined qiov (and any zero-fill buffer) in
 * mcb->callbacks[] so multiwrite_user_cb() can free them later.
 */
static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
                            int num_reqs, MultiwriteCB *mcb)
{
    int i, outidx;

    // Sort requests by start sector
    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);

    // Check if adjacent requests touch the same clusters. If so, combine them,
    // filling up gaps with zero sectors.
    outidx = 0;
    for (i = 1; i < num_reqs; i++) {
        int merge = 0;
        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;

        // This handles the cases that are valid for all block drivers, namely
        // exactly sequential writes and overlapping writes.
        if (reqs[i].sector <= oldreq_last) {
            merge = 1;
        }

        // The block driver may decide that it makes sense to combine requests
        // even if there is a gap of some sectors between them. In this case,
        // the gap is filled with zeros (therefore only applicable for yet
        // unused space in format like qcow2).
        if (!merge && bs->drv->bdrv_merge_requests) {
            merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
        }

        // Refuse to merge if the combined iovec would exceed the IOV_MAX
        // limit the host I/O layer imposes (+1 for a possible zero-fill iov).
        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
            merge = 0;
        }

        if (merge) {
            size_t size;
            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
            qemu_iovec_init(qiov,
                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);

            // Add the first request to the merged one. If the requests are
            // overlapping, drop the last sectors of the first request.
            size = (reqs[i].sector - reqs[outidx].sector) << 9;
            qemu_iovec_concat(qiov, reqs[outidx].qiov, size);

            // We might need to add some zeros between the two requests
            if (reqs[i].sector > oldreq_last) {
                size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
                uint8_t *buf = qemu_blockalign(bs, zero_bytes);
                memset(buf, 0, zero_bytes);
                qemu_iovec_add(qiov, buf, zero_bytes);
                mcb->callbacks[i].free_buf = buf;
            }

            // Add the second request
            qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);

            // Recompute the merged length from the combined iovec size.
            reqs[outidx].nb_sectors = qiov->size >> 9;
            reqs[outidx].qiov = qiov;

            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
        } else {
            // Not mergeable: start a new output slot with this request.
            outidx++;
            reqs[outidx].sector = reqs[i].sector;
            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
            reqs[outidx].qiov = reqs[i].qiov;
        }
    }

    return outidx + 1;
}
2802
/*
 * Submit multiple AIO write requests at once.
 *
 * On success, the function returns 0 and all requests in the reqs array have
 * been submitted. In error case this function returns -1, and any of the
 * requests may or may not be submitted yet. In particular, this means that the
 * callback will be called for some of the requests, for others it won't. The
 * caller must check the error field of the BlockRequest to wait for the right
 * callbacks (if error != 0, no callback will be called).
 *
 * The implementation may modify the contents of the reqs array, e.g. to merge
 * requests. However, the fields opaque and error are left unmodified as they
 * are used to signal failure for a single request to the caller.
 */
int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
{
    MultiwriteCB *mcb;
    int i;

    /* don't submit writes if we don't have a medium */
    if (bs->drv == NULL) {
        for (i = 0; i < num_reqs; i++) {
            reqs[i].error = -ENOMEDIUM;
        }
        return -1;
    }

    if (num_reqs == 0) {
        return 0;
    }

    // Create MultiwriteCB structure
    // (flexible array member: one callbacks[] slot per original request)
    mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
    mcb->num_requests = 0;
    mcb->num_callbacks = num_reqs;

    for (i = 0; i < num_reqs; i++) {
        mcb->callbacks[i].cb = reqs[i].cb;
        mcb->callbacks[i].opaque = reqs[i].opaque;
    }

    // Check for mergable requests
    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);

    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);

    /* Run the aio requests. */
    /* num_requests is set before submitting so that early completions
     * cannot drop the count to zero while later requests are still being
     * issued. */
    mcb->num_requests = num_reqs;
    for (i = 0; i < num_reqs; i++) {
        bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
                        reqs[i].nb_sectors, multiwrite_cb, mcb);
    }

    return 0;
}
2858
/* Cancel an in-flight AIO request via its pool's cancel hook. */
void bdrv_aio_cancel(BlockDriverAIOCB *acb)
{
    acb->pool->cancel(acb);
}
2863
98f90dba
ZYW
/* block I/O throttling */
/* Decide whether submitting @nb_sectors now would exceed the configured
 * bytes-per-second limit within the current accounting slice.
 * Returns true (and the suggested delay in *wait — in the scaled units
 * produced by wait_time * BLOCK_IO_SLICE_TIME * 10; confirm against the
 * throttling timer) if the request must be postponed, false otherwise. */
static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
                                   bool is_write, double elapsed_time, uint64_t *wait)
{
    uint64_t bps_limit = 0;
    double bytes_limit, bytes_base, bytes_res;
    double slice_time, wait_time;

    /* A total limit takes precedence over the per-direction limit. */
    if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
        bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
    } else if (bs->io_limits.bps[is_write]) {
        bps_limit = bs->io_limits.bps[is_write];
    } else {
        /* no byte limit configured for this direction */
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    slice_time = bs->slice_end - bs->slice_start;
    slice_time /= (NANOSECONDS_PER_SECOND);
    bytes_limit = bps_limit * slice_time;
    bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
    if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
        /* total limit: both directions count against the budget */
        bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
    }

    /* bytes_base: the bytes of data which have been read/written; and
     * it is obtained from the history statistic info.
     * bytes_res: the remaining bytes of data which need to be read/written.
     * (bytes_base + bytes_res) / bps_limit: used to calcuate
     * the total time for completing reading/writting all data.
     */
    bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;

    if (bytes_base + bytes_res <= bytes_limit) {
        /* still within this slice's byte budget */
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    /* Calc approx time to dispatch */
    wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;

    /* When the I/O rate at runtime exceeds the limits,
     * bs->slice_end need to be extended in order that the current statistic
     * info can be kept until the timer fire, so it is increased and tuned
     * based on the result of experiment.
     */
    bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
    bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
    if (wait) {
        *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
    }

    return true;
}
2924
/* Decide whether issuing one more operation now would exceed the configured
 * IOPS limit within the current slice; mirror image of
 * bdrv_exceed_bps_limits() with operation counts instead of bytes. */
static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
                                    double elapsed_time, uint64_t *wait)
{
    uint64_t iops_limit = 0;
    double ios_limit, ios_base;
    double slice_time, wait_time;

    /* total limit takes precedence over the per-direction limit */
    if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
        iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
    } else if (bs->io_limits.iops[is_write]) {
        iops_limit = bs->io_limits.iops[is_write];
    } else {
        /* no IOPS limit configured for this direction */
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    slice_time = bs->slice_end - bs->slice_start;
    slice_time /= (NANOSECONDS_PER_SECOND);
    ios_limit = iops_limit * slice_time;
    ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
    if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
        /* total limit: both directions count against the budget */
        ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
    }

    /* the +1 accounts for the operation about to be submitted */
    if (ios_base + 1 <= ios_limit) {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    /* Calc approx time to dispatch */
    wait_time = (ios_base + 1) / iops_limit;
    if (wait_time > elapsed_time) {
        wait_time = wait_time - elapsed_time;
    } else {
        wait_time = 0;
    }

    /* extend the slice so the statistics survive until the timer fires
     * (same empirically tuned factors as bdrv_exceed_bps_limits) */
    bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
    bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
    if (wait) {
        *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
    }

    return true;
}
2976
/* Combined throttling check: (re)initialize the accounting slice if needed,
 * then consult both the byte and IOPS limiters. Returns true and the larger
 * of the two suggested delays in *wait when the request must be delayed. */
static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
                                  bool is_write, int64_t *wait)
{
    int64_t now, max_wait;
    uint64_t bps_wait = 0, iops_wait = 0;
    double elapsed_time;
    int bps_ret, iops_ret;

    now = qemu_get_clock_ns(vm_clock);
    if ((bs->slice_start < now)
        && (bs->slice_end > now)) {
        /* still inside the current slice: just push its end out */
        bs->slice_end = now + bs->slice_time;
    } else {
        /* start a fresh slice and snapshot the counters as its baseline */
        bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
        bs->slice_start = now;
        bs->slice_end = now + bs->slice_time;

        bs->io_base.bytes[is_write] = bs->nr_bytes[is_write];
        bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];

        bs->io_base.ios[is_write] = bs->nr_ops[is_write];
        bs->io_base.ios[!is_write] = bs->nr_ops[!is_write];
    }

    /* elapsed time within the slice, in seconds */
    elapsed_time = now - bs->slice_start;
    elapsed_time /= (NANOSECONDS_PER_SECOND);

    bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
                                     is_write, elapsed_time, &bps_wait);
    iops_ret = bdrv_exceed_iops_limits(bs, is_write,
                                       elapsed_time, &iops_wait);
    if (bps_ret || iops_ret) {
        /* delayed: report the stricter (longer) of the two waits */
        max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
        if (wait) {
            *wait = max_wait;
        }

        now = qemu_get_clock_ns(vm_clock);
        if (bs->slice_end < now + max_wait) {
            bs->slice_end = now + max_wait;
        }

        return true;
    }

    if (wait) {
        *wait = 0;
    }

    return false;
}
ce1a14dc 3028
83f64091
FB
3029/**************************************************************/
3030/* async block device emulation */
3031
c16b5a2c
CH
/* AIOCB for emulating AIO on top of the driver's synchronous read/write:
 * the sync call runs immediately and completion is deferred via a BH. */
typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;    /* must be first: generic AIOCB header */
    QEMUBH *bh;                 /* bottom half delivering the completion */
    int ret;                    /* result of the synchronous call */
    /* vector translation state */
    QEMUIOVector *qiov;         /* caller's scatter/gather list */
    uint8_t *bounce;            /* linear bounce buffer for the sync call */
    int is_write;               /* direction flag */
} BlockDriverAIOCBSync;
3041
3042static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
3043{
b666d239
KW
3044 BlockDriverAIOCBSync *acb =
3045 container_of(blockacb, BlockDriverAIOCBSync, common);
6a7ad299 3046 qemu_bh_delete(acb->bh);
36afc451 3047 acb->bh = NULL;
c16b5a2c
CH
3048 qemu_aio_release(acb);
3049}
3050
/* AIOCB pool for the synchronous-emulation path above. */
static AIOPool bdrv_em_aio_pool = {
    .aiocb_size = sizeof(BlockDriverAIOCBSync),
    .cancel = bdrv_aio_cancel_em,
};
3055
/* BH callback finishing an emulated AIO request: copy read data back to
 * the caller's iovec, free the bounce buffer, deliver the completion, then
 * tear down the BH and the AIOCB (order matters: cb runs before release). */
static void bdrv_aio_bh_cb(void *opaque)
{
    BlockDriverAIOCBSync *acb = opaque;

    if (!acb->is_write)
        qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
    qemu_vfree(acb->bounce);
    acb->common.cb(acb->common.opaque, acb->ret);
    qemu_bh_delete(acb->bh);
    acb->bh = NULL;
    qemu_aio_release(acb);
}
beac80cd 3068
f141eafe
AL
/* Emulate AIO using the driver's synchronous bdrv_read/bdrv_write: the I/O
 * happens here through a linear bounce buffer, and the completion callback
 * is deferred to a bottom half (bdrv_aio_bh_cb). */
static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
                                            int64_t sector_num,
                                            QEMUIOVector *qiov,
                                            int nb_sectors,
                                            BlockDriverCompletionFunc *cb,
                                            void *opaque,
                                            int is_write)

{
    BlockDriverAIOCBSync *acb;

    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
    acb->is_write = is_write;
    acb->qiov = qiov;
    /* bounce buffer linearizes the caller's scatter/gather list */
    acb->bounce = qemu_blockalign(bs, qiov->size);
    acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);

    if (is_write) {
        qemu_iovec_to_buffer(acb->qiov, acb->bounce);
        acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
    } else {
        acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
    }

    /* completion (and, for reads, the copy-back) runs in the BH */
    qemu_bh_schedule(acb->bh);

    return &acb->common;
}
3097
f141eafe
AL
3098static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
3099 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 3100 BlockDriverCompletionFunc *cb, void *opaque)
beac80cd 3101{
f141eafe
AL
3102 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
3103}
83f64091 3104
f141eafe
AL
3105static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
3106 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
3107 BlockDriverCompletionFunc *cb, void *opaque)
3108{
3109 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
beac80cd 3110}
beac80cd 3111
68485420
KW
3112
/* AIOCB for requests executed in a coroutine; completion is delivered
 * from a bottom half once the coroutine stores its result in req.error. */
typedef struct BlockDriverAIOCBCoroutine {
    BlockDriverAIOCB common;    /* must be first: generic AIOCB header */
    BlockRequest req;           /* request parameters and result (req.error) */
    bool is_write;              /* direction flag for bdrv_co_do_rw() */
    QEMUBH* bh;                 /* bottom half delivering the completion */
} BlockDriverAIOCBCoroutine;
3119
/* Cancel hook for coroutine AIOCBs: there is no per-request cancellation,
 * so drain all outstanding AIO, which completes this request too. */
static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
{
    qemu_aio_flush();
}
3124
/* AIOCB pool for coroutine-backed requests. */
static AIOPool bdrv_em_co_aio_pool = {
    .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
    .cancel = bdrv_aio_co_cancel_em,
};
3129
/* BH callback finishing a coroutine AIO request: deliver the result stored
 * in req.error, then free the BH and the AIOCB (in that order). */
static void bdrv_co_em_bh(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;

    acb->common.cb(acb->common.opaque, acb->req.error);
    qemu_bh_delete(acb->bh);
    qemu_aio_release(acb);
}
3138
b2a61371
SH
3139/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
3140static void coroutine_fn bdrv_co_do_rw(void *opaque)
3141{
3142 BlockDriverAIOCBCoroutine *acb = opaque;
3143 BlockDriverState *bs = acb->common.bs;
3144
3145 if (!acb->is_write) {
3146 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
3147 acb->req.nb_sectors, acb->req.qiov);
3148 } else {
3149 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
3150 acb->req.nb_sectors, acb->req.qiov);
3151 }
3152
35246a68 3153 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
b2a61371
SH
3154 qemu_bh_schedule(acb->bh);
3155}
3156
68485420
KW
3157static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
3158 int64_t sector_num,
3159 QEMUIOVector *qiov,
3160 int nb_sectors,
3161 BlockDriverCompletionFunc *cb,
3162 void *opaque,
8c5873d6 3163 bool is_write)
68485420
KW
3164{
3165 Coroutine *co;
3166 BlockDriverAIOCBCoroutine *acb;
3167
3168 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3169 acb->req.sector = sector_num;
3170 acb->req.nb_sectors = nb_sectors;
3171 acb->req.qiov = qiov;
3172 acb->is_write = is_write;
3173
8c5873d6 3174 co = qemu_coroutine_create(bdrv_co_do_rw);
68485420
KW
3175 qemu_coroutine_enter(co, acb);
3176
3177 return &acb->common;
3178}
3179
07f07615 3180static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
b2e12bc6 3181{
07f07615
PB
3182 BlockDriverAIOCBCoroutine *acb = opaque;
3183 BlockDriverState *bs = acb->common.bs;
b2e12bc6 3184
07f07615
PB
3185 acb->req.error = bdrv_co_flush(bs);
3186 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
b2e12bc6 3187 qemu_bh_schedule(acb->bh);
b2e12bc6
CH
3188}
3189
07f07615 3190BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
016f5cf6
AG
3191 BlockDriverCompletionFunc *cb, void *opaque)
3192{
07f07615 3193 trace_bdrv_aio_flush(bs, opaque);
016f5cf6 3194
07f07615
PB
3195 Coroutine *co;
3196 BlockDriverAIOCBCoroutine *acb;
016f5cf6 3197
07f07615
PB
3198 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3199 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
3200 qemu_coroutine_enter(co, acb);
016f5cf6 3201
016f5cf6
AG
3202 return &acb->common;
3203}
3204
4265d620
PB
3205static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
3206{
3207 BlockDriverAIOCBCoroutine *acb = opaque;
3208 BlockDriverState *bs = acb->common.bs;
3209
3210 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
3211 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3212 qemu_bh_schedule(acb->bh);
3213}
3214
3215BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
3216 int64_t sector_num, int nb_sectors,
3217 BlockDriverCompletionFunc *cb, void *opaque)
3218{
3219 Coroutine *co;
3220 BlockDriverAIOCBCoroutine *acb;
3221
3222 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
3223
3224 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3225 acb->req.sector = sector_num;
3226 acb->req.nb_sectors = nb_sectors;
3227 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
3228 qemu_coroutine_enter(co, acb);
3229
3230 return &acb->common;
3231}
3232
ea2384d3
FB
/* Register all built-in block drivers (module init hook). */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}
ce1a14dc 3237
eb852011
MA
/*
 * Like bdrv_init(), but restrict driver selection to the configured
 * block driver whitelist.
 */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
3243
c16b5a2c
CH
3244void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
3245 BlockDriverCompletionFunc *cb, void *opaque)
ce1a14dc 3246{
ce1a14dc
PB
3247 BlockDriverAIOCB *acb;
3248
6bbff9a0
AL
3249 if (pool->free_aiocb) {
3250 acb = pool->free_aiocb;
3251 pool->free_aiocb = acb->next;
ce1a14dc 3252 } else {
7267c094 3253 acb = g_malloc0(pool->aiocb_size);
6bbff9a0 3254 acb->pool = pool;
ce1a14dc
PB
3255 }
3256 acb->bs = bs;
3257 acb->cb = cb;
3258 acb->opaque = opaque;
3259 return acb;
3260}
3261
3262void qemu_aio_release(void *p)
3263{
6bbff9a0
AL
3264 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
3265 AIOPool *pool = acb->pool;
3266 acb->next = pool->free_aiocb;
3267 pool->free_aiocb = acb;
ce1a14dc 3268}
19cb3738 3269
f9f05dc5
KW
3270/**************************************************************/
3271/* Coroutine block device emulation */
3272
/* Tracks an AIO request issued on behalf of a yielded coroutine. */
typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;   /* coroutine to re-enter on completion */
    int ret;                /* request result, filled in by the callback */
} CoroutineIOCompletion;
3277
3278static void bdrv_co_io_em_complete(void *opaque, int ret)
3279{
3280 CoroutineIOCompletion *co = opaque;
3281
3282 co->ret = ret;
3283 qemu_coroutine_enter(co->coroutine, NULL);
3284}
3285
3286static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
3287 int nb_sectors, QEMUIOVector *iov,
3288 bool is_write)
3289{
3290 CoroutineIOCompletion co = {
3291 .coroutine = qemu_coroutine_self(),
3292 };
3293 BlockDriverAIOCB *acb;
3294
3295 if (is_write) {
a652d160
SH
3296 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
3297 bdrv_co_io_em_complete, &co);
f9f05dc5 3298 } else {
a652d160
SH
3299 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3300 bdrv_co_io_em_complete, &co);
f9f05dc5
KW
3301 }
3302
59370aaa 3303 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
f9f05dc5
KW
3304 if (!acb) {
3305 return -EIO;
3306 }
3307 qemu_coroutine_yield();
3308
3309 return co.ret;
3310}
3311
/* Coroutine read emulated via the driver's AIO readv callback. */
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
}
3318
/* Coroutine write emulated via the driver's AIO writev callback. */
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
}
3325
/* Coroutine entry point for the synchronous bdrv_flush() wrapper. */
static void coroutine_fn bdrv_flush_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    rwco->ret = bdrv_co_flush(rwco->bs);
}
3332
/*
 * Flush the image to stable storage.
 *
 * Ordering matters here: data is first flushed from any format-driver
 * cache to the OS (always, even with cache=unsafe), and only then
 * forced from the OS to the disk (skipped for cache=unsafe).
 */
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    int ret;

    /* No medium/driver attached: nothing to flush. */
    if (!bs->drv) {
        return 0;
    }

    /* Write back cached data to the OS even with cache=unsafe */
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            return ret;
        }
    }

    /* But don't actually force it to the disk with cache=unsafe */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        return 0;
    }

    if (bs->drv->bdrv_co_flush_to_disk) {
        /* Native coroutine flush. */
        return bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        /* Emulate on top of the driver's AIO flush: submit, then yield
         * until bdrv_co_io_em_complete() wakes us with the result. */
        BlockDriverAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            return -EIO;
        } else {
            qemu_coroutine_yield();
            return co.ret;
        }
    } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work because
         * that would break guests even if the server operates in writethrough
         * mode.
         *
         * Let's hope the user knows what he's doing.
         */
        return 0;
    }
}
3384
0f15423c
AL
3385void bdrv_invalidate_cache(BlockDriverState *bs)
3386{
3387 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3388 bs->drv->bdrv_invalidate_cache(bs);
3389 }
3390}
3391
/* Invalidate caches on every open BlockDriverState. */
void bdrv_invalidate_cache_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_invalidate_cache(bs);
    }
}
3400
07f07615
PB
3401int bdrv_flush(BlockDriverState *bs)
3402{
3403 Coroutine *co;
3404 RwCo rwco = {
3405 .bs = bs,
3406 .ret = NOT_DONE,
e7a8a783 3407 };
e7a8a783 3408
07f07615
PB
3409 if (qemu_in_coroutine()) {
3410 /* Fast-path if already in coroutine context */
3411 bdrv_flush_co_entry(&rwco);
3412 } else {
3413 co = qemu_coroutine_create(bdrv_flush_co_entry);
3414 qemu_coroutine_enter(co, &rwco);
3415 while (rwco.ret == NOT_DONE) {
3416 qemu_aio_wait();
3417 }
e7a8a783 3418 }
07f07615
PB
3419
3420 return rwco.ret;
e7a8a783
KW
3421}
3422
4265d620
PB
/* Coroutine entry point for the synchronous bdrv_discard() wrapper. */
static void coroutine_fn bdrv_discard_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
}
3429
/*
 * Discard (trim/unmap) @nb_sectors starting at @sector_num.
 *
 * Prefers the driver's native coroutine discard; falls back to the AIO
 * discard hook (submit + yield); with no driver support at all the
 * request is treated as a successful no-op.
 */
int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
                                 int nb_sectors)
{
    if (!bs->drv) {
        return -ENOMEDIUM;
    } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    } else if (bs->read_only) {
        return -EROFS;
    } else if (bs->drv->bdrv_co_discard) {
        return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
    } else if (bs->drv->bdrv_aio_discard) {
        BlockDriverAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
                                        bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            return -EIO;
        } else {
            /* Wait for bdrv_co_io_em_complete() to wake us up. */
            qemu_coroutine_yield();
            return co.ret;
        }
    } else {
        /* No driver support: succeed without doing anything. */
        return 0;
    }
}
3459
3460int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
3461{
3462 Coroutine *co;
3463 RwCo rwco = {
3464 .bs = bs,
3465 .sector_num = sector_num,
3466 .nb_sectors = nb_sectors,
3467 .ret = NOT_DONE,
3468 };
3469
3470 if (qemu_in_coroutine()) {
3471 /* Fast-path if already in coroutine context */
3472 bdrv_discard_co_entry(&rwco);
3473 } else {
3474 co = qemu_coroutine_create(bdrv_discard_co_entry);
3475 qemu_coroutine_enter(co, &rwco);
3476 while (rwco.ret == NOT_DONE) {
3477 qemu_aio_wait();
3478 }
3479 }
3480
3481 return rwco.ret;
3482}
3483
19cb3738
FB
3484/**************************************************************/
3485/* removable device support */
3486
3487/**
3488 * Return TRUE if the media is present
3489 */
3490int bdrv_is_inserted(BlockDriverState *bs)
3491{
3492 BlockDriver *drv = bs->drv;
a1aff5bf 3493
19cb3738
FB
3494 if (!drv)
3495 return 0;
3496 if (!drv->bdrv_is_inserted)
a1aff5bf
MA
3497 return 1;
3498 return drv->bdrv_is_inserted(bs);
19cb3738
FB
3499}
3500
3501/**
8e49ca46
MA
3502 * Return whether the media changed since the last call to this
3503 * function, or -ENOTSUP if we don't know. Most drivers don't know.
19cb3738
FB
3504 */
3505int bdrv_media_changed(BlockDriverState *bs)
3506{
3507 BlockDriver *drv = bs->drv;
19cb3738 3508
8e49ca46
MA
3509 if (drv && drv->bdrv_media_changed) {
3510 return drv->bdrv_media_changed(bs);
3511 }
3512 return -ENOTSUP;
19cb3738
FB
3513}
3514
3515/**
3516 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3517 */
fdec4404 3518void bdrv_eject(BlockDriverState *bs, int eject_flag)
19cb3738
FB
3519{
3520 BlockDriver *drv = bs->drv;
19cb3738 3521
822e1cd1
MA
3522 if (drv && drv->bdrv_eject) {
3523 drv->bdrv_eject(bs, eject_flag);
19cb3738
FB
3524 }
3525}
3526
19cb3738
FB
3527/**
3528 * Lock or unlock the media (if it is locked, the user won't be able
3529 * to eject it manually).
3530 */
025e849a 3531void bdrv_lock_medium(BlockDriverState *bs, bool locked)
19cb3738
FB
3532{
3533 BlockDriver *drv = bs->drv;
3534
025e849a 3535 trace_bdrv_lock_medium(bs, locked);
b8c6d095 3536
025e849a
MA
3537 if (drv && drv->bdrv_lock_medium) {
3538 drv->bdrv_lock_medium(bs, locked);
19cb3738
FB
3539 }
3540}
985a03b0
TS
3541
3542/* needed for generic scsi interface */
3543
3544int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3545{
3546 BlockDriver *drv = bs->drv;
3547
3548 if (drv && drv->bdrv_ioctl)
3549 return drv->bdrv_ioctl(bs, req, buf);
3550 return -ENOTSUP;
3551}
7d780669 3552
221f715d
AL
3553BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3554 unsigned long int req, void *buf,
3555 BlockDriverCompletionFunc *cb, void *opaque)
7d780669 3556{
221f715d 3557 BlockDriver *drv = bs->drv;
7d780669 3558
221f715d
AL
3559 if (drv && drv->bdrv_aio_ioctl)
3560 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3561 return NULL;
7d780669 3562}
e268ca52 3563
7b6f9300
MA
/* Set the memory alignment required for I/O buffers used with @bs. */
void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
{
    bs->buffer_alignment = align;
}
7cd1e32a 3568
e268ca52
AL
3569void *qemu_blockalign(BlockDriverState *bs, size_t size)
3570{
3571 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3572}
7cd1e32a
LS
3573
3574void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3575{
3576 int64_t bitmap_size;
a55eb92c 3577
aaa0eb75 3578 bs->dirty_count = 0;
a55eb92c 3579 if (enable) {
c6d22830
JK
3580 if (!bs->dirty_bitmap) {
3581 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3582 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3583 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
a55eb92c 3584
7267c094 3585 bs->dirty_bitmap = g_malloc0(bitmap_size);
a55eb92c 3586 }
7cd1e32a 3587 } else {
c6d22830 3588 if (bs->dirty_bitmap) {
7267c094 3589 g_free(bs->dirty_bitmap);
c6d22830 3590 bs->dirty_bitmap = NULL;
a55eb92c 3591 }
7cd1e32a
LS
3592 }
3593}
3594
3595int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3596{
6ea44308 3597 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c 3598
c6d22830
JK
3599 if (bs->dirty_bitmap &&
3600 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
6d59fec1
MT
3601 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3602 (1UL << (chunk % (sizeof(unsigned long) * 8))));
7cd1e32a
LS
3603 } else {
3604 return 0;
3605 }
3606}
3607
a55eb92c
JK
/* Clear the dirty bits covering [cur_sector, cur_sector + nr_sectors). */
void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                      int nr_sectors)
{
    set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
}
aaa0eb75
LS
3613
/* Return the current dirty counter for @bs. */
int64_t bdrv_get_dirty_count(BlockDriverState *bs)
{
    return bs->dirty_count;
}
f88e1a42 3618
db593f25
MT
/*
 * Mark/unmark @bs as in use by an exclusive operation.  Asserts on
 * redundant transitions (setting while already set, and vice versa).
 */
void bdrv_set_in_use(BlockDriverState *bs, int in_use)
{
    assert(bs->in_use != in_use);
    bs->in_use = in_use;
}
3624
/* Return nonzero if @bs is currently marked in use. */
int bdrv_in_use(BlockDriverState *bs)
{
    return bs->in_use;
}
3629
28a7282a
LC
/* Enable I/O status reporting for @bs and reset the status to OK. */
void bdrv_iostatus_enable(BlockDriverState *bs)
{
    bs->iostatus_enabled = true;
    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}
3635
3636/* The I/O status is only enabled if the drive explicitly
3637 * enables it _and_ the VM is configured to stop on errors */
3638bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3639{
d6bf279e 3640 return (bs->iostatus_enabled &&
28a7282a
LC
3641 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3642 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3643 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3644}
3645
/* Turn off I/O status reporting for @bs. */
void bdrv_iostatus_disable(BlockDriverState *bs)
{
    bs->iostatus_enabled = false;
}
3650
/* Reset the I/O status to OK, but only when reporting is enabled. */
void bdrv_iostatus_reset(BlockDriverState *bs)
{
    if (bdrv_iostatus_is_enabled(bs)) {
        bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
    }
}
3657
3658/* XXX: Today this is set by device models because it makes the implementation
3659 quite simple. However, the block layer knows about the error, so it's
3660 possible to implement this without device models being involved */
3661void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3662{
58e21ef5
LC
3663 if (bdrv_iostatus_is_enabled(bs) &&
3664 bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
28a7282a 3665 assert(error >= 0);
58e21ef5
LC
3666 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3667 BLOCK_DEVICE_IO_STATUS_FAILED;
28a7282a
LC
3668 }
3669}
3670
a597e79c
CH
3671void
3672bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3673 enum BlockAcctType type)
3674{
3675 assert(type < BDRV_MAX_IOTYPE);
3676
3677 cookie->bytes = bytes;
c488c7f6 3678 cookie->start_time_ns = get_clock();
a597e79c
CH
3679 cookie->type = type;
3680}
3681
3682void
3683bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3684{
3685 assert(cookie->type < BDRV_MAX_IOTYPE);
3686
3687 bs->nr_bytes[cookie->type] += cookie->bytes;
3688 bs->nr_ops[cookie->type]++;
c488c7f6 3689 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
a597e79c
CH
3690}
3691
f88e1a42
JS
3692int bdrv_img_create(const char *filename, const char *fmt,
3693 const char *base_filename, const char *base_fmt,
3694 char *options, uint64_t img_size, int flags)
3695{
3696 QEMUOptionParameter *param = NULL, *create_options = NULL;
d220894e 3697 QEMUOptionParameter *backing_fmt, *backing_file, *size;
f88e1a42
JS
3698 BlockDriverState *bs = NULL;
3699 BlockDriver *drv, *proto_drv;
96df67d1 3700 BlockDriver *backing_drv = NULL;
f88e1a42
JS
3701 int ret = 0;
3702
3703 /* Find driver and parse its options */
3704 drv = bdrv_find_format(fmt);
3705 if (!drv) {
3706 error_report("Unknown file format '%s'", fmt);
4f70f249 3707 ret = -EINVAL;
f88e1a42
JS
3708 goto out;
3709 }
3710
3711 proto_drv = bdrv_find_protocol(filename);
3712 if (!proto_drv) {
3713 error_report("Unknown protocol '%s'", filename);
4f70f249 3714 ret = -EINVAL;
f88e1a42
JS
3715 goto out;
3716 }
3717
3718 create_options = append_option_parameters(create_options,
3719 drv->create_options);
3720 create_options = append_option_parameters(create_options,
3721 proto_drv->create_options);
3722
3723 /* Create parameter list with default values */
3724 param = parse_option_parameters("", create_options, param);
3725
3726 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3727
3728 /* Parse -o options */
3729 if (options) {
3730 param = parse_option_parameters(options, create_options, param);
3731 if (param == NULL) {
3732 error_report("Invalid options for file format '%s'.", fmt);
4f70f249 3733 ret = -EINVAL;
f88e1a42
JS
3734 goto out;
3735 }
3736 }
3737
3738 if (base_filename) {
3739 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3740 base_filename)) {
3741 error_report("Backing file not supported for file format '%s'",
3742 fmt);
4f70f249 3743 ret = -EINVAL;
f88e1a42
JS
3744 goto out;
3745 }
3746 }
3747
3748 if (base_fmt) {
3749 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3750 error_report("Backing file format not supported for file "
3751 "format '%s'", fmt);
4f70f249 3752 ret = -EINVAL;
f88e1a42
JS
3753 goto out;
3754 }
3755 }
3756
792da93a
JS
3757 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3758 if (backing_file && backing_file->value.s) {
3759 if (!strcmp(filename, backing_file->value.s)) {
3760 error_report("Error: Trying to create an image with the "
3761 "same filename as the backing file");
4f70f249 3762 ret = -EINVAL;
792da93a
JS
3763 goto out;
3764 }
3765 }
3766
f88e1a42
JS
3767 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3768 if (backing_fmt && backing_fmt->value.s) {
96df67d1
SH
3769 backing_drv = bdrv_find_format(backing_fmt->value.s);
3770 if (!backing_drv) {
f88e1a42
JS
3771 error_report("Unknown backing file format '%s'",
3772 backing_fmt->value.s);
4f70f249 3773 ret = -EINVAL;
f88e1a42
JS
3774 goto out;
3775 }
3776 }
3777
3778 // The size for the image must always be specified, with one exception:
3779 // If we are using a backing file, we can obtain the size from there
d220894e
KW
3780 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3781 if (size && size->value.n == -1) {
f88e1a42
JS
3782 if (backing_file && backing_file->value.s) {
3783 uint64_t size;
f88e1a42
JS
3784 char buf[32];
3785
f88e1a42
JS
3786 bs = bdrv_new("");
3787
96df67d1 3788 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
f88e1a42 3789 if (ret < 0) {
96df67d1 3790 error_report("Could not open '%s'", backing_file->value.s);
f88e1a42
JS
3791 goto out;
3792 }
3793 bdrv_get_geometry(bs, &size);
3794 size *= 512;
3795
3796 snprintf(buf, sizeof(buf), "%" PRId64, size);
3797 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3798 } else {
3799 error_report("Image creation needs a size parameter");
4f70f249 3800 ret = -EINVAL;
f88e1a42
JS
3801 goto out;
3802 }
3803 }
3804
3805 printf("Formatting '%s', fmt=%s ", filename, fmt);
3806 print_option_parameters(param);
3807 puts("");
3808
3809 ret = bdrv_create(drv, filename, param);
3810
3811 if (ret < 0) {
3812 if (ret == -ENOTSUP) {
3813 error_report("Formatting or formatting option not supported for "
3814 "file format '%s'", fmt);
3815 } else if (ret == -EFBIG) {
3816 error_report("The image size is too large for file format '%s'",
3817 fmt);
3818 } else {
3819 error_report("%s: error while creating %s: %s", filename, fmt,
3820 strerror(-ret));
3821 }
3822 }
3823
3824out:
3825 free_option_parameters(create_options);
3826 free_option_parameters(param);
3827
3828 if (bs) {
3829 bdrv_delete(bs);
3830 }
4f70f249
JS
3831
3832 return ret;
f88e1a42 3833}