]> git.proxmox.com Git - qemu.git/blame - block.c
block: implement bdrv_co_is_allocated() boundary cases
[qemu.git] / block.c
CommitLineData
fc01f7e7
FB
1/*
2 * QEMU System Emulator block driver
5fafdf24 3 *
fc01f7e7 4 * Copyright (c) 2003 Fabrice Bellard
5fafdf24 5 *
fc01f7e7
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
3990d09a 24#include "config-host.h"
faf07963 25#include "qemu-common.h"
6d519a5f 26#include "trace.h"
376253ec 27#include "monitor.h"
ea2384d3 28#include "block_int.h"
5efa9d5a 29#include "module.h"
f795e743 30#include "qjson.h"
68485420 31#include "qemu-coroutine.h"
b2023818 32#include "qmp-commands.h"
0563e191 33#include "qemu-timer.h"
fc01f7e7 34
71e72a19 35#ifdef CONFIG_BSD
7674e7bf
FB
36#include <sys/types.h>
37#include <sys/stat.h>
38#include <sys/ioctl.h>
72cf2d4f 39#include <sys/queue.h>
c5e97233 40#ifndef __DragonFly__
7674e7bf
FB
41#include <sys/disk.h>
42#endif
c5e97233 43#endif
7674e7bf 44
49dc768d
AL
45#ifdef _WIN32
46#include <windows.h>
47#endif
48
1c9805a3
SH
49#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
50
7d4b4ba5 51static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
f141eafe
AL
52static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
53 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
c87c0672 54 BlockDriverCompletionFunc *cb, void *opaque);
f141eafe
AL
55static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
56 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 57 BlockDriverCompletionFunc *cb, void *opaque);
f9f05dc5
KW
58static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
59 int64_t sector_num, int nb_sectors,
60 QEMUIOVector *iov);
61static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
62 int64_t sector_num, int nb_sectors,
63 QEMUIOVector *iov);
c5fbe571
SH
64static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
65 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
1c9805a3
SH
66static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
67 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
b2a61371
SH
68static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
69 int64_t sector_num,
70 QEMUIOVector *qiov,
71 int nb_sectors,
72 BlockDriverCompletionFunc *cb,
73 void *opaque,
8c5873d6 74 bool is_write);
b2a61371 75static void coroutine_fn bdrv_co_do_rw(void *opaque);
ec530c81 76
98f90dba
ZYW
77static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
78 bool is_write, double elapsed_time, uint64_t *wait);
79static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
80 double elapsed_time, uint64_t *wait);
81static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
82 bool is_write, int64_t *wait);
83
1b7bdbc1
SH
84static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
85 QTAILQ_HEAD_INITIALIZER(bdrv_states);
7ee930d0 86
8a22f02a
SH
87static QLIST_HEAD(, BlockDriver) bdrv_drivers =
88 QLIST_HEAD_INITIALIZER(bdrv_drivers);
ea2384d3 89
f9092b10
MA
90/* The device to use for VM snapshots */
91static BlockDriverState *bs_snapshots;
92
eb852011
MA
93/* If non-zero, use only whitelisted block drivers */
94static int use_bdrv_whitelist;
95
9e0b22f4
SH
96#ifdef _WIN32
/* Return non-zero when filename starts with a drive letter followed by ':'
 * (e.g. "c:" or "D:\foo"). */
static int is_windows_drive_prefix(const char *filename)
{
    char head = filename[0];
    int is_letter = (head >= 'a' && head <= 'z') ||
                    (head >= 'A' && head <= 'Z');

    return is_letter && filename[1] == ':';
}
103
104int is_windows_drive(const char *filename)
105{
106 if (is_windows_drive_prefix(filename) &&
107 filename[2] == '\0')
108 return 1;
109 if (strstart(filename, "\\\\.\\", NULL) ||
110 strstart(filename, "//./", NULL))
111 return 1;
112 return 0;
113}
114#endif
115
0563e191 116/* throttling disk I/O limits */
98f90dba
ZYW
/*
 * Disable I/O throttling for @bs.
 *
 * Restarts every coroutine still queued on the throttle, tears down the
 * wake-up timer, and resets the accounting slice so a later re-enable
 * starts from a clean state.
 */
void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    /* Drain the queue: release every request blocked on the limits */
    while (qemu_co_queue_next(&bs->throttled_reqs));

    if (bs->block_timer) {
        qemu_del_timer(bs->block_timer);
        qemu_free_timer(bs->block_timer);
        bs->block_timer = NULL;
    }

    /* Clear slice accounting state */
    bs->slice_start = 0;
    bs->slice_end = 0;
    bs->slice_time = 0;
    memset(&bs->io_base, 0, sizeof(bs->io_base));
}
134
0563e191
ZYW
135static void bdrv_block_timer(void *opaque)
136{
137 BlockDriverState *bs = opaque;
138
139 qemu_co_queue_next(&bs->throttled_reqs);
140}
141
/*
 * Enable I/O throttling for @bs.
 *
 * Initializes the throttle queue and wake-up timer and opens the first
 * accounting slice (5 * BLOCK_IO_SLICE_TIME long, starting now).
 */
void bdrv_io_limits_enable(BlockDriverState *bs)
{
    qemu_co_queue_init(&bs->throttled_reqs);
    bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
    bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
    bs->slice_start = qemu_get_clock_ns(vm_clock);
    bs->slice_end = bs->slice_start + bs->slice_time;
    /* Start the slice with zeroed byte/iop counters */
    memset(&bs->io_base, 0, sizeof(bs->io_base));
    bs->io_limits_enabled = true;
}
152
153bool bdrv_io_limits_enabled(BlockDriverState *bs)
154{
155 BlockIOLimit *io_limits = &bs->io_limits;
156 return io_limits->bps[BLOCK_IO_LIMIT_READ]
157 || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
158 || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
159 || io_limits->iops[BLOCK_IO_LIMIT_READ]
160 || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
161 || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
162}
163
98f90dba
ZYW
/*
 * Block the calling coroutine until the request fits within the I/O limits.
 *
 * Requests are serviced in FIFO order: a new request first waits behind any
 * already-throttled ones, then re-checks the limits in a loop, re-queuing
 * itself at the head until it is allowed through.
 */
static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     bool is_write, int nb_sectors)
{
    int64_t wait_time = -1;

    /* Preserve FIFO order: queue behind requests that arrived earlier */
    if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
        qemu_co_queue_wait(&bs->throttled_reqs);
    }

    /* In fact, we hope to keep each request's timing, in FIFO mode. The next
     * throttled requests will not be dequeued until the current request is
     * allowed to be serviced. So if the current request still exceeds the
     * limits, it will be inserted to the head. All requests followed it will
     * be still in throttled_reqs queue.
     */

    while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
        /* Arm the timer to wake us when enough budget should be available */
        qemu_mod_timer(bs->block_timer,
                       wait_time + qemu_get_clock_ns(vm_clock));
        qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
    }

    /* Let the next queued request try its luck */
    qemu_co_queue_next(&bs->throttled_reqs);
}
188
9e0b22f4
SH
/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    /* Drive letters and device paths look like protocols but are not */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    return strchr(path, ':') ? 1 : 0;
}
201
/* Return 1 when @path is absolute.  An optional "proto:" prefix is skipped
 * first, so "file:/x" counts as absolute while "file:x" does not. */
int path_is_absolute(const char *path)
{
    const char *rest;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (path[0] == '/' || path[0] == '\\') {
        return 1;
    }
#endif

    rest = strchr(path, ':');
    rest = rest ? rest + 1 : path;

#ifdef _WIN32
    return rest[0] == '/' || rest[0] == '\\';
#else
    return rest[0] == '/';
#endif
}
221
83f64091
FB
/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URL are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        /* p points just past a "proto:" prefix, or at the start of the path */
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        /* p1 points just past the last directory separator, if any */
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            /* On Windows '\\' is also a separator; take whichever is last */
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        /* Keep the longer of the two prefixes (protocol vs. directory) */
        if (p1 > p)
            p = p1;
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        /* Copy the directory part of base_path, then append filename */
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}
265
5efa9d5a 266void bdrv_register(BlockDriver *bdrv)
ea2384d3 267{
8c5873d6
SH
268 /* Block drivers without coroutine functions need emulation */
269 if (!bdrv->bdrv_co_readv) {
f9f05dc5
KW
270 bdrv->bdrv_co_readv = bdrv_co_readv_em;
271 bdrv->bdrv_co_writev = bdrv_co_writev_em;
272
f8c35c1d
SH
273 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
274 * the block driver lacks aio we need to emulate that too.
275 */
f9f05dc5
KW
276 if (!bdrv->bdrv_aio_readv) {
277 /* add AIO emulation layer */
278 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
279 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
f9f05dc5 280 }
83f64091 281 }
b2e12bc6 282
8a22f02a 283 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
ea2384d3 284}
b338082b
FB
285
/* create a new block device (by default it is empty) */
BlockDriverState *bdrv_new(const char *device_name)
{
    BlockDriverState *bs;

    bs = g_malloc0(sizeof(BlockDriverState));
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
    /* Anonymous devices (empty name) are not put on the global list */
    if (device_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
    }
    bdrv_iostatus_disable(bs);
    return bs;
}
299
ea2384d3
FB
300BlockDriver *bdrv_find_format(const char *format_name)
301{
302 BlockDriver *drv1;
8a22f02a
SH
303 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
304 if (!strcmp(drv1->format_name, format_name)) {
ea2384d3 305 return drv1;
8a22f02a 306 }
ea2384d3
FB
307 }
308 return NULL;
309}
310
eb852011
MA
311static int bdrv_is_whitelisted(BlockDriver *drv)
312{
313 static const char *whitelist[] = {
314 CONFIG_BDRV_WHITELIST
315 };
316 const char **p;
317
318 if (!whitelist[0])
319 return 1; /* no whitelist, anything goes */
320
321 for (p = whitelist; *p; p++) {
322 if (!strcmp(drv->format_name, *p)) {
323 return 1;
324 }
325 }
326 return 0;
327}
328
329BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
330{
331 BlockDriver *drv = bdrv_find_format(format_name);
332 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
333}
334
0e7e1989
KW
335int bdrv_create(BlockDriver *drv, const char* filename,
336 QEMUOptionParameter *options)
ea2384d3
FB
337{
338 if (!drv->bdrv_create)
339 return -ENOTSUP;
0e7e1989
KW
340
341 return drv->bdrv_create(filename, options);
ea2384d3
FB
342}
343
84a12e66
CH
344int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
345{
346 BlockDriver *drv;
347
b50cbabc 348 drv = bdrv_find_protocol(filename);
84a12e66 349 if (drv == NULL) {
16905d71 350 return -ENOENT;
84a12e66
CH
351 }
352
353 return bdrv_create(drv, filename, options);
354}
355
d5249393 356#ifdef _WIN32
95389c86 357void get_tmp_filename(char *filename, int size)
d5249393 358{
3b9f94e1 359 char temp_dir[MAX_PATH];
3b46e624 360
3b9f94e1
FB
361 GetTempPath(MAX_PATH, temp_dir);
362 GetTempFileName(temp_dir, "qem", 0, filename);
d5249393
FB
363}
364#else
95389c86 365void get_tmp_filename(char *filename, int size)
fc01f7e7 366{
67b915a5 367 int fd;
7ccfb2eb 368 const char *tmpdir;
d5249393 369 /* XXX: race condition possible */
0badc1ee
AJ
370 tmpdir = getenv("TMPDIR");
371 if (!tmpdir)
372 tmpdir = "/tmp";
373 snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
ea2384d3
FB
374 fd = mkstemp(filename);
375 close(fd);
376}
d5249393 377#endif
fc01f7e7 378
84a12e66
CH
379/*
380 * Detect host devices. By convention, /dev/cdrom[N] is always
381 * recognized as a host CDROM.
382 */
383static BlockDriver *find_hdev_driver(const char *filename)
384{
385 int score_max = 0, score;
386 BlockDriver *drv = NULL, *d;
387
388 QLIST_FOREACH(d, &bdrv_drivers, list) {
389 if (d->bdrv_probe_device) {
390 score = d->bdrv_probe_device(filename);
391 if (score > score_max) {
392 score_max = score;
393 drv = d;
394 }
395 }
396 }
397
398 return drv;
399}
400
b50cbabc 401BlockDriver *bdrv_find_protocol(const char *filename)
83f64091
FB
402{
403 BlockDriver *drv1;
404 char protocol[128];
1cec71e3 405 int len;
83f64091 406 const char *p;
19cb3738 407
66f82cee
KW
408 /* TODO Drivers without bdrv_file_open must be specified explicitly */
409
39508e7a
CH
410 /*
411 * XXX(hch): we really should not let host device detection
412 * override an explicit protocol specification, but moving this
413 * later breaks access to device names with colons in them.
414 * Thanks to the brain-dead persistent naming schemes on udev-
415 * based Linux systems those actually are quite common.
416 */
417 drv1 = find_hdev_driver(filename);
418 if (drv1) {
419 return drv1;
420 }
421
9e0b22f4 422 if (!path_has_protocol(filename)) {
39508e7a 423 return bdrv_find_format("file");
84a12e66 424 }
9e0b22f4
SH
425 p = strchr(filename, ':');
426 assert(p != NULL);
1cec71e3
AL
427 len = p - filename;
428 if (len > sizeof(protocol) - 1)
429 len = sizeof(protocol) - 1;
430 memcpy(protocol, filename, len);
431 protocol[len] = '\0';
8a22f02a 432 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
5fafdf24 433 if (drv1->protocol_name &&
8a22f02a 434 !strcmp(drv1->protocol_name, protocol)) {
83f64091 435 return drv1;
8a22f02a 436 }
83f64091
FB
437 }
438 return NULL;
439}
440
c98ac35d 441static int find_image_format(const char *filename, BlockDriver **pdrv)
f3a5d3f8
CH
442{
443 int ret, score, score_max;
444 BlockDriver *drv1, *drv;
445 uint8_t buf[2048];
446 BlockDriverState *bs;
447
f5edb014 448 ret = bdrv_file_open(&bs, filename, 0);
c98ac35d
SW
449 if (ret < 0) {
450 *pdrv = NULL;
451 return ret;
452 }
f8ea0b00 453
08a00559
KW
454 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
455 if (bs->sg || !bdrv_is_inserted(bs)) {
1a396859 456 bdrv_delete(bs);
c98ac35d
SW
457 drv = bdrv_find_format("raw");
458 if (!drv) {
459 ret = -ENOENT;
460 }
461 *pdrv = drv;
462 return ret;
1a396859 463 }
f8ea0b00 464
83f64091
FB
465 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
466 bdrv_delete(bs);
467 if (ret < 0) {
c98ac35d
SW
468 *pdrv = NULL;
469 return ret;
83f64091
FB
470 }
471
ea2384d3 472 score_max = 0;
84a12e66 473 drv = NULL;
8a22f02a 474 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
83f64091
FB
475 if (drv1->bdrv_probe) {
476 score = drv1->bdrv_probe(buf, ret, filename);
477 if (score > score_max) {
478 score_max = score;
479 drv = drv1;
480 }
0849bf08 481 }
fc01f7e7 482 }
c98ac35d
SW
483 if (!drv) {
484 ret = -ENOENT;
485 }
486 *pdrv = drv;
487 return ret;
ea2384d3
FB
488}
489
51762288
SH
/**
 * Set the current 'total_sectors' value
 *
 * Queries the driver for the actual device length when possible, otherwise
 * trusts @hint (in sectors).  Returns 0 on success or a negative errno.
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg)
        return 0;

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);
        if (length < 0) {
            return length;
        }
        hint = length >> BDRV_SECTOR_BITS;
    }

    bs->total_sectors = hint;
    return 0;
}
513
c3993cdc
SH
514/**
515 * Set open flags for a given cache mode
516 *
517 * Return 0 on success, -1 if the cache mode was invalid.
518 */
519int bdrv_parse_cache_flags(const char *mode, int *flags)
520{
521 *flags &= ~BDRV_O_CACHE_MASK;
522
523 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
524 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
92196b2f
SH
525 } else if (!strcmp(mode, "directsync")) {
526 *flags |= BDRV_O_NOCACHE;
c3993cdc
SH
527 } else if (!strcmp(mode, "writeback")) {
528 *flags |= BDRV_O_CACHE_WB;
529 } else if (!strcmp(mode, "unsafe")) {
530 *flags |= BDRV_O_CACHE_WB;
531 *flags |= BDRV_O_NO_FLUSH;
532 } else if (!strcmp(mode, "writethrough")) {
533 /* this is the default */
534 } else {
535 return -1;
536 }
537
538 return 0;
539}
540
53fec9d3
SH
541/**
542 * The copy-on-read flag is actually a reference count so multiple users may
543 * use the feature without worrying about clobbering its previous state.
544 * Copy-on-read stays enabled until all users have called to disable it.
545 */
546void bdrv_enable_copy_on_read(BlockDriverState *bs)
547{
548 bs->copy_on_read++;
549}
550
551void bdrv_disable_copy_on_read(BlockDriverState *bs)
552{
553 assert(bs->copy_on_read > 0);
554 bs->copy_on_read--;
555}
556
57915332
KW
/*
 * Common part for opening disk images and files
 *
 * Resets per-open state on @bs, opens the image with @drv (directly for
 * protocol drivers, otherwise through an underlying bs->file), and refreshes
 * the device size.  On failure all allocated state is released and a negative
 * errno is returned.
 */
static int bdrv_open_common(BlockDriverState *bs, const char *filename,
                            int flags, BlockDriver *drv)
{
    int ret, open_flags;

    assert(drv != NULL);

    trace_bdrv_open_common(bs, filename, flags, drv->format_name);

    /* Reset state that a previous open may have left behind */
    bs->file = NULL;
    bs->total_sectors = 0;
    bs->encrypted = 0;
    bs->valid_key = 0;
    bs->sg = 0;
    bs->open_flags = flags;
    bs->growable = 0;
    bs->buffer_alignment = 512;

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
        bdrv_enable_copy_on_read(bs);
    }

    pstrcpy(bs->filename, sizeof(bs->filename), filename);
    bs->backing_file[0] = '\0';

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
        return -ENOTSUP;
    }

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

    /*
     * Snapshots should be writable.
     */
    if (bs->is_temporary) {
        open_flags |= BDRV_O_RDWR;
    }

    bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        ret = drv->bdrv_file_open(bs, filename, open_flags);
    } else {
        ret = bdrv_file_open(&bs->file, filename, open_flags);
        if (ret >= 0) {
            ret = drv->bdrv_open(bs, open_flags);
        }
    }

    if (ret < 0) {
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        goto free_and_fail;
    }

#ifndef _WIN32
    /* Temporary images are unlinked immediately; the open fd keeps them
     * alive until close */
    if (bs->is_temporary) {
        unlink(filename);
    }
#endif
    return 0;

free_and_fail:
    if (bs->file) {
        bdrv_delete(bs->file);
        bs->file = NULL;
    }
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}
646
b6ce07aa
KW
/*
 * Opens a file using a protocol (file, host_device, nbd, ...)
 *
 * On success stores the new, growable BlockDriverState in *pbs and returns 0;
 * on failure returns a negative errno and leaves *pbs untouched.
 */
int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
{
    BlockDriverState *bs;
    BlockDriver *drv;
    int ret;

    drv = bdrv_find_protocol(filename);
    if (!drv) {
        return -ENOENT;
    }

    bs = bdrv_new("");
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        bdrv_delete(bs);
        return ret;
    }
    /* Protocol-level files may be resized by writes past EOF */
    bs->growable = 1;
    *pbs = bs;
    return 0;
}
671
b6ce07aa
KW
/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
 *
 * With BDRV_O_SNAPSHOT a temporary qcow2 overlay backed by @filename is
 * created and opened instead of the image itself.  When @drv is NULL the
 * format is probed.  Unless BDRV_O_NO_BACKING is set, the image's backing
 * file is opened recursively (always read-only).
 */
int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
              BlockDriver *drv)
{
    int ret;
    char tmp_filename[PATH_MAX];

    if (flags & BDRV_O_SNAPSHOT) {
        BlockDriverState *bs1;
        int64_t total_size;
        int is_protocol = 0;
        BlockDriver *bdrv_qcow2;
        QEMUOptionParameter *options;
        char backing_filename[PATH_MAX];

        /* if snapshot, we create a temporary backing file and open it
           instead of opening 'filename' directly */

        /* if there is a backing file, use it */
        bs1 = bdrv_new("");
        ret = bdrv_open(bs1, filename, 0, drv);
        if (ret < 0) {
            bdrv_delete(bs1);
            return ret;
        }
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;

        if (bs1->drv && bs1->drv->protocol_name)
            is_protocol = 1;

        bdrv_delete(bs1);

        get_tmp_filename(tmp_filename, sizeof(tmp_filename));

        /* Real path is meaningless for protocols */
        if (is_protocol)
            snprintf(backing_filename, sizeof(backing_filename),
                     "%s", filename);
        else if (!realpath(filename, backing_filename))
            return -errno;

        /* Create the temporary qcow2 overlay backed by the original image */
        bdrv_qcow2 = bdrv_find_format("qcow2");
        options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);

        set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
        set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
        if (drv) {
            set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
                                 drv->format_name);
        }

        ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
        free_option_parameters(options);
        if (ret < 0) {
            return ret;
        }

        /* From here on we open the overlay, not the original image */
        filename = tmp_filename;
        drv = bdrv_qcow2;
        bs->is_temporary = 1;
    }

    /* Find the right image format driver */
    if (!drv) {
        ret = find_image_format(filename, &drv);
    }

    if (!drv) {
        goto unlink_and_fail;
    }

    /* Open the image */
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        goto unlink_and_fail;
    }

    /* If there is a backing file, use it */
    if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
        char backing_filename[PATH_MAX];
        int back_flags;
        BlockDriver *back_drv = NULL;

        bs->backing_hd = bdrv_new("");

        /* Backing file names are relative to the image, unless they carry
         * an explicit protocol prefix */
        if (path_has_protocol(bs->backing_file)) {
            pstrcpy(backing_filename, sizeof(backing_filename),
                    bs->backing_file);
        } else {
            path_combine(backing_filename, sizeof(backing_filename),
                         filename, bs->backing_file);
        }

        if (bs->backing_format[0] != '\0') {
            back_drv = bdrv_find_format(bs->backing_format);
        }

        /* backing files always opened read-only */
        back_flags =
            flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

        ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
        if (ret < 0) {
            bdrv_close(bs);
            return ret;
        }
        if (bs->is_temporary) {
            bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
        } else {
            /* base image inherits from "parent" */
            bs->backing_hd->keep_read_only = bs->keep_read_only;
        }
    }

    /* Encrypted images stay "not ready" until the key is set */
    if (!bdrv_key_required(bs)) {
        bdrv_dev_change_media_cb(bs, true);
    }

    /* throttling disk I/O limits */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_enable(bs);
    }

    return 0;

unlink_and_fail:
    if (bs->is_temporary) {
        unlink(filename);
    }
    return ret;
}
805
fc01f7e7
FB
/*
 * Close @bs: delete its backing file, invoke the driver's close routine,
 * release driver-private state, and notify the attached device model.
 * Safe to call on an already-closed device (bs->drv == NULL).
 */
void bdrv_close(BlockDriverState *bs)
{
    if (bs->drv) {
        if (bs == bs_snapshots) {
            bs_snapshots = NULL;
        }
        if (bs->backing_hd) {
            bdrv_delete(bs->backing_hd);
            bs->backing_hd = NULL;
        }
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
#ifdef _WIN32
        /* On win32 the temporary file could not be unlinked while open */
        if (bs->is_temporary) {
            unlink(bs->filename);
        }
#endif
        bs->opaque = NULL;
        bs->drv = NULL;
        bs->copy_on_read = 0;

        if (bs->file != NULL) {
            bdrv_close(bs->file);
        }

        bdrv_dev_change_media_cb(bs, false);
    }

    /*throttling disk I/O limits*/
    if (bs->io_limits_enabled) {
        bdrv_io_limits_disable(bs);
    }
}
839
2bc93fed
MK
840void bdrv_close_all(void)
841{
842 BlockDriverState *bs;
843
844 QTAILQ_FOREACH(bs, &bdrv_states, list) {
845 bdrv_close(bs);
846 }
847}
848
d22b2f41
RH
849/* make a BlockDriverState anonymous by removing from bdrv_state list.
850 Also, NULL terminate the device_name to prevent double remove */
851void bdrv_make_anon(BlockDriverState *bs)
852{
853 if (bs->device_name[0] != '\0') {
854 QTAILQ_REMOVE(&bdrv_states, bs, list);
855 }
856 bs->device_name[0] = '\0';
857}
858
b338082b
FB
/*
 * Close and free @bs (and its protocol-level bs->file).
 * The caller must have detached any guest device first.
 */
void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->dev);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    bdrv_close(bs);
    if (bs->file != NULL) {
        bdrv_delete(bs->file);
    }

    assert(bs != bs_snapshots);
    g_free(bs);
}
874
fa879d62
MA
875int bdrv_attach_dev(BlockDriverState *bs, void *dev)
876/* TODO change to DeviceState *dev when all users are qdevified */
18846dee 877{
fa879d62 878 if (bs->dev) {
18846dee
MA
879 return -EBUSY;
880 }
fa879d62 881 bs->dev = dev;
28a7282a 882 bdrv_iostatus_reset(bs);
18846dee
MA
883 return 0;
884}
885
fa879d62
MA
886/* TODO qdevified devices don't use this, remove when devices are qdevified */
887void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
18846dee 888{
fa879d62
MA
889 if (bdrv_attach_dev(bs, dev) < 0) {
890 abort();
891 }
892}
893
894void bdrv_detach_dev(BlockDriverState *bs, void *dev)
895/* TODO change to DeviceState *dev when all users are qdevified */
896{
897 assert(bs->dev == dev);
898 bs->dev = NULL;
0e49de52
MA
899 bs->dev_ops = NULL;
900 bs->dev_opaque = NULL;
29e05f20 901 bs->buffer_alignment = 512;
18846dee
MA
902}
903
fa879d62
MA
904/* TODO change to return DeviceState * when all users are qdevified */
905void *bdrv_get_attached_dev(BlockDriverState *bs)
18846dee 906{
fa879d62 907 return bs->dev;
18846dee
MA
908}
909
0e49de52
MA
/*
 * Install the device-model callback table for @bs.
 *
 * A device with removable media cannot serve as the VM snapshot device,
 * so the cached bs_snapshots pointer is dropped in that case.
 */
void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
                      void *opaque)
{
    bs->dev_ops = ops;
    bs->dev_opaque = opaque;
    if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
        bs_snapshots = NULL;
    }
}
919
7d4b4ba5 920static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
0e49de52 921{
145feb17 922 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
7d4b4ba5 923 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
145feb17
MA
924 }
925}
926
2c6942fa
MA
927bool bdrv_dev_has_removable_media(BlockDriverState *bs)
928{
929 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
930}
931
025ccaa7
PB
932void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
933{
934 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
935 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
936 }
937}
938
e4def80b
MA
939bool bdrv_dev_is_tray_open(BlockDriverState *bs)
940{
941 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
942 return bs->dev_ops->is_tray_open(bs->dev_opaque);
943 }
944 return false;
945}
946
145feb17
MA
947static void bdrv_dev_resize_cb(BlockDriverState *bs)
948{
949 if (bs->dev_ops && bs->dev_ops->resize_cb) {
950 bs->dev_ops->resize_cb(bs->dev_opaque);
0e49de52
MA
951 }
952}
953
f107639a
MA
954bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
955{
956 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
957 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
958 }
959 return false;
960}
961
e97fc193
AL
962/*
963 * Run consistency checks on an image
964 *
e076f338 965 * Returns 0 if the check could be completed (it doesn't mean that the image is
a1c7273b 966 * free of errors) or -errno when an internal error occurred. The results of the
e076f338 967 * check are stored in res.
e97fc193 968 */
e076f338 969int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
e97fc193
AL
970{
971 if (bs->drv->bdrv_check == NULL) {
972 return -ENOTSUP;
973 }
974
e076f338 975 memset(res, 0, sizeof(*res));
9ac228e0 976 return bs->drv->bdrv_check(bs, res);
e97fc193
AL
977}
978
8a426614
KW
979#define COMMIT_BUF_SECTORS 2048
980
33e3963e
FB
981/* commit COW file into the raw image */
982int bdrv_commit(BlockDriverState *bs)
983{
19cb3738 984 BlockDriver *drv = bs->drv;
ee181196 985 BlockDriver *backing_drv;
8a426614
KW
986 int64_t sector, total_sectors;
987 int n, ro, open_flags;
4dca4b63 988 int ret = 0, rw_ret = 0;
8a426614 989 uint8_t *buf;
4dca4b63
NS
990 char filename[1024];
991 BlockDriverState *bs_rw, *bs_ro;
33e3963e 992
19cb3738
FB
993 if (!drv)
994 return -ENOMEDIUM;
4dca4b63
NS
995
996 if (!bs->backing_hd) {
997 return -ENOTSUP;
33e3963e
FB
998 }
999
4dca4b63
NS
1000 if (bs->backing_hd->keep_read_only) {
1001 return -EACCES;
1002 }
ee181196
KW
1003
1004 backing_drv = bs->backing_hd->drv;
4dca4b63
NS
1005 ro = bs->backing_hd->read_only;
1006 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
1007 open_flags = bs->backing_hd->open_flags;
1008
1009 if (ro) {
1010 /* re-open as RW */
1011 bdrv_delete(bs->backing_hd);
1012 bs->backing_hd = NULL;
1013 bs_rw = bdrv_new("");
ee181196
KW
1014 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
1015 backing_drv);
4dca4b63
NS
1016 if (rw_ret < 0) {
1017 bdrv_delete(bs_rw);
1018 /* try to re-open read-only */
1019 bs_ro = bdrv_new("");
ee181196
KW
1020 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1021 backing_drv);
4dca4b63
NS
1022 if (ret < 0) {
1023 bdrv_delete(bs_ro);
1024 /* drive not functional anymore */
1025 bs->drv = NULL;
1026 return ret;
1027 }
1028 bs->backing_hd = bs_ro;
1029 return rw_ret;
1030 }
1031 bs->backing_hd = bs_rw;
ea2384d3 1032 }
33e3963e 1033
6ea44308 1034 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
7267c094 1035 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
8a426614
KW
1036
1037 for (sector = 0; sector < total_sectors; sector += n) {
05c4af54 1038 if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
8a426614
KW
1039
1040 if (bdrv_read(bs, sector, buf, n) != 0) {
1041 ret = -EIO;
1042 goto ro_cleanup;
1043 }
1044
1045 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
1046 ret = -EIO;
1047 goto ro_cleanup;
1048 }
ea2384d3 1049 }
33e3963e 1050 }
95389c86 1051
1d44952f
CH
1052 if (drv->bdrv_make_empty) {
1053 ret = drv->bdrv_make_empty(bs);
1054 bdrv_flush(bs);
1055 }
95389c86 1056
3f5075ae
CH
1057 /*
1058 * Make sure all data we wrote to the backing device is actually
1059 * stable on disk.
1060 */
1061 if (bs->backing_hd)
1062 bdrv_flush(bs->backing_hd);
4dca4b63
NS
1063
1064ro_cleanup:
7267c094 1065 g_free(buf);
4dca4b63
NS
1066
1067 if (ro) {
1068 /* re-open as RO */
1069 bdrv_delete(bs->backing_hd);
1070 bs->backing_hd = NULL;
1071 bs_ro = bdrv_new("");
ee181196
KW
1072 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1073 backing_drv);
4dca4b63
NS
1074 if (ret < 0) {
1075 bdrv_delete(bs_ro);
1076 /* drive not functional anymore */
1077 bs->drv = NULL;
1078 return ret;
1079 }
1080 bs->backing_hd = bs_ro;
1081 bs->backing_hd->keep_read_only = 0;
1082 }
1083
1d44952f 1084 return ret;
33e3963e
FB
1085}
1086
6ab4b5ab
MA
1087void bdrv_commit_all(void)
1088{
1089 BlockDriverState *bs;
1090
1091 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1092 bdrv_commit(bs);
1093 }
1094}
1095
dbffbdcf
SH
/*
 * Book-keeping for one in-flight I/O request.  An entry lives on
 * bs->tracked_requests from tracked_request_begin() until
 * tracked_request_end().
 */
struct BdrvTrackedRequest {
    BlockDriverState *bs;       /* device this request operates on */
    int64_t sector_num;         /* first sector of the request */
    int nb_sectors;             /* request length in sectors */
    bool is_write;              /* true for writes, false for reads */
    QLIST_ENTRY(BdrvTrackedRequest) list;
    CoQueue wait_queue; /* coroutines blocked on this request */
};
1104
/**
 * Remove an active request from the tracked requests list
 *
 * This function should be called when a tracked request is completing.
 */
static void tracked_request_end(BdrvTrackedRequest *req)
{
    QLIST_REMOVE(req, list);
    /* Wake every coroutine that blocked on this request so it can re-scan
     * the tracked list. */
    qemu_co_queue_restart_all(&req->wait_queue);
}
1115
/**
 * Add an active request to the tracked requests list
 *
 * @req must remain alive (typically on the caller's stack) until the
 * matching tracked_request_end() call.
 */
static void tracked_request_begin(BdrvTrackedRequest *req,
                                  BlockDriverState *bs,
                                  int64_t sector_num,
                                  int nb_sectors, bool is_write)
{
    *req = (BdrvTrackedRequest){
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .is_write = is_write,
    };

    /* Queue for coroutines that must wait for this request to finish */
    qemu_co_queue_init(&req->wait_queue);

    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
}
1135
d83947ac
SH
1136/**
1137 * Round a region to cluster boundaries
1138 */
1139static void round_to_clusters(BlockDriverState *bs,
1140 int64_t sector_num, int nb_sectors,
1141 int64_t *cluster_sector_num,
1142 int *cluster_nb_sectors)
1143{
1144 BlockDriverInfo bdi;
1145
1146 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
1147 *cluster_sector_num = sector_num;
1148 *cluster_nb_sectors = nb_sectors;
1149 } else {
1150 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
1151 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
1152 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
1153 nb_sectors, c);
1154 }
1155}
1156
f4658285
SH
1157static bool tracked_request_overlaps(BdrvTrackedRequest *req,
1158 int64_t sector_num, int nb_sectors) {
d83947ac
SH
1159 /* aaaa bbbb */
1160 if (sector_num >= req->sector_num + req->nb_sectors) {
1161 return false;
1162 }
1163 /* bbbb aaaa */
1164 if (req->sector_num >= sector_num + nb_sectors) {
1165 return false;
1166 }
1167 return true;
f4658285
SH
1168}
1169
/*
 * Block the calling coroutine until no tracked request overlaps the given
 * cluster-rounded region.  Must be called before tracked_request_begin()
 * for the new request, otherwise it would wait on itself.
 */
static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors)
{
    BdrvTrackedRequest *req;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    bool retry;

    /* If we touch the same cluster it counts as an overlap.  This guarantees
     * that allocating writes will be serialized and not race with each other
     * for the same cluster.  For example, in copy-on-read it ensures that the
     * CoR read and write operations are atomic and guest writes cannot
     * interleave between them.
     */
    round_to_clusters(bs, sector_num, nb_sectors,
                      &cluster_sector_num, &cluster_nb_sectors);

    do {
        retry = false;
        QLIST_FOREACH(req, &bs->tracked_requests, list) {
            if (tracked_request_overlaps(req, cluster_sector_num,
                                         cluster_nb_sectors)) {
                /* Sleep until the blocking request completes, then re-scan
                 * the whole list: new overlapping requests may have been
                 * inserted while we slept. */
                qemu_co_queue_wait(&req->wait_queue);
                retry = true;
                break;
            }
        }
    } while (retry);
}
1199
756e6736
KW
1200/*
1201 * Return values:
1202 * 0 - success
1203 * -EINVAL - backing format specified, but no file
1204 * -ENOSPC - can't update the backing file because no space is left in the
1205 * image file header
1206 * -ENOTSUP - format driver doesn't support changing the backing file
1207 */
1208int bdrv_change_backing_file(BlockDriverState *bs,
1209 const char *backing_file, const char *backing_fmt)
1210{
1211 BlockDriver *drv = bs->drv;
1212
1213 if (drv->bdrv_change_backing_file != NULL) {
1214 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
1215 } else {
1216 return -ENOTSUP;
1217 }
1218}
1219
71d0770c
AL
1220static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1221 size_t size)
1222{
1223 int64_t len;
1224
1225 if (!bdrv_is_inserted(bs))
1226 return -ENOMEDIUM;
1227
1228 if (bs->growable)
1229 return 0;
1230
1231 len = bdrv_getlength(bs);
1232
fbb7b4e0
KW
1233 if (offset < 0)
1234 return -EIO;
1235
1236 if ((offset > len) || (len - offset < size))
71d0770c
AL
1237 return -EIO;
1238
1239 return 0;
1240}
1241
/* Validate a sector-based request by converting it to a byte request. */
static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors)
{
    return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
                                   nb_sectors * BDRV_SECTOR_SIZE);
}
1248
1c9805a3
SH
/* Parameter/result bundle passed to bdrv_rw_co_entry() when emulating a
 * synchronous read or write on top of the coroutine interface. */
typedef struct RwCo {
    BlockDriverState *bs;   /* device to operate on */
    int64_t sector_num;     /* first sector */
    int nb_sectors;         /* request length in sectors */
    QEMUIOVector *qiov;     /* data buffer(s) */
    bool is_write;          /* direction of the transfer */
    int ret;                /* completion status; NOT_DONE while pending */
} RwCo;
1257
1258static void coroutine_fn bdrv_rw_co_entry(void *opaque)
fc01f7e7 1259{
1c9805a3 1260 RwCo *rwco = opaque;
ea2384d3 1261
1c9805a3
SH
1262 if (!rwco->is_write) {
1263 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
1264 rwco->nb_sectors, rwco->qiov);
1265 } else {
1266 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
1267 rwco->nb_sectors, rwco->qiov);
1268 }
1269}
e7a8a783 1270
1c9805a3
SH
/*
 * Process a synchronous request using coroutines
 *
 * Wraps the buffer in a single-element QEMUIOVector, then either runs the
 * transfer inline (when already in coroutine context) or spawns a coroutine
 * and spins the AIO event loop until it completes.
 */
static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
                      int nb_sectors, bool is_write)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
    };
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .qiov = &qiov,
        .is_write = is_write,
        .ret = NOT_DONE,    /* sentinel: overwritten on completion */
    };

    qemu_iovec_init_external(&qiov, &iov, 1);

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_rw_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_rw_co_entry);
        qemu_coroutine_enter(co, &rwco);
        /* Pump the event loop until the coroutine stores its result. */
        while (rwco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }
    return rwco.ret;
}
b338082b 1306
1c9805a3
SH
/* Synchronous sector read. Return < 0 if error. See bdrv_write() for the
 * return codes */
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
              uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
}
1313
7cd1e32a 1314static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
a55eb92c 1315 int nb_sectors, int dirty)
7cd1e32a 1316{
1317 int64_t start, end;
c6d22830 1318 unsigned long val, idx, bit;
a55eb92c 1319
6ea44308 1320 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
c6d22830 1321 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c
JK
1322
1323 for (; start <= end; start++) {
c6d22830
JK
1324 idx = start / (sizeof(unsigned long) * 8);
1325 bit = start % (sizeof(unsigned long) * 8);
1326 val = bs->dirty_bitmap[idx];
1327 if (dirty) {
6d59fec1 1328 if (!(val & (1UL << bit))) {
aaa0eb75 1329 bs->dirty_count++;
6d59fec1 1330 val |= 1UL << bit;
aaa0eb75 1331 }
c6d22830 1332 } else {
6d59fec1 1333 if (val & (1UL << bit)) {
aaa0eb75 1334 bs->dirty_count--;
6d59fec1 1335 val &= ~(1UL << bit);
aaa0eb75 1336 }
c6d22830
JK
1337 }
1338 bs->dirty_bitmap[idx] = val;
7cd1e32a 1339 }
1340}
1341
/* Synchronous sector write. Return < 0 if error. Important errors are:
  -EIO         generic I/O error (may happen for all errors)
  -ENOMEDIUM   No media inserted.
  -EINVAL      Invalid sector number or nb_sectors
  -EACCES      Trying to write a read-only device
*/
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
               const uint8_t *buf, int nb_sectors)
{
    /* Emulated on top of the coroutine read/write path; the cast is safe
     * because is_write=true means the buffer is only read from. */
    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
}
1353
eda578e5
AL
/*
 * Byte-granularity read built from whole-sector bdrv_read() calls:
 * an unaligned head and tail go through a bounce sector, the aligned
 * middle is read directly into the caller's buffer.
 *
 * Returns count1 (bytes requested) on success, negative errno on error.
 */
int bdrv_pread(BlockDriverState *bs, int64_t offset,
               void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first read to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        /* copy only the requested slice out of the bounce sector */
        memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* read the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf, count);
    }
    return count1;
}
1398
eda578e5
AL
/*
 * Byte-granularity write built from whole-sector operations: unaligned
 * head and tail sectors use read-modify-write through a bounce sector,
 * the aligned middle is written directly from the caller's buffer.
 *
 * Returns count1 (bytes requested) on success, negative errno on error.
 */
int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
                const void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first write to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        /* read-modify-write the partial head sector */
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* write the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        /* read-modify-write the partial tail sector */
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf, buf, count);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
    }
    return count1;
}
83f64091 1447
f08145fe
KW
1448/*
1449 * Writes to the file and ensures that no writes are reordered across this
1450 * request (acts as a barrier)
1451 *
1452 * Returns 0 on success, -errno in error cases.
1453 */
1454int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1455 const void *buf, int count)
1456{
1457 int ret;
1458
1459 ret = bdrv_pwrite(bs, offset, buf, count);
1460 if (ret < 0) {
1461 return ret;
1462 }
1463
92196b2f
SH
1464 /* No flush needed for cache modes that use O_DSYNC */
1465 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
f08145fe
KW
1466 bdrv_flush(bs);
1467 }
1468
1469 return 0;
1470}
1471
ab185921
SH
/*
 * Copy-on-read helper: read an entire cluster from the backing chain,
 * write it into the image, then hand back only the requested slice.
 */
static int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    /* Perform I/O through a temporary buffer so that users who scribble over
     * their read buffer while the operation is in progress do not end up
     * modifying the image file.  This is critical for zero-copy guest I/O
     * where anything might happen inside guest memory.
     */
    void *bounce_buffer;

    struct iovec iov;
    QEMUIOVector bounce_qiov;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    size_t skip_bytes;
    int ret;

    /* Cover entire cluster so no additional backing file I/O is required when
     * allocating cluster in the image file.
     */
    round_to_clusters(bs, sector_num, nb_sectors,
                      &cluster_sector_num, &cluster_nb_sectors);

    trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors,
                                cluster_sector_num, cluster_nb_sectors);

    iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
    iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
    qemu_iovec_init_external(&bounce_qiov, &iov, 1);

    ret = bs->drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
                                 &bounce_qiov);
    if (ret < 0) {
        goto err;
    }

    ret = bs->drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
                                  &bounce_qiov);
    if (ret < 0) {
        /* It might be okay to ignore write errors for guest requests.  If this
         * is a deliberate copy-on-read then we don't want to ignore the error.
         * Simply report it in all cases.
         */
        goto err;
    }

    /* Copy the requested slice of the cluster into the caller's qiov. */
    skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
    qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
                           nb_sectors * BDRV_SECTOR_SIZE);

err:
    qemu_vfree(bounce_buffer);
    return ret;
}
1526
c5fbe571
SH
/*
 * Handle a read request in coroutine context
 *
 * Applies I/O throttling, serializes against overlapping requests when
 * copy-on-read is active, and dispatches either to the copy-on-read helper
 * or straight to the driver.
 */
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    int ret;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }

    /* throttling disk read I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, false, nb_sectors);
    }

    /* Must wait BEFORE registering our own request, or we would block on
     * ourselves. */
    if (bs->copy_on_read) {
        wait_for_overlapping_requests(bs, sector_num, nb_sectors);
    }

    tracked_request_begin(&req, bs, sector_num, nb_sectors, false);

    if (bs->copy_on_read) {
        int pnum;

        ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
        if (ret < 0) {
            goto out;
        }

        /* Any unallocated part triggers the copy-on-read path for the whole
         * request. */
        if (!ret || pnum != nb_sectors) {
            ret = bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, qiov);
            goto out;
        }
    }

    ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);

out:
    tracked_request_end(&req);
    return ret;
}
1575
/* Public coroutine read entry point: trace, then delegate. */
int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_readv(bs, sector_num, nb_sectors);

    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
}
1583
/*
 * Handle a write request in coroutine context
 *
 * Applies I/O throttling and request tracking, dispatches to the driver,
 * then updates the migration dirty bitmap and the high-watermark sector.
 */
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    int ret;

    if (!bs->drv) {
        return -ENOMEDIUM;
    }
    if (bs->read_only) {
        return -EACCES;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }

    /* throttling disk write I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, true, nb_sectors);
    }

    /* Serialize against in-flight copy-on-read operations on this range. */
    if (bs->copy_on_read) {
        wait_for_overlapping_requests(bs, sector_num, nb_sectors);
    }

    tracked_request_begin(&req, bs, sector_num, nb_sectors, true);

    ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);

    /* Record the write for block migration. */
    if (bs->dirty_bitmap) {
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
    }

    /* Track the highest sector ever written. */
    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
        bs->wr_highest_sector = sector_num + nb_sectors - 1;
    }

    tracked_request_end(&req);

    return ret;
}
1629
c5fbe571
SH
/* Public coroutine write entry point: trace, then delegate. */
int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_writev(bs, sector_num, nb_sectors);

    return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
}
1637
83f64091
FB
1638/**
1639 * Truncate file to 'offset' bytes (needed only for file protocols)
1640 */
1641int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1642{
1643 BlockDriver *drv = bs->drv;
51762288 1644 int ret;
83f64091 1645 if (!drv)
19cb3738 1646 return -ENOMEDIUM;
83f64091
FB
1647 if (!drv->bdrv_truncate)
1648 return -ENOTSUP;
59f2689d
NS
1649 if (bs->read_only)
1650 return -EACCES;
8591675f
MT
1651 if (bdrv_in_use(bs))
1652 return -EBUSY;
51762288
SH
1653 ret = drv->bdrv_truncate(bs, offset);
1654 if (ret == 0) {
1655 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
145feb17 1656 bdrv_dev_resize_cb(bs);
51762288
SH
1657 }
1658 return ret;
83f64091
FB
1659}
1660
4a1d5e1f
FZ
1661/**
1662 * Length of a allocated file in bytes. Sparse files are counted by actual
1663 * allocated space. Return < 0 if error or unknown.
1664 */
1665int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1666{
1667 BlockDriver *drv = bs->drv;
1668 if (!drv) {
1669 return -ENOMEDIUM;
1670 }
1671 if (drv->bdrv_get_allocated_file_size) {
1672 return drv->bdrv_get_allocated_file_size(bs);
1673 }
1674 if (bs->file) {
1675 return bdrv_get_allocated_file_size(bs->file);
1676 }
1677 return -ENOTSUP;
1678}
1679
83f64091
FB
1680/**
1681 * Length of a file in bytes. Return < 0 if error or unknown.
1682 */
1683int64_t bdrv_getlength(BlockDriverState *bs)
1684{
1685 BlockDriver *drv = bs->drv;
1686 if (!drv)
19cb3738 1687 return -ENOMEDIUM;
51762288 1688
2c6942fa 1689 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
46a4e4e6
SH
1690 if (drv->bdrv_getlength) {
1691 return drv->bdrv_getlength(bs);
1692 }
83f64091 1693 }
46a4e4e6 1694 return bs->total_sectors * BDRV_SECTOR_SIZE;
fc01f7e7
FB
1695}
1696
19cb3738 1697/* return 0 as number of sectors if no device present or error */
96b8f136 1698void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
fc01f7e7 1699{
19cb3738
FB
1700 int64_t length;
1701 length = bdrv_getlength(bs);
1702 if (length < 0)
1703 length = 0;
1704 else
6ea44308 1705 length = length >> BDRV_SECTOR_BITS;
19cb3738 1706 *nb_sectors_ptr = length;
fc01f7e7 1707}
cf98951b 1708
f3d54fc4
AL
/* On-disk layout of one MBR (MS-DOS) partition table entry; must match the
 * byte layout exactly, hence QEMU_PACKED. */
struct partition {
        uint8_t boot_ind;           /* 0x80 - active */
        uint8_t head;               /* starting head */
        uint8_t sector;             /* starting sector */
        uint8_t cyl;                /* starting cylinder */
        uint8_t sys_ind;            /* What partition type */
        uint8_t end_head;           /* end head */
        uint8_t end_sector;         /* end sector */
        uint8_t end_cyl;            /* end cylinder */
        uint32_t start_sect;        /* starting sector counting from 0 */
        uint32_t nr_sects;          /* nr of sectors in partition */
} QEMU_PACKED;
f3d54fc4
AL
1721
1722/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1723static int guess_disk_lchs(BlockDriverState *bs,
1724 int *pcylinders, int *pheads, int *psectors)
1725{
eb5a3165 1726 uint8_t buf[BDRV_SECTOR_SIZE];
f3d54fc4
AL
1727 int ret, i, heads, sectors, cylinders;
1728 struct partition *p;
1729 uint32_t nr_sects;
a38131b6 1730 uint64_t nb_sectors;
f3d54fc4
AL
1731
1732 bdrv_get_geometry(bs, &nb_sectors);
1733
1734 ret = bdrv_read(bs, 0, buf, 1);
1735 if (ret < 0)
1736 return -1;
1737 /* test msdos magic */
1738 if (buf[510] != 0x55 || buf[511] != 0xaa)
1739 return -1;
1740 for(i = 0; i < 4; i++) {
1741 p = ((struct partition *)(buf + 0x1be)) + i;
1742 nr_sects = le32_to_cpu(p->nr_sects);
1743 if (nr_sects && p->end_head) {
1744 /* We make the assumption that the partition terminates on
1745 a cylinder boundary */
1746 heads = p->end_head + 1;
1747 sectors = p->end_sector & 63;
1748 if (sectors == 0)
1749 continue;
1750 cylinders = nb_sectors / (heads * sectors);
1751 if (cylinders < 1 || cylinders > 16383)
1752 continue;
1753 *pheads = heads;
1754 *psectors = sectors;
1755 *pcylinders = cylinders;
1756#if 0
1757 printf("guessed geometry: LCHS=%d %d %d\n",
1758 cylinders, heads, sectors);
1759#endif
1760 return 0;
1761 }
1762 }
1763 return -1;
1764}
1765
/*
 * Pick a CHS geometry for the drive: use an explicit hint when one was set,
 * else try the MBR partition table, else fall back to a standard 16/63
 * geometry, adjusting the BIOS translation hint along the way.
 */
void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
{
    int translation, lba_detected = 0;
    int cylinders, heads, secs;
    uint64_t nb_sectors;

    /* if a geometry hint is available, use it */
    bdrv_get_geometry(bs, &nb_sectors);
    bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
    translation = bdrv_get_translation_hint(bs);
    if (cylinders != 0) {
        *pcyls = cylinders;
        *pheads = heads;
        *psecs = secs;
    } else {
        if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
            if (heads > 16) {
                /* if heads > 16, it means that a BIOS LBA
                   translation was active, so the default
                   hardware geometry is OK */
                lba_detected = 1;
                goto default_geometry;
            } else {
                *pcyls = cylinders;
                *pheads = heads;
                *psecs = secs;
                /* disable any translation to be in sync with
                   the logical geometry */
                if (translation == BIOS_ATA_TRANSLATION_AUTO) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_NONE);
                }
            }
        } else {
        default_geometry:
            /* if no geometry, use a standard physical disk geometry */
            cylinders = nb_sectors / (16 * 63);

            /* Clamp cylinders to the range BIOSes can express. */
            if (cylinders > 16383)
                cylinders = 16383;
            else if (cylinders < 2)
                cylinders = 2;
            *pcyls = cylinders;
            *pheads = 16;
            *psecs = 63;
            if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
                /* Choose LARGE vs LBA translation by addressable size. */
                if ((*pcyls * *pheads) <= 131072) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LARGE);
                } else {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LBA);
                }
            }
        }
        /* Remember the guessed geometry for later calls. */
        bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
    }
}
1824
/* Record an explicit CHS geometry hint for @bs. */
void bdrv_set_geometry_hint(BlockDriverState *bs,
                            int cyls, int heads, int secs)
{
    bs->cyls = cyls;
    bs->heads = heads;
    bs->secs = secs;
}

/* Record the BIOS translation mode hint for @bs. */
void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
{
    bs->translation = translation;
}

/* Retrieve the CHS geometry hint previously stored for @bs (zeroes when
 * no hint was set). */
void bdrv_get_geometry_hint(BlockDriverState *bs,
                            int *pcyls, int *pheads, int *psecs)
{
    *pcyls = bs->cyls;
    *pheads = bs->heads;
    *psecs = bs->secs;
}
1845
0563e191
ZYW
/* throttling disk io limits */
void bdrv_set_io_limits(BlockDriverState *bs,
                        BlockIOLimit *io_limits)
{
    bs->io_limits = *io_limits;
    /* Enable throttling only if at least one limit is non-zero. */
    bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
}
1853
5bbdbb46
BS
/* Recognize floppy formats */
typedef struct FDFormat {
    FDriveType drive;   /* drive type this format belongs to */
    uint8_t last_sect;  /* sectors per track */
    uint8_t max_track;  /* number of tracks */
    uint8_t max_head;   /* last head index (0 = single-sided) */
} FDFormat;

/* Table of known media formats; capacity in sectors is
 * (max_head + 1) * max_track * last_sect. */
static const FDFormat fd_formats[] = {
    /* First entry is default format */
    /* 1.44 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_144, 18, 80, 1, },
    { FDRIVE_DRV_144, 20, 80, 1, },
    { FDRIVE_DRV_144, 21, 80, 1, },
    { FDRIVE_DRV_144, 21, 82, 1, },
    { FDRIVE_DRV_144, 21, 83, 1, },
    { FDRIVE_DRV_144, 22, 80, 1, },
    { FDRIVE_DRV_144, 23, 80, 1, },
    { FDRIVE_DRV_144, 24, 80, 1, },
    /* 2.88 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_288, 36, 80, 1, },
    { FDRIVE_DRV_288, 39, 80, 1, },
    { FDRIVE_DRV_288, 40, 80, 1, },
    { FDRIVE_DRV_288, 44, 80, 1, },
    { FDRIVE_DRV_288, 48, 80, 1, },
    /* 720 kB 3"1/2 floppy disks */
    { FDRIVE_DRV_144,  9, 80, 1, },
    { FDRIVE_DRV_144, 10, 80, 1, },
    { FDRIVE_DRV_144, 10, 82, 1, },
    { FDRIVE_DRV_144, 10, 83, 1, },
    { FDRIVE_DRV_144, 13, 80, 1, },
    { FDRIVE_DRV_144, 14, 80, 1, },
    /* 1.2 MB 5"1/4 floppy disks */
    { FDRIVE_DRV_120, 15, 80, 1, },
    { FDRIVE_DRV_120, 18, 80, 1, },
    { FDRIVE_DRV_120, 18, 82, 1, },
    { FDRIVE_DRV_120, 18, 83, 1, },
    { FDRIVE_DRV_120, 20, 80, 1, },
    /* 720 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 80, 1, },
    { FDRIVE_DRV_120, 11, 80, 1, },
    /* 360 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 40, 1, },
    { FDRIVE_DRV_120,  9, 40, 0, },
    { FDRIVE_DRV_120, 10, 41, 1, },
    { FDRIVE_DRV_120, 10, 42, 1, },
    /* 320 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  8, 40, 1, },
    { FDRIVE_DRV_120,  8, 40, 0, },
    /* 360 kB must match 5"1/4 better than 3"1/2... */
    { FDRIVE_DRV_144,  9, 80, 0, },
    /* end */
    { FDRIVE_DRV_NONE, -1, -1, 0, },
};
1908
1909void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1910 int *max_track, int *last_sect,
1911 FDriveType drive_in, FDriveType *drive)
1912{
1913 const FDFormat *parse;
1914 uint64_t nb_sectors, size;
1915 int i, first_match, match;
1916
1917 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1918 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1919 /* User defined disk */
1920 } else {
1921 bdrv_get_geometry(bs, &nb_sectors);
1922 match = -1;
1923 first_match = -1;
1924 for (i = 0; ; i++) {
1925 parse = &fd_formats[i];
1926 if (parse->drive == FDRIVE_DRV_NONE) {
1927 break;
1928 }
1929 if (drive_in == parse->drive ||
1930 drive_in == FDRIVE_DRV_NONE) {
1931 size = (parse->max_head + 1) * parse->max_track *
1932 parse->last_sect;
1933 if (nb_sectors == size) {
1934 match = i;
1935 break;
1936 }
1937 if (first_match == -1) {
1938 first_match = i;
1939 }
1940 }
1941 }
1942 if (match == -1) {
1943 if (first_match == -1) {
1944 match = 1;
1945 } else {
1946 match = first_match;
1947 }
1948 parse = &fd_formats[match];
1949 }
1950 *nb_heads = parse->max_head + 1;
1951 *max_track = parse->max_track;
1952 *last_sect = parse->last_sect;
1953 *drive = parse->drive;
1954 }
1955}
1956
46d4767d
FB
/* Return the BIOS geometry translation hint previously set for @bs. */
int bdrv_get_translation_hint(BlockDriverState *bs)
{
    return bs->translation;
}

/* Configure the action taken when a read or write error occurs. */
void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
                       BlockErrorAction on_write_error)
{
    bs->on_read_error = on_read_error;
    bs->on_write_error = on_write_error;
}

/* Fetch the configured error action: reads when is_read != 0, else writes. */
BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
{
    return is_read ? bs->on_read_error : bs->on_write_error;
}

/* Return non-zero if the device was opened read-only. */
int bdrv_is_read_only(BlockDriverState *bs)
{
    return bs->read_only;
}

/* Return non-zero for SCSI-generic (passthrough) devices. */
int bdrv_is_sg(BlockDriverState *bs)
{
    return bs->sg;
}

/* Return non-zero if the write cache is enabled for this device. */
int bdrv_enable_write_cache(BlockDriverState *bs)
{
    return bs->enable_write_cache;
}
1988
ea2384d3
FB
/* Return non-zero if @bs or its backing file is encrypted. */
int bdrv_is_encrypted(BlockDriverState *bs)
{
    if (bs->backing_hd && bs->backing_hd->encrypted)
        return 1;
    return bs->encrypted;
}

/* Return non-zero if an encryption key still has to be supplied before the
 * image (or its backing file) can be used. */
int bdrv_key_required(BlockDriverState *bs)
{
    BlockDriverState *backing_hd = bs->backing_hd;

    if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
        return 1;
    return (bs->encrypted && !bs->valid_key);
}
2004
ea2384d3
FB
/*
 * Supply the encryption key for @bs, first unlocking an encrypted backing
 * file with the same key.  Returns 0 on success, negative errno on failure.
 */
int bdrv_set_key(BlockDriverState *bs, const char *key)
{
    int ret;
    if (bs->backing_hd && bs->backing_hd->encrypted) {
        ret = bdrv_set_key(bs->backing_hd, key);
        if (ret < 0)
            return ret;
        /* Only the backing file needed a key: done. */
        if (!bs->encrypted)
            return 0;
    }
    if (!bs->encrypted) {
        return -EINVAL;
    } else if (!bs->drv || !bs->drv->bdrv_set_key) {
        return -ENOMEDIUM;
    }
    ret = bs->drv->bdrv_set_key(bs, key);
    if (ret < 0) {
        bs->valid_key = 0;
    } else if (!bs->valid_key) {
        bs->valid_key = 1;
        /* call the change callback now, we skipped it on open */
        bdrv_dev_change_media_cb(bs, true);
    }
    return ret;
}
2030
2031void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
2032{
19cb3738 2033 if (!bs->drv) {
ea2384d3
FB
2034 buf[0] = '\0';
2035 } else {
2036 pstrcpy(buf, buf_size, bs->drv->format_name);
2037 }
2038}
2039
/* Invoke @it once per registered block driver, passing its format name. */
void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
                         void *opaque)
{
    BlockDriver *drv;

    QLIST_FOREACH(drv, &bdrv_drivers, list) {
        it(opaque, drv->format_name);
    }
}
2049
b338082b
FB
/* Look up an open block device by its device name; NULL if not found. */
BlockDriverState *bdrv_find(const char *name)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        if (!strcmp(name, bs->device_name)) {
            return bs;
        }
    }
    return NULL;
}

/* Iterator over open block devices: pass NULL to get the first device,
 * the previous result to get the next; returns NULL at the end. */
BlockDriverState *bdrv_next(BlockDriverState *bs)
{
    if (!bs) {
        return QTAILQ_FIRST(&bdrv_states);
    }
    return QTAILQ_NEXT(bs, list);
}

/* Invoke @it for every open block device. */
void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        it(opaque, bs);
    }
}

/* Return the device name @bs was registered under. */
const char *bdrv_get_device_name(BlockDriverState *bs)
{
    return bs->device_name;
}
2083
c6ca28d6
AL
2084void bdrv_flush_all(void)
2085{
2086 BlockDriverState *bs;
2087
1b7bdbc1 2088 QTAILQ_FOREACH(bs, &bdrv_states, list) {
c602a489 2089 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
c6ca28d6 2090 bdrv_flush(bs);
1b7bdbc1
SH
2091 }
2092 }
c6ca28d6
AL
2093}
2094
f2feebbd
KW
2095int bdrv_has_zero_init(BlockDriverState *bs)
2096{
2097 assert(bs->drv);
2098
336c1c12
KW
2099 if (bs->drv->bdrv_has_zero_init) {
2100 return bs->drv->bdrv_has_zero_init(bs);
f2feebbd
KW
2101 }
2102
2103 return 1;
2104}
2105
376ae3f1
SH
/* Arguments/results marshalled between bdrv_is_allocated() and the
 * coroutine that performs the actual query. */
typedef struct BdrvCoIsAllocatedData {
    BlockDriverState *bs;   /* device to query */
    int64_t sector_num;     /* first sector */
    int nb_sectors;         /* number of sectors to examine */
    int *pnum;              /* out: run length in same allocation state */
    int ret;                /* out: query result */
    bool done;              /* set when the coroutine has finished */
} BdrvCoIsAllocatedData;
2114
f58c7b35
TS
/*
 * Returns true iff the specified sector is present in the disk image. Drivers
 * not implementing the functionality are assumed to not support backing files,
 * hence all their sectors are reported as allocated.
 *
 * If 'sector_num' is beyond the end of the disk image the return value is 0
 * and 'pnum' is set to 0.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
 * beyond the end of the disk image it will be clamped.
 */
int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, int *pnum)
{
    int64_t n;

    /* Entirely past EOF: nothing is allocated there. */
    if (sector_num >= bs->total_sectors) {
        *pnum = 0;
        return 0;
    }

    /* Clamp the request so it does not extend past EOF. */
    n = bs->total_sectors - sector_num;
    if (n < nb_sectors) {
        nb_sectors = n;
    }

    /* Drivers without the callback are assumed fully allocated. */
    if (!bs->drv->bdrv_co_is_allocated) {
        *pnum = nb_sectors;
        return 1;
    }

    return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
}
2152
2153/* Coroutine wrapper for bdrv_is_allocated() */
2154static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
2155{
2156 BdrvCoIsAllocatedData *data = opaque;
2157 BlockDriverState *bs = data->bs;
2158
2159 data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
2160 data->pnum);
2161 data->done = true;
2162}
2163
2164/*
2165 * Synchronous wrapper around bdrv_co_is_allocated().
2166 *
2167 * See bdrv_co_is_allocated() for details.
2168 */
2169int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
2170 int *pnum)
2171{
6aebab14
SH
2172 Coroutine *co;
2173 BdrvCoIsAllocatedData data = {
2174 .bs = bs,
2175 .sector_num = sector_num,
2176 .nb_sectors = nb_sectors,
2177 .pnum = pnum,
2178 .done = false,
2179 };
2180
2181 co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
2182 qemu_coroutine_enter(co, &data);
2183 while (!data.done) {
2184 qemu_aio_wait();
2185 }
2186 return data.ret;
f58c7b35
TS
2187}
2188
2582bfed
LC
2189void bdrv_mon_event(const BlockDriverState *bdrv,
2190 BlockMonEventAction action, int is_read)
2191{
2192 QObject *data;
2193 const char *action_str;
2194
2195 switch (action) {
2196 case BDRV_ACTION_REPORT:
2197 action_str = "report";
2198 break;
2199 case BDRV_ACTION_IGNORE:
2200 action_str = "ignore";
2201 break;
2202 case BDRV_ACTION_STOP:
2203 action_str = "stop";
2204 break;
2205 default:
2206 abort();
2207 }
2208
2209 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
2210 bdrv->device_name,
2211 action_str,
2212 is_read ? "read" : "write");
2213 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
2214
2215 qobject_decref(data);
2216}
2217
b2023818 2218BlockInfoList *qmp_query_block(Error **errp)
b338082b 2219{
b2023818 2220 BlockInfoList *head = NULL, *cur_item = NULL;
b338082b
FB
2221 BlockDriverState *bs;
2222
1b7bdbc1 2223 QTAILQ_FOREACH(bs, &bdrv_states, list) {
b2023818 2224 BlockInfoList *info = g_malloc0(sizeof(*info));
d15e5465 2225
b2023818
LC
2226 info->value = g_malloc0(sizeof(*info->value));
2227 info->value->device = g_strdup(bs->device_name);
2228 info->value->type = g_strdup("unknown");
2229 info->value->locked = bdrv_dev_is_medium_locked(bs);
2230 info->value->removable = bdrv_dev_has_removable_media(bs);
d15e5465 2231
e4def80b 2232 if (bdrv_dev_has_removable_media(bs)) {
b2023818
LC
2233 info->value->has_tray_open = true;
2234 info->value->tray_open = bdrv_dev_is_tray_open(bs);
e4def80b 2235 }
f04ef601
LC
2236
2237 if (bdrv_iostatus_is_enabled(bs)) {
b2023818
LC
2238 info->value->has_io_status = true;
2239 info->value->io_status = bs->iostatus;
f04ef601
LC
2240 }
2241
19cb3738 2242 if (bs->drv) {
b2023818
LC
2243 info->value->has_inserted = true;
2244 info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
2245 info->value->inserted->file = g_strdup(bs->filename);
2246 info->value->inserted->ro = bs->read_only;
2247 info->value->inserted->drv = g_strdup(bs->drv->format_name);
2248 info->value->inserted->encrypted = bs->encrypted;
2249 if (bs->backing_file[0]) {
2250 info->value->inserted->has_backing_file = true;
2251 info->value->inserted->backing_file = g_strdup(bs->backing_file);
376253ec 2252 }
727f005e
ZYW
2253
2254 if (bs->io_limits_enabled) {
2255 info->value->inserted->bps =
2256 bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2257 info->value->inserted->bps_rd =
2258 bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
2259 info->value->inserted->bps_wr =
2260 bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
2261 info->value->inserted->iops =
2262 bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2263 info->value->inserted->iops_rd =
2264 bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
2265 info->value->inserted->iops_wr =
2266 bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
2267 }
b2023818 2268 }
d15e5465 2269
b2023818
LC
2270 /* XXX: waiting for the qapi to support GSList */
2271 if (!cur_item) {
2272 head = cur_item = info;
2273 } else {
2274 cur_item->next = info;
2275 cur_item = info;
b338082b 2276 }
b338082b 2277 }
d15e5465 2278
b2023818 2279 return head;
b338082b 2280}
a36e69dd 2281
f11f57e4
LC
2282/* Consider exposing this as a full fledged QMP command */
2283static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
2284{
2285 BlockStats *s;
2286
2287 s = g_malloc0(sizeof(*s));
2288
2289 if (bs->device_name[0]) {
2290 s->has_device = true;
2291 s->device = g_strdup(bs->device_name);
294cc35f
KW
2292 }
2293
f11f57e4
LC
2294 s->stats = g_malloc0(sizeof(*s->stats));
2295 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
2296 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
2297 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
2298 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
2299 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
2300 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
2301 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
2302 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
2303 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
2304
294cc35f 2305 if (bs->file) {
f11f57e4
LC
2306 s->has_parent = true;
2307 s->parent = qmp_query_blockstat(bs->file, NULL);
294cc35f
KW
2308 }
2309
f11f57e4 2310 return s;
294cc35f
KW
2311}
2312
f11f57e4 2313BlockStatsList *qmp_query_blockstats(Error **errp)
218a536a 2314{
f11f57e4 2315 BlockStatsList *head = NULL, *cur_item = NULL;
a36e69dd
TS
2316 BlockDriverState *bs;
2317
1b7bdbc1 2318 QTAILQ_FOREACH(bs, &bdrv_states, list) {
f11f57e4
LC
2319 BlockStatsList *info = g_malloc0(sizeof(*info));
2320 info->value = qmp_query_blockstat(bs, NULL);
2321
2322 /* XXX: waiting for the qapi to support GSList */
2323 if (!cur_item) {
2324 head = cur_item = info;
2325 } else {
2326 cur_item->next = info;
2327 cur_item = info;
2328 }
a36e69dd 2329 }
218a536a 2330
f11f57e4 2331 return head;
a36e69dd 2332}
ea2384d3 2333
045df330
AL
2334const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2335{
2336 if (bs->backing_hd && bs->backing_hd->encrypted)
2337 return bs->backing_file;
2338 else if (bs->encrypted)
2339 return bs->filename;
2340 else
2341 return NULL;
2342}
2343
5fafdf24 2344void bdrv_get_backing_filename(BlockDriverState *bs,
83f64091
FB
2345 char *filename, int filename_size)
2346{
3574c608 2347 pstrcpy(filename, filename_size, bs->backing_file);
83f64091
FB
2348}
2349
5fafdf24 2350int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
faea38e7
FB
2351 const uint8_t *buf, int nb_sectors)
2352{
2353 BlockDriver *drv = bs->drv;
2354 if (!drv)
19cb3738 2355 return -ENOMEDIUM;
faea38e7
FB
2356 if (!drv->bdrv_write_compressed)
2357 return -ENOTSUP;
fbb7b4e0
KW
2358 if (bdrv_check_request(bs, sector_num, nb_sectors))
2359 return -EIO;
a55eb92c 2360
c6d22830 2361 if (bs->dirty_bitmap) {
7cd1e32a 2362 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2363 }
a55eb92c 2364
faea38e7
FB
2365 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2366}
3b46e624 2367
faea38e7
FB
2368int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2369{
2370 BlockDriver *drv = bs->drv;
2371 if (!drv)
19cb3738 2372 return -ENOMEDIUM;
faea38e7
FB
2373 if (!drv->bdrv_get_info)
2374 return -ENOTSUP;
2375 memset(bdi, 0, sizeof(*bdi));
2376 return drv->bdrv_get_info(bs, bdi);
2377}
2378
45566e9c
CH
2379int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2380 int64_t pos, int size)
178e08a5
AL
2381{
2382 BlockDriver *drv = bs->drv;
2383 if (!drv)
2384 return -ENOMEDIUM;
7cdb1f6d
MK
2385 if (drv->bdrv_save_vmstate)
2386 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2387 if (bs->file)
2388 return bdrv_save_vmstate(bs->file, buf, pos, size);
2389 return -ENOTSUP;
178e08a5
AL
2390}
2391
45566e9c
CH
2392int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2393 int64_t pos, int size)
178e08a5
AL
2394{
2395 BlockDriver *drv = bs->drv;
2396 if (!drv)
2397 return -ENOMEDIUM;
7cdb1f6d
MK
2398 if (drv->bdrv_load_vmstate)
2399 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2400 if (bs->file)
2401 return bdrv_load_vmstate(bs->file, buf, pos, size);
2402 return -ENOTSUP;
178e08a5
AL
2403}
2404
8b9b0cc2
KW
2405void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2406{
2407 BlockDriver *drv = bs->drv;
2408
2409 if (!drv || !drv->bdrv_debug_event) {
2410 return;
2411 }
2412
2413 return drv->bdrv_debug_event(bs, event);
2414
2415}
2416
faea38e7
FB
2417/**************************************************************/
2418/* handling of snapshots */
2419
feeee5ac
MDCF
2420int bdrv_can_snapshot(BlockDriverState *bs)
2421{
2422 BlockDriver *drv = bs->drv;
07b70bfb 2423 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
feeee5ac
MDCF
2424 return 0;
2425 }
2426
2427 if (!drv->bdrv_snapshot_create) {
2428 if (bs->file != NULL) {
2429 return bdrv_can_snapshot(bs->file);
2430 }
2431 return 0;
2432 }
2433
2434 return 1;
2435}
2436
199630b6
BS
2437int bdrv_is_snapshot(BlockDriverState *bs)
2438{
2439 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2440}
2441
f9092b10
MA
2442BlockDriverState *bdrv_snapshots(void)
2443{
2444 BlockDriverState *bs;
2445
3ac906f7 2446 if (bs_snapshots) {
f9092b10 2447 return bs_snapshots;
3ac906f7 2448 }
f9092b10
MA
2449
2450 bs = NULL;
2451 while ((bs = bdrv_next(bs))) {
2452 if (bdrv_can_snapshot(bs)) {
3ac906f7
MA
2453 bs_snapshots = bs;
2454 return bs;
f9092b10
MA
2455 }
2456 }
2457 return NULL;
f9092b10
MA
2458}
2459
5fafdf24 2460int bdrv_snapshot_create(BlockDriverState *bs,
faea38e7
FB
2461 QEMUSnapshotInfo *sn_info)
2462{
2463 BlockDriver *drv = bs->drv;
2464 if (!drv)
19cb3738 2465 return -ENOMEDIUM;
7cdb1f6d
MK
2466 if (drv->bdrv_snapshot_create)
2467 return drv->bdrv_snapshot_create(bs, sn_info);
2468 if (bs->file)
2469 return bdrv_snapshot_create(bs->file, sn_info);
2470 return -ENOTSUP;
faea38e7
FB
2471}
2472
5fafdf24 2473int bdrv_snapshot_goto(BlockDriverState *bs,
faea38e7
FB
2474 const char *snapshot_id)
2475{
2476 BlockDriver *drv = bs->drv;
7cdb1f6d
MK
2477 int ret, open_ret;
2478
faea38e7 2479 if (!drv)
19cb3738 2480 return -ENOMEDIUM;
7cdb1f6d
MK
2481 if (drv->bdrv_snapshot_goto)
2482 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2483
2484 if (bs->file) {
2485 drv->bdrv_close(bs);
2486 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2487 open_ret = drv->bdrv_open(bs, bs->open_flags);
2488 if (open_ret < 0) {
2489 bdrv_delete(bs->file);
2490 bs->drv = NULL;
2491 return open_ret;
2492 }
2493 return ret;
2494 }
2495
2496 return -ENOTSUP;
faea38e7
FB
2497}
2498
2499int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2500{
2501 BlockDriver *drv = bs->drv;
2502 if (!drv)
19cb3738 2503 return -ENOMEDIUM;
7cdb1f6d
MK
2504 if (drv->bdrv_snapshot_delete)
2505 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2506 if (bs->file)
2507 return bdrv_snapshot_delete(bs->file, snapshot_id);
2508 return -ENOTSUP;
faea38e7
FB
2509}
2510
5fafdf24 2511int bdrv_snapshot_list(BlockDriverState *bs,
faea38e7
FB
2512 QEMUSnapshotInfo **psn_info)
2513{
2514 BlockDriver *drv = bs->drv;
2515 if (!drv)
19cb3738 2516 return -ENOMEDIUM;
7cdb1f6d
MK
2517 if (drv->bdrv_snapshot_list)
2518 return drv->bdrv_snapshot_list(bs, psn_info);
2519 if (bs->file)
2520 return bdrv_snapshot_list(bs->file, psn_info);
2521 return -ENOTSUP;
faea38e7
FB
2522}
2523
51ef6727 2524int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2525 const char *snapshot_name)
2526{
2527 BlockDriver *drv = bs->drv;
2528 if (!drv) {
2529 return -ENOMEDIUM;
2530 }
2531 if (!bs->read_only) {
2532 return -EINVAL;
2533 }
2534 if (drv->bdrv_snapshot_load_tmp) {
2535 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2536 }
2537 return -ENOTSUP;
2538}
2539
faea38e7
FB
#define NB_SUFFIXES 4

/* Format @size into @buf as a human-readable string: plain digits up to 999,
 * otherwise one decimal place below ten units ("1.5K") or a rounded integer
 * below a thousand units ("976K"), scaling through K/M/G/T.  Returns @buf. */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t base = 1024;
    int i;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (i = 0; i < NB_SUFFIXES; i++, base *= 1024) {
        if (size < (10 * base)) {
            /* Below ten units: one decimal place. */
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / base, suffixes[i]);
            break;
        }
        if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
            /* Round half-up to a whole number of units. */
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     (size + (base >> 1)) / base, suffixes[i]);
            break;
        }
    }
    return buf;
}
2569
2570char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2571{
2572 char buf1[128], date_buf[128], clock_buf[128];
3b9f94e1
FB
2573#ifdef _WIN32
2574 struct tm *ptm;
2575#else
faea38e7 2576 struct tm tm;
3b9f94e1 2577#endif
faea38e7
FB
2578 time_t ti;
2579 int64_t secs;
2580
2581 if (!sn) {
5fafdf24
TS
2582 snprintf(buf, buf_size,
2583 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
2584 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2585 } else {
2586 ti = sn->date_sec;
3b9f94e1
FB
2587#ifdef _WIN32
2588 ptm = localtime(&ti);
2589 strftime(date_buf, sizeof(date_buf),
2590 "%Y-%m-%d %H:%M:%S", ptm);
2591#else
faea38e7
FB
2592 localtime_r(&ti, &tm);
2593 strftime(date_buf, sizeof(date_buf),
2594 "%Y-%m-%d %H:%M:%S", &tm);
3b9f94e1 2595#endif
faea38e7
FB
2596 secs = sn->vm_clock_nsec / 1000000000;
2597 snprintf(clock_buf, sizeof(clock_buf),
2598 "%02d:%02d:%02d.%03d",
2599 (int)(secs / 3600),
2600 (int)((secs / 60) % 60),
5fafdf24 2601 (int)(secs % 60),
faea38e7
FB
2602 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2603 snprintf(buf, buf_size,
5fafdf24 2604 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
2605 sn->id_str, sn->name,
2606 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2607 date_buf,
2608 clock_buf);
2609 }
2610 return buf;
2611}
2612
ea2384d3 2613/**************************************************************/
83f64091 2614/* async I/Os */
ea2384d3 2615
3b69e4b9 2616BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
f141eafe 2617 QEMUIOVector *qiov, int nb_sectors,
3b69e4b9 2618 BlockDriverCompletionFunc *cb, void *opaque)
83f64091 2619{
bbf0a440
SH
2620 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2621
b2a61371 2622 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
8c5873d6 2623 cb, opaque, false);
ea2384d3
FB
2624}
2625
f141eafe
AL
2626BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2627 QEMUIOVector *qiov, int nb_sectors,
2628 BlockDriverCompletionFunc *cb, void *opaque)
ea2384d3 2629{
bbf0a440
SH
2630 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2631
1a6e115b 2632 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
8c5873d6 2633 cb, opaque, true);
83f64091
FB
2634}
2635
40b4f539
KW
2636
2637typedef struct MultiwriteCB {
2638 int error;
2639 int num_requests;
2640 int num_callbacks;
2641 struct {
2642 BlockDriverCompletionFunc *cb;
2643 void *opaque;
2644 QEMUIOVector *free_qiov;
2645 void *free_buf;
2646 } callbacks[];
2647} MultiwriteCB;
2648
2649static void multiwrite_user_cb(MultiwriteCB *mcb)
2650{
2651 int i;
2652
2653 for (i = 0; i < mcb->num_callbacks; i++) {
2654 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1e1ea48d
SH
2655 if (mcb->callbacks[i].free_qiov) {
2656 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2657 }
7267c094 2658 g_free(mcb->callbacks[i].free_qiov);
f8a83245 2659 qemu_vfree(mcb->callbacks[i].free_buf);
40b4f539
KW
2660 }
2661}
2662
2663static void multiwrite_cb(void *opaque, int ret)
2664{
2665 MultiwriteCB *mcb = opaque;
2666
6d519a5f
SH
2667 trace_multiwrite_cb(mcb, ret);
2668
cb6d3ca0 2669 if (ret < 0 && !mcb->error) {
40b4f539 2670 mcb->error = ret;
40b4f539
KW
2671 }
2672
2673 mcb->num_requests--;
2674 if (mcb->num_requests == 0) {
de189a1b 2675 multiwrite_user_cb(mcb);
7267c094 2676 g_free(mcb);
40b4f539
KW
2677 }
2678}
2679
2680static int multiwrite_req_compare(const void *a, const void *b)
2681{
77be4366
CH
2682 const BlockRequest *req1 = a, *req2 = b;
2683
2684 /*
2685 * Note that we can't simply subtract req2->sector from req1->sector
2686 * here as that could overflow the return value.
2687 */
2688 if (req1->sector > req2->sector) {
2689 return 1;
2690 } else if (req1->sector < req2->sector) {
2691 return -1;
2692 } else {
2693 return 0;
2694 }
40b4f539
KW
2695}
2696
2697/*
2698 * Takes a bunch of requests and tries to merge them. Returns the number of
2699 * requests that remain after merging.
2700 */
2701static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2702 int num_reqs, MultiwriteCB *mcb)
2703{
2704 int i, outidx;
2705
2706 // Sort requests by start sector
2707 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2708
2709 // Check if adjacent requests touch the same clusters. If so, combine them,
2710 // filling up gaps with zero sectors.
2711 outidx = 0;
2712 for (i = 1; i < num_reqs; i++) {
2713 int merge = 0;
2714 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2715
2716 // This handles the cases that are valid for all block drivers, namely
2717 // exactly sequential writes and overlapping writes.
2718 if (reqs[i].sector <= oldreq_last) {
2719 merge = 1;
2720 }
2721
2722 // The block driver may decide that it makes sense to combine requests
2723 // even if there is a gap of some sectors between them. In this case,
2724 // the gap is filled with zeros (therefore only applicable for yet
2725 // unused space in format like qcow2).
2726 if (!merge && bs->drv->bdrv_merge_requests) {
2727 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2728 }
2729
e2a305fb
CH
2730 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2731 merge = 0;
2732 }
2733
40b4f539
KW
2734 if (merge) {
2735 size_t size;
7267c094 2736 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
40b4f539
KW
2737 qemu_iovec_init(qiov,
2738 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2739
2740 // Add the first request to the merged one. If the requests are
2741 // overlapping, drop the last sectors of the first request.
2742 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2743 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2744
2745 // We might need to add some zeros between the two requests
2746 if (reqs[i].sector > oldreq_last) {
2747 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2748 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2749 memset(buf, 0, zero_bytes);
2750 qemu_iovec_add(qiov, buf, zero_bytes);
2751 mcb->callbacks[i].free_buf = buf;
2752 }
2753
2754 // Add the second request
2755 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2756
cbf1dff2 2757 reqs[outidx].nb_sectors = qiov->size >> 9;
40b4f539
KW
2758 reqs[outidx].qiov = qiov;
2759
2760 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2761 } else {
2762 outidx++;
2763 reqs[outidx].sector = reqs[i].sector;
2764 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2765 reqs[outidx].qiov = reqs[i].qiov;
2766 }
2767 }
2768
2769 return outidx + 1;
2770}
2771
2772/*
2773 * Submit multiple AIO write requests at once.
2774 *
2775 * On success, the function returns 0 and all requests in the reqs array have
2776 * been submitted. In error case this function returns -1, and any of the
2777 * requests may or may not be submitted yet. In particular, this means that the
2778 * callback will be called for some of the requests, for others it won't. The
2779 * caller must check the error field of the BlockRequest to wait for the right
2780 * callbacks (if error != 0, no callback will be called).
2781 *
2782 * The implementation may modify the contents of the reqs array, e.g. to merge
2783 * requests. However, the fields opaque and error are left unmodified as they
2784 * are used to signal failure for a single request to the caller.
2785 */
2786int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2787{
2788 BlockDriverAIOCB *acb;
2789 MultiwriteCB *mcb;
2790 int i;
2791
301db7c2
RH
2792 /* don't submit writes if we don't have a medium */
2793 if (bs->drv == NULL) {
2794 for (i = 0; i < num_reqs; i++) {
2795 reqs[i].error = -ENOMEDIUM;
2796 }
2797 return -1;
2798 }
2799
40b4f539
KW
2800 if (num_reqs == 0) {
2801 return 0;
2802 }
2803
2804 // Create MultiwriteCB structure
7267c094 2805 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
40b4f539
KW
2806 mcb->num_requests = 0;
2807 mcb->num_callbacks = num_reqs;
2808
2809 for (i = 0; i < num_reqs; i++) {
2810 mcb->callbacks[i].cb = reqs[i].cb;
2811 mcb->callbacks[i].opaque = reqs[i].opaque;
2812 }
2813
2814 // Check for mergable requests
2815 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2816
6d519a5f
SH
2817 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2818
453f9a16
KW
2819 /*
2820 * Run the aio requests. As soon as one request can't be submitted
2821 * successfully, fail all requests that are not yet submitted (we must
2822 * return failure for all requests anyway)
2823 *
2824 * num_requests cannot be set to the right value immediately: If
2825 * bdrv_aio_writev fails for some request, num_requests would be too high
2826 * and therefore multiwrite_cb() would never recognize the multiwrite
2827 * request as completed. We also cannot use the loop variable i to set it
2828 * when the first request fails because the callback may already have been
2829 * called for previously submitted requests. Thus, num_requests must be
2830 * incremented for each request that is submitted.
2831 *
2832 * The problem that callbacks may be called early also means that we need
2833 * to take care that num_requests doesn't become 0 before all requests are
2834 * submitted - multiwrite_cb() would consider the multiwrite request
2835 * completed. A dummy request that is "completed" by a manual call to
2836 * multiwrite_cb() takes care of this.
2837 */
2838 mcb->num_requests = 1;
2839
6d519a5f 2840 // Run the aio requests
40b4f539 2841 for (i = 0; i < num_reqs; i++) {
453f9a16 2842 mcb->num_requests++;
40b4f539
KW
2843 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2844 reqs[i].nb_sectors, multiwrite_cb, mcb);
2845
2846 if (acb == NULL) {
2847 // We can only fail the whole thing if no request has been
2848 // submitted yet. Otherwise we'll wait for the submitted AIOs to
2849 // complete and report the error in the callback.
453f9a16 2850 if (i == 0) {
6d519a5f 2851 trace_bdrv_aio_multiwrite_earlyfail(mcb);
40b4f539
KW
2852 goto fail;
2853 } else {
6d519a5f 2854 trace_bdrv_aio_multiwrite_latefail(mcb, i);
7eb58a6c 2855 multiwrite_cb(mcb, -EIO);
40b4f539
KW
2856 break;
2857 }
40b4f539
KW
2858 }
2859 }
2860
453f9a16
KW
2861 /* Complete the dummy request */
2862 multiwrite_cb(mcb, 0);
2863
40b4f539
KW
2864 return 0;
2865
2866fail:
453f9a16
KW
2867 for (i = 0; i < mcb->num_callbacks; i++) {
2868 reqs[i].error = -EIO;
2869 }
7267c094 2870 g_free(mcb);
40b4f539
KW
2871 return -1;
2872}
2873
83f64091 2874void bdrv_aio_cancel(BlockDriverAIOCB *acb)
83f64091 2875{
6bbff9a0 2876 acb->pool->cancel(acb);
83f64091
FB
2877}
2878
98f90dba
ZYW
2879/* block I/O throttling */
2880static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
2881 bool is_write, double elapsed_time, uint64_t *wait)
2882{
2883 uint64_t bps_limit = 0;
2884 double bytes_limit, bytes_base, bytes_res;
2885 double slice_time, wait_time;
2886
2887 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
2888 bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2889 } else if (bs->io_limits.bps[is_write]) {
2890 bps_limit = bs->io_limits.bps[is_write];
2891 } else {
2892 if (wait) {
2893 *wait = 0;
2894 }
2895
2896 return false;
2897 }
2898
2899 slice_time = bs->slice_end - bs->slice_start;
2900 slice_time /= (NANOSECONDS_PER_SECOND);
2901 bytes_limit = bps_limit * slice_time;
2902 bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
2903 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
2904 bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
2905 }
2906
2907 /* bytes_base: the bytes of data which have been read/written; and
2908 * it is obtained from the history statistic info.
2909 * bytes_res: the remaining bytes of data which need to be read/written.
2910 * (bytes_base + bytes_res) / bps_limit: used to calcuate
2911 * the total time for completing reading/writting all data.
2912 */
2913 bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
2914
2915 if (bytes_base + bytes_res <= bytes_limit) {
2916 if (wait) {
2917 *wait = 0;
2918 }
2919
2920 return false;
2921 }
2922
2923 /* Calc approx time to dispatch */
2924 wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
2925
2926 /* When the I/O rate at runtime exceeds the limits,
2927 * bs->slice_end need to be extended in order that the current statistic
2928 * info can be kept until the timer fire, so it is increased and tuned
2929 * based on the result of experiment.
2930 */
2931 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
2932 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
2933 if (wait) {
2934 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
2935 }
2936
2937 return true;
2938}
2939
2940static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
2941 double elapsed_time, uint64_t *wait)
2942{
2943 uint64_t iops_limit = 0;
2944 double ios_limit, ios_base;
2945 double slice_time, wait_time;
2946
2947 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
2948 iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2949 } else if (bs->io_limits.iops[is_write]) {
2950 iops_limit = bs->io_limits.iops[is_write];
2951 } else {
2952 if (wait) {
2953 *wait = 0;
2954 }
2955
2956 return false;
2957 }
2958
2959 slice_time = bs->slice_end - bs->slice_start;
2960 slice_time /= (NANOSECONDS_PER_SECOND);
2961 ios_limit = iops_limit * slice_time;
2962 ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
2963 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
2964 ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
2965 }
2966
2967 if (ios_base + 1 <= ios_limit) {
2968 if (wait) {
2969 *wait = 0;
2970 }
2971
2972 return false;
2973 }
2974
2975 /* Calc approx time to dispatch */
2976 wait_time = (ios_base + 1) / iops_limit;
2977 if (wait_time > elapsed_time) {
2978 wait_time = wait_time - elapsed_time;
2979 } else {
2980 wait_time = 0;
2981 }
2982
2983 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
2984 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
2985 if (wait) {
2986 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
2987 }
2988
2989 return true;
2990}
2991
2992static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
2993 bool is_write, int64_t *wait)
2994{
2995 int64_t now, max_wait;
2996 uint64_t bps_wait = 0, iops_wait = 0;
2997 double elapsed_time;
2998 int bps_ret, iops_ret;
2999
3000 now = qemu_get_clock_ns(vm_clock);
3001 if ((bs->slice_start < now)
3002 && (bs->slice_end > now)) {
3003 bs->slice_end = now + bs->slice_time;
3004 } else {
3005 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
3006 bs->slice_start = now;
3007 bs->slice_end = now + bs->slice_time;
3008
3009 bs->io_base.bytes[is_write] = bs->nr_bytes[is_write];
3010 bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
3011
3012 bs->io_base.ios[is_write] = bs->nr_ops[is_write];
3013 bs->io_base.ios[!is_write] = bs->nr_ops[!is_write];
3014 }
3015
3016 elapsed_time = now - bs->slice_start;
3017 elapsed_time /= (NANOSECONDS_PER_SECOND);
3018
3019 bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
3020 is_write, elapsed_time, &bps_wait);
3021 iops_ret = bdrv_exceed_iops_limits(bs, is_write,
3022 elapsed_time, &iops_wait);
3023 if (bps_ret || iops_ret) {
3024 max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
3025 if (wait) {
3026 *wait = max_wait;
3027 }
3028
3029 now = qemu_get_clock_ns(vm_clock);
3030 if (bs->slice_end < now + max_wait) {
3031 bs->slice_end = now + max_wait;
3032 }
3033
3034 return true;
3035 }
3036
3037 if (wait) {
3038 *wait = 0;
3039 }
3040
3041 return false;
3042}
ce1a14dc 3043
83f64091
FB
3044/**************************************************************/
3045/* async block device emulation */
3046
c16b5a2c
CH
3047typedef struct BlockDriverAIOCBSync {
3048 BlockDriverAIOCB common;
3049 QEMUBH *bh;
3050 int ret;
3051 /* vector translation state */
3052 QEMUIOVector *qiov;
3053 uint8_t *bounce;
3054 int is_write;
3055} BlockDriverAIOCBSync;
3056
3057static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
3058{
b666d239
KW
3059 BlockDriverAIOCBSync *acb =
3060 container_of(blockacb, BlockDriverAIOCBSync, common);
6a7ad299 3061 qemu_bh_delete(acb->bh);
36afc451 3062 acb->bh = NULL;
c16b5a2c
CH
3063 qemu_aio_release(acb);
3064}
3065
3066static AIOPool bdrv_em_aio_pool = {
3067 .aiocb_size = sizeof(BlockDriverAIOCBSync),
3068 .cancel = bdrv_aio_cancel_em,
3069};
3070
ce1a14dc 3071static void bdrv_aio_bh_cb(void *opaque)
83f64091 3072{
ce1a14dc 3073 BlockDriverAIOCBSync *acb = opaque;
f141eafe 3074
f141eafe
AL
3075 if (!acb->is_write)
3076 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
ceb42de8 3077 qemu_vfree(acb->bounce);
ce1a14dc 3078 acb->common.cb(acb->common.opaque, acb->ret);
6a7ad299 3079 qemu_bh_delete(acb->bh);
36afc451 3080 acb->bh = NULL;
ce1a14dc 3081 qemu_aio_release(acb);
83f64091 3082}
beac80cd 3083
f141eafe
AL
3084static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
3085 int64_t sector_num,
3086 QEMUIOVector *qiov,
3087 int nb_sectors,
3088 BlockDriverCompletionFunc *cb,
3089 void *opaque,
3090 int is_write)
3091
83f64091 3092{
ce1a14dc 3093 BlockDriverAIOCBSync *acb;
ce1a14dc 3094
c16b5a2c 3095 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
f141eafe
AL
3096 acb->is_write = is_write;
3097 acb->qiov = qiov;
e268ca52 3098 acb->bounce = qemu_blockalign(bs, qiov->size);
f141eafe 3099
ce1a14dc
PB
3100 if (!acb->bh)
3101 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
f141eafe
AL
3102
3103 if (is_write) {
3104 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
1ed20acf 3105 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
f141eafe 3106 } else {
1ed20acf 3107 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
f141eafe
AL
3108 }
3109
ce1a14dc 3110 qemu_bh_schedule(acb->bh);
f141eafe 3111
ce1a14dc 3112 return &acb->common;
beac80cd
FB
3113}
3114
f141eafe
AL
3115static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
3116 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 3117 BlockDriverCompletionFunc *cb, void *opaque)
beac80cd 3118{
f141eafe
AL
3119 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
3120}
83f64091 3121
f141eafe
AL
3122static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
3123 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
3124 BlockDriverCompletionFunc *cb, void *opaque)
3125{
3126 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
beac80cd 3127}
beac80cd 3128
68485420
KW
3129
3130typedef struct BlockDriverAIOCBCoroutine {
3131 BlockDriverAIOCB common;
3132 BlockRequest req;
3133 bool is_write;
3134 QEMUBH* bh;
3135} BlockDriverAIOCBCoroutine;
3136
3137static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
3138{
3139 qemu_aio_flush();
3140}
3141
3142static AIOPool bdrv_em_co_aio_pool = {
3143 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
3144 .cancel = bdrv_aio_co_cancel_em,
3145};
3146
35246a68 3147static void bdrv_co_em_bh(void *opaque)
68485420
KW
3148{
3149 BlockDriverAIOCBCoroutine *acb = opaque;
3150
3151 acb->common.cb(acb->common.opaque, acb->req.error);
3152 qemu_bh_delete(acb->bh);
3153 qemu_aio_release(acb);
3154}
3155
b2a61371
SH
3156/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
3157static void coroutine_fn bdrv_co_do_rw(void *opaque)
3158{
3159 BlockDriverAIOCBCoroutine *acb = opaque;
3160 BlockDriverState *bs = acb->common.bs;
3161
3162 if (!acb->is_write) {
3163 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
3164 acb->req.nb_sectors, acb->req.qiov);
3165 } else {
3166 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
3167 acb->req.nb_sectors, acb->req.qiov);
3168 }
3169
35246a68 3170 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
b2a61371
SH
3171 qemu_bh_schedule(acb->bh);
3172}
3173
68485420
KW
3174static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
3175 int64_t sector_num,
3176 QEMUIOVector *qiov,
3177 int nb_sectors,
3178 BlockDriverCompletionFunc *cb,
3179 void *opaque,
8c5873d6 3180 bool is_write)
68485420
KW
3181{
3182 Coroutine *co;
3183 BlockDriverAIOCBCoroutine *acb;
3184
3185 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3186 acb->req.sector = sector_num;
3187 acb->req.nb_sectors = nb_sectors;
3188 acb->req.qiov = qiov;
3189 acb->is_write = is_write;
3190
8c5873d6 3191 co = qemu_coroutine_create(bdrv_co_do_rw);
68485420
KW
3192 qemu_coroutine_enter(co, acb);
3193
3194 return &acb->common;
3195}
3196
07f07615 3197static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
b2e12bc6 3198{
07f07615
PB
3199 BlockDriverAIOCBCoroutine *acb = opaque;
3200 BlockDriverState *bs = acb->common.bs;
b2e12bc6 3201
07f07615
PB
3202 acb->req.error = bdrv_co_flush(bs);
3203 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
b2e12bc6 3204 qemu_bh_schedule(acb->bh);
b2e12bc6
CH
3205}
3206
07f07615 3207BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
016f5cf6
AG
3208 BlockDriverCompletionFunc *cb, void *opaque)
3209{
07f07615 3210 trace_bdrv_aio_flush(bs, opaque);
016f5cf6 3211
07f07615
PB
3212 Coroutine *co;
3213 BlockDriverAIOCBCoroutine *acb;
016f5cf6 3214
07f07615
PB
3215 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3216 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
3217 qemu_coroutine_enter(co, acb);
016f5cf6 3218
016f5cf6
AG
3219 return &acb->common;
3220}
3221
4265d620
PB
3222static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
3223{
3224 BlockDriverAIOCBCoroutine *acb = opaque;
3225 BlockDriverState *bs = acb->common.bs;
3226
3227 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
3228 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3229 qemu_bh_schedule(acb->bh);
3230}
3231
3232BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
3233 int64_t sector_num, int nb_sectors,
3234 BlockDriverCompletionFunc *cb, void *opaque)
3235{
3236 Coroutine *co;
3237 BlockDriverAIOCBCoroutine *acb;
3238
3239 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
3240
3241 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3242 acb->req.sector = sector_num;
3243 acb->req.nb_sectors = nb_sectors;
3244 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
3245 qemu_coroutine_enter(co, acb);
3246
3247 return &acb->common;
3248}
3249
ea2384d3
FB
3250void bdrv_init(void)
3251{
5efa9d5a 3252 module_call_init(MODULE_INIT_BLOCK);
ea2384d3 3253}
ce1a14dc 3254
eb852011
MA
3255void bdrv_init_with_whitelist(void)
3256{
3257 use_bdrv_whitelist = 1;
3258 bdrv_init();
3259}
3260
c16b5a2c
CH
3261void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
3262 BlockDriverCompletionFunc *cb, void *opaque)
ce1a14dc 3263{
ce1a14dc
PB
3264 BlockDriverAIOCB *acb;
3265
6bbff9a0
AL
3266 if (pool->free_aiocb) {
3267 acb = pool->free_aiocb;
3268 pool->free_aiocb = acb->next;
ce1a14dc 3269 } else {
7267c094 3270 acb = g_malloc0(pool->aiocb_size);
6bbff9a0 3271 acb->pool = pool;
ce1a14dc
PB
3272 }
3273 acb->bs = bs;
3274 acb->cb = cb;
3275 acb->opaque = opaque;
3276 return acb;
3277}
3278
3279void qemu_aio_release(void *p)
3280{
6bbff9a0
AL
3281 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
3282 AIOPool *pool = acb->pool;
3283 acb->next = pool->free_aiocb;
3284 pool->free_aiocb = acb;
ce1a14dc 3285}
19cb3738 3286
f9f05dc5
KW
3287/**************************************************************/
3288/* Coroutine block device emulation */
3289
3290typedef struct CoroutineIOCompletion {
3291 Coroutine *coroutine;
3292 int ret;
3293} CoroutineIOCompletion;
3294
3295static void bdrv_co_io_em_complete(void *opaque, int ret)
3296{
3297 CoroutineIOCompletion *co = opaque;
3298
3299 co->ret = ret;
3300 qemu_coroutine_enter(co->coroutine, NULL);
3301}
3302
3303static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
3304 int nb_sectors, QEMUIOVector *iov,
3305 bool is_write)
3306{
3307 CoroutineIOCompletion co = {
3308 .coroutine = qemu_coroutine_self(),
3309 };
3310 BlockDriverAIOCB *acb;
3311
3312 if (is_write) {
a652d160
SH
3313 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
3314 bdrv_co_io_em_complete, &co);
f9f05dc5 3315 } else {
a652d160
SH
3316 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3317 bdrv_co_io_em_complete, &co);
f9f05dc5
KW
3318 }
3319
59370aaa 3320 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
f9f05dc5
KW
3321 if (!acb) {
3322 return -EIO;
3323 }
3324 qemu_coroutine_yield();
3325
3326 return co.ret;
3327}
3328
3329static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3330 int64_t sector_num, int nb_sectors,
3331 QEMUIOVector *iov)
3332{
3333 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3334}
3335
3336static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3337 int64_t sector_num, int nb_sectors,
3338 QEMUIOVector *iov)
3339{
3340 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3341}
3342
07f07615 3343static void coroutine_fn bdrv_flush_co_entry(void *opaque)
e7a8a783 3344{
07f07615
PB
3345 RwCo *rwco = opaque;
3346
3347 rwco->ret = bdrv_co_flush(rwco->bs);
3348}
3349
3350int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
3351{
eb489bb1
KW
3352 int ret;
3353
ca716364 3354 if (!bs->drv) {
07f07615 3355 return 0;
eb489bb1
KW
3356 }
3357
ca716364 3358 /* Write back cached data to the OS even with cache=unsafe */
eb489bb1
KW
3359 if (bs->drv->bdrv_co_flush_to_os) {
3360 ret = bs->drv->bdrv_co_flush_to_os(bs);
3361 if (ret < 0) {
3362 return ret;
3363 }
3364 }
3365
ca716364
KW
3366 /* But don't actually force it to the disk with cache=unsafe */
3367 if (bs->open_flags & BDRV_O_NO_FLUSH) {
3368 return 0;
3369 }
3370
eb489bb1 3371 if (bs->drv->bdrv_co_flush_to_disk) {
c68b89ac 3372 return bs->drv->bdrv_co_flush_to_disk(bs);
07f07615
PB
3373 } else if (bs->drv->bdrv_aio_flush) {
3374 BlockDriverAIOCB *acb;
3375 CoroutineIOCompletion co = {
3376 .coroutine = qemu_coroutine_self(),
3377 };
3378
3379 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3380 if (acb == NULL) {
3381 return -EIO;
3382 } else {
3383 qemu_coroutine_yield();
3384 return co.ret;
3385 }
07f07615
PB
3386 } else {
3387 /*
3388 * Some block drivers always operate in either writethrough or unsafe
3389 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
3390 * know how the server works (because the behaviour is hardcoded or
3391 * depends on server-side configuration), so we can't ensure that
3392 * everything is safe on disk. Returning an error doesn't work because
3393 * that would break guests even if the server operates in writethrough
3394 * mode.
3395 *
3396 * Let's hope the user knows what he's doing.
3397 */
3398 return 0;
3399 }
3400}
3401
0f15423c
AL
3402void bdrv_invalidate_cache(BlockDriverState *bs)
3403{
3404 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3405 bs->drv->bdrv_invalidate_cache(bs);
3406 }
3407}
3408
3409void bdrv_invalidate_cache_all(void)
3410{
3411 BlockDriverState *bs;
3412
3413 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3414 bdrv_invalidate_cache(bs);
3415 }
3416}
3417
07f07615
PB
3418int bdrv_flush(BlockDriverState *bs)
3419{
3420 Coroutine *co;
3421 RwCo rwco = {
3422 .bs = bs,
3423 .ret = NOT_DONE,
e7a8a783 3424 };
e7a8a783 3425
07f07615
PB
3426 if (qemu_in_coroutine()) {
3427 /* Fast-path if already in coroutine context */
3428 bdrv_flush_co_entry(&rwco);
3429 } else {
3430 co = qemu_coroutine_create(bdrv_flush_co_entry);
3431 qemu_coroutine_enter(co, &rwco);
3432 while (rwco.ret == NOT_DONE) {
3433 qemu_aio_wait();
3434 }
e7a8a783 3435 }
07f07615
PB
3436
3437 return rwco.ret;
e7a8a783
KW
3438}
3439
4265d620
PB
3440static void coroutine_fn bdrv_discard_co_entry(void *opaque)
3441{
3442 RwCo *rwco = opaque;
3443
3444 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
3445}
3446
3447int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
3448 int nb_sectors)
3449{
3450 if (!bs->drv) {
3451 return -ENOMEDIUM;
3452 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
3453 return -EIO;
3454 } else if (bs->read_only) {
3455 return -EROFS;
3456 } else if (bs->drv->bdrv_co_discard) {
3457 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
3458 } else if (bs->drv->bdrv_aio_discard) {
3459 BlockDriverAIOCB *acb;
3460 CoroutineIOCompletion co = {
3461 .coroutine = qemu_coroutine_self(),
3462 };
3463
3464 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
3465 bdrv_co_io_em_complete, &co);
3466 if (acb == NULL) {
3467 return -EIO;
3468 } else {
3469 qemu_coroutine_yield();
3470 return co.ret;
3471 }
4265d620
PB
3472 } else {
3473 return 0;
3474 }
3475}
3476
3477int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
3478{
3479 Coroutine *co;
3480 RwCo rwco = {
3481 .bs = bs,
3482 .sector_num = sector_num,
3483 .nb_sectors = nb_sectors,
3484 .ret = NOT_DONE,
3485 };
3486
3487 if (qemu_in_coroutine()) {
3488 /* Fast-path if already in coroutine context */
3489 bdrv_discard_co_entry(&rwco);
3490 } else {
3491 co = qemu_coroutine_create(bdrv_discard_co_entry);
3492 qemu_coroutine_enter(co, &rwco);
3493 while (rwco.ret == NOT_DONE) {
3494 qemu_aio_wait();
3495 }
3496 }
3497
3498 return rwco.ret;
3499}
3500
19cb3738
FB
3501/**************************************************************/
3502/* removable device support */
3503
3504/**
3505 * Return TRUE if the media is present
3506 */
3507int bdrv_is_inserted(BlockDriverState *bs)
3508{
3509 BlockDriver *drv = bs->drv;
a1aff5bf 3510
19cb3738
FB
3511 if (!drv)
3512 return 0;
3513 if (!drv->bdrv_is_inserted)
a1aff5bf
MA
3514 return 1;
3515 return drv->bdrv_is_inserted(bs);
19cb3738
FB
3516}
3517
3518/**
8e49ca46
MA
3519 * Return whether the media changed since the last call to this
3520 * function, or -ENOTSUP if we don't know. Most drivers don't know.
19cb3738
FB
3521 */
3522int bdrv_media_changed(BlockDriverState *bs)
3523{
3524 BlockDriver *drv = bs->drv;
19cb3738 3525
8e49ca46
MA
3526 if (drv && drv->bdrv_media_changed) {
3527 return drv->bdrv_media_changed(bs);
3528 }
3529 return -ENOTSUP;
19cb3738
FB
3530}
3531
3532/**
3533 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3534 */
fdec4404 3535void bdrv_eject(BlockDriverState *bs, int eject_flag)
19cb3738
FB
3536{
3537 BlockDriver *drv = bs->drv;
19cb3738 3538
822e1cd1
MA
3539 if (drv && drv->bdrv_eject) {
3540 drv->bdrv_eject(bs, eject_flag);
19cb3738
FB
3541 }
3542}
3543
19cb3738
FB
3544/**
3545 * Lock or unlock the media (if it is locked, the user won't be able
3546 * to eject it manually).
3547 */
025e849a 3548void bdrv_lock_medium(BlockDriverState *bs, bool locked)
19cb3738
FB
3549{
3550 BlockDriver *drv = bs->drv;
3551
025e849a 3552 trace_bdrv_lock_medium(bs, locked);
b8c6d095 3553
025e849a
MA
3554 if (drv && drv->bdrv_lock_medium) {
3555 drv->bdrv_lock_medium(bs, locked);
19cb3738
FB
3556 }
3557}
985a03b0
TS
3558
3559/* needed for generic scsi interface */
3560
3561int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3562{
3563 BlockDriver *drv = bs->drv;
3564
3565 if (drv && drv->bdrv_ioctl)
3566 return drv->bdrv_ioctl(bs, req, buf);
3567 return -ENOTSUP;
3568}
7d780669 3569
221f715d
AL
3570BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3571 unsigned long int req, void *buf,
3572 BlockDriverCompletionFunc *cb, void *opaque)
7d780669 3573{
221f715d 3574 BlockDriver *drv = bs->drv;
7d780669 3575
221f715d
AL
3576 if (drv && drv->bdrv_aio_ioctl)
3577 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3578 return NULL;
7d780669 3579}
e268ca52 3580
7b6f9300
MA
3581void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3582{
3583 bs->buffer_alignment = align;
3584}
7cd1e32a 3585
e268ca52
AL
3586void *qemu_blockalign(BlockDriverState *bs, size_t size)
3587{
3588 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3589}
7cd1e32a 3590
3591void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3592{
3593 int64_t bitmap_size;
a55eb92c 3594
aaa0eb75 3595 bs->dirty_count = 0;
a55eb92c 3596 if (enable) {
c6d22830
JK
3597 if (!bs->dirty_bitmap) {
3598 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3599 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3600 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
a55eb92c 3601
7267c094 3602 bs->dirty_bitmap = g_malloc0(bitmap_size);
a55eb92c 3603 }
7cd1e32a 3604 } else {
c6d22830 3605 if (bs->dirty_bitmap) {
7267c094 3606 g_free(bs->dirty_bitmap);
c6d22830 3607 bs->dirty_bitmap = NULL;
a55eb92c 3608 }
7cd1e32a 3609 }
3610}
3611
3612int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3613{
6ea44308 3614 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c 3615
c6d22830
JK
3616 if (bs->dirty_bitmap &&
3617 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
6d59fec1
MT
3618 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3619 (1UL << (chunk % (sizeof(unsigned long) * 8))));
7cd1e32a 3620 } else {
3621 return 0;
3622 }
3623}
3624
a55eb92c
JK
3625void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3626 int nr_sectors)
7cd1e32a 3627{
3628 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3629}
aaa0eb75
LS
3630
3631int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3632{
3633 return bs->dirty_count;
3634}
f88e1a42 3635
db593f25
MT
3636void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3637{
3638 assert(bs->in_use != in_use);
3639 bs->in_use = in_use;
3640}
3641
3642int bdrv_in_use(BlockDriverState *bs)
3643{
3644 return bs->in_use;
3645}
3646
28a7282a
LC
3647void bdrv_iostatus_enable(BlockDriverState *bs)
3648{
d6bf279e 3649 bs->iostatus_enabled = true;
58e21ef5 3650 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
28a7282a
LC
3651}
3652
3653/* The I/O status is only enabled if the drive explicitly
3654 * enables it _and_ the VM is configured to stop on errors */
3655bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3656{
d6bf279e 3657 return (bs->iostatus_enabled &&
28a7282a
LC
3658 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3659 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3660 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3661}
3662
3663void bdrv_iostatus_disable(BlockDriverState *bs)
3664{
d6bf279e 3665 bs->iostatus_enabled = false;
28a7282a
LC
3666}
3667
3668void bdrv_iostatus_reset(BlockDriverState *bs)
3669{
3670 if (bdrv_iostatus_is_enabled(bs)) {
58e21ef5 3671 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
28a7282a
LC
3672 }
3673}
3674
3675/* XXX: Today this is set by device models because it makes the implementation
3676 quite simple. However, the block layer knows about the error, so it's
3677 possible to implement this without device models being involved */
3678void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3679{
58e21ef5
LC
3680 if (bdrv_iostatus_is_enabled(bs) &&
3681 bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
28a7282a 3682 assert(error >= 0);
58e21ef5
LC
3683 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3684 BLOCK_DEVICE_IO_STATUS_FAILED;
28a7282a
LC
3685 }
3686}
3687
a597e79c
CH
3688void
3689bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3690 enum BlockAcctType type)
3691{
3692 assert(type < BDRV_MAX_IOTYPE);
3693
3694 cookie->bytes = bytes;
c488c7f6 3695 cookie->start_time_ns = get_clock();
a597e79c
CH
3696 cookie->type = type;
3697}
3698
3699void
3700bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3701{
3702 assert(cookie->type < BDRV_MAX_IOTYPE);
3703
3704 bs->nr_bytes[cookie->type] += cookie->bytes;
3705 bs->nr_ops[cookie->type]++;
c488c7f6 3706 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
a597e79c
CH
3707}
3708
f88e1a42
JS
3709int bdrv_img_create(const char *filename, const char *fmt,
3710 const char *base_filename, const char *base_fmt,
3711 char *options, uint64_t img_size, int flags)
3712{
3713 QEMUOptionParameter *param = NULL, *create_options = NULL;
d220894e 3714 QEMUOptionParameter *backing_fmt, *backing_file, *size;
f88e1a42
JS
3715 BlockDriverState *bs = NULL;
3716 BlockDriver *drv, *proto_drv;
96df67d1 3717 BlockDriver *backing_drv = NULL;
f88e1a42
JS
3718 int ret = 0;
3719
3720 /* Find driver and parse its options */
3721 drv = bdrv_find_format(fmt);
3722 if (!drv) {
3723 error_report("Unknown file format '%s'", fmt);
4f70f249 3724 ret = -EINVAL;
f88e1a42
JS
3725 goto out;
3726 }
3727
3728 proto_drv = bdrv_find_protocol(filename);
3729 if (!proto_drv) {
3730 error_report("Unknown protocol '%s'", filename);
4f70f249 3731 ret = -EINVAL;
f88e1a42
JS
3732 goto out;
3733 }
3734
3735 create_options = append_option_parameters(create_options,
3736 drv->create_options);
3737 create_options = append_option_parameters(create_options,
3738 proto_drv->create_options);
3739
3740 /* Create parameter list with default values */
3741 param = parse_option_parameters("", create_options, param);
3742
3743 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3744
3745 /* Parse -o options */
3746 if (options) {
3747 param = parse_option_parameters(options, create_options, param);
3748 if (param == NULL) {
3749 error_report("Invalid options for file format '%s'.", fmt);
4f70f249 3750 ret = -EINVAL;
f88e1a42
JS
3751 goto out;
3752 }
3753 }
3754
3755 if (base_filename) {
3756 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3757 base_filename)) {
3758 error_report("Backing file not supported for file format '%s'",
3759 fmt);
4f70f249 3760 ret = -EINVAL;
f88e1a42
JS
3761 goto out;
3762 }
3763 }
3764
3765 if (base_fmt) {
3766 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3767 error_report("Backing file format not supported for file "
3768 "format '%s'", fmt);
4f70f249 3769 ret = -EINVAL;
f88e1a42
JS
3770 goto out;
3771 }
3772 }
3773
792da93a
JS
3774 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3775 if (backing_file && backing_file->value.s) {
3776 if (!strcmp(filename, backing_file->value.s)) {
3777 error_report("Error: Trying to create an image with the "
3778 "same filename as the backing file");
4f70f249 3779 ret = -EINVAL;
792da93a
JS
3780 goto out;
3781 }
3782 }
3783
f88e1a42
JS
3784 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3785 if (backing_fmt && backing_fmt->value.s) {
96df67d1
SH
3786 backing_drv = bdrv_find_format(backing_fmt->value.s);
3787 if (!backing_drv) {
f88e1a42
JS
3788 error_report("Unknown backing file format '%s'",
3789 backing_fmt->value.s);
4f70f249 3790 ret = -EINVAL;
f88e1a42
JS
3791 goto out;
3792 }
3793 }
3794
3795 // The size for the image must always be specified, with one exception:
3796 // If we are using a backing file, we can obtain the size from there
d220894e
KW
3797 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3798 if (size && size->value.n == -1) {
f88e1a42
JS
3799 if (backing_file && backing_file->value.s) {
3800 uint64_t size;
f88e1a42
JS
3801 char buf[32];
3802
f88e1a42
JS
3803 bs = bdrv_new("");
3804
96df67d1 3805 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
f88e1a42 3806 if (ret < 0) {
96df67d1 3807 error_report("Could not open '%s'", backing_file->value.s);
f88e1a42
JS
3808 goto out;
3809 }
3810 bdrv_get_geometry(bs, &size);
3811 size *= 512;
3812
3813 snprintf(buf, sizeof(buf), "%" PRId64, size);
3814 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3815 } else {
3816 error_report("Image creation needs a size parameter");
4f70f249 3817 ret = -EINVAL;
f88e1a42
JS
3818 goto out;
3819 }
3820 }
3821
3822 printf("Formatting '%s', fmt=%s ", filename, fmt);
3823 print_option_parameters(param);
3824 puts("");
3825
3826 ret = bdrv_create(drv, filename, param);
3827
3828 if (ret < 0) {
3829 if (ret == -ENOTSUP) {
3830 error_report("Formatting or formatting option not supported for "
3831 "file format '%s'", fmt);
3832 } else if (ret == -EFBIG) {
3833 error_report("The image size is too large for file format '%s'",
3834 fmt);
3835 } else {
3836 error_report("%s: error while creating %s: %s", filename, fmt,
3837 strerror(-ret));
3838 }
3839 }
3840
3841out:
3842 free_option_parameters(create_options);
3843 free_option_parameters(param);
3844
3845 if (bs) {
3846 bdrv_delete(bs);
3847 }
4f70f249
JS
3848
3849 return ret;
f88e1a42 3850}