git.proxmox.com Git - qemu.git/blame - block.c
block: switch bdrv_read()/bdrv_write() to coroutines
[qemu.git] / block.c
CommitLineData
fc01f7e7
FB
1/*
2 * QEMU System Emulator block driver
5fafdf24 3 *
fc01f7e7 4 * Copyright (c) 2003 Fabrice Bellard
5fafdf24 5 *
fc01f7e7
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
3990d09a 24#include "config-host.h"
faf07963 25#include "qemu-common.h"
6d519a5f 26#include "trace.h"
376253ec 27#include "monitor.h"
ea2384d3 28#include "block_int.h"
5efa9d5a 29#include "module.h"
d15e5465 30#include "qemu-objects.h"
68485420 31#include "qemu-coroutine.h"
fc01f7e7 32
71e72a19 33#ifdef CONFIG_BSD
7674e7bf
FB
34#include <sys/types.h>
35#include <sys/stat.h>
36#include <sys/ioctl.h>
72cf2d4f 37#include <sys/queue.h>
c5e97233 38#ifndef __DragonFly__
7674e7bf
FB
39#include <sys/disk.h>
40#endif
c5e97233 41#endif
7674e7bf 42
49dc768d
AL
43#ifdef _WIN32
44#include <windows.h>
45#endif
46
1c9805a3
SH
47#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
48
7d4b4ba5 49static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
f141eafe
AL
50static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
51 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
c87c0672 52 BlockDriverCompletionFunc *cb, void *opaque);
f141eafe
AL
53static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
54 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 55 BlockDriverCompletionFunc *cb, void *opaque);
b2e12bc6
CH
56static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
57 BlockDriverCompletionFunc *cb, void *opaque);
016f5cf6
AG
58static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
59 BlockDriverCompletionFunc *cb, void *opaque);
5fafdf24 60static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
83f64091
FB
61 uint8_t *buf, int nb_sectors);
62static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
63 const uint8_t *buf, int nb_sectors);
68485420
KW
64static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
65 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
66 BlockDriverCompletionFunc *cb, void *opaque);
67static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
68 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
69 BlockDriverCompletionFunc *cb, void *opaque);
f9f05dc5
KW
70static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
71 int64_t sector_num, int nb_sectors,
72 QEMUIOVector *iov);
73static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
74 int64_t sector_num, int nb_sectors,
75 QEMUIOVector *iov);
e7a8a783 76static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs);
c5fbe571
SH
77static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
78 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
1c9805a3
SH
79static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
80 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
ec530c81 81
1b7bdbc1
SH
82static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
83 QTAILQ_HEAD_INITIALIZER(bdrv_states);
7ee930d0 84
8a22f02a
SH
85static QLIST_HEAD(, BlockDriver) bdrv_drivers =
86 QLIST_HEAD_INITIALIZER(bdrv_drivers);
ea2384d3 87
f9092b10
MA
88/* The device to use for VM snapshots */
89static BlockDriverState *bs_snapshots;
90
eb852011
MA
91/* If non-zero, use only whitelisted block drivers */
92static int use_bdrv_whitelist;
93
9e0b22f4
SH
94#ifdef _WIN32
/*
 * Return 1 if filename starts with an ASCII letter immediately followed
 * by ':' (for example "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_lower = (letter >= 'a' && letter <= 'z');
    int is_upper = (letter >= 'A' && letter <= 'Z');

    if (!is_lower && !is_upper) {
        return 0;
    }
    return filename[1] == ':';
}
101
/*
 * Return 1 if filename is exactly a drive designator ("d:") or begins
 * with one of the device prefixes "\\.\" or "//./", 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    int bare_drive;

    bare_drive = is_windows_drive_prefix(filename) && filename[2] == '\0';
    if (bare_drive) {
        return 1;
    }

    if (strstart(filename, "\\\\.\\", NULL)) {
        return 1;
    }
    if (strstart(filename, "//./", NULL)) {
        return 1;
    }
    return 0;
}
112#endif
113
/*
 * Tell whether path carries a "<protocol>:" part.  The test is simply
 * whether a ':' occurs anywhere in path; on Windows, drive names and
 * drive-letter prefixes are excluded first.
 *
 * Returns 1 if a protocol is present, 0 otherwise.
 */
static int path_has_protocol(const char *path)
{
    const char *colon;

#ifdef _WIN32
    if (is_windows_drive(path)) {
        return 0;
    }
    if (is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    colon = strchr(path, ':');
    return colon != NULL ? 1 : 0;
}
126
/*
 * Return non-zero if path is absolute.  Everything up to and including
 * the first ':' (if any) is skipped, and the character that follows
 * must be a '/' (or also a '\' on Windows).
 */
int path_is_absolute(const char *path)
{
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif

    start = strchr(path, ':');
    start = start ? start + 1 : path;

#ifdef _WIN32
    return (*start == '/' || *start == '\\');
#else
    return (*start == '/');
#endif
}
146
83f64091
FB
/*
 * Build in dest (at most dest_size bytes, always NUL-terminated when
 * dest_size > 0) the path of filename interpreted relative to base_path.
 *
 * An absolute filename is copied unchanged.  Otherwise the part of
 * base_path up to and including its last path separator -- or, if that
 * comes later, its first ':' -- is kept and filename is appended to it.
 * URLs are supported.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_proto, *after_sep, *cut;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* first character after "<protocol>:", or the start of base_path */
    after_proto = strchr(base_path, ':');
    after_proto = after_proto ? after_proto + 1 : base_path;

    /* first character after the last directory separator */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* keep the longer of the two prefixes, clamped to the buffer */
    cut = after_sep > after_proto ? after_sep : after_proto;
    prefix_len = cut - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }

    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
190
5efa9d5a 191void bdrv_register(BlockDriver *bdrv)
ea2384d3 192{
68485420
KW
193 if (bdrv->bdrv_co_readv) {
194 /* Emulate AIO by coroutines, and sync by AIO */
195 bdrv->bdrv_aio_readv = bdrv_co_aio_readv_em;
196 bdrv->bdrv_aio_writev = bdrv_co_aio_writev_em;
197 bdrv->bdrv_read = bdrv_read_em;
198 bdrv->bdrv_write = bdrv_write_em;
f9f05dc5
KW
199 } else {
200 bdrv->bdrv_co_readv = bdrv_co_readv_em;
201 bdrv->bdrv_co_writev = bdrv_co_writev_em;
202
203 if (!bdrv->bdrv_aio_readv) {
204 /* add AIO emulation layer */
205 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
206 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
207 } else if (!bdrv->bdrv_read) {
208 /* add synchronous IO emulation layer */
209 bdrv->bdrv_read = bdrv_read_em;
210 bdrv->bdrv_write = bdrv_write_em;
211 }
83f64091 212 }
b2e12bc6
CH
213
214 if (!bdrv->bdrv_aio_flush)
215 bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
216
8a22f02a 217 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
ea2384d3 218}
b338082b
FB
219
220/* create a new block device (by default it is empty) */
221BlockDriverState *bdrv_new(const char *device_name)
222{
1b7bdbc1 223 BlockDriverState *bs;
b338082b 224
7267c094 225 bs = g_malloc0(sizeof(BlockDriverState));
b338082b 226 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
ea2384d3 227 if (device_name[0] != '\0') {
1b7bdbc1 228 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
ea2384d3 229 }
28a7282a 230 bdrv_iostatus_disable(bs);
b338082b
FB
231 return bs;
232}
233
ea2384d3
FB
234BlockDriver *bdrv_find_format(const char *format_name)
235{
236 BlockDriver *drv1;
8a22f02a
SH
237 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
238 if (!strcmp(drv1->format_name, format_name)) {
ea2384d3 239 return drv1;
8a22f02a 240 }
ea2384d3
FB
241 }
242 return NULL;
243}
244
eb852011
MA
245static int bdrv_is_whitelisted(BlockDriver *drv)
246{
247 static const char *whitelist[] = {
248 CONFIG_BDRV_WHITELIST
249 };
250 const char **p;
251
252 if (!whitelist[0])
253 return 1; /* no whitelist, anything goes */
254
255 for (p = whitelist; *p; p++) {
256 if (!strcmp(drv->format_name, *p)) {
257 return 1;
258 }
259 }
260 return 0;
261}
262
263BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
264{
265 BlockDriver *drv = bdrv_find_format(format_name);
266 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
267}
268
0e7e1989
KW
269int bdrv_create(BlockDriver *drv, const char* filename,
270 QEMUOptionParameter *options)
ea2384d3
FB
271{
272 if (!drv->bdrv_create)
273 return -ENOTSUP;
0e7e1989
KW
274
275 return drv->bdrv_create(filename, options);
ea2384d3
FB
276}
277
84a12e66
CH
278int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
279{
280 BlockDriver *drv;
281
b50cbabc 282 drv = bdrv_find_protocol(filename);
84a12e66 283 if (drv == NULL) {
16905d71 284 return -ENOENT;
84a12e66
CH
285 }
286
287 return bdrv_create(drv, filename, options);
288}
289
#ifdef _WIN32
/*
 * Store the name of a newly created temporary file in filename, which
 * holds size bytes.
 *
 * GetTempFileName() needs a buffer of MAX_PATH characters regardless of
 * what the caller provides, so the name is produced in a local buffer and
 * then copied with truncation.  filename is left untouched if
 * GetTempFileName() fails.
 */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];
    char temp_name[MAX_PATH];

    GetTempPath(MAX_PATH, temp_dir);
    if (GetTempFileName(temp_dir, "qem", 0, temp_name) != 0) {
        pstrcpy(filename, size, temp_name);
    }
}
#else
/*
 * Store the name of a newly created temporary file in filename, which
 * holds size bytes.  The file is created by mkstemp() in $TMPDIR (or
 * /tmp when TMPDIR is unset) and closed again right away.
 *
 * If mkstemp() fails, no descriptor is closed; filename then holds
 * whatever mkstemp() left in it.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;

    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    if (fd >= 0) {
        /* only close a descriptor that was really opened */
        close(fd);
    }
}
#endif
fc01f7e7 312
84a12e66
CH
313/*
314 * Detect host devices. By convention, /dev/cdrom[N] is always
315 * recognized as a host CDROM.
316 */
317static BlockDriver *find_hdev_driver(const char *filename)
318{
319 int score_max = 0, score;
320 BlockDriver *drv = NULL, *d;
321
322 QLIST_FOREACH(d, &bdrv_drivers, list) {
323 if (d->bdrv_probe_device) {
324 score = d->bdrv_probe_device(filename);
325 if (score > score_max) {
326 score_max = score;
327 drv = d;
328 }
329 }
330 }
331
332 return drv;
333}
334
b50cbabc 335BlockDriver *bdrv_find_protocol(const char *filename)
83f64091
FB
336{
337 BlockDriver *drv1;
338 char protocol[128];
1cec71e3 339 int len;
83f64091 340 const char *p;
19cb3738 341
66f82cee
KW
342 /* TODO Drivers without bdrv_file_open must be specified explicitly */
343
39508e7a
CH
344 /*
345 * XXX(hch): we really should not let host device detection
346 * override an explicit protocol specification, but moving this
347 * later breaks access to device names with colons in them.
348 * Thanks to the brain-dead persistent naming schemes on udev-
349 * based Linux systems those actually are quite common.
350 */
351 drv1 = find_hdev_driver(filename);
352 if (drv1) {
353 return drv1;
354 }
355
9e0b22f4 356 if (!path_has_protocol(filename)) {
39508e7a 357 return bdrv_find_format("file");
84a12e66 358 }
9e0b22f4
SH
359 p = strchr(filename, ':');
360 assert(p != NULL);
1cec71e3
AL
361 len = p - filename;
362 if (len > sizeof(protocol) - 1)
363 len = sizeof(protocol) - 1;
364 memcpy(protocol, filename, len);
365 protocol[len] = '\0';
8a22f02a 366 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
5fafdf24 367 if (drv1->protocol_name &&
8a22f02a 368 !strcmp(drv1->protocol_name, protocol)) {
83f64091 369 return drv1;
8a22f02a 370 }
83f64091
FB
371 }
372 return NULL;
373}
374
c98ac35d 375static int find_image_format(const char *filename, BlockDriver **pdrv)
f3a5d3f8
CH
376{
377 int ret, score, score_max;
378 BlockDriver *drv1, *drv;
379 uint8_t buf[2048];
380 BlockDriverState *bs;
381
f5edb014 382 ret = bdrv_file_open(&bs, filename, 0);
c98ac35d
SW
383 if (ret < 0) {
384 *pdrv = NULL;
385 return ret;
386 }
f8ea0b00 387
08a00559
KW
388 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
389 if (bs->sg || !bdrv_is_inserted(bs)) {
1a396859 390 bdrv_delete(bs);
c98ac35d
SW
391 drv = bdrv_find_format("raw");
392 if (!drv) {
393 ret = -ENOENT;
394 }
395 *pdrv = drv;
396 return ret;
1a396859 397 }
f8ea0b00 398
83f64091
FB
399 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
400 bdrv_delete(bs);
401 if (ret < 0) {
c98ac35d
SW
402 *pdrv = NULL;
403 return ret;
83f64091
FB
404 }
405
ea2384d3 406 score_max = 0;
84a12e66 407 drv = NULL;
8a22f02a 408 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
83f64091
FB
409 if (drv1->bdrv_probe) {
410 score = drv1->bdrv_probe(buf, ret, filename);
411 if (score > score_max) {
412 score_max = score;
413 drv = drv1;
414 }
0849bf08 415 }
fc01f7e7 416 }
c98ac35d
SW
417 if (!drv) {
418 ret = -ENOENT;
419 }
420 *pdrv = drv;
421 return ret;
ea2384d3
FB
422}
423
51762288
SH
424/**
425 * Set the current 'total_sectors' value
426 */
427static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
428{
429 BlockDriver *drv = bs->drv;
430
396759ad
NB
431 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
432 if (bs->sg)
433 return 0;
434
51762288
SH
435 /* query actual device if possible, otherwise just trust the hint */
436 if (drv->bdrv_getlength) {
437 int64_t length = drv->bdrv_getlength(bs);
438 if (length < 0) {
439 return length;
440 }
441 hint = length >> BDRV_SECTOR_BITS;
442 }
443
444 bs->total_sectors = hint;
445 return 0;
446}
447
c3993cdc
SH
448/**
449 * Set open flags for a given cache mode
450 *
451 * Return 0 on success, -1 if the cache mode was invalid.
452 */
453int bdrv_parse_cache_flags(const char *mode, int *flags)
454{
455 *flags &= ~BDRV_O_CACHE_MASK;
456
457 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
458 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
92196b2f
SH
459 } else if (!strcmp(mode, "directsync")) {
460 *flags |= BDRV_O_NOCACHE;
c3993cdc
SH
461 } else if (!strcmp(mode, "writeback")) {
462 *flags |= BDRV_O_CACHE_WB;
463 } else if (!strcmp(mode, "unsafe")) {
464 *flags |= BDRV_O_CACHE_WB;
465 *flags |= BDRV_O_NO_FLUSH;
466 } else if (!strcmp(mode, "writethrough")) {
467 /* this is the default */
468 } else {
469 return -1;
470 }
471
472 return 0;
473}
474
57915332
KW
475/*
476 * Common part for opening disk images and files
477 */
478static int bdrv_open_common(BlockDriverState *bs, const char *filename,
479 int flags, BlockDriver *drv)
480{
481 int ret, open_flags;
482
483 assert(drv != NULL);
484
28dcee10
SH
485 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
486
66f82cee 487 bs->file = NULL;
51762288 488 bs->total_sectors = 0;
57915332
KW
489 bs->encrypted = 0;
490 bs->valid_key = 0;
491 bs->open_flags = flags;
57915332
KW
492 bs->buffer_alignment = 512;
493
494 pstrcpy(bs->filename, sizeof(bs->filename), filename);
495
496 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
497 return -ENOTSUP;
498 }
499
500 bs->drv = drv;
7267c094 501 bs->opaque = g_malloc0(drv->instance_size);
57915332 502
a6599793 503 if (flags & BDRV_O_CACHE_WB)
57915332
KW
504 bs->enable_write_cache = 1;
505
506 /*
507 * Clear flags that are internal to the block layer before opening the
508 * image.
509 */
510 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
511
512 /*
ebabb67a 513 * Snapshots should be writable.
57915332
KW
514 */
515 if (bs->is_temporary) {
516 open_flags |= BDRV_O_RDWR;
517 }
518
66f82cee
KW
519 /* Open the image, either directly or using a protocol */
520 if (drv->bdrv_file_open) {
521 ret = drv->bdrv_file_open(bs, filename, open_flags);
522 } else {
523 ret = bdrv_file_open(&bs->file, filename, open_flags);
524 if (ret >= 0) {
525 ret = drv->bdrv_open(bs, open_flags);
526 }
527 }
528
57915332
KW
529 if (ret < 0) {
530 goto free_and_fail;
531 }
532
533 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
51762288
SH
534
535 ret = refresh_total_sectors(bs, bs->total_sectors);
536 if (ret < 0) {
537 goto free_and_fail;
57915332 538 }
51762288 539
57915332
KW
540#ifndef _WIN32
541 if (bs->is_temporary) {
542 unlink(filename);
543 }
544#endif
545 return 0;
546
547free_and_fail:
66f82cee
KW
548 if (bs->file) {
549 bdrv_delete(bs->file);
550 bs->file = NULL;
551 }
7267c094 552 g_free(bs->opaque);
57915332
KW
553 bs->opaque = NULL;
554 bs->drv = NULL;
555 return ret;
556}
557
b6ce07aa
KW
558/*
559 * Opens a file using a protocol (file, host_device, nbd, ...)
560 */
83f64091 561int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
ea2384d3 562{
83f64091 563 BlockDriverState *bs;
6db95603 564 BlockDriver *drv;
83f64091
FB
565 int ret;
566
b50cbabc 567 drv = bdrv_find_protocol(filename);
6db95603
CH
568 if (!drv) {
569 return -ENOENT;
570 }
571
83f64091 572 bs = bdrv_new("");
b6ce07aa 573 ret = bdrv_open_common(bs, filename, flags, drv);
83f64091
FB
574 if (ret < 0) {
575 bdrv_delete(bs);
576 return ret;
3b0d4f61 577 }
71d0770c 578 bs->growable = 1;
83f64091
FB
579 *pbs = bs;
580 return 0;
581}
582
b6ce07aa
KW
583/*
584 * Opens a disk image (raw, qcow2, vmdk, ...)
585 */
d6e9098e
KW
586int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
587 BlockDriver *drv)
ea2384d3 588{
b6ce07aa 589 int ret;
712e7874 590
83f64091 591 if (flags & BDRV_O_SNAPSHOT) {
ea2384d3
FB
592 BlockDriverState *bs1;
593 int64_t total_size;
7c96d46e 594 int is_protocol = 0;
91a073a9
KW
595 BlockDriver *bdrv_qcow2;
596 QEMUOptionParameter *options;
b6ce07aa
KW
597 char tmp_filename[PATH_MAX];
598 char backing_filename[PATH_MAX];
3b46e624 599
ea2384d3
FB
600 /* if snapshot, we create a temporary backing file and open it
601 instead of opening 'filename' directly */
33e3963e 602
ea2384d3
FB
603 /* if there is a backing file, use it */
604 bs1 = bdrv_new("");
d6e9098e 605 ret = bdrv_open(bs1, filename, 0, drv);
51d7c00c 606 if (ret < 0) {
ea2384d3 607 bdrv_delete(bs1);
51d7c00c 608 return ret;
ea2384d3 609 }
3e82990b 610 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
7c96d46e
AL
611
612 if (bs1->drv && bs1->drv->protocol_name)
613 is_protocol = 1;
614
ea2384d3 615 bdrv_delete(bs1);
3b46e624 616
ea2384d3 617 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
7c96d46e
AL
618
619 /* Real path is meaningless for protocols */
620 if (is_protocol)
621 snprintf(backing_filename, sizeof(backing_filename),
622 "%s", filename);
114cdfa9
KS
623 else if (!realpath(filename, backing_filename))
624 return -errno;
7c96d46e 625
91a073a9
KW
626 bdrv_qcow2 = bdrv_find_format("qcow2");
627 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
628
3e82990b 629 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
91a073a9
KW
630 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
631 if (drv) {
632 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
633 drv->format_name);
634 }
635
636 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
d748768c 637 free_option_parameters(options);
51d7c00c
AL
638 if (ret < 0) {
639 return ret;
ea2384d3 640 }
91a073a9 641
ea2384d3 642 filename = tmp_filename;
91a073a9 643 drv = bdrv_qcow2;
ea2384d3
FB
644 bs->is_temporary = 1;
645 }
712e7874 646
b6ce07aa 647 /* Find the right image format driver */
6db95603 648 if (!drv) {
c98ac35d 649 ret = find_image_format(filename, &drv);
51d7c00c 650 }
6987307c 651
51d7c00c 652 if (!drv) {
51d7c00c 653 goto unlink_and_fail;
ea2384d3 654 }
b6ce07aa
KW
655
656 /* Open the image */
657 ret = bdrv_open_common(bs, filename, flags, drv);
658 if (ret < 0) {
6987307c
CH
659 goto unlink_and_fail;
660 }
661
b6ce07aa
KW
662 /* If there is a backing file, use it */
663 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
664 char backing_filename[PATH_MAX];
665 int back_flags;
666 BlockDriver *back_drv = NULL;
667
668 bs->backing_hd = bdrv_new("");
df2dbb4a
SH
669
670 if (path_has_protocol(bs->backing_file)) {
671 pstrcpy(backing_filename, sizeof(backing_filename),
672 bs->backing_file);
673 } else {
674 path_combine(backing_filename, sizeof(backing_filename),
675 filename, bs->backing_file);
676 }
677
678 if (bs->backing_format[0] != '\0') {
b6ce07aa 679 back_drv = bdrv_find_format(bs->backing_format);
df2dbb4a 680 }
b6ce07aa
KW
681
682 /* backing files always opened read-only */
683 back_flags =
684 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
685
686 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
687 if (ret < 0) {
688 bdrv_close(bs);
689 return ret;
690 }
691 if (bs->is_temporary) {
692 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
693 } else {
694 /* base image inherits from "parent" */
695 bs->backing_hd->keep_read_only = bs->keep_read_only;
696 }
697 }
698
699 if (!bdrv_key_required(bs)) {
7d4b4ba5 700 bdrv_dev_change_media_cb(bs, true);
b6ce07aa
KW
701 }
702
703 return 0;
704
705unlink_and_fail:
706 if (bs->is_temporary) {
707 unlink(filename);
708 }
709 return ret;
710}
711
fc01f7e7
FB
712void bdrv_close(BlockDriverState *bs)
713{
19cb3738 714 if (bs->drv) {
f9092b10
MA
715 if (bs == bs_snapshots) {
716 bs_snapshots = NULL;
717 }
557df6ac 718 if (bs->backing_hd) {
ea2384d3 719 bdrv_delete(bs->backing_hd);
557df6ac
SH
720 bs->backing_hd = NULL;
721 }
ea2384d3 722 bs->drv->bdrv_close(bs);
7267c094 723 g_free(bs->opaque);
ea2384d3
FB
724#ifdef _WIN32
725 if (bs->is_temporary) {
726 unlink(bs->filename);
727 }
67b915a5 728#endif
ea2384d3
FB
729 bs->opaque = NULL;
730 bs->drv = NULL;
b338082b 731
66f82cee
KW
732 if (bs->file != NULL) {
733 bdrv_close(bs->file);
734 }
735
7d4b4ba5 736 bdrv_dev_change_media_cb(bs, false);
b338082b
FB
737 }
738}
739
2bc93fed
MK
740void bdrv_close_all(void)
741{
742 BlockDriverState *bs;
743
744 QTAILQ_FOREACH(bs, &bdrv_states, list) {
745 bdrv_close(bs);
746 }
747}
748
d22b2f41
RH
749/* make a BlockDriverState anonymous by removing from bdrv_state list.
750 Also, NULL terminate the device_name to prevent double remove */
751void bdrv_make_anon(BlockDriverState *bs)
752{
753 if (bs->device_name[0] != '\0') {
754 QTAILQ_REMOVE(&bdrv_states, bs, list);
755 }
756 bs->device_name[0] = '\0';
757}
758
b338082b
FB
759void bdrv_delete(BlockDriverState *bs)
760{
fa879d62 761 assert(!bs->dev);
18846dee 762
1b7bdbc1 763 /* remove from list, if necessary */
d22b2f41 764 bdrv_make_anon(bs);
34c6f050 765
b338082b 766 bdrv_close(bs);
66f82cee
KW
767 if (bs->file != NULL) {
768 bdrv_delete(bs->file);
769 }
770
f9092b10 771 assert(bs != bs_snapshots);
7267c094 772 g_free(bs);
fc01f7e7
FB
773}
774
fa879d62
MA
775int bdrv_attach_dev(BlockDriverState *bs, void *dev)
776/* TODO change to DeviceState *dev when all users are qdevified */
18846dee 777{
fa879d62 778 if (bs->dev) {
18846dee
MA
779 return -EBUSY;
780 }
fa879d62 781 bs->dev = dev;
28a7282a 782 bdrv_iostatus_reset(bs);
18846dee
MA
783 return 0;
784}
785
fa879d62
MA
786/* TODO qdevified devices don't use this, remove when devices are qdevified */
787void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
18846dee 788{
fa879d62
MA
789 if (bdrv_attach_dev(bs, dev) < 0) {
790 abort();
791 }
792}
793
794void bdrv_detach_dev(BlockDriverState *bs, void *dev)
795/* TODO change to DeviceState *dev when all users are qdevified */
796{
797 assert(bs->dev == dev);
798 bs->dev = NULL;
0e49de52
MA
799 bs->dev_ops = NULL;
800 bs->dev_opaque = NULL;
29e05f20 801 bs->buffer_alignment = 512;
18846dee
MA
802}
803
fa879d62
MA
804/* TODO change to return DeviceState * when all users are qdevified */
805void *bdrv_get_attached_dev(BlockDriverState *bs)
18846dee 806{
fa879d62 807 return bs->dev;
18846dee
MA
808}
809
0e49de52
MA
810void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
811 void *opaque)
812{
813 bs->dev_ops = ops;
814 bs->dev_opaque = opaque;
2c6942fa
MA
815 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
816 bs_snapshots = NULL;
817 }
0e49de52
MA
818}
819
7d4b4ba5 820static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
0e49de52 821{
145feb17 822 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
7d4b4ba5 823 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
145feb17
MA
824 }
825}
826
2c6942fa
MA
827bool bdrv_dev_has_removable_media(BlockDriverState *bs)
828{
829 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
830}
831
e4def80b
MA
832bool bdrv_dev_is_tray_open(BlockDriverState *bs)
833{
834 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
835 return bs->dev_ops->is_tray_open(bs->dev_opaque);
836 }
837 return false;
838}
839
145feb17
MA
840static void bdrv_dev_resize_cb(BlockDriverState *bs)
841{
842 if (bs->dev_ops && bs->dev_ops->resize_cb) {
843 bs->dev_ops->resize_cb(bs->dev_opaque);
0e49de52
MA
844 }
845}
846
f107639a
MA
847bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
848{
849 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
850 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
851 }
852 return false;
853}
854
e97fc193
AL
855/*
856 * Run consistency checks on an image
857 *
e076f338 858 * Returns 0 if the check could be completed (it doesn't mean that the image is
a1c7273b 859 * free of errors) or -errno when an internal error occurred. The results of the
e076f338 860 * check are stored in res.
e97fc193 861 */
e076f338 862int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
e97fc193
AL
863{
864 if (bs->drv->bdrv_check == NULL) {
865 return -ENOTSUP;
866 }
867
e076f338 868 memset(res, 0, sizeof(*res));
9ac228e0 869 return bs->drv->bdrv_check(bs, res);
e97fc193
AL
870}
871
8a426614
KW
872#define COMMIT_BUF_SECTORS 2048
873
33e3963e
FB
874/* commit COW file into the raw image */
875int bdrv_commit(BlockDriverState *bs)
876{
19cb3738 877 BlockDriver *drv = bs->drv;
ee181196 878 BlockDriver *backing_drv;
8a426614
KW
879 int64_t sector, total_sectors;
880 int n, ro, open_flags;
4dca4b63 881 int ret = 0, rw_ret = 0;
8a426614 882 uint8_t *buf;
4dca4b63
NS
883 char filename[1024];
884 BlockDriverState *bs_rw, *bs_ro;
33e3963e 885
19cb3738
FB
886 if (!drv)
887 return -ENOMEDIUM;
4dca4b63
NS
888
889 if (!bs->backing_hd) {
890 return -ENOTSUP;
33e3963e
FB
891 }
892
4dca4b63
NS
893 if (bs->backing_hd->keep_read_only) {
894 return -EACCES;
895 }
ee181196
KW
896
897 backing_drv = bs->backing_hd->drv;
4dca4b63
NS
898 ro = bs->backing_hd->read_only;
899 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
900 open_flags = bs->backing_hd->open_flags;
901
902 if (ro) {
903 /* re-open as RW */
904 bdrv_delete(bs->backing_hd);
905 bs->backing_hd = NULL;
906 bs_rw = bdrv_new("");
ee181196
KW
907 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
908 backing_drv);
4dca4b63
NS
909 if (rw_ret < 0) {
910 bdrv_delete(bs_rw);
911 /* try to re-open read-only */
912 bs_ro = bdrv_new("");
ee181196
KW
913 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
914 backing_drv);
4dca4b63
NS
915 if (ret < 0) {
916 bdrv_delete(bs_ro);
917 /* drive not functional anymore */
918 bs->drv = NULL;
919 return ret;
920 }
921 bs->backing_hd = bs_ro;
922 return rw_ret;
923 }
924 bs->backing_hd = bs_rw;
ea2384d3 925 }
33e3963e 926
6ea44308 927 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
7267c094 928 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
8a426614
KW
929
930 for (sector = 0; sector < total_sectors; sector += n) {
931 if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
932
933 if (bdrv_read(bs, sector, buf, n) != 0) {
934 ret = -EIO;
935 goto ro_cleanup;
936 }
937
938 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
939 ret = -EIO;
940 goto ro_cleanup;
941 }
ea2384d3 942 }
33e3963e 943 }
95389c86 944
1d44952f
CH
945 if (drv->bdrv_make_empty) {
946 ret = drv->bdrv_make_empty(bs);
947 bdrv_flush(bs);
948 }
95389c86 949
3f5075ae
CH
950 /*
951 * Make sure all data we wrote to the backing device is actually
952 * stable on disk.
953 */
954 if (bs->backing_hd)
955 bdrv_flush(bs->backing_hd);
4dca4b63
NS
956
957ro_cleanup:
7267c094 958 g_free(buf);
4dca4b63
NS
959
960 if (ro) {
961 /* re-open as RO */
962 bdrv_delete(bs->backing_hd);
963 bs->backing_hd = NULL;
964 bs_ro = bdrv_new("");
ee181196
KW
965 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
966 backing_drv);
4dca4b63
NS
967 if (ret < 0) {
968 bdrv_delete(bs_ro);
969 /* drive not functional anymore */
970 bs->drv = NULL;
971 return ret;
972 }
973 bs->backing_hd = bs_ro;
974 bs->backing_hd->keep_read_only = 0;
975 }
976
1d44952f 977 return ret;
33e3963e
FB
978}
979
6ab4b5ab
MA
980void bdrv_commit_all(void)
981{
982 BlockDriverState *bs;
983
984 QTAILQ_FOREACH(bs, &bdrv_states, list) {
985 bdrv_commit(bs);
986 }
987}
988
756e6736
KW
989/*
990 * Return values:
991 * 0 - success
992 * -EINVAL - backing format specified, but no file
993 * -ENOSPC - can't update the backing file because no space is left in the
994 * image file header
995 * -ENOTSUP - format driver doesn't support changing the backing file
996 */
997int bdrv_change_backing_file(BlockDriverState *bs,
998 const char *backing_file, const char *backing_fmt)
999{
1000 BlockDriver *drv = bs->drv;
1001
1002 if (drv->bdrv_change_backing_file != NULL) {
1003 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
1004 } else {
1005 return -ENOTSUP;
1006 }
1007}
1008
71d0770c
AL
1009static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1010 size_t size)
1011{
1012 int64_t len;
1013
1014 if (!bdrv_is_inserted(bs))
1015 return -ENOMEDIUM;
1016
1017 if (bs->growable)
1018 return 0;
1019
1020 len = bdrv_getlength(bs);
1021
fbb7b4e0
KW
1022 if (offset < 0)
1023 return -EIO;
1024
1025 if ((offset > len) || (len - offset < size))
71d0770c
AL
1026 return -EIO;
1027
1028 return 0;
1029}
1030
1031static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1032 int nb_sectors)
1033{
eb5a3165
JS
1034 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1035 nb_sectors * BDRV_SECTOR_SIZE);
71d0770c
AL
1036}
1037
e7a8a783
KW
1038static inline bool bdrv_has_async_rw(BlockDriver *drv)
1039{
1040 return drv->bdrv_co_readv != bdrv_co_readv_em
1041 || drv->bdrv_aio_readv != bdrv_aio_readv_em;
1042}
1043
1044static inline bool bdrv_has_async_flush(BlockDriver *drv)
1045{
1046 return drv->bdrv_aio_flush != bdrv_aio_flush_em;
1047}
1048
1c9805a3
SH
1049typedef struct RwCo {
1050 BlockDriverState *bs;
1051 int64_t sector_num;
1052 int nb_sectors;
1053 QEMUIOVector *qiov;
1054 bool is_write;
1055 int ret;
1056} RwCo;
1057
1058static void coroutine_fn bdrv_rw_co_entry(void *opaque)
fc01f7e7 1059{
1c9805a3 1060 RwCo *rwco = opaque;
ea2384d3 1061
1c9805a3
SH
1062 if (!rwco->is_write) {
1063 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
1064 rwco->nb_sectors, rwco->qiov);
1065 } else {
1066 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
1067 rwco->nb_sectors, rwco->qiov);
1068 }
1069}
e7a8a783 1070
1c9805a3
SH
1071/*
1072 * Process a synchronous request using coroutines
1073 */
1074static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1075 int nb_sectors, bool is_write)
1076{
1077 QEMUIOVector qiov;
1078 struct iovec iov = {
1079 .iov_base = (void *)buf,
1080 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1081 };
1082 Coroutine *co;
1083 RwCo rwco = {
1084 .bs = bs,
1085 .sector_num = sector_num,
1086 .nb_sectors = nb_sectors,
1087 .qiov = &qiov,
1088 .is_write = is_write,
1089 .ret = NOT_DONE,
1090 };
e7a8a783 1091
1c9805a3 1092 qemu_iovec_init_external(&qiov, &iov, 1);
e7a8a783 1093
1c9805a3
SH
1094 if (qemu_in_coroutine()) {
1095 /* Fast-path if already in coroutine context */
1096 bdrv_rw_co_entry(&rwco);
1097 } else {
1098 co = qemu_coroutine_create(bdrv_rw_co_entry);
1099 qemu_coroutine_enter(co, &rwco);
1100 while (rwco.ret == NOT_DONE) {
1101 qemu_aio_wait();
1102 }
1103 }
1104 return rwco.ret;
1105}
b338082b 1106
1c9805a3
SH
1107/* return < 0 if error. See bdrv_write() for the return codes */
1108int bdrv_read(BlockDriverState *bs, int64_t sector_num,
1109 uint8_t *buf, int nb_sectors)
1110{
1111 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
fc01f7e7
FB
1112}
1113
7cd1e32a 1114static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
a55eb92c 1115 int nb_sectors, int dirty)
7cd1e32a 1116{
1117 int64_t start, end;
c6d22830 1118 unsigned long val, idx, bit;
a55eb92c 1119
6ea44308 1120 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
c6d22830 1121 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c
JK
1122
1123 for (; start <= end; start++) {
c6d22830
JK
1124 idx = start / (sizeof(unsigned long) * 8);
1125 bit = start % (sizeof(unsigned long) * 8);
1126 val = bs->dirty_bitmap[idx];
1127 if (dirty) {
6d59fec1 1128 if (!(val & (1UL << bit))) {
aaa0eb75 1129 bs->dirty_count++;
6d59fec1 1130 val |= 1UL << bit;
aaa0eb75 1131 }
c6d22830 1132 } else {
6d59fec1 1133 if (val & (1UL << bit)) {
aaa0eb75 1134 bs->dirty_count--;
6d59fec1 1135 val &= ~(1UL << bit);
aaa0eb75 1136 }
c6d22830
JK
1137 }
1138 bs->dirty_bitmap[idx] = val;
7cd1e32a 1139 }
1140}
1141
5fafdf24 1142/* Return < 0 if error. Important errors are:
19cb3738
FB
1143 -EIO generic I/O error (may happen for all errors)
1144 -ENOMEDIUM No media inserted.
1145 -EINVAL Invalid sector number or nb_sectors
1146 -EACCES Trying to write a read-only device
1147*/
5fafdf24 1148int bdrv_write(BlockDriverState *bs, int64_t sector_num,
fc01f7e7
FB
1149 const uint8_t *buf, int nb_sectors)
1150{
1c9805a3 1151 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
83f64091
FB
1152}
1153
eda578e5
AL
1154int bdrv_pread(BlockDriverState *bs, int64_t offset,
1155 void *buf, int count1)
83f64091 1156{
6ea44308 1157 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
83f64091
FB
1158 int len, nb_sectors, count;
1159 int64_t sector_num;
9a8c4cce 1160 int ret;
83f64091
FB
1161
1162 count = count1;
1163 /* first read to align to sector start */
6ea44308 1164 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
83f64091
FB
1165 if (len > count)
1166 len = count;
6ea44308 1167 sector_num = offset >> BDRV_SECTOR_BITS;
83f64091 1168 if (len > 0) {
9a8c4cce
KW
1169 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1170 return ret;
6ea44308 1171 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
83f64091
FB
1172 count -= len;
1173 if (count == 0)
1174 return count1;
1175 sector_num++;
1176 buf += len;
1177 }
1178
1179 /* read the sectors "in place" */
6ea44308 1180 nb_sectors = count >> BDRV_SECTOR_BITS;
83f64091 1181 if (nb_sectors > 0) {
9a8c4cce
KW
1182 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1183 return ret;
83f64091 1184 sector_num += nb_sectors;
6ea44308 1185 len = nb_sectors << BDRV_SECTOR_BITS;
83f64091
FB
1186 buf += len;
1187 count -= len;
1188 }
1189
1190 /* add data from the last sector */
1191 if (count > 0) {
9a8c4cce
KW
1192 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1193 return ret;
83f64091
FB
1194 memcpy(buf, tmp_buf, count);
1195 }
1196 return count1;
1197}
1198
eda578e5
AL
1199int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1200 const void *buf, int count1)
83f64091 1201{
6ea44308 1202 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
83f64091
FB
1203 int len, nb_sectors, count;
1204 int64_t sector_num;
9a8c4cce 1205 int ret;
83f64091
FB
1206
1207 count = count1;
1208 /* first write to align to sector start */
6ea44308 1209 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
83f64091
FB
1210 if (len > count)
1211 len = count;
6ea44308 1212 sector_num = offset >> BDRV_SECTOR_BITS;
83f64091 1213 if (len > 0) {
9a8c4cce
KW
1214 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1215 return ret;
6ea44308 1216 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
9a8c4cce
KW
1217 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1218 return ret;
83f64091
FB
1219 count -= len;
1220 if (count == 0)
1221 return count1;
1222 sector_num++;
1223 buf += len;
1224 }
1225
1226 /* write the sectors "in place" */
6ea44308 1227 nb_sectors = count >> BDRV_SECTOR_BITS;
83f64091 1228 if (nb_sectors > 0) {
9a8c4cce
KW
1229 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1230 return ret;
83f64091 1231 sector_num += nb_sectors;
6ea44308 1232 len = nb_sectors << BDRV_SECTOR_BITS;
83f64091
FB
1233 buf += len;
1234 count -= len;
1235 }
1236
1237 /* add data from the last sector */
1238 if (count > 0) {
9a8c4cce
KW
1239 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1240 return ret;
83f64091 1241 memcpy(tmp_buf, buf, count);
9a8c4cce
KW
1242 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1243 return ret;
83f64091
FB
1244 }
1245 return count1;
1246}
83f64091 1247
f08145fe
KW
1248/*
1249 * Writes to the file and ensures that no writes are reordered across this
1250 * request (acts as a barrier)
1251 *
1252 * Returns 0 on success, -errno in error cases.
1253 */
1254int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1255 const void *buf, int count)
1256{
1257 int ret;
1258
1259 ret = bdrv_pwrite(bs, offset, buf, count);
1260 if (ret < 0) {
1261 return ret;
1262 }
1263
92196b2f
SH
1264 /* No flush needed for cache modes that use O_DSYNC */
1265 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
f08145fe
KW
1266 bdrv_flush(bs);
1267 }
1268
1269 return 0;
1270}
1271
c5fbe571
SH
1272/*
1273 * Handle a read request in coroutine context
1274 */
1275static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
1276 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
da1fa91d
KW
1277{
1278 BlockDriver *drv = bs->drv;
1279
da1fa91d
KW
1280 if (!drv) {
1281 return -ENOMEDIUM;
1282 }
1283 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1284 return -EIO;
1285 }
1286
1287 return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1288}
1289
c5fbe571 1290int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
da1fa91d
KW
1291 int nb_sectors, QEMUIOVector *qiov)
1292{
c5fbe571 1293 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
da1fa91d 1294
c5fbe571
SH
1295 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
1296}
1297
1298/*
1299 * Handle a write request in coroutine context
1300 */
1301static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
1302 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1303{
1304 BlockDriver *drv = bs->drv;
da1fa91d
KW
1305
1306 if (!bs->drv) {
1307 return -ENOMEDIUM;
1308 }
1309 if (bs->read_only) {
1310 return -EACCES;
1311 }
1312 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1313 return -EIO;
1314 }
1315
1316 if (bs->dirty_bitmap) {
1317 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1318 }
1319
1320 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1321 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1322 }
1323
1324 return drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1325}
1326
c5fbe571
SH
1327int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1328 int nb_sectors, QEMUIOVector *qiov)
1329{
1330 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1331
1332 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
1333}
1334
83f64091
FB
1335/**
1336 * Truncate file to 'offset' bytes (needed only for file protocols)
1337 */
1338int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1339{
1340 BlockDriver *drv = bs->drv;
51762288 1341 int ret;
83f64091 1342 if (!drv)
19cb3738 1343 return -ENOMEDIUM;
83f64091
FB
1344 if (!drv->bdrv_truncate)
1345 return -ENOTSUP;
59f2689d
NS
1346 if (bs->read_only)
1347 return -EACCES;
8591675f
MT
1348 if (bdrv_in_use(bs))
1349 return -EBUSY;
51762288
SH
1350 ret = drv->bdrv_truncate(bs, offset);
1351 if (ret == 0) {
1352 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
145feb17 1353 bdrv_dev_resize_cb(bs);
51762288
SH
1354 }
1355 return ret;
83f64091
FB
1356}
1357
4a1d5e1f
FZ
1358/**
1359 * Length of a allocated file in bytes. Sparse files are counted by actual
1360 * allocated space. Return < 0 if error or unknown.
1361 */
1362int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1363{
1364 BlockDriver *drv = bs->drv;
1365 if (!drv) {
1366 return -ENOMEDIUM;
1367 }
1368 if (drv->bdrv_get_allocated_file_size) {
1369 return drv->bdrv_get_allocated_file_size(bs);
1370 }
1371 if (bs->file) {
1372 return bdrv_get_allocated_file_size(bs->file);
1373 }
1374 return -ENOTSUP;
1375}
1376
83f64091
FB
1377/**
1378 * Length of a file in bytes. Return < 0 if error or unknown.
1379 */
1380int64_t bdrv_getlength(BlockDriverState *bs)
1381{
1382 BlockDriver *drv = bs->drv;
1383 if (!drv)
19cb3738 1384 return -ENOMEDIUM;
51762288 1385
2c6942fa 1386 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
46a4e4e6
SH
1387 if (drv->bdrv_getlength) {
1388 return drv->bdrv_getlength(bs);
1389 }
83f64091 1390 }
46a4e4e6 1391 return bs->total_sectors * BDRV_SECTOR_SIZE;
fc01f7e7
FB
1392}
1393
19cb3738 1394/* return 0 as number of sectors if no device present or error */
96b8f136 1395void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
fc01f7e7 1396{
19cb3738
FB
1397 int64_t length;
1398 length = bdrv_getlength(bs);
1399 if (length < 0)
1400 length = 0;
1401 else
6ea44308 1402 length = length >> BDRV_SECTOR_BITS;
19cb3738 1403 *nb_sectors_ptr = length;
fc01f7e7 1404}
cf98951b 1405
f3d54fc4
AL
/*
 * One entry of the MSDOS partition table as laid out on disk (packed).
 * guess_disk_lchs() reads the 32-bit fields with le32_to_cpu().
 */
struct partition {
    uint8_t boot_ind;       /* 0x80 - active */
    uint8_t head;           /* starting head */
    uint8_t sector;         /* starting sector */
    uint8_t cyl;            /* starting cylinder */
    uint8_t sys_ind;        /* partition type */
    uint8_t end_head;       /* end head */
    uint8_t end_sector;     /* end sector */
    uint8_t end_cyl;        /* end cylinder */
    uint32_t start_sect;    /* starting sector counting from 0 */
    uint32_t nr_sects;      /* number of sectors in partition */
} QEMU_PACKED;
f3d54fc4
AL
1418
1419/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1420static int guess_disk_lchs(BlockDriverState *bs,
1421 int *pcylinders, int *pheads, int *psectors)
1422{
eb5a3165 1423 uint8_t buf[BDRV_SECTOR_SIZE];
f3d54fc4
AL
1424 int ret, i, heads, sectors, cylinders;
1425 struct partition *p;
1426 uint32_t nr_sects;
a38131b6 1427 uint64_t nb_sectors;
f3d54fc4
AL
1428
1429 bdrv_get_geometry(bs, &nb_sectors);
1430
1431 ret = bdrv_read(bs, 0, buf, 1);
1432 if (ret < 0)
1433 return -1;
1434 /* test msdos magic */
1435 if (buf[510] != 0x55 || buf[511] != 0xaa)
1436 return -1;
1437 for(i = 0; i < 4; i++) {
1438 p = ((struct partition *)(buf + 0x1be)) + i;
1439 nr_sects = le32_to_cpu(p->nr_sects);
1440 if (nr_sects && p->end_head) {
1441 /* We make the assumption that the partition terminates on
1442 a cylinder boundary */
1443 heads = p->end_head + 1;
1444 sectors = p->end_sector & 63;
1445 if (sectors == 0)
1446 continue;
1447 cylinders = nb_sectors / (heads * sectors);
1448 if (cylinders < 1 || cylinders > 16383)
1449 continue;
1450 *pheads = heads;
1451 *psectors = sectors;
1452 *pcylinders = cylinders;
1453#if 0
1454 printf("guessed geometry: LCHS=%d %d %d\n",
1455 cylinders, heads, sectors);
1456#endif
1457 return 0;
1458 }
1459 }
1460 return -1;
1461}
1462
1463void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1464{
1465 int translation, lba_detected = 0;
1466 int cylinders, heads, secs;
a38131b6 1467 uint64_t nb_sectors;
f3d54fc4
AL
1468
1469 /* if a geometry hint is available, use it */
1470 bdrv_get_geometry(bs, &nb_sectors);
1471 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1472 translation = bdrv_get_translation_hint(bs);
1473 if (cylinders != 0) {
1474 *pcyls = cylinders;
1475 *pheads = heads;
1476 *psecs = secs;
1477 } else {
1478 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1479 if (heads > 16) {
1480 /* if heads > 16, it means that a BIOS LBA
1481 translation was active, so the default
1482 hardware geometry is OK */
1483 lba_detected = 1;
1484 goto default_geometry;
1485 } else {
1486 *pcyls = cylinders;
1487 *pheads = heads;
1488 *psecs = secs;
1489 /* disable any translation to be in sync with
1490 the logical geometry */
1491 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1492 bdrv_set_translation_hint(bs,
1493 BIOS_ATA_TRANSLATION_NONE);
1494 }
1495 }
1496 } else {
1497 default_geometry:
1498 /* if no geometry, use a standard physical disk geometry */
1499 cylinders = nb_sectors / (16 * 63);
1500
1501 if (cylinders > 16383)
1502 cylinders = 16383;
1503 else if (cylinders < 2)
1504 cylinders = 2;
1505 *pcyls = cylinders;
1506 *pheads = 16;
1507 *psecs = 63;
1508 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1509 if ((*pcyls * *pheads) <= 131072) {
1510 bdrv_set_translation_hint(bs,
1511 BIOS_ATA_TRANSLATION_LARGE);
1512 } else {
1513 bdrv_set_translation_hint(bs,
1514 BIOS_ATA_TRANSLATION_LBA);
1515 }
1516 }
1517 }
1518 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1519 }
1520}
1521
5fafdf24 1522void bdrv_set_geometry_hint(BlockDriverState *bs,
b338082b
FB
1523 int cyls, int heads, int secs)
1524{
1525 bs->cyls = cyls;
1526 bs->heads = heads;
1527 bs->secs = secs;
1528}
1529
46d4767d
FB
1530void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1531{
1532 bs->translation = translation;
1533}
1534
5fafdf24 1535void bdrv_get_geometry_hint(BlockDriverState *bs,
b338082b
FB
1536 int *pcyls, int *pheads, int *psecs)
1537{
1538 *pcyls = bs->cyls;
1539 *pheads = bs->heads;
1540 *psecs = bs->secs;
1541}
1542
5bbdbb46
BS
1543/* Recognize floppy formats */
1544typedef struct FDFormat {
1545 FDriveType drive;
1546 uint8_t last_sect;
1547 uint8_t max_track;
1548 uint8_t max_head;
1549} FDFormat;
1550
1551static const FDFormat fd_formats[] = {
1552 /* First entry is default format */
1553 /* 1.44 MB 3"1/2 floppy disks */
1554 { FDRIVE_DRV_144, 18, 80, 1, },
1555 { FDRIVE_DRV_144, 20, 80, 1, },
1556 { FDRIVE_DRV_144, 21, 80, 1, },
1557 { FDRIVE_DRV_144, 21, 82, 1, },
1558 { FDRIVE_DRV_144, 21, 83, 1, },
1559 { FDRIVE_DRV_144, 22, 80, 1, },
1560 { FDRIVE_DRV_144, 23, 80, 1, },
1561 { FDRIVE_DRV_144, 24, 80, 1, },
1562 /* 2.88 MB 3"1/2 floppy disks */
1563 { FDRIVE_DRV_288, 36, 80, 1, },
1564 { FDRIVE_DRV_288, 39, 80, 1, },
1565 { FDRIVE_DRV_288, 40, 80, 1, },
1566 { FDRIVE_DRV_288, 44, 80, 1, },
1567 { FDRIVE_DRV_288, 48, 80, 1, },
1568 /* 720 kB 3"1/2 floppy disks */
1569 { FDRIVE_DRV_144, 9, 80, 1, },
1570 { FDRIVE_DRV_144, 10, 80, 1, },
1571 { FDRIVE_DRV_144, 10, 82, 1, },
1572 { FDRIVE_DRV_144, 10, 83, 1, },
1573 { FDRIVE_DRV_144, 13, 80, 1, },
1574 { FDRIVE_DRV_144, 14, 80, 1, },
1575 /* 1.2 MB 5"1/4 floppy disks */
1576 { FDRIVE_DRV_120, 15, 80, 1, },
1577 { FDRIVE_DRV_120, 18, 80, 1, },
1578 { FDRIVE_DRV_120, 18, 82, 1, },
1579 { FDRIVE_DRV_120, 18, 83, 1, },
1580 { FDRIVE_DRV_120, 20, 80, 1, },
1581 /* 720 kB 5"1/4 floppy disks */
1582 { FDRIVE_DRV_120, 9, 80, 1, },
1583 { FDRIVE_DRV_120, 11, 80, 1, },
1584 /* 360 kB 5"1/4 floppy disks */
1585 { FDRIVE_DRV_120, 9, 40, 1, },
1586 { FDRIVE_DRV_120, 9, 40, 0, },
1587 { FDRIVE_DRV_120, 10, 41, 1, },
1588 { FDRIVE_DRV_120, 10, 42, 1, },
1589 /* 320 kB 5"1/4 floppy disks */
1590 { FDRIVE_DRV_120, 8, 40, 1, },
1591 { FDRIVE_DRV_120, 8, 40, 0, },
1592 /* 360 kB must match 5"1/4 better than 3"1/2... */
1593 { FDRIVE_DRV_144, 9, 80, 0, },
1594 /* end */
1595 { FDRIVE_DRV_NONE, -1, -1, 0, },
1596};
1597
1598void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1599 int *max_track, int *last_sect,
1600 FDriveType drive_in, FDriveType *drive)
1601{
1602 const FDFormat *parse;
1603 uint64_t nb_sectors, size;
1604 int i, first_match, match;
1605
1606 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1607 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1608 /* User defined disk */
1609 } else {
1610 bdrv_get_geometry(bs, &nb_sectors);
1611 match = -1;
1612 first_match = -1;
1613 for (i = 0; ; i++) {
1614 parse = &fd_formats[i];
1615 if (parse->drive == FDRIVE_DRV_NONE) {
1616 break;
1617 }
1618 if (drive_in == parse->drive ||
1619 drive_in == FDRIVE_DRV_NONE) {
1620 size = (parse->max_head + 1) * parse->max_track *
1621 parse->last_sect;
1622 if (nb_sectors == size) {
1623 match = i;
1624 break;
1625 }
1626 if (first_match == -1) {
1627 first_match = i;
1628 }
1629 }
1630 }
1631 if (match == -1) {
1632 if (first_match == -1) {
1633 match = 1;
1634 } else {
1635 match = first_match;
1636 }
1637 parse = &fd_formats[match];
1638 }
1639 *nb_heads = parse->max_head + 1;
1640 *max_track = parse->max_track;
1641 *last_sect = parse->last_sect;
1642 *drive = parse->drive;
1643 }
1644}
1645
46d4767d
FB
1646int bdrv_get_translation_hint(BlockDriverState *bs)
1647{
1648 return bs->translation;
1649}
1650
abd7f68d
MA
1651void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1652 BlockErrorAction on_write_error)
1653{
1654 bs->on_read_error = on_read_error;
1655 bs->on_write_error = on_write_error;
1656}
1657
1658BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1659{
1660 return is_read ? bs->on_read_error : bs->on_write_error;
1661}
1662
b338082b
FB
1663int bdrv_is_read_only(BlockDriverState *bs)
1664{
1665 return bs->read_only;
1666}
1667
985a03b0
TS
1668int bdrv_is_sg(BlockDriverState *bs)
1669{
1670 return bs->sg;
1671}
1672
e900a7b7
CH
1673int bdrv_enable_write_cache(BlockDriverState *bs)
1674{
1675 return bs->enable_write_cache;
1676}
1677
ea2384d3
FB
1678int bdrv_is_encrypted(BlockDriverState *bs)
1679{
1680 if (bs->backing_hd && bs->backing_hd->encrypted)
1681 return 1;
1682 return bs->encrypted;
1683}
1684
c0f4ce77
AL
1685int bdrv_key_required(BlockDriverState *bs)
1686{
1687 BlockDriverState *backing_hd = bs->backing_hd;
1688
1689 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1690 return 1;
1691 return (bs->encrypted && !bs->valid_key);
1692}
1693
ea2384d3
FB
1694int bdrv_set_key(BlockDriverState *bs, const char *key)
1695{
1696 int ret;
1697 if (bs->backing_hd && bs->backing_hd->encrypted) {
1698 ret = bdrv_set_key(bs->backing_hd, key);
1699 if (ret < 0)
1700 return ret;
1701 if (!bs->encrypted)
1702 return 0;
1703 }
fd04a2ae
SH
1704 if (!bs->encrypted) {
1705 return -EINVAL;
1706 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1707 return -ENOMEDIUM;
1708 }
c0f4ce77 1709 ret = bs->drv->bdrv_set_key(bs, key);
bb5fc20f
AL
1710 if (ret < 0) {
1711 bs->valid_key = 0;
1712 } else if (!bs->valid_key) {
1713 bs->valid_key = 1;
1714 /* call the change callback now, we skipped it on open */
7d4b4ba5 1715 bdrv_dev_change_media_cb(bs, true);
bb5fc20f 1716 }
c0f4ce77 1717 return ret;
ea2384d3
FB
1718}
1719
1720void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1721{
19cb3738 1722 if (!bs->drv) {
ea2384d3
FB
1723 buf[0] = '\0';
1724 } else {
1725 pstrcpy(buf, buf_size, bs->drv->format_name);
1726 }
1727}
1728
5fafdf24 1729void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
ea2384d3
FB
1730 void *opaque)
1731{
1732 BlockDriver *drv;
1733
8a22f02a 1734 QLIST_FOREACH(drv, &bdrv_drivers, list) {
ea2384d3
FB
1735 it(opaque, drv->format_name);
1736 }
1737}
1738
b338082b
FB
1739BlockDriverState *bdrv_find(const char *name)
1740{
1741 BlockDriverState *bs;
1742
1b7bdbc1
SH
1743 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1744 if (!strcmp(name, bs->device_name)) {
b338082b 1745 return bs;
1b7bdbc1 1746 }
b338082b
FB
1747 }
1748 return NULL;
1749}
1750
2f399b0a
MA
1751BlockDriverState *bdrv_next(BlockDriverState *bs)
1752{
1753 if (!bs) {
1754 return QTAILQ_FIRST(&bdrv_states);
1755 }
1756 return QTAILQ_NEXT(bs, list);
1757}
1758
51de9760 1759void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
81d0912d
FB
1760{
1761 BlockDriverState *bs;
1762
1b7bdbc1 1763 QTAILQ_FOREACH(bs, &bdrv_states, list) {
51de9760 1764 it(opaque, bs);
81d0912d
FB
1765 }
1766}
1767
ea2384d3
FB
1768const char *bdrv_get_device_name(BlockDriverState *bs)
1769{
1770 return bs->device_name;
1771}
1772
205ef796 1773int bdrv_flush(BlockDriverState *bs)
7a6cba61 1774{
016f5cf6 1775 if (bs->open_flags & BDRV_O_NO_FLUSH) {
205ef796
KW
1776 return 0;
1777 }
1778
e7a8a783
KW
1779 if (bs->drv && bdrv_has_async_flush(bs->drv) && qemu_in_coroutine()) {
1780 return bdrv_co_flush_em(bs);
1781 }
1782
205ef796
KW
1783 if (bs->drv && bs->drv->bdrv_flush) {
1784 return bs->drv->bdrv_flush(bs);
016f5cf6
AG
1785 }
1786
205ef796
KW
1787 /*
1788 * Some block drivers always operate in either writethrough or unsafe mode
1789 * and don't support bdrv_flush therefore. Usually qemu doesn't know how
1790 * the server works (because the behaviour is hardcoded or depends on
1791 * server-side configuration), so we can't ensure that everything is safe
1792 * on disk. Returning an error doesn't work because that would break guests
1793 * even if the server operates in writethrough mode.
1794 *
1795 * Let's hope the user knows what he's doing.
1796 */
1797 return 0;
7a6cba61
PB
1798}
1799
c6ca28d6
AL
1800void bdrv_flush_all(void)
1801{
1802 BlockDriverState *bs;
1803
1b7bdbc1 1804 QTAILQ_FOREACH(bs, &bdrv_states, list) {
c602a489 1805 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
c6ca28d6 1806 bdrv_flush(bs);
1b7bdbc1
SH
1807 }
1808 }
c6ca28d6
AL
1809}
1810
f2feebbd
KW
1811int bdrv_has_zero_init(BlockDriverState *bs)
1812{
1813 assert(bs->drv);
1814
336c1c12
KW
1815 if (bs->drv->bdrv_has_zero_init) {
1816 return bs->drv->bdrv_has_zero_init(bs);
f2feebbd
KW
1817 }
1818
1819 return 1;
1820}
1821
bb8bf76f
CH
1822int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
1823{
1824 if (!bs->drv) {
1825 return -ENOMEDIUM;
1826 }
1827 if (!bs->drv->bdrv_discard) {
1828 return 0;
1829 }
1830 return bs->drv->bdrv_discard(bs, sector_num, nb_sectors);
1831}
1832
f58c7b35
TS
1833/*
1834 * Returns true iff the specified sector is present in the disk image. Drivers
1835 * not implementing the functionality are assumed to not support backing files,
1836 * hence all their sectors are reported as allocated.
1837 *
1838 * 'pnum' is set to the number of sectors (including and immediately following
1839 * the specified sector) that are known to be in the same
1840 * allocated/unallocated state.
1841 *
1842 * 'nb_sectors' is the max value 'pnum' should be set to.
1843 */
1844int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1845 int *pnum)
1846{
1847 int64_t n;
1848 if (!bs->drv->bdrv_is_allocated) {
1849 if (sector_num >= bs->total_sectors) {
1850 *pnum = 0;
1851 return 0;
1852 }
1853 n = bs->total_sectors - sector_num;
1854 *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1855 return 1;
1856 }
1857 return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1858}
1859
2582bfed
LC
1860void bdrv_mon_event(const BlockDriverState *bdrv,
1861 BlockMonEventAction action, int is_read)
1862{
1863 QObject *data;
1864 const char *action_str;
1865
1866 switch (action) {
1867 case BDRV_ACTION_REPORT:
1868 action_str = "report";
1869 break;
1870 case BDRV_ACTION_IGNORE:
1871 action_str = "ignore";
1872 break;
1873 case BDRV_ACTION_STOP:
1874 action_str = "stop";
1875 break;
1876 default:
1877 abort();
1878 }
1879
1880 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1881 bdrv->device_name,
1882 action_str,
1883 is_read ? "read" : "write");
1884 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1885
1886 qobject_decref(data);
1887}
1888
d15e5465 1889static void bdrv_print_dict(QObject *obj, void *opaque)
b338082b 1890{
d15e5465
LC
1891 QDict *bs_dict;
1892 Monitor *mon = opaque;
1893
1894 bs_dict = qobject_to_qdict(obj);
1895
d8aeeb31 1896 monitor_printf(mon, "%s: removable=%d",
d15e5465 1897 qdict_get_str(bs_dict, "device"),
d15e5465
LC
1898 qdict_get_bool(bs_dict, "removable"));
1899
1900 if (qdict_get_bool(bs_dict, "removable")) {
1901 monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
e4def80b
MA
1902 monitor_printf(mon, " tray-open=%d",
1903 qdict_get_bool(bs_dict, "tray-open"));
d15e5465 1904 }
d2078cc2
LC
1905
1906 if (qdict_haskey(bs_dict, "io-status")) {
1907 monitor_printf(mon, " io-status=%s", qdict_get_str(bs_dict, "io-status"));
1908 }
1909
d15e5465
LC
1910 if (qdict_haskey(bs_dict, "inserted")) {
1911 QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted"));
1912
1913 monitor_printf(mon, " file=");
1914 monitor_print_filename(mon, qdict_get_str(qdict, "file"));
1915 if (qdict_haskey(qdict, "backing_file")) {
1916 monitor_printf(mon, " backing_file=");
1917 monitor_print_filename(mon, qdict_get_str(qdict, "backing_file"));
1918 }
1919 monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
1920 qdict_get_bool(qdict, "ro"),
1921 qdict_get_str(qdict, "drv"),
1922 qdict_get_bool(qdict, "encrypted"));
1923 } else {
1924 monitor_printf(mon, " [not inserted]");
1925 }
1926
1927 monitor_printf(mon, "\n");
1928}
1929
1930void bdrv_info_print(Monitor *mon, const QObject *data)
1931{
1932 qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon);
1933}
1934
f04ef601
LC
1935static const char *const io_status_name[BDRV_IOS_MAX] = {
1936 [BDRV_IOS_OK] = "ok",
1937 [BDRV_IOS_FAILED] = "failed",
1938 [BDRV_IOS_ENOSPC] = "nospace",
1939};
1940
d15e5465
LC
1941void bdrv_info(Monitor *mon, QObject **ret_data)
1942{
1943 QList *bs_list;
b338082b
FB
1944 BlockDriverState *bs;
1945
d15e5465
LC
1946 bs_list = qlist_new();
1947
1b7bdbc1 1948 QTAILQ_FOREACH(bs, &bdrv_states, list) {
d15e5465 1949 QObject *bs_obj;
e4def80b 1950 QDict *bs_dict;
d15e5465 1951
d8aeeb31 1952 bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': 'unknown', "
d15e5465 1953 "'removable': %i, 'locked': %i }",
2c6942fa
MA
1954 bs->device_name,
1955 bdrv_dev_has_removable_media(bs),
f107639a 1956 bdrv_dev_is_medium_locked(bs));
e4def80b 1957 bs_dict = qobject_to_qdict(bs_obj);
d15e5465 1958
e4def80b
MA
1959 if (bdrv_dev_has_removable_media(bs)) {
1960 qdict_put(bs_dict, "tray-open",
1961 qbool_from_int(bdrv_dev_is_tray_open(bs)));
1962 }
f04ef601
LC
1963
1964 if (bdrv_iostatus_is_enabled(bs)) {
1965 qdict_put(bs_dict, "io-status",
1966 qstring_from_str(io_status_name[bs->iostatus]));
1967 }
1968
19cb3738 1969 if (bs->drv) {
d15e5465 1970 QObject *obj;
d15e5465
LC
1971
1972 obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
1973 "'encrypted': %i }",
1974 bs->filename, bs->read_only,
1975 bs->drv->format_name,
1976 bdrv_is_encrypted(bs));
fef30743 1977 if (bs->backing_file[0] != '\0') {
d15e5465
LC
1978 QDict *qdict = qobject_to_qdict(obj);
1979 qdict_put(qdict, "backing_file",
1980 qstring_from_str(bs->backing_file));
376253ec 1981 }
d15e5465
LC
1982
1983 qdict_put_obj(bs_dict, "inserted", obj);
b338082b 1984 }
d15e5465 1985 qlist_append_obj(bs_list, bs_obj);
b338082b 1986 }
d15e5465
LC
1987
1988 *ret_data = QOBJECT(bs_list);
b338082b 1989}
a36e69dd 1990
218a536a 1991static void bdrv_stats_iter(QObject *data, void *opaque)
a36e69dd 1992{
218a536a
LC
1993 QDict *qdict;
1994 Monitor *mon = opaque;
1995
1996 qdict = qobject_to_qdict(data);
1997 monitor_printf(mon, "%s:", qdict_get_str(qdict, "device"));
1998
1999 qdict = qobject_to_qdict(qdict_get(qdict, "stats"));
2000 monitor_printf(mon, " rd_bytes=%" PRId64
2001 " wr_bytes=%" PRId64
2002 " rd_operations=%" PRId64
2003 " wr_operations=%" PRId64
e8045d67 2004 " flush_operations=%" PRId64
c488c7f6
CH
2005 " wr_total_time_ns=%" PRId64
2006 " rd_total_time_ns=%" PRId64
2007 " flush_total_time_ns=%" PRId64
218a536a
LC
2008 "\n",
2009 qdict_get_int(qdict, "rd_bytes"),
2010 qdict_get_int(qdict, "wr_bytes"),
2011 qdict_get_int(qdict, "rd_operations"),
e8045d67 2012 qdict_get_int(qdict, "wr_operations"),
c488c7f6
CH
2013 qdict_get_int(qdict, "flush_operations"),
2014 qdict_get_int(qdict, "wr_total_time_ns"),
2015 qdict_get_int(qdict, "rd_total_time_ns"),
2016 qdict_get_int(qdict, "flush_total_time_ns"));
218a536a
LC
2017}
2018
2019void bdrv_stats_print(Monitor *mon, const QObject *data)
2020{
2021 qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon);
2022}
2023
294cc35f
KW
2024static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
2025{
2026 QObject *res;
2027 QDict *dict;
2028
2029 res = qobject_from_jsonf("{ 'stats': {"
2030 "'rd_bytes': %" PRId64 ","
2031 "'wr_bytes': %" PRId64 ","
2032 "'rd_operations': %" PRId64 ","
2033 "'wr_operations': %" PRId64 ","
e8045d67 2034 "'wr_highest_offset': %" PRId64 ","
c488c7f6
CH
2035 "'flush_operations': %" PRId64 ","
2036 "'wr_total_time_ns': %" PRId64 ","
2037 "'rd_total_time_ns': %" PRId64 ","
2038 "'flush_total_time_ns': %" PRId64
294cc35f 2039 "} }",
a597e79c
CH
2040 bs->nr_bytes[BDRV_ACCT_READ],
2041 bs->nr_bytes[BDRV_ACCT_WRITE],
2042 bs->nr_ops[BDRV_ACCT_READ],
2043 bs->nr_ops[BDRV_ACCT_WRITE],
5ffbbc67 2044 bs->wr_highest_sector *
e8045d67 2045 (uint64_t)BDRV_SECTOR_SIZE,
c488c7f6
CH
2046 bs->nr_ops[BDRV_ACCT_FLUSH],
2047 bs->total_time_ns[BDRV_ACCT_WRITE],
2048 bs->total_time_ns[BDRV_ACCT_READ],
2049 bs->total_time_ns[BDRV_ACCT_FLUSH]);
294cc35f
KW
2050 dict = qobject_to_qdict(res);
2051
2052 if (*bs->device_name) {
2053 qdict_put(dict, "device", qstring_from_str(bs->device_name));
2054 }
2055
2056 if (bs->file) {
2057 QObject *parent = bdrv_info_stats_bs(bs->file);
2058 qdict_put_obj(dict, "parent", parent);
2059 }
2060
2061 return res;
2062}
2063
218a536a
LC
2064void bdrv_info_stats(Monitor *mon, QObject **ret_data)
2065{
2066 QObject *obj;
2067 QList *devices;
a36e69dd
TS
2068 BlockDriverState *bs;
2069
218a536a
LC
2070 devices = qlist_new();
2071
1b7bdbc1 2072 QTAILQ_FOREACH(bs, &bdrv_states, list) {
294cc35f 2073 obj = bdrv_info_stats_bs(bs);
218a536a 2074 qlist_append_obj(devices, obj);
a36e69dd 2075 }
218a536a
LC
2076
2077 *ret_data = QOBJECT(devices);
a36e69dd 2078}
ea2384d3 2079
045df330
AL
2080const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2081{
2082 if (bs->backing_hd && bs->backing_hd->encrypted)
2083 return bs->backing_file;
2084 else if (bs->encrypted)
2085 return bs->filename;
2086 else
2087 return NULL;
2088}
2089
5fafdf24 2090void bdrv_get_backing_filename(BlockDriverState *bs,
83f64091
FB
2091 char *filename, int filename_size)
2092{
b783e409 2093 if (!bs->backing_file) {
83f64091
FB
2094 pstrcpy(filename, filename_size, "");
2095 } else {
2096 pstrcpy(filename, filename_size, bs->backing_file);
2097 }
2098}
2099
5fafdf24 2100int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
faea38e7
FB
2101 const uint8_t *buf, int nb_sectors)
2102{
2103 BlockDriver *drv = bs->drv;
2104 if (!drv)
19cb3738 2105 return -ENOMEDIUM;
faea38e7
FB
2106 if (!drv->bdrv_write_compressed)
2107 return -ENOTSUP;
fbb7b4e0
KW
2108 if (bdrv_check_request(bs, sector_num, nb_sectors))
2109 return -EIO;
a55eb92c 2110
c6d22830 2111 if (bs->dirty_bitmap) {
7cd1e32a 2112 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2113 }
a55eb92c 2114
faea38e7
FB
2115 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2116}
3b46e624 2117
faea38e7
FB
2118int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2119{
2120 BlockDriver *drv = bs->drv;
2121 if (!drv)
19cb3738 2122 return -ENOMEDIUM;
faea38e7
FB
2123 if (!drv->bdrv_get_info)
2124 return -ENOTSUP;
2125 memset(bdi, 0, sizeof(*bdi));
2126 return drv->bdrv_get_info(bs, bdi);
2127}
2128
45566e9c
CH
2129int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2130 int64_t pos, int size)
178e08a5
AL
2131{
2132 BlockDriver *drv = bs->drv;
2133 if (!drv)
2134 return -ENOMEDIUM;
7cdb1f6d
MK
2135 if (drv->bdrv_save_vmstate)
2136 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2137 if (bs->file)
2138 return bdrv_save_vmstate(bs->file, buf, pos, size);
2139 return -ENOTSUP;
178e08a5
AL
2140}
2141
45566e9c
CH
2142int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2143 int64_t pos, int size)
178e08a5
AL
2144{
2145 BlockDriver *drv = bs->drv;
2146 if (!drv)
2147 return -ENOMEDIUM;
7cdb1f6d
MK
2148 if (drv->bdrv_load_vmstate)
2149 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2150 if (bs->file)
2151 return bdrv_load_vmstate(bs->file, buf, pos, size);
2152 return -ENOTSUP;
178e08a5
AL
2153}
2154
8b9b0cc2
KW
2155void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2156{
2157 BlockDriver *drv = bs->drv;
2158
2159 if (!drv || !drv->bdrv_debug_event) {
2160 return;
2161 }
2162
2163 return drv->bdrv_debug_event(bs, event);
2164
2165}
2166
faea38e7
FB
2167/**************************************************************/
2168/* handling of snapshots */
2169
feeee5ac
MDCF
2170int bdrv_can_snapshot(BlockDriverState *bs)
2171{
2172 BlockDriver *drv = bs->drv;
07b70bfb 2173 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
feeee5ac
MDCF
2174 return 0;
2175 }
2176
2177 if (!drv->bdrv_snapshot_create) {
2178 if (bs->file != NULL) {
2179 return bdrv_can_snapshot(bs->file);
2180 }
2181 return 0;
2182 }
2183
2184 return 1;
2185}
2186
199630b6
BS
2187int bdrv_is_snapshot(BlockDriverState *bs)
2188{
2189 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2190}
2191
f9092b10
MA
2192BlockDriverState *bdrv_snapshots(void)
2193{
2194 BlockDriverState *bs;
2195
3ac906f7 2196 if (bs_snapshots) {
f9092b10 2197 return bs_snapshots;
3ac906f7 2198 }
f9092b10
MA
2199
2200 bs = NULL;
2201 while ((bs = bdrv_next(bs))) {
2202 if (bdrv_can_snapshot(bs)) {
3ac906f7
MA
2203 bs_snapshots = bs;
2204 return bs;
f9092b10
MA
2205 }
2206 }
2207 return NULL;
f9092b10
MA
2208}
2209
5fafdf24 2210int bdrv_snapshot_create(BlockDriverState *bs,
faea38e7
FB
2211 QEMUSnapshotInfo *sn_info)
2212{
2213 BlockDriver *drv = bs->drv;
2214 if (!drv)
19cb3738 2215 return -ENOMEDIUM;
7cdb1f6d
MK
2216 if (drv->bdrv_snapshot_create)
2217 return drv->bdrv_snapshot_create(bs, sn_info);
2218 if (bs->file)
2219 return bdrv_snapshot_create(bs->file, sn_info);
2220 return -ENOTSUP;
faea38e7
FB
2221}
2222
5fafdf24 2223int bdrv_snapshot_goto(BlockDriverState *bs,
faea38e7
FB
2224 const char *snapshot_id)
2225{
2226 BlockDriver *drv = bs->drv;
7cdb1f6d
MK
2227 int ret, open_ret;
2228
faea38e7 2229 if (!drv)
19cb3738 2230 return -ENOMEDIUM;
7cdb1f6d
MK
2231 if (drv->bdrv_snapshot_goto)
2232 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2233
2234 if (bs->file) {
2235 drv->bdrv_close(bs);
2236 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2237 open_ret = drv->bdrv_open(bs, bs->open_flags);
2238 if (open_ret < 0) {
2239 bdrv_delete(bs->file);
2240 bs->drv = NULL;
2241 return open_ret;
2242 }
2243 return ret;
2244 }
2245
2246 return -ENOTSUP;
faea38e7
FB
2247}
2248
2249int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2250{
2251 BlockDriver *drv = bs->drv;
2252 if (!drv)
19cb3738 2253 return -ENOMEDIUM;
7cdb1f6d
MK
2254 if (drv->bdrv_snapshot_delete)
2255 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2256 if (bs->file)
2257 return bdrv_snapshot_delete(bs->file, snapshot_id);
2258 return -ENOTSUP;
faea38e7
FB
2259}
2260
5fafdf24 2261int bdrv_snapshot_list(BlockDriverState *bs,
faea38e7
FB
2262 QEMUSnapshotInfo **psn_info)
2263{
2264 BlockDriver *drv = bs->drv;
2265 if (!drv)
19cb3738 2266 return -ENOMEDIUM;
7cdb1f6d
MK
2267 if (drv->bdrv_snapshot_list)
2268 return drv->bdrv_snapshot_list(bs, psn_info);
2269 if (bs->file)
2270 return bdrv_snapshot_list(bs->file, psn_info);
2271 return -ENOTSUP;
faea38e7
FB
2272}
2273
51ef6727 2274int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2275 const char *snapshot_name)
2276{
2277 BlockDriver *drv = bs->drv;
2278 if (!drv) {
2279 return -ENOMEDIUM;
2280 }
2281 if (!bs->read_only) {
2282 return -EINVAL;
2283 }
2284 if (drv->bdrv_snapshot_load_tmp) {
2285 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2286 }
2287 return -ENOTSUP;
2288}
2289
faea38e7
FB
#define NB_SUFFIXES 4

/*
 * Format @size into @buf (capacity @buf_size) in a short human readable
 * form and return @buf.
 *
 * Values up to 999 are printed as plain integers.  Larger values are scaled
 * by powers of 1024 and suffixed with K, M, G or T: one decimal is shown
 * while the scaled value is below 10, otherwise a rounded integer.  Values
 * too large for the T suffix are still printed in T.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char unit_letters[NB_SUFFIXES] = "KMGT";
    int64_t scale = 1024;
    int unit = 0;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    /* Move to the next unit while four or more digits would be needed. */
    while (size >= 1000 * scale && unit < NB_SUFFIXES - 1) {
        scale *= 1024;
        unit++;
    }

    if (size < 10 * scale) {
        snprintf(buf, buf_size, "%0.1f%c",
                 (double)size / scale, unit_letters[unit]);
    } else {
        /* Adding half of the scale rounds to the nearest integer. */
        snprintf(buf, buf_size, "%" PRId64 "%c",
                 (size + (scale >> 1)) / scale, unit_letters[unit]);
    }
    return buf;
}
2319
2320char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2321{
2322 char buf1[128], date_buf[128], clock_buf[128];
3b9f94e1
FB
2323#ifdef _WIN32
2324 struct tm *ptm;
2325#else
faea38e7 2326 struct tm tm;
3b9f94e1 2327#endif
faea38e7
FB
2328 time_t ti;
2329 int64_t secs;
2330
2331 if (!sn) {
5fafdf24
TS
2332 snprintf(buf, buf_size,
2333 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
2334 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2335 } else {
2336 ti = sn->date_sec;
3b9f94e1
FB
2337#ifdef _WIN32
2338 ptm = localtime(&ti);
2339 strftime(date_buf, sizeof(date_buf),
2340 "%Y-%m-%d %H:%M:%S", ptm);
2341#else
faea38e7
FB
2342 localtime_r(&ti, &tm);
2343 strftime(date_buf, sizeof(date_buf),
2344 "%Y-%m-%d %H:%M:%S", &tm);
3b9f94e1 2345#endif
faea38e7
FB
2346 secs = sn->vm_clock_nsec / 1000000000;
2347 snprintf(clock_buf, sizeof(clock_buf),
2348 "%02d:%02d:%02d.%03d",
2349 (int)(secs / 3600),
2350 (int)((secs / 60) % 60),
5fafdf24 2351 (int)(secs % 60),
faea38e7
FB
2352 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2353 snprintf(buf, buf_size,
5fafdf24 2354 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
2355 sn->id_str, sn->name,
2356 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2357 date_buf,
2358 clock_buf);
2359 }
2360 return buf;
2361}
2362
ea2384d3 2363/**************************************************************/
83f64091 2364/* async I/Os */
ea2384d3 2365
3b69e4b9 2366BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
f141eafe 2367 QEMUIOVector *qiov, int nb_sectors,
3b69e4b9 2368 BlockDriverCompletionFunc *cb, void *opaque)
83f64091
FB
2369{
2370 BlockDriver *drv = bs->drv;
83f64091 2371
bbf0a440
SH
2372 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2373
19cb3738 2374 if (!drv)
ce1a14dc 2375 return NULL;
71d0770c
AL
2376 if (bdrv_check_request(bs, sector_num, nb_sectors))
2377 return NULL;
3b46e624 2378
a597e79c
CH
2379 return drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
2380 cb, opaque);
ea2384d3
FB
2381}
2382
4dcafbb1
MT
2383typedef struct BlockCompleteData {
2384 BlockDriverCompletionFunc *cb;
2385 void *opaque;
2386 BlockDriverState *bs;
2387 int64_t sector_num;
2388 int nb_sectors;
2389} BlockCompleteData;
2390
2391static void block_complete_cb(void *opaque, int ret)
2392{
2393 BlockCompleteData *b = opaque;
2394
2395 if (b->bs->dirty_bitmap) {
2396 set_dirty_bitmap(b->bs, b->sector_num, b->nb_sectors, 1);
2397 }
2398 b->cb(b->opaque, ret);
7267c094 2399 g_free(b);
4dcafbb1
MT
2400}
2401
2402static BlockCompleteData *blk_dirty_cb_alloc(BlockDriverState *bs,
2403 int64_t sector_num,
2404 int nb_sectors,
2405 BlockDriverCompletionFunc *cb,
2406 void *opaque)
2407{
7267c094 2408 BlockCompleteData *blkdata = g_malloc0(sizeof(BlockCompleteData));
4dcafbb1
MT
2409
2410 blkdata->bs = bs;
2411 blkdata->cb = cb;
2412 blkdata->opaque = opaque;
2413 blkdata->sector_num = sector_num;
2414 blkdata->nb_sectors = nb_sectors;
2415
2416 return blkdata;
2417}
2418
f141eafe
AL
2419BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2420 QEMUIOVector *qiov, int nb_sectors,
2421 BlockDriverCompletionFunc *cb, void *opaque)
ea2384d3 2422{
83f64091 2423 BlockDriver *drv = bs->drv;
a36e69dd 2424 BlockDriverAIOCB *ret;
4dcafbb1 2425 BlockCompleteData *blk_cb_data;
ea2384d3 2426
bbf0a440
SH
2427 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2428
19cb3738 2429 if (!drv)
ce1a14dc 2430 return NULL;
83f64091 2431 if (bs->read_only)
ce1a14dc 2432 return NULL;
71d0770c
AL
2433 if (bdrv_check_request(bs, sector_num, nb_sectors))
2434 return NULL;
83f64091 2435
c6d22830 2436 if (bs->dirty_bitmap) {
4dcafbb1
MT
2437 blk_cb_data = blk_dirty_cb_alloc(bs, sector_num, nb_sectors, cb,
2438 opaque);
2439 cb = &block_complete_cb;
2440 opaque = blk_cb_data;
7cd1e32a 2441 }
a55eb92c 2442
f141eafe
AL
2443 ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
2444 cb, opaque);
a36e69dd
TS
2445
2446 if (ret) {
294cc35f
KW
2447 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
2448 bs->wr_highest_sector = sector_num + nb_sectors - 1;
2449 }
a36e69dd
TS
2450 }
2451
2452 return ret;
83f64091
FB
2453}
2454
40b4f539
KW
2455
2456typedef struct MultiwriteCB {
2457 int error;
2458 int num_requests;
2459 int num_callbacks;
2460 struct {
2461 BlockDriverCompletionFunc *cb;
2462 void *opaque;
2463 QEMUIOVector *free_qiov;
2464 void *free_buf;
2465 } callbacks[];
2466} MultiwriteCB;
2467
2468static void multiwrite_user_cb(MultiwriteCB *mcb)
2469{
2470 int i;
2471
2472 for (i = 0; i < mcb->num_callbacks; i++) {
2473 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1e1ea48d
SH
2474 if (mcb->callbacks[i].free_qiov) {
2475 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2476 }
7267c094 2477 g_free(mcb->callbacks[i].free_qiov);
f8a83245 2478 qemu_vfree(mcb->callbacks[i].free_buf);
40b4f539
KW
2479 }
2480}
2481
2482static void multiwrite_cb(void *opaque, int ret)
2483{
2484 MultiwriteCB *mcb = opaque;
2485
6d519a5f
SH
2486 trace_multiwrite_cb(mcb, ret);
2487
cb6d3ca0 2488 if (ret < 0 && !mcb->error) {
40b4f539 2489 mcb->error = ret;
40b4f539
KW
2490 }
2491
2492 mcb->num_requests--;
2493 if (mcb->num_requests == 0) {
de189a1b 2494 multiwrite_user_cb(mcb);
7267c094 2495 g_free(mcb);
40b4f539
KW
2496 }
2497}
2498
2499static int multiwrite_req_compare(const void *a, const void *b)
2500{
77be4366
CH
2501 const BlockRequest *req1 = a, *req2 = b;
2502
2503 /*
2504 * Note that we can't simply subtract req2->sector from req1->sector
2505 * here as that could overflow the return value.
2506 */
2507 if (req1->sector > req2->sector) {
2508 return 1;
2509 } else if (req1->sector < req2->sector) {
2510 return -1;
2511 } else {
2512 return 0;
2513 }
40b4f539
KW
2514}
2515
2516/*
2517 * Takes a bunch of requests and tries to merge them. Returns the number of
2518 * requests that remain after merging.
2519 */
2520static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2521 int num_reqs, MultiwriteCB *mcb)
2522{
2523 int i, outidx;
2524
2525 // Sort requests by start sector
2526 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2527
2528 // Check if adjacent requests touch the same clusters. If so, combine them,
2529 // filling up gaps with zero sectors.
2530 outidx = 0;
2531 for (i = 1; i < num_reqs; i++) {
2532 int merge = 0;
2533 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2534
2535 // This handles the cases that are valid for all block drivers, namely
2536 // exactly sequential writes and overlapping writes.
2537 if (reqs[i].sector <= oldreq_last) {
2538 merge = 1;
2539 }
2540
2541 // The block driver may decide that it makes sense to combine requests
2542 // even if there is a gap of some sectors between them. In this case,
2543 // the gap is filled with zeros (therefore only applicable for yet
2544 // unused space in format like qcow2).
2545 if (!merge && bs->drv->bdrv_merge_requests) {
2546 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2547 }
2548
e2a305fb
CH
2549 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2550 merge = 0;
2551 }
2552
40b4f539
KW
2553 if (merge) {
2554 size_t size;
7267c094 2555 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
40b4f539
KW
2556 qemu_iovec_init(qiov,
2557 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2558
2559 // Add the first request to the merged one. If the requests are
2560 // overlapping, drop the last sectors of the first request.
2561 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2562 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2563
2564 // We might need to add some zeros between the two requests
2565 if (reqs[i].sector > oldreq_last) {
2566 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2567 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2568 memset(buf, 0, zero_bytes);
2569 qemu_iovec_add(qiov, buf, zero_bytes);
2570 mcb->callbacks[i].free_buf = buf;
2571 }
2572
2573 // Add the second request
2574 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2575
cbf1dff2 2576 reqs[outidx].nb_sectors = qiov->size >> 9;
40b4f539
KW
2577 reqs[outidx].qiov = qiov;
2578
2579 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2580 } else {
2581 outidx++;
2582 reqs[outidx].sector = reqs[i].sector;
2583 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2584 reqs[outidx].qiov = reqs[i].qiov;
2585 }
2586 }
2587
2588 return outidx + 1;
2589}
2590
2591/*
2592 * Submit multiple AIO write requests at once.
2593 *
2594 * On success, the function returns 0 and all requests in the reqs array have
2595 * been submitted. In error case this function returns -1, and any of the
2596 * requests may or may not be submitted yet. In particular, this means that the
2597 * callback will be called for some of the requests, for others it won't. The
2598 * caller must check the error field of the BlockRequest to wait for the right
2599 * callbacks (if error != 0, no callback will be called).
2600 *
2601 * The implementation may modify the contents of the reqs array, e.g. to merge
2602 * requests. However, the fields opaque and error are left unmodified as they
2603 * are used to signal failure for a single request to the caller.
2604 */
2605int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2606{
2607 BlockDriverAIOCB *acb;
2608 MultiwriteCB *mcb;
2609 int i;
2610
301db7c2
RH
2611 /* don't submit writes if we don't have a medium */
2612 if (bs->drv == NULL) {
2613 for (i = 0; i < num_reqs; i++) {
2614 reqs[i].error = -ENOMEDIUM;
2615 }
2616 return -1;
2617 }
2618
40b4f539
KW
2619 if (num_reqs == 0) {
2620 return 0;
2621 }
2622
2623 // Create MultiwriteCB structure
7267c094 2624 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
40b4f539
KW
2625 mcb->num_requests = 0;
2626 mcb->num_callbacks = num_reqs;
2627
2628 for (i = 0; i < num_reqs; i++) {
2629 mcb->callbacks[i].cb = reqs[i].cb;
2630 mcb->callbacks[i].opaque = reqs[i].opaque;
2631 }
2632
2633 // Check for mergable requests
2634 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2635
6d519a5f
SH
2636 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2637
453f9a16
KW
2638 /*
2639 * Run the aio requests. As soon as one request can't be submitted
2640 * successfully, fail all requests that are not yet submitted (we must
2641 * return failure for all requests anyway)
2642 *
2643 * num_requests cannot be set to the right value immediately: If
2644 * bdrv_aio_writev fails for some request, num_requests would be too high
2645 * and therefore multiwrite_cb() would never recognize the multiwrite
2646 * request as completed. We also cannot use the loop variable i to set it
2647 * when the first request fails because the callback may already have been
2648 * called for previously submitted requests. Thus, num_requests must be
2649 * incremented for each request that is submitted.
2650 *
2651 * The problem that callbacks may be called early also means that we need
2652 * to take care that num_requests doesn't become 0 before all requests are
2653 * submitted - multiwrite_cb() would consider the multiwrite request
2654 * completed. A dummy request that is "completed" by a manual call to
2655 * multiwrite_cb() takes care of this.
2656 */
2657 mcb->num_requests = 1;
2658
6d519a5f 2659 // Run the aio requests
40b4f539 2660 for (i = 0; i < num_reqs; i++) {
453f9a16 2661 mcb->num_requests++;
40b4f539
KW
2662 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2663 reqs[i].nb_sectors, multiwrite_cb, mcb);
2664
2665 if (acb == NULL) {
2666 // We can only fail the whole thing if no request has been
2667 // submitted yet. Otherwise we'll wait for the submitted AIOs to
2668 // complete and report the error in the callback.
453f9a16 2669 if (i == 0) {
6d519a5f 2670 trace_bdrv_aio_multiwrite_earlyfail(mcb);
40b4f539
KW
2671 goto fail;
2672 } else {
6d519a5f 2673 trace_bdrv_aio_multiwrite_latefail(mcb, i);
7eb58a6c 2674 multiwrite_cb(mcb, -EIO);
40b4f539
KW
2675 break;
2676 }
40b4f539
KW
2677 }
2678 }
2679
453f9a16
KW
2680 /* Complete the dummy request */
2681 multiwrite_cb(mcb, 0);
2682
40b4f539
KW
2683 return 0;
2684
2685fail:
453f9a16
KW
2686 for (i = 0; i < mcb->num_callbacks; i++) {
2687 reqs[i].error = -EIO;
2688 }
7267c094 2689 g_free(mcb);
40b4f539
KW
2690 return -1;
2691}
2692
b2e12bc6
CH
2693BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2694 BlockDriverCompletionFunc *cb, void *opaque)
2695{
2696 BlockDriver *drv = bs->drv;
2697
a13aac04
SH
2698 trace_bdrv_aio_flush(bs, opaque);
2699
016f5cf6
AG
2700 if (bs->open_flags & BDRV_O_NO_FLUSH) {
2701 return bdrv_aio_noop_em(bs, cb, opaque);
2702 }
2703
b2e12bc6
CH
2704 if (!drv)
2705 return NULL;
b2e12bc6
CH
2706 return drv->bdrv_aio_flush(bs, cb, opaque);
2707}
2708
83f64091 2709void bdrv_aio_cancel(BlockDriverAIOCB *acb)
83f64091 2710{
6bbff9a0 2711 acb->pool->cancel(acb);
83f64091
FB
2712}
2713
ce1a14dc 2714
83f64091
FB
2715/**************************************************************/
2716/* async block device emulation */
2717
c16b5a2c
CH
2718typedef struct BlockDriverAIOCBSync {
2719 BlockDriverAIOCB common;
2720 QEMUBH *bh;
2721 int ret;
2722 /* vector translation state */
2723 QEMUIOVector *qiov;
2724 uint8_t *bounce;
2725 int is_write;
2726} BlockDriverAIOCBSync;
2727
2728static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2729{
b666d239
KW
2730 BlockDriverAIOCBSync *acb =
2731 container_of(blockacb, BlockDriverAIOCBSync, common);
6a7ad299 2732 qemu_bh_delete(acb->bh);
36afc451 2733 acb->bh = NULL;
c16b5a2c
CH
2734 qemu_aio_release(acb);
2735}
2736
2737static AIOPool bdrv_em_aio_pool = {
2738 .aiocb_size = sizeof(BlockDriverAIOCBSync),
2739 .cancel = bdrv_aio_cancel_em,
2740};
2741
ce1a14dc 2742static void bdrv_aio_bh_cb(void *opaque)
83f64091 2743{
ce1a14dc 2744 BlockDriverAIOCBSync *acb = opaque;
f141eafe 2745
f141eafe
AL
2746 if (!acb->is_write)
2747 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
ceb42de8 2748 qemu_vfree(acb->bounce);
ce1a14dc 2749 acb->common.cb(acb->common.opaque, acb->ret);
6a7ad299 2750 qemu_bh_delete(acb->bh);
36afc451 2751 acb->bh = NULL;
ce1a14dc 2752 qemu_aio_release(acb);
83f64091 2753}
beac80cd 2754
f141eafe
AL
2755static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2756 int64_t sector_num,
2757 QEMUIOVector *qiov,
2758 int nb_sectors,
2759 BlockDriverCompletionFunc *cb,
2760 void *opaque,
2761 int is_write)
2762
83f64091 2763{
ce1a14dc 2764 BlockDriverAIOCBSync *acb;
ce1a14dc 2765
c16b5a2c 2766 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
f141eafe
AL
2767 acb->is_write = is_write;
2768 acb->qiov = qiov;
e268ca52 2769 acb->bounce = qemu_blockalign(bs, qiov->size);
f141eafe 2770
ce1a14dc
PB
2771 if (!acb->bh)
2772 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
f141eafe
AL
2773
2774 if (is_write) {
2775 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
1ed20acf 2776 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
f141eafe 2777 } else {
1ed20acf 2778 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
f141eafe
AL
2779 }
2780
ce1a14dc 2781 qemu_bh_schedule(acb->bh);
f141eafe 2782
ce1a14dc 2783 return &acb->common;
beac80cd
FB
2784}
2785
f141eafe
AL
2786static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2787 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 2788 BlockDriverCompletionFunc *cb, void *opaque)
beac80cd 2789{
f141eafe
AL
2790 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2791}
83f64091 2792
f141eafe
AL
2793static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2794 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2795 BlockDriverCompletionFunc *cb, void *opaque)
2796{
2797 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
beac80cd 2798}
beac80cd 2799
68485420
KW
2800
2801typedef struct BlockDriverAIOCBCoroutine {
2802 BlockDriverAIOCB common;
2803 BlockRequest req;
2804 bool is_write;
2805 QEMUBH* bh;
2806} BlockDriverAIOCBCoroutine;
2807
2808static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
2809{
2810 qemu_aio_flush();
2811}
2812
2813static AIOPool bdrv_em_co_aio_pool = {
2814 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
2815 .cancel = bdrv_aio_co_cancel_em,
2816};
2817
2818static void bdrv_co_rw_bh(void *opaque)
2819{
2820 BlockDriverAIOCBCoroutine *acb = opaque;
2821
2822 acb->common.cb(acb->common.opaque, acb->req.error);
2823 qemu_bh_delete(acb->bh);
2824 qemu_aio_release(acb);
2825}
2826
2827static void coroutine_fn bdrv_co_rw(void *opaque)
2828{
2829 BlockDriverAIOCBCoroutine *acb = opaque;
2830 BlockDriverState *bs = acb->common.bs;
2831
2832 if (!acb->is_write) {
2833 acb->req.error = bs->drv->bdrv_co_readv(bs, acb->req.sector,
2834 acb->req.nb_sectors, acb->req.qiov);
2835 } else {
2836 acb->req.error = bs->drv->bdrv_co_writev(bs, acb->req.sector,
2837 acb->req.nb_sectors, acb->req.qiov);
2838 }
2839
2840 acb->bh = qemu_bh_new(bdrv_co_rw_bh, acb);
2841 qemu_bh_schedule(acb->bh);
2842}
2843
2844static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
2845 int64_t sector_num,
2846 QEMUIOVector *qiov,
2847 int nb_sectors,
2848 BlockDriverCompletionFunc *cb,
2849 void *opaque,
2850 bool is_write)
2851{
2852 Coroutine *co;
2853 BlockDriverAIOCBCoroutine *acb;
2854
2855 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2856 acb->req.sector = sector_num;
2857 acb->req.nb_sectors = nb_sectors;
2858 acb->req.qiov = qiov;
2859 acb->is_write = is_write;
2860
2861 co = qemu_coroutine_create(bdrv_co_rw);
2862 qemu_coroutine_enter(co, acb);
2863
2864 return &acb->common;
2865}
2866
2867static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
2868 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2869 BlockDriverCompletionFunc *cb, void *opaque)
2870{
2871 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
2872 false);
2873}
2874
2875static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
2876 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2877 BlockDriverCompletionFunc *cb, void *opaque)
2878{
2879 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
2880 true);
2881}
2882
b2e12bc6
CH
2883static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
2884 BlockDriverCompletionFunc *cb, void *opaque)
2885{
2886 BlockDriverAIOCBSync *acb;
2887
2888 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2889 acb->is_write = 1; /* don't bounce in the completion hadler */
2890 acb->qiov = NULL;
2891 acb->bounce = NULL;
2892 acb->ret = 0;
2893
2894 if (!acb->bh)
2895 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2896
2897 bdrv_flush(bs);
2898 qemu_bh_schedule(acb->bh);
2899 return &acb->common;
2900}
2901
016f5cf6
AG
2902static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
2903 BlockDriverCompletionFunc *cb, void *opaque)
2904{
2905 BlockDriverAIOCBSync *acb;
2906
2907 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2908 acb->is_write = 1; /* don't bounce in the completion handler */
2909 acb->qiov = NULL;
2910 acb->bounce = NULL;
2911 acb->ret = 0;
2912
2913 if (!acb->bh) {
2914 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2915 }
2916
2917 qemu_bh_schedule(acb->bh);
2918 return &acb->common;
2919}
2920
83f64091
FB
2921/**************************************************************/
2922/* sync block device emulation */
ea2384d3 2923
83f64091
FB
/* Completion callback storing @ret into the int that @opaque points to. */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
2928
5fafdf24 2929static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
83f64091 2930 uint8_t *buf, int nb_sectors)
7a6cba61 2931{
ce1a14dc
PB
2932 int async_ret;
2933 BlockDriverAIOCB *acb;
f141eafe
AL
2934 struct iovec iov;
2935 QEMUIOVector qiov;
83f64091 2936
83f64091 2937 async_ret = NOT_DONE;
3f4cb3d3 2938 iov.iov_base = (void *)buf;
eb5a3165 2939 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
f141eafe 2940 qemu_iovec_init_external(&qiov, &iov, 1);
1ed20acf
SH
2941
2942 acb = bs->drv->bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
2943 bdrv_rw_em_cb, &async_ret);
65d6b3d8
KW
2944 if (acb == NULL) {
2945 async_ret = -1;
2946 goto fail;
2947 }
baf35cb9 2948
83f64091
FB
2949 while (async_ret == NOT_DONE) {
2950 qemu_aio_wait();
2951 }
baf35cb9 2952
65d6b3d8
KW
2953
2954fail:
83f64091 2955 return async_ret;
7a6cba61
PB
2956}
2957
83f64091
FB
2958static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
2959 const uint8_t *buf, int nb_sectors)
2960{
ce1a14dc
PB
2961 int async_ret;
2962 BlockDriverAIOCB *acb;
f141eafe
AL
2963 struct iovec iov;
2964 QEMUIOVector qiov;
83f64091 2965
83f64091 2966 async_ret = NOT_DONE;
f141eafe 2967 iov.iov_base = (void *)buf;
eb5a3165 2968 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
f141eafe 2969 qemu_iovec_init_external(&qiov, &iov, 1);
1ed20acf
SH
2970
2971 acb = bs->drv->bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
2972 bdrv_rw_em_cb, &async_ret);
65d6b3d8
KW
2973 if (acb == NULL) {
2974 async_ret = -1;
2975 goto fail;
2976 }
83f64091
FB
2977 while (async_ret == NOT_DONE) {
2978 qemu_aio_wait();
2979 }
65d6b3d8
KW
2980
2981fail:
83f64091
FB
2982 return async_ret;
2983}
ea2384d3
FB
2984
2985void bdrv_init(void)
2986{
5efa9d5a 2987 module_call_init(MODULE_INIT_BLOCK);
ea2384d3 2988}
ce1a14dc 2989
eb852011
MA
2990void bdrv_init_with_whitelist(void)
2991{
2992 use_bdrv_whitelist = 1;
2993 bdrv_init();
2994}
2995
c16b5a2c
CH
2996void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2997 BlockDriverCompletionFunc *cb, void *opaque)
ce1a14dc 2998{
ce1a14dc
PB
2999 BlockDriverAIOCB *acb;
3000
6bbff9a0
AL
3001 if (pool->free_aiocb) {
3002 acb = pool->free_aiocb;
3003 pool->free_aiocb = acb->next;
ce1a14dc 3004 } else {
7267c094 3005 acb = g_malloc0(pool->aiocb_size);
6bbff9a0 3006 acb->pool = pool;
ce1a14dc
PB
3007 }
3008 acb->bs = bs;
3009 acb->cb = cb;
3010 acb->opaque = opaque;
3011 return acb;
3012}
3013
3014void qemu_aio_release(void *p)
3015{
6bbff9a0
AL
3016 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
3017 AIOPool *pool = acb->pool;
3018 acb->next = pool->free_aiocb;
3019 pool->free_aiocb = acb;
ce1a14dc 3020}
19cb3738 3021
f9f05dc5
KW
3022/**************************************************************/
3023/* Coroutine block device emulation */
3024
3025typedef struct CoroutineIOCompletion {
3026 Coroutine *coroutine;
3027 int ret;
3028} CoroutineIOCompletion;
3029
3030static void bdrv_co_io_em_complete(void *opaque, int ret)
3031{
3032 CoroutineIOCompletion *co = opaque;
3033
3034 co->ret = ret;
3035 qemu_coroutine_enter(co->coroutine, NULL);
3036}
3037
3038static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
3039 int nb_sectors, QEMUIOVector *iov,
3040 bool is_write)
3041{
3042 CoroutineIOCompletion co = {
3043 .coroutine = qemu_coroutine_self(),
3044 };
3045 BlockDriverAIOCB *acb;
3046
3047 if (is_write) {
a652d160
SH
3048 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
3049 bdrv_co_io_em_complete, &co);
f9f05dc5 3050 } else {
a652d160
SH
3051 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3052 bdrv_co_io_em_complete, &co);
f9f05dc5
KW
3053 }
3054
59370aaa 3055 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
f9f05dc5
KW
3056 if (!acb) {
3057 return -EIO;
3058 }
3059 qemu_coroutine_yield();
3060
3061 return co.ret;
3062}
3063
3064static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3065 int64_t sector_num, int nb_sectors,
3066 QEMUIOVector *iov)
3067{
3068 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3069}
3070
3071static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3072 int64_t sector_num, int nb_sectors,
3073 QEMUIOVector *iov)
3074{
3075 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3076}
3077
e7a8a783
KW
3078static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs)
3079{
3080 CoroutineIOCompletion co = {
3081 .coroutine = qemu_coroutine_self(),
3082 };
3083 BlockDriverAIOCB *acb;
3084
3085 acb = bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3086 if (!acb) {
3087 return -EIO;
3088 }
3089 qemu_coroutine_yield();
3090 return co.ret;
3091}
3092
19cb3738
FB
3093/**************************************************************/
3094/* removable device support */
3095
3096/**
3097 * Return TRUE if the media is present
3098 */
3099int bdrv_is_inserted(BlockDriverState *bs)
3100{
3101 BlockDriver *drv = bs->drv;
a1aff5bf 3102
19cb3738
FB
3103 if (!drv)
3104 return 0;
3105 if (!drv->bdrv_is_inserted)
a1aff5bf
MA
3106 return 1;
3107 return drv->bdrv_is_inserted(bs);
19cb3738
FB
3108}
3109
3110/**
8e49ca46
MA
3111 * Return whether the media changed since the last call to this
3112 * function, or -ENOTSUP if we don't know. Most drivers don't know.
19cb3738
FB
3113 */
3114int bdrv_media_changed(BlockDriverState *bs)
3115{
3116 BlockDriver *drv = bs->drv;
19cb3738 3117
8e49ca46
MA
3118 if (drv && drv->bdrv_media_changed) {
3119 return drv->bdrv_media_changed(bs);
3120 }
3121 return -ENOTSUP;
19cb3738
FB
3122}
3123
3124/**
3125 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3126 */
fdec4404 3127void bdrv_eject(BlockDriverState *bs, int eject_flag)
19cb3738
FB
3128{
3129 BlockDriver *drv = bs->drv;
19cb3738 3130
822e1cd1
MA
3131 if (drv && drv->bdrv_eject) {
3132 drv->bdrv_eject(bs, eject_flag);
19cb3738
FB
3133 }
3134}
3135
19cb3738
FB
3136/**
3137 * Lock or unlock the media (if it is locked, the user won't be able
3138 * to eject it manually).
3139 */
025e849a 3140void bdrv_lock_medium(BlockDriverState *bs, bool locked)
19cb3738
FB
3141{
3142 BlockDriver *drv = bs->drv;
3143
025e849a 3144 trace_bdrv_lock_medium(bs, locked);
b8c6d095 3145
025e849a
MA
3146 if (drv && drv->bdrv_lock_medium) {
3147 drv->bdrv_lock_medium(bs, locked);
19cb3738
FB
3148 }
3149}
985a03b0
TS
3150
3151/* needed for generic scsi interface */
3152
3153int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3154{
3155 BlockDriver *drv = bs->drv;
3156
3157 if (drv && drv->bdrv_ioctl)
3158 return drv->bdrv_ioctl(bs, req, buf);
3159 return -ENOTSUP;
3160}
7d780669 3161
221f715d
AL
3162BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3163 unsigned long int req, void *buf,
3164 BlockDriverCompletionFunc *cb, void *opaque)
7d780669 3165{
221f715d 3166 BlockDriver *drv = bs->drv;
7d780669 3167
221f715d
AL
3168 if (drv && drv->bdrv_aio_ioctl)
3169 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3170 return NULL;
7d780669 3171}
e268ca52 3172
7b6f9300
MA
3173void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3174{
3175 bs->buffer_alignment = align;
3176}
7cd1e32a 3177
e268ca52
AL
3178void *qemu_blockalign(BlockDriverState *bs, size_t size)
3179{
3180 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3181}
7cd1e32a 3182
3183void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3184{
3185 int64_t bitmap_size;
a55eb92c 3186
aaa0eb75 3187 bs->dirty_count = 0;
a55eb92c 3188 if (enable) {
c6d22830
JK
3189 if (!bs->dirty_bitmap) {
3190 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3191 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3192 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
a55eb92c 3193
7267c094 3194 bs->dirty_bitmap = g_malloc0(bitmap_size);
a55eb92c 3195 }
7cd1e32a 3196 } else {
c6d22830 3197 if (bs->dirty_bitmap) {
7267c094 3198 g_free(bs->dirty_bitmap);
c6d22830 3199 bs->dirty_bitmap = NULL;
a55eb92c 3200 }
7cd1e32a 3201 }
3202}
3203
3204int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3205{
6ea44308 3206 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c 3207
c6d22830
JK
3208 if (bs->dirty_bitmap &&
3209 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
6d59fec1
MT
3210 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3211 (1UL << (chunk % (sizeof(unsigned long) * 8))));
7cd1e32a 3212 } else {
3213 return 0;
3214 }
3215}
3216
a55eb92c
JK
3217void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3218 int nr_sectors)
7cd1e32a 3219{
3220 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3221}
aaa0eb75
LS
3222
3223int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3224{
3225 return bs->dirty_count;
3226}
f88e1a42 3227
db593f25
MT
3228void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3229{
3230 assert(bs->in_use != in_use);
3231 bs->in_use = in_use;
3232}
3233
3234int bdrv_in_use(BlockDriverState *bs)
3235{
3236 return bs->in_use;
3237}
3238
28a7282a
LC
3239void bdrv_iostatus_enable(BlockDriverState *bs)
3240{
3241 bs->iostatus = BDRV_IOS_OK;
3242}
3243
3244/* The I/O status is only enabled if the drive explicitly
3245 * enables it _and_ the VM is configured to stop on errors */
3246bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3247{
3248 return (bs->iostatus != BDRV_IOS_INVAL &&
3249 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3250 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3251 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3252}
3253
3254void bdrv_iostatus_disable(BlockDriverState *bs)
3255{
3256 bs->iostatus = BDRV_IOS_INVAL;
3257}
3258
3259void bdrv_iostatus_reset(BlockDriverState *bs)
3260{
3261 if (bdrv_iostatus_is_enabled(bs)) {
3262 bs->iostatus = BDRV_IOS_OK;
3263 }
3264}
3265
3266/* XXX: Today this is set by device models because it makes the implementation
3267 quite simple. However, the block layer knows about the error, so it's
3268 possible to implement this without device models being involved */
3269void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3270{
3271 if (bdrv_iostatus_is_enabled(bs) && bs->iostatus == BDRV_IOS_OK) {
3272 assert(error >= 0);
3273 bs->iostatus = error == ENOSPC ? BDRV_IOS_ENOSPC : BDRV_IOS_FAILED;
3274 }
3275}
3276
a597e79c
CH
3277void
3278bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3279 enum BlockAcctType type)
3280{
3281 assert(type < BDRV_MAX_IOTYPE);
3282
3283 cookie->bytes = bytes;
c488c7f6 3284 cookie->start_time_ns = get_clock();
a597e79c
CH
3285 cookie->type = type;
3286}
3287
3288void
3289bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3290{
3291 assert(cookie->type < BDRV_MAX_IOTYPE);
3292
3293 bs->nr_bytes[cookie->type] += cookie->bytes;
3294 bs->nr_ops[cookie->type]++;
c488c7f6 3295 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
a597e79c
CH
3296}
3297
f88e1a42
JS
3298int bdrv_img_create(const char *filename, const char *fmt,
3299 const char *base_filename, const char *base_fmt,
3300 char *options, uint64_t img_size, int flags)
3301{
3302 QEMUOptionParameter *param = NULL, *create_options = NULL;
d220894e 3303 QEMUOptionParameter *backing_fmt, *backing_file, *size;
f88e1a42
JS
3304 BlockDriverState *bs = NULL;
3305 BlockDriver *drv, *proto_drv;
96df67d1 3306 BlockDriver *backing_drv = NULL;
f88e1a42
JS
3307 int ret = 0;
3308
3309 /* Find driver and parse its options */
3310 drv = bdrv_find_format(fmt);
3311 if (!drv) {
3312 error_report("Unknown file format '%s'", fmt);
4f70f249 3313 ret = -EINVAL;
f88e1a42
JS
3314 goto out;
3315 }
3316
3317 proto_drv = bdrv_find_protocol(filename);
3318 if (!proto_drv) {
3319 error_report("Unknown protocol '%s'", filename);
4f70f249 3320 ret = -EINVAL;
f88e1a42
JS
3321 goto out;
3322 }
3323
3324 create_options = append_option_parameters(create_options,
3325 drv->create_options);
3326 create_options = append_option_parameters(create_options,
3327 proto_drv->create_options);
3328
3329 /* Create parameter list with default values */
3330 param = parse_option_parameters("", create_options, param);
3331
3332 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3333
3334 /* Parse -o options */
3335 if (options) {
3336 param = parse_option_parameters(options, create_options, param);
3337 if (param == NULL) {
3338 error_report("Invalid options for file format '%s'.", fmt);
4f70f249 3339 ret = -EINVAL;
f88e1a42
JS
3340 goto out;
3341 }
3342 }
3343
3344 if (base_filename) {
3345 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3346 base_filename)) {
3347 error_report("Backing file not supported for file format '%s'",
3348 fmt);
4f70f249 3349 ret = -EINVAL;
f88e1a42
JS
3350 goto out;
3351 }
3352 }
3353
3354 if (base_fmt) {
3355 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3356 error_report("Backing file format not supported for file "
3357 "format '%s'", fmt);
4f70f249 3358 ret = -EINVAL;
f88e1a42
JS
3359 goto out;
3360 }
3361 }
3362
792da93a
JS
3363 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3364 if (backing_file && backing_file->value.s) {
3365 if (!strcmp(filename, backing_file->value.s)) {
3366 error_report("Error: Trying to create an image with the "
3367 "same filename as the backing file");
4f70f249 3368 ret = -EINVAL;
792da93a
JS
3369 goto out;
3370 }
3371 }
3372
f88e1a42
JS
3373 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3374 if (backing_fmt && backing_fmt->value.s) {
96df67d1
SH
3375 backing_drv = bdrv_find_format(backing_fmt->value.s);
3376 if (!backing_drv) {
f88e1a42
JS
3377 error_report("Unknown backing file format '%s'",
3378 backing_fmt->value.s);
4f70f249 3379 ret = -EINVAL;
f88e1a42
JS
3380 goto out;
3381 }
3382 }
3383
3384 // The size for the image must always be specified, with one exception:
3385 // If we are using a backing file, we can obtain the size from there
d220894e
KW
3386 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3387 if (size && size->value.n == -1) {
f88e1a42
JS
3388 if (backing_file && backing_file->value.s) {
3389 uint64_t size;
f88e1a42
JS
3390 char buf[32];
3391
f88e1a42
JS
3392 bs = bdrv_new("");
3393
96df67d1 3394 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
f88e1a42 3395 if (ret < 0) {
96df67d1 3396 error_report("Could not open '%s'", backing_file->value.s);
f88e1a42
JS
3397 goto out;
3398 }
3399 bdrv_get_geometry(bs, &size);
3400 size *= 512;
3401
3402 snprintf(buf, sizeof(buf), "%" PRId64, size);
3403 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3404 } else {
3405 error_report("Image creation needs a size parameter");
4f70f249 3406 ret = -EINVAL;
f88e1a42
JS
3407 goto out;
3408 }
3409 }
3410
3411 printf("Formatting '%s', fmt=%s ", filename, fmt);
3412 print_option_parameters(param);
3413 puts("");
3414
3415 ret = bdrv_create(drv, filename, param);
3416
3417 if (ret < 0) {
3418 if (ret == -ENOTSUP) {
3419 error_report("Formatting or formatting option not supported for "
3420 "file format '%s'", fmt);
3421 } else if (ret == -EFBIG) {
3422 error_report("The image size is too large for file format '%s'",
3423 fmt);
3424 } else {
3425 error_report("%s: error while creating %s: %s", filename, fmt,
3426 strerror(-ret));
3427 }
3428 }
3429
3430out:
3431 free_option_parameters(create_options);
3432 free_option_parameters(param);
3433
3434 if (bs) {
3435 bdrv_delete(bs);
3436 }
4f70f249
JS
3437
3438 return ret;
f88e1a42 3439}