]> git.proxmox.com Git - qemu.git/blame - block.c
block: mark blocks dirty on coroutine write completion
[qemu.git] / block.c
CommitLineData
fc01f7e7
FB
1/*
2 * QEMU System Emulator block driver
5fafdf24 3 *
fc01f7e7 4 * Copyright (c) 2003 Fabrice Bellard
5fafdf24 5 *
fc01f7e7
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
3990d09a 24#include "config-host.h"
faf07963 25#include "qemu-common.h"
6d519a5f 26#include "trace.h"
376253ec 27#include "monitor.h"
ea2384d3 28#include "block_int.h"
5efa9d5a 29#include "module.h"
d15e5465 30#include "qemu-objects.h"
68485420 31#include "qemu-coroutine.h"
fc01f7e7 32
71e72a19 33#ifdef CONFIG_BSD
7674e7bf
FB
34#include <sys/types.h>
35#include <sys/stat.h>
36#include <sys/ioctl.h>
72cf2d4f 37#include <sys/queue.h>
c5e97233 38#ifndef __DragonFly__
7674e7bf
FB
39#include <sys/disk.h>
40#endif
c5e97233 41#endif
7674e7bf 42
49dc768d
AL
43#ifdef _WIN32
44#include <windows.h>
45#endif
46
1c9805a3
SH
47#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
48
7d4b4ba5 49static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
f141eafe
AL
50static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
51 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
c87c0672 52 BlockDriverCompletionFunc *cb, void *opaque);
f141eafe
AL
53static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
54 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 55 BlockDriverCompletionFunc *cb, void *opaque);
b2e12bc6
CH
56static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
57 BlockDriverCompletionFunc *cb, void *opaque);
016f5cf6
AG
58static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
59 BlockDriverCompletionFunc *cb, void *opaque);
5fafdf24 60static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
83f64091
FB
61 uint8_t *buf, int nb_sectors);
62static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
63 const uint8_t *buf, int nb_sectors);
68485420
KW
64static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
65 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
66 BlockDriverCompletionFunc *cb, void *opaque);
67static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
68 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
69 BlockDriverCompletionFunc *cb, void *opaque);
f9f05dc5
KW
70static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
71 int64_t sector_num, int nb_sectors,
72 QEMUIOVector *iov);
73static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
74 int64_t sector_num, int nb_sectors,
75 QEMUIOVector *iov);
e7a8a783 76static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs);
c5fbe571
SH
77static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
78 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
1c9805a3
SH
79static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
80 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
b2a61371
SH
81static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
82 int64_t sector_num,
83 QEMUIOVector *qiov,
84 int nb_sectors,
85 BlockDriverCompletionFunc *cb,
86 void *opaque,
87 bool is_write,
88 CoroutineEntry *entry);
89static void coroutine_fn bdrv_co_do_rw(void *opaque);
ec530c81 90
1b7bdbc1
SH
91static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
92 QTAILQ_HEAD_INITIALIZER(bdrv_states);
7ee930d0 93
8a22f02a
SH
94static QLIST_HEAD(, BlockDriver) bdrv_drivers =
95 QLIST_HEAD_INITIALIZER(bdrv_drivers);
ea2384d3 96
f9092b10
MA
97/* The device to use for VM snapshots */
98static BlockDriverState *bs_snapshots;
99
eb852011
MA
100/* If non-zero, use only whitelisted block drivers */
101static int use_bdrv_whitelist;
102
9e0b22f4
SH
103#ifdef _WIN32
/*
 * Return 1 if filename starts with an ASCII letter immediately followed
 * by ':' (a drive specification such as "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_alpha = (letter >= 'a' && letter <= 'z') ||
                         (letter >= 'A' && letter <= 'Z');

    /* only inspect the second character once the first one qualifies */
    if (!is_alpha) {
        return 0;
    }
    return filename[1] == ':';
}
110
/*
 * Return 1 if filename is exactly a drive specification ("d:") or starts
 * with one of the prefixes "\\.\" or "//./", 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    int bare_drive = is_windows_drive_prefix(filename) && filename[2] == '\0';

    if (bare_drive) {
        return 1;
    }
    return strstart(filename, "\\\\.\\", NULL) ||
           strstart(filename, "//./", NULL);
}
121#endif
122
/*
 * Check whether path starts with "<protocol>:".
 *
 * On Windows, drive specifications and device paths are never treated as
 * protocols. Everywhere else, any ':' in the path counts.
 */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    if (strchr(path, ':') == NULL) {
        return 0;
    }
    return 1;
}
135
/*
 * Return non-zero if path is absolute.
 *
 * The part after the first ':' (if any) is examined, so "proto:/x" counts
 * as absolute. On Windows both '/' and '\' are accepted as separators and
 * a path beginning with either is always absolute.
 */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif
    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;
#ifdef _WIN32
    return (*start == '/' || *start == '\\');
#else
    return (*start == '/');
#endif
}
155
83f64091
FB
/*
 * Build in dest (at most dest_size bytes, always NUL-terminated when
 * dest_size > 0) the path of filename interpreted relative to base_path.
 *
 * If filename is absolute it is copied unchanged. Otherwise the directory
 * part of base_path (everything up to and including the last separator, or
 * the "<protocol>:" prefix if that extends further) is prepended.
 * URLs are supported.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_proto, *after_sep;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* end of an optional "<protocol>:" prefix */
    after_proto = strchr(base_path, ':');
    after_proto = after_proto ? after_proto + 1 : base_path;

    /* end of the directory part */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* keep whichever prefix reaches further into base_path */
    if (after_sep > after_proto) {
        after_proto = after_sep;
    }
    prefix_len = after_proto - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
199
5efa9d5a 200void bdrv_register(BlockDriver *bdrv)
ea2384d3 201{
68485420
KW
202 if (bdrv->bdrv_co_readv) {
203 /* Emulate AIO by coroutines, and sync by AIO */
204 bdrv->bdrv_aio_readv = bdrv_co_aio_readv_em;
205 bdrv->bdrv_aio_writev = bdrv_co_aio_writev_em;
206 bdrv->bdrv_read = bdrv_read_em;
207 bdrv->bdrv_write = bdrv_write_em;
f9f05dc5
KW
208 } else {
209 bdrv->bdrv_co_readv = bdrv_co_readv_em;
210 bdrv->bdrv_co_writev = bdrv_co_writev_em;
211
212 if (!bdrv->bdrv_aio_readv) {
213 /* add AIO emulation layer */
214 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
215 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
216 } else if (!bdrv->bdrv_read) {
217 /* add synchronous IO emulation layer */
218 bdrv->bdrv_read = bdrv_read_em;
219 bdrv->bdrv_write = bdrv_write_em;
220 }
83f64091 221 }
b2e12bc6
CH
222
223 if (!bdrv->bdrv_aio_flush)
224 bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
225
8a22f02a 226 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
ea2384d3 227}
b338082b
FB
228
229/* create a new block device (by default it is empty) */
230BlockDriverState *bdrv_new(const char *device_name)
231{
1b7bdbc1 232 BlockDriverState *bs;
b338082b 233
7267c094 234 bs = g_malloc0(sizeof(BlockDriverState));
b338082b 235 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
ea2384d3 236 if (device_name[0] != '\0') {
1b7bdbc1 237 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
ea2384d3 238 }
28a7282a 239 bdrv_iostatus_disable(bs);
b338082b
FB
240 return bs;
241}
242
ea2384d3
FB
243BlockDriver *bdrv_find_format(const char *format_name)
244{
245 BlockDriver *drv1;
8a22f02a
SH
246 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
247 if (!strcmp(drv1->format_name, format_name)) {
ea2384d3 248 return drv1;
8a22f02a 249 }
ea2384d3
FB
250 }
251 return NULL;
252}
253
eb852011
MA
254static int bdrv_is_whitelisted(BlockDriver *drv)
255{
256 static const char *whitelist[] = {
257 CONFIG_BDRV_WHITELIST
258 };
259 const char **p;
260
261 if (!whitelist[0])
262 return 1; /* no whitelist, anything goes */
263
264 for (p = whitelist; *p; p++) {
265 if (!strcmp(drv->format_name, *p)) {
266 return 1;
267 }
268 }
269 return 0;
270}
271
272BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
273{
274 BlockDriver *drv = bdrv_find_format(format_name);
275 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
276}
277
0e7e1989
KW
278int bdrv_create(BlockDriver *drv, const char* filename,
279 QEMUOptionParameter *options)
ea2384d3
FB
280{
281 if (!drv->bdrv_create)
282 return -ENOTSUP;
0e7e1989
KW
283
284 return drv->bdrv_create(filename, options);
ea2384d3
FB
285}
286
84a12e66
CH
287int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
288{
289 BlockDriver *drv;
290
b50cbabc 291 drv = bdrv_find_protocol(filename);
84a12e66 292 if (drv == NULL) {
16905d71 293 return -ENOENT;
84a12e66
CH
294 }
295
296 return bdrv_create(drv, filename, options);
297}
298
#ifdef _WIN32
/*
 * Store the name of a newly created temporary file in filename.
 *
 * size is the capacity of filename in bytes. GetTempFileName() writes up
 * to MAX_PATH characters without taking a length, so a smaller buffer
 * would be overrun; this is asserted.
 */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    assert(size >= MAX_PATH);
    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
/*
 * Store the name of a newly created temporary file in filename.
 *
 * The file is created under $TMPDIR (or /tmp when TMPDIR is unset) with
 * mkstemp() and its descriptor is closed again straight away; only the
 * name is handed back. size is the capacity of filename in bytes.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;
    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    /* mkstemp() returns -1 on failure; do not hand that to close() */
    if (fd >= 0) {
        close(fd);
    }
}
#endif
fc01f7e7 321
84a12e66
CH
322/*
323 * Detect host devices. By convention, /dev/cdrom[N] is always
324 * recognized as a host CDROM.
325 */
326static BlockDriver *find_hdev_driver(const char *filename)
327{
328 int score_max = 0, score;
329 BlockDriver *drv = NULL, *d;
330
331 QLIST_FOREACH(d, &bdrv_drivers, list) {
332 if (d->bdrv_probe_device) {
333 score = d->bdrv_probe_device(filename);
334 if (score > score_max) {
335 score_max = score;
336 drv = d;
337 }
338 }
339 }
340
341 return drv;
342}
343
b50cbabc 344BlockDriver *bdrv_find_protocol(const char *filename)
83f64091
FB
345{
346 BlockDriver *drv1;
347 char protocol[128];
1cec71e3 348 int len;
83f64091 349 const char *p;
19cb3738 350
66f82cee
KW
351 /* TODO Drivers without bdrv_file_open must be specified explicitly */
352
39508e7a
CH
353 /*
354 * XXX(hch): we really should not let host device detection
355 * override an explicit protocol specification, but moving this
356 * later breaks access to device names with colons in them.
357 * Thanks to the brain-dead persistent naming schemes on udev-
358 * based Linux systems those actually are quite common.
359 */
360 drv1 = find_hdev_driver(filename);
361 if (drv1) {
362 return drv1;
363 }
364
9e0b22f4 365 if (!path_has_protocol(filename)) {
39508e7a 366 return bdrv_find_format("file");
84a12e66 367 }
9e0b22f4
SH
368 p = strchr(filename, ':');
369 assert(p != NULL);
1cec71e3
AL
370 len = p - filename;
371 if (len > sizeof(protocol) - 1)
372 len = sizeof(protocol) - 1;
373 memcpy(protocol, filename, len);
374 protocol[len] = '\0';
8a22f02a 375 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
5fafdf24 376 if (drv1->protocol_name &&
8a22f02a 377 !strcmp(drv1->protocol_name, protocol)) {
83f64091 378 return drv1;
8a22f02a 379 }
83f64091
FB
380 }
381 return NULL;
382}
383
c98ac35d 384static int find_image_format(const char *filename, BlockDriver **pdrv)
f3a5d3f8
CH
385{
386 int ret, score, score_max;
387 BlockDriver *drv1, *drv;
388 uint8_t buf[2048];
389 BlockDriverState *bs;
390
f5edb014 391 ret = bdrv_file_open(&bs, filename, 0);
c98ac35d
SW
392 if (ret < 0) {
393 *pdrv = NULL;
394 return ret;
395 }
f8ea0b00 396
08a00559
KW
397 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
398 if (bs->sg || !bdrv_is_inserted(bs)) {
1a396859 399 bdrv_delete(bs);
c98ac35d
SW
400 drv = bdrv_find_format("raw");
401 if (!drv) {
402 ret = -ENOENT;
403 }
404 *pdrv = drv;
405 return ret;
1a396859 406 }
f8ea0b00 407
83f64091
FB
408 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
409 bdrv_delete(bs);
410 if (ret < 0) {
c98ac35d
SW
411 *pdrv = NULL;
412 return ret;
83f64091
FB
413 }
414
ea2384d3 415 score_max = 0;
84a12e66 416 drv = NULL;
8a22f02a 417 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
83f64091
FB
418 if (drv1->bdrv_probe) {
419 score = drv1->bdrv_probe(buf, ret, filename);
420 if (score > score_max) {
421 score_max = score;
422 drv = drv1;
423 }
0849bf08 424 }
fc01f7e7 425 }
c98ac35d
SW
426 if (!drv) {
427 ret = -ENOENT;
428 }
429 *pdrv = drv;
430 return ret;
ea2384d3
FB
431}
432
51762288
SH
433/**
434 * Set the current 'total_sectors' value
435 */
436static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
437{
438 BlockDriver *drv = bs->drv;
439
396759ad
NB
440 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
441 if (bs->sg)
442 return 0;
443
51762288
SH
444 /* query actual device if possible, otherwise just trust the hint */
445 if (drv->bdrv_getlength) {
446 int64_t length = drv->bdrv_getlength(bs);
447 if (length < 0) {
448 return length;
449 }
450 hint = length >> BDRV_SECTOR_BITS;
451 }
452
453 bs->total_sectors = hint;
454 return 0;
455}
456
c3993cdc
SH
457/**
458 * Set open flags for a given cache mode
459 *
460 * Return 0 on success, -1 if the cache mode was invalid.
461 */
462int bdrv_parse_cache_flags(const char *mode, int *flags)
463{
464 *flags &= ~BDRV_O_CACHE_MASK;
465
466 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
467 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
92196b2f
SH
468 } else if (!strcmp(mode, "directsync")) {
469 *flags |= BDRV_O_NOCACHE;
c3993cdc
SH
470 } else if (!strcmp(mode, "writeback")) {
471 *flags |= BDRV_O_CACHE_WB;
472 } else if (!strcmp(mode, "unsafe")) {
473 *flags |= BDRV_O_CACHE_WB;
474 *flags |= BDRV_O_NO_FLUSH;
475 } else if (!strcmp(mode, "writethrough")) {
476 /* this is the default */
477 } else {
478 return -1;
479 }
480
481 return 0;
482}
483
57915332
KW
484/*
485 * Common part for opening disk images and files
486 */
487static int bdrv_open_common(BlockDriverState *bs, const char *filename,
488 int flags, BlockDriver *drv)
489{
490 int ret, open_flags;
491
492 assert(drv != NULL);
493
28dcee10
SH
494 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
495
66f82cee 496 bs->file = NULL;
51762288 497 bs->total_sectors = 0;
57915332
KW
498 bs->encrypted = 0;
499 bs->valid_key = 0;
500 bs->open_flags = flags;
57915332
KW
501 bs->buffer_alignment = 512;
502
503 pstrcpy(bs->filename, sizeof(bs->filename), filename);
504
505 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
506 return -ENOTSUP;
507 }
508
509 bs->drv = drv;
7267c094 510 bs->opaque = g_malloc0(drv->instance_size);
57915332 511
a6599793 512 if (flags & BDRV_O_CACHE_WB)
57915332
KW
513 bs->enable_write_cache = 1;
514
515 /*
516 * Clear flags that are internal to the block layer before opening the
517 * image.
518 */
519 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
520
521 /*
ebabb67a 522 * Snapshots should be writable.
57915332
KW
523 */
524 if (bs->is_temporary) {
525 open_flags |= BDRV_O_RDWR;
526 }
527
66f82cee
KW
528 /* Open the image, either directly or using a protocol */
529 if (drv->bdrv_file_open) {
530 ret = drv->bdrv_file_open(bs, filename, open_flags);
531 } else {
532 ret = bdrv_file_open(&bs->file, filename, open_flags);
533 if (ret >= 0) {
534 ret = drv->bdrv_open(bs, open_flags);
535 }
536 }
537
57915332
KW
538 if (ret < 0) {
539 goto free_and_fail;
540 }
541
542 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
51762288
SH
543
544 ret = refresh_total_sectors(bs, bs->total_sectors);
545 if (ret < 0) {
546 goto free_and_fail;
57915332 547 }
51762288 548
57915332
KW
549#ifndef _WIN32
550 if (bs->is_temporary) {
551 unlink(filename);
552 }
553#endif
554 return 0;
555
556free_and_fail:
66f82cee
KW
557 if (bs->file) {
558 bdrv_delete(bs->file);
559 bs->file = NULL;
560 }
7267c094 561 g_free(bs->opaque);
57915332
KW
562 bs->opaque = NULL;
563 bs->drv = NULL;
564 return ret;
565}
566
b6ce07aa
KW
567/*
568 * Opens a file using a protocol (file, host_device, nbd, ...)
569 */
83f64091 570int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
ea2384d3 571{
83f64091 572 BlockDriverState *bs;
6db95603 573 BlockDriver *drv;
83f64091
FB
574 int ret;
575
b50cbabc 576 drv = bdrv_find_protocol(filename);
6db95603
CH
577 if (!drv) {
578 return -ENOENT;
579 }
580
83f64091 581 bs = bdrv_new("");
b6ce07aa 582 ret = bdrv_open_common(bs, filename, flags, drv);
83f64091
FB
583 if (ret < 0) {
584 bdrv_delete(bs);
585 return ret;
3b0d4f61 586 }
71d0770c 587 bs->growable = 1;
83f64091
FB
588 *pbs = bs;
589 return 0;
590}
591
b6ce07aa
KW
592/*
593 * Opens a disk image (raw, qcow2, vmdk, ...)
594 */
d6e9098e
KW
595int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
596 BlockDriver *drv)
ea2384d3 597{
b6ce07aa 598 int ret;
712e7874 599
83f64091 600 if (flags & BDRV_O_SNAPSHOT) {
ea2384d3
FB
601 BlockDriverState *bs1;
602 int64_t total_size;
7c96d46e 603 int is_protocol = 0;
91a073a9
KW
604 BlockDriver *bdrv_qcow2;
605 QEMUOptionParameter *options;
b6ce07aa
KW
606 char tmp_filename[PATH_MAX];
607 char backing_filename[PATH_MAX];
3b46e624 608
ea2384d3
FB
609 /* if snapshot, we create a temporary backing file and open it
610 instead of opening 'filename' directly */
33e3963e 611
ea2384d3
FB
612 /* if there is a backing file, use it */
613 bs1 = bdrv_new("");
d6e9098e 614 ret = bdrv_open(bs1, filename, 0, drv);
51d7c00c 615 if (ret < 0) {
ea2384d3 616 bdrv_delete(bs1);
51d7c00c 617 return ret;
ea2384d3 618 }
3e82990b 619 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
7c96d46e
AL
620
621 if (bs1->drv && bs1->drv->protocol_name)
622 is_protocol = 1;
623
ea2384d3 624 bdrv_delete(bs1);
3b46e624 625
ea2384d3 626 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
7c96d46e
AL
627
628 /* Real path is meaningless for protocols */
629 if (is_protocol)
630 snprintf(backing_filename, sizeof(backing_filename),
631 "%s", filename);
114cdfa9
KS
632 else if (!realpath(filename, backing_filename))
633 return -errno;
7c96d46e 634
91a073a9
KW
635 bdrv_qcow2 = bdrv_find_format("qcow2");
636 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
637
3e82990b 638 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
91a073a9
KW
639 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
640 if (drv) {
641 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
642 drv->format_name);
643 }
644
645 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
d748768c 646 free_option_parameters(options);
51d7c00c
AL
647 if (ret < 0) {
648 return ret;
ea2384d3 649 }
91a073a9 650
ea2384d3 651 filename = tmp_filename;
91a073a9 652 drv = bdrv_qcow2;
ea2384d3
FB
653 bs->is_temporary = 1;
654 }
712e7874 655
b6ce07aa 656 /* Find the right image format driver */
6db95603 657 if (!drv) {
c98ac35d 658 ret = find_image_format(filename, &drv);
51d7c00c 659 }
6987307c 660
51d7c00c 661 if (!drv) {
51d7c00c 662 goto unlink_and_fail;
ea2384d3 663 }
b6ce07aa
KW
664
665 /* Open the image */
666 ret = bdrv_open_common(bs, filename, flags, drv);
667 if (ret < 0) {
6987307c
CH
668 goto unlink_and_fail;
669 }
670
b6ce07aa
KW
671 /* If there is a backing file, use it */
672 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
673 char backing_filename[PATH_MAX];
674 int back_flags;
675 BlockDriver *back_drv = NULL;
676
677 bs->backing_hd = bdrv_new("");
df2dbb4a
SH
678
679 if (path_has_protocol(bs->backing_file)) {
680 pstrcpy(backing_filename, sizeof(backing_filename),
681 bs->backing_file);
682 } else {
683 path_combine(backing_filename, sizeof(backing_filename),
684 filename, bs->backing_file);
685 }
686
687 if (bs->backing_format[0] != '\0') {
b6ce07aa 688 back_drv = bdrv_find_format(bs->backing_format);
df2dbb4a 689 }
b6ce07aa
KW
690
691 /* backing files always opened read-only */
692 back_flags =
693 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
694
695 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
696 if (ret < 0) {
697 bdrv_close(bs);
698 return ret;
699 }
700 if (bs->is_temporary) {
701 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
702 } else {
703 /* base image inherits from "parent" */
704 bs->backing_hd->keep_read_only = bs->keep_read_only;
705 }
706 }
707
708 if (!bdrv_key_required(bs)) {
7d4b4ba5 709 bdrv_dev_change_media_cb(bs, true);
b6ce07aa
KW
710 }
711
712 return 0;
713
714unlink_and_fail:
715 if (bs->is_temporary) {
716 unlink(filename);
717 }
718 return ret;
719}
720
fc01f7e7
FB
721void bdrv_close(BlockDriverState *bs)
722{
19cb3738 723 if (bs->drv) {
f9092b10
MA
724 if (bs == bs_snapshots) {
725 bs_snapshots = NULL;
726 }
557df6ac 727 if (bs->backing_hd) {
ea2384d3 728 bdrv_delete(bs->backing_hd);
557df6ac
SH
729 bs->backing_hd = NULL;
730 }
ea2384d3 731 bs->drv->bdrv_close(bs);
7267c094 732 g_free(bs->opaque);
ea2384d3
FB
733#ifdef _WIN32
734 if (bs->is_temporary) {
735 unlink(bs->filename);
736 }
67b915a5 737#endif
ea2384d3
FB
738 bs->opaque = NULL;
739 bs->drv = NULL;
b338082b 740
66f82cee
KW
741 if (bs->file != NULL) {
742 bdrv_close(bs->file);
743 }
744
7d4b4ba5 745 bdrv_dev_change_media_cb(bs, false);
b338082b
FB
746 }
747}
748
2bc93fed
MK
749void bdrv_close_all(void)
750{
751 BlockDriverState *bs;
752
753 QTAILQ_FOREACH(bs, &bdrv_states, list) {
754 bdrv_close(bs);
755 }
756}
757
d22b2f41
RH
758/* make a BlockDriverState anonymous by removing from bdrv_state list.
759 Also, NULL terminate the device_name to prevent double remove */
760void bdrv_make_anon(BlockDriverState *bs)
761{
762 if (bs->device_name[0] != '\0') {
763 QTAILQ_REMOVE(&bdrv_states, bs, list);
764 }
765 bs->device_name[0] = '\0';
766}
767
b338082b
FB
768void bdrv_delete(BlockDriverState *bs)
769{
fa879d62 770 assert(!bs->dev);
18846dee 771
1b7bdbc1 772 /* remove from list, if necessary */
d22b2f41 773 bdrv_make_anon(bs);
34c6f050 774
b338082b 775 bdrv_close(bs);
66f82cee
KW
776 if (bs->file != NULL) {
777 bdrv_delete(bs->file);
778 }
779
f9092b10 780 assert(bs != bs_snapshots);
7267c094 781 g_free(bs);
fc01f7e7
FB
782}
783
fa879d62
MA
784int bdrv_attach_dev(BlockDriverState *bs, void *dev)
785/* TODO change to DeviceState *dev when all users are qdevified */
18846dee 786{
fa879d62 787 if (bs->dev) {
18846dee
MA
788 return -EBUSY;
789 }
fa879d62 790 bs->dev = dev;
28a7282a 791 bdrv_iostatus_reset(bs);
18846dee
MA
792 return 0;
793}
794
fa879d62
MA
795/* TODO qdevified devices don't use this, remove when devices are qdevified */
796void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
18846dee 797{
fa879d62
MA
798 if (bdrv_attach_dev(bs, dev) < 0) {
799 abort();
800 }
801}
802
803void bdrv_detach_dev(BlockDriverState *bs, void *dev)
804/* TODO change to DeviceState *dev when all users are qdevified */
805{
806 assert(bs->dev == dev);
807 bs->dev = NULL;
0e49de52
MA
808 bs->dev_ops = NULL;
809 bs->dev_opaque = NULL;
29e05f20 810 bs->buffer_alignment = 512;
18846dee
MA
811}
812
fa879d62
MA
813/* TODO change to return DeviceState * when all users are qdevified */
814void *bdrv_get_attached_dev(BlockDriverState *bs)
18846dee 815{
fa879d62 816 return bs->dev;
18846dee
MA
817}
818
0e49de52
MA
819void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
820 void *opaque)
821{
822 bs->dev_ops = ops;
823 bs->dev_opaque = opaque;
2c6942fa
MA
824 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
825 bs_snapshots = NULL;
826 }
0e49de52
MA
827}
828
7d4b4ba5 829static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
0e49de52 830{
145feb17 831 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
7d4b4ba5 832 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
145feb17
MA
833 }
834}
835
2c6942fa
MA
836bool bdrv_dev_has_removable_media(BlockDriverState *bs)
837{
838 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
839}
840
e4def80b
MA
841bool bdrv_dev_is_tray_open(BlockDriverState *bs)
842{
843 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
844 return bs->dev_ops->is_tray_open(bs->dev_opaque);
845 }
846 return false;
847}
848
145feb17
MA
849static void bdrv_dev_resize_cb(BlockDriverState *bs)
850{
851 if (bs->dev_ops && bs->dev_ops->resize_cb) {
852 bs->dev_ops->resize_cb(bs->dev_opaque);
0e49de52
MA
853 }
854}
855
f107639a
MA
856bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
857{
858 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
859 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
860 }
861 return false;
862}
863
e97fc193
AL
864/*
865 * Run consistency checks on an image
866 *
e076f338 867 * Returns 0 if the check could be completed (it doesn't mean that the image is
a1c7273b 868 * free of errors) or -errno when an internal error occurred. The results of the
e076f338 869 * check are stored in res.
e97fc193 870 */
e076f338 871int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
e97fc193
AL
872{
873 if (bs->drv->bdrv_check == NULL) {
874 return -ENOTSUP;
875 }
876
e076f338 877 memset(res, 0, sizeof(*res));
9ac228e0 878 return bs->drv->bdrv_check(bs, res);
e97fc193
AL
879}
880
8a426614
KW
881#define COMMIT_BUF_SECTORS 2048
882
33e3963e
FB
883/* commit COW file into the raw image */
884int bdrv_commit(BlockDriverState *bs)
885{
19cb3738 886 BlockDriver *drv = bs->drv;
ee181196 887 BlockDriver *backing_drv;
8a426614
KW
888 int64_t sector, total_sectors;
889 int n, ro, open_flags;
4dca4b63 890 int ret = 0, rw_ret = 0;
8a426614 891 uint8_t *buf;
4dca4b63
NS
892 char filename[1024];
893 BlockDriverState *bs_rw, *bs_ro;
33e3963e 894
19cb3738
FB
895 if (!drv)
896 return -ENOMEDIUM;
4dca4b63
NS
897
898 if (!bs->backing_hd) {
899 return -ENOTSUP;
33e3963e
FB
900 }
901
4dca4b63
NS
902 if (bs->backing_hd->keep_read_only) {
903 return -EACCES;
904 }
ee181196
KW
905
906 backing_drv = bs->backing_hd->drv;
4dca4b63
NS
907 ro = bs->backing_hd->read_only;
908 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
909 open_flags = bs->backing_hd->open_flags;
910
911 if (ro) {
912 /* re-open as RW */
913 bdrv_delete(bs->backing_hd);
914 bs->backing_hd = NULL;
915 bs_rw = bdrv_new("");
ee181196
KW
916 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
917 backing_drv);
4dca4b63
NS
918 if (rw_ret < 0) {
919 bdrv_delete(bs_rw);
920 /* try to re-open read-only */
921 bs_ro = bdrv_new("");
ee181196
KW
922 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
923 backing_drv);
4dca4b63
NS
924 if (ret < 0) {
925 bdrv_delete(bs_ro);
926 /* drive not functional anymore */
927 bs->drv = NULL;
928 return ret;
929 }
930 bs->backing_hd = bs_ro;
931 return rw_ret;
932 }
933 bs->backing_hd = bs_rw;
ea2384d3 934 }
33e3963e 935
6ea44308 936 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
7267c094 937 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
8a426614
KW
938
939 for (sector = 0; sector < total_sectors; sector += n) {
940 if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
941
942 if (bdrv_read(bs, sector, buf, n) != 0) {
943 ret = -EIO;
944 goto ro_cleanup;
945 }
946
947 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
948 ret = -EIO;
949 goto ro_cleanup;
950 }
ea2384d3 951 }
33e3963e 952 }
95389c86 953
1d44952f
CH
954 if (drv->bdrv_make_empty) {
955 ret = drv->bdrv_make_empty(bs);
956 bdrv_flush(bs);
957 }
95389c86 958
3f5075ae
CH
959 /*
960 * Make sure all data we wrote to the backing device is actually
961 * stable on disk.
962 */
963 if (bs->backing_hd)
964 bdrv_flush(bs->backing_hd);
4dca4b63
NS
965
966ro_cleanup:
7267c094 967 g_free(buf);
4dca4b63
NS
968
969 if (ro) {
970 /* re-open as RO */
971 bdrv_delete(bs->backing_hd);
972 bs->backing_hd = NULL;
973 bs_ro = bdrv_new("");
ee181196
KW
974 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
975 backing_drv);
4dca4b63
NS
976 if (ret < 0) {
977 bdrv_delete(bs_ro);
978 /* drive not functional anymore */
979 bs->drv = NULL;
980 return ret;
981 }
982 bs->backing_hd = bs_ro;
983 bs->backing_hd->keep_read_only = 0;
984 }
985
1d44952f 986 return ret;
33e3963e
FB
987}
988
6ab4b5ab
MA
989void bdrv_commit_all(void)
990{
991 BlockDriverState *bs;
992
993 QTAILQ_FOREACH(bs, &bdrv_states, list) {
994 bdrv_commit(bs);
995 }
996}
997
756e6736
KW
998/*
999 * Return values:
1000 * 0 - success
1001 * -EINVAL - backing format specified, but no file
1002 * -ENOSPC - can't update the backing file because no space is left in the
1003 * image file header
1004 * -ENOTSUP - format driver doesn't support changing the backing file
1005 */
1006int bdrv_change_backing_file(BlockDriverState *bs,
1007 const char *backing_file, const char *backing_fmt)
1008{
1009 BlockDriver *drv = bs->drv;
1010
1011 if (drv->bdrv_change_backing_file != NULL) {
1012 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
1013 } else {
1014 return -ENOTSUP;
1015 }
1016}
1017
71d0770c
AL
1018static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1019 size_t size)
1020{
1021 int64_t len;
1022
1023 if (!bdrv_is_inserted(bs))
1024 return -ENOMEDIUM;
1025
1026 if (bs->growable)
1027 return 0;
1028
1029 len = bdrv_getlength(bs);
1030
fbb7b4e0
KW
1031 if (offset < 0)
1032 return -EIO;
1033
1034 if ((offset > len) || (len - offset < size))
71d0770c
AL
1035 return -EIO;
1036
1037 return 0;
1038}
1039
1040static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1041 int nb_sectors)
1042{
eb5a3165
JS
1043 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1044 nb_sectors * BDRV_SECTOR_SIZE);
71d0770c
AL
1045}
1046
e7a8a783
KW
1047static inline bool bdrv_has_async_rw(BlockDriver *drv)
1048{
1049 return drv->bdrv_co_readv != bdrv_co_readv_em
1050 || drv->bdrv_aio_readv != bdrv_aio_readv_em;
1051}
1052
1053static inline bool bdrv_has_async_flush(BlockDriver *drv)
1054{
1055 return drv->bdrv_aio_flush != bdrv_aio_flush_em;
1056}
1057
1c9805a3
SH
1058typedef struct RwCo {
1059 BlockDriverState *bs;
1060 int64_t sector_num;
1061 int nb_sectors;
1062 QEMUIOVector *qiov;
1063 bool is_write;
1064 int ret;
1065} RwCo;
1066
1067static void coroutine_fn bdrv_rw_co_entry(void *opaque)
fc01f7e7 1068{
1c9805a3 1069 RwCo *rwco = opaque;
ea2384d3 1070
1c9805a3
SH
1071 if (!rwco->is_write) {
1072 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
1073 rwco->nb_sectors, rwco->qiov);
1074 } else {
1075 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
1076 rwco->nb_sectors, rwco->qiov);
1077 }
1078}
e7a8a783 1079
1c9805a3
SH
1080/*
1081 * Process a synchronous request using coroutines
1082 */
1083static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1084 int nb_sectors, bool is_write)
1085{
1086 QEMUIOVector qiov;
1087 struct iovec iov = {
1088 .iov_base = (void *)buf,
1089 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1090 };
1091 Coroutine *co;
1092 RwCo rwco = {
1093 .bs = bs,
1094 .sector_num = sector_num,
1095 .nb_sectors = nb_sectors,
1096 .qiov = &qiov,
1097 .is_write = is_write,
1098 .ret = NOT_DONE,
1099 };
e7a8a783 1100
1c9805a3 1101 qemu_iovec_init_external(&qiov, &iov, 1);
e7a8a783 1102
1c9805a3
SH
1103 if (qemu_in_coroutine()) {
1104 /* Fast-path if already in coroutine context */
1105 bdrv_rw_co_entry(&rwco);
1106 } else {
1107 co = qemu_coroutine_create(bdrv_rw_co_entry);
1108 qemu_coroutine_enter(co, &rwco);
1109 while (rwco.ret == NOT_DONE) {
1110 qemu_aio_wait();
1111 }
1112 }
1113 return rwco.ret;
1114}
b338082b 1115
1c9805a3
SH
1116/* return < 0 if error. See bdrv_write() for the return codes */
1117int bdrv_read(BlockDriverState *bs, int64_t sector_num,
1118 uint8_t *buf, int nb_sectors)
1119{
1120 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
fc01f7e7
FB
1121}
1122
7cd1e32a 1123static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
a55eb92c 1124 int nb_sectors, int dirty)
7cd1e32a 1125{
1126 int64_t start, end;
c6d22830 1127 unsigned long val, idx, bit;
a55eb92c 1128
6ea44308 1129 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
c6d22830 1130 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c
JK
1131
1132 for (; start <= end; start++) {
c6d22830
JK
1133 idx = start / (sizeof(unsigned long) * 8);
1134 bit = start % (sizeof(unsigned long) * 8);
1135 val = bs->dirty_bitmap[idx];
1136 if (dirty) {
6d59fec1 1137 if (!(val & (1UL << bit))) {
aaa0eb75 1138 bs->dirty_count++;
6d59fec1 1139 val |= 1UL << bit;
aaa0eb75 1140 }
c6d22830 1141 } else {
6d59fec1 1142 if (val & (1UL << bit)) {
aaa0eb75 1143 bs->dirty_count--;
6d59fec1 1144 val &= ~(1UL << bit);
aaa0eb75 1145 }
c6d22830
JK
1146 }
1147 bs->dirty_bitmap[idx] = val;
7cd1e32a 1148 }
1149}
1150
5fafdf24 1151/* Return < 0 if error. Important errors are:
19cb3738
FB
1152 -EIO generic I/O error (may happen for all errors)
1153 -ENOMEDIUM No media inserted.
1154 -EINVAL Invalid sector number or nb_sectors
1155 -EACCES Trying to write a read-only device
1156*/
5fafdf24 1157int bdrv_write(BlockDriverState *bs, int64_t sector_num,
fc01f7e7
FB
1158 const uint8_t *buf, int nb_sectors)
1159{
1c9805a3 1160 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
83f64091
FB
1161}
1162
eda578e5
AL
1163int bdrv_pread(BlockDriverState *bs, int64_t offset,
1164 void *buf, int count1)
83f64091 1165{
6ea44308 1166 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
83f64091
FB
1167 int len, nb_sectors, count;
1168 int64_t sector_num;
9a8c4cce 1169 int ret;
83f64091
FB
1170
1171 count = count1;
1172 /* first read to align to sector start */
6ea44308 1173 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
83f64091
FB
1174 if (len > count)
1175 len = count;
6ea44308 1176 sector_num = offset >> BDRV_SECTOR_BITS;
83f64091 1177 if (len > 0) {
9a8c4cce
KW
1178 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1179 return ret;
6ea44308 1180 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
83f64091
FB
1181 count -= len;
1182 if (count == 0)
1183 return count1;
1184 sector_num++;
1185 buf += len;
1186 }
1187
1188 /* read the sectors "in place" */
6ea44308 1189 nb_sectors = count >> BDRV_SECTOR_BITS;
83f64091 1190 if (nb_sectors > 0) {
9a8c4cce
KW
1191 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1192 return ret;
83f64091 1193 sector_num += nb_sectors;
6ea44308 1194 len = nb_sectors << BDRV_SECTOR_BITS;
83f64091
FB
1195 buf += len;
1196 count -= len;
1197 }
1198
1199 /* add data from the last sector */
1200 if (count > 0) {
9a8c4cce
KW
1201 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1202 return ret;
83f64091
FB
1203 memcpy(buf, tmp_buf, count);
1204 }
1205 return count1;
1206}
1207
eda578e5
AL
1208int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1209 const void *buf, int count1)
83f64091 1210{
6ea44308 1211 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
83f64091
FB
1212 int len, nb_sectors, count;
1213 int64_t sector_num;
9a8c4cce 1214 int ret;
83f64091
FB
1215
1216 count = count1;
1217 /* first write to align to sector start */
6ea44308 1218 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
83f64091
FB
1219 if (len > count)
1220 len = count;
6ea44308 1221 sector_num = offset >> BDRV_SECTOR_BITS;
83f64091 1222 if (len > 0) {
9a8c4cce
KW
1223 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1224 return ret;
6ea44308 1225 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
9a8c4cce
KW
1226 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1227 return ret;
83f64091
FB
1228 count -= len;
1229 if (count == 0)
1230 return count1;
1231 sector_num++;
1232 buf += len;
1233 }
1234
1235 /* write the sectors "in place" */
6ea44308 1236 nb_sectors = count >> BDRV_SECTOR_BITS;
83f64091 1237 if (nb_sectors > 0) {
9a8c4cce
KW
1238 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1239 return ret;
83f64091 1240 sector_num += nb_sectors;
6ea44308 1241 len = nb_sectors << BDRV_SECTOR_BITS;
83f64091
FB
1242 buf += len;
1243 count -= len;
1244 }
1245
1246 /* add data from the last sector */
1247 if (count > 0) {
9a8c4cce
KW
1248 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1249 return ret;
83f64091 1250 memcpy(tmp_buf, buf, count);
9a8c4cce
KW
1251 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1252 return ret;
83f64091
FB
1253 }
1254 return count1;
1255}
83f64091 1256
f08145fe
KW
1257/*
1258 * Writes to the file and ensures that no writes are reordered across this
1259 * request (acts as a barrier)
1260 *
1261 * Returns 0 on success, -errno in error cases.
1262 */
1263int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1264 const void *buf, int count)
1265{
1266 int ret;
1267
1268 ret = bdrv_pwrite(bs, offset, buf, count);
1269 if (ret < 0) {
1270 return ret;
1271 }
1272
92196b2f
SH
1273 /* No flush needed for cache modes that use O_DSYNC */
1274 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
f08145fe
KW
1275 bdrv_flush(bs);
1276 }
1277
1278 return 0;
1279}
1280
c5fbe571
SH
1281/*
1282 * Handle a read request in coroutine context
1283 */
1284static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
1285 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
da1fa91d
KW
1286{
1287 BlockDriver *drv = bs->drv;
1288
da1fa91d
KW
1289 if (!drv) {
1290 return -ENOMEDIUM;
1291 }
1292 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1293 return -EIO;
1294 }
1295
1296 return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1297}
1298
c5fbe571 1299int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
da1fa91d
KW
1300 int nb_sectors, QEMUIOVector *qiov)
1301{
c5fbe571 1302 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
da1fa91d 1303
c5fbe571
SH
1304 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
1305}
1306
1307/*
1308 * Handle a write request in coroutine context
1309 */
1310static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
1311 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1312{
1313 BlockDriver *drv = bs->drv;
6b7cb247 1314 int ret;
da1fa91d
KW
1315
1316 if (!bs->drv) {
1317 return -ENOMEDIUM;
1318 }
1319 if (bs->read_only) {
1320 return -EACCES;
1321 }
1322 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1323 return -EIO;
1324 }
1325
6b7cb247
SH
1326 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1327
da1fa91d
KW
1328 if (bs->dirty_bitmap) {
1329 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1330 }
1331
1332 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1333 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1334 }
1335
6b7cb247 1336 return ret;
da1fa91d
KW
1337}
1338
c5fbe571
SH
1339int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1340 int nb_sectors, QEMUIOVector *qiov)
1341{
1342 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1343
1344 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
1345}
1346
83f64091
FB
1347/**
1348 * Truncate file to 'offset' bytes (needed only for file protocols)
1349 */
1350int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1351{
1352 BlockDriver *drv = bs->drv;
51762288 1353 int ret;
83f64091 1354 if (!drv)
19cb3738 1355 return -ENOMEDIUM;
83f64091
FB
1356 if (!drv->bdrv_truncate)
1357 return -ENOTSUP;
59f2689d
NS
1358 if (bs->read_only)
1359 return -EACCES;
8591675f
MT
1360 if (bdrv_in_use(bs))
1361 return -EBUSY;
51762288
SH
1362 ret = drv->bdrv_truncate(bs, offset);
1363 if (ret == 0) {
1364 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
145feb17 1365 bdrv_dev_resize_cb(bs);
51762288
SH
1366 }
1367 return ret;
83f64091
FB
1368}
1369
4a1d5e1f
FZ
1370/**
1371 * Length of a allocated file in bytes. Sparse files are counted by actual
1372 * allocated space. Return < 0 if error or unknown.
1373 */
1374int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1375{
1376 BlockDriver *drv = bs->drv;
1377 if (!drv) {
1378 return -ENOMEDIUM;
1379 }
1380 if (drv->bdrv_get_allocated_file_size) {
1381 return drv->bdrv_get_allocated_file_size(bs);
1382 }
1383 if (bs->file) {
1384 return bdrv_get_allocated_file_size(bs->file);
1385 }
1386 return -ENOTSUP;
1387}
1388
83f64091
FB
1389/**
1390 * Length of a file in bytes. Return < 0 if error or unknown.
1391 */
1392int64_t bdrv_getlength(BlockDriverState *bs)
1393{
1394 BlockDriver *drv = bs->drv;
1395 if (!drv)
19cb3738 1396 return -ENOMEDIUM;
51762288 1397
2c6942fa 1398 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
46a4e4e6
SH
1399 if (drv->bdrv_getlength) {
1400 return drv->bdrv_getlength(bs);
1401 }
83f64091 1402 }
46a4e4e6 1403 return bs->total_sectors * BDRV_SECTOR_SIZE;
fc01f7e7
FB
1404}
1405
19cb3738 1406/* return 0 as number of sectors if no device present or error */
96b8f136 1407void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
fc01f7e7 1408{
19cb3738
FB
1409 int64_t length;
1410 length = bdrv_getlength(bs);
1411 if (length < 0)
1412 length = 0;
1413 else
6ea44308 1414 length = length >> BDRV_SECTOR_BITS;
19cb3738 1415 *nb_sectors_ptr = length;
fc01f7e7 1416}
cf98951b 1417
f3d54fc4
AL
/* One partition-table entry, as read from the MSDOS table by guess_disk_lchs(). */
struct partition {
    uint8_t boot_ind;       /* 0x80 - active */
    uint8_t head;           /* starting head */
    uint8_t sector;         /* starting sector */
    uint8_t cyl;            /* starting cylinder */
    uint8_t sys_ind;        /* What partition type */
    uint8_t end_head;       /* end head */
    uint8_t end_sector;     /* end sector */
    uint8_t end_cyl;        /* end cylinder */
    uint32_t start_sect;    /* starting sector counting from 0 */
    uint32_t nr_sects;      /* nr of sectors in partition */
} QEMU_PACKED;
f3d54fc4
AL
1430
1431/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1432static int guess_disk_lchs(BlockDriverState *bs,
1433 int *pcylinders, int *pheads, int *psectors)
1434{
eb5a3165 1435 uint8_t buf[BDRV_SECTOR_SIZE];
f3d54fc4
AL
1436 int ret, i, heads, sectors, cylinders;
1437 struct partition *p;
1438 uint32_t nr_sects;
a38131b6 1439 uint64_t nb_sectors;
f3d54fc4
AL
1440
1441 bdrv_get_geometry(bs, &nb_sectors);
1442
1443 ret = bdrv_read(bs, 0, buf, 1);
1444 if (ret < 0)
1445 return -1;
1446 /* test msdos magic */
1447 if (buf[510] != 0x55 || buf[511] != 0xaa)
1448 return -1;
1449 for(i = 0; i < 4; i++) {
1450 p = ((struct partition *)(buf + 0x1be)) + i;
1451 nr_sects = le32_to_cpu(p->nr_sects);
1452 if (nr_sects && p->end_head) {
1453 /* We make the assumption that the partition terminates on
1454 a cylinder boundary */
1455 heads = p->end_head + 1;
1456 sectors = p->end_sector & 63;
1457 if (sectors == 0)
1458 continue;
1459 cylinders = nb_sectors / (heads * sectors);
1460 if (cylinders < 1 || cylinders > 16383)
1461 continue;
1462 *pheads = heads;
1463 *psectors = sectors;
1464 *pcylinders = cylinders;
1465#if 0
1466 printf("guessed geometry: LCHS=%d %d %d\n",
1467 cylinders, heads, sectors);
1468#endif
1469 return 0;
1470 }
1471 }
1472 return -1;
1473}
1474
1475void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1476{
1477 int translation, lba_detected = 0;
1478 int cylinders, heads, secs;
a38131b6 1479 uint64_t nb_sectors;
f3d54fc4
AL
1480
1481 /* if a geometry hint is available, use it */
1482 bdrv_get_geometry(bs, &nb_sectors);
1483 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1484 translation = bdrv_get_translation_hint(bs);
1485 if (cylinders != 0) {
1486 *pcyls = cylinders;
1487 *pheads = heads;
1488 *psecs = secs;
1489 } else {
1490 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1491 if (heads > 16) {
1492 /* if heads > 16, it means that a BIOS LBA
1493 translation was active, so the default
1494 hardware geometry is OK */
1495 lba_detected = 1;
1496 goto default_geometry;
1497 } else {
1498 *pcyls = cylinders;
1499 *pheads = heads;
1500 *psecs = secs;
1501 /* disable any translation to be in sync with
1502 the logical geometry */
1503 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1504 bdrv_set_translation_hint(bs,
1505 BIOS_ATA_TRANSLATION_NONE);
1506 }
1507 }
1508 } else {
1509 default_geometry:
1510 /* if no geometry, use a standard physical disk geometry */
1511 cylinders = nb_sectors / (16 * 63);
1512
1513 if (cylinders > 16383)
1514 cylinders = 16383;
1515 else if (cylinders < 2)
1516 cylinders = 2;
1517 *pcyls = cylinders;
1518 *pheads = 16;
1519 *psecs = 63;
1520 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1521 if ((*pcyls * *pheads) <= 131072) {
1522 bdrv_set_translation_hint(bs,
1523 BIOS_ATA_TRANSLATION_LARGE);
1524 } else {
1525 bdrv_set_translation_hint(bs,
1526 BIOS_ATA_TRANSLATION_LBA);
1527 }
1528 }
1529 }
1530 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1531 }
1532}
1533
5fafdf24 1534void bdrv_set_geometry_hint(BlockDriverState *bs,
b338082b
FB
1535 int cyls, int heads, int secs)
1536{
1537 bs->cyls = cyls;
1538 bs->heads = heads;
1539 bs->secs = secs;
1540}
1541
46d4767d
FB
1542void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1543{
1544 bs->translation = translation;
1545}
1546
5fafdf24 1547void bdrv_get_geometry_hint(BlockDriverState *bs,
b338082b
FB
1548 int *pcyls, int *pheads, int *psecs)
1549{
1550 *pcyls = bs->cyls;
1551 *pheads = bs->heads;
1552 *psecs = bs->secs;
1553}
1554
5bbdbb46
BS
1555/* Recognize floppy formats */
1556typedef struct FDFormat {
1557 FDriveType drive;
1558 uint8_t last_sect;
1559 uint8_t max_track;
1560 uint8_t max_head;
1561} FDFormat;
1562
1563static const FDFormat fd_formats[] = {
1564 /* First entry is default format */
1565 /* 1.44 MB 3"1/2 floppy disks */
1566 { FDRIVE_DRV_144, 18, 80, 1, },
1567 { FDRIVE_DRV_144, 20, 80, 1, },
1568 { FDRIVE_DRV_144, 21, 80, 1, },
1569 { FDRIVE_DRV_144, 21, 82, 1, },
1570 { FDRIVE_DRV_144, 21, 83, 1, },
1571 { FDRIVE_DRV_144, 22, 80, 1, },
1572 { FDRIVE_DRV_144, 23, 80, 1, },
1573 { FDRIVE_DRV_144, 24, 80, 1, },
1574 /* 2.88 MB 3"1/2 floppy disks */
1575 { FDRIVE_DRV_288, 36, 80, 1, },
1576 { FDRIVE_DRV_288, 39, 80, 1, },
1577 { FDRIVE_DRV_288, 40, 80, 1, },
1578 { FDRIVE_DRV_288, 44, 80, 1, },
1579 { FDRIVE_DRV_288, 48, 80, 1, },
1580 /* 720 kB 3"1/2 floppy disks */
1581 { FDRIVE_DRV_144, 9, 80, 1, },
1582 { FDRIVE_DRV_144, 10, 80, 1, },
1583 { FDRIVE_DRV_144, 10, 82, 1, },
1584 { FDRIVE_DRV_144, 10, 83, 1, },
1585 { FDRIVE_DRV_144, 13, 80, 1, },
1586 { FDRIVE_DRV_144, 14, 80, 1, },
1587 /* 1.2 MB 5"1/4 floppy disks */
1588 { FDRIVE_DRV_120, 15, 80, 1, },
1589 { FDRIVE_DRV_120, 18, 80, 1, },
1590 { FDRIVE_DRV_120, 18, 82, 1, },
1591 { FDRIVE_DRV_120, 18, 83, 1, },
1592 { FDRIVE_DRV_120, 20, 80, 1, },
1593 /* 720 kB 5"1/4 floppy disks */
1594 { FDRIVE_DRV_120, 9, 80, 1, },
1595 { FDRIVE_DRV_120, 11, 80, 1, },
1596 /* 360 kB 5"1/4 floppy disks */
1597 { FDRIVE_DRV_120, 9, 40, 1, },
1598 { FDRIVE_DRV_120, 9, 40, 0, },
1599 { FDRIVE_DRV_120, 10, 41, 1, },
1600 { FDRIVE_DRV_120, 10, 42, 1, },
1601 /* 320 kB 5"1/4 floppy disks */
1602 { FDRIVE_DRV_120, 8, 40, 1, },
1603 { FDRIVE_DRV_120, 8, 40, 0, },
1604 /* 360 kB must match 5"1/4 better than 3"1/2... */
1605 { FDRIVE_DRV_144, 9, 80, 0, },
1606 /* end */
1607 { FDRIVE_DRV_NONE, -1, -1, 0, },
1608};
1609
1610void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1611 int *max_track, int *last_sect,
1612 FDriveType drive_in, FDriveType *drive)
1613{
1614 const FDFormat *parse;
1615 uint64_t nb_sectors, size;
1616 int i, first_match, match;
1617
1618 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1619 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1620 /* User defined disk */
1621 } else {
1622 bdrv_get_geometry(bs, &nb_sectors);
1623 match = -1;
1624 first_match = -1;
1625 for (i = 0; ; i++) {
1626 parse = &fd_formats[i];
1627 if (parse->drive == FDRIVE_DRV_NONE) {
1628 break;
1629 }
1630 if (drive_in == parse->drive ||
1631 drive_in == FDRIVE_DRV_NONE) {
1632 size = (parse->max_head + 1) * parse->max_track *
1633 parse->last_sect;
1634 if (nb_sectors == size) {
1635 match = i;
1636 break;
1637 }
1638 if (first_match == -1) {
1639 first_match = i;
1640 }
1641 }
1642 }
1643 if (match == -1) {
1644 if (first_match == -1) {
1645 match = 1;
1646 } else {
1647 match = first_match;
1648 }
1649 parse = &fd_formats[match];
1650 }
1651 *nb_heads = parse->max_head + 1;
1652 *max_track = parse->max_track;
1653 *last_sect = parse->last_sect;
1654 *drive = parse->drive;
1655 }
1656}
1657
46d4767d
FB
1658int bdrv_get_translation_hint(BlockDriverState *bs)
1659{
1660 return bs->translation;
1661}
1662
abd7f68d
MA
1663void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1664 BlockErrorAction on_write_error)
1665{
1666 bs->on_read_error = on_read_error;
1667 bs->on_write_error = on_write_error;
1668}
1669
1670BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1671{
1672 return is_read ? bs->on_read_error : bs->on_write_error;
1673}
1674
b338082b
FB
1675int bdrv_is_read_only(BlockDriverState *bs)
1676{
1677 return bs->read_only;
1678}
1679
985a03b0
TS
1680int bdrv_is_sg(BlockDriverState *bs)
1681{
1682 return bs->sg;
1683}
1684
e900a7b7
CH
1685int bdrv_enable_write_cache(BlockDriverState *bs)
1686{
1687 return bs->enable_write_cache;
1688}
1689
ea2384d3
FB
1690int bdrv_is_encrypted(BlockDriverState *bs)
1691{
1692 if (bs->backing_hd && bs->backing_hd->encrypted)
1693 return 1;
1694 return bs->encrypted;
1695}
1696
c0f4ce77
AL
1697int bdrv_key_required(BlockDriverState *bs)
1698{
1699 BlockDriverState *backing_hd = bs->backing_hd;
1700
1701 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1702 return 1;
1703 return (bs->encrypted && !bs->valid_key);
1704}
1705
ea2384d3
FB
1706int bdrv_set_key(BlockDriverState *bs, const char *key)
1707{
1708 int ret;
1709 if (bs->backing_hd && bs->backing_hd->encrypted) {
1710 ret = bdrv_set_key(bs->backing_hd, key);
1711 if (ret < 0)
1712 return ret;
1713 if (!bs->encrypted)
1714 return 0;
1715 }
fd04a2ae
SH
1716 if (!bs->encrypted) {
1717 return -EINVAL;
1718 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1719 return -ENOMEDIUM;
1720 }
c0f4ce77 1721 ret = bs->drv->bdrv_set_key(bs, key);
bb5fc20f
AL
1722 if (ret < 0) {
1723 bs->valid_key = 0;
1724 } else if (!bs->valid_key) {
1725 bs->valid_key = 1;
1726 /* call the change callback now, we skipped it on open */
7d4b4ba5 1727 bdrv_dev_change_media_cb(bs, true);
bb5fc20f 1728 }
c0f4ce77 1729 return ret;
ea2384d3
FB
1730}
1731
1732void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1733{
19cb3738 1734 if (!bs->drv) {
ea2384d3
FB
1735 buf[0] = '\0';
1736 } else {
1737 pstrcpy(buf, buf_size, bs->drv->format_name);
1738 }
1739}
1740
5fafdf24 1741void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
ea2384d3
FB
1742 void *opaque)
1743{
1744 BlockDriver *drv;
1745
8a22f02a 1746 QLIST_FOREACH(drv, &bdrv_drivers, list) {
ea2384d3
FB
1747 it(opaque, drv->format_name);
1748 }
1749}
1750
b338082b
FB
1751BlockDriverState *bdrv_find(const char *name)
1752{
1753 BlockDriverState *bs;
1754
1b7bdbc1
SH
1755 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1756 if (!strcmp(name, bs->device_name)) {
b338082b 1757 return bs;
1b7bdbc1 1758 }
b338082b
FB
1759 }
1760 return NULL;
1761}
1762
2f399b0a
MA
1763BlockDriverState *bdrv_next(BlockDriverState *bs)
1764{
1765 if (!bs) {
1766 return QTAILQ_FIRST(&bdrv_states);
1767 }
1768 return QTAILQ_NEXT(bs, list);
1769}
1770
51de9760 1771void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
81d0912d
FB
1772{
1773 BlockDriverState *bs;
1774
1b7bdbc1 1775 QTAILQ_FOREACH(bs, &bdrv_states, list) {
51de9760 1776 it(opaque, bs);
81d0912d
FB
1777 }
1778}
1779
ea2384d3
FB
1780const char *bdrv_get_device_name(BlockDriverState *bs)
1781{
1782 return bs->device_name;
1783}
1784
205ef796 1785int bdrv_flush(BlockDriverState *bs)
7a6cba61 1786{
016f5cf6 1787 if (bs->open_flags & BDRV_O_NO_FLUSH) {
205ef796
KW
1788 return 0;
1789 }
1790
e7a8a783
KW
1791 if (bs->drv && bdrv_has_async_flush(bs->drv) && qemu_in_coroutine()) {
1792 return bdrv_co_flush_em(bs);
1793 }
1794
205ef796
KW
1795 if (bs->drv && bs->drv->bdrv_flush) {
1796 return bs->drv->bdrv_flush(bs);
016f5cf6
AG
1797 }
1798
205ef796
KW
1799 /*
1800 * Some block drivers always operate in either writethrough or unsafe mode
1801 * and don't support bdrv_flush therefore. Usually qemu doesn't know how
1802 * the server works (because the behaviour is hardcoded or depends on
1803 * server-side configuration), so we can't ensure that everything is safe
1804 * on disk. Returning an error doesn't work because that would break guests
1805 * even if the server operates in writethrough mode.
1806 *
1807 * Let's hope the user knows what he's doing.
1808 */
1809 return 0;
7a6cba61
PB
1810}
1811
c6ca28d6
AL
1812void bdrv_flush_all(void)
1813{
1814 BlockDriverState *bs;
1815
1b7bdbc1 1816 QTAILQ_FOREACH(bs, &bdrv_states, list) {
c602a489 1817 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
c6ca28d6 1818 bdrv_flush(bs);
1b7bdbc1
SH
1819 }
1820 }
c6ca28d6
AL
1821}
1822
f2feebbd
KW
1823int bdrv_has_zero_init(BlockDriverState *bs)
1824{
1825 assert(bs->drv);
1826
336c1c12
KW
1827 if (bs->drv->bdrv_has_zero_init) {
1828 return bs->drv->bdrv_has_zero_init(bs);
f2feebbd
KW
1829 }
1830
1831 return 1;
1832}
1833
bb8bf76f
CH
1834int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
1835{
1836 if (!bs->drv) {
1837 return -ENOMEDIUM;
1838 }
1839 if (!bs->drv->bdrv_discard) {
1840 return 0;
1841 }
1842 return bs->drv->bdrv_discard(bs, sector_num, nb_sectors);
1843}
1844
f58c7b35
TS
1845/*
1846 * Returns true iff the specified sector is present in the disk image. Drivers
1847 * not implementing the functionality are assumed to not support backing files,
1848 * hence all their sectors are reported as allocated.
1849 *
1850 * 'pnum' is set to the number of sectors (including and immediately following
1851 * the specified sector) that are known to be in the same
1852 * allocated/unallocated state.
1853 *
1854 * 'nb_sectors' is the max value 'pnum' should be set to.
1855 */
1856int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1857 int *pnum)
1858{
1859 int64_t n;
1860 if (!bs->drv->bdrv_is_allocated) {
1861 if (sector_num >= bs->total_sectors) {
1862 *pnum = 0;
1863 return 0;
1864 }
1865 n = bs->total_sectors - sector_num;
1866 *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1867 return 1;
1868 }
1869 return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1870}
1871
2582bfed
LC
1872void bdrv_mon_event(const BlockDriverState *bdrv,
1873 BlockMonEventAction action, int is_read)
1874{
1875 QObject *data;
1876 const char *action_str;
1877
1878 switch (action) {
1879 case BDRV_ACTION_REPORT:
1880 action_str = "report";
1881 break;
1882 case BDRV_ACTION_IGNORE:
1883 action_str = "ignore";
1884 break;
1885 case BDRV_ACTION_STOP:
1886 action_str = "stop";
1887 break;
1888 default:
1889 abort();
1890 }
1891
1892 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1893 bdrv->device_name,
1894 action_str,
1895 is_read ? "read" : "write");
1896 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1897
1898 qobject_decref(data);
1899}
1900
d15e5465 1901static void bdrv_print_dict(QObject *obj, void *opaque)
b338082b 1902{
d15e5465
LC
1903 QDict *bs_dict;
1904 Monitor *mon = opaque;
1905
1906 bs_dict = qobject_to_qdict(obj);
1907
d8aeeb31 1908 monitor_printf(mon, "%s: removable=%d",
d15e5465 1909 qdict_get_str(bs_dict, "device"),
d15e5465
LC
1910 qdict_get_bool(bs_dict, "removable"));
1911
1912 if (qdict_get_bool(bs_dict, "removable")) {
1913 monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
e4def80b
MA
1914 monitor_printf(mon, " tray-open=%d",
1915 qdict_get_bool(bs_dict, "tray-open"));
d15e5465 1916 }
d2078cc2
LC
1917
1918 if (qdict_haskey(bs_dict, "io-status")) {
1919 monitor_printf(mon, " io-status=%s", qdict_get_str(bs_dict, "io-status"));
1920 }
1921
d15e5465
LC
1922 if (qdict_haskey(bs_dict, "inserted")) {
1923 QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted"));
1924
1925 monitor_printf(mon, " file=");
1926 monitor_print_filename(mon, qdict_get_str(qdict, "file"));
1927 if (qdict_haskey(qdict, "backing_file")) {
1928 monitor_printf(mon, " backing_file=");
1929 monitor_print_filename(mon, qdict_get_str(qdict, "backing_file"));
1930 }
1931 monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
1932 qdict_get_bool(qdict, "ro"),
1933 qdict_get_str(qdict, "drv"),
1934 qdict_get_bool(qdict, "encrypted"));
1935 } else {
1936 monitor_printf(mon, " [not inserted]");
1937 }
1938
1939 monitor_printf(mon, "\n");
1940}
1941
1942void bdrv_info_print(Monitor *mon, const QObject *data)
1943{
1944 qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon);
1945}
1946
f04ef601
LC
1947static const char *const io_status_name[BDRV_IOS_MAX] = {
1948 [BDRV_IOS_OK] = "ok",
1949 [BDRV_IOS_FAILED] = "failed",
1950 [BDRV_IOS_ENOSPC] = "nospace",
1951};
1952
d15e5465
LC
1953void bdrv_info(Monitor *mon, QObject **ret_data)
1954{
1955 QList *bs_list;
b338082b
FB
1956 BlockDriverState *bs;
1957
d15e5465
LC
1958 bs_list = qlist_new();
1959
1b7bdbc1 1960 QTAILQ_FOREACH(bs, &bdrv_states, list) {
d15e5465 1961 QObject *bs_obj;
e4def80b 1962 QDict *bs_dict;
d15e5465 1963
d8aeeb31 1964 bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': 'unknown', "
d15e5465 1965 "'removable': %i, 'locked': %i }",
2c6942fa
MA
1966 bs->device_name,
1967 bdrv_dev_has_removable_media(bs),
f107639a 1968 bdrv_dev_is_medium_locked(bs));
e4def80b 1969 bs_dict = qobject_to_qdict(bs_obj);
d15e5465 1970
e4def80b
MA
1971 if (bdrv_dev_has_removable_media(bs)) {
1972 qdict_put(bs_dict, "tray-open",
1973 qbool_from_int(bdrv_dev_is_tray_open(bs)));
1974 }
f04ef601
LC
1975
1976 if (bdrv_iostatus_is_enabled(bs)) {
1977 qdict_put(bs_dict, "io-status",
1978 qstring_from_str(io_status_name[bs->iostatus]));
1979 }
1980
19cb3738 1981 if (bs->drv) {
d15e5465 1982 QObject *obj;
d15e5465
LC
1983
1984 obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
1985 "'encrypted': %i }",
1986 bs->filename, bs->read_only,
1987 bs->drv->format_name,
1988 bdrv_is_encrypted(bs));
fef30743 1989 if (bs->backing_file[0] != '\0') {
d15e5465
LC
1990 QDict *qdict = qobject_to_qdict(obj);
1991 qdict_put(qdict, "backing_file",
1992 qstring_from_str(bs->backing_file));
376253ec 1993 }
d15e5465
LC
1994
1995 qdict_put_obj(bs_dict, "inserted", obj);
b338082b 1996 }
d15e5465 1997 qlist_append_obj(bs_list, bs_obj);
b338082b 1998 }
d15e5465
LC
1999
2000 *ret_data = QOBJECT(bs_list);
b338082b 2001}
a36e69dd 2002
218a536a 2003static void bdrv_stats_iter(QObject *data, void *opaque)
a36e69dd 2004{
218a536a
LC
2005 QDict *qdict;
2006 Monitor *mon = opaque;
2007
2008 qdict = qobject_to_qdict(data);
2009 monitor_printf(mon, "%s:", qdict_get_str(qdict, "device"));
2010
2011 qdict = qobject_to_qdict(qdict_get(qdict, "stats"));
2012 monitor_printf(mon, " rd_bytes=%" PRId64
2013 " wr_bytes=%" PRId64
2014 " rd_operations=%" PRId64
2015 " wr_operations=%" PRId64
e8045d67 2016 " flush_operations=%" PRId64
c488c7f6
CH
2017 " wr_total_time_ns=%" PRId64
2018 " rd_total_time_ns=%" PRId64
2019 " flush_total_time_ns=%" PRId64
218a536a
LC
2020 "\n",
2021 qdict_get_int(qdict, "rd_bytes"),
2022 qdict_get_int(qdict, "wr_bytes"),
2023 qdict_get_int(qdict, "rd_operations"),
e8045d67 2024 qdict_get_int(qdict, "wr_operations"),
c488c7f6
CH
2025 qdict_get_int(qdict, "flush_operations"),
2026 qdict_get_int(qdict, "wr_total_time_ns"),
2027 qdict_get_int(qdict, "rd_total_time_ns"),
2028 qdict_get_int(qdict, "flush_total_time_ns"));
218a536a
LC
2029}
2030
2031void bdrv_stats_print(Monitor *mon, const QObject *data)
2032{
2033 qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon);
2034}
2035
294cc35f
KW
2036static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
2037{
2038 QObject *res;
2039 QDict *dict;
2040
2041 res = qobject_from_jsonf("{ 'stats': {"
2042 "'rd_bytes': %" PRId64 ","
2043 "'wr_bytes': %" PRId64 ","
2044 "'rd_operations': %" PRId64 ","
2045 "'wr_operations': %" PRId64 ","
e8045d67 2046 "'wr_highest_offset': %" PRId64 ","
c488c7f6
CH
2047 "'flush_operations': %" PRId64 ","
2048 "'wr_total_time_ns': %" PRId64 ","
2049 "'rd_total_time_ns': %" PRId64 ","
2050 "'flush_total_time_ns': %" PRId64
294cc35f 2051 "} }",
a597e79c
CH
2052 bs->nr_bytes[BDRV_ACCT_READ],
2053 bs->nr_bytes[BDRV_ACCT_WRITE],
2054 bs->nr_ops[BDRV_ACCT_READ],
2055 bs->nr_ops[BDRV_ACCT_WRITE],
5ffbbc67 2056 bs->wr_highest_sector *
e8045d67 2057 (uint64_t)BDRV_SECTOR_SIZE,
c488c7f6
CH
2058 bs->nr_ops[BDRV_ACCT_FLUSH],
2059 bs->total_time_ns[BDRV_ACCT_WRITE],
2060 bs->total_time_ns[BDRV_ACCT_READ],
2061 bs->total_time_ns[BDRV_ACCT_FLUSH]);
294cc35f
KW
2062 dict = qobject_to_qdict(res);
2063
2064 if (*bs->device_name) {
2065 qdict_put(dict, "device", qstring_from_str(bs->device_name));
2066 }
2067
2068 if (bs->file) {
2069 QObject *parent = bdrv_info_stats_bs(bs->file);
2070 qdict_put_obj(dict, "parent", parent);
2071 }
2072
2073 return res;
2074}
2075
218a536a
LC
2076void bdrv_info_stats(Monitor *mon, QObject **ret_data)
2077{
2078 QObject *obj;
2079 QList *devices;
a36e69dd
TS
2080 BlockDriverState *bs;
2081
218a536a
LC
2082 devices = qlist_new();
2083
1b7bdbc1 2084 QTAILQ_FOREACH(bs, &bdrv_states, list) {
294cc35f 2085 obj = bdrv_info_stats_bs(bs);
218a536a 2086 qlist_append_obj(devices, obj);
a36e69dd 2087 }
218a536a
LC
2088
2089 *ret_data = QOBJECT(devices);
a36e69dd 2090}
ea2384d3 2091
045df330
AL
2092const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2093{
2094 if (bs->backing_hd && bs->backing_hd->encrypted)
2095 return bs->backing_file;
2096 else if (bs->encrypted)
2097 return bs->filename;
2098 else
2099 return NULL;
2100}
2101
5fafdf24 2102void bdrv_get_backing_filename(BlockDriverState *bs,
83f64091
FB
2103 char *filename, int filename_size)
2104{
b783e409 2105 if (!bs->backing_file) {
83f64091
FB
2106 pstrcpy(filename, filename_size, "");
2107 } else {
2108 pstrcpy(filename, filename_size, bs->backing_file);
2109 }
2110}
2111
5fafdf24 2112int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
faea38e7
FB
2113 const uint8_t *buf, int nb_sectors)
2114{
2115 BlockDriver *drv = bs->drv;
2116 if (!drv)
19cb3738 2117 return -ENOMEDIUM;
faea38e7
FB
2118 if (!drv->bdrv_write_compressed)
2119 return -ENOTSUP;
fbb7b4e0
KW
2120 if (bdrv_check_request(bs, sector_num, nb_sectors))
2121 return -EIO;
a55eb92c 2122
c6d22830 2123 if (bs->dirty_bitmap) {
7cd1e32a 2124 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2125 }
a55eb92c 2126
faea38e7
FB
2127 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2128}
3b46e624 2129
faea38e7
FB
2130int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2131{
2132 BlockDriver *drv = bs->drv;
2133 if (!drv)
19cb3738 2134 return -ENOMEDIUM;
faea38e7
FB
2135 if (!drv->bdrv_get_info)
2136 return -ENOTSUP;
2137 memset(bdi, 0, sizeof(*bdi));
2138 return drv->bdrv_get_info(bs, bdi);
2139}
2140
45566e9c
CH
2141int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2142 int64_t pos, int size)
178e08a5
AL
2143{
2144 BlockDriver *drv = bs->drv;
2145 if (!drv)
2146 return -ENOMEDIUM;
7cdb1f6d
MK
2147 if (drv->bdrv_save_vmstate)
2148 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2149 if (bs->file)
2150 return bdrv_save_vmstate(bs->file, buf, pos, size);
2151 return -ENOTSUP;
178e08a5
AL
2152}
2153
45566e9c
CH
2154int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2155 int64_t pos, int size)
178e08a5
AL
2156{
2157 BlockDriver *drv = bs->drv;
2158 if (!drv)
2159 return -ENOMEDIUM;
7cdb1f6d
MK
2160 if (drv->bdrv_load_vmstate)
2161 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2162 if (bs->file)
2163 return bdrv_load_vmstate(bs->file, buf, pos, size);
2164 return -ENOTSUP;
178e08a5
AL
2165}
2166
8b9b0cc2
KW
2167void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2168{
2169 BlockDriver *drv = bs->drv;
2170
2171 if (!drv || !drv->bdrv_debug_event) {
2172 return;
2173 }
2174
2175 return drv->bdrv_debug_event(bs, event);
2176
2177}
2178
faea38e7
FB
2179/**************************************************************/
2180/* handling of snapshots */
2181
feeee5ac
MDCF
2182int bdrv_can_snapshot(BlockDriverState *bs)
2183{
2184 BlockDriver *drv = bs->drv;
07b70bfb 2185 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
feeee5ac
MDCF
2186 return 0;
2187 }
2188
2189 if (!drv->bdrv_snapshot_create) {
2190 if (bs->file != NULL) {
2191 return bdrv_can_snapshot(bs->file);
2192 }
2193 return 0;
2194 }
2195
2196 return 1;
2197}
2198
199630b6
BS
2199int bdrv_is_snapshot(BlockDriverState *bs)
2200{
2201 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2202}
2203
f9092b10
MA
2204BlockDriverState *bdrv_snapshots(void)
2205{
2206 BlockDriverState *bs;
2207
3ac906f7 2208 if (bs_snapshots) {
f9092b10 2209 return bs_snapshots;
3ac906f7 2210 }
f9092b10
MA
2211
2212 bs = NULL;
2213 while ((bs = bdrv_next(bs))) {
2214 if (bdrv_can_snapshot(bs)) {
3ac906f7
MA
2215 bs_snapshots = bs;
2216 return bs;
f9092b10
MA
2217 }
2218 }
2219 return NULL;
f9092b10
MA
2220}
2221
5fafdf24 2222int bdrv_snapshot_create(BlockDriverState *bs,
faea38e7
FB
2223 QEMUSnapshotInfo *sn_info)
2224{
2225 BlockDriver *drv = bs->drv;
2226 if (!drv)
19cb3738 2227 return -ENOMEDIUM;
7cdb1f6d
MK
2228 if (drv->bdrv_snapshot_create)
2229 return drv->bdrv_snapshot_create(bs, sn_info);
2230 if (bs->file)
2231 return bdrv_snapshot_create(bs->file, sn_info);
2232 return -ENOTSUP;
faea38e7
FB
2233}
2234
5fafdf24 2235int bdrv_snapshot_goto(BlockDriverState *bs,
faea38e7
FB
2236 const char *snapshot_id)
2237{
2238 BlockDriver *drv = bs->drv;
7cdb1f6d
MK
2239 int ret, open_ret;
2240
faea38e7 2241 if (!drv)
19cb3738 2242 return -ENOMEDIUM;
7cdb1f6d
MK
2243 if (drv->bdrv_snapshot_goto)
2244 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2245
2246 if (bs->file) {
2247 drv->bdrv_close(bs);
2248 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2249 open_ret = drv->bdrv_open(bs, bs->open_flags);
2250 if (open_ret < 0) {
2251 bdrv_delete(bs->file);
2252 bs->drv = NULL;
2253 return open_ret;
2254 }
2255 return ret;
2256 }
2257
2258 return -ENOTSUP;
faea38e7
FB
2259}
2260
2261int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2262{
2263 BlockDriver *drv = bs->drv;
2264 if (!drv)
19cb3738 2265 return -ENOMEDIUM;
7cdb1f6d
MK
2266 if (drv->bdrv_snapshot_delete)
2267 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2268 if (bs->file)
2269 return bdrv_snapshot_delete(bs->file, snapshot_id);
2270 return -ENOTSUP;
faea38e7
FB
2271}
2272
5fafdf24 2273int bdrv_snapshot_list(BlockDriverState *bs,
faea38e7
FB
2274 QEMUSnapshotInfo **psn_info)
2275{
2276 BlockDriver *drv = bs->drv;
2277 if (!drv)
19cb3738 2278 return -ENOMEDIUM;
7cdb1f6d
MK
2279 if (drv->bdrv_snapshot_list)
2280 return drv->bdrv_snapshot_list(bs, psn_info);
2281 if (bs->file)
2282 return bdrv_snapshot_list(bs->file, psn_info);
2283 return -ENOTSUP;
faea38e7
FB
2284}
2285
51ef6727 2286int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2287 const char *snapshot_name)
2288{
2289 BlockDriver *drv = bs->drv;
2290 if (!drv) {
2291 return -ENOMEDIUM;
2292 }
2293 if (!bs->read_only) {
2294 return -EINVAL;
2295 }
2296 if (drv->bdrv_snapshot_load_tmp) {
2297 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2298 }
2299 return -ENOTSUP;
2300}
2301
faea38e7
FB
#define NB_SUFFIXES 4

/*
 * Format @size into @buf (at most @buf_size bytes) in a compact form.
 *
 * Values up to 999 are printed as plain integers.  Larger values are
 * scaled by powers of 1024 and get a K/M/G/T suffix: one decimal digit
 * when the scaled value is below 10, otherwise a rounded integer.  Values
 * too large for the T range are still printed with the T suffix.
 *
 * Returns @buf.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (idx = 0; idx < NB_SUFFIXES; idx++, unit *= 1024) {
        if (size < (10 * unit)) {
            /* Small multiple of the unit: keep one fractional digit */
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit,
                     suffixes[idx]);
            return buf;
        }
        if (size < (1000 * unit) || idx == (NB_SUFFIXES - 1)) {
            /* Round to the nearest whole unit */
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     ((size + (unit >> 1)) / unit),
                     suffixes[idx]);
            return buf;
        }
    }
    return buf;
}
2331
2332char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2333{
2334 char buf1[128], date_buf[128], clock_buf[128];
3b9f94e1
FB
2335#ifdef _WIN32
2336 struct tm *ptm;
2337#else
faea38e7 2338 struct tm tm;
3b9f94e1 2339#endif
faea38e7
FB
2340 time_t ti;
2341 int64_t secs;
2342
2343 if (!sn) {
5fafdf24
TS
2344 snprintf(buf, buf_size,
2345 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
2346 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2347 } else {
2348 ti = sn->date_sec;
3b9f94e1
FB
2349#ifdef _WIN32
2350 ptm = localtime(&ti);
2351 strftime(date_buf, sizeof(date_buf),
2352 "%Y-%m-%d %H:%M:%S", ptm);
2353#else
faea38e7
FB
2354 localtime_r(&ti, &tm);
2355 strftime(date_buf, sizeof(date_buf),
2356 "%Y-%m-%d %H:%M:%S", &tm);
3b9f94e1 2357#endif
faea38e7
FB
2358 secs = sn->vm_clock_nsec / 1000000000;
2359 snprintf(clock_buf, sizeof(clock_buf),
2360 "%02d:%02d:%02d.%03d",
2361 (int)(secs / 3600),
2362 (int)((secs / 60) % 60),
5fafdf24 2363 (int)(secs % 60),
faea38e7
FB
2364 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2365 snprintf(buf, buf_size,
5fafdf24 2366 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
2367 sn->id_str, sn->name,
2368 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2369 date_buf,
2370 clock_buf);
2371 }
2372 return buf;
2373}
2374
ea2384d3 2375/**************************************************************/
83f64091 2376/* async I/Os */
ea2384d3 2377
3b69e4b9 2378BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
f141eafe 2379 QEMUIOVector *qiov, int nb_sectors,
3b69e4b9 2380 BlockDriverCompletionFunc *cb, void *opaque)
83f64091 2381{
bbf0a440
SH
2382 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2383
b2a61371
SH
2384 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
2385 cb, opaque, false, bdrv_co_do_rw);
ea2384d3
FB
2386}
2387
4dcafbb1
MT
2388typedef struct BlockCompleteData {
2389 BlockDriverCompletionFunc *cb;
2390 void *opaque;
2391 BlockDriverState *bs;
2392 int64_t sector_num;
2393 int nb_sectors;
2394} BlockCompleteData;
2395
2396static void block_complete_cb(void *opaque, int ret)
2397{
2398 BlockCompleteData *b = opaque;
2399
2400 if (b->bs->dirty_bitmap) {
2401 set_dirty_bitmap(b->bs, b->sector_num, b->nb_sectors, 1);
2402 }
2403 b->cb(b->opaque, ret);
7267c094 2404 g_free(b);
4dcafbb1
MT
2405}
2406
2407static BlockCompleteData *blk_dirty_cb_alloc(BlockDriverState *bs,
2408 int64_t sector_num,
2409 int nb_sectors,
2410 BlockDriverCompletionFunc *cb,
2411 void *opaque)
2412{
7267c094 2413 BlockCompleteData *blkdata = g_malloc0(sizeof(BlockCompleteData));
4dcafbb1
MT
2414
2415 blkdata->bs = bs;
2416 blkdata->cb = cb;
2417 blkdata->opaque = opaque;
2418 blkdata->sector_num = sector_num;
2419 blkdata->nb_sectors = nb_sectors;
2420
2421 return blkdata;
2422}
2423
f141eafe
AL
2424BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2425 QEMUIOVector *qiov, int nb_sectors,
2426 BlockDriverCompletionFunc *cb, void *opaque)
ea2384d3 2427{
83f64091 2428 BlockDriver *drv = bs->drv;
a36e69dd 2429 BlockDriverAIOCB *ret;
4dcafbb1 2430 BlockCompleteData *blk_cb_data;
ea2384d3 2431
bbf0a440
SH
2432 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2433
19cb3738 2434 if (!drv)
ce1a14dc 2435 return NULL;
83f64091 2436 if (bs->read_only)
ce1a14dc 2437 return NULL;
71d0770c
AL
2438 if (bdrv_check_request(bs, sector_num, nb_sectors))
2439 return NULL;
83f64091 2440
c6d22830 2441 if (bs->dirty_bitmap) {
4dcafbb1
MT
2442 blk_cb_data = blk_dirty_cb_alloc(bs, sector_num, nb_sectors, cb,
2443 opaque);
2444 cb = &block_complete_cb;
2445 opaque = blk_cb_data;
7cd1e32a 2446 }
a55eb92c 2447
f141eafe
AL
2448 ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
2449 cb, opaque);
a36e69dd
TS
2450
2451 if (ret) {
294cc35f
KW
2452 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
2453 bs->wr_highest_sector = sector_num + nb_sectors - 1;
2454 }
a36e69dd
TS
2455 }
2456
2457 return ret;
83f64091
FB
2458}
2459
40b4f539
KW
2460
2461typedef struct MultiwriteCB {
2462 int error;
2463 int num_requests;
2464 int num_callbacks;
2465 struct {
2466 BlockDriverCompletionFunc *cb;
2467 void *opaque;
2468 QEMUIOVector *free_qiov;
2469 void *free_buf;
2470 } callbacks[];
2471} MultiwriteCB;
2472
2473static void multiwrite_user_cb(MultiwriteCB *mcb)
2474{
2475 int i;
2476
2477 for (i = 0; i < mcb->num_callbacks; i++) {
2478 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1e1ea48d
SH
2479 if (mcb->callbacks[i].free_qiov) {
2480 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2481 }
7267c094 2482 g_free(mcb->callbacks[i].free_qiov);
f8a83245 2483 qemu_vfree(mcb->callbacks[i].free_buf);
40b4f539
KW
2484 }
2485}
2486
2487static void multiwrite_cb(void *opaque, int ret)
2488{
2489 MultiwriteCB *mcb = opaque;
2490
6d519a5f
SH
2491 trace_multiwrite_cb(mcb, ret);
2492
cb6d3ca0 2493 if (ret < 0 && !mcb->error) {
40b4f539 2494 mcb->error = ret;
40b4f539
KW
2495 }
2496
2497 mcb->num_requests--;
2498 if (mcb->num_requests == 0) {
de189a1b 2499 multiwrite_user_cb(mcb);
7267c094 2500 g_free(mcb);
40b4f539
KW
2501 }
2502}
2503
2504static int multiwrite_req_compare(const void *a, const void *b)
2505{
77be4366
CH
2506 const BlockRequest *req1 = a, *req2 = b;
2507
2508 /*
2509 * Note that we can't simply subtract req2->sector from req1->sector
2510 * here as that could overflow the return value.
2511 */
2512 if (req1->sector > req2->sector) {
2513 return 1;
2514 } else if (req1->sector < req2->sector) {
2515 return -1;
2516 } else {
2517 return 0;
2518 }
40b4f539
KW
2519}
2520
2521/*
2522 * Takes a bunch of requests and tries to merge them. Returns the number of
2523 * requests that remain after merging.
2524 */
2525static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2526 int num_reqs, MultiwriteCB *mcb)
2527{
2528 int i, outidx;
2529
2530 // Sort requests by start sector
2531 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2532
2533 // Check if adjacent requests touch the same clusters. If so, combine them,
2534 // filling up gaps with zero sectors.
2535 outidx = 0;
2536 for (i = 1; i < num_reqs; i++) {
2537 int merge = 0;
2538 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2539
2540 // This handles the cases that are valid for all block drivers, namely
2541 // exactly sequential writes and overlapping writes.
2542 if (reqs[i].sector <= oldreq_last) {
2543 merge = 1;
2544 }
2545
2546 // The block driver may decide that it makes sense to combine requests
2547 // even if there is a gap of some sectors between them. In this case,
2548 // the gap is filled with zeros (therefore only applicable for yet
2549 // unused space in format like qcow2).
2550 if (!merge && bs->drv->bdrv_merge_requests) {
2551 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2552 }
2553
e2a305fb
CH
2554 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2555 merge = 0;
2556 }
2557
40b4f539
KW
2558 if (merge) {
2559 size_t size;
7267c094 2560 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
40b4f539
KW
2561 qemu_iovec_init(qiov,
2562 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2563
2564 // Add the first request to the merged one. If the requests are
2565 // overlapping, drop the last sectors of the first request.
2566 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2567 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2568
2569 // We might need to add some zeros between the two requests
2570 if (reqs[i].sector > oldreq_last) {
2571 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2572 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2573 memset(buf, 0, zero_bytes);
2574 qemu_iovec_add(qiov, buf, zero_bytes);
2575 mcb->callbacks[i].free_buf = buf;
2576 }
2577
2578 // Add the second request
2579 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2580
cbf1dff2 2581 reqs[outidx].nb_sectors = qiov->size >> 9;
40b4f539
KW
2582 reqs[outidx].qiov = qiov;
2583
2584 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2585 } else {
2586 outidx++;
2587 reqs[outidx].sector = reqs[i].sector;
2588 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2589 reqs[outidx].qiov = reqs[i].qiov;
2590 }
2591 }
2592
2593 return outidx + 1;
2594}
2595
2596/*
2597 * Submit multiple AIO write requests at once.
2598 *
2599 * On success, the function returns 0 and all requests in the reqs array have
2600 * been submitted. In error case this function returns -1, and any of the
2601 * requests may or may not be submitted yet. In particular, this means that the
2602 * callback will be called for some of the requests, for others it won't. The
2603 * caller must check the error field of the BlockRequest to wait for the right
2604 * callbacks (if error != 0, no callback will be called).
2605 *
2606 * The implementation may modify the contents of the reqs array, e.g. to merge
2607 * requests. However, the fields opaque and error are left unmodified as they
2608 * are used to signal failure for a single request to the caller.
2609 */
2610int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2611{
2612 BlockDriverAIOCB *acb;
2613 MultiwriteCB *mcb;
2614 int i;
2615
301db7c2
RH
2616 /* don't submit writes if we don't have a medium */
2617 if (bs->drv == NULL) {
2618 for (i = 0; i < num_reqs; i++) {
2619 reqs[i].error = -ENOMEDIUM;
2620 }
2621 return -1;
2622 }
2623
40b4f539
KW
2624 if (num_reqs == 0) {
2625 return 0;
2626 }
2627
2628 // Create MultiwriteCB structure
7267c094 2629 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
40b4f539
KW
2630 mcb->num_requests = 0;
2631 mcb->num_callbacks = num_reqs;
2632
2633 for (i = 0; i < num_reqs; i++) {
2634 mcb->callbacks[i].cb = reqs[i].cb;
2635 mcb->callbacks[i].opaque = reqs[i].opaque;
2636 }
2637
2638 // Check for mergable requests
2639 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2640
6d519a5f
SH
2641 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2642
453f9a16
KW
2643 /*
2644 * Run the aio requests. As soon as one request can't be submitted
2645 * successfully, fail all requests that are not yet submitted (we must
2646 * return failure for all requests anyway)
2647 *
2648 * num_requests cannot be set to the right value immediately: If
2649 * bdrv_aio_writev fails for some request, num_requests would be too high
2650 * and therefore multiwrite_cb() would never recognize the multiwrite
2651 * request as completed. We also cannot use the loop variable i to set it
2652 * when the first request fails because the callback may already have been
2653 * called for previously submitted requests. Thus, num_requests must be
2654 * incremented for each request that is submitted.
2655 *
2656 * The problem that callbacks may be called early also means that we need
2657 * to take care that num_requests doesn't become 0 before all requests are
2658 * submitted - multiwrite_cb() would consider the multiwrite request
2659 * completed. A dummy request that is "completed" by a manual call to
2660 * multiwrite_cb() takes care of this.
2661 */
2662 mcb->num_requests = 1;
2663
6d519a5f 2664 // Run the aio requests
40b4f539 2665 for (i = 0; i < num_reqs; i++) {
453f9a16 2666 mcb->num_requests++;
40b4f539
KW
2667 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2668 reqs[i].nb_sectors, multiwrite_cb, mcb);
2669
2670 if (acb == NULL) {
2671 // We can only fail the whole thing if no request has been
2672 // submitted yet. Otherwise we'll wait for the submitted AIOs to
2673 // complete and report the error in the callback.
453f9a16 2674 if (i == 0) {
6d519a5f 2675 trace_bdrv_aio_multiwrite_earlyfail(mcb);
40b4f539
KW
2676 goto fail;
2677 } else {
6d519a5f 2678 trace_bdrv_aio_multiwrite_latefail(mcb, i);
7eb58a6c 2679 multiwrite_cb(mcb, -EIO);
40b4f539
KW
2680 break;
2681 }
40b4f539
KW
2682 }
2683 }
2684
453f9a16
KW
2685 /* Complete the dummy request */
2686 multiwrite_cb(mcb, 0);
2687
40b4f539
KW
2688 return 0;
2689
2690fail:
453f9a16
KW
2691 for (i = 0; i < mcb->num_callbacks; i++) {
2692 reqs[i].error = -EIO;
2693 }
7267c094 2694 g_free(mcb);
40b4f539
KW
2695 return -1;
2696}
2697
b2e12bc6
CH
2698BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2699 BlockDriverCompletionFunc *cb, void *opaque)
2700{
2701 BlockDriver *drv = bs->drv;
2702
a13aac04
SH
2703 trace_bdrv_aio_flush(bs, opaque);
2704
016f5cf6
AG
2705 if (bs->open_flags & BDRV_O_NO_FLUSH) {
2706 return bdrv_aio_noop_em(bs, cb, opaque);
2707 }
2708
b2e12bc6
CH
2709 if (!drv)
2710 return NULL;
b2e12bc6
CH
2711 return drv->bdrv_aio_flush(bs, cb, opaque);
2712}
2713
83f64091 2714void bdrv_aio_cancel(BlockDriverAIOCB *acb)
83f64091 2715{
6bbff9a0 2716 acb->pool->cancel(acb);
83f64091
FB
2717}
2718
ce1a14dc 2719
83f64091
FB
2720/**************************************************************/
2721/* async block device emulation */
2722
c16b5a2c
CH
2723typedef struct BlockDriverAIOCBSync {
2724 BlockDriverAIOCB common;
2725 QEMUBH *bh;
2726 int ret;
2727 /* vector translation state */
2728 QEMUIOVector *qiov;
2729 uint8_t *bounce;
2730 int is_write;
2731} BlockDriverAIOCBSync;
2732
2733static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2734{
b666d239
KW
2735 BlockDriverAIOCBSync *acb =
2736 container_of(blockacb, BlockDriverAIOCBSync, common);
6a7ad299 2737 qemu_bh_delete(acb->bh);
36afc451 2738 acb->bh = NULL;
c16b5a2c
CH
2739 qemu_aio_release(acb);
2740}
2741
2742static AIOPool bdrv_em_aio_pool = {
2743 .aiocb_size = sizeof(BlockDriverAIOCBSync),
2744 .cancel = bdrv_aio_cancel_em,
2745};
2746
ce1a14dc 2747static void bdrv_aio_bh_cb(void *opaque)
83f64091 2748{
ce1a14dc 2749 BlockDriverAIOCBSync *acb = opaque;
f141eafe 2750
f141eafe
AL
2751 if (!acb->is_write)
2752 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
ceb42de8 2753 qemu_vfree(acb->bounce);
ce1a14dc 2754 acb->common.cb(acb->common.opaque, acb->ret);
6a7ad299 2755 qemu_bh_delete(acb->bh);
36afc451 2756 acb->bh = NULL;
ce1a14dc 2757 qemu_aio_release(acb);
83f64091 2758}
beac80cd 2759
f141eafe
AL
2760static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2761 int64_t sector_num,
2762 QEMUIOVector *qiov,
2763 int nb_sectors,
2764 BlockDriverCompletionFunc *cb,
2765 void *opaque,
2766 int is_write)
2767
83f64091 2768{
ce1a14dc 2769 BlockDriverAIOCBSync *acb;
ce1a14dc 2770
c16b5a2c 2771 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
f141eafe
AL
2772 acb->is_write = is_write;
2773 acb->qiov = qiov;
e268ca52 2774 acb->bounce = qemu_blockalign(bs, qiov->size);
f141eafe 2775
ce1a14dc
PB
2776 if (!acb->bh)
2777 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
f141eafe
AL
2778
2779 if (is_write) {
2780 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
1ed20acf 2781 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
f141eafe 2782 } else {
1ed20acf 2783 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
f141eafe
AL
2784 }
2785
ce1a14dc 2786 qemu_bh_schedule(acb->bh);
f141eafe 2787
ce1a14dc 2788 return &acb->common;
beac80cd
FB
2789}
2790
f141eafe
AL
2791static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2792 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 2793 BlockDriverCompletionFunc *cb, void *opaque)
beac80cd 2794{
f141eafe
AL
2795 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2796}
83f64091 2797
f141eafe
AL
2798static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2799 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2800 BlockDriverCompletionFunc *cb, void *opaque)
2801{
2802 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
beac80cd 2803}
beac80cd 2804
68485420
KW
2805
2806typedef struct BlockDriverAIOCBCoroutine {
2807 BlockDriverAIOCB common;
2808 BlockRequest req;
2809 bool is_write;
2810 QEMUBH* bh;
2811} BlockDriverAIOCBCoroutine;
2812
2813static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
2814{
2815 qemu_aio_flush();
2816}
2817
2818static AIOPool bdrv_em_co_aio_pool = {
2819 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
2820 .cancel = bdrv_aio_co_cancel_em,
2821};
2822
2823static void bdrv_co_rw_bh(void *opaque)
2824{
2825 BlockDriverAIOCBCoroutine *acb = opaque;
2826
2827 acb->common.cb(acb->common.opaque, acb->req.error);
2828 qemu_bh_delete(acb->bh);
2829 qemu_aio_release(acb);
2830}
2831
b2a61371 2832/* Invoke .bdrv_co_readv/.bdrv_co_writev */
68485420
KW
2833static void coroutine_fn bdrv_co_rw(void *opaque)
2834{
2835 BlockDriverAIOCBCoroutine *acb = opaque;
2836 BlockDriverState *bs = acb->common.bs;
2837
2838 if (!acb->is_write) {
2839 acb->req.error = bs->drv->bdrv_co_readv(bs, acb->req.sector,
2840 acb->req.nb_sectors, acb->req.qiov);
2841 } else {
2842 acb->req.error = bs->drv->bdrv_co_writev(bs, acb->req.sector,
2843 acb->req.nb_sectors, acb->req.qiov);
2844 }
2845
2846 acb->bh = qemu_bh_new(bdrv_co_rw_bh, acb);
2847 qemu_bh_schedule(acb->bh);
2848}
2849
b2a61371
SH
2850/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
2851static void coroutine_fn bdrv_co_do_rw(void *opaque)
2852{
2853 BlockDriverAIOCBCoroutine *acb = opaque;
2854 BlockDriverState *bs = acb->common.bs;
2855
2856 if (!acb->is_write) {
2857 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
2858 acb->req.nb_sectors, acb->req.qiov);
2859 } else {
2860 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
2861 acb->req.nb_sectors, acb->req.qiov);
2862 }
2863
2864 acb->bh = qemu_bh_new(bdrv_co_rw_bh, acb);
2865 qemu_bh_schedule(acb->bh);
2866}
2867
68485420
KW
2868static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
2869 int64_t sector_num,
2870 QEMUIOVector *qiov,
2871 int nb_sectors,
2872 BlockDriverCompletionFunc *cb,
2873 void *opaque,
b2a61371
SH
2874 bool is_write,
2875 CoroutineEntry *entry)
68485420
KW
2876{
2877 Coroutine *co;
2878 BlockDriverAIOCBCoroutine *acb;
2879
2880 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2881 acb->req.sector = sector_num;
2882 acb->req.nb_sectors = nb_sectors;
2883 acb->req.qiov = qiov;
2884 acb->is_write = is_write;
2885
b2a61371 2886 co = qemu_coroutine_create(entry);
68485420
KW
2887 qemu_coroutine_enter(co, acb);
2888
2889 return &acb->common;
2890}
2891
2892static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
2893 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2894 BlockDriverCompletionFunc *cb, void *opaque)
2895{
2896 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
b2a61371 2897 false, bdrv_co_rw);
68485420
KW
2898}
2899
2900static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
2901 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2902 BlockDriverCompletionFunc *cb, void *opaque)
2903{
2904 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
b2a61371 2905 true, bdrv_co_rw);
68485420
KW
2906}
2907
b2e12bc6
CH
2908static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
2909 BlockDriverCompletionFunc *cb, void *opaque)
2910{
2911 BlockDriverAIOCBSync *acb;
2912
2913 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2914 acb->is_write = 1; /* don't bounce in the completion hadler */
2915 acb->qiov = NULL;
2916 acb->bounce = NULL;
2917 acb->ret = 0;
2918
2919 if (!acb->bh)
2920 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2921
2922 bdrv_flush(bs);
2923 qemu_bh_schedule(acb->bh);
2924 return &acb->common;
2925}
2926
016f5cf6
AG
2927static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
2928 BlockDriverCompletionFunc *cb, void *opaque)
2929{
2930 BlockDriverAIOCBSync *acb;
2931
2932 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2933 acb->is_write = 1; /* don't bounce in the completion handler */
2934 acb->qiov = NULL;
2935 acb->bounce = NULL;
2936 acb->ret = 0;
2937
2938 if (!acb->bh) {
2939 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2940 }
2941
2942 qemu_bh_schedule(acb->bh);
2943 return &acb->common;
2944}
2945
83f64091
FB
2946/**************************************************************/
2947/* sync block device emulation */
ea2384d3 2948
83f64091
FB
/*
 * Completion callback for the synchronous emulation: store @ret in the
 * int that @opaque points to.
 */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
2953
5fafdf24 2954static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
83f64091 2955 uint8_t *buf, int nb_sectors)
7a6cba61 2956{
ce1a14dc
PB
2957 int async_ret;
2958 BlockDriverAIOCB *acb;
f141eafe
AL
2959 struct iovec iov;
2960 QEMUIOVector qiov;
83f64091 2961
83f64091 2962 async_ret = NOT_DONE;
3f4cb3d3 2963 iov.iov_base = (void *)buf;
eb5a3165 2964 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
f141eafe 2965 qemu_iovec_init_external(&qiov, &iov, 1);
1ed20acf
SH
2966
2967 acb = bs->drv->bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
2968 bdrv_rw_em_cb, &async_ret);
65d6b3d8
KW
2969 if (acb == NULL) {
2970 async_ret = -1;
2971 goto fail;
2972 }
baf35cb9 2973
83f64091
FB
2974 while (async_ret == NOT_DONE) {
2975 qemu_aio_wait();
2976 }
baf35cb9 2977
65d6b3d8
KW
2978
2979fail:
83f64091 2980 return async_ret;
7a6cba61
PB
2981}
2982
83f64091
FB
2983static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
2984 const uint8_t *buf, int nb_sectors)
2985{
ce1a14dc
PB
2986 int async_ret;
2987 BlockDriverAIOCB *acb;
f141eafe
AL
2988 struct iovec iov;
2989 QEMUIOVector qiov;
83f64091 2990
83f64091 2991 async_ret = NOT_DONE;
f141eafe 2992 iov.iov_base = (void *)buf;
eb5a3165 2993 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
f141eafe 2994 qemu_iovec_init_external(&qiov, &iov, 1);
1ed20acf
SH
2995
2996 acb = bs->drv->bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
2997 bdrv_rw_em_cb, &async_ret);
65d6b3d8
KW
2998 if (acb == NULL) {
2999 async_ret = -1;
3000 goto fail;
3001 }
83f64091
FB
3002 while (async_ret == NOT_DONE) {
3003 qemu_aio_wait();
3004 }
65d6b3d8
KW
3005
3006fail:
83f64091
FB
3007 return async_ret;
3008}
ea2384d3
FB
3009
3010void bdrv_init(void)
3011{
5efa9d5a 3012 module_call_init(MODULE_INIT_BLOCK);
ea2384d3 3013}
ce1a14dc 3014
eb852011
MA
3015void bdrv_init_with_whitelist(void)
3016{
3017 use_bdrv_whitelist = 1;
3018 bdrv_init();
3019}
3020
c16b5a2c
CH
3021void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
3022 BlockDriverCompletionFunc *cb, void *opaque)
ce1a14dc 3023{
ce1a14dc
PB
3024 BlockDriverAIOCB *acb;
3025
6bbff9a0
AL
3026 if (pool->free_aiocb) {
3027 acb = pool->free_aiocb;
3028 pool->free_aiocb = acb->next;
ce1a14dc 3029 } else {
7267c094 3030 acb = g_malloc0(pool->aiocb_size);
6bbff9a0 3031 acb->pool = pool;
ce1a14dc
PB
3032 }
3033 acb->bs = bs;
3034 acb->cb = cb;
3035 acb->opaque = opaque;
3036 return acb;
3037}
3038
3039void qemu_aio_release(void *p)
3040{
6bbff9a0
AL
3041 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
3042 AIOPool *pool = acb->pool;
3043 acb->next = pool->free_aiocb;
3044 pool->free_aiocb = acb;
ce1a14dc 3045}
19cb3738 3046
f9f05dc5
KW
3047/**************************************************************/
3048/* Coroutine block device emulation */
3049
3050typedef struct CoroutineIOCompletion {
3051 Coroutine *coroutine;
3052 int ret;
3053} CoroutineIOCompletion;
3054
3055static void bdrv_co_io_em_complete(void *opaque, int ret)
3056{
3057 CoroutineIOCompletion *co = opaque;
3058
3059 co->ret = ret;
3060 qemu_coroutine_enter(co->coroutine, NULL);
3061}
3062
3063static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
3064 int nb_sectors, QEMUIOVector *iov,
3065 bool is_write)
3066{
3067 CoroutineIOCompletion co = {
3068 .coroutine = qemu_coroutine_self(),
3069 };
3070 BlockDriverAIOCB *acb;
3071
3072 if (is_write) {
a652d160
SH
3073 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
3074 bdrv_co_io_em_complete, &co);
f9f05dc5 3075 } else {
a652d160
SH
3076 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3077 bdrv_co_io_em_complete, &co);
f9f05dc5
KW
3078 }
3079
59370aaa 3080 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
f9f05dc5
KW
3081 if (!acb) {
3082 return -EIO;
3083 }
3084 qemu_coroutine_yield();
3085
3086 return co.ret;
3087}
3088
3089static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3090 int64_t sector_num, int nb_sectors,
3091 QEMUIOVector *iov)
3092{
3093 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3094}
3095
3096static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3097 int64_t sector_num, int nb_sectors,
3098 QEMUIOVector *iov)
3099{
3100 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3101}
3102
e7a8a783
KW
3103static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs)
3104{
3105 CoroutineIOCompletion co = {
3106 .coroutine = qemu_coroutine_self(),
3107 };
3108 BlockDriverAIOCB *acb;
3109
3110 acb = bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3111 if (!acb) {
3112 return -EIO;
3113 }
3114 qemu_coroutine_yield();
3115 return co.ret;
3116}
3117
19cb3738
FB
3118/**************************************************************/
3119/* removable device support */
3120
3121/**
3122 * Return TRUE if the media is present
3123 */
3124int bdrv_is_inserted(BlockDriverState *bs)
3125{
3126 BlockDriver *drv = bs->drv;
a1aff5bf 3127
19cb3738
FB
3128 if (!drv)
3129 return 0;
3130 if (!drv->bdrv_is_inserted)
a1aff5bf
MA
3131 return 1;
3132 return drv->bdrv_is_inserted(bs);
19cb3738
FB
3133}
3134
3135/**
8e49ca46
MA
3136 * Return whether the media changed since the last call to this
3137 * function, or -ENOTSUP if we don't know. Most drivers don't know.
19cb3738
FB
3138 */
3139int bdrv_media_changed(BlockDriverState *bs)
3140{
3141 BlockDriver *drv = bs->drv;
19cb3738 3142
8e49ca46
MA
3143 if (drv && drv->bdrv_media_changed) {
3144 return drv->bdrv_media_changed(bs);
3145 }
3146 return -ENOTSUP;
19cb3738
FB
3147}
3148
3149/**
3150 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3151 */
fdec4404 3152void bdrv_eject(BlockDriverState *bs, int eject_flag)
19cb3738
FB
3153{
3154 BlockDriver *drv = bs->drv;
19cb3738 3155
822e1cd1
MA
3156 if (drv && drv->bdrv_eject) {
3157 drv->bdrv_eject(bs, eject_flag);
19cb3738
FB
3158 }
3159}
3160
19cb3738
FB
3161/**
3162 * Lock or unlock the media (if it is locked, the user won't be able
3163 * to eject it manually).
3164 */
025e849a 3165void bdrv_lock_medium(BlockDriverState *bs, bool locked)
19cb3738
FB
3166{
3167 BlockDriver *drv = bs->drv;
3168
025e849a 3169 trace_bdrv_lock_medium(bs, locked);
b8c6d095 3170
025e849a
MA
3171 if (drv && drv->bdrv_lock_medium) {
3172 drv->bdrv_lock_medium(bs, locked);
19cb3738
FB
3173 }
3174}
985a03b0
TS
3175
3176/* needed for generic scsi interface */
3177
3178int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3179{
3180 BlockDriver *drv = bs->drv;
3181
3182 if (drv && drv->bdrv_ioctl)
3183 return drv->bdrv_ioctl(bs, req, buf);
3184 return -ENOTSUP;
3185}
7d780669 3186
221f715d
AL
3187BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3188 unsigned long int req, void *buf,
3189 BlockDriverCompletionFunc *cb, void *opaque)
7d780669 3190{
221f715d 3191 BlockDriver *drv = bs->drv;
7d780669 3192
221f715d
AL
3193 if (drv && drv->bdrv_aio_ioctl)
3194 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3195 return NULL;
7d780669 3196}
e268ca52 3197
7b6f9300
MA
3198void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3199{
3200 bs->buffer_alignment = align;
3201}
7cd1e32a 3202
e268ca52
AL
3203void *qemu_blockalign(BlockDriverState *bs, size_t size)
3204{
3205 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3206}
7cd1e32a 3207
3208void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3209{
3210 int64_t bitmap_size;
a55eb92c 3211
aaa0eb75 3212 bs->dirty_count = 0;
a55eb92c 3213 if (enable) {
c6d22830
JK
3214 if (!bs->dirty_bitmap) {
3215 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3216 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3217 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
a55eb92c 3218
7267c094 3219 bs->dirty_bitmap = g_malloc0(bitmap_size);
a55eb92c 3220 }
7cd1e32a 3221 } else {
c6d22830 3222 if (bs->dirty_bitmap) {
7267c094 3223 g_free(bs->dirty_bitmap);
c6d22830 3224 bs->dirty_bitmap = NULL;
a55eb92c 3225 }
7cd1e32a 3226 }
3227}
3228
3229int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3230{
6ea44308 3231 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c 3232
c6d22830
JK
3233 if (bs->dirty_bitmap &&
3234 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
6d59fec1
MT
3235 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3236 (1UL << (chunk % (sizeof(unsigned long) * 8))));
7cd1e32a 3237 } else {
3238 return 0;
3239 }
3240}
3241
a55eb92c
JK
3242void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3243 int nr_sectors)
7cd1e32a 3244{
3245 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3246}
aaa0eb75
LS
3247
3248int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3249{
3250 return bs->dirty_count;
3251}
f88e1a42 3252
db593f25
MT
3253void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3254{
3255 assert(bs->in_use != in_use);
3256 bs->in_use = in_use;
3257}
3258
3259int bdrv_in_use(BlockDriverState *bs)
3260{
3261 return bs->in_use;
3262}
3263
28a7282a
LC
3264void bdrv_iostatus_enable(BlockDriverState *bs)
3265{
3266 bs->iostatus = BDRV_IOS_OK;
3267}
3268
3269/* The I/O status is only enabled if the drive explicitly
3270 * enables it _and_ the VM is configured to stop on errors */
3271bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3272{
3273 return (bs->iostatus != BDRV_IOS_INVAL &&
3274 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3275 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3276 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3277}
3278
3279void bdrv_iostatus_disable(BlockDriverState *bs)
3280{
3281 bs->iostatus = BDRV_IOS_INVAL;
3282}
3283
3284void bdrv_iostatus_reset(BlockDriverState *bs)
3285{
3286 if (bdrv_iostatus_is_enabled(bs)) {
3287 bs->iostatus = BDRV_IOS_OK;
3288 }
3289}
3290
3291/* XXX: Today this is set by device models because it makes the implementation
3292 quite simple. However, the block layer knows about the error, so it's
3293 possible to implement this without device models being involved */
3294void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3295{
3296 if (bdrv_iostatus_is_enabled(bs) && bs->iostatus == BDRV_IOS_OK) {
3297 assert(error >= 0);
3298 bs->iostatus = error == ENOSPC ? BDRV_IOS_ENOSPC : BDRV_IOS_FAILED;
3299 }
3300}
3301
a597e79c
CH
3302void
3303bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3304 enum BlockAcctType type)
3305{
3306 assert(type < BDRV_MAX_IOTYPE);
3307
3308 cookie->bytes = bytes;
c488c7f6 3309 cookie->start_time_ns = get_clock();
a597e79c
CH
3310 cookie->type = type;
3311}
3312
3313void
3314bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3315{
3316 assert(cookie->type < BDRV_MAX_IOTYPE);
3317
3318 bs->nr_bytes[cookie->type] += cookie->bytes;
3319 bs->nr_ops[cookie->type]++;
c488c7f6 3320 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
a597e79c
CH
3321}
3322
f88e1a42
JS
3323int bdrv_img_create(const char *filename, const char *fmt,
3324 const char *base_filename, const char *base_fmt,
3325 char *options, uint64_t img_size, int flags)
3326{
3327 QEMUOptionParameter *param = NULL, *create_options = NULL;
d220894e 3328 QEMUOptionParameter *backing_fmt, *backing_file, *size;
f88e1a42
JS
3329 BlockDriverState *bs = NULL;
3330 BlockDriver *drv, *proto_drv;
96df67d1 3331 BlockDriver *backing_drv = NULL;
f88e1a42
JS
3332 int ret = 0;
3333
3334 /* Find driver and parse its options */
3335 drv = bdrv_find_format(fmt);
3336 if (!drv) {
3337 error_report("Unknown file format '%s'", fmt);
4f70f249 3338 ret = -EINVAL;
f88e1a42
JS
3339 goto out;
3340 }
3341
3342 proto_drv = bdrv_find_protocol(filename);
3343 if (!proto_drv) {
3344 error_report("Unknown protocol '%s'", filename);
4f70f249 3345 ret = -EINVAL;
f88e1a42
JS
3346 goto out;
3347 }
3348
3349 create_options = append_option_parameters(create_options,
3350 drv->create_options);
3351 create_options = append_option_parameters(create_options,
3352 proto_drv->create_options);
3353
3354 /* Create parameter list with default values */
3355 param = parse_option_parameters("", create_options, param);
3356
3357 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3358
3359 /* Parse -o options */
3360 if (options) {
3361 param = parse_option_parameters(options, create_options, param);
3362 if (param == NULL) {
3363 error_report("Invalid options for file format '%s'.", fmt);
4f70f249 3364 ret = -EINVAL;
f88e1a42
JS
3365 goto out;
3366 }
3367 }
3368
3369 if (base_filename) {
3370 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3371 base_filename)) {
3372 error_report("Backing file not supported for file format '%s'",
3373 fmt);
4f70f249 3374 ret = -EINVAL;
f88e1a42
JS
3375 goto out;
3376 }
3377 }
3378
3379 if (base_fmt) {
3380 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3381 error_report("Backing file format not supported for file "
3382 "format '%s'", fmt);
4f70f249 3383 ret = -EINVAL;
f88e1a42
JS
3384 goto out;
3385 }
3386 }
3387
792da93a
JS
3388 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3389 if (backing_file && backing_file->value.s) {
3390 if (!strcmp(filename, backing_file->value.s)) {
3391 error_report("Error: Trying to create an image with the "
3392 "same filename as the backing file");
4f70f249 3393 ret = -EINVAL;
792da93a
JS
3394 goto out;
3395 }
3396 }
3397
f88e1a42
JS
3398 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3399 if (backing_fmt && backing_fmt->value.s) {
96df67d1
SH
3400 backing_drv = bdrv_find_format(backing_fmt->value.s);
3401 if (!backing_drv) {
f88e1a42
JS
3402 error_report("Unknown backing file format '%s'",
3403 backing_fmt->value.s);
4f70f249 3404 ret = -EINVAL;
f88e1a42
JS
3405 goto out;
3406 }
3407 }
3408
3409 // The size for the image must always be specified, with one exception:
3410 // If we are using a backing file, we can obtain the size from there
d220894e
KW
3411 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3412 if (size && size->value.n == -1) {
f88e1a42
JS
3413 if (backing_file && backing_file->value.s) {
3414 uint64_t size;
f88e1a42
JS
3415 char buf[32];
3416
f88e1a42
JS
3417 bs = bdrv_new("");
3418
96df67d1 3419 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
f88e1a42 3420 if (ret < 0) {
96df67d1 3421 error_report("Could not open '%s'", backing_file->value.s);
f88e1a42
JS
3422 goto out;
3423 }
3424 bdrv_get_geometry(bs, &size);
3425 size *= 512;
3426
3427 snprintf(buf, sizeof(buf), "%" PRId64, size);
3428 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3429 } else {
3430 error_report("Image creation needs a size parameter");
4f70f249 3431 ret = -EINVAL;
f88e1a42
JS
3432 goto out;
3433 }
3434 }
3435
3436 printf("Formatting '%s', fmt=%s ", filename, fmt);
3437 print_option_parameters(param);
3438 puts("");
3439
3440 ret = bdrv_create(drv, filename, param);
3441
3442 if (ret < 0) {
3443 if (ret == -ENOTSUP) {
3444 error_report("Formatting or formatting option not supported for "
3445 "file format '%s'", fmt);
3446 } else if (ret == -EFBIG) {
3447 error_report("The image size is too large for file format '%s'",
3448 fmt);
3449 } else {
3450 error_report("%s: error while creating %s: %s", filename, fmt,
3451 strerror(-ret));
3452 }
3453 }
3454
3455out:
3456 free_option_parameters(create_options);
3457 free_option_parameters(param);
3458
3459 if (bs) {
3460 bdrv_delete(bs);
3461 }
4f70f249
JS
3462
3463 return ret;
f88e1a42 3464}