]> git.proxmox.com Git - mirror_qemu.git/blame - block.c
linux-aio: Allow reads beyond the end of growable images
[mirror_qemu.git] / block.c
CommitLineData
fc01f7e7
FB
1/*
2 * QEMU System Emulator block driver
5fafdf24 3 *
fc01f7e7 4 * Copyright (c) 2003 Fabrice Bellard
5fafdf24 5 *
fc01f7e7
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
3990d09a 24#include "config-host.h"
faf07963 25#include "qemu-common.h"
6d519a5f 26#include "trace.h"
376253ec 27#include "monitor.h"
ea2384d3 28#include "block_int.h"
5efa9d5a 29#include "module.h"
d15e5465 30#include "qemu-objects.h"
68485420 31#include "qemu-coroutine.h"
fc01f7e7 32
71e72a19 33#ifdef CONFIG_BSD
7674e7bf
FB
34#include <sys/types.h>
35#include <sys/stat.h>
36#include <sys/ioctl.h>
72cf2d4f 37#include <sys/queue.h>
c5e97233 38#ifndef __DragonFly__
7674e7bf
FB
39#include <sys/disk.h>
40#endif
c5e97233 41#endif
7674e7bf 42
49dc768d
AL
43#ifdef _WIN32
44#include <windows.h>
45#endif
46
1c9805a3
SH
47#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
48
7d4b4ba5 49static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
f141eafe
AL
50static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
51 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
c87c0672 52 BlockDriverCompletionFunc *cb, void *opaque);
f141eafe
AL
53static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
54 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 55 BlockDriverCompletionFunc *cb, void *opaque);
b2e12bc6
CH
56static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
57 BlockDriverCompletionFunc *cb, void *opaque);
016f5cf6
AG
58static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
59 BlockDriverCompletionFunc *cb, void *opaque);
5fafdf24 60static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
83f64091
FB
61 uint8_t *buf, int nb_sectors);
62static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
63 const uint8_t *buf, int nb_sectors);
68485420
KW
64static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
65 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
66 BlockDriverCompletionFunc *cb, void *opaque);
67static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
68 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
69 BlockDriverCompletionFunc *cb, void *opaque);
f9f05dc5
KW
70static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
71 int64_t sector_num, int nb_sectors,
72 QEMUIOVector *iov);
73static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
74 int64_t sector_num, int nb_sectors,
75 QEMUIOVector *iov);
e7a8a783 76static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs);
c5fbe571
SH
77static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
78 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
1c9805a3
SH
79static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
80 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
b2a61371
SH
81static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
82 int64_t sector_num,
83 QEMUIOVector *qiov,
84 int nb_sectors,
85 BlockDriverCompletionFunc *cb,
86 void *opaque,
87 bool is_write,
88 CoroutineEntry *entry);
89static void coroutine_fn bdrv_co_do_rw(void *opaque);
ec530c81 90
1b7bdbc1
SH
91static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
92 QTAILQ_HEAD_INITIALIZER(bdrv_states);
7ee930d0 93
8a22f02a
SH
94static QLIST_HEAD(, BlockDriver) bdrv_drivers =
95 QLIST_HEAD_INITIALIZER(bdrv_drivers);
ea2384d3 96
f9092b10
MA
97/* The device to use for VM snapshots */
98static BlockDriverState *bs_snapshots;
99
eb852011
MA
100/* If non-zero, use only whitelisted block drivers */
101static int use_bdrv_whitelist;
102
9e0b22f4
SH
103#ifdef _WIN32
/* Return non-zero if filename starts with an ASCII letter followed by ':'. */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_letter = (letter >= 'a' && letter <= 'z') ||
                    (letter >= 'A' && letter <= 'Z');

    /* do not look at filename[1] unless the first character qualified */
    if (!is_letter) {
        return 0;
    }
    return filename[1] == ':';
}
110
111int is_windows_drive(const char *filename)
112{
113 if (is_windows_drive_prefix(filename) &&
114 filename[2] == '\0')
115 return 1;
116 if (strstart(filename, "\\\\.\\", NULL) ||
117 strstart(filename, "//./", NULL))
118 return 1;
119 return 0;
120}
121#endif
122
/*
 * Return non-zero if path contains a ':' and is therefore treated as
 * "<protocol>:...".  On Windows, drive names and drive prefixes are never
 * treated as protocols.
 */
static int path_has_protocol(const char *path)
{
    const char *colon;

#ifdef _WIN32
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    colon = strchr(path, ':');
    return colon ? 1 : 0;
}
135
/*
 * Return non-zero if path is absolute.  Everything up to and including the
 * first ':' (if any) is skipped before looking for a leading directory
 * separator.
 */
int path_is_absolute(const char *path)
{
    const char *start = path;
    const char *colon;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (path[0] == '/' || path[0] == '\\') {
        return 1;
    }
#endif

    colon = strchr(path, ':');
    if (colon) {
        start = colon + 1;
    }

#ifdef _WIN32
    return (start[0] == '/' || start[0] == '\\');
#else
    return (start[0] == '/');
#endif
}
155
83f64091
FB
/*
 * Build in dest (at most dest_size bytes, always NUL-terminated when
 * dest_size > 0) the path of filename interpreted relative to base_path.
 * If filename is absolute it is copied unchanged.  Otherwise the part of
 * base_path up to and including its last directory separator (or, failing
 * that, its first ':') is used as the prefix.  URLs are supported.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *cut;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just past the protocol/drive separator, if any */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* position just past the last directory separator, if any */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *bslash;
        bslash = strrchr(base_path, '\\');
        if (!after_sep || bslash > after_sep) {
            after_sep = bslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* keep whichever prefix is longer */
    cut = (after_sep > after_colon) ? after_sep : after_colon;
    prefix_len = cut - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
199
5efa9d5a 200void bdrv_register(BlockDriver *bdrv)
ea2384d3 201{
68485420
KW
202 if (bdrv->bdrv_co_readv) {
203 /* Emulate AIO by coroutines, and sync by AIO */
204 bdrv->bdrv_aio_readv = bdrv_co_aio_readv_em;
205 bdrv->bdrv_aio_writev = bdrv_co_aio_writev_em;
206 bdrv->bdrv_read = bdrv_read_em;
207 bdrv->bdrv_write = bdrv_write_em;
f9f05dc5
KW
208 } else {
209 bdrv->bdrv_co_readv = bdrv_co_readv_em;
210 bdrv->bdrv_co_writev = bdrv_co_writev_em;
211
212 if (!bdrv->bdrv_aio_readv) {
213 /* add AIO emulation layer */
214 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
215 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
216 } else if (!bdrv->bdrv_read) {
217 /* add synchronous IO emulation layer */
218 bdrv->bdrv_read = bdrv_read_em;
219 bdrv->bdrv_write = bdrv_write_em;
220 }
83f64091 221 }
b2e12bc6
CH
222
223 if (!bdrv->bdrv_aio_flush)
224 bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
225
8a22f02a 226 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
ea2384d3 227}
b338082b
FB
228
229/* create a new block device (by default it is empty) */
230BlockDriverState *bdrv_new(const char *device_name)
231{
1b7bdbc1 232 BlockDriverState *bs;
b338082b 233
7267c094 234 bs = g_malloc0(sizeof(BlockDriverState));
b338082b 235 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
ea2384d3 236 if (device_name[0] != '\0') {
1b7bdbc1 237 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
ea2384d3 238 }
28a7282a 239 bdrv_iostatus_disable(bs);
b338082b
FB
240 return bs;
241}
242
ea2384d3
FB
243BlockDriver *bdrv_find_format(const char *format_name)
244{
245 BlockDriver *drv1;
8a22f02a
SH
246 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
247 if (!strcmp(drv1->format_name, format_name)) {
ea2384d3 248 return drv1;
8a22f02a 249 }
ea2384d3
FB
250 }
251 return NULL;
252}
253
eb852011
MA
254static int bdrv_is_whitelisted(BlockDriver *drv)
255{
256 static const char *whitelist[] = {
257 CONFIG_BDRV_WHITELIST
258 };
259 const char **p;
260
261 if (!whitelist[0])
262 return 1; /* no whitelist, anything goes */
263
264 for (p = whitelist; *p; p++) {
265 if (!strcmp(drv->format_name, *p)) {
266 return 1;
267 }
268 }
269 return 0;
270}
271
272BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
273{
274 BlockDriver *drv = bdrv_find_format(format_name);
275 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
276}
277
0e7e1989
KW
278int bdrv_create(BlockDriver *drv, const char* filename,
279 QEMUOptionParameter *options)
ea2384d3
FB
280{
281 if (!drv->bdrv_create)
282 return -ENOTSUP;
0e7e1989
KW
283
284 return drv->bdrv_create(filename, options);
ea2384d3
FB
285}
286
84a12e66
CH
287int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
288{
289 BlockDriver *drv;
290
b50cbabc 291 drv = bdrv_find_protocol(filename);
84a12e66 292 if (drv == NULL) {
16905d71 293 return -ENOENT;
84a12e66
CH
294 }
295
296 return bdrv_create(drv, filename, options);
297}
298
#ifdef _WIN32
/*
 * Create a temporary file and store its name in filename.
 *
 * filename: output buffer
 * size:     size of the output buffer; must be at least MAX_PATH because
 *           GetTempFileName() is not told the buffer size
 */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    /* GetTempFileName() needs an output buffer of at least MAX_PATH chars */
    assert(size >= MAX_PATH);
    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
/*
 * Create a temporary file under $TMPDIR (or /tmp when TMPDIR is unset) and
 * store its name in filename.
 *
 * filename: output buffer
 * size:     size of the output buffer
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;

    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    /* only the name is needed; do not close() an invalid descriptor */
    if (fd >= 0) {
        close(fd);
    }
}
#endif
fc01f7e7 321
84a12e66
CH
322/*
323 * Detect host devices. By convention, /dev/cdrom[N] is always
324 * recognized as a host CDROM.
325 */
326static BlockDriver *find_hdev_driver(const char *filename)
327{
328 int score_max = 0, score;
329 BlockDriver *drv = NULL, *d;
330
331 QLIST_FOREACH(d, &bdrv_drivers, list) {
332 if (d->bdrv_probe_device) {
333 score = d->bdrv_probe_device(filename);
334 if (score > score_max) {
335 score_max = score;
336 drv = d;
337 }
338 }
339 }
340
341 return drv;
342}
343
b50cbabc 344BlockDriver *bdrv_find_protocol(const char *filename)
83f64091
FB
345{
346 BlockDriver *drv1;
347 char protocol[128];
1cec71e3 348 int len;
83f64091 349 const char *p;
19cb3738 350
66f82cee
KW
351 /* TODO Drivers without bdrv_file_open must be specified explicitly */
352
39508e7a
CH
353 /*
354 * XXX(hch): we really should not let host device detection
355 * override an explicit protocol specification, but moving this
356 * later breaks access to device names with colons in them.
357 * Thanks to the brain-dead persistent naming schemes on udev-
358 * based Linux systems those actually are quite common.
359 */
360 drv1 = find_hdev_driver(filename);
361 if (drv1) {
362 return drv1;
363 }
364
9e0b22f4 365 if (!path_has_protocol(filename)) {
39508e7a 366 return bdrv_find_format("file");
84a12e66 367 }
9e0b22f4
SH
368 p = strchr(filename, ':');
369 assert(p != NULL);
1cec71e3
AL
370 len = p - filename;
371 if (len > sizeof(protocol) - 1)
372 len = sizeof(protocol) - 1;
373 memcpy(protocol, filename, len);
374 protocol[len] = '\0';
8a22f02a 375 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
5fafdf24 376 if (drv1->protocol_name &&
8a22f02a 377 !strcmp(drv1->protocol_name, protocol)) {
83f64091 378 return drv1;
8a22f02a 379 }
83f64091
FB
380 }
381 return NULL;
382}
383
c98ac35d 384static int find_image_format(const char *filename, BlockDriver **pdrv)
f3a5d3f8
CH
385{
386 int ret, score, score_max;
387 BlockDriver *drv1, *drv;
388 uint8_t buf[2048];
389 BlockDriverState *bs;
390
f5edb014 391 ret = bdrv_file_open(&bs, filename, 0);
c98ac35d
SW
392 if (ret < 0) {
393 *pdrv = NULL;
394 return ret;
395 }
f8ea0b00 396
08a00559
KW
397 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
398 if (bs->sg || !bdrv_is_inserted(bs)) {
1a396859 399 bdrv_delete(bs);
c98ac35d
SW
400 drv = bdrv_find_format("raw");
401 if (!drv) {
402 ret = -ENOENT;
403 }
404 *pdrv = drv;
405 return ret;
1a396859 406 }
f8ea0b00 407
83f64091
FB
408 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
409 bdrv_delete(bs);
410 if (ret < 0) {
c98ac35d
SW
411 *pdrv = NULL;
412 return ret;
83f64091
FB
413 }
414
ea2384d3 415 score_max = 0;
84a12e66 416 drv = NULL;
8a22f02a 417 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
83f64091
FB
418 if (drv1->bdrv_probe) {
419 score = drv1->bdrv_probe(buf, ret, filename);
420 if (score > score_max) {
421 score_max = score;
422 drv = drv1;
423 }
0849bf08 424 }
fc01f7e7 425 }
c98ac35d
SW
426 if (!drv) {
427 ret = -ENOENT;
428 }
429 *pdrv = drv;
430 return ret;
ea2384d3
FB
431}
432
51762288
SH
433/**
434 * Set the current 'total_sectors' value
435 */
436static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
437{
438 BlockDriver *drv = bs->drv;
439
396759ad
NB
440 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
441 if (bs->sg)
442 return 0;
443
51762288
SH
444 /* query actual device if possible, otherwise just trust the hint */
445 if (drv->bdrv_getlength) {
446 int64_t length = drv->bdrv_getlength(bs);
447 if (length < 0) {
448 return length;
449 }
450 hint = length >> BDRV_SECTOR_BITS;
451 }
452
453 bs->total_sectors = hint;
454 return 0;
455}
456
c3993cdc
SH
457/**
458 * Set open flags for a given cache mode
459 *
460 * Return 0 on success, -1 if the cache mode was invalid.
461 */
462int bdrv_parse_cache_flags(const char *mode, int *flags)
463{
464 *flags &= ~BDRV_O_CACHE_MASK;
465
466 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
467 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
92196b2f
SH
468 } else if (!strcmp(mode, "directsync")) {
469 *flags |= BDRV_O_NOCACHE;
c3993cdc
SH
470 } else if (!strcmp(mode, "writeback")) {
471 *flags |= BDRV_O_CACHE_WB;
472 } else if (!strcmp(mode, "unsafe")) {
473 *flags |= BDRV_O_CACHE_WB;
474 *flags |= BDRV_O_NO_FLUSH;
475 } else if (!strcmp(mode, "writethrough")) {
476 /* this is the default */
477 } else {
478 return -1;
479 }
480
481 return 0;
482}
483
57915332
KW
484/*
485 * Common part for opening disk images and files
486 */
487static int bdrv_open_common(BlockDriverState *bs, const char *filename,
488 int flags, BlockDriver *drv)
489{
490 int ret, open_flags;
491
492 assert(drv != NULL);
493
28dcee10
SH
494 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
495
66f82cee 496 bs->file = NULL;
51762288 497 bs->total_sectors = 0;
57915332
KW
498 bs->encrypted = 0;
499 bs->valid_key = 0;
500 bs->open_flags = flags;
57915332
KW
501 bs->buffer_alignment = 512;
502
503 pstrcpy(bs->filename, sizeof(bs->filename), filename);
504
505 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
506 return -ENOTSUP;
507 }
508
509 bs->drv = drv;
7267c094 510 bs->opaque = g_malloc0(drv->instance_size);
57915332 511
a6599793 512 if (flags & BDRV_O_CACHE_WB)
57915332
KW
513 bs->enable_write_cache = 1;
514
515 /*
516 * Clear flags that are internal to the block layer before opening the
517 * image.
518 */
519 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
520
521 /*
ebabb67a 522 * Snapshots should be writable.
57915332
KW
523 */
524 if (bs->is_temporary) {
525 open_flags |= BDRV_O_RDWR;
526 }
527
66f82cee
KW
528 /* Open the image, either directly or using a protocol */
529 if (drv->bdrv_file_open) {
530 ret = drv->bdrv_file_open(bs, filename, open_flags);
531 } else {
532 ret = bdrv_file_open(&bs->file, filename, open_flags);
533 if (ret >= 0) {
534 ret = drv->bdrv_open(bs, open_flags);
535 }
536 }
537
57915332
KW
538 if (ret < 0) {
539 goto free_and_fail;
540 }
541
542 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
51762288
SH
543
544 ret = refresh_total_sectors(bs, bs->total_sectors);
545 if (ret < 0) {
546 goto free_and_fail;
57915332 547 }
51762288 548
57915332
KW
549#ifndef _WIN32
550 if (bs->is_temporary) {
551 unlink(filename);
552 }
553#endif
554 return 0;
555
556free_and_fail:
66f82cee
KW
557 if (bs->file) {
558 bdrv_delete(bs->file);
559 bs->file = NULL;
560 }
7267c094 561 g_free(bs->opaque);
57915332
KW
562 bs->opaque = NULL;
563 bs->drv = NULL;
564 return ret;
565}
566
b6ce07aa
KW
567/*
568 * Opens a file using a protocol (file, host_device, nbd, ...)
569 */
83f64091 570int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
ea2384d3 571{
83f64091 572 BlockDriverState *bs;
6db95603 573 BlockDriver *drv;
83f64091
FB
574 int ret;
575
b50cbabc 576 drv = bdrv_find_protocol(filename);
6db95603
CH
577 if (!drv) {
578 return -ENOENT;
579 }
580
83f64091 581 bs = bdrv_new("");
b6ce07aa 582 ret = bdrv_open_common(bs, filename, flags, drv);
83f64091
FB
583 if (ret < 0) {
584 bdrv_delete(bs);
585 return ret;
3b0d4f61 586 }
71d0770c 587 bs->growable = 1;
83f64091
FB
588 *pbs = bs;
589 return 0;
590}
591
b6ce07aa
KW
592/*
593 * Opens a disk image (raw, qcow2, vmdk, ...)
594 */
d6e9098e
KW
595int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
596 BlockDriver *drv)
ea2384d3 597{
b6ce07aa 598 int ret;
712e7874 599
83f64091 600 if (flags & BDRV_O_SNAPSHOT) {
ea2384d3
FB
601 BlockDriverState *bs1;
602 int64_t total_size;
7c96d46e 603 int is_protocol = 0;
91a073a9
KW
604 BlockDriver *bdrv_qcow2;
605 QEMUOptionParameter *options;
b6ce07aa
KW
606 char tmp_filename[PATH_MAX];
607 char backing_filename[PATH_MAX];
3b46e624 608
ea2384d3
FB
609 /* if snapshot, we create a temporary backing file and open it
610 instead of opening 'filename' directly */
33e3963e 611
ea2384d3
FB
612 /* if there is a backing file, use it */
613 bs1 = bdrv_new("");
d6e9098e 614 ret = bdrv_open(bs1, filename, 0, drv);
51d7c00c 615 if (ret < 0) {
ea2384d3 616 bdrv_delete(bs1);
51d7c00c 617 return ret;
ea2384d3 618 }
3e82990b 619 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
7c96d46e
AL
620
621 if (bs1->drv && bs1->drv->protocol_name)
622 is_protocol = 1;
623
ea2384d3 624 bdrv_delete(bs1);
3b46e624 625
ea2384d3 626 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
7c96d46e
AL
627
628 /* Real path is meaningless for protocols */
629 if (is_protocol)
630 snprintf(backing_filename, sizeof(backing_filename),
631 "%s", filename);
114cdfa9
KS
632 else if (!realpath(filename, backing_filename))
633 return -errno;
7c96d46e 634
91a073a9
KW
635 bdrv_qcow2 = bdrv_find_format("qcow2");
636 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
637
3e82990b 638 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
91a073a9
KW
639 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
640 if (drv) {
641 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
642 drv->format_name);
643 }
644
645 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
d748768c 646 free_option_parameters(options);
51d7c00c
AL
647 if (ret < 0) {
648 return ret;
ea2384d3 649 }
91a073a9 650
ea2384d3 651 filename = tmp_filename;
91a073a9 652 drv = bdrv_qcow2;
ea2384d3
FB
653 bs->is_temporary = 1;
654 }
712e7874 655
b6ce07aa 656 /* Find the right image format driver */
6db95603 657 if (!drv) {
c98ac35d 658 ret = find_image_format(filename, &drv);
51d7c00c 659 }
6987307c 660
51d7c00c 661 if (!drv) {
51d7c00c 662 goto unlink_and_fail;
ea2384d3 663 }
b6ce07aa
KW
664
665 /* Open the image */
666 ret = bdrv_open_common(bs, filename, flags, drv);
667 if (ret < 0) {
6987307c
CH
668 goto unlink_and_fail;
669 }
670
b6ce07aa
KW
671 /* If there is a backing file, use it */
672 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
673 char backing_filename[PATH_MAX];
674 int back_flags;
675 BlockDriver *back_drv = NULL;
676
677 bs->backing_hd = bdrv_new("");
df2dbb4a
SH
678
679 if (path_has_protocol(bs->backing_file)) {
680 pstrcpy(backing_filename, sizeof(backing_filename),
681 bs->backing_file);
682 } else {
683 path_combine(backing_filename, sizeof(backing_filename),
684 filename, bs->backing_file);
685 }
686
687 if (bs->backing_format[0] != '\0') {
b6ce07aa 688 back_drv = bdrv_find_format(bs->backing_format);
df2dbb4a 689 }
b6ce07aa
KW
690
691 /* backing files always opened read-only */
692 back_flags =
693 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
694
695 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
696 if (ret < 0) {
697 bdrv_close(bs);
698 return ret;
699 }
700 if (bs->is_temporary) {
701 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
702 } else {
703 /* base image inherits from "parent" */
704 bs->backing_hd->keep_read_only = bs->keep_read_only;
705 }
706 }
707
708 if (!bdrv_key_required(bs)) {
7d4b4ba5 709 bdrv_dev_change_media_cb(bs, true);
b6ce07aa
KW
710 }
711
712 return 0;
713
714unlink_and_fail:
715 if (bs->is_temporary) {
716 unlink(filename);
717 }
718 return ret;
719}
720
fc01f7e7
FB
721void bdrv_close(BlockDriverState *bs)
722{
19cb3738 723 if (bs->drv) {
f9092b10
MA
724 if (bs == bs_snapshots) {
725 bs_snapshots = NULL;
726 }
557df6ac 727 if (bs->backing_hd) {
ea2384d3 728 bdrv_delete(bs->backing_hd);
557df6ac
SH
729 bs->backing_hd = NULL;
730 }
ea2384d3 731 bs->drv->bdrv_close(bs);
7267c094 732 g_free(bs->opaque);
ea2384d3
FB
733#ifdef _WIN32
734 if (bs->is_temporary) {
735 unlink(bs->filename);
736 }
67b915a5 737#endif
ea2384d3
FB
738 bs->opaque = NULL;
739 bs->drv = NULL;
b338082b 740
66f82cee
KW
741 if (bs->file != NULL) {
742 bdrv_close(bs->file);
743 }
744
7d4b4ba5 745 bdrv_dev_change_media_cb(bs, false);
b338082b
FB
746 }
747}
748
2bc93fed
MK
749void bdrv_close_all(void)
750{
751 BlockDriverState *bs;
752
753 QTAILQ_FOREACH(bs, &bdrv_states, list) {
754 bdrv_close(bs);
755 }
756}
757
d22b2f41
RH
758/* make a BlockDriverState anonymous by removing from bdrv_state list.
759 Also, NULL terminate the device_name to prevent double remove */
760void bdrv_make_anon(BlockDriverState *bs)
761{
762 if (bs->device_name[0] != '\0') {
763 QTAILQ_REMOVE(&bdrv_states, bs, list);
764 }
765 bs->device_name[0] = '\0';
766}
767
b338082b
FB
768void bdrv_delete(BlockDriverState *bs)
769{
fa879d62 770 assert(!bs->dev);
18846dee 771
1b7bdbc1 772 /* remove from list, if necessary */
d22b2f41 773 bdrv_make_anon(bs);
34c6f050 774
b338082b 775 bdrv_close(bs);
66f82cee
KW
776 if (bs->file != NULL) {
777 bdrv_delete(bs->file);
778 }
779
f9092b10 780 assert(bs != bs_snapshots);
7267c094 781 g_free(bs);
fc01f7e7
FB
782}
783
fa879d62
MA
784int bdrv_attach_dev(BlockDriverState *bs, void *dev)
785/* TODO change to DeviceState *dev when all users are qdevified */
18846dee 786{
fa879d62 787 if (bs->dev) {
18846dee
MA
788 return -EBUSY;
789 }
fa879d62 790 bs->dev = dev;
28a7282a 791 bdrv_iostatus_reset(bs);
18846dee
MA
792 return 0;
793}
794
fa879d62
MA
795/* TODO qdevified devices don't use this, remove when devices are qdevified */
796void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
18846dee 797{
fa879d62
MA
798 if (bdrv_attach_dev(bs, dev) < 0) {
799 abort();
800 }
801}
802
803void bdrv_detach_dev(BlockDriverState *bs, void *dev)
804/* TODO change to DeviceState *dev when all users are qdevified */
805{
806 assert(bs->dev == dev);
807 bs->dev = NULL;
0e49de52
MA
808 bs->dev_ops = NULL;
809 bs->dev_opaque = NULL;
29e05f20 810 bs->buffer_alignment = 512;
18846dee
MA
811}
812
fa879d62
MA
813/* TODO change to return DeviceState * when all users are qdevified */
814void *bdrv_get_attached_dev(BlockDriverState *bs)
18846dee 815{
fa879d62 816 return bs->dev;
18846dee
MA
817}
818
0e49de52
MA
819void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
820 void *opaque)
821{
822 bs->dev_ops = ops;
823 bs->dev_opaque = opaque;
2c6942fa
MA
824 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
825 bs_snapshots = NULL;
826 }
0e49de52
MA
827}
828
7d4b4ba5 829static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
0e49de52 830{
145feb17 831 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
7d4b4ba5 832 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
145feb17
MA
833 }
834}
835
2c6942fa
MA
836bool bdrv_dev_has_removable_media(BlockDriverState *bs)
837{
838 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
839}
840
e4def80b
MA
841bool bdrv_dev_is_tray_open(BlockDriverState *bs)
842{
843 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
844 return bs->dev_ops->is_tray_open(bs->dev_opaque);
845 }
846 return false;
847}
848
145feb17
MA
849static void bdrv_dev_resize_cb(BlockDriverState *bs)
850{
851 if (bs->dev_ops && bs->dev_ops->resize_cb) {
852 bs->dev_ops->resize_cb(bs->dev_opaque);
0e49de52
MA
853 }
854}
855
f107639a
MA
856bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
857{
858 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
859 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
860 }
861 return false;
862}
863
e97fc193
AL
864/*
865 * Run consistency checks on an image
866 *
e076f338 867 * Returns 0 if the check could be completed (it doesn't mean that the image is
a1c7273b 868 * free of errors) or -errno when an internal error occurred. The results of the
e076f338 869 * check are stored in res.
e97fc193 870 */
e076f338 871int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
e97fc193
AL
872{
873 if (bs->drv->bdrv_check == NULL) {
874 return -ENOTSUP;
875 }
876
e076f338 877 memset(res, 0, sizeof(*res));
9ac228e0 878 return bs->drv->bdrv_check(bs, res);
e97fc193
AL
879}
880
8a426614
KW
881#define COMMIT_BUF_SECTORS 2048
882
33e3963e
FB
883/* commit COW file into the raw image */
884int bdrv_commit(BlockDriverState *bs)
885{
19cb3738 886 BlockDriver *drv = bs->drv;
ee181196 887 BlockDriver *backing_drv;
8a426614
KW
888 int64_t sector, total_sectors;
889 int n, ro, open_flags;
4dca4b63 890 int ret = 0, rw_ret = 0;
8a426614 891 uint8_t *buf;
4dca4b63
NS
892 char filename[1024];
893 BlockDriverState *bs_rw, *bs_ro;
33e3963e 894
19cb3738
FB
895 if (!drv)
896 return -ENOMEDIUM;
4dca4b63
NS
897
898 if (!bs->backing_hd) {
899 return -ENOTSUP;
33e3963e
FB
900 }
901
4dca4b63
NS
902 if (bs->backing_hd->keep_read_only) {
903 return -EACCES;
904 }
ee181196
KW
905
906 backing_drv = bs->backing_hd->drv;
4dca4b63
NS
907 ro = bs->backing_hd->read_only;
908 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
909 open_flags = bs->backing_hd->open_flags;
910
911 if (ro) {
912 /* re-open as RW */
913 bdrv_delete(bs->backing_hd);
914 bs->backing_hd = NULL;
915 bs_rw = bdrv_new("");
ee181196
KW
916 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
917 backing_drv);
4dca4b63
NS
918 if (rw_ret < 0) {
919 bdrv_delete(bs_rw);
920 /* try to re-open read-only */
921 bs_ro = bdrv_new("");
ee181196
KW
922 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
923 backing_drv);
4dca4b63
NS
924 if (ret < 0) {
925 bdrv_delete(bs_ro);
926 /* drive not functional anymore */
927 bs->drv = NULL;
928 return ret;
929 }
930 bs->backing_hd = bs_ro;
931 return rw_ret;
932 }
933 bs->backing_hd = bs_rw;
ea2384d3 934 }
33e3963e 935
6ea44308 936 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
7267c094 937 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
8a426614
KW
938
939 for (sector = 0; sector < total_sectors; sector += n) {
940 if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
941
942 if (bdrv_read(bs, sector, buf, n) != 0) {
943 ret = -EIO;
944 goto ro_cleanup;
945 }
946
947 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
948 ret = -EIO;
949 goto ro_cleanup;
950 }
ea2384d3 951 }
33e3963e 952 }
95389c86 953
1d44952f
CH
954 if (drv->bdrv_make_empty) {
955 ret = drv->bdrv_make_empty(bs);
956 bdrv_flush(bs);
957 }
95389c86 958
3f5075ae
CH
959 /*
960 * Make sure all data we wrote to the backing device is actually
961 * stable on disk.
962 */
963 if (bs->backing_hd)
964 bdrv_flush(bs->backing_hd);
4dca4b63
NS
965
966ro_cleanup:
7267c094 967 g_free(buf);
4dca4b63
NS
968
969 if (ro) {
970 /* re-open as RO */
971 bdrv_delete(bs->backing_hd);
972 bs->backing_hd = NULL;
973 bs_ro = bdrv_new("");
ee181196
KW
974 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
975 backing_drv);
4dca4b63
NS
976 if (ret < 0) {
977 bdrv_delete(bs_ro);
978 /* drive not functional anymore */
979 bs->drv = NULL;
980 return ret;
981 }
982 bs->backing_hd = bs_ro;
983 bs->backing_hd->keep_read_only = 0;
984 }
985
1d44952f 986 return ret;
33e3963e
FB
987}
988
6ab4b5ab
MA
989void bdrv_commit_all(void)
990{
991 BlockDriverState *bs;
992
993 QTAILQ_FOREACH(bs, &bdrv_states, list) {
994 bdrv_commit(bs);
995 }
996}
997
756e6736
KW
998/*
999 * Return values:
1000 * 0 - success
1001 * -EINVAL - backing format specified, but no file
1002 * -ENOSPC - can't update the backing file because no space is left in the
1003 * image file header
1004 * -ENOTSUP - format driver doesn't support changing the backing file
1005 */
1006int bdrv_change_backing_file(BlockDriverState *bs,
1007 const char *backing_file, const char *backing_fmt)
1008{
1009 BlockDriver *drv = bs->drv;
1010
1011 if (drv->bdrv_change_backing_file != NULL) {
1012 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
1013 } else {
1014 return -ENOTSUP;
1015 }
1016}
1017
71d0770c
AL
1018static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1019 size_t size)
1020{
1021 int64_t len;
1022
1023 if (!bdrv_is_inserted(bs))
1024 return -ENOMEDIUM;
1025
1026 if (bs->growable)
1027 return 0;
1028
1029 len = bdrv_getlength(bs);
1030
fbb7b4e0
KW
1031 if (offset < 0)
1032 return -EIO;
1033
1034 if ((offset > len) || (len - offset < size))
71d0770c
AL
1035 return -EIO;
1036
1037 return 0;
1038}
1039
1040static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1041 int nb_sectors)
1042{
eb5a3165
JS
1043 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1044 nb_sectors * BDRV_SECTOR_SIZE);
71d0770c
AL
1045}
1046
e7a8a783
KW
1047static inline bool bdrv_has_async_rw(BlockDriver *drv)
1048{
1049 return drv->bdrv_co_readv != bdrv_co_readv_em
1050 || drv->bdrv_aio_readv != bdrv_aio_readv_em;
1051}
1052
1053static inline bool bdrv_has_async_flush(BlockDriver *drv)
1054{
1055 return drv->bdrv_aio_flush != bdrv_aio_flush_em;
1056}
1057
1c9805a3
SH
1058typedef struct RwCo {
1059 BlockDriverState *bs;
1060 int64_t sector_num;
1061 int nb_sectors;
1062 QEMUIOVector *qiov;
1063 bool is_write;
1064 int ret;
1065} RwCo;
1066
1067static void coroutine_fn bdrv_rw_co_entry(void *opaque)
fc01f7e7 1068{
1c9805a3 1069 RwCo *rwco = opaque;
ea2384d3 1070
1c9805a3
SH
1071 if (!rwco->is_write) {
1072 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
1073 rwco->nb_sectors, rwco->qiov);
1074 } else {
1075 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
1076 rwco->nb_sectors, rwco->qiov);
1077 }
1078}
e7a8a783 1079
1c9805a3
SH
1080/*
1081 * Process a synchronous request using coroutines
1082 */
1083static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1084 int nb_sectors, bool is_write)
1085{
1086 QEMUIOVector qiov;
1087 struct iovec iov = {
1088 .iov_base = (void *)buf,
1089 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1090 };
1091 Coroutine *co;
1092 RwCo rwco = {
1093 .bs = bs,
1094 .sector_num = sector_num,
1095 .nb_sectors = nb_sectors,
1096 .qiov = &qiov,
1097 .is_write = is_write,
1098 .ret = NOT_DONE,
1099 };
e7a8a783 1100
1c9805a3 1101 qemu_iovec_init_external(&qiov, &iov, 1);
e7a8a783 1102
1c9805a3
SH
1103 if (qemu_in_coroutine()) {
1104 /* Fast-path if already in coroutine context */
1105 bdrv_rw_co_entry(&rwco);
1106 } else {
1107 co = qemu_coroutine_create(bdrv_rw_co_entry);
1108 qemu_coroutine_enter(co, &rwco);
1109 while (rwco.ret == NOT_DONE) {
1110 qemu_aio_wait();
1111 }
1112 }
1113 return rwco.ret;
1114}
b338082b 1115
1c9805a3
SH
1116/* return < 0 if error. See bdrv_write() for the return codes */
1117int bdrv_read(BlockDriverState *bs, int64_t sector_num,
1118 uint8_t *buf, int nb_sectors)
1119{
1120 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
fc01f7e7
FB
1121}
1122
7cd1e32a 1123static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
a55eb92c 1124 int nb_sectors, int dirty)
7cd1e32a
LS
1125{
1126 int64_t start, end;
c6d22830 1127 unsigned long val, idx, bit;
a55eb92c 1128
6ea44308 1129 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
c6d22830 1130 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c
JK
1131
1132 for (; start <= end; start++) {
c6d22830
JK
1133 idx = start / (sizeof(unsigned long) * 8);
1134 bit = start % (sizeof(unsigned long) * 8);
1135 val = bs->dirty_bitmap[idx];
1136 if (dirty) {
6d59fec1 1137 if (!(val & (1UL << bit))) {
aaa0eb75 1138 bs->dirty_count++;
6d59fec1 1139 val |= 1UL << bit;
aaa0eb75 1140 }
c6d22830 1141 } else {
6d59fec1 1142 if (val & (1UL << bit)) {
aaa0eb75 1143 bs->dirty_count--;
6d59fec1 1144 val &= ~(1UL << bit);
aaa0eb75 1145 }
c6d22830
JK
1146 }
1147 bs->dirty_bitmap[idx] = val;
7cd1e32a
LS
1148 }
1149}
1150
5fafdf24 1151/* Return < 0 if error. Important errors are:
19cb3738
FB
1152 -EIO generic I/O error (may happen for all errors)
1153 -ENOMEDIUM No media inserted.
1154 -EINVAL Invalid sector number or nb_sectors
1155 -EACCES Trying to write a read-only device
1156*/
5fafdf24 1157int bdrv_write(BlockDriverState *bs, int64_t sector_num,
fc01f7e7
FB
1158 const uint8_t *buf, int nb_sectors)
1159{
1c9805a3 1160 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
83f64091
FB
1161}
1162
eda578e5
AL
1163int bdrv_pread(BlockDriverState *bs, int64_t offset,
1164 void *buf, int count1)
83f64091 1165{
6ea44308 1166 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
83f64091
FB
1167 int len, nb_sectors, count;
1168 int64_t sector_num;
9a8c4cce 1169 int ret;
83f64091
FB
1170
1171 count = count1;
1172 /* first read to align to sector start */
6ea44308 1173 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
83f64091
FB
1174 if (len > count)
1175 len = count;
6ea44308 1176 sector_num = offset >> BDRV_SECTOR_BITS;
83f64091 1177 if (len > 0) {
9a8c4cce
KW
1178 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1179 return ret;
6ea44308 1180 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
83f64091
FB
1181 count -= len;
1182 if (count == 0)
1183 return count1;
1184 sector_num++;
1185 buf += len;
1186 }
1187
1188 /* read the sectors "in place" */
6ea44308 1189 nb_sectors = count >> BDRV_SECTOR_BITS;
83f64091 1190 if (nb_sectors > 0) {
9a8c4cce
KW
1191 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1192 return ret;
83f64091 1193 sector_num += nb_sectors;
6ea44308 1194 len = nb_sectors << BDRV_SECTOR_BITS;
83f64091
FB
1195 buf += len;
1196 count -= len;
1197 }
1198
1199 /* add data from the last sector */
1200 if (count > 0) {
9a8c4cce
KW
1201 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1202 return ret;
83f64091
FB
1203 memcpy(buf, tmp_buf, count);
1204 }
1205 return count1;
1206}
1207
eda578e5
AL
1208int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1209 const void *buf, int count1)
83f64091 1210{
6ea44308 1211 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
83f64091
FB
1212 int len, nb_sectors, count;
1213 int64_t sector_num;
9a8c4cce 1214 int ret;
83f64091
FB
1215
1216 count = count1;
1217 /* first write to align to sector start */
6ea44308 1218 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
83f64091
FB
1219 if (len > count)
1220 len = count;
6ea44308 1221 sector_num = offset >> BDRV_SECTOR_BITS;
83f64091 1222 if (len > 0) {
9a8c4cce
KW
1223 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1224 return ret;
6ea44308 1225 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
9a8c4cce
KW
1226 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1227 return ret;
83f64091
FB
1228 count -= len;
1229 if (count == 0)
1230 return count1;
1231 sector_num++;
1232 buf += len;
1233 }
1234
1235 /* write the sectors "in place" */
6ea44308 1236 nb_sectors = count >> BDRV_SECTOR_BITS;
83f64091 1237 if (nb_sectors > 0) {
9a8c4cce
KW
1238 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1239 return ret;
83f64091 1240 sector_num += nb_sectors;
6ea44308 1241 len = nb_sectors << BDRV_SECTOR_BITS;
83f64091
FB
1242 buf += len;
1243 count -= len;
1244 }
1245
1246 /* add data from the last sector */
1247 if (count > 0) {
9a8c4cce
KW
1248 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1249 return ret;
83f64091 1250 memcpy(tmp_buf, buf, count);
9a8c4cce
KW
1251 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1252 return ret;
83f64091
FB
1253 }
1254 return count1;
1255}
83f64091 1256
f08145fe
KW
1257/*
1258 * Writes to the file and ensures that no writes are reordered across this
1259 * request (acts as a barrier)
1260 *
1261 * Returns 0 on success, -errno in error cases.
1262 */
1263int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1264 const void *buf, int count)
1265{
1266 int ret;
1267
1268 ret = bdrv_pwrite(bs, offset, buf, count);
1269 if (ret < 0) {
1270 return ret;
1271 }
1272
92196b2f
SH
1273 /* No flush needed for cache modes that use O_DSYNC */
1274 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
f08145fe
KW
1275 bdrv_flush(bs);
1276 }
1277
1278 return 0;
1279}
1280
c5fbe571
SH
1281/*
1282 * Handle a read request in coroutine context
1283 */
1284static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
1285 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
da1fa91d
KW
1286{
1287 BlockDriver *drv = bs->drv;
1288
da1fa91d
KW
1289 if (!drv) {
1290 return -ENOMEDIUM;
1291 }
1292 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1293 return -EIO;
1294 }
1295
1296 return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1297}
1298
c5fbe571 1299int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
da1fa91d
KW
1300 int nb_sectors, QEMUIOVector *qiov)
1301{
c5fbe571 1302 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
da1fa91d 1303
c5fbe571
SH
1304 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
1305}
1306
1307/*
1308 * Handle a write request in coroutine context
1309 */
1310static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
1311 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1312{
1313 BlockDriver *drv = bs->drv;
6b7cb247 1314 int ret;
da1fa91d
KW
1315
1316 if (!bs->drv) {
1317 return -ENOMEDIUM;
1318 }
1319 if (bs->read_only) {
1320 return -EACCES;
1321 }
1322 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1323 return -EIO;
1324 }
1325
6b7cb247
SH
1326 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1327
da1fa91d
KW
1328 if (bs->dirty_bitmap) {
1329 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1330 }
1331
1332 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1333 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1334 }
1335
6b7cb247 1336 return ret;
da1fa91d
KW
1337}
1338
c5fbe571
SH
1339int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1340 int nb_sectors, QEMUIOVector *qiov)
1341{
1342 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1343
1344 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
1345}
1346
83f64091
FB
1347/**
1348 * Truncate file to 'offset' bytes (needed only for file protocols)
1349 */
1350int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1351{
1352 BlockDriver *drv = bs->drv;
51762288 1353 int ret;
83f64091 1354 if (!drv)
19cb3738 1355 return -ENOMEDIUM;
83f64091
FB
1356 if (!drv->bdrv_truncate)
1357 return -ENOTSUP;
59f2689d
NS
1358 if (bs->read_only)
1359 return -EACCES;
8591675f
MT
1360 if (bdrv_in_use(bs))
1361 return -EBUSY;
51762288
SH
1362 ret = drv->bdrv_truncate(bs, offset);
1363 if (ret == 0) {
1364 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
145feb17 1365 bdrv_dev_resize_cb(bs);
51762288
SH
1366 }
1367 return ret;
83f64091
FB
1368}
1369
4a1d5e1f
FZ
1370/**
1371 * Length of a allocated file in bytes. Sparse files are counted by actual
1372 * allocated space. Return < 0 if error or unknown.
1373 */
1374int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1375{
1376 BlockDriver *drv = bs->drv;
1377 if (!drv) {
1378 return -ENOMEDIUM;
1379 }
1380 if (drv->bdrv_get_allocated_file_size) {
1381 return drv->bdrv_get_allocated_file_size(bs);
1382 }
1383 if (bs->file) {
1384 return bdrv_get_allocated_file_size(bs->file);
1385 }
1386 return -ENOTSUP;
1387}
1388
83f64091
FB
1389/**
1390 * Length of a file in bytes. Return < 0 if error or unknown.
1391 */
1392int64_t bdrv_getlength(BlockDriverState *bs)
1393{
1394 BlockDriver *drv = bs->drv;
1395 if (!drv)
19cb3738 1396 return -ENOMEDIUM;
51762288 1397
2c6942fa 1398 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
46a4e4e6
SH
1399 if (drv->bdrv_getlength) {
1400 return drv->bdrv_getlength(bs);
1401 }
83f64091 1402 }
46a4e4e6 1403 return bs->total_sectors * BDRV_SECTOR_SIZE;
fc01f7e7
FB
1404}
1405
19cb3738 1406/* return 0 as number of sectors if no device present or error */
96b8f136 1407void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
fc01f7e7 1408{
19cb3738
FB
1409 int64_t length;
1410 length = bdrv_getlength(bs);
1411 if (length < 0)
1412 length = 0;
1413 else
6ea44308 1414 length = length >> BDRV_SECTOR_BITS;
19cb3738 1415 *nb_sectors_ptr = length;
fc01f7e7 1416}
cf98951b 1417
f3d54fc4
AL
/* One entry of the MSDOS partition table as parsed by guess_disk_lchs() */
struct partition {
    uint8_t boot_ind;       /* 0x80 - active */
    uint8_t head;           /* starting head */
    uint8_t sector;         /* starting sector */
    uint8_t cyl;            /* starting cylinder */
    uint8_t sys_ind;        /* What partition type */
    uint8_t end_head;       /* end head */
    uint8_t end_sector;     /* end sector */
    uint8_t end_cyl;        /* end cylinder */
    uint32_t start_sect;    /* starting sector counting from 0 */
    uint32_t nr_sects;      /* nr of sectors in partition */
} QEMU_PACKED;
f3d54fc4
AL
1430
1431/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1432static int guess_disk_lchs(BlockDriverState *bs,
1433 int *pcylinders, int *pheads, int *psectors)
1434{
eb5a3165 1435 uint8_t buf[BDRV_SECTOR_SIZE];
f3d54fc4
AL
1436 int ret, i, heads, sectors, cylinders;
1437 struct partition *p;
1438 uint32_t nr_sects;
a38131b6 1439 uint64_t nb_sectors;
f3d54fc4
AL
1440
1441 bdrv_get_geometry(bs, &nb_sectors);
1442
1443 ret = bdrv_read(bs, 0, buf, 1);
1444 if (ret < 0)
1445 return -1;
1446 /* test msdos magic */
1447 if (buf[510] != 0x55 || buf[511] != 0xaa)
1448 return -1;
1449 for(i = 0; i < 4; i++) {
1450 p = ((struct partition *)(buf + 0x1be)) + i;
1451 nr_sects = le32_to_cpu(p->nr_sects);
1452 if (nr_sects && p->end_head) {
1453 /* We make the assumption that the partition terminates on
1454 a cylinder boundary */
1455 heads = p->end_head + 1;
1456 sectors = p->end_sector & 63;
1457 if (sectors == 0)
1458 continue;
1459 cylinders = nb_sectors / (heads * sectors);
1460 if (cylinders < 1 || cylinders > 16383)
1461 continue;
1462 *pheads = heads;
1463 *psectors = sectors;
1464 *pcylinders = cylinders;
1465#if 0
1466 printf("guessed geometry: LCHS=%d %d %d\n",
1467 cylinders, heads, sectors);
1468#endif
1469 return 0;
1470 }
1471 }
1472 return -1;
1473}
1474
1475void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1476{
1477 int translation, lba_detected = 0;
1478 int cylinders, heads, secs;
a38131b6 1479 uint64_t nb_sectors;
f3d54fc4
AL
1480
1481 /* if a geometry hint is available, use it */
1482 bdrv_get_geometry(bs, &nb_sectors);
1483 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1484 translation = bdrv_get_translation_hint(bs);
1485 if (cylinders != 0) {
1486 *pcyls = cylinders;
1487 *pheads = heads;
1488 *psecs = secs;
1489 } else {
1490 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1491 if (heads > 16) {
1492 /* if heads > 16, it means that a BIOS LBA
1493 translation was active, so the default
1494 hardware geometry is OK */
1495 lba_detected = 1;
1496 goto default_geometry;
1497 } else {
1498 *pcyls = cylinders;
1499 *pheads = heads;
1500 *psecs = secs;
1501 /* disable any translation to be in sync with
1502 the logical geometry */
1503 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1504 bdrv_set_translation_hint(bs,
1505 BIOS_ATA_TRANSLATION_NONE);
1506 }
1507 }
1508 } else {
1509 default_geometry:
1510 /* if no geometry, use a standard physical disk geometry */
1511 cylinders = nb_sectors / (16 * 63);
1512
1513 if (cylinders > 16383)
1514 cylinders = 16383;
1515 else if (cylinders < 2)
1516 cylinders = 2;
1517 *pcyls = cylinders;
1518 *pheads = 16;
1519 *psecs = 63;
1520 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1521 if ((*pcyls * *pheads) <= 131072) {
1522 bdrv_set_translation_hint(bs,
1523 BIOS_ATA_TRANSLATION_LARGE);
1524 } else {
1525 bdrv_set_translation_hint(bs,
1526 BIOS_ATA_TRANSLATION_LBA);
1527 }
1528 }
1529 }
1530 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1531 }
1532}
1533
5fafdf24 1534void bdrv_set_geometry_hint(BlockDriverState *bs,
b338082b
FB
1535 int cyls, int heads, int secs)
1536{
1537 bs->cyls = cyls;
1538 bs->heads = heads;
1539 bs->secs = secs;
1540}
1541
46d4767d
FB
1542void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1543{
1544 bs->translation = translation;
1545}
1546
5fafdf24 1547void bdrv_get_geometry_hint(BlockDriverState *bs,
b338082b
FB
1548 int *pcyls, int *pheads, int *psecs)
1549{
1550 *pcyls = bs->cyls;
1551 *pheads = bs->heads;
1552 *psecs = bs->secs;
1553}
1554
5bbdbb46
BS
1555/* Recognize floppy formats */
1556typedef struct FDFormat {
1557 FDriveType drive;
1558 uint8_t last_sect;
1559 uint8_t max_track;
1560 uint8_t max_head;
1561} FDFormat;
1562
1563static const FDFormat fd_formats[] = {
1564 /* First entry is default format */
1565 /* 1.44 MB 3"1/2 floppy disks */
1566 { FDRIVE_DRV_144, 18, 80, 1, },
1567 { FDRIVE_DRV_144, 20, 80, 1, },
1568 { FDRIVE_DRV_144, 21, 80, 1, },
1569 { FDRIVE_DRV_144, 21, 82, 1, },
1570 { FDRIVE_DRV_144, 21, 83, 1, },
1571 { FDRIVE_DRV_144, 22, 80, 1, },
1572 { FDRIVE_DRV_144, 23, 80, 1, },
1573 { FDRIVE_DRV_144, 24, 80, 1, },
1574 /* 2.88 MB 3"1/2 floppy disks */
1575 { FDRIVE_DRV_288, 36, 80, 1, },
1576 { FDRIVE_DRV_288, 39, 80, 1, },
1577 { FDRIVE_DRV_288, 40, 80, 1, },
1578 { FDRIVE_DRV_288, 44, 80, 1, },
1579 { FDRIVE_DRV_288, 48, 80, 1, },
1580 /* 720 kB 3"1/2 floppy disks */
1581 { FDRIVE_DRV_144, 9, 80, 1, },
1582 { FDRIVE_DRV_144, 10, 80, 1, },
1583 { FDRIVE_DRV_144, 10, 82, 1, },
1584 { FDRIVE_DRV_144, 10, 83, 1, },
1585 { FDRIVE_DRV_144, 13, 80, 1, },
1586 { FDRIVE_DRV_144, 14, 80, 1, },
1587 /* 1.2 MB 5"1/4 floppy disks */
1588 { FDRIVE_DRV_120, 15, 80, 1, },
1589 { FDRIVE_DRV_120, 18, 80, 1, },
1590 { FDRIVE_DRV_120, 18, 82, 1, },
1591 { FDRIVE_DRV_120, 18, 83, 1, },
1592 { FDRIVE_DRV_120, 20, 80, 1, },
1593 /* 720 kB 5"1/4 floppy disks */
1594 { FDRIVE_DRV_120, 9, 80, 1, },
1595 { FDRIVE_DRV_120, 11, 80, 1, },
1596 /* 360 kB 5"1/4 floppy disks */
1597 { FDRIVE_DRV_120, 9, 40, 1, },
1598 { FDRIVE_DRV_120, 9, 40, 0, },
1599 { FDRIVE_DRV_120, 10, 41, 1, },
1600 { FDRIVE_DRV_120, 10, 42, 1, },
1601 /* 320 kB 5"1/4 floppy disks */
1602 { FDRIVE_DRV_120, 8, 40, 1, },
1603 { FDRIVE_DRV_120, 8, 40, 0, },
1604 /* 360 kB must match 5"1/4 better than 3"1/2... */
1605 { FDRIVE_DRV_144, 9, 80, 0, },
1606 /* end */
1607 { FDRIVE_DRV_NONE, -1, -1, 0, },
1608};
1609
1610void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1611 int *max_track, int *last_sect,
1612 FDriveType drive_in, FDriveType *drive)
1613{
1614 const FDFormat *parse;
1615 uint64_t nb_sectors, size;
1616 int i, first_match, match;
1617
1618 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1619 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1620 /* User defined disk */
1621 } else {
1622 bdrv_get_geometry(bs, &nb_sectors);
1623 match = -1;
1624 first_match = -1;
1625 for (i = 0; ; i++) {
1626 parse = &fd_formats[i];
1627 if (parse->drive == FDRIVE_DRV_NONE) {
1628 break;
1629 }
1630 if (drive_in == parse->drive ||
1631 drive_in == FDRIVE_DRV_NONE) {
1632 size = (parse->max_head + 1) * parse->max_track *
1633 parse->last_sect;
1634 if (nb_sectors == size) {
1635 match = i;
1636 break;
1637 }
1638 if (first_match == -1) {
1639 first_match = i;
1640 }
1641 }
1642 }
1643 if (match == -1) {
1644 if (first_match == -1) {
1645 match = 1;
1646 } else {
1647 match = first_match;
1648 }
1649 parse = &fd_formats[match];
1650 }
1651 *nb_heads = parse->max_head + 1;
1652 *max_track = parse->max_track;
1653 *last_sect = parse->last_sect;
1654 *drive = parse->drive;
1655 }
1656}
1657
46d4767d
FB
1658int bdrv_get_translation_hint(BlockDriverState *bs)
1659{
1660 return bs->translation;
1661}
1662
abd7f68d
MA
1663void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1664 BlockErrorAction on_write_error)
1665{
1666 bs->on_read_error = on_read_error;
1667 bs->on_write_error = on_write_error;
1668}
1669
1670BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1671{
1672 return is_read ? bs->on_read_error : bs->on_write_error;
1673}
1674
b338082b
FB
1675int bdrv_is_read_only(BlockDriverState *bs)
1676{
1677 return bs->read_only;
1678}
1679
985a03b0
TS
1680int bdrv_is_sg(BlockDriverState *bs)
1681{
1682 return bs->sg;
1683}
1684
e900a7b7
CH
1685int bdrv_enable_write_cache(BlockDriverState *bs)
1686{
1687 return bs->enable_write_cache;
1688}
1689
ea2384d3
FB
1690int bdrv_is_encrypted(BlockDriverState *bs)
1691{
1692 if (bs->backing_hd && bs->backing_hd->encrypted)
1693 return 1;
1694 return bs->encrypted;
1695}
1696
c0f4ce77
AL
1697int bdrv_key_required(BlockDriverState *bs)
1698{
1699 BlockDriverState *backing_hd = bs->backing_hd;
1700
1701 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1702 return 1;
1703 return (bs->encrypted && !bs->valid_key);
1704}
1705
ea2384d3
FB
1706int bdrv_set_key(BlockDriverState *bs, const char *key)
1707{
1708 int ret;
1709 if (bs->backing_hd && bs->backing_hd->encrypted) {
1710 ret = bdrv_set_key(bs->backing_hd, key);
1711 if (ret < 0)
1712 return ret;
1713 if (!bs->encrypted)
1714 return 0;
1715 }
fd04a2ae
SH
1716 if (!bs->encrypted) {
1717 return -EINVAL;
1718 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1719 return -ENOMEDIUM;
1720 }
c0f4ce77 1721 ret = bs->drv->bdrv_set_key(bs, key);
bb5fc20f
AL
1722 if (ret < 0) {
1723 bs->valid_key = 0;
1724 } else if (!bs->valid_key) {
1725 bs->valid_key = 1;
1726 /* call the change callback now, we skipped it on open */
7d4b4ba5 1727 bdrv_dev_change_media_cb(bs, true);
bb5fc20f 1728 }
c0f4ce77 1729 return ret;
ea2384d3
FB
1730}
1731
1732void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1733{
19cb3738 1734 if (!bs->drv) {
ea2384d3
FB
1735 buf[0] = '\0';
1736 } else {
1737 pstrcpy(buf, buf_size, bs->drv->format_name);
1738 }
1739}
1740
5fafdf24 1741void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
ea2384d3
FB
1742 void *opaque)
1743{
1744 BlockDriver *drv;
1745
8a22f02a 1746 QLIST_FOREACH(drv, &bdrv_drivers, list) {
ea2384d3
FB
1747 it(opaque, drv->format_name);
1748 }
1749}
1750
b338082b
FB
1751BlockDriverState *bdrv_find(const char *name)
1752{
1753 BlockDriverState *bs;
1754
1b7bdbc1
SH
1755 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1756 if (!strcmp(name, bs->device_name)) {
b338082b 1757 return bs;
1b7bdbc1 1758 }
b338082b
FB
1759 }
1760 return NULL;
1761}
1762
2f399b0a
MA
1763BlockDriverState *bdrv_next(BlockDriverState *bs)
1764{
1765 if (!bs) {
1766 return QTAILQ_FIRST(&bdrv_states);
1767 }
1768 return QTAILQ_NEXT(bs, list);
1769}
1770
51de9760 1771void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
81d0912d
FB
1772{
1773 BlockDriverState *bs;
1774
1b7bdbc1 1775 QTAILQ_FOREACH(bs, &bdrv_states, list) {
51de9760 1776 it(opaque, bs);
81d0912d
FB
1777 }
1778}
1779
ea2384d3
FB
1780const char *bdrv_get_device_name(BlockDriverState *bs)
1781{
1782 return bs->device_name;
1783}
1784
205ef796 1785int bdrv_flush(BlockDriverState *bs)
7a6cba61 1786{
016f5cf6 1787 if (bs->open_flags & BDRV_O_NO_FLUSH) {
205ef796
KW
1788 return 0;
1789 }
1790
e7a8a783
KW
1791 if (bs->drv && bdrv_has_async_flush(bs->drv) && qemu_in_coroutine()) {
1792 return bdrv_co_flush_em(bs);
1793 }
1794
205ef796
KW
1795 if (bs->drv && bs->drv->bdrv_flush) {
1796 return bs->drv->bdrv_flush(bs);
016f5cf6
AG
1797 }
1798
205ef796
KW
1799 /*
1800 * Some block drivers always operate in either writethrough or unsafe mode
1801 * and don't support bdrv_flush therefore. Usually qemu doesn't know how
1802 * the server works (because the behaviour is hardcoded or depends on
1803 * server-side configuration), so we can't ensure that everything is safe
1804 * on disk. Returning an error doesn't work because that would break guests
1805 * even if the server operates in writethrough mode.
1806 *
1807 * Let's hope the user knows what he's doing.
1808 */
1809 return 0;
7a6cba61
PB
1810}
1811
c6ca28d6
AL
1812void bdrv_flush_all(void)
1813{
1814 BlockDriverState *bs;
1815
1b7bdbc1 1816 QTAILQ_FOREACH(bs, &bdrv_states, list) {
c602a489 1817 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
c6ca28d6 1818 bdrv_flush(bs);
1b7bdbc1
SH
1819 }
1820 }
c6ca28d6
AL
1821}
1822
f2feebbd
KW
1823int bdrv_has_zero_init(BlockDriverState *bs)
1824{
1825 assert(bs->drv);
1826
336c1c12
KW
1827 if (bs->drv->bdrv_has_zero_init) {
1828 return bs->drv->bdrv_has_zero_init(bs);
f2feebbd
KW
1829 }
1830
1831 return 1;
1832}
1833
bb8bf76f
CH
1834int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
1835{
1836 if (!bs->drv) {
1837 return -ENOMEDIUM;
1838 }
1839 if (!bs->drv->bdrv_discard) {
1840 return 0;
1841 }
1842 return bs->drv->bdrv_discard(bs, sector_num, nb_sectors);
1843}
1844
f58c7b35
TS
1845/*
1846 * Returns true iff the specified sector is present in the disk image. Drivers
1847 * not implementing the functionality are assumed to not support backing files,
1848 * hence all their sectors are reported as allocated.
1849 *
1850 * 'pnum' is set to the number of sectors (including and immediately following
1851 * the specified sector) that are known to be in the same
1852 * allocated/unallocated state.
1853 *
1854 * 'nb_sectors' is the max value 'pnum' should be set to.
1855 */
1856int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1857 int *pnum)
1858{
1859 int64_t n;
1860 if (!bs->drv->bdrv_is_allocated) {
1861 if (sector_num >= bs->total_sectors) {
1862 *pnum = 0;
1863 return 0;
1864 }
1865 n = bs->total_sectors - sector_num;
1866 *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1867 return 1;
1868 }
1869 return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1870}
1871
2582bfed
LC
1872void bdrv_mon_event(const BlockDriverState *bdrv,
1873 BlockMonEventAction action, int is_read)
1874{
1875 QObject *data;
1876 const char *action_str;
1877
1878 switch (action) {
1879 case BDRV_ACTION_REPORT:
1880 action_str = "report";
1881 break;
1882 case BDRV_ACTION_IGNORE:
1883 action_str = "ignore";
1884 break;
1885 case BDRV_ACTION_STOP:
1886 action_str = "stop";
1887 break;
1888 default:
1889 abort();
1890 }
1891
1892 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1893 bdrv->device_name,
1894 action_str,
1895 is_read ? "read" : "write");
1896 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1897
1898 qobject_decref(data);
1899}
1900
d15e5465 1901static void bdrv_print_dict(QObject *obj, void *opaque)
b338082b 1902{
d15e5465
LC
1903 QDict *bs_dict;
1904 Monitor *mon = opaque;
1905
1906 bs_dict = qobject_to_qdict(obj);
1907
d8aeeb31 1908 monitor_printf(mon, "%s: removable=%d",
d15e5465 1909 qdict_get_str(bs_dict, "device"),
d15e5465
LC
1910 qdict_get_bool(bs_dict, "removable"));
1911
1912 if (qdict_get_bool(bs_dict, "removable")) {
1913 monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
e4def80b
MA
1914 monitor_printf(mon, " tray-open=%d",
1915 qdict_get_bool(bs_dict, "tray-open"));
d15e5465 1916 }
d2078cc2
LC
1917
1918 if (qdict_haskey(bs_dict, "io-status")) {
1919 monitor_printf(mon, " io-status=%s", qdict_get_str(bs_dict, "io-status"));
1920 }
1921
d15e5465
LC
1922 if (qdict_haskey(bs_dict, "inserted")) {
1923 QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted"));
1924
1925 monitor_printf(mon, " file=");
1926 monitor_print_filename(mon, qdict_get_str(qdict, "file"));
1927 if (qdict_haskey(qdict, "backing_file")) {
1928 monitor_printf(mon, " backing_file=");
1929 monitor_print_filename(mon, qdict_get_str(qdict, "backing_file"));
1930 }
1931 monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
1932 qdict_get_bool(qdict, "ro"),
1933 qdict_get_str(qdict, "drv"),
1934 qdict_get_bool(qdict, "encrypted"));
1935 } else {
1936 monitor_printf(mon, " [not inserted]");
1937 }
1938
1939 monitor_printf(mon, "\n");
1940}
1941
1942void bdrv_info_print(Monitor *mon, const QObject *data)
1943{
1944 qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon);
1945}
1946
f04ef601
LC
1947static const char *const io_status_name[BDRV_IOS_MAX] = {
1948 [BDRV_IOS_OK] = "ok",
1949 [BDRV_IOS_FAILED] = "failed",
1950 [BDRV_IOS_ENOSPC] = "nospace",
1951};
1952
d15e5465
LC
1953void bdrv_info(Monitor *mon, QObject **ret_data)
1954{
1955 QList *bs_list;
b338082b
FB
1956 BlockDriverState *bs;
1957
d15e5465
LC
1958 bs_list = qlist_new();
1959
1b7bdbc1 1960 QTAILQ_FOREACH(bs, &bdrv_states, list) {
d15e5465 1961 QObject *bs_obj;
e4def80b 1962 QDict *bs_dict;
d15e5465 1963
d8aeeb31 1964 bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': 'unknown', "
d15e5465 1965 "'removable': %i, 'locked': %i }",
2c6942fa
MA
1966 bs->device_name,
1967 bdrv_dev_has_removable_media(bs),
f107639a 1968 bdrv_dev_is_medium_locked(bs));
e4def80b 1969 bs_dict = qobject_to_qdict(bs_obj);
d15e5465 1970
e4def80b
MA
1971 if (bdrv_dev_has_removable_media(bs)) {
1972 qdict_put(bs_dict, "tray-open",
1973 qbool_from_int(bdrv_dev_is_tray_open(bs)));
1974 }
f04ef601
LC
1975
1976 if (bdrv_iostatus_is_enabled(bs)) {
1977 qdict_put(bs_dict, "io-status",
1978 qstring_from_str(io_status_name[bs->iostatus]));
1979 }
1980
19cb3738 1981 if (bs->drv) {
d15e5465 1982 QObject *obj;
d15e5465
LC
1983
1984 obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
1985 "'encrypted': %i }",
1986 bs->filename, bs->read_only,
1987 bs->drv->format_name,
1988 bdrv_is_encrypted(bs));
fef30743 1989 if (bs->backing_file[0] != '\0') {
d15e5465
LC
1990 QDict *qdict = qobject_to_qdict(obj);
1991 qdict_put(qdict, "backing_file",
1992 qstring_from_str(bs->backing_file));
376253ec 1993 }
d15e5465
LC
1994
1995 qdict_put_obj(bs_dict, "inserted", obj);
b338082b 1996 }
d15e5465 1997 qlist_append_obj(bs_list, bs_obj);
b338082b 1998 }
d15e5465
LC
1999
2000 *ret_data = QOBJECT(bs_list);
b338082b 2001}
a36e69dd 2002
218a536a 2003static void bdrv_stats_iter(QObject *data, void *opaque)
a36e69dd 2004{
218a536a
LC
2005 QDict *qdict;
2006 Monitor *mon = opaque;
2007
2008 qdict = qobject_to_qdict(data);
2009 monitor_printf(mon, "%s:", qdict_get_str(qdict, "device"));
2010
2011 qdict = qobject_to_qdict(qdict_get(qdict, "stats"));
2012 monitor_printf(mon, " rd_bytes=%" PRId64
2013 " wr_bytes=%" PRId64
2014 " rd_operations=%" PRId64
2015 " wr_operations=%" PRId64
e8045d67 2016 " flush_operations=%" PRId64
c488c7f6
CH
2017 " wr_total_time_ns=%" PRId64
2018 " rd_total_time_ns=%" PRId64
2019 " flush_total_time_ns=%" PRId64
218a536a
LC
2020 "\n",
2021 qdict_get_int(qdict, "rd_bytes"),
2022 qdict_get_int(qdict, "wr_bytes"),
2023 qdict_get_int(qdict, "rd_operations"),
e8045d67 2024 qdict_get_int(qdict, "wr_operations"),
c488c7f6
CH
2025 qdict_get_int(qdict, "flush_operations"),
2026 qdict_get_int(qdict, "wr_total_time_ns"),
2027 qdict_get_int(qdict, "rd_total_time_ns"),
2028 qdict_get_int(qdict, "flush_total_time_ns"));
218a536a
LC
2029}
2030
2031void bdrv_stats_print(Monitor *mon, const QObject *data)
2032{
2033 qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon);
2034}
2035
294cc35f
KW
2036static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
2037{
2038 QObject *res;
2039 QDict *dict;
2040
2041 res = qobject_from_jsonf("{ 'stats': {"
2042 "'rd_bytes': %" PRId64 ","
2043 "'wr_bytes': %" PRId64 ","
2044 "'rd_operations': %" PRId64 ","
2045 "'wr_operations': %" PRId64 ","
e8045d67 2046 "'wr_highest_offset': %" PRId64 ","
c488c7f6
CH
2047 "'flush_operations': %" PRId64 ","
2048 "'wr_total_time_ns': %" PRId64 ","
2049 "'rd_total_time_ns': %" PRId64 ","
2050 "'flush_total_time_ns': %" PRId64
294cc35f 2051 "} }",
a597e79c
CH
2052 bs->nr_bytes[BDRV_ACCT_READ],
2053 bs->nr_bytes[BDRV_ACCT_WRITE],
2054 bs->nr_ops[BDRV_ACCT_READ],
2055 bs->nr_ops[BDRV_ACCT_WRITE],
5ffbbc67 2056 bs->wr_highest_sector *
e8045d67 2057 (uint64_t)BDRV_SECTOR_SIZE,
c488c7f6
CH
2058 bs->nr_ops[BDRV_ACCT_FLUSH],
2059 bs->total_time_ns[BDRV_ACCT_WRITE],
2060 bs->total_time_ns[BDRV_ACCT_READ],
2061 bs->total_time_ns[BDRV_ACCT_FLUSH]);
294cc35f
KW
2062 dict = qobject_to_qdict(res);
2063
2064 if (*bs->device_name) {
2065 qdict_put(dict, "device", qstring_from_str(bs->device_name));
2066 }
2067
2068 if (bs->file) {
2069 QObject *parent = bdrv_info_stats_bs(bs->file);
2070 qdict_put_obj(dict, "parent", parent);
2071 }
2072
2073 return res;
2074}
2075
218a536a
LC
2076void bdrv_info_stats(Monitor *mon, QObject **ret_data)
2077{
2078 QObject *obj;
2079 QList *devices;
a36e69dd
TS
2080 BlockDriverState *bs;
2081
218a536a
LC
2082 devices = qlist_new();
2083
1b7bdbc1 2084 QTAILQ_FOREACH(bs, &bdrv_states, list) {
294cc35f 2085 obj = bdrv_info_stats_bs(bs);
218a536a 2086 qlist_append_obj(devices, obj);
a36e69dd 2087 }
218a536a
LC
2088
2089 *ret_data = QOBJECT(devices);
a36e69dd 2090}
ea2384d3 2091
045df330
AL
2092const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2093{
2094 if (bs->backing_hd && bs->backing_hd->encrypted)
2095 return bs->backing_file;
2096 else if (bs->encrypted)
2097 return bs->filename;
2098 else
2099 return NULL;
2100}
2101
5fafdf24 2102void bdrv_get_backing_filename(BlockDriverState *bs,
83f64091
FB
2103 char *filename, int filename_size)
2104{
b783e409 2105 if (!bs->backing_file) {
83f64091
FB
2106 pstrcpy(filename, filename_size, "");
2107 } else {
2108 pstrcpy(filename, filename_size, bs->backing_file);
2109 }
2110}
2111
5fafdf24 2112int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
faea38e7
FB
2113 const uint8_t *buf, int nb_sectors)
2114{
2115 BlockDriver *drv = bs->drv;
2116 if (!drv)
19cb3738 2117 return -ENOMEDIUM;
faea38e7
FB
2118 if (!drv->bdrv_write_compressed)
2119 return -ENOTSUP;
fbb7b4e0
KW
2120 if (bdrv_check_request(bs, sector_num, nb_sectors))
2121 return -EIO;
a55eb92c 2122
c6d22830 2123 if (bs->dirty_bitmap) {
7cd1e32a
LS
2124 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2125 }
a55eb92c 2126
faea38e7
FB
2127 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2128}
3b46e624 2129
faea38e7
FB
2130int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2131{
2132 BlockDriver *drv = bs->drv;
2133 if (!drv)
19cb3738 2134 return -ENOMEDIUM;
faea38e7
FB
2135 if (!drv->bdrv_get_info)
2136 return -ENOTSUP;
2137 memset(bdi, 0, sizeof(*bdi));
2138 return drv->bdrv_get_info(bs, bdi);
2139}
2140
45566e9c
CH
2141int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2142 int64_t pos, int size)
178e08a5
AL
2143{
2144 BlockDriver *drv = bs->drv;
2145 if (!drv)
2146 return -ENOMEDIUM;
7cdb1f6d
MK
2147 if (drv->bdrv_save_vmstate)
2148 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2149 if (bs->file)
2150 return bdrv_save_vmstate(bs->file, buf, pos, size);
2151 return -ENOTSUP;
178e08a5
AL
2152}
2153
45566e9c
CH
2154int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2155 int64_t pos, int size)
178e08a5
AL
2156{
2157 BlockDriver *drv = bs->drv;
2158 if (!drv)
2159 return -ENOMEDIUM;
7cdb1f6d
MK
2160 if (drv->bdrv_load_vmstate)
2161 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2162 if (bs->file)
2163 return bdrv_load_vmstate(bs->file, buf, pos, size);
2164 return -ENOTSUP;
178e08a5
AL
2165}
2166
8b9b0cc2
KW
2167void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2168{
2169 BlockDriver *drv = bs->drv;
2170
2171 if (!drv || !drv->bdrv_debug_event) {
2172 return;
2173 }
2174
2175 return drv->bdrv_debug_event(bs, event);
2176
2177}
2178
faea38e7
FB
2179/**************************************************************/
2180/* handling of snapshots */
2181
feeee5ac
MDCF
2182int bdrv_can_snapshot(BlockDriverState *bs)
2183{
2184 BlockDriver *drv = bs->drv;
07b70bfb 2185 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
feeee5ac
MDCF
2186 return 0;
2187 }
2188
2189 if (!drv->bdrv_snapshot_create) {
2190 if (bs->file != NULL) {
2191 return bdrv_can_snapshot(bs->file);
2192 }
2193 return 0;
2194 }
2195
2196 return 1;
2197}
2198
199630b6
BS
2199int bdrv_is_snapshot(BlockDriverState *bs)
2200{
2201 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2202}
2203
f9092b10
MA
2204BlockDriverState *bdrv_snapshots(void)
2205{
2206 BlockDriverState *bs;
2207
3ac906f7 2208 if (bs_snapshots) {
f9092b10 2209 return bs_snapshots;
3ac906f7 2210 }
f9092b10
MA
2211
2212 bs = NULL;
2213 while ((bs = bdrv_next(bs))) {
2214 if (bdrv_can_snapshot(bs)) {
3ac906f7
MA
2215 bs_snapshots = bs;
2216 return bs;
f9092b10
MA
2217 }
2218 }
2219 return NULL;
f9092b10
MA
2220}
2221
5fafdf24 2222int bdrv_snapshot_create(BlockDriverState *bs,
faea38e7
FB
2223 QEMUSnapshotInfo *sn_info)
2224{
2225 BlockDriver *drv = bs->drv;
2226 if (!drv)
19cb3738 2227 return -ENOMEDIUM;
7cdb1f6d
MK
2228 if (drv->bdrv_snapshot_create)
2229 return drv->bdrv_snapshot_create(bs, sn_info);
2230 if (bs->file)
2231 return bdrv_snapshot_create(bs->file, sn_info);
2232 return -ENOTSUP;
faea38e7
FB
2233}
2234
5fafdf24 2235int bdrv_snapshot_goto(BlockDriverState *bs,
faea38e7
FB
2236 const char *snapshot_id)
2237{
2238 BlockDriver *drv = bs->drv;
7cdb1f6d
MK
2239 int ret, open_ret;
2240
faea38e7 2241 if (!drv)
19cb3738 2242 return -ENOMEDIUM;
7cdb1f6d
MK
2243 if (drv->bdrv_snapshot_goto)
2244 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2245
2246 if (bs->file) {
2247 drv->bdrv_close(bs);
2248 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2249 open_ret = drv->bdrv_open(bs, bs->open_flags);
2250 if (open_ret < 0) {
2251 bdrv_delete(bs->file);
2252 bs->drv = NULL;
2253 return open_ret;
2254 }
2255 return ret;
2256 }
2257
2258 return -ENOTSUP;
faea38e7
FB
2259}
2260
2261int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2262{
2263 BlockDriver *drv = bs->drv;
2264 if (!drv)
19cb3738 2265 return -ENOMEDIUM;
7cdb1f6d
MK
2266 if (drv->bdrv_snapshot_delete)
2267 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2268 if (bs->file)
2269 return bdrv_snapshot_delete(bs->file, snapshot_id);
2270 return -ENOTSUP;
faea38e7
FB
2271}
2272
5fafdf24 2273int bdrv_snapshot_list(BlockDriverState *bs,
faea38e7
FB
2274 QEMUSnapshotInfo **psn_info)
2275{
2276 BlockDriver *drv = bs->drv;
2277 if (!drv)
19cb3738 2278 return -ENOMEDIUM;
7cdb1f6d
MK
2279 if (drv->bdrv_snapshot_list)
2280 return drv->bdrv_snapshot_list(bs, psn_info);
2281 if (bs->file)
2282 return bdrv_snapshot_list(bs->file, psn_info);
2283 return -ENOTSUP;
faea38e7
FB
2284}
2285
51ef6727 2286int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2287 const char *snapshot_name)
2288{
2289 BlockDriver *drv = bs->drv;
2290 if (!drv) {
2291 return -ENOMEDIUM;
2292 }
2293 if (!bs->read_only) {
2294 return -EINVAL;
2295 }
2296 if (drv->bdrv_snapshot_load_tmp) {
2297 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2298 }
2299 return -ENOTSUP;
2300}
2301
faea38e7
FB
#define NB_SUFFIXES 4

/*
 * Format @size into @buf (capacity @buf_size) in a short human readable
 * form and return @buf.
 *
 * Values up to 999 are printed as plain integers.  Larger values are
 * scaled by powers of 1024 and suffixed with K, M, G or T: one decimal
 * digit is shown while the scaled value is below 10, otherwise the value
 * is rounded to the nearest integer.  Values too large for the T range
 * are still printed with the T suffix.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t base;
    int64_t rounded;
    int i;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
    } else {
        base = 1024;
        for (i = 0; i < NB_SUFFIXES; i++) {
            if (size < (10 * base)) {
                snprintf(buf, buf_size, "%0.1f%c",
                         (double)size / base,
                         suffixes[i]);
                break;
            } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
                /*
                 * Round to nearest using quotient and remainder;
                 * "size + base / 2" would overflow int64_t for sizes
                 * close to INT64_MAX.
                 */
                rounded = size / base + (size % base >= (base >> 1));
                snprintf(buf, buf_size, "%" PRId64 "%c",
                         rounded,
                         suffixes[i]);
                break;
            }
            base = base * 1024;
        }
    }
    return buf;
}
2331
2332char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2333{
2334 char buf1[128], date_buf[128], clock_buf[128];
3b9f94e1
FB
2335#ifdef _WIN32
2336 struct tm *ptm;
2337#else
faea38e7 2338 struct tm tm;
3b9f94e1 2339#endif
faea38e7
FB
2340 time_t ti;
2341 int64_t secs;
2342
2343 if (!sn) {
5fafdf24
TS
2344 snprintf(buf, buf_size,
2345 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
2346 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2347 } else {
2348 ti = sn->date_sec;
3b9f94e1
FB
2349#ifdef _WIN32
2350 ptm = localtime(&ti);
2351 strftime(date_buf, sizeof(date_buf),
2352 "%Y-%m-%d %H:%M:%S", ptm);
2353#else
faea38e7
FB
2354 localtime_r(&ti, &tm);
2355 strftime(date_buf, sizeof(date_buf),
2356 "%Y-%m-%d %H:%M:%S", &tm);
3b9f94e1 2357#endif
faea38e7
FB
2358 secs = sn->vm_clock_nsec / 1000000000;
2359 snprintf(clock_buf, sizeof(clock_buf),
2360 "%02d:%02d:%02d.%03d",
2361 (int)(secs / 3600),
2362 (int)((secs / 60) % 60),
5fafdf24 2363 (int)(secs % 60),
faea38e7
FB
2364 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2365 snprintf(buf, buf_size,
5fafdf24 2366 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
2367 sn->id_str, sn->name,
2368 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2369 date_buf,
2370 clock_buf);
2371 }
2372 return buf;
2373}
2374
ea2384d3 2375/**************************************************************/
83f64091 2376/* async I/Os */
ea2384d3 2377
3b69e4b9 2378BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
f141eafe 2379 QEMUIOVector *qiov, int nb_sectors,
3b69e4b9 2380 BlockDriverCompletionFunc *cb, void *opaque)
83f64091 2381{
bbf0a440
SH
2382 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2383
b2a61371
SH
2384 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
2385 cb, opaque, false, bdrv_co_do_rw);
ea2384d3
FB
2386}
2387
f141eafe
AL
2388BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2389 QEMUIOVector *qiov, int nb_sectors,
2390 BlockDriverCompletionFunc *cb, void *opaque)
ea2384d3 2391{
bbf0a440
SH
2392 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2393
1a6e115b
SH
2394 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
2395 cb, opaque, true, bdrv_co_do_rw);
83f64091
FB
2396}
2397
40b4f539
KW
2398
2399typedef struct MultiwriteCB {
2400 int error;
2401 int num_requests;
2402 int num_callbacks;
2403 struct {
2404 BlockDriverCompletionFunc *cb;
2405 void *opaque;
2406 QEMUIOVector *free_qiov;
2407 void *free_buf;
2408 } callbacks[];
2409} MultiwriteCB;
2410
2411static void multiwrite_user_cb(MultiwriteCB *mcb)
2412{
2413 int i;
2414
2415 for (i = 0; i < mcb->num_callbacks; i++) {
2416 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1e1ea48d
SH
2417 if (mcb->callbacks[i].free_qiov) {
2418 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2419 }
7267c094 2420 g_free(mcb->callbacks[i].free_qiov);
f8a83245 2421 qemu_vfree(mcb->callbacks[i].free_buf);
40b4f539
KW
2422 }
2423}
2424
2425static void multiwrite_cb(void *opaque, int ret)
2426{
2427 MultiwriteCB *mcb = opaque;
2428
6d519a5f
SH
2429 trace_multiwrite_cb(mcb, ret);
2430
cb6d3ca0 2431 if (ret < 0 && !mcb->error) {
40b4f539 2432 mcb->error = ret;
40b4f539
KW
2433 }
2434
2435 mcb->num_requests--;
2436 if (mcb->num_requests == 0) {
de189a1b 2437 multiwrite_user_cb(mcb);
7267c094 2438 g_free(mcb);
40b4f539
KW
2439 }
2440}
2441
2442static int multiwrite_req_compare(const void *a, const void *b)
2443{
77be4366
CH
2444 const BlockRequest *req1 = a, *req2 = b;
2445
2446 /*
2447 * Note that we can't simply subtract req2->sector from req1->sector
2448 * here as that could overflow the return value.
2449 */
2450 if (req1->sector > req2->sector) {
2451 return 1;
2452 } else if (req1->sector < req2->sector) {
2453 return -1;
2454 } else {
2455 return 0;
2456 }
40b4f539
KW
2457}
2458
2459/*
2460 * Takes a bunch of requests and tries to merge them. Returns the number of
2461 * requests that remain after merging.
2462 */
2463static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2464 int num_reqs, MultiwriteCB *mcb)
2465{
2466 int i, outidx;
2467
2468 // Sort requests by start sector
2469 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2470
2471 // Check if adjacent requests touch the same clusters. If so, combine them,
2472 // filling up gaps with zero sectors.
2473 outidx = 0;
2474 for (i = 1; i < num_reqs; i++) {
2475 int merge = 0;
2476 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2477
2478 // This handles the cases that are valid for all block drivers, namely
2479 // exactly sequential writes and overlapping writes.
2480 if (reqs[i].sector <= oldreq_last) {
2481 merge = 1;
2482 }
2483
2484 // The block driver may decide that it makes sense to combine requests
2485 // even if there is a gap of some sectors between them. In this case,
2486 // the gap is filled with zeros (therefore only applicable for yet
2487 // unused space in format like qcow2).
2488 if (!merge && bs->drv->bdrv_merge_requests) {
2489 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2490 }
2491
e2a305fb
CH
2492 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2493 merge = 0;
2494 }
2495
40b4f539
KW
2496 if (merge) {
2497 size_t size;
7267c094 2498 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
40b4f539
KW
2499 qemu_iovec_init(qiov,
2500 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2501
2502 // Add the first request to the merged one. If the requests are
2503 // overlapping, drop the last sectors of the first request.
2504 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2505 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2506
2507 // We might need to add some zeros between the two requests
2508 if (reqs[i].sector > oldreq_last) {
2509 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2510 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2511 memset(buf, 0, zero_bytes);
2512 qemu_iovec_add(qiov, buf, zero_bytes);
2513 mcb->callbacks[i].free_buf = buf;
2514 }
2515
2516 // Add the second request
2517 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2518
cbf1dff2 2519 reqs[outidx].nb_sectors = qiov->size >> 9;
40b4f539
KW
2520 reqs[outidx].qiov = qiov;
2521
2522 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2523 } else {
2524 outidx++;
2525 reqs[outidx].sector = reqs[i].sector;
2526 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2527 reqs[outidx].qiov = reqs[i].qiov;
2528 }
2529 }
2530
2531 return outidx + 1;
2532}
2533
2534/*
2535 * Submit multiple AIO write requests at once.
2536 *
2537 * On success, the function returns 0 and all requests in the reqs array have
2538 * been submitted. In error case this function returns -1, and any of the
2539 * requests may or may not be submitted yet. In particular, this means that the
2540 * callback will be called for some of the requests, for others it won't. The
2541 * caller must check the error field of the BlockRequest to wait for the right
2542 * callbacks (if error != 0, no callback will be called).
2543 *
2544 * The implementation may modify the contents of the reqs array, e.g. to merge
2545 * requests. However, the fields opaque and error are left unmodified as they
2546 * are used to signal failure for a single request to the caller.
2547 */
2548int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2549{
2550 BlockDriverAIOCB *acb;
2551 MultiwriteCB *mcb;
2552 int i;
2553
301db7c2
RH
2554 /* don't submit writes if we don't have a medium */
2555 if (bs->drv == NULL) {
2556 for (i = 0; i < num_reqs; i++) {
2557 reqs[i].error = -ENOMEDIUM;
2558 }
2559 return -1;
2560 }
2561
40b4f539
KW
2562 if (num_reqs == 0) {
2563 return 0;
2564 }
2565
2566 // Create MultiwriteCB structure
7267c094 2567 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
40b4f539
KW
2568 mcb->num_requests = 0;
2569 mcb->num_callbacks = num_reqs;
2570
2571 for (i = 0; i < num_reqs; i++) {
2572 mcb->callbacks[i].cb = reqs[i].cb;
2573 mcb->callbacks[i].opaque = reqs[i].opaque;
2574 }
2575
2576 // Check for mergable requests
2577 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2578
6d519a5f
SH
2579 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2580
453f9a16
KW
2581 /*
2582 * Run the aio requests. As soon as one request can't be submitted
2583 * successfully, fail all requests that are not yet submitted (we must
2584 * return failure for all requests anyway)
2585 *
2586 * num_requests cannot be set to the right value immediately: If
2587 * bdrv_aio_writev fails for some request, num_requests would be too high
2588 * and therefore multiwrite_cb() would never recognize the multiwrite
2589 * request as completed. We also cannot use the loop variable i to set it
2590 * when the first request fails because the callback may already have been
2591 * called for previously submitted requests. Thus, num_requests must be
2592 * incremented for each request that is submitted.
2593 *
2594 * The problem that callbacks may be called early also means that we need
2595 * to take care that num_requests doesn't become 0 before all requests are
2596 * submitted - multiwrite_cb() would consider the multiwrite request
2597 * completed. A dummy request that is "completed" by a manual call to
2598 * multiwrite_cb() takes care of this.
2599 */
2600 mcb->num_requests = 1;
2601
6d519a5f 2602 // Run the aio requests
40b4f539 2603 for (i = 0; i < num_reqs; i++) {
453f9a16 2604 mcb->num_requests++;
40b4f539
KW
2605 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2606 reqs[i].nb_sectors, multiwrite_cb, mcb);
2607
2608 if (acb == NULL) {
2609 // We can only fail the whole thing if no request has been
2610 // submitted yet. Otherwise we'll wait for the submitted AIOs to
2611 // complete and report the error in the callback.
453f9a16 2612 if (i == 0) {
6d519a5f 2613 trace_bdrv_aio_multiwrite_earlyfail(mcb);
40b4f539
KW
2614 goto fail;
2615 } else {
6d519a5f 2616 trace_bdrv_aio_multiwrite_latefail(mcb, i);
7eb58a6c 2617 multiwrite_cb(mcb, -EIO);
40b4f539
KW
2618 break;
2619 }
40b4f539
KW
2620 }
2621 }
2622
453f9a16
KW
2623 /* Complete the dummy request */
2624 multiwrite_cb(mcb, 0);
2625
40b4f539
KW
2626 return 0;
2627
2628fail:
453f9a16
KW
2629 for (i = 0; i < mcb->num_callbacks; i++) {
2630 reqs[i].error = -EIO;
2631 }
7267c094 2632 g_free(mcb);
40b4f539
KW
2633 return -1;
2634}
2635
b2e12bc6
CH
2636BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2637 BlockDriverCompletionFunc *cb, void *opaque)
2638{
2639 BlockDriver *drv = bs->drv;
2640
a13aac04
SH
2641 trace_bdrv_aio_flush(bs, opaque);
2642
016f5cf6
AG
2643 if (bs->open_flags & BDRV_O_NO_FLUSH) {
2644 return bdrv_aio_noop_em(bs, cb, opaque);
2645 }
2646
b2e12bc6
CH
2647 if (!drv)
2648 return NULL;
b2e12bc6
CH
2649 return drv->bdrv_aio_flush(bs, cb, opaque);
2650}
2651
83f64091 2652void bdrv_aio_cancel(BlockDriverAIOCB *acb)
83f64091 2653{
6bbff9a0 2654 acb->pool->cancel(acb);
83f64091
FB
2655}
2656
ce1a14dc 2657
83f64091
FB
2658/**************************************************************/
2659/* async block device emulation */
2660
c16b5a2c
CH
2661typedef struct BlockDriverAIOCBSync {
2662 BlockDriverAIOCB common;
2663 QEMUBH *bh;
2664 int ret;
2665 /* vector translation state */
2666 QEMUIOVector *qiov;
2667 uint8_t *bounce;
2668 int is_write;
2669} BlockDriverAIOCBSync;
2670
2671static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2672{
b666d239
KW
2673 BlockDriverAIOCBSync *acb =
2674 container_of(blockacb, BlockDriverAIOCBSync, common);
6a7ad299 2675 qemu_bh_delete(acb->bh);
36afc451 2676 acb->bh = NULL;
c16b5a2c
CH
2677 qemu_aio_release(acb);
2678}
2679
2680static AIOPool bdrv_em_aio_pool = {
2681 .aiocb_size = sizeof(BlockDriverAIOCBSync),
2682 .cancel = bdrv_aio_cancel_em,
2683};
2684
ce1a14dc 2685static void bdrv_aio_bh_cb(void *opaque)
83f64091 2686{
ce1a14dc 2687 BlockDriverAIOCBSync *acb = opaque;
f141eafe 2688
f141eafe
AL
2689 if (!acb->is_write)
2690 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
ceb42de8 2691 qemu_vfree(acb->bounce);
ce1a14dc 2692 acb->common.cb(acb->common.opaque, acb->ret);
6a7ad299 2693 qemu_bh_delete(acb->bh);
36afc451 2694 acb->bh = NULL;
ce1a14dc 2695 qemu_aio_release(acb);
83f64091 2696}
beac80cd 2697
f141eafe
AL
2698static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2699 int64_t sector_num,
2700 QEMUIOVector *qiov,
2701 int nb_sectors,
2702 BlockDriverCompletionFunc *cb,
2703 void *opaque,
2704 int is_write)
2705
83f64091 2706{
ce1a14dc 2707 BlockDriverAIOCBSync *acb;
ce1a14dc 2708
c16b5a2c 2709 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
f141eafe
AL
2710 acb->is_write = is_write;
2711 acb->qiov = qiov;
e268ca52 2712 acb->bounce = qemu_blockalign(bs, qiov->size);
f141eafe 2713
ce1a14dc
PB
2714 if (!acb->bh)
2715 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
f141eafe
AL
2716
2717 if (is_write) {
2718 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
1ed20acf 2719 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
f141eafe 2720 } else {
1ed20acf 2721 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
f141eafe
AL
2722 }
2723
ce1a14dc 2724 qemu_bh_schedule(acb->bh);
f141eafe 2725
ce1a14dc 2726 return &acb->common;
beac80cd
FB
2727}
2728
f141eafe
AL
2729static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2730 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 2731 BlockDriverCompletionFunc *cb, void *opaque)
beac80cd 2732{
f141eafe
AL
2733 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2734}
83f64091 2735
f141eafe
AL
2736static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2737 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2738 BlockDriverCompletionFunc *cb, void *opaque)
2739{
2740 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
beac80cd 2741}
beac80cd 2742
68485420
KW
2743
2744typedef struct BlockDriverAIOCBCoroutine {
2745 BlockDriverAIOCB common;
2746 BlockRequest req;
2747 bool is_write;
2748 QEMUBH* bh;
2749} BlockDriverAIOCBCoroutine;
2750
2751static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
2752{
2753 qemu_aio_flush();
2754}
2755
2756static AIOPool bdrv_em_co_aio_pool = {
2757 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
2758 .cancel = bdrv_aio_co_cancel_em,
2759};
2760
2761static void bdrv_co_rw_bh(void *opaque)
2762{
2763 BlockDriverAIOCBCoroutine *acb = opaque;
2764
2765 acb->common.cb(acb->common.opaque, acb->req.error);
2766 qemu_bh_delete(acb->bh);
2767 qemu_aio_release(acb);
2768}
2769
b2a61371 2770/* Invoke .bdrv_co_readv/.bdrv_co_writev */
68485420
KW
2771static void coroutine_fn bdrv_co_rw(void *opaque)
2772{
2773 BlockDriverAIOCBCoroutine *acb = opaque;
2774 BlockDriverState *bs = acb->common.bs;
2775
2776 if (!acb->is_write) {
2777 acb->req.error = bs->drv->bdrv_co_readv(bs, acb->req.sector,
2778 acb->req.nb_sectors, acb->req.qiov);
2779 } else {
2780 acb->req.error = bs->drv->bdrv_co_writev(bs, acb->req.sector,
2781 acb->req.nb_sectors, acb->req.qiov);
2782 }
2783
2784 acb->bh = qemu_bh_new(bdrv_co_rw_bh, acb);
2785 qemu_bh_schedule(acb->bh);
2786}
2787
b2a61371
SH
2788/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
2789static void coroutine_fn bdrv_co_do_rw(void *opaque)
2790{
2791 BlockDriverAIOCBCoroutine *acb = opaque;
2792 BlockDriverState *bs = acb->common.bs;
2793
2794 if (!acb->is_write) {
2795 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
2796 acb->req.nb_sectors, acb->req.qiov);
2797 } else {
2798 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
2799 acb->req.nb_sectors, acb->req.qiov);
2800 }
2801
2802 acb->bh = qemu_bh_new(bdrv_co_rw_bh, acb);
2803 qemu_bh_schedule(acb->bh);
2804}
2805
68485420
KW
2806static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
2807 int64_t sector_num,
2808 QEMUIOVector *qiov,
2809 int nb_sectors,
2810 BlockDriverCompletionFunc *cb,
2811 void *opaque,
b2a61371
SH
2812 bool is_write,
2813 CoroutineEntry *entry)
68485420
KW
2814{
2815 Coroutine *co;
2816 BlockDriverAIOCBCoroutine *acb;
2817
2818 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2819 acb->req.sector = sector_num;
2820 acb->req.nb_sectors = nb_sectors;
2821 acb->req.qiov = qiov;
2822 acb->is_write = is_write;
2823
b2a61371 2824 co = qemu_coroutine_create(entry);
68485420
KW
2825 qemu_coroutine_enter(co, acb);
2826
2827 return &acb->common;
2828}
2829
2830static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
2831 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2832 BlockDriverCompletionFunc *cb, void *opaque)
2833{
2834 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
b2a61371 2835 false, bdrv_co_rw);
68485420
KW
2836}
2837
2838static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
2839 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2840 BlockDriverCompletionFunc *cb, void *opaque)
2841{
2842 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
b2a61371 2843 true, bdrv_co_rw);
68485420
KW
2844}
2845
b2e12bc6
CH
2846static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
2847 BlockDriverCompletionFunc *cb, void *opaque)
2848{
2849 BlockDriverAIOCBSync *acb;
2850
2851 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2852 acb->is_write = 1; /* don't bounce in the completion hadler */
2853 acb->qiov = NULL;
2854 acb->bounce = NULL;
2855 acb->ret = 0;
2856
2857 if (!acb->bh)
2858 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2859
2860 bdrv_flush(bs);
2861 qemu_bh_schedule(acb->bh);
2862 return &acb->common;
2863}
2864
016f5cf6
AG
2865static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
2866 BlockDriverCompletionFunc *cb, void *opaque)
2867{
2868 BlockDriverAIOCBSync *acb;
2869
2870 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2871 acb->is_write = 1; /* don't bounce in the completion handler */
2872 acb->qiov = NULL;
2873 acb->bounce = NULL;
2874 acb->ret = 0;
2875
2876 if (!acb->bh) {
2877 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2878 }
2879
2880 qemu_bh_schedule(acb->bh);
2881 return &acb->common;
2882}
2883
83f64091
FB
2884/**************************************************************/
2885/* sync block device emulation */
ea2384d3 2886
83f64091
FB
/* Completion callback: store @ret in the int that @opaque points to. */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
2891
5fafdf24 2892static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
83f64091 2893 uint8_t *buf, int nb_sectors)
7a6cba61 2894{
ce1a14dc
PB
2895 int async_ret;
2896 BlockDriverAIOCB *acb;
f141eafe
AL
2897 struct iovec iov;
2898 QEMUIOVector qiov;
83f64091 2899
83f64091 2900 async_ret = NOT_DONE;
3f4cb3d3 2901 iov.iov_base = (void *)buf;
eb5a3165 2902 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
f141eafe 2903 qemu_iovec_init_external(&qiov, &iov, 1);
1ed20acf
SH
2904
2905 acb = bs->drv->bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
2906 bdrv_rw_em_cb, &async_ret);
65d6b3d8
KW
2907 if (acb == NULL) {
2908 async_ret = -1;
2909 goto fail;
2910 }
baf35cb9 2911
83f64091
FB
2912 while (async_ret == NOT_DONE) {
2913 qemu_aio_wait();
2914 }
baf35cb9 2915
65d6b3d8
KW
2916
2917fail:
83f64091 2918 return async_ret;
7a6cba61
PB
2919}
2920
83f64091
FB
2921static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
2922 const uint8_t *buf, int nb_sectors)
2923{
ce1a14dc
PB
2924 int async_ret;
2925 BlockDriverAIOCB *acb;
f141eafe
AL
2926 struct iovec iov;
2927 QEMUIOVector qiov;
83f64091 2928
83f64091 2929 async_ret = NOT_DONE;
f141eafe 2930 iov.iov_base = (void *)buf;
eb5a3165 2931 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
f141eafe 2932 qemu_iovec_init_external(&qiov, &iov, 1);
1ed20acf
SH
2933
2934 acb = bs->drv->bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
2935 bdrv_rw_em_cb, &async_ret);
65d6b3d8
KW
2936 if (acb == NULL) {
2937 async_ret = -1;
2938 goto fail;
2939 }
83f64091
FB
2940 while (async_ret == NOT_DONE) {
2941 qemu_aio_wait();
2942 }
65d6b3d8
KW
2943
2944fail:
83f64091
FB
2945 return async_ret;
2946}
ea2384d3
FB
2947
2948void bdrv_init(void)
2949{
5efa9d5a 2950 module_call_init(MODULE_INIT_BLOCK);
ea2384d3 2951}
ce1a14dc 2952
eb852011
MA
2953void bdrv_init_with_whitelist(void)
2954{
2955 use_bdrv_whitelist = 1;
2956 bdrv_init();
2957}
2958
c16b5a2c
CH
2959void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2960 BlockDriverCompletionFunc *cb, void *opaque)
ce1a14dc 2961{
ce1a14dc
PB
2962 BlockDriverAIOCB *acb;
2963
6bbff9a0
AL
2964 if (pool->free_aiocb) {
2965 acb = pool->free_aiocb;
2966 pool->free_aiocb = acb->next;
ce1a14dc 2967 } else {
7267c094 2968 acb = g_malloc0(pool->aiocb_size);
6bbff9a0 2969 acb->pool = pool;
ce1a14dc
PB
2970 }
2971 acb->bs = bs;
2972 acb->cb = cb;
2973 acb->opaque = opaque;
2974 return acb;
2975}
2976
2977void qemu_aio_release(void *p)
2978{
6bbff9a0
AL
2979 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2980 AIOPool *pool = acb->pool;
2981 acb->next = pool->free_aiocb;
2982 pool->free_aiocb = acb;
ce1a14dc 2983}
19cb3738 2984
f9f05dc5
KW
2985/**************************************************************/
2986/* Coroutine block device emulation */
2987
2988typedef struct CoroutineIOCompletion {
2989 Coroutine *coroutine;
2990 int ret;
2991} CoroutineIOCompletion;
2992
2993static void bdrv_co_io_em_complete(void *opaque, int ret)
2994{
2995 CoroutineIOCompletion *co = opaque;
2996
2997 co->ret = ret;
2998 qemu_coroutine_enter(co->coroutine, NULL);
2999}
3000
3001static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
3002 int nb_sectors, QEMUIOVector *iov,
3003 bool is_write)
3004{
3005 CoroutineIOCompletion co = {
3006 .coroutine = qemu_coroutine_self(),
3007 };
3008 BlockDriverAIOCB *acb;
3009
3010 if (is_write) {
a652d160
SH
3011 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
3012 bdrv_co_io_em_complete, &co);
f9f05dc5 3013 } else {
a652d160
SH
3014 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3015 bdrv_co_io_em_complete, &co);
f9f05dc5
KW
3016 }
3017
59370aaa 3018 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
f9f05dc5
KW
3019 if (!acb) {
3020 return -EIO;
3021 }
3022 qemu_coroutine_yield();
3023
3024 return co.ret;
3025}
3026
3027static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3028 int64_t sector_num, int nb_sectors,
3029 QEMUIOVector *iov)
3030{
3031 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3032}
3033
3034static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3035 int64_t sector_num, int nb_sectors,
3036 QEMUIOVector *iov)
3037{
3038 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3039}
3040
e7a8a783
KW
3041static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs)
3042{
3043 CoroutineIOCompletion co = {
3044 .coroutine = qemu_coroutine_self(),
3045 };
3046 BlockDriverAIOCB *acb;
3047
3048 acb = bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3049 if (!acb) {
3050 return -EIO;
3051 }
3052 qemu_coroutine_yield();
3053 return co.ret;
3054}
3055
19cb3738
FB
3056/**************************************************************/
3057/* removable device support */
3058
3059/**
3060 * Return TRUE if the media is present
3061 */
3062int bdrv_is_inserted(BlockDriverState *bs)
3063{
3064 BlockDriver *drv = bs->drv;
a1aff5bf 3065
19cb3738
FB
3066 if (!drv)
3067 return 0;
3068 if (!drv->bdrv_is_inserted)
a1aff5bf
MA
3069 return 1;
3070 return drv->bdrv_is_inserted(bs);
19cb3738
FB
3071}
3072
3073/**
8e49ca46
MA
3074 * Return whether the media changed since the last call to this
3075 * function, or -ENOTSUP if we don't know. Most drivers don't know.
19cb3738
FB
3076 */
3077int bdrv_media_changed(BlockDriverState *bs)
3078{
3079 BlockDriver *drv = bs->drv;
19cb3738 3080
8e49ca46
MA
3081 if (drv && drv->bdrv_media_changed) {
3082 return drv->bdrv_media_changed(bs);
3083 }
3084 return -ENOTSUP;
19cb3738
FB
3085}
3086
3087/**
3088 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3089 */
fdec4404 3090void bdrv_eject(BlockDriverState *bs, int eject_flag)
19cb3738
FB
3091{
3092 BlockDriver *drv = bs->drv;
19cb3738 3093
822e1cd1
MA
3094 if (drv && drv->bdrv_eject) {
3095 drv->bdrv_eject(bs, eject_flag);
19cb3738
FB
3096 }
3097}
3098
19cb3738
FB
3099/**
3100 * Lock or unlock the media (if it is locked, the user won't be able
3101 * to eject it manually).
3102 */
025e849a 3103void bdrv_lock_medium(BlockDriverState *bs, bool locked)
19cb3738
FB
3104{
3105 BlockDriver *drv = bs->drv;
3106
025e849a 3107 trace_bdrv_lock_medium(bs, locked);
b8c6d095 3108
025e849a
MA
3109 if (drv && drv->bdrv_lock_medium) {
3110 drv->bdrv_lock_medium(bs, locked);
19cb3738
FB
3111 }
3112}
985a03b0
TS
3113
3114/* needed for generic scsi interface */
3115
3116int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3117{
3118 BlockDriver *drv = bs->drv;
3119
3120 if (drv && drv->bdrv_ioctl)
3121 return drv->bdrv_ioctl(bs, req, buf);
3122 return -ENOTSUP;
3123}
7d780669 3124
221f715d
AL
3125BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3126 unsigned long int req, void *buf,
3127 BlockDriverCompletionFunc *cb, void *opaque)
7d780669 3128{
221f715d 3129 BlockDriver *drv = bs->drv;
7d780669 3130
221f715d
AL
3131 if (drv && drv->bdrv_aio_ioctl)
3132 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3133 return NULL;
7d780669 3134}
e268ca52 3135
7b6f9300
MA
3136void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3137{
3138 bs->buffer_alignment = align;
3139}
7cd1e32a 3140
e268ca52
AL
3141void *qemu_blockalign(BlockDriverState *bs, size_t size)
3142{
3143 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3144}
7cd1e32a
LS
3145
3146void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3147{
3148 int64_t bitmap_size;
a55eb92c 3149
aaa0eb75 3150 bs->dirty_count = 0;
a55eb92c 3151 if (enable) {
c6d22830
JK
3152 if (!bs->dirty_bitmap) {
3153 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3154 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3155 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
a55eb92c 3156
7267c094 3157 bs->dirty_bitmap = g_malloc0(bitmap_size);
a55eb92c 3158 }
7cd1e32a 3159 } else {
c6d22830 3160 if (bs->dirty_bitmap) {
7267c094 3161 g_free(bs->dirty_bitmap);
c6d22830 3162 bs->dirty_bitmap = NULL;
a55eb92c 3163 }
7cd1e32a
LS
3164 }
3165}
3166
3167int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3168{
6ea44308 3169 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c 3170
c6d22830
JK
3171 if (bs->dirty_bitmap &&
3172 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
6d59fec1
MT
3173 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3174 (1UL << (chunk % (sizeof(unsigned long) * 8))));
7cd1e32a
LS
3175 } else {
3176 return 0;
3177 }
3178}
3179
a55eb92c
JK
3180void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3181 int nr_sectors)
7cd1e32a
LS
3182{
3183 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3184}
aaa0eb75
LS
3185
3186int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3187{
3188 return bs->dirty_count;
3189}
f88e1a42 3190
db593f25
MT
3191void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3192{
3193 assert(bs->in_use != in_use);
3194 bs->in_use = in_use;
3195}
3196
3197int bdrv_in_use(BlockDriverState *bs)
3198{
3199 return bs->in_use;
3200}
3201
28a7282a
LC
3202void bdrv_iostatus_enable(BlockDriverState *bs)
3203{
3204 bs->iostatus = BDRV_IOS_OK;
3205}
3206
3207/* The I/O status is only enabled if the drive explicitly
3208 * enables it _and_ the VM is configured to stop on errors */
3209bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3210{
3211 return (bs->iostatus != BDRV_IOS_INVAL &&
3212 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3213 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3214 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3215}
3216
3217void bdrv_iostatus_disable(BlockDriverState *bs)
3218{
3219 bs->iostatus = BDRV_IOS_INVAL;
3220}
3221
3222void bdrv_iostatus_reset(BlockDriverState *bs)
3223{
3224 if (bdrv_iostatus_is_enabled(bs)) {
3225 bs->iostatus = BDRV_IOS_OK;
3226 }
3227}
3228
3229/* XXX: Today this is set by device models because it makes the implementation
3230 quite simple. However, the block layer knows about the error, so it's
3231 possible to implement this without device models being involved */
3232void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3233{
3234 if (bdrv_iostatus_is_enabled(bs) && bs->iostatus == BDRV_IOS_OK) {
3235 assert(error >= 0);
3236 bs->iostatus = error == ENOSPC ? BDRV_IOS_ENOSPC : BDRV_IOS_FAILED;
3237 }
3238}
3239
a597e79c
CH
3240void
3241bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3242 enum BlockAcctType type)
3243{
3244 assert(type < BDRV_MAX_IOTYPE);
3245
3246 cookie->bytes = bytes;
c488c7f6 3247 cookie->start_time_ns = get_clock();
a597e79c
CH
3248 cookie->type = type;
3249}
3250
3251void
3252bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3253{
3254 assert(cookie->type < BDRV_MAX_IOTYPE);
3255
3256 bs->nr_bytes[cookie->type] += cookie->bytes;
3257 bs->nr_ops[cookie->type]++;
c488c7f6 3258 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
a597e79c
CH
3259}
3260
f88e1a42
JS
3261int bdrv_img_create(const char *filename, const char *fmt,
3262 const char *base_filename, const char *base_fmt,
3263 char *options, uint64_t img_size, int flags)
3264{
3265 QEMUOptionParameter *param = NULL, *create_options = NULL;
d220894e 3266 QEMUOptionParameter *backing_fmt, *backing_file, *size;
f88e1a42
JS
3267 BlockDriverState *bs = NULL;
3268 BlockDriver *drv, *proto_drv;
96df67d1 3269 BlockDriver *backing_drv = NULL;
f88e1a42
JS
3270 int ret = 0;
3271
3272 /* Find driver and parse its options */
3273 drv = bdrv_find_format(fmt);
3274 if (!drv) {
3275 error_report("Unknown file format '%s'", fmt);
4f70f249 3276 ret = -EINVAL;
f88e1a42
JS
3277 goto out;
3278 }
3279
3280 proto_drv = bdrv_find_protocol(filename);
3281 if (!proto_drv) {
3282 error_report("Unknown protocol '%s'", filename);
4f70f249 3283 ret = -EINVAL;
f88e1a42
JS
3284 goto out;
3285 }
3286
3287 create_options = append_option_parameters(create_options,
3288 drv->create_options);
3289 create_options = append_option_parameters(create_options,
3290 proto_drv->create_options);
3291
3292 /* Create parameter list with default values */
3293 param = parse_option_parameters("", create_options, param);
3294
3295 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3296
3297 /* Parse -o options */
3298 if (options) {
3299 param = parse_option_parameters(options, create_options, param);
3300 if (param == NULL) {
3301 error_report("Invalid options for file format '%s'.", fmt);
4f70f249 3302 ret = -EINVAL;
f88e1a42
JS
3303 goto out;
3304 }
3305 }
3306
3307 if (base_filename) {
3308 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3309 base_filename)) {
3310 error_report("Backing file not supported for file format '%s'",
3311 fmt);
4f70f249 3312 ret = -EINVAL;
f88e1a42
JS
3313 goto out;
3314 }
3315 }
3316
3317 if (base_fmt) {
3318 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3319 error_report("Backing file format not supported for file "
3320 "format '%s'", fmt);
4f70f249 3321 ret = -EINVAL;
f88e1a42
JS
3322 goto out;
3323 }
3324 }
3325
792da93a
JS
3326 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3327 if (backing_file && backing_file->value.s) {
3328 if (!strcmp(filename, backing_file->value.s)) {
3329 error_report("Error: Trying to create an image with the "
3330 "same filename as the backing file");
4f70f249 3331 ret = -EINVAL;
792da93a
JS
3332 goto out;
3333 }
3334 }
3335
f88e1a42
JS
3336 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3337 if (backing_fmt && backing_fmt->value.s) {
96df67d1
SH
3338 backing_drv = bdrv_find_format(backing_fmt->value.s);
3339 if (!backing_drv) {
f88e1a42
JS
3340 error_report("Unknown backing file format '%s'",
3341 backing_fmt->value.s);
4f70f249 3342 ret = -EINVAL;
f88e1a42
JS
3343 goto out;
3344 }
3345 }
3346
3347 // The size for the image must always be specified, with one exception:
3348 // If we are using a backing file, we can obtain the size from there
d220894e
KW
3349 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3350 if (size && size->value.n == -1) {
f88e1a42
JS
3351 if (backing_file && backing_file->value.s) {
3352 uint64_t size;
f88e1a42
JS
3353 char buf[32];
3354
f88e1a42
JS
3355 bs = bdrv_new("");
3356
96df67d1 3357 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
f88e1a42 3358 if (ret < 0) {
96df67d1 3359 error_report("Could not open '%s'", backing_file->value.s);
f88e1a42
JS
3360 goto out;
3361 }
3362 bdrv_get_geometry(bs, &size);
3363 size *= 512;
3364
3365 snprintf(buf, sizeof(buf), "%" PRId64, size);
3366 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3367 } else {
3368 error_report("Image creation needs a size parameter");
4f70f249 3369 ret = -EINVAL;
f88e1a42
JS
3370 goto out;
3371 }
3372 }
3373
3374 printf("Formatting '%s', fmt=%s ", filename, fmt);
3375 print_option_parameters(param);
3376 puts("");
3377
3378 ret = bdrv_create(drv, filename, param);
3379
3380 if (ret < 0) {
3381 if (ret == -ENOTSUP) {
3382 error_report("Formatting or formatting option not supported for "
3383 "file format '%s'", fmt);
3384 } else if (ret == -EFBIG) {
3385 error_report("The image size is too large for file format '%s'",
3386 fmt);
3387 } else {
3388 error_report("%s: error while creating %s: %s", filename, fmt,
3389 strerror(-ret));
3390 }
3391 }
3392
3393out:
3394 free_option_parameters(create_options);
3395 free_option_parameters(param);
3396
3397 if (bs) {
3398 bdrv_delete(bs);
3399 }
4f70f249
JS
3400
3401 return ret;
f88e1a42 3402}