]> git.proxmox.com Git - qemu.git/blame - block.c
block: drop .bdrv_read()/.bdrv_write() emulation
[qemu.git] / block.c
CommitLineData
fc01f7e7
FB
1/*
2 * QEMU System Emulator block driver
5fafdf24 3 *
fc01f7e7 4 * Copyright (c) 2003 Fabrice Bellard
5fafdf24 5 *
fc01f7e7
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
3990d09a 24#include "config-host.h"
faf07963 25#include "qemu-common.h"
6d519a5f 26#include "trace.h"
376253ec 27#include "monitor.h"
ea2384d3 28#include "block_int.h"
5efa9d5a 29#include "module.h"
d15e5465 30#include "qemu-objects.h"
68485420 31#include "qemu-coroutine.h"
fc01f7e7 32
71e72a19 33#ifdef CONFIG_BSD
7674e7bf
FB
34#include <sys/types.h>
35#include <sys/stat.h>
36#include <sys/ioctl.h>
72cf2d4f 37#include <sys/queue.h>
c5e97233 38#ifndef __DragonFly__
7674e7bf
FB
39#include <sys/disk.h>
40#endif
c5e97233 41#endif
7674e7bf 42
49dc768d
AL
43#ifdef _WIN32
44#include <windows.h>
45#endif
46
1c9805a3
SH
47#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
48
7d4b4ba5 49static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
f141eafe
AL
50static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
51 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
c87c0672 52 BlockDriverCompletionFunc *cb, void *opaque);
f141eafe
AL
53static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
54 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 55 BlockDriverCompletionFunc *cb, void *opaque);
b2e12bc6
CH
56static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
57 BlockDriverCompletionFunc *cb, void *opaque);
016f5cf6
AG
58static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
59 BlockDriverCompletionFunc *cb, void *opaque);
f9f05dc5
KW
60static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
61 int64_t sector_num, int nb_sectors,
62 QEMUIOVector *iov);
63static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
64 int64_t sector_num, int nb_sectors,
65 QEMUIOVector *iov);
e7a8a783 66static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs);
c5fbe571
SH
67static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
68 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
1c9805a3
SH
69static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
70 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
b2a61371
SH
71static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
72 int64_t sector_num,
73 QEMUIOVector *qiov,
74 int nb_sectors,
75 BlockDriverCompletionFunc *cb,
76 void *opaque,
8c5873d6 77 bool is_write);
b2a61371 78static void coroutine_fn bdrv_co_do_rw(void *opaque);
ec530c81 79
1b7bdbc1
SH
80static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
81 QTAILQ_HEAD_INITIALIZER(bdrv_states);
7ee930d0 82
8a22f02a
SH
83static QLIST_HEAD(, BlockDriver) bdrv_drivers =
84 QLIST_HEAD_INITIALIZER(bdrv_drivers);
ea2384d3 85
f9092b10
MA
86/* The device to use for VM snapshots */
87static BlockDriverState *bs_snapshots;
88
eb852011
MA
89/* If non-zero, use only whitelisted block drivers */
90static int use_bdrv_whitelist;
91
9e0b22f4
SH
92#ifdef _WIN32
/*
 * Return non-zero when filename starts with an ASCII letter immediately
 * followed by ':' (e.g. "c:"), zero otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_lower = (letter >= 'a' && letter <= 'z');
    const int is_upper = (letter >= 'A' && letter <= 'Z');

    if (!is_lower && !is_upper) {
        /* do not look at filename[1]: the string may be empty */
        return 0;
    }
    return filename[1] == ':';
}
99
/*
 * Return 1 when filename is exactly a drive specification ("d:") or
 * starts with one of the device prefixes "\\.\" or "//./"; 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    int bare_drive;
    int device_path;

    bare_drive = is_windows_drive_prefix(filename) && filename[2] == '\0';
    if (bare_drive) {
        return 1;
    }

    device_path = strstart(filename, "\\\\.\\", NULL) ||
                  strstart(filename, "//./", NULL);
    return device_path ? 1 : 0;
}
110#endif
111
/*
 * Check whether the path starts with "<protocol>:".
 *
 * On Windows, drive letters and device paths contain ':' as well and are
 * explicitly excluded.  Returns 1 if a ':' is found, 0 otherwise.
 */
static int path_has_protocol(const char *path)
{
    const char *colon;

#ifdef _WIN32
    if (is_windows_drive_prefix(path) || is_windows_drive(path)) {
        return 0;
    }
#endif

    colon = strchr(path, ':');
    return colon ? 1 : 0;
}
124
/*
 * Return non-zero if path is absolute.
 *
 * Everything up to and including the first ':' is skipped before the
 * leading separator is tested, so "proto:/dir" counts as absolute.  On
 * Windows both '/' and '\\' are separators, and a path that itself starts
 * with a separator (names like "\\.\d:") is absolute regardless of any ':'.
 */
int path_is_absolute(const char *path)
{
    const char *colon = strchr(path, ':');
    const char *rest = colon ? colon + 1 : path;

#ifdef _WIN32
    if (*path == '/' || *path == '\\') {
        return 1;
    }
    return (*rest == '/' || *rest == '\\');
#else
    return (*rest == '/');
#endif
}
144
83f64091
FB
/*
 * Build in dest (at most dest_size bytes, always NUL-terminated when
 * dest_size > 0) the path designated by filename.
 *
 * An absolute filename is copied unchanged.  Otherwise filename is taken
 * relative to base_path: the part of base_path up to and including its last
 * path separator (or, if that comes later, its first ':') is kept and
 * filename is appended to it.  URLs are supported.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_proto, *after_sep, *cut;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* first character after "<protocol>:" if there is one */
    after_proto = strchr(base_path, ':');
    after_proto = after_proto ? after_proto + 1 : base_path;

    /* first character after the last directory separator */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* keep whichever prefix is longer, clamped to the destination size */
    cut = (after_sep > after_proto) ? after_sep : after_proto;
    prefix_len = cut - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }

    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
188
5efa9d5a 189void bdrv_register(BlockDriver *bdrv)
ea2384d3 190{
8c5873d6
SH
191 /* Block drivers without coroutine functions need emulation */
192 if (!bdrv->bdrv_co_readv) {
f9f05dc5
KW
193 bdrv->bdrv_co_readv = bdrv_co_readv_em;
194 bdrv->bdrv_co_writev = bdrv_co_writev_em;
195
f8c35c1d
SH
196 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
197 * the block driver lacks aio we need to emulate that too.
198 */
f9f05dc5
KW
199 if (!bdrv->bdrv_aio_readv) {
200 /* add AIO emulation layer */
201 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
202 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
f9f05dc5 203 }
83f64091 204 }
b2e12bc6
CH
205
206 if (!bdrv->bdrv_aio_flush)
207 bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
208
8a22f02a 209 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
ea2384d3 210}
b338082b
FB
211
212/* create a new block device (by default it is empty) */
213BlockDriverState *bdrv_new(const char *device_name)
214{
1b7bdbc1 215 BlockDriverState *bs;
b338082b 216
7267c094 217 bs = g_malloc0(sizeof(BlockDriverState));
b338082b 218 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
ea2384d3 219 if (device_name[0] != '\0') {
1b7bdbc1 220 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
ea2384d3 221 }
28a7282a 222 bdrv_iostatus_disable(bs);
b338082b
FB
223 return bs;
224}
225
ea2384d3
FB
226BlockDriver *bdrv_find_format(const char *format_name)
227{
228 BlockDriver *drv1;
8a22f02a
SH
229 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
230 if (!strcmp(drv1->format_name, format_name)) {
ea2384d3 231 return drv1;
8a22f02a 232 }
ea2384d3
FB
233 }
234 return NULL;
235}
236
eb852011
MA
237static int bdrv_is_whitelisted(BlockDriver *drv)
238{
239 static const char *whitelist[] = {
240 CONFIG_BDRV_WHITELIST
241 };
242 const char **p;
243
244 if (!whitelist[0])
245 return 1; /* no whitelist, anything goes */
246
247 for (p = whitelist; *p; p++) {
248 if (!strcmp(drv->format_name, *p)) {
249 return 1;
250 }
251 }
252 return 0;
253}
254
255BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
256{
257 BlockDriver *drv = bdrv_find_format(format_name);
258 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
259}
260
0e7e1989
KW
261int bdrv_create(BlockDriver *drv, const char* filename,
262 QEMUOptionParameter *options)
ea2384d3
FB
263{
264 if (!drv->bdrv_create)
265 return -ENOTSUP;
0e7e1989
KW
266
267 return drv->bdrv_create(filename, options);
ea2384d3
FB
268}
269
84a12e66
CH
270int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
271{
272 BlockDriver *drv;
273
b50cbabc 274 drv = bdrv_find_protocol(filename);
84a12e66 275 if (drv == NULL) {
16905d71 276 return -ENOENT;
84a12e66
CH
277 }
278
279 return bdrv_create(drv, filename, options);
280}
281
#ifdef _WIN32
/*
 * Store the name of a new temporary file in filename.
 *
 * NOTE(review): 'size' is not consulted on this platform.  The Win32
 * GetTempFileName() API documents its output buffer as MAX_PATH characters,
 * so callers presumably must pass at least that much -- confirm.
 */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
/*
 * Store in filename (at most size bytes) the name of a temporary file
 * created under $TMPDIR, or under /tmp when TMPDIR is unset.
 *
 * The file is created by mkstemp() and closed again immediately; only the
 * name is handed back, so another process may touch the file before the
 * caller reopens it.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;

    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);

    fd = mkstemp(filename);
    if (fd >= 0) {
        /* mkstemp() returns -1 on failure; never pass that to close() */
        close(fd);
    }
}
#endif
fc01f7e7 304
84a12e66
CH
305/*
306 * Detect host devices. By convention, /dev/cdrom[N] is always
307 * recognized as a host CDROM.
308 */
309static BlockDriver *find_hdev_driver(const char *filename)
310{
311 int score_max = 0, score;
312 BlockDriver *drv = NULL, *d;
313
314 QLIST_FOREACH(d, &bdrv_drivers, list) {
315 if (d->bdrv_probe_device) {
316 score = d->bdrv_probe_device(filename);
317 if (score > score_max) {
318 score_max = score;
319 drv = d;
320 }
321 }
322 }
323
324 return drv;
325}
326
b50cbabc 327BlockDriver *bdrv_find_protocol(const char *filename)
83f64091
FB
328{
329 BlockDriver *drv1;
330 char protocol[128];
1cec71e3 331 int len;
83f64091 332 const char *p;
19cb3738 333
66f82cee
KW
334 /* TODO Drivers without bdrv_file_open must be specified explicitly */
335
39508e7a
CH
336 /*
337 * XXX(hch): we really should not let host device detection
338 * override an explicit protocol specification, but moving this
339 * later breaks access to device names with colons in them.
340 * Thanks to the brain-dead persistent naming schemes on udev-
341 * based Linux systems those actually are quite common.
342 */
343 drv1 = find_hdev_driver(filename);
344 if (drv1) {
345 return drv1;
346 }
347
9e0b22f4 348 if (!path_has_protocol(filename)) {
39508e7a 349 return bdrv_find_format("file");
84a12e66 350 }
9e0b22f4
SH
351 p = strchr(filename, ':');
352 assert(p != NULL);
1cec71e3
AL
353 len = p - filename;
354 if (len > sizeof(protocol) - 1)
355 len = sizeof(protocol) - 1;
356 memcpy(protocol, filename, len);
357 protocol[len] = '\0';
8a22f02a 358 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
5fafdf24 359 if (drv1->protocol_name &&
8a22f02a 360 !strcmp(drv1->protocol_name, protocol)) {
83f64091 361 return drv1;
8a22f02a 362 }
83f64091
FB
363 }
364 return NULL;
365}
366
c98ac35d 367static int find_image_format(const char *filename, BlockDriver **pdrv)
f3a5d3f8
CH
368{
369 int ret, score, score_max;
370 BlockDriver *drv1, *drv;
371 uint8_t buf[2048];
372 BlockDriverState *bs;
373
f5edb014 374 ret = bdrv_file_open(&bs, filename, 0);
c98ac35d
SW
375 if (ret < 0) {
376 *pdrv = NULL;
377 return ret;
378 }
f8ea0b00 379
08a00559
KW
380 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
381 if (bs->sg || !bdrv_is_inserted(bs)) {
1a396859 382 bdrv_delete(bs);
c98ac35d
SW
383 drv = bdrv_find_format("raw");
384 if (!drv) {
385 ret = -ENOENT;
386 }
387 *pdrv = drv;
388 return ret;
1a396859 389 }
f8ea0b00 390
83f64091
FB
391 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
392 bdrv_delete(bs);
393 if (ret < 0) {
c98ac35d
SW
394 *pdrv = NULL;
395 return ret;
83f64091
FB
396 }
397
ea2384d3 398 score_max = 0;
84a12e66 399 drv = NULL;
8a22f02a 400 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
83f64091
FB
401 if (drv1->bdrv_probe) {
402 score = drv1->bdrv_probe(buf, ret, filename);
403 if (score > score_max) {
404 score_max = score;
405 drv = drv1;
406 }
0849bf08 407 }
fc01f7e7 408 }
c98ac35d
SW
409 if (!drv) {
410 ret = -ENOENT;
411 }
412 *pdrv = drv;
413 return ret;
ea2384d3
FB
414}
415
51762288
SH
416/**
417 * Set the current 'total_sectors' value
418 */
419static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
420{
421 BlockDriver *drv = bs->drv;
422
396759ad
NB
423 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
424 if (bs->sg)
425 return 0;
426
51762288
SH
427 /* query actual device if possible, otherwise just trust the hint */
428 if (drv->bdrv_getlength) {
429 int64_t length = drv->bdrv_getlength(bs);
430 if (length < 0) {
431 return length;
432 }
433 hint = length >> BDRV_SECTOR_BITS;
434 }
435
436 bs->total_sectors = hint;
437 return 0;
438}
439
c3993cdc
SH
440/**
441 * Set open flags for a given cache mode
442 *
443 * Return 0 on success, -1 if the cache mode was invalid.
444 */
445int bdrv_parse_cache_flags(const char *mode, int *flags)
446{
447 *flags &= ~BDRV_O_CACHE_MASK;
448
449 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
450 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
92196b2f
SH
451 } else if (!strcmp(mode, "directsync")) {
452 *flags |= BDRV_O_NOCACHE;
c3993cdc
SH
453 } else if (!strcmp(mode, "writeback")) {
454 *flags |= BDRV_O_CACHE_WB;
455 } else if (!strcmp(mode, "unsafe")) {
456 *flags |= BDRV_O_CACHE_WB;
457 *flags |= BDRV_O_NO_FLUSH;
458 } else if (!strcmp(mode, "writethrough")) {
459 /* this is the default */
460 } else {
461 return -1;
462 }
463
464 return 0;
465}
466
57915332
KW
467/*
468 * Common part for opening disk images and files
469 */
470static int bdrv_open_common(BlockDriverState *bs, const char *filename,
471 int flags, BlockDriver *drv)
472{
473 int ret, open_flags;
474
475 assert(drv != NULL);
476
28dcee10
SH
477 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
478
66f82cee 479 bs->file = NULL;
51762288 480 bs->total_sectors = 0;
57915332
KW
481 bs->encrypted = 0;
482 bs->valid_key = 0;
483 bs->open_flags = flags;
57915332
KW
484 bs->buffer_alignment = 512;
485
486 pstrcpy(bs->filename, sizeof(bs->filename), filename);
487
488 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
489 return -ENOTSUP;
490 }
491
492 bs->drv = drv;
7267c094 493 bs->opaque = g_malloc0(drv->instance_size);
57915332 494
a6599793 495 if (flags & BDRV_O_CACHE_WB)
57915332
KW
496 bs->enable_write_cache = 1;
497
498 /*
499 * Clear flags that are internal to the block layer before opening the
500 * image.
501 */
502 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
503
504 /*
ebabb67a 505 * Snapshots should be writable.
57915332
KW
506 */
507 if (bs->is_temporary) {
508 open_flags |= BDRV_O_RDWR;
509 }
510
66f82cee
KW
511 /* Open the image, either directly or using a protocol */
512 if (drv->bdrv_file_open) {
513 ret = drv->bdrv_file_open(bs, filename, open_flags);
514 } else {
515 ret = bdrv_file_open(&bs->file, filename, open_flags);
516 if (ret >= 0) {
517 ret = drv->bdrv_open(bs, open_flags);
518 }
519 }
520
57915332
KW
521 if (ret < 0) {
522 goto free_and_fail;
523 }
524
525 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
51762288
SH
526
527 ret = refresh_total_sectors(bs, bs->total_sectors);
528 if (ret < 0) {
529 goto free_and_fail;
57915332 530 }
51762288 531
57915332
KW
532#ifndef _WIN32
533 if (bs->is_temporary) {
534 unlink(filename);
535 }
536#endif
537 return 0;
538
539free_and_fail:
66f82cee
KW
540 if (bs->file) {
541 bdrv_delete(bs->file);
542 bs->file = NULL;
543 }
7267c094 544 g_free(bs->opaque);
57915332
KW
545 bs->opaque = NULL;
546 bs->drv = NULL;
547 return ret;
548}
549
b6ce07aa
KW
550/*
551 * Opens a file using a protocol (file, host_device, nbd, ...)
552 */
83f64091 553int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
ea2384d3 554{
83f64091 555 BlockDriverState *bs;
6db95603 556 BlockDriver *drv;
83f64091
FB
557 int ret;
558
b50cbabc 559 drv = bdrv_find_protocol(filename);
6db95603
CH
560 if (!drv) {
561 return -ENOENT;
562 }
563
83f64091 564 bs = bdrv_new("");
b6ce07aa 565 ret = bdrv_open_common(bs, filename, flags, drv);
83f64091
FB
566 if (ret < 0) {
567 bdrv_delete(bs);
568 return ret;
3b0d4f61 569 }
71d0770c 570 bs->growable = 1;
83f64091
FB
571 *pbs = bs;
572 return 0;
573}
574
b6ce07aa
KW
575/*
576 * Opens a disk image (raw, qcow2, vmdk, ...)
577 */
d6e9098e
KW
578int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
579 BlockDriver *drv)
ea2384d3 580{
b6ce07aa 581 int ret;
712e7874 582
83f64091 583 if (flags & BDRV_O_SNAPSHOT) {
ea2384d3
FB
584 BlockDriverState *bs1;
585 int64_t total_size;
7c96d46e 586 int is_protocol = 0;
91a073a9
KW
587 BlockDriver *bdrv_qcow2;
588 QEMUOptionParameter *options;
b6ce07aa
KW
589 char tmp_filename[PATH_MAX];
590 char backing_filename[PATH_MAX];
3b46e624 591
ea2384d3
FB
592 /* if snapshot, we create a temporary backing file and open it
593 instead of opening 'filename' directly */
33e3963e 594
ea2384d3
FB
595 /* if there is a backing file, use it */
596 bs1 = bdrv_new("");
d6e9098e 597 ret = bdrv_open(bs1, filename, 0, drv);
51d7c00c 598 if (ret < 0) {
ea2384d3 599 bdrv_delete(bs1);
51d7c00c 600 return ret;
ea2384d3 601 }
3e82990b 602 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
7c96d46e
AL
603
604 if (bs1->drv && bs1->drv->protocol_name)
605 is_protocol = 1;
606
ea2384d3 607 bdrv_delete(bs1);
3b46e624 608
ea2384d3 609 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
7c96d46e
AL
610
611 /* Real path is meaningless for protocols */
612 if (is_protocol)
613 snprintf(backing_filename, sizeof(backing_filename),
614 "%s", filename);
114cdfa9
KS
615 else if (!realpath(filename, backing_filename))
616 return -errno;
7c96d46e 617
91a073a9
KW
618 bdrv_qcow2 = bdrv_find_format("qcow2");
619 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
620
3e82990b 621 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
91a073a9
KW
622 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
623 if (drv) {
624 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
625 drv->format_name);
626 }
627
628 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
d748768c 629 free_option_parameters(options);
51d7c00c
AL
630 if (ret < 0) {
631 return ret;
ea2384d3 632 }
91a073a9 633
ea2384d3 634 filename = tmp_filename;
91a073a9 635 drv = bdrv_qcow2;
ea2384d3
FB
636 bs->is_temporary = 1;
637 }
712e7874 638
b6ce07aa 639 /* Find the right image format driver */
6db95603 640 if (!drv) {
c98ac35d 641 ret = find_image_format(filename, &drv);
51d7c00c 642 }
6987307c 643
51d7c00c 644 if (!drv) {
51d7c00c 645 goto unlink_and_fail;
ea2384d3 646 }
b6ce07aa
KW
647
648 /* Open the image */
649 ret = bdrv_open_common(bs, filename, flags, drv);
650 if (ret < 0) {
6987307c
CH
651 goto unlink_and_fail;
652 }
653
b6ce07aa
KW
654 /* If there is a backing file, use it */
655 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
656 char backing_filename[PATH_MAX];
657 int back_flags;
658 BlockDriver *back_drv = NULL;
659
660 bs->backing_hd = bdrv_new("");
df2dbb4a
SH
661
662 if (path_has_protocol(bs->backing_file)) {
663 pstrcpy(backing_filename, sizeof(backing_filename),
664 bs->backing_file);
665 } else {
666 path_combine(backing_filename, sizeof(backing_filename),
667 filename, bs->backing_file);
668 }
669
670 if (bs->backing_format[0] != '\0') {
b6ce07aa 671 back_drv = bdrv_find_format(bs->backing_format);
df2dbb4a 672 }
b6ce07aa
KW
673
674 /* backing files always opened read-only */
675 back_flags =
676 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
677
678 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
679 if (ret < 0) {
680 bdrv_close(bs);
681 return ret;
682 }
683 if (bs->is_temporary) {
684 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
685 } else {
686 /* base image inherits from "parent" */
687 bs->backing_hd->keep_read_only = bs->keep_read_only;
688 }
689 }
690
691 if (!bdrv_key_required(bs)) {
7d4b4ba5 692 bdrv_dev_change_media_cb(bs, true);
b6ce07aa
KW
693 }
694
695 return 0;
696
697unlink_and_fail:
698 if (bs->is_temporary) {
699 unlink(filename);
700 }
701 return ret;
702}
703
fc01f7e7
FB
704void bdrv_close(BlockDriverState *bs)
705{
19cb3738 706 if (bs->drv) {
f9092b10
MA
707 if (bs == bs_snapshots) {
708 bs_snapshots = NULL;
709 }
557df6ac 710 if (bs->backing_hd) {
ea2384d3 711 bdrv_delete(bs->backing_hd);
557df6ac
SH
712 bs->backing_hd = NULL;
713 }
ea2384d3 714 bs->drv->bdrv_close(bs);
7267c094 715 g_free(bs->opaque);
ea2384d3
FB
716#ifdef _WIN32
717 if (bs->is_temporary) {
718 unlink(bs->filename);
719 }
67b915a5 720#endif
ea2384d3
FB
721 bs->opaque = NULL;
722 bs->drv = NULL;
b338082b 723
66f82cee
KW
724 if (bs->file != NULL) {
725 bdrv_close(bs->file);
726 }
727
7d4b4ba5 728 bdrv_dev_change_media_cb(bs, false);
b338082b
FB
729 }
730}
731
2bc93fed
MK
732void bdrv_close_all(void)
733{
734 BlockDriverState *bs;
735
736 QTAILQ_FOREACH(bs, &bdrv_states, list) {
737 bdrv_close(bs);
738 }
739}
740
d22b2f41
RH
741/* make a BlockDriverState anonymous by removing from bdrv_state list.
742 Also, NULL terminate the device_name to prevent double remove */
743void bdrv_make_anon(BlockDriverState *bs)
744{
745 if (bs->device_name[0] != '\0') {
746 QTAILQ_REMOVE(&bdrv_states, bs, list);
747 }
748 bs->device_name[0] = '\0';
749}
750
b338082b
FB
751void bdrv_delete(BlockDriverState *bs)
752{
fa879d62 753 assert(!bs->dev);
18846dee 754
1b7bdbc1 755 /* remove from list, if necessary */
d22b2f41 756 bdrv_make_anon(bs);
34c6f050 757
b338082b 758 bdrv_close(bs);
66f82cee
KW
759 if (bs->file != NULL) {
760 bdrv_delete(bs->file);
761 }
762
f9092b10 763 assert(bs != bs_snapshots);
7267c094 764 g_free(bs);
fc01f7e7
FB
765}
766
fa879d62
MA
767int bdrv_attach_dev(BlockDriverState *bs, void *dev)
768/* TODO change to DeviceState *dev when all users are qdevified */
18846dee 769{
fa879d62 770 if (bs->dev) {
18846dee
MA
771 return -EBUSY;
772 }
fa879d62 773 bs->dev = dev;
28a7282a 774 bdrv_iostatus_reset(bs);
18846dee
MA
775 return 0;
776}
777
fa879d62
MA
778/* TODO qdevified devices don't use this, remove when devices are qdevified */
779void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
18846dee 780{
fa879d62
MA
781 if (bdrv_attach_dev(bs, dev) < 0) {
782 abort();
783 }
784}
785
786void bdrv_detach_dev(BlockDriverState *bs, void *dev)
787/* TODO change to DeviceState *dev when all users are qdevified */
788{
789 assert(bs->dev == dev);
790 bs->dev = NULL;
0e49de52
MA
791 bs->dev_ops = NULL;
792 bs->dev_opaque = NULL;
29e05f20 793 bs->buffer_alignment = 512;
18846dee
MA
794}
795
fa879d62
MA
796/* TODO change to return DeviceState * when all users are qdevified */
797void *bdrv_get_attached_dev(BlockDriverState *bs)
18846dee 798{
fa879d62 799 return bs->dev;
18846dee
MA
800}
801
0e49de52
MA
802void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
803 void *opaque)
804{
805 bs->dev_ops = ops;
806 bs->dev_opaque = opaque;
2c6942fa
MA
807 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
808 bs_snapshots = NULL;
809 }
0e49de52
MA
810}
811
7d4b4ba5 812static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
0e49de52 813{
145feb17 814 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
7d4b4ba5 815 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
145feb17
MA
816 }
817}
818
2c6942fa
MA
819bool bdrv_dev_has_removable_media(BlockDriverState *bs)
820{
821 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
822}
823
e4def80b
MA
824bool bdrv_dev_is_tray_open(BlockDriverState *bs)
825{
826 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
827 return bs->dev_ops->is_tray_open(bs->dev_opaque);
828 }
829 return false;
830}
831
145feb17
MA
832static void bdrv_dev_resize_cb(BlockDriverState *bs)
833{
834 if (bs->dev_ops && bs->dev_ops->resize_cb) {
835 bs->dev_ops->resize_cb(bs->dev_opaque);
0e49de52
MA
836 }
837}
838
f107639a
MA
839bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
840{
841 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
842 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
843 }
844 return false;
845}
846
e97fc193
AL
847/*
848 * Run consistency checks on an image
849 *
e076f338 850 * Returns 0 if the check could be completed (it doesn't mean that the image is
a1c7273b 851 * free of errors) or -errno when an internal error occurred. The results of the
e076f338 852 * check are stored in res.
e97fc193 853 */
e076f338 854int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
e97fc193
AL
855{
856 if (bs->drv->bdrv_check == NULL) {
857 return -ENOTSUP;
858 }
859
e076f338 860 memset(res, 0, sizeof(*res));
9ac228e0 861 return bs->drv->bdrv_check(bs, res);
e97fc193
AL
862}
863
8a426614
KW
864#define COMMIT_BUF_SECTORS 2048
865
33e3963e
FB
866/* commit COW file into the raw image */
867int bdrv_commit(BlockDriverState *bs)
868{
19cb3738 869 BlockDriver *drv = bs->drv;
ee181196 870 BlockDriver *backing_drv;
8a426614
KW
871 int64_t sector, total_sectors;
872 int n, ro, open_flags;
4dca4b63 873 int ret = 0, rw_ret = 0;
8a426614 874 uint8_t *buf;
4dca4b63
NS
875 char filename[1024];
876 BlockDriverState *bs_rw, *bs_ro;
33e3963e 877
19cb3738
FB
878 if (!drv)
879 return -ENOMEDIUM;
4dca4b63
NS
880
881 if (!bs->backing_hd) {
882 return -ENOTSUP;
33e3963e
FB
883 }
884
4dca4b63
NS
885 if (bs->backing_hd->keep_read_only) {
886 return -EACCES;
887 }
ee181196
KW
888
889 backing_drv = bs->backing_hd->drv;
4dca4b63
NS
890 ro = bs->backing_hd->read_only;
891 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
892 open_flags = bs->backing_hd->open_flags;
893
894 if (ro) {
895 /* re-open as RW */
896 bdrv_delete(bs->backing_hd);
897 bs->backing_hd = NULL;
898 bs_rw = bdrv_new("");
ee181196
KW
899 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
900 backing_drv);
4dca4b63
NS
901 if (rw_ret < 0) {
902 bdrv_delete(bs_rw);
903 /* try to re-open read-only */
904 bs_ro = bdrv_new("");
ee181196
KW
905 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
906 backing_drv);
4dca4b63
NS
907 if (ret < 0) {
908 bdrv_delete(bs_ro);
909 /* drive not functional anymore */
910 bs->drv = NULL;
911 return ret;
912 }
913 bs->backing_hd = bs_ro;
914 return rw_ret;
915 }
916 bs->backing_hd = bs_rw;
ea2384d3 917 }
33e3963e 918
6ea44308 919 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
7267c094 920 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
8a426614
KW
921
922 for (sector = 0; sector < total_sectors; sector += n) {
923 if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
924
925 if (bdrv_read(bs, sector, buf, n) != 0) {
926 ret = -EIO;
927 goto ro_cleanup;
928 }
929
930 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
931 ret = -EIO;
932 goto ro_cleanup;
933 }
ea2384d3 934 }
33e3963e 935 }
95389c86 936
1d44952f
CH
937 if (drv->bdrv_make_empty) {
938 ret = drv->bdrv_make_empty(bs);
939 bdrv_flush(bs);
940 }
95389c86 941
3f5075ae
CH
942 /*
943 * Make sure all data we wrote to the backing device is actually
944 * stable on disk.
945 */
946 if (bs->backing_hd)
947 bdrv_flush(bs->backing_hd);
4dca4b63
NS
948
949ro_cleanup:
7267c094 950 g_free(buf);
4dca4b63
NS
951
952 if (ro) {
953 /* re-open as RO */
954 bdrv_delete(bs->backing_hd);
955 bs->backing_hd = NULL;
956 bs_ro = bdrv_new("");
ee181196
KW
957 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
958 backing_drv);
4dca4b63
NS
959 if (ret < 0) {
960 bdrv_delete(bs_ro);
961 /* drive not functional anymore */
962 bs->drv = NULL;
963 return ret;
964 }
965 bs->backing_hd = bs_ro;
966 bs->backing_hd->keep_read_only = 0;
967 }
968
1d44952f 969 return ret;
33e3963e
FB
970}
971
6ab4b5ab
MA
972void bdrv_commit_all(void)
973{
974 BlockDriverState *bs;
975
976 QTAILQ_FOREACH(bs, &bdrv_states, list) {
977 bdrv_commit(bs);
978 }
979}
980
756e6736
KW
981/*
982 * Return values:
983 * 0 - success
984 * -EINVAL - backing format specified, but no file
985 * -ENOSPC - can't update the backing file because no space is left in the
986 * image file header
987 * -ENOTSUP - format driver doesn't support changing the backing file
988 */
989int bdrv_change_backing_file(BlockDriverState *bs,
990 const char *backing_file, const char *backing_fmt)
991{
992 BlockDriver *drv = bs->drv;
993
994 if (drv->bdrv_change_backing_file != NULL) {
995 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
996 } else {
997 return -ENOTSUP;
998 }
999}
1000
71d0770c
AL
1001static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1002 size_t size)
1003{
1004 int64_t len;
1005
1006 if (!bdrv_is_inserted(bs))
1007 return -ENOMEDIUM;
1008
1009 if (bs->growable)
1010 return 0;
1011
1012 len = bdrv_getlength(bs);
1013
fbb7b4e0
KW
1014 if (offset < 0)
1015 return -EIO;
1016
1017 if ((offset > len) || (len - offset < size))
71d0770c
AL
1018 return -EIO;
1019
1020 return 0;
1021}
1022
1023static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1024 int nb_sectors)
1025{
eb5a3165
JS
1026 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1027 nb_sectors * BDRV_SECTOR_SIZE);
71d0770c
AL
1028}
1029
e7a8a783
KW
1030static inline bool bdrv_has_async_rw(BlockDriver *drv)
1031{
1032 return drv->bdrv_co_readv != bdrv_co_readv_em
1033 || drv->bdrv_aio_readv != bdrv_aio_readv_em;
1034}
1035
1036static inline bool bdrv_has_async_flush(BlockDriver *drv)
1037{
1038 return drv->bdrv_aio_flush != bdrv_aio_flush_em;
1039}
1040
1c9805a3
SH
1041typedef struct RwCo {
1042 BlockDriverState *bs;
1043 int64_t sector_num;
1044 int nb_sectors;
1045 QEMUIOVector *qiov;
1046 bool is_write;
1047 int ret;
1048} RwCo;
1049
1050static void coroutine_fn bdrv_rw_co_entry(void *opaque)
fc01f7e7 1051{
1c9805a3 1052 RwCo *rwco = opaque;
ea2384d3 1053
1c9805a3
SH
1054 if (!rwco->is_write) {
1055 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
1056 rwco->nb_sectors, rwco->qiov);
1057 } else {
1058 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
1059 rwco->nb_sectors, rwco->qiov);
1060 }
1061}
e7a8a783 1062
1c9805a3
SH
1063/*
1064 * Process a synchronous request using coroutines
1065 */
1066static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1067 int nb_sectors, bool is_write)
1068{
1069 QEMUIOVector qiov;
1070 struct iovec iov = {
1071 .iov_base = (void *)buf,
1072 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1073 };
1074 Coroutine *co;
1075 RwCo rwco = {
1076 .bs = bs,
1077 .sector_num = sector_num,
1078 .nb_sectors = nb_sectors,
1079 .qiov = &qiov,
1080 .is_write = is_write,
1081 .ret = NOT_DONE,
1082 };
e7a8a783 1083
1c9805a3 1084 qemu_iovec_init_external(&qiov, &iov, 1);
e7a8a783 1085
1c9805a3
SH
1086 if (qemu_in_coroutine()) {
1087 /* Fast-path if already in coroutine context */
1088 bdrv_rw_co_entry(&rwco);
1089 } else {
1090 co = qemu_coroutine_create(bdrv_rw_co_entry);
1091 qemu_coroutine_enter(co, &rwco);
1092 while (rwco.ret == NOT_DONE) {
1093 qemu_aio_wait();
1094 }
1095 }
1096 return rwco.ret;
1097}
b338082b 1098
1c9805a3
SH
1099/* return < 0 if error. See bdrv_write() for the return codes */
1100int bdrv_read(BlockDriverState *bs, int64_t sector_num,
1101 uint8_t *buf, int nb_sectors)
1102{
1103 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
fc01f7e7
FB
1104}
1105
7cd1e32a 1106static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
a55eb92c 1107 int nb_sectors, int dirty)
7cd1e32a 1108{
1109 int64_t start, end;
c6d22830 1110 unsigned long val, idx, bit;
a55eb92c 1111
6ea44308 1112 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
c6d22830 1113 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c
JK
1114
1115 for (; start <= end; start++) {
c6d22830
JK
1116 idx = start / (sizeof(unsigned long) * 8);
1117 bit = start % (sizeof(unsigned long) * 8);
1118 val = bs->dirty_bitmap[idx];
1119 if (dirty) {
6d59fec1 1120 if (!(val & (1UL << bit))) {
aaa0eb75 1121 bs->dirty_count++;
6d59fec1 1122 val |= 1UL << bit;
aaa0eb75 1123 }
c6d22830 1124 } else {
6d59fec1 1125 if (val & (1UL << bit)) {
aaa0eb75 1126 bs->dirty_count--;
6d59fec1 1127 val &= ~(1UL << bit);
aaa0eb75 1128 }
c6d22830
JK
1129 }
1130 bs->dirty_bitmap[idx] = val;
7cd1e32a 1131 }
1132}
1133
5fafdf24 1134/* Return < 0 if error. Important errors are:
19cb3738
FB
1135 -EIO generic I/O error (may happen for all errors)
1136 -ENOMEDIUM No media inserted.
1137 -EINVAL Invalid sector number or nb_sectors
1138 -EACCES Trying to write a read-only device
1139*/
5fafdf24 1140int bdrv_write(BlockDriverState *bs, int64_t sector_num,
fc01f7e7
FB
1141 const uint8_t *buf, int nb_sectors)
1142{
1c9805a3 1143 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
83f64091
FB
1144}
1145
eda578e5
AL
1146int bdrv_pread(BlockDriverState *bs, int64_t offset,
1147 void *buf, int count1)
83f64091 1148{
6ea44308 1149 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
83f64091
FB
1150 int len, nb_sectors, count;
1151 int64_t sector_num;
9a8c4cce 1152 int ret;
83f64091
FB
1153
1154 count = count1;
1155 /* first read to align to sector start */
6ea44308 1156 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
83f64091
FB
1157 if (len > count)
1158 len = count;
6ea44308 1159 sector_num = offset >> BDRV_SECTOR_BITS;
83f64091 1160 if (len > 0) {
9a8c4cce
KW
1161 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1162 return ret;
6ea44308 1163 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
83f64091
FB
1164 count -= len;
1165 if (count == 0)
1166 return count1;
1167 sector_num++;
1168 buf += len;
1169 }
1170
1171 /* read the sectors "in place" */
6ea44308 1172 nb_sectors = count >> BDRV_SECTOR_BITS;
83f64091 1173 if (nb_sectors > 0) {
9a8c4cce
KW
1174 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1175 return ret;
83f64091 1176 sector_num += nb_sectors;
6ea44308 1177 len = nb_sectors << BDRV_SECTOR_BITS;
83f64091
FB
1178 buf += len;
1179 count -= len;
1180 }
1181
1182 /* add data from the last sector */
1183 if (count > 0) {
9a8c4cce
KW
1184 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1185 return ret;
83f64091
FB
1186 memcpy(buf, tmp_buf, count);
1187 }
1188 return count1;
1189}
1190
eda578e5
AL
1191int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1192 const void *buf, int count1)
83f64091 1193{
6ea44308 1194 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
83f64091
FB
1195 int len, nb_sectors, count;
1196 int64_t sector_num;
9a8c4cce 1197 int ret;
83f64091
FB
1198
1199 count = count1;
1200 /* first write to align to sector start */
6ea44308 1201 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
83f64091
FB
1202 if (len > count)
1203 len = count;
6ea44308 1204 sector_num = offset >> BDRV_SECTOR_BITS;
83f64091 1205 if (len > 0) {
9a8c4cce
KW
1206 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1207 return ret;
6ea44308 1208 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
9a8c4cce
KW
1209 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1210 return ret;
83f64091
FB
1211 count -= len;
1212 if (count == 0)
1213 return count1;
1214 sector_num++;
1215 buf += len;
1216 }
1217
1218 /* write the sectors "in place" */
6ea44308 1219 nb_sectors = count >> BDRV_SECTOR_BITS;
83f64091 1220 if (nb_sectors > 0) {
9a8c4cce
KW
1221 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1222 return ret;
83f64091 1223 sector_num += nb_sectors;
6ea44308 1224 len = nb_sectors << BDRV_SECTOR_BITS;
83f64091
FB
1225 buf += len;
1226 count -= len;
1227 }
1228
1229 /* add data from the last sector */
1230 if (count > 0) {
9a8c4cce
KW
1231 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1232 return ret;
83f64091 1233 memcpy(tmp_buf, buf, count);
9a8c4cce
KW
1234 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1235 return ret;
83f64091
FB
1236 }
1237 return count1;
1238}
83f64091 1239
f08145fe
KW
1240/*
1241 * Writes to the file and ensures that no writes are reordered across this
1242 * request (acts as a barrier)
1243 *
1244 * Returns 0 on success, -errno in error cases.
1245 */
1246int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1247 const void *buf, int count)
1248{
1249 int ret;
1250
1251 ret = bdrv_pwrite(bs, offset, buf, count);
1252 if (ret < 0) {
1253 return ret;
1254 }
1255
92196b2f
SH
1256 /* No flush needed for cache modes that use O_DSYNC */
1257 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
f08145fe
KW
1258 bdrv_flush(bs);
1259 }
1260
1261 return 0;
1262}
1263
c5fbe571
SH
1264/*
1265 * Handle a read request in coroutine context
1266 */
1267static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
1268 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
da1fa91d
KW
1269{
1270 BlockDriver *drv = bs->drv;
1271
da1fa91d
KW
1272 if (!drv) {
1273 return -ENOMEDIUM;
1274 }
1275 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1276 return -EIO;
1277 }
1278
1279 return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1280}
1281
c5fbe571 1282int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
da1fa91d
KW
1283 int nb_sectors, QEMUIOVector *qiov)
1284{
c5fbe571 1285 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
da1fa91d 1286
c5fbe571
SH
1287 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
1288}
1289
1290/*
1291 * Handle a write request in coroutine context
1292 */
1293static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
1294 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1295{
1296 BlockDriver *drv = bs->drv;
6b7cb247 1297 int ret;
da1fa91d
KW
1298
1299 if (!bs->drv) {
1300 return -ENOMEDIUM;
1301 }
1302 if (bs->read_only) {
1303 return -EACCES;
1304 }
1305 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1306 return -EIO;
1307 }
1308
6b7cb247
SH
1309 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1310
da1fa91d
KW
1311 if (bs->dirty_bitmap) {
1312 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1313 }
1314
1315 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1316 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1317 }
1318
6b7cb247 1319 return ret;
da1fa91d
KW
1320}
1321
c5fbe571
SH
1322int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1323 int nb_sectors, QEMUIOVector *qiov)
1324{
1325 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1326
1327 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
1328}
1329
83f64091
FB
1330/**
1331 * Truncate file to 'offset' bytes (needed only for file protocols)
1332 */
1333int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1334{
1335 BlockDriver *drv = bs->drv;
51762288 1336 int ret;
83f64091 1337 if (!drv)
19cb3738 1338 return -ENOMEDIUM;
83f64091
FB
1339 if (!drv->bdrv_truncate)
1340 return -ENOTSUP;
59f2689d
NS
1341 if (bs->read_only)
1342 return -EACCES;
8591675f
MT
1343 if (bdrv_in_use(bs))
1344 return -EBUSY;
51762288
SH
1345 ret = drv->bdrv_truncate(bs, offset);
1346 if (ret == 0) {
1347 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
145feb17 1348 bdrv_dev_resize_cb(bs);
51762288
SH
1349 }
1350 return ret;
83f64091
FB
1351}
1352
4a1d5e1f
FZ
1353/**
1354 * Length of a allocated file in bytes. Sparse files are counted by actual
1355 * allocated space. Return < 0 if error or unknown.
1356 */
1357int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1358{
1359 BlockDriver *drv = bs->drv;
1360 if (!drv) {
1361 return -ENOMEDIUM;
1362 }
1363 if (drv->bdrv_get_allocated_file_size) {
1364 return drv->bdrv_get_allocated_file_size(bs);
1365 }
1366 if (bs->file) {
1367 return bdrv_get_allocated_file_size(bs->file);
1368 }
1369 return -ENOTSUP;
1370}
1371
83f64091
FB
1372/**
1373 * Length of a file in bytes. Return < 0 if error or unknown.
1374 */
1375int64_t bdrv_getlength(BlockDriverState *bs)
1376{
1377 BlockDriver *drv = bs->drv;
1378 if (!drv)
19cb3738 1379 return -ENOMEDIUM;
51762288 1380
2c6942fa 1381 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
46a4e4e6
SH
1382 if (drv->bdrv_getlength) {
1383 return drv->bdrv_getlength(bs);
1384 }
83f64091 1385 }
46a4e4e6 1386 return bs->total_sectors * BDRV_SECTOR_SIZE;
fc01f7e7
FB
1387}
1388
19cb3738 1389/* return 0 as number of sectors if no device present or error */
96b8f136 1390void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
fc01f7e7 1391{
19cb3738
FB
1392 int64_t length;
1393 length = bdrv_getlength(bs);
1394 if (length < 0)
1395 length = 0;
1396 else
6ea44308 1397 length = length >> BDRV_SECTOR_BITS;
19cb3738 1398 *nb_sectors_ptr = length;
fc01f7e7 1399}
cf98951b 1400
f3d54fc4
AL
/* One entry of the MSDOS partition table as parsed by guess_disk_lchs() */
struct partition {
    uint8_t boot_ind;       /* 0x80 - active */
    uint8_t head;           /* starting head */
    uint8_t sector;         /* starting sector */
    uint8_t cyl;            /* starting cylinder */
    uint8_t sys_ind;        /* What partition type */
    uint8_t end_head;       /* end head */
    uint8_t end_sector;     /* end sector */
    uint8_t end_cyl;        /* end cylinder */
    uint32_t start_sect;    /* starting sector counting from 0 */
    uint32_t nr_sects;      /* nr of sectors in partition */
} QEMU_PACKED;
f3d54fc4
AL
1413
1414/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1415static int guess_disk_lchs(BlockDriverState *bs,
1416 int *pcylinders, int *pheads, int *psectors)
1417{
eb5a3165 1418 uint8_t buf[BDRV_SECTOR_SIZE];
f3d54fc4
AL
1419 int ret, i, heads, sectors, cylinders;
1420 struct partition *p;
1421 uint32_t nr_sects;
a38131b6 1422 uint64_t nb_sectors;
f3d54fc4
AL
1423
1424 bdrv_get_geometry(bs, &nb_sectors);
1425
1426 ret = bdrv_read(bs, 0, buf, 1);
1427 if (ret < 0)
1428 return -1;
1429 /* test msdos magic */
1430 if (buf[510] != 0x55 || buf[511] != 0xaa)
1431 return -1;
1432 for(i = 0; i < 4; i++) {
1433 p = ((struct partition *)(buf + 0x1be)) + i;
1434 nr_sects = le32_to_cpu(p->nr_sects);
1435 if (nr_sects && p->end_head) {
1436 /* We make the assumption that the partition terminates on
1437 a cylinder boundary */
1438 heads = p->end_head + 1;
1439 sectors = p->end_sector & 63;
1440 if (sectors == 0)
1441 continue;
1442 cylinders = nb_sectors / (heads * sectors);
1443 if (cylinders < 1 || cylinders > 16383)
1444 continue;
1445 *pheads = heads;
1446 *psectors = sectors;
1447 *pcylinders = cylinders;
1448#if 0
1449 printf("guessed geometry: LCHS=%d %d %d\n",
1450 cylinders, heads, sectors);
1451#endif
1452 return 0;
1453 }
1454 }
1455 return -1;
1456}
1457
1458void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1459{
1460 int translation, lba_detected = 0;
1461 int cylinders, heads, secs;
a38131b6 1462 uint64_t nb_sectors;
f3d54fc4
AL
1463
1464 /* if a geometry hint is available, use it */
1465 bdrv_get_geometry(bs, &nb_sectors);
1466 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1467 translation = bdrv_get_translation_hint(bs);
1468 if (cylinders != 0) {
1469 *pcyls = cylinders;
1470 *pheads = heads;
1471 *psecs = secs;
1472 } else {
1473 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1474 if (heads > 16) {
1475 /* if heads > 16, it means that a BIOS LBA
1476 translation was active, so the default
1477 hardware geometry is OK */
1478 lba_detected = 1;
1479 goto default_geometry;
1480 } else {
1481 *pcyls = cylinders;
1482 *pheads = heads;
1483 *psecs = secs;
1484 /* disable any translation to be in sync with
1485 the logical geometry */
1486 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1487 bdrv_set_translation_hint(bs,
1488 BIOS_ATA_TRANSLATION_NONE);
1489 }
1490 }
1491 } else {
1492 default_geometry:
1493 /* if no geometry, use a standard physical disk geometry */
1494 cylinders = nb_sectors / (16 * 63);
1495
1496 if (cylinders > 16383)
1497 cylinders = 16383;
1498 else if (cylinders < 2)
1499 cylinders = 2;
1500 *pcyls = cylinders;
1501 *pheads = 16;
1502 *psecs = 63;
1503 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1504 if ((*pcyls * *pheads) <= 131072) {
1505 bdrv_set_translation_hint(bs,
1506 BIOS_ATA_TRANSLATION_LARGE);
1507 } else {
1508 bdrv_set_translation_hint(bs,
1509 BIOS_ATA_TRANSLATION_LBA);
1510 }
1511 }
1512 }
1513 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1514 }
1515}
1516
5fafdf24 1517void bdrv_set_geometry_hint(BlockDriverState *bs,
b338082b
FB
1518 int cyls, int heads, int secs)
1519{
1520 bs->cyls = cyls;
1521 bs->heads = heads;
1522 bs->secs = secs;
1523}
1524
46d4767d
FB
1525void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1526{
1527 bs->translation = translation;
1528}
1529
5fafdf24 1530void bdrv_get_geometry_hint(BlockDriverState *bs,
b338082b
FB
1531 int *pcyls, int *pheads, int *psecs)
1532{
1533 *pcyls = bs->cyls;
1534 *pheads = bs->heads;
1535 *psecs = bs->secs;
1536}
1537
5bbdbb46
BS
1538/* Recognize floppy formats */
1539typedef struct FDFormat {
1540 FDriveType drive;
1541 uint8_t last_sect;
1542 uint8_t max_track;
1543 uint8_t max_head;
1544} FDFormat;
1545
1546static const FDFormat fd_formats[] = {
1547 /* First entry is default format */
1548 /* 1.44 MB 3"1/2 floppy disks */
1549 { FDRIVE_DRV_144, 18, 80, 1, },
1550 { FDRIVE_DRV_144, 20, 80, 1, },
1551 { FDRIVE_DRV_144, 21, 80, 1, },
1552 { FDRIVE_DRV_144, 21, 82, 1, },
1553 { FDRIVE_DRV_144, 21, 83, 1, },
1554 { FDRIVE_DRV_144, 22, 80, 1, },
1555 { FDRIVE_DRV_144, 23, 80, 1, },
1556 { FDRIVE_DRV_144, 24, 80, 1, },
1557 /* 2.88 MB 3"1/2 floppy disks */
1558 { FDRIVE_DRV_288, 36, 80, 1, },
1559 { FDRIVE_DRV_288, 39, 80, 1, },
1560 { FDRIVE_DRV_288, 40, 80, 1, },
1561 { FDRIVE_DRV_288, 44, 80, 1, },
1562 { FDRIVE_DRV_288, 48, 80, 1, },
1563 /* 720 kB 3"1/2 floppy disks */
1564 { FDRIVE_DRV_144, 9, 80, 1, },
1565 { FDRIVE_DRV_144, 10, 80, 1, },
1566 { FDRIVE_DRV_144, 10, 82, 1, },
1567 { FDRIVE_DRV_144, 10, 83, 1, },
1568 { FDRIVE_DRV_144, 13, 80, 1, },
1569 { FDRIVE_DRV_144, 14, 80, 1, },
1570 /* 1.2 MB 5"1/4 floppy disks */
1571 { FDRIVE_DRV_120, 15, 80, 1, },
1572 { FDRIVE_DRV_120, 18, 80, 1, },
1573 { FDRIVE_DRV_120, 18, 82, 1, },
1574 { FDRIVE_DRV_120, 18, 83, 1, },
1575 { FDRIVE_DRV_120, 20, 80, 1, },
1576 /* 720 kB 5"1/4 floppy disks */
1577 { FDRIVE_DRV_120, 9, 80, 1, },
1578 { FDRIVE_DRV_120, 11, 80, 1, },
1579 /* 360 kB 5"1/4 floppy disks */
1580 { FDRIVE_DRV_120, 9, 40, 1, },
1581 { FDRIVE_DRV_120, 9, 40, 0, },
1582 { FDRIVE_DRV_120, 10, 41, 1, },
1583 { FDRIVE_DRV_120, 10, 42, 1, },
1584 /* 320 kB 5"1/4 floppy disks */
1585 { FDRIVE_DRV_120, 8, 40, 1, },
1586 { FDRIVE_DRV_120, 8, 40, 0, },
1587 /* 360 kB must match 5"1/4 better than 3"1/2... */
1588 { FDRIVE_DRV_144, 9, 80, 0, },
1589 /* end */
1590 { FDRIVE_DRV_NONE, -1, -1, 0, },
1591};
1592
1593void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1594 int *max_track, int *last_sect,
1595 FDriveType drive_in, FDriveType *drive)
1596{
1597 const FDFormat *parse;
1598 uint64_t nb_sectors, size;
1599 int i, first_match, match;
1600
1601 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1602 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1603 /* User defined disk */
1604 } else {
1605 bdrv_get_geometry(bs, &nb_sectors);
1606 match = -1;
1607 first_match = -1;
1608 for (i = 0; ; i++) {
1609 parse = &fd_formats[i];
1610 if (parse->drive == FDRIVE_DRV_NONE) {
1611 break;
1612 }
1613 if (drive_in == parse->drive ||
1614 drive_in == FDRIVE_DRV_NONE) {
1615 size = (parse->max_head + 1) * parse->max_track *
1616 parse->last_sect;
1617 if (nb_sectors == size) {
1618 match = i;
1619 break;
1620 }
1621 if (first_match == -1) {
1622 first_match = i;
1623 }
1624 }
1625 }
1626 if (match == -1) {
1627 if (first_match == -1) {
1628 match = 1;
1629 } else {
1630 match = first_match;
1631 }
1632 parse = &fd_formats[match];
1633 }
1634 *nb_heads = parse->max_head + 1;
1635 *max_track = parse->max_track;
1636 *last_sect = parse->last_sect;
1637 *drive = parse->drive;
1638 }
1639}
1640
46d4767d
FB
1641int bdrv_get_translation_hint(BlockDriverState *bs)
1642{
1643 return bs->translation;
1644}
1645
abd7f68d
MA
1646void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1647 BlockErrorAction on_write_error)
1648{
1649 bs->on_read_error = on_read_error;
1650 bs->on_write_error = on_write_error;
1651}
1652
1653BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1654{
1655 return is_read ? bs->on_read_error : bs->on_write_error;
1656}
1657
b338082b
FB
1658int bdrv_is_read_only(BlockDriverState *bs)
1659{
1660 return bs->read_only;
1661}
1662
985a03b0
TS
1663int bdrv_is_sg(BlockDriverState *bs)
1664{
1665 return bs->sg;
1666}
1667
e900a7b7
CH
1668int bdrv_enable_write_cache(BlockDriverState *bs)
1669{
1670 return bs->enable_write_cache;
1671}
1672
ea2384d3
FB
1673int bdrv_is_encrypted(BlockDriverState *bs)
1674{
1675 if (bs->backing_hd && bs->backing_hd->encrypted)
1676 return 1;
1677 return bs->encrypted;
1678}
1679
c0f4ce77
AL
1680int bdrv_key_required(BlockDriverState *bs)
1681{
1682 BlockDriverState *backing_hd = bs->backing_hd;
1683
1684 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1685 return 1;
1686 return (bs->encrypted && !bs->valid_key);
1687}
1688
ea2384d3
FB
1689int bdrv_set_key(BlockDriverState *bs, const char *key)
1690{
1691 int ret;
1692 if (bs->backing_hd && bs->backing_hd->encrypted) {
1693 ret = bdrv_set_key(bs->backing_hd, key);
1694 if (ret < 0)
1695 return ret;
1696 if (!bs->encrypted)
1697 return 0;
1698 }
fd04a2ae
SH
1699 if (!bs->encrypted) {
1700 return -EINVAL;
1701 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1702 return -ENOMEDIUM;
1703 }
c0f4ce77 1704 ret = bs->drv->bdrv_set_key(bs, key);
bb5fc20f
AL
1705 if (ret < 0) {
1706 bs->valid_key = 0;
1707 } else if (!bs->valid_key) {
1708 bs->valid_key = 1;
1709 /* call the change callback now, we skipped it on open */
7d4b4ba5 1710 bdrv_dev_change_media_cb(bs, true);
bb5fc20f 1711 }
c0f4ce77 1712 return ret;
ea2384d3
FB
1713}
1714
1715void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1716{
19cb3738 1717 if (!bs->drv) {
ea2384d3
FB
1718 buf[0] = '\0';
1719 } else {
1720 pstrcpy(buf, buf_size, bs->drv->format_name);
1721 }
1722}
1723
5fafdf24 1724void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
ea2384d3
FB
1725 void *opaque)
1726{
1727 BlockDriver *drv;
1728
8a22f02a 1729 QLIST_FOREACH(drv, &bdrv_drivers, list) {
ea2384d3
FB
1730 it(opaque, drv->format_name);
1731 }
1732}
1733
b338082b
FB
1734BlockDriverState *bdrv_find(const char *name)
1735{
1736 BlockDriverState *bs;
1737
1b7bdbc1
SH
1738 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1739 if (!strcmp(name, bs->device_name)) {
b338082b 1740 return bs;
1b7bdbc1 1741 }
b338082b
FB
1742 }
1743 return NULL;
1744}
1745
2f399b0a
MA
1746BlockDriverState *bdrv_next(BlockDriverState *bs)
1747{
1748 if (!bs) {
1749 return QTAILQ_FIRST(&bdrv_states);
1750 }
1751 return QTAILQ_NEXT(bs, list);
1752}
1753
51de9760 1754void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
81d0912d
FB
1755{
1756 BlockDriverState *bs;
1757
1b7bdbc1 1758 QTAILQ_FOREACH(bs, &bdrv_states, list) {
51de9760 1759 it(opaque, bs);
81d0912d
FB
1760 }
1761}
1762
ea2384d3
FB
1763const char *bdrv_get_device_name(BlockDriverState *bs)
1764{
1765 return bs->device_name;
1766}
1767
205ef796 1768int bdrv_flush(BlockDriverState *bs)
7a6cba61 1769{
016f5cf6 1770 if (bs->open_flags & BDRV_O_NO_FLUSH) {
205ef796
KW
1771 return 0;
1772 }
1773
e7a8a783
KW
1774 if (bs->drv && bdrv_has_async_flush(bs->drv) && qemu_in_coroutine()) {
1775 return bdrv_co_flush_em(bs);
1776 }
1777
205ef796
KW
1778 if (bs->drv && bs->drv->bdrv_flush) {
1779 return bs->drv->bdrv_flush(bs);
016f5cf6
AG
1780 }
1781
205ef796
KW
1782 /*
1783 * Some block drivers always operate in either writethrough or unsafe mode
1784 * and don't support bdrv_flush therefore. Usually qemu doesn't know how
1785 * the server works (because the behaviour is hardcoded or depends on
1786 * server-side configuration), so we can't ensure that everything is safe
1787 * on disk. Returning an error doesn't work because that would break guests
1788 * even if the server operates in writethrough mode.
1789 *
1790 * Let's hope the user knows what he's doing.
1791 */
1792 return 0;
7a6cba61
PB
1793}
1794
c6ca28d6
AL
1795void bdrv_flush_all(void)
1796{
1797 BlockDriverState *bs;
1798
1b7bdbc1 1799 QTAILQ_FOREACH(bs, &bdrv_states, list) {
c602a489 1800 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
c6ca28d6 1801 bdrv_flush(bs);
1b7bdbc1
SH
1802 }
1803 }
c6ca28d6
AL
1804}
1805
f2feebbd
KW
1806int bdrv_has_zero_init(BlockDriverState *bs)
1807{
1808 assert(bs->drv);
1809
336c1c12
KW
1810 if (bs->drv->bdrv_has_zero_init) {
1811 return bs->drv->bdrv_has_zero_init(bs);
f2feebbd
KW
1812 }
1813
1814 return 1;
1815}
1816
bb8bf76f
CH
1817int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
1818{
1819 if (!bs->drv) {
1820 return -ENOMEDIUM;
1821 }
1822 if (!bs->drv->bdrv_discard) {
1823 return 0;
1824 }
1825 return bs->drv->bdrv_discard(bs, sector_num, nb_sectors);
1826}
1827
f58c7b35
TS
1828/*
1829 * Returns true iff the specified sector is present in the disk image. Drivers
1830 * not implementing the functionality are assumed to not support backing files,
1831 * hence all their sectors are reported as allocated.
1832 *
1833 * 'pnum' is set to the number of sectors (including and immediately following
1834 * the specified sector) that are known to be in the same
1835 * allocated/unallocated state.
1836 *
1837 * 'nb_sectors' is the max value 'pnum' should be set to.
1838 */
1839int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1840 int *pnum)
1841{
1842 int64_t n;
1843 if (!bs->drv->bdrv_is_allocated) {
1844 if (sector_num >= bs->total_sectors) {
1845 *pnum = 0;
1846 return 0;
1847 }
1848 n = bs->total_sectors - sector_num;
1849 *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1850 return 1;
1851 }
1852 return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1853}
1854
2582bfed
LC
1855void bdrv_mon_event(const BlockDriverState *bdrv,
1856 BlockMonEventAction action, int is_read)
1857{
1858 QObject *data;
1859 const char *action_str;
1860
1861 switch (action) {
1862 case BDRV_ACTION_REPORT:
1863 action_str = "report";
1864 break;
1865 case BDRV_ACTION_IGNORE:
1866 action_str = "ignore";
1867 break;
1868 case BDRV_ACTION_STOP:
1869 action_str = "stop";
1870 break;
1871 default:
1872 abort();
1873 }
1874
1875 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1876 bdrv->device_name,
1877 action_str,
1878 is_read ? "read" : "write");
1879 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1880
1881 qobject_decref(data);
1882}
1883
d15e5465 1884static void bdrv_print_dict(QObject *obj, void *opaque)
b338082b 1885{
d15e5465
LC
1886 QDict *bs_dict;
1887 Monitor *mon = opaque;
1888
1889 bs_dict = qobject_to_qdict(obj);
1890
d8aeeb31 1891 monitor_printf(mon, "%s: removable=%d",
d15e5465 1892 qdict_get_str(bs_dict, "device"),
d15e5465
LC
1893 qdict_get_bool(bs_dict, "removable"));
1894
1895 if (qdict_get_bool(bs_dict, "removable")) {
1896 monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
e4def80b
MA
1897 monitor_printf(mon, " tray-open=%d",
1898 qdict_get_bool(bs_dict, "tray-open"));
d15e5465 1899 }
d2078cc2
LC
1900
1901 if (qdict_haskey(bs_dict, "io-status")) {
1902 monitor_printf(mon, " io-status=%s", qdict_get_str(bs_dict, "io-status"));
1903 }
1904
d15e5465
LC
1905 if (qdict_haskey(bs_dict, "inserted")) {
1906 QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted"));
1907
1908 monitor_printf(mon, " file=");
1909 monitor_print_filename(mon, qdict_get_str(qdict, "file"));
1910 if (qdict_haskey(qdict, "backing_file")) {
1911 monitor_printf(mon, " backing_file=");
1912 monitor_print_filename(mon, qdict_get_str(qdict, "backing_file"));
1913 }
1914 monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
1915 qdict_get_bool(qdict, "ro"),
1916 qdict_get_str(qdict, "drv"),
1917 qdict_get_bool(qdict, "encrypted"));
1918 } else {
1919 monitor_printf(mon, " [not inserted]");
1920 }
1921
1922 monitor_printf(mon, "\n");
1923}
1924
1925void bdrv_info_print(Monitor *mon, const QObject *data)
1926{
1927 qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon);
1928}
1929
f04ef601
LC
1930static const char *const io_status_name[BDRV_IOS_MAX] = {
1931 [BDRV_IOS_OK] = "ok",
1932 [BDRV_IOS_FAILED] = "failed",
1933 [BDRV_IOS_ENOSPC] = "nospace",
1934};
1935
d15e5465
LC
1936void bdrv_info(Monitor *mon, QObject **ret_data)
1937{
1938 QList *bs_list;
b338082b
FB
1939 BlockDriverState *bs;
1940
d15e5465
LC
1941 bs_list = qlist_new();
1942
1b7bdbc1 1943 QTAILQ_FOREACH(bs, &bdrv_states, list) {
d15e5465 1944 QObject *bs_obj;
e4def80b 1945 QDict *bs_dict;
d15e5465 1946
d8aeeb31 1947 bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': 'unknown', "
d15e5465 1948 "'removable': %i, 'locked': %i }",
2c6942fa
MA
1949 bs->device_name,
1950 bdrv_dev_has_removable_media(bs),
f107639a 1951 bdrv_dev_is_medium_locked(bs));
e4def80b 1952 bs_dict = qobject_to_qdict(bs_obj);
d15e5465 1953
e4def80b
MA
1954 if (bdrv_dev_has_removable_media(bs)) {
1955 qdict_put(bs_dict, "tray-open",
1956 qbool_from_int(bdrv_dev_is_tray_open(bs)));
1957 }
f04ef601
LC
1958
1959 if (bdrv_iostatus_is_enabled(bs)) {
1960 qdict_put(bs_dict, "io-status",
1961 qstring_from_str(io_status_name[bs->iostatus]));
1962 }
1963
19cb3738 1964 if (bs->drv) {
d15e5465 1965 QObject *obj;
d15e5465
LC
1966
1967 obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
1968 "'encrypted': %i }",
1969 bs->filename, bs->read_only,
1970 bs->drv->format_name,
1971 bdrv_is_encrypted(bs));
fef30743 1972 if (bs->backing_file[0] != '\0') {
d15e5465
LC
1973 QDict *qdict = qobject_to_qdict(obj);
1974 qdict_put(qdict, "backing_file",
1975 qstring_from_str(bs->backing_file));
376253ec 1976 }
d15e5465
LC
1977
1978 qdict_put_obj(bs_dict, "inserted", obj);
b338082b 1979 }
d15e5465 1980 qlist_append_obj(bs_list, bs_obj);
b338082b 1981 }
d15e5465
LC
1982
1983 *ret_data = QOBJECT(bs_list);
b338082b 1984}
a36e69dd 1985
218a536a 1986static void bdrv_stats_iter(QObject *data, void *opaque)
a36e69dd 1987{
218a536a
LC
1988 QDict *qdict;
1989 Monitor *mon = opaque;
1990
1991 qdict = qobject_to_qdict(data);
1992 monitor_printf(mon, "%s:", qdict_get_str(qdict, "device"));
1993
1994 qdict = qobject_to_qdict(qdict_get(qdict, "stats"));
1995 monitor_printf(mon, " rd_bytes=%" PRId64
1996 " wr_bytes=%" PRId64
1997 " rd_operations=%" PRId64
1998 " wr_operations=%" PRId64
e8045d67 1999 " flush_operations=%" PRId64
c488c7f6
CH
2000 " wr_total_time_ns=%" PRId64
2001 " rd_total_time_ns=%" PRId64
2002 " flush_total_time_ns=%" PRId64
218a536a
LC
2003 "\n",
2004 qdict_get_int(qdict, "rd_bytes"),
2005 qdict_get_int(qdict, "wr_bytes"),
2006 qdict_get_int(qdict, "rd_operations"),
e8045d67 2007 qdict_get_int(qdict, "wr_operations"),
c488c7f6
CH
2008 qdict_get_int(qdict, "flush_operations"),
2009 qdict_get_int(qdict, "wr_total_time_ns"),
2010 qdict_get_int(qdict, "rd_total_time_ns"),
2011 qdict_get_int(qdict, "flush_total_time_ns"));
218a536a
LC
2012}
2013
2014void bdrv_stats_print(Monitor *mon, const QObject *data)
2015{
2016 qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon);
2017}
2018
294cc35f
KW
2019static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
2020{
2021 QObject *res;
2022 QDict *dict;
2023
2024 res = qobject_from_jsonf("{ 'stats': {"
2025 "'rd_bytes': %" PRId64 ","
2026 "'wr_bytes': %" PRId64 ","
2027 "'rd_operations': %" PRId64 ","
2028 "'wr_operations': %" PRId64 ","
e8045d67 2029 "'wr_highest_offset': %" PRId64 ","
c488c7f6
CH
2030 "'flush_operations': %" PRId64 ","
2031 "'wr_total_time_ns': %" PRId64 ","
2032 "'rd_total_time_ns': %" PRId64 ","
2033 "'flush_total_time_ns': %" PRId64
294cc35f 2034 "} }",
a597e79c
CH
2035 bs->nr_bytes[BDRV_ACCT_READ],
2036 bs->nr_bytes[BDRV_ACCT_WRITE],
2037 bs->nr_ops[BDRV_ACCT_READ],
2038 bs->nr_ops[BDRV_ACCT_WRITE],
5ffbbc67 2039 bs->wr_highest_sector *
e8045d67 2040 (uint64_t)BDRV_SECTOR_SIZE,
c488c7f6
CH
2041 bs->nr_ops[BDRV_ACCT_FLUSH],
2042 bs->total_time_ns[BDRV_ACCT_WRITE],
2043 bs->total_time_ns[BDRV_ACCT_READ],
2044 bs->total_time_ns[BDRV_ACCT_FLUSH]);
294cc35f
KW
2045 dict = qobject_to_qdict(res);
2046
2047 if (*bs->device_name) {
2048 qdict_put(dict, "device", qstring_from_str(bs->device_name));
2049 }
2050
2051 if (bs->file) {
2052 QObject *parent = bdrv_info_stats_bs(bs->file);
2053 qdict_put_obj(dict, "parent", parent);
2054 }
2055
2056 return res;
2057}
2058
218a536a
LC
2059void bdrv_info_stats(Monitor *mon, QObject **ret_data)
2060{
2061 QObject *obj;
2062 QList *devices;
a36e69dd
TS
2063 BlockDriverState *bs;
2064
218a536a
LC
2065 devices = qlist_new();
2066
1b7bdbc1 2067 QTAILQ_FOREACH(bs, &bdrv_states, list) {
294cc35f 2068 obj = bdrv_info_stats_bs(bs);
218a536a 2069 qlist_append_obj(devices, obj);
a36e69dd 2070 }
218a536a
LC
2071
2072 *ret_data = QOBJECT(devices);
a36e69dd 2073}
ea2384d3 2074
045df330
AL
2075const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2076{
2077 if (bs->backing_hd && bs->backing_hd->encrypted)
2078 return bs->backing_file;
2079 else if (bs->encrypted)
2080 return bs->filename;
2081 else
2082 return NULL;
2083}
2084
5fafdf24 2085void bdrv_get_backing_filename(BlockDriverState *bs,
83f64091
FB
2086 char *filename, int filename_size)
2087{
b783e409 2088 if (!bs->backing_file) {
83f64091
FB
2089 pstrcpy(filename, filename_size, "");
2090 } else {
2091 pstrcpy(filename, filename_size, bs->backing_file);
2092 }
2093}
2094
5fafdf24 2095int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
faea38e7
FB
2096 const uint8_t *buf, int nb_sectors)
2097{
2098 BlockDriver *drv = bs->drv;
2099 if (!drv)
19cb3738 2100 return -ENOMEDIUM;
faea38e7
FB
2101 if (!drv->bdrv_write_compressed)
2102 return -ENOTSUP;
fbb7b4e0
KW
2103 if (bdrv_check_request(bs, sector_num, nb_sectors))
2104 return -EIO;
a55eb92c 2105
c6d22830 2106 if (bs->dirty_bitmap) {
7cd1e32a 2107 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2108 }
a55eb92c 2109
faea38e7
FB
2110 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2111}
3b46e624 2112
faea38e7
FB
2113int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2114{
2115 BlockDriver *drv = bs->drv;
2116 if (!drv)
19cb3738 2117 return -ENOMEDIUM;
faea38e7
FB
2118 if (!drv->bdrv_get_info)
2119 return -ENOTSUP;
2120 memset(bdi, 0, sizeof(*bdi));
2121 return drv->bdrv_get_info(bs, bdi);
2122}
2123
45566e9c
CH
2124int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2125 int64_t pos, int size)
178e08a5
AL
2126{
2127 BlockDriver *drv = bs->drv;
2128 if (!drv)
2129 return -ENOMEDIUM;
7cdb1f6d
MK
2130 if (drv->bdrv_save_vmstate)
2131 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2132 if (bs->file)
2133 return bdrv_save_vmstate(bs->file, buf, pos, size);
2134 return -ENOTSUP;
178e08a5
AL
2135}
2136
45566e9c
CH
2137int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2138 int64_t pos, int size)
178e08a5
AL
2139{
2140 BlockDriver *drv = bs->drv;
2141 if (!drv)
2142 return -ENOMEDIUM;
7cdb1f6d
MK
2143 if (drv->bdrv_load_vmstate)
2144 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2145 if (bs->file)
2146 return bdrv_load_vmstate(bs->file, buf, pos, size);
2147 return -ENOTSUP;
178e08a5
AL
2148}
2149
8b9b0cc2
KW
2150void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2151{
2152 BlockDriver *drv = bs->drv;
2153
2154 if (!drv || !drv->bdrv_debug_event) {
2155 return;
2156 }
2157
2158 return drv->bdrv_debug_event(bs, event);
2159
2160}
2161
faea38e7
FB
2162/**************************************************************/
2163/* handling of snapshots */
2164
feeee5ac
MDCF
2165int bdrv_can_snapshot(BlockDriverState *bs)
2166{
2167 BlockDriver *drv = bs->drv;
07b70bfb 2168 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
feeee5ac
MDCF
2169 return 0;
2170 }
2171
2172 if (!drv->bdrv_snapshot_create) {
2173 if (bs->file != NULL) {
2174 return bdrv_can_snapshot(bs->file);
2175 }
2176 return 0;
2177 }
2178
2179 return 1;
2180}
2181
199630b6
BS
2182int bdrv_is_snapshot(BlockDriverState *bs)
2183{
2184 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2185}
2186
f9092b10
MA
2187BlockDriverState *bdrv_snapshots(void)
2188{
2189 BlockDriverState *bs;
2190
3ac906f7 2191 if (bs_snapshots) {
f9092b10 2192 return bs_snapshots;
3ac906f7 2193 }
f9092b10
MA
2194
2195 bs = NULL;
2196 while ((bs = bdrv_next(bs))) {
2197 if (bdrv_can_snapshot(bs)) {
3ac906f7
MA
2198 bs_snapshots = bs;
2199 return bs;
f9092b10
MA
2200 }
2201 }
2202 return NULL;
f9092b10
MA
2203}
2204
5fafdf24 2205int bdrv_snapshot_create(BlockDriverState *bs,
faea38e7
FB
2206 QEMUSnapshotInfo *sn_info)
2207{
2208 BlockDriver *drv = bs->drv;
2209 if (!drv)
19cb3738 2210 return -ENOMEDIUM;
7cdb1f6d
MK
2211 if (drv->bdrv_snapshot_create)
2212 return drv->bdrv_snapshot_create(bs, sn_info);
2213 if (bs->file)
2214 return bdrv_snapshot_create(bs->file, sn_info);
2215 return -ENOTSUP;
faea38e7
FB
2216}
2217
5fafdf24 2218int bdrv_snapshot_goto(BlockDriverState *bs,
faea38e7
FB
2219 const char *snapshot_id)
2220{
2221 BlockDriver *drv = bs->drv;
7cdb1f6d
MK
2222 int ret, open_ret;
2223
faea38e7 2224 if (!drv)
19cb3738 2225 return -ENOMEDIUM;
7cdb1f6d
MK
2226 if (drv->bdrv_snapshot_goto)
2227 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2228
2229 if (bs->file) {
2230 drv->bdrv_close(bs);
2231 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2232 open_ret = drv->bdrv_open(bs, bs->open_flags);
2233 if (open_ret < 0) {
2234 bdrv_delete(bs->file);
2235 bs->drv = NULL;
2236 return open_ret;
2237 }
2238 return ret;
2239 }
2240
2241 return -ENOTSUP;
faea38e7
FB
2242}
2243
2244int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2245{
2246 BlockDriver *drv = bs->drv;
2247 if (!drv)
19cb3738 2248 return -ENOMEDIUM;
7cdb1f6d
MK
2249 if (drv->bdrv_snapshot_delete)
2250 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2251 if (bs->file)
2252 return bdrv_snapshot_delete(bs->file, snapshot_id);
2253 return -ENOTSUP;
faea38e7
FB
2254}
2255
5fafdf24 2256int bdrv_snapshot_list(BlockDriverState *bs,
faea38e7
FB
2257 QEMUSnapshotInfo **psn_info)
2258{
2259 BlockDriver *drv = bs->drv;
2260 if (!drv)
19cb3738 2261 return -ENOMEDIUM;
7cdb1f6d
MK
2262 if (drv->bdrv_snapshot_list)
2263 return drv->bdrv_snapshot_list(bs, psn_info);
2264 if (bs->file)
2265 return bdrv_snapshot_list(bs->file, psn_info);
2266 return -ENOTSUP;
faea38e7
FB
2267}
2268
51ef6727 2269int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2270 const char *snapshot_name)
2271{
2272 BlockDriver *drv = bs->drv;
2273 if (!drv) {
2274 return -ENOMEDIUM;
2275 }
2276 if (!bs->read_only) {
2277 return -EINVAL;
2278 }
2279 if (drv->bdrv_snapshot_load_tmp) {
2280 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2281 }
2282 return -ENOTSUP;
2283}
2284
faea38e7
FB
#define NB_SUFFIXES 4

/*
 * Format size into buf (at most buf_size bytes) in a short human-readable
 * form and return buf.
 *
 * Values up to 999 are printed as plain integers. Larger values are scaled
 * by powers of 1024 and suffixed with K, M, G or T: one decimal place is
 * shown while the scaled value is below 10, a rounded integer otherwise.
 * Anything too large for the other suffixes is expressed in T.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (idx = 0; idx < NB_SUFFIXES; idx++, unit *= 1024) {
        if (size < (10 * unit)) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit,
                     suffixes[idx]);
            return buf;
        }
        /* The last suffix is the catch-all for everything larger. */
        if (size < (1000 * unit) || idx == (NB_SUFFIXES - 1)) {
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     ((size + (unit >> 1)) / unit),
                     suffixes[idx]);
            return buf;
        }
    }

    return buf;
}
2314
2315char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2316{
2317 char buf1[128], date_buf[128], clock_buf[128];
3b9f94e1
FB
2318#ifdef _WIN32
2319 struct tm *ptm;
2320#else
faea38e7 2321 struct tm tm;
3b9f94e1 2322#endif
faea38e7
FB
2323 time_t ti;
2324 int64_t secs;
2325
2326 if (!sn) {
5fafdf24
TS
2327 snprintf(buf, buf_size,
2328 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
2329 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2330 } else {
2331 ti = sn->date_sec;
3b9f94e1
FB
2332#ifdef _WIN32
2333 ptm = localtime(&ti);
2334 strftime(date_buf, sizeof(date_buf),
2335 "%Y-%m-%d %H:%M:%S", ptm);
2336#else
faea38e7
FB
2337 localtime_r(&ti, &tm);
2338 strftime(date_buf, sizeof(date_buf),
2339 "%Y-%m-%d %H:%M:%S", &tm);
3b9f94e1 2340#endif
faea38e7
FB
2341 secs = sn->vm_clock_nsec / 1000000000;
2342 snprintf(clock_buf, sizeof(clock_buf),
2343 "%02d:%02d:%02d.%03d",
2344 (int)(secs / 3600),
2345 (int)((secs / 60) % 60),
5fafdf24 2346 (int)(secs % 60),
faea38e7
FB
2347 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2348 snprintf(buf, buf_size,
5fafdf24 2349 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
2350 sn->id_str, sn->name,
2351 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2352 date_buf,
2353 clock_buf);
2354 }
2355 return buf;
2356}
2357
ea2384d3 2358/**************************************************************/
83f64091 2359/* async I/Os */
ea2384d3 2360
3b69e4b9 2361BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
f141eafe 2362 QEMUIOVector *qiov, int nb_sectors,
3b69e4b9 2363 BlockDriverCompletionFunc *cb, void *opaque)
83f64091 2364{
bbf0a440
SH
2365 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2366
b2a61371 2367 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
8c5873d6 2368 cb, opaque, false);
ea2384d3
FB
2369}
2370
f141eafe
AL
2371BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2372 QEMUIOVector *qiov, int nb_sectors,
2373 BlockDriverCompletionFunc *cb, void *opaque)
ea2384d3 2374{
bbf0a440
SH
2375 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2376
1a6e115b 2377 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
8c5873d6 2378 cb, opaque, true);
83f64091
FB
2379}
2380
40b4f539
KW
2381
2382typedef struct MultiwriteCB {
2383 int error;
2384 int num_requests;
2385 int num_callbacks;
2386 struct {
2387 BlockDriverCompletionFunc *cb;
2388 void *opaque;
2389 QEMUIOVector *free_qiov;
2390 void *free_buf;
2391 } callbacks[];
2392} MultiwriteCB;
2393
2394static void multiwrite_user_cb(MultiwriteCB *mcb)
2395{
2396 int i;
2397
2398 for (i = 0; i < mcb->num_callbacks; i++) {
2399 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1e1ea48d
SH
2400 if (mcb->callbacks[i].free_qiov) {
2401 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2402 }
7267c094 2403 g_free(mcb->callbacks[i].free_qiov);
f8a83245 2404 qemu_vfree(mcb->callbacks[i].free_buf);
40b4f539
KW
2405 }
2406}
2407
2408static void multiwrite_cb(void *opaque, int ret)
2409{
2410 MultiwriteCB *mcb = opaque;
2411
6d519a5f
SH
2412 trace_multiwrite_cb(mcb, ret);
2413
cb6d3ca0 2414 if (ret < 0 && !mcb->error) {
40b4f539 2415 mcb->error = ret;
40b4f539
KW
2416 }
2417
2418 mcb->num_requests--;
2419 if (mcb->num_requests == 0) {
de189a1b 2420 multiwrite_user_cb(mcb);
7267c094 2421 g_free(mcb);
40b4f539
KW
2422 }
2423}
2424
2425static int multiwrite_req_compare(const void *a, const void *b)
2426{
77be4366
CH
2427 const BlockRequest *req1 = a, *req2 = b;
2428
2429 /*
2430 * Note that we can't simply subtract req2->sector from req1->sector
2431 * here as that could overflow the return value.
2432 */
2433 if (req1->sector > req2->sector) {
2434 return 1;
2435 } else if (req1->sector < req2->sector) {
2436 return -1;
2437 } else {
2438 return 0;
2439 }
40b4f539
KW
2440}
2441
2442/*
2443 * Takes a bunch of requests and tries to merge them. Returns the number of
2444 * requests that remain after merging.
2445 */
2446static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2447 int num_reqs, MultiwriteCB *mcb)
2448{
2449 int i, outidx;
2450
2451 // Sort requests by start sector
2452 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2453
2454 // Check if adjacent requests touch the same clusters. If so, combine them,
2455 // filling up gaps with zero sectors.
2456 outidx = 0;
2457 for (i = 1; i < num_reqs; i++) {
2458 int merge = 0;
2459 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2460
2461 // This handles the cases that are valid for all block drivers, namely
2462 // exactly sequential writes and overlapping writes.
2463 if (reqs[i].sector <= oldreq_last) {
2464 merge = 1;
2465 }
2466
2467 // The block driver may decide that it makes sense to combine requests
2468 // even if there is a gap of some sectors between them. In this case,
2469 // the gap is filled with zeros (therefore only applicable for yet
2470 // unused space in format like qcow2).
2471 if (!merge && bs->drv->bdrv_merge_requests) {
2472 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2473 }
2474
e2a305fb
CH
2475 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2476 merge = 0;
2477 }
2478
40b4f539
KW
2479 if (merge) {
2480 size_t size;
7267c094 2481 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
40b4f539
KW
2482 qemu_iovec_init(qiov,
2483 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2484
2485 // Add the first request to the merged one. If the requests are
2486 // overlapping, drop the last sectors of the first request.
2487 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2488 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2489
2490 // We might need to add some zeros between the two requests
2491 if (reqs[i].sector > oldreq_last) {
2492 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2493 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2494 memset(buf, 0, zero_bytes);
2495 qemu_iovec_add(qiov, buf, zero_bytes);
2496 mcb->callbacks[i].free_buf = buf;
2497 }
2498
2499 // Add the second request
2500 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2501
cbf1dff2 2502 reqs[outidx].nb_sectors = qiov->size >> 9;
40b4f539
KW
2503 reqs[outidx].qiov = qiov;
2504
2505 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2506 } else {
2507 outidx++;
2508 reqs[outidx].sector = reqs[i].sector;
2509 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2510 reqs[outidx].qiov = reqs[i].qiov;
2511 }
2512 }
2513
2514 return outidx + 1;
2515}
2516
2517/*
2518 * Submit multiple AIO write requests at once.
2519 *
2520 * On success, the function returns 0 and all requests in the reqs array have
2521 * been submitted. In error case this function returns -1, and any of the
2522 * requests may or may not be submitted yet. In particular, this means that the
2523 * callback will be called for some of the requests, for others it won't. The
2524 * caller must check the error field of the BlockRequest to wait for the right
2525 * callbacks (if error != 0, no callback will be called).
2526 *
2527 * The implementation may modify the contents of the reqs array, e.g. to merge
2528 * requests. However, the fields opaque and error are left unmodified as they
2529 * are used to signal failure for a single request to the caller.
2530 */
2531int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2532{
2533 BlockDriverAIOCB *acb;
2534 MultiwriteCB *mcb;
2535 int i;
2536
301db7c2
RH
2537 /* don't submit writes if we don't have a medium */
2538 if (bs->drv == NULL) {
2539 for (i = 0; i < num_reqs; i++) {
2540 reqs[i].error = -ENOMEDIUM;
2541 }
2542 return -1;
2543 }
2544
40b4f539
KW
2545 if (num_reqs == 0) {
2546 return 0;
2547 }
2548
2549 // Create MultiwriteCB structure
7267c094 2550 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
40b4f539
KW
2551 mcb->num_requests = 0;
2552 mcb->num_callbacks = num_reqs;
2553
2554 for (i = 0; i < num_reqs; i++) {
2555 mcb->callbacks[i].cb = reqs[i].cb;
2556 mcb->callbacks[i].opaque = reqs[i].opaque;
2557 }
2558
2559 // Check for mergable requests
2560 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2561
6d519a5f
SH
2562 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2563
453f9a16
KW
2564 /*
2565 * Run the aio requests. As soon as one request can't be submitted
2566 * successfully, fail all requests that are not yet submitted (we must
2567 * return failure for all requests anyway)
2568 *
2569 * num_requests cannot be set to the right value immediately: If
2570 * bdrv_aio_writev fails for some request, num_requests would be too high
2571 * and therefore multiwrite_cb() would never recognize the multiwrite
2572 * request as completed. We also cannot use the loop variable i to set it
2573 * when the first request fails because the callback may already have been
2574 * called for previously submitted requests. Thus, num_requests must be
2575 * incremented for each request that is submitted.
2576 *
2577 * The problem that callbacks may be called early also means that we need
2578 * to take care that num_requests doesn't become 0 before all requests are
2579 * submitted - multiwrite_cb() would consider the multiwrite request
2580 * completed. A dummy request that is "completed" by a manual call to
2581 * multiwrite_cb() takes care of this.
2582 */
2583 mcb->num_requests = 1;
2584
6d519a5f 2585 // Run the aio requests
40b4f539 2586 for (i = 0; i < num_reqs; i++) {
453f9a16 2587 mcb->num_requests++;
40b4f539
KW
2588 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2589 reqs[i].nb_sectors, multiwrite_cb, mcb);
2590
2591 if (acb == NULL) {
2592 // We can only fail the whole thing if no request has been
2593 // submitted yet. Otherwise we'll wait for the submitted AIOs to
2594 // complete and report the error in the callback.
453f9a16 2595 if (i == 0) {
6d519a5f 2596 trace_bdrv_aio_multiwrite_earlyfail(mcb);
40b4f539
KW
2597 goto fail;
2598 } else {
6d519a5f 2599 trace_bdrv_aio_multiwrite_latefail(mcb, i);
7eb58a6c 2600 multiwrite_cb(mcb, -EIO);
40b4f539
KW
2601 break;
2602 }
40b4f539
KW
2603 }
2604 }
2605
453f9a16
KW
2606 /* Complete the dummy request */
2607 multiwrite_cb(mcb, 0);
2608
40b4f539
KW
2609 return 0;
2610
2611fail:
453f9a16
KW
2612 for (i = 0; i < mcb->num_callbacks; i++) {
2613 reqs[i].error = -EIO;
2614 }
7267c094 2615 g_free(mcb);
40b4f539
KW
2616 return -1;
2617}
2618
b2e12bc6
CH
2619BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2620 BlockDriverCompletionFunc *cb, void *opaque)
2621{
2622 BlockDriver *drv = bs->drv;
2623
a13aac04
SH
2624 trace_bdrv_aio_flush(bs, opaque);
2625
016f5cf6
AG
2626 if (bs->open_flags & BDRV_O_NO_FLUSH) {
2627 return bdrv_aio_noop_em(bs, cb, opaque);
2628 }
2629
b2e12bc6
CH
2630 if (!drv)
2631 return NULL;
b2e12bc6
CH
2632 return drv->bdrv_aio_flush(bs, cb, opaque);
2633}
2634
83f64091 2635void bdrv_aio_cancel(BlockDriverAIOCB *acb)
83f64091 2636{
6bbff9a0 2637 acb->pool->cancel(acb);
83f64091
FB
2638}
2639
ce1a14dc 2640
83f64091
FB
2641/**************************************************************/
2642/* async block device emulation */
2643
c16b5a2c
CH
2644typedef struct BlockDriverAIOCBSync {
2645 BlockDriverAIOCB common;
2646 QEMUBH *bh;
2647 int ret;
2648 /* vector translation state */
2649 QEMUIOVector *qiov;
2650 uint8_t *bounce;
2651 int is_write;
2652} BlockDriverAIOCBSync;
2653
2654static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2655{
b666d239
KW
2656 BlockDriverAIOCBSync *acb =
2657 container_of(blockacb, BlockDriverAIOCBSync, common);
6a7ad299 2658 qemu_bh_delete(acb->bh);
36afc451 2659 acb->bh = NULL;
c16b5a2c
CH
2660 qemu_aio_release(acb);
2661}
2662
2663static AIOPool bdrv_em_aio_pool = {
2664 .aiocb_size = sizeof(BlockDriverAIOCBSync),
2665 .cancel = bdrv_aio_cancel_em,
2666};
2667
ce1a14dc 2668static void bdrv_aio_bh_cb(void *opaque)
83f64091 2669{
ce1a14dc 2670 BlockDriverAIOCBSync *acb = opaque;
f141eafe 2671
f141eafe
AL
2672 if (!acb->is_write)
2673 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
ceb42de8 2674 qemu_vfree(acb->bounce);
ce1a14dc 2675 acb->common.cb(acb->common.opaque, acb->ret);
6a7ad299 2676 qemu_bh_delete(acb->bh);
36afc451 2677 acb->bh = NULL;
ce1a14dc 2678 qemu_aio_release(acb);
83f64091 2679}
beac80cd 2680
f141eafe
AL
2681static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2682 int64_t sector_num,
2683 QEMUIOVector *qiov,
2684 int nb_sectors,
2685 BlockDriverCompletionFunc *cb,
2686 void *opaque,
2687 int is_write)
2688
83f64091 2689{
ce1a14dc 2690 BlockDriverAIOCBSync *acb;
ce1a14dc 2691
c16b5a2c 2692 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
f141eafe
AL
2693 acb->is_write = is_write;
2694 acb->qiov = qiov;
e268ca52 2695 acb->bounce = qemu_blockalign(bs, qiov->size);
f141eafe 2696
ce1a14dc
PB
2697 if (!acb->bh)
2698 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
f141eafe
AL
2699
2700 if (is_write) {
2701 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
1ed20acf 2702 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
f141eafe 2703 } else {
1ed20acf 2704 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
f141eafe
AL
2705 }
2706
ce1a14dc 2707 qemu_bh_schedule(acb->bh);
f141eafe 2708
ce1a14dc 2709 return &acb->common;
beac80cd
FB
2710}
2711
f141eafe
AL
2712static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2713 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 2714 BlockDriverCompletionFunc *cb, void *opaque)
beac80cd 2715{
f141eafe
AL
2716 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2717}
83f64091 2718
f141eafe
AL
2719static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2720 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2721 BlockDriverCompletionFunc *cb, void *opaque)
2722{
2723 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
beac80cd 2724}
beac80cd 2725
68485420
KW
2726
2727typedef struct BlockDriverAIOCBCoroutine {
2728 BlockDriverAIOCB common;
2729 BlockRequest req;
2730 bool is_write;
2731 QEMUBH* bh;
2732} BlockDriverAIOCBCoroutine;
2733
2734static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
2735{
2736 qemu_aio_flush();
2737}
2738
2739static AIOPool bdrv_em_co_aio_pool = {
2740 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
2741 .cancel = bdrv_aio_co_cancel_em,
2742};
2743
2744static void bdrv_co_rw_bh(void *opaque)
2745{
2746 BlockDriverAIOCBCoroutine *acb = opaque;
2747
2748 acb->common.cb(acb->common.opaque, acb->req.error);
2749 qemu_bh_delete(acb->bh);
2750 qemu_aio_release(acb);
2751}
2752
b2a61371
SH
2753/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
2754static void coroutine_fn bdrv_co_do_rw(void *opaque)
2755{
2756 BlockDriverAIOCBCoroutine *acb = opaque;
2757 BlockDriverState *bs = acb->common.bs;
2758
2759 if (!acb->is_write) {
2760 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
2761 acb->req.nb_sectors, acb->req.qiov);
2762 } else {
2763 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
2764 acb->req.nb_sectors, acb->req.qiov);
2765 }
2766
2767 acb->bh = qemu_bh_new(bdrv_co_rw_bh, acb);
2768 qemu_bh_schedule(acb->bh);
2769}
2770
68485420
KW
2771static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
2772 int64_t sector_num,
2773 QEMUIOVector *qiov,
2774 int nb_sectors,
2775 BlockDriverCompletionFunc *cb,
2776 void *opaque,
8c5873d6 2777 bool is_write)
68485420
KW
2778{
2779 Coroutine *co;
2780 BlockDriverAIOCBCoroutine *acb;
2781
2782 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2783 acb->req.sector = sector_num;
2784 acb->req.nb_sectors = nb_sectors;
2785 acb->req.qiov = qiov;
2786 acb->is_write = is_write;
2787
8c5873d6 2788 co = qemu_coroutine_create(bdrv_co_do_rw);
68485420
KW
2789 qemu_coroutine_enter(co, acb);
2790
2791 return &acb->common;
2792}
2793
b2e12bc6
CH
2794static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
2795 BlockDriverCompletionFunc *cb, void *opaque)
2796{
2797 BlockDriverAIOCBSync *acb;
2798
2799 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2800 acb->is_write = 1; /* don't bounce in the completion hadler */
2801 acb->qiov = NULL;
2802 acb->bounce = NULL;
2803 acb->ret = 0;
2804
2805 if (!acb->bh)
2806 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2807
2808 bdrv_flush(bs);
2809 qemu_bh_schedule(acb->bh);
2810 return &acb->common;
2811}
2812
016f5cf6
AG
2813static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
2814 BlockDriverCompletionFunc *cb, void *opaque)
2815{
2816 BlockDriverAIOCBSync *acb;
2817
2818 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2819 acb->is_write = 1; /* don't bounce in the completion handler */
2820 acb->qiov = NULL;
2821 acb->bounce = NULL;
2822 acb->ret = 0;
2823
2824 if (!acb->bh) {
2825 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2826 }
2827
2828 qemu_bh_schedule(acb->bh);
2829 return &acb->common;
2830}
2831
ea2384d3
FB
2832void bdrv_init(void)
2833{
5efa9d5a 2834 module_call_init(MODULE_INIT_BLOCK);
ea2384d3 2835}
ce1a14dc 2836
eb852011
MA
2837void bdrv_init_with_whitelist(void)
2838{
2839 use_bdrv_whitelist = 1;
2840 bdrv_init();
2841}
2842
c16b5a2c
CH
2843void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2844 BlockDriverCompletionFunc *cb, void *opaque)
ce1a14dc 2845{
ce1a14dc
PB
2846 BlockDriverAIOCB *acb;
2847
6bbff9a0
AL
2848 if (pool->free_aiocb) {
2849 acb = pool->free_aiocb;
2850 pool->free_aiocb = acb->next;
ce1a14dc 2851 } else {
7267c094 2852 acb = g_malloc0(pool->aiocb_size);
6bbff9a0 2853 acb->pool = pool;
ce1a14dc
PB
2854 }
2855 acb->bs = bs;
2856 acb->cb = cb;
2857 acb->opaque = opaque;
2858 return acb;
2859}
2860
2861void qemu_aio_release(void *p)
2862{
6bbff9a0
AL
2863 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2864 AIOPool *pool = acb->pool;
2865 acb->next = pool->free_aiocb;
2866 pool->free_aiocb = acb;
ce1a14dc 2867}
19cb3738 2868
f9f05dc5
KW
2869/**************************************************************/
2870/* Coroutine block device emulation */
2871
2872typedef struct CoroutineIOCompletion {
2873 Coroutine *coroutine;
2874 int ret;
2875} CoroutineIOCompletion;
2876
2877static void bdrv_co_io_em_complete(void *opaque, int ret)
2878{
2879 CoroutineIOCompletion *co = opaque;
2880
2881 co->ret = ret;
2882 qemu_coroutine_enter(co->coroutine, NULL);
2883}
2884
2885static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
2886 int nb_sectors, QEMUIOVector *iov,
2887 bool is_write)
2888{
2889 CoroutineIOCompletion co = {
2890 .coroutine = qemu_coroutine_self(),
2891 };
2892 BlockDriverAIOCB *acb;
2893
2894 if (is_write) {
a652d160
SH
2895 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
2896 bdrv_co_io_em_complete, &co);
f9f05dc5 2897 } else {
a652d160
SH
2898 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
2899 bdrv_co_io_em_complete, &co);
f9f05dc5
KW
2900 }
2901
59370aaa 2902 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
f9f05dc5
KW
2903 if (!acb) {
2904 return -EIO;
2905 }
2906 qemu_coroutine_yield();
2907
2908 return co.ret;
2909}
2910
2911static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
2912 int64_t sector_num, int nb_sectors,
2913 QEMUIOVector *iov)
2914{
2915 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
2916}
2917
2918static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
2919 int64_t sector_num, int nb_sectors,
2920 QEMUIOVector *iov)
2921{
2922 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
2923}
2924
e7a8a783
KW
2925static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs)
2926{
2927 CoroutineIOCompletion co = {
2928 .coroutine = qemu_coroutine_self(),
2929 };
2930 BlockDriverAIOCB *acb;
2931
2932 acb = bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
2933 if (!acb) {
2934 return -EIO;
2935 }
2936 qemu_coroutine_yield();
2937 return co.ret;
2938}
2939
19cb3738
FB
2940/**************************************************************/
2941/* removable device support */
2942
2943/**
2944 * Return TRUE if the media is present
2945 */
2946int bdrv_is_inserted(BlockDriverState *bs)
2947{
2948 BlockDriver *drv = bs->drv;
a1aff5bf 2949
19cb3738
FB
2950 if (!drv)
2951 return 0;
2952 if (!drv->bdrv_is_inserted)
a1aff5bf
MA
2953 return 1;
2954 return drv->bdrv_is_inserted(bs);
19cb3738
FB
2955}
2956
2957/**
8e49ca46
MA
2958 * Return whether the media changed since the last call to this
2959 * function, or -ENOTSUP if we don't know. Most drivers don't know.
19cb3738
FB
2960 */
2961int bdrv_media_changed(BlockDriverState *bs)
2962{
2963 BlockDriver *drv = bs->drv;
19cb3738 2964
8e49ca46
MA
2965 if (drv && drv->bdrv_media_changed) {
2966 return drv->bdrv_media_changed(bs);
2967 }
2968 return -ENOTSUP;
19cb3738
FB
2969}
2970
2971/**
2972 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
2973 */
fdec4404 2974void bdrv_eject(BlockDriverState *bs, int eject_flag)
19cb3738
FB
2975{
2976 BlockDriver *drv = bs->drv;
19cb3738 2977
822e1cd1
MA
2978 if (drv && drv->bdrv_eject) {
2979 drv->bdrv_eject(bs, eject_flag);
19cb3738
FB
2980 }
2981}
2982
19cb3738
FB
2983/**
2984 * Lock or unlock the media (if it is locked, the user won't be able
2985 * to eject it manually).
2986 */
025e849a 2987void bdrv_lock_medium(BlockDriverState *bs, bool locked)
19cb3738
FB
2988{
2989 BlockDriver *drv = bs->drv;
2990
025e849a 2991 trace_bdrv_lock_medium(bs, locked);
b8c6d095 2992
025e849a
MA
2993 if (drv && drv->bdrv_lock_medium) {
2994 drv->bdrv_lock_medium(bs, locked);
19cb3738
FB
2995 }
2996}
985a03b0
TS
2997
2998/* needed for generic scsi interface */
2999
3000int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3001{
3002 BlockDriver *drv = bs->drv;
3003
3004 if (drv && drv->bdrv_ioctl)
3005 return drv->bdrv_ioctl(bs, req, buf);
3006 return -ENOTSUP;
3007}
7d780669 3008
221f715d
AL
3009BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3010 unsigned long int req, void *buf,
3011 BlockDriverCompletionFunc *cb, void *opaque)
7d780669 3012{
221f715d 3013 BlockDriver *drv = bs->drv;
7d780669 3014
221f715d
AL
3015 if (drv && drv->bdrv_aio_ioctl)
3016 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3017 return NULL;
7d780669 3018}
e268ca52 3019
7b6f9300
MA
3020void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3021{
3022 bs->buffer_alignment = align;
3023}
7cd1e32a 3024
e268ca52
AL
3025void *qemu_blockalign(BlockDriverState *bs, size_t size)
3026{
3027 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3028}
7cd1e32a 3029
3030void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3031{
3032 int64_t bitmap_size;
a55eb92c 3033
aaa0eb75 3034 bs->dirty_count = 0;
a55eb92c 3035 if (enable) {
c6d22830
JK
3036 if (!bs->dirty_bitmap) {
3037 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3038 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3039 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
a55eb92c 3040
7267c094 3041 bs->dirty_bitmap = g_malloc0(bitmap_size);
a55eb92c 3042 }
7cd1e32a 3043 } else {
c6d22830 3044 if (bs->dirty_bitmap) {
7267c094 3045 g_free(bs->dirty_bitmap);
c6d22830 3046 bs->dirty_bitmap = NULL;
a55eb92c 3047 }
7cd1e32a 3048 }
3049}
3050
3051int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3052{
6ea44308 3053 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c 3054
c6d22830
JK
3055 if (bs->dirty_bitmap &&
3056 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
6d59fec1
MT
3057 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3058 (1UL << (chunk % (sizeof(unsigned long) * 8))));
7cd1e32a 3059 } else {
3060 return 0;
3061 }
3062}
3063
a55eb92c
JK
3064void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3065 int nr_sectors)
7cd1e32a 3066{
3067 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3068}
aaa0eb75
LS
3069
3070int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3071{
3072 return bs->dirty_count;
3073}
f88e1a42 3074
db593f25
MT
3075void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3076{
3077 assert(bs->in_use != in_use);
3078 bs->in_use = in_use;
3079}
3080
3081int bdrv_in_use(BlockDriverState *bs)
3082{
3083 return bs->in_use;
3084}
3085
28a7282a
LC
3086void bdrv_iostatus_enable(BlockDriverState *bs)
3087{
3088 bs->iostatus = BDRV_IOS_OK;
3089}
3090
3091/* The I/O status is only enabled if the drive explicitly
3092 * enables it _and_ the VM is configured to stop on errors */
3093bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3094{
3095 return (bs->iostatus != BDRV_IOS_INVAL &&
3096 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3097 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3098 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3099}
3100
3101void bdrv_iostatus_disable(BlockDriverState *bs)
3102{
3103 bs->iostatus = BDRV_IOS_INVAL;
3104}
3105
3106void bdrv_iostatus_reset(BlockDriverState *bs)
3107{
3108 if (bdrv_iostatus_is_enabled(bs)) {
3109 bs->iostatus = BDRV_IOS_OK;
3110 }
3111}
3112
3113/* XXX: Today this is set by device models because it makes the implementation
3114 quite simple. However, the block layer knows about the error, so it's
3115 possible to implement this without device models being involved */
3116void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3117{
3118 if (bdrv_iostatus_is_enabled(bs) && bs->iostatus == BDRV_IOS_OK) {
3119 assert(error >= 0);
3120 bs->iostatus = error == ENOSPC ? BDRV_IOS_ENOSPC : BDRV_IOS_FAILED;
3121 }
3122}
3123
a597e79c
CH
3124void
3125bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3126 enum BlockAcctType type)
3127{
3128 assert(type < BDRV_MAX_IOTYPE);
3129
3130 cookie->bytes = bytes;
c488c7f6 3131 cookie->start_time_ns = get_clock();
a597e79c
CH
3132 cookie->type = type;
3133}
3134
3135void
3136bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3137{
3138 assert(cookie->type < BDRV_MAX_IOTYPE);
3139
3140 bs->nr_bytes[cookie->type] += cookie->bytes;
3141 bs->nr_ops[cookie->type]++;
c488c7f6 3142 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
a597e79c
CH
3143}
3144
f88e1a42
JS
3145int bdrv_img_create(const char *filename, const char *fmt,
3146 const char *base_filename, const char *base_fmt,
3147 char *options, uint64_t img_size, int flags)
3148{
3149 QEMUOptionParameter *param = NULL, *create_options = NULL;
d220894e 3150 QEMUOptionParameter *backing_fmt, *backing_file, *size;
f88e1a42
JS
3151 BlockDriverState *bs = NULL;
3152 BlockDriver *drv, *proto_drv;
96df67d1 3153 BlockDriver *backing_drv = NULL;
f88e1a42
JS
3154 int ret = 0;
3155
3156 /* Find driver and parse its options */
3157 drv = bdrv_find_format(fmt);
3158 if (!drv) {
3159 error_report("Unknown file format '%s'", fmt);
4f70f249 3160 ret = -EINVAL;
f88e1a42
JS
3161 goto out;
3162 }
3163
3164 proto_drv = bdrv_find_protocol(filename);
3165 if (!proto_drv) {
3166 error_report("Unknown protocol '%s'", filename);
4f70f249 3167 ret = -EINVAL;
f88e1a42
JS
3168 goto out;
3169 }
3170
3171 create_options = append_option_parameters(create_options,
3172 drv->create_options);
3173 create_options = append_option_parameters(create_options,
3174 proto_drv->create_options);
3175
3176 /* Create parameter list with default values */
3177 param = parse_option_parameters("", create_options, param);
3178
3179 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3180
3181 /* Parse -o options */
3182 if (options) {
3183 param = parse_option_parameters(options, create_options, param);
3184 if (param == NULL) {
3185 error_report("Invalid options for file format '%s'.", fmt);
4f70f249 3186 ret = -EINVAL;
f88e1a42
JS
3187 goto out;
3188 }
3189 }
3190
3191 if (base_filename) {
3192 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3193 base_filename)) {
3194 error_report("Backing file not supported for file format '%s'",
3195 fmt);
4f70f249 3196 ret = -EINVAL;
f88e1a42
JS
3197 goto out;
3198 }
3199 }
3200
3201 if (base_fmt) {
3202 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3203 error_report("Backing file format not supported for file "
3204 "format '%s'", fmt);
4f70f249 3205 ret = -EINVAL;
f88e1a42
JS
3206 goto out;
3207 }
3208 }
3209
792da93a
JS
3210 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3211 if (backing_file && backing_file->value.s) {
3212 if (!strcmp(filename, backing_file->value.s)) {
3213 error_report("Error: Trying to create an image with the "
3214 "same filename as the backing file");
4f70f249 3215 ret = -EINVAL;
792da93a
JS
3216 goto out;
3217 }
3218 }
3219
f88e1a42
JS
3220 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3221 if (backing_fmt && backing_fmt->value.s) {
96df67d1
SH
3222 backing_drv = bdrv_find_format(backing_fmt->value.s);
3223 if (!backing_drv) {
f88e1a42
JS
3224 error_report("Unknown backing file format '%s'",
3225 backing_fmt->value.s);
4f70f249 3226 ret = -EINVAL;
f88e1a42
JS
3227 goto out;
3228 }
3229 }
3230
3231 // The size for the image must always be specified, with one exception:
3232 // If we are using a backing file, we can obtain the size from there
d220894e
KW
3233 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3234 if (size && size->value.n == -1) {
f88e1a42
JS
3235 if (backing_file && backing_file->value.s) {
3236 uint64_t size;
f88e1a42
JS
3237 char buf[32];
3238
f88e1a42
JS
3239 bs = bdrv_new("");
3240
96df67d1 3241 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
f88e1a42 3242 if (ret < 0) {
96df67d1 3243 error_report("Could not open '%s'", backing_file->value.s);
f88e1a42
JS
3244 goto out;
3245 }
3246 bdrv_get_geometry(bs, &size);
3247 size *= 512;
3248
3249 snprintf(buf, sizeof(buf), "%" PRId64, size);
3250 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3251 } else {
3252 error_report("Image creation needs a size parameter");
4f70f249 3253 ret = -EINVAL;
f88e1a42
JS
3254 goto out;
3255 }
3256 }
3257
3258 printf("Formatting '%s', fmt=%s ", filename, fmt);
3259 print_option_parameters(param);
3260 puts("");
3261
3262 ret = bdrv_create(drv, filename, param);
3263
3264 if (ret < 0) {
3265 if (ret == -ENOTSUP) {
3266 error_report("Formatting or formatting option not supported for "
3267 "file format '%s'", fmt);
3268 } else if (ret == -EFBIG) {
3269 error_report("The image size is too large for file format '%s'",
3270 fmt);
3271 } else {
3272 error_report("%s: error while creating %s: %s", filename, fmt,
3273 strerror(-ret));
3274 }
3275 }
3276
3277out:
3278 free_option_parameters(create_options);
3279 free_option_parameters(param);
3280
3281 if (bs) {
3282 bdrv_delete(bs);
3283 }
4f70f249
JS
3284
3285 return ret;
f88e1a42 3286}