]> git.proxmox.com Git - mirror_qemu.git/blame - block.c
xen_disk: Always set feature-barrier = 1
[mirror_qemu.git] / block.c
CommitLineData
fc01f7e7
FB
1/*
2 * QEMU System Emulator block driver
5fafdf24 3 *
fc01f7e7 4 * Copyright (c) 2003 Fabrice Bellard
5fafdf24 5 *
fc01f7e7
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
3990d09a 24#include "config-host.h"
faf07963 25#include "qemu-common.h"
6d519a5f 26#include "trace.h"
376253ec 27#include "monitor.h"
ea2384d3 28#include "block_int.h"
5efa9d5a 29#include "module.h"
d15e5465 30#include "qemu-objects.h"
68485420 31#include "qemu-coroutine.h"
fc01f7e7 32
71e72a19 33#ifdef CONFIG_BSD
7674e7bf
FB
34#include <sys/types.h>
35#include <sys/stat.h>
36#include <sys/ioctl.h>
72cf2d4f 37#include <sys/queue.h>
c5e97233 38#ifndef __DragonFly__
7674e7bf
FB
39#include <sys/disk.h>
40#endif
c5e97233 41#endif
7674e7bf 42
49dc768d
AL
43#ifdef _WIN32
44#include <windows.h>
45#endif
46
1c9805a3
SH
47#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
48
7d4b4ba5 49static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
f141eafe
AL
50static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
51 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
c87c0672 52 BlockDriverCompletionFunc *cb, void *opaque);
f141eafe
AL
53static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
54 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 55 BlockDriverCompletionFunc *cb, void *opaque);
b2e12bc6
CH
56static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
57 BlockDriverCompletionFunc *cb, void *opaque);
016f5cf6
AG
58static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
59 BlockDriverCompletionFunc *cb, void *opaque);
f9f05dc5
KW
60static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
61 int64_t sector_num, int nb_sectors,
62 QEMUIOVector *iov);
63static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
64 int64_t sector_num, int nb_sectors,
65 QEMUIOVector *iov);
e7a8a783 66static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs);
c5fbe571
SH
67static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
68 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
1c9805a3
SH
69static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
70 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
b2a61371
SH
71static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
72 int64_t sector_num,
73 QEMUIOVector *qiov,
74 int nb_sectors,
75 BlockDriverCompletionFunc *cb,
76 void *opaque,
8c5873d6 77 bool is_write);
b2a61371 78static void coroutine_fn bdrv_co_do_rw(void *opaque);
ec530c81 79
1b7bdbc1
SH
80static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
81 QTAILQ_HEAD_INITIALIZER(bdrv_states);
7ee930d0 82
8a22f02a
SH
83static QLIST_HEAD(, BlockDriver) bdrv_drivers =
84 QLIST_HEAD_INITIALIZER(bdrv_drivers);
ea2384d3 85
f9092b10
MA
86/* The device to use for VM snapshots */
87static BlockDriverState *bs_snapshots;
88
eb852011
MA
89/* If non-zero, use only whitelisted block drivers */
90static int use_bdrv_whitelist;
91
9e0b22f4
SH
92#ifdef _WIN32
/*
 * Return non-zero when @filename begins with an ASCII letter immediately
 * followed by ':' (a drive specification such as "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char first = filename[0];
    const int is_letter = (first >= 'a' && first <= 'z') ||
                          (first >= 'A' && first <= 'Z');

    /* Only look at the second character once the first one qualified */
    if (!is_letter) {
        return 0;
    }
    return filename[1] == ':';
}
99
100int is_windows_drive(const char *filename)
101{
102 if (is_windows_drive_prefix(filename) &&
103 filename[2] == '\0')
104 return 1;
105 if (strstart(filename, "\\\\.\\", NULL) ||
106 strstart(filename, "//./", NULL))
107 return 1;
108 return 0;
109}
110#endif
111
/*
 * Check if the path starts with "<protocol>:".
 *
 * Only a ':' that occurs before the first directory separator counts as a
 * protocol delimiter, so an ordinary file name that merely contains a colon
 * somewhere after a separator (for example "./images/disk:1.img") is not
 * mistaken for a protocol specification.  On Windows, drive specifications
 * are never treated as protocols.
 *
 * Returns non-zero if a protocol prefix is present, 0 otherwise.
 */
static int path_has_protocol(const char *path)
{
    const char *p;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    /* stop at the first ':' or at the first separator of either flavour */
    p = path + strcspn(path, ":/\\");
#else
    /* stop at the first ':' or at the first '/' */
    p = path + strcspn(path, ":/");
#endif

    return *p == ':';
}
124
/*
 * Tell whether @path is absolute.
 *
 * Everything up to and including the first ':' (if any) is skipped; the
 * path is absolute when what follows starts with a directory separator.
 * On Windows a path that itself starts with a separator is accepted
 * immediately, and both separator flavours are recognised.
 *
 * Returns non-zero for absolute paths, 0 otherwise.
 */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *rest;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (path[0] == '/' || path[0] == '\\') {
        return 1;
    }
#endif

    colon = strchr(path, ':');
    rest = colon ? colon + 1 : path;

#ifdef _WIN32
    return (rest[0] == '/' || rest[0] == '\\');
#else
    return (rest[0] == '/');
#endif
}
144
83f64091
FB
/*
 * Build in @dest (capacity @dest_size bytes) the path of @filename as seen
 * from @base_path.
 *
 * An absolute @filename is copied unchanged.  Otherwise the directory part
 * of @base_path is kept: everything up to and including the later of its
 * first ':' and its last directory separator. @filename is then appended to
 * it.  URLs are supported.  Nothing is written when @dest_size is not
 * positive; the result is truncated to fit @dest.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon;
    const char *after_sep;
    const char *sep;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just past the "<protocol>:" part, if there is one */
    after_colon = strchr(base_path, ':');
    if (after_colon) {
        after_colon++;
    } else {
        after_colon = base_path;
    }

    /* position just past the last directory separator, if there is one */
    sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *bsep;
        bsep = strrchr(base_path, '\\');
        if (!sep || bsep > sep) {
            sep = bsep;
        }
    }
#endif
    if (sep) {
        after_sep = sep + 1;
    } else {
        after_sep = base_path;
    }

    /* keep whichever prefix reaches further into base_path */
    if (after_sep > after_colon) {
        after_colon = after_sep;
    }

    prefix_len = after_colon - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
188
5efa9d5a 189void bdrv_register(BlockDriver *bdrv)
ea2384d3 190{
8c5873d6
SH
191 /* Block drivers without coroutine functions need emulation */
192 if (!bdrv->bdrv_co_readv) {
f9f05dc5
KW
193 bdrv->bdrv_co_readv = bdrv_co_readv_em;
194 bdrv->bdrv_co_writev = bdrv_co_writev_em;
195
f8c35c1d
SH
196 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
197 * the block driver lacks aio we need to emulate that too.
198 */
f9f05dc5
KW
199 if (!bdrv->bdrv_aio_readv) {
200 /* add AIO emulation layer */
201 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
202 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
f9f05dc5 203 }
83f64091 204 }
b2e12bc6
CH
205
206 if (!bdrv->bdrv_aio_flush)
207 bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
208
8a22f02a 209 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
ea2384d3 210}
b338082b
FB
211
212/* create a new block device (by default it is empty) */
213BlockDriverState *bdrv_new(const char *device_name)
214{
1b7bdbc1 215 BlockDriverState *bs;
b338082b 216
7267c094 217 bs = g_malloc0(sizeof(BlockDriverState));
b338082b 218 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
ea2384d3 219 if (device_name[0] != '\0') {
1b7bdbc1 220 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
ea2384d3 221 }
28a7282a 222 bdrv_iostatus_disable(bs);
b338082b
FB
223 return bs;
224}
225
ea2384d3
FB
226BlockDriver *bdrv_find_format(const char *format_name)
227{
228 BlockDriver *drv1;
8a22f02a
SH
229 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
230 if (!strcmp(drv1->format_name, format_name)) {
ea2384d3 231 return drv1;
8a22f02a 232 }
ea2384d3
FB
233 }
234 return NULL;
235}
236
eb852011
MA
237static int bdrv_is_whitelisted(BlockDriver *drv)
238{
239 static const char *whitelist[] = {
240 CONFIG_BDRV_WHITELIST
241 };
242 const char **p;
243
244 if (!whitelist[0])
245 return 1; /* no whitelist, anything goes */
246
247 for (p = whitelist; *p; p++) {
248 if (!strcmp(drv->format_name, *p)) {
249 return 1;
250 }
251 }
252 return 0;
253}
254
255BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
256{
257 BlockDriver *drv = bdrv_find_format(format_name);
258 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
259}
260
0e7e1989
KW
261int bdrv_create(BlockDriver *drv, const char* filename,
262 QEMUOptionParameter *options)
ea2384d3
FB
263{
264 if (!drv->bdrv_create)
265 return -ENOTSUP;
0e7e1989
KW
266
267 return drv->bdrv_create(filename, options);
ea2384d3
FB
268}
269
84a12e66
CH
270int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
271{
272 BlockDriver *drv;
273
b50cbabc 274 drv = bdrv_find_protocol(filename);
84a12e66 275 if (drv == NULL) {
16905d71 276 return -ENOENT;
84a12e66
CH
277 }
278
279 return bdrv_create(drv, filename, options);
280}
281
#ifdef _WIN32
/*
 * Store the name of a freshly created temporary file in @filename.
 *
 * @size is the capacity of @filename; it must be at least MAX_PATH because
 * GetTempFileName() is not told the buffer size and may store up to
 * MAX_PATH characters.
 */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    /* GetTempFileName() cannot be bounded, so insist on a large buffer */
    assert(size >= MAX_PATH);
    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
/*
 * Store the name of a freshly created temporary file in @filename
 * (capacity @size bytes).  The file is created under $TMPDIR, or under
 * /tmp when TMPDIR is unset, and is closed again before returning.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;

    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    /* mkstemp() returns -1 on failure; there is nothing to close then */
    if (fd >= 0) {
        close(fd);
    }
}
#endif
fc01f7e7 304
84a12e66
CH
305/*
306 * Detect host devices. By convention, /dev/cdrom[N] is always
307 * recognized as a host CDROM.
308 */
309static BlockDriver *find_hdev_driver(const char *filename)
310{
311 int score_max = 0, score;
312 BlockDriver *drv = NULL, *d;
313
314 QLIST_FOREACH(d, &bdrv_drivers, list) {
315 if (d->bdrv_probe_device) {
316 score = d->bdrv_probe_device(filename);
317 if (score > score_max) {
318 score_max = score;
319 drv = d;
320 }
321 }
322 }
323
324 return drv;
325}
326
b50cbabc 327BlockDriver *bdrv_find_protocol(const char *filename)
83f64091
FB
328{
329 BlockDriver *drv1;
330 char protocol[128];
1cec71e3 331 int len;
83f64091 332 const char *p;
19cb3738 333
66f82cee
KW
334 /* TODO Drivers without bdrv_file_open must be specified explicitly */
335
39508e7a
CH
336 /*
337 * XXX(hch): we really should not let host device detection
338 * override an explicit protocol specification, but moving this
339 * later breaks access to device names with colons in them.
340 * Thanks to the brain-dead persistent naming schemes on udev-
341 * based Linux systems those actually are quite common.
342 */
343 drv1 = find_hdev_driver(filename);
344 if (drv1) {
345 return drv1;
346 }
347
9e0b22f4 348 if (!path_has_protocol(filename)) {
39508e7a 349 return bdrv_find_format("file");
84a12e66 350 }
9e0b22f4
SH
351 p = strchr(filename, ':');
352 assert(p != NULL);
1cec71e3
AL
353 len = p - filename;
354 if (len > sizeof(protocol) - 1)
355 len = sizeof(protocol) - 1;
356 memcpy(protocol, filename, len);
357 protocol[len] = '\0';
8a22f02a 358 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
5fafdf24 359 if (drv1->protocol_name &&
8a22f02a 360 !strcmp(drv1->protocol_name, protocol)) {
83f64091 361 return drv1;
8a22f02a 362 }
83f64091
FB
363 }
364 return NULL;
365}
366
c98ac35d 367static int find_image_format(const char *filename, BlockDriver **pdrv)
f3a5d3f8
CH
368{
369 int ret, score, score_max;
370 BlockDriver *drv1, *drv;
371 uint8_t buf[2048];
372 BlockDriverState *bs;
373
f5edb014 374 ret = bdrv_file_open(&bs, filename, 0);
c98ac35d
SW
375 if (ret < 0) {
376 *pdrv = NULL;
377 return ret;
378 }
f8ea0b00 379
08a00559
KW
380 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
381 if (bs->sg || !bdrv_is_inserted(bs)) {
1a396859 382 bdrv_delete(bs);
c98ac35d
SW
383 drv = bdrv_find_format("raw");
384 if (!drv) {
385 ret = -ENOENT;
386 }
387 *pdrv = drv;
388 return ret;
1a396859 389 }
f8ea0b00 390
83f64091
FB
391 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
392 bdrv_delete(bs);
393 if (ret < 0) {
c98ac35d
SW
394 *pdrv = NULL;
395 return ret;
83f64091
FB
396 }
397
ea2384d3 398 score_max = 0;
84a12e66 399 drv = NULL;
8a22f02a 400 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
83f64091
FB
401 if (drv1->bdrv_probe) {
402 score = drv1->bdrv_probe(buf, ret, filename);
403 if (score > score_max) {
404 score_max = score;
405 drv = drv1;
406 }
0849bf08 407 }
fc01f7e7 408 }
c98ac35d
SW
409 if (!drv) {
410 ret = -ENOENT;
411 }
412 *pdrv = drv;
413 return ret;
ea2384d3
FB
414}
415
51762288
SH
416/**
417 * Set the current 'total_sectors' value
418 */
419static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
420{
421 BlockDriver *drv = bs->drv;
422
396759ad
NB
423 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
424 if (bs->sg)
425 return 0;
426
51762288
SH
427 /* query actual device if possible, otherwise just trust the hint */
428 if (drv->bdrv_getlength) {
429 int64_t length = drv->bdrv_getlength(bs);
430 if (length < 0) {
431 return length;
432 }
433 hint = length >> BDRV_SECTOR_BITS;
434 }
435
436 bs->total_sectors = hint;
437 return 0;
438}
439
c3993cdc
SH
440/**
441 * Set open flags for a given cache mode
442 *
443 * Return 0 on success, -1 if the cache mode was invalid.
444 */
445int bdrv_parse_cache_flags(const char *mode, int *flags)
446{
447 *flags &= ~BDRV_O_CACHE_MASK;
448
449 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
450 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
92196b2f
SH
451 } else if (!strcmp(mode, "directsync")) {
452 *flags |= BDRV_O_NOCACHE;
c3993cdc
SH
453 } else if (!strcmp(mode, "writeback")) {
454 *flags |= BDRV_O_CACHE_WB;
455 } else if (!strcmp(mode, "unsafe")) {
456 *flags |= BDRV_O_CACHE_WB;
457 *flags |= BDRV_O_NO_FLUSH;
458 } else if (!strcmp(mode, "writethrough")) {
459 /* this is the default */
460 } else {
461 return -1;
462 }
463
464 return 0;
465}
466
57915332
KW
467/*
468 * Common part for opening disk images and files
469 */
470static int bdrv_open_common(BlockDriverState *bs, const char *filename,
471 int flags, BlockDriver *drv)
472{
473 int ret, open_flags;
474
475 assert(drv != NULL);
476
28dcee10
SH
477 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
478
66f82cee 479 bs->file = NULL;
51762288 480 bs->total_sectors = 0;
57915332
KW
481 bs->encrypted = 0;
482 bs->valid_key = 0;
483 bs->open_flags = flags;
57915332
KW
484 bs->buffer_alignment = 512;
485
486 pstrcpy(bs->filename, sizeof(bs->filename), filename);
487
488 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
489 return -ENOTSUP;
490 }
491
492 bs->drv = drv;
7267c094 493 bs->opaque = g_malloc0(drv->instance_size);
57915332 494
a6599793 495 if (flags & BDRV_O_CACHE_WB)
57915332
KW
496 bs->enable_write_cache = 1;
497
498 /*
499 * Clear flags that are internal to the block layer before opening the
500 * image.
501 */
502 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
503
504 /*
ebabb67a 505 * Snapshots should be writable.
57915332
KW
506 */
507 if (bs->is_temporary) {
508 open_flags |= BDRV_O_RDWR;
509 }
510
66f82cee
KW
511 /* Open the image, either directly or using a protocol */
512 if (drv->bdrv_file_open) {
513 ret = drv->bdrv_file_open(bs, filename, open_flags);
514 } else {
515 ret = bdrv_file_open(&bs->file, filename, open_flags);
516 if (ret >= 0) {
517 ret = drv->bdrv_open(bs, open_flags);
518 }
519 }
520
57915332
KW
521 if (ret < 0) {
522 goto free_and_fail;
523 }
524
525 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
51762288
SH
526
527 ret = refresh_total_sectors(bs, bs->total_sectors);
528 if (ret < 0) {
529 goto free_and_fail;
57915332 530 }
51762288 531
57915332
KW
532#ifndef _WIN32
533 if (bs->is_temporary) {
534 unlink(filename);
535 }
536#endif
537 return 0;
538
539free_and_fail:
66f82cee
KW
540 if (bs->file) {
541 bdrv_delete(bs->file);
542 bs->file = NULL;
543 }
7267c094 544 g_free(bs->opaque);
57915332
KW
545 bs->opaque = NULL;
546 bs->drv = NULL;
547 return ret;
548}
549
b6ce07aa
KW
550/*
551 * Opens a file using a protocol (file, host_device, nbd, ...)
552 */
83f64091 553int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
ea2384d3 554{
83f64091 555 BlockDriverState *bs;
6db95603 556 BlockDriver *drv;
83f64091
FB
557 int ret;
558
b50cbabc 559 drv = bdrv_find_protocol(filename);
6db95603
CH
560 if (!drv) {
561 return -ENOENT;
562 }
563
83f64091 564 bs = bdrv_new("");
b6ce07aa 565 ret = bdrv_open_common(bs, filename, flags, drv);
83f64091
FB
566 if (ret < 0) {
567 bdrv_delete(bs);
568 return ret;
3b0d4f61 569 }
71d0770c 570 bs->growable = 1;
83f64091
FB
571 *pbs = bs;
572 return 0;
573}
574
b6ce07aa
KW
575/*
576 * Opens a disk image (raw, qcow2, vmdk, ...)
577 */
d6e9098e
KW
578int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
579 BlockDriver *drv)
ea2384d3 580{
b6ce07aa 581 int ret;
712e7874 582
83f64091 583 if (flags & BDRV_O_SNAPSHOT) {
ea2384d3
FB
584 BlockDriverState *bs1;
585 int64_t total_size;
7c96d46e 586 int is_protocol = 0;
91a073a9
KW
587 BlockDriver *bdrv_qcow2;
588 QEMUOptionParameter *options;
b6ce07aa
KW
589 char tmp_filename[PATH_MAX];
590 char backing_filename[PATH_MAX];
3b46e624 591
ea2384d3
FB
592 /* if snapshot, we create a temporary backing file and open it
593 instead of opening 'filename' directly */
33e3963e 594
ea2384d3
FB
595 /* if there is a backing file, use it */
596 bs1 = bdrv_new("");
d6e9098e 597 ret = bdrv_open(bs1, filename, 0, drv);
51d7c00c 598 if (ret < 0) {
ea2384d3 599 bdrv_delete(bs1);
51d7c00c 600 return ret;
ea2384d3 601 }
3e82990b 602 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
7c96d46e
AL
603
604 if (bs1->drv && bs1->drv->protocol_name)
605 is_protocol = 1;
606
ea2384d3 607 bdrv_delete(bs1);
3b46e624 608
ea2384d3 609 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
7c96d46e
AL
610
611 /* Real path is meaningless for protocols */
612 if (is_protocol)
613 snprintf(backing_filename, sizeof(backing_filename),
614 "%s", filename);
114cdfa9
KS
615 else if (!realpath(filename, backing_filename))
616 return -errno;
7c96d46e 617
91a073a9
KW
618 bdrv_qcow2 = bdrv_find_format("qcow2");
619 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
620
3e82990b 621 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
91a073a9
KW
622 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
623 if (drv) {
624 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
625 drv->format_name);
626 }
627
628 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
d748768c 629 free_option_parameters(options);
51d7c00c
AL
630 if (ret < 0) {
631 return ret;
ea2384d3 632 }
91a073a9 633
ea2384d3 634 filename = tmp_filename;
91a073a9 635 drv = bdrv_qcow2;
ea2384d3
FB
636 bs->is_temporary = 1;
637 }
712e7874 638
b6ce07aa 639 /* Find the right image format driver */
6db95603 640 if (!drv) {
c98ac35d 641 ret = find_image_format(filename, &drv);
51d7c00c 642 }
6987307c 643
51d7c00c 644 if (!drv) {
51d7c00c 645 goto unlink_and_fail;
ea2384d3 646 }
b6ce07aa
KW
647
648 /* Open the image */
649 ret = bdrv_open_common(bs, filename, flags, drv);
650 if (ret < 0) {
6987307c
CH
651 goto unlink_and_fail;
652 }
653
b6ce07aa
KW
654 /* If there is a backing file, use it */
655 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
656 char backing_filename[PATH_MAX];
657 int back_flags;
658 BlockDriver *back_drv = NULL;
659
660 bs->backing_hd = bdrv_new("");
df2dbb4a
SH
661
662 if (path_has_protocol(bs->backing_file)) {
663 pstrcpy(backing_filename, sizeof(backing_filename),
664 bs->backing_file);
665 } else {
666 path_combine(backing_filename, sizeof(backing_filename),
667 filename, bs->backing_file);
668 }
669
670 if (bs->backing_format[0] != '\0') {
b6ce07aa 671 back_drv = bdrv_find_format(bs->backing_format);
df2dbb4a 672 }
b6ce07aa
KW
673
674 /* backing files always opened read-only */
675 back_flags =
676 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
677
678 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
679 if (ret < 0) {
680 bdrv_close(bs);
681 return ret;
682 }
683 if (bs->is_temporary) {
684 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
685 } else {
686 /* base image inherits from "parent" */
687 bs->backing_hd->keep_read_only = bs->keep_read_only;
688 }
689 }
690
691 if (!bdrv_key_required(bs)) {
7d4b4ba5 692 bdrv_dev_change_media_cb(bs, true);
b6ce07aa
KW
693 }
694
695 return 0;
696
697unlink_and_fail:
698 if (bs->is_temporary) {
699 unlink(filename);
700 }
701 return ret;
702}
703
fc01f7e7
FB
704void bdrv_close(BlockDriverState *bs)
705{
19cb3738 706 if (bs->drv) {
f9092b10
MA
707 if (bs == bs_snapshots) {
708 bs_snapshots = NULL;
709 }
557df6ac 710 if (bs->backing_hd) {
ea2384d3 711 bdrv_delete(bs->backing_hd);
557df6ac
SH
712 bs->backing_hd = NULL;
713 }
ea2384d3 714 bs->drv->bdrv_close(bs);
7267c094 715 g_free(bs->opaque);
ea2384d3
FB
716#ifdef _WIN32
717 if (bs->is_temporary) {
718 unlink(bs->filename);
719 }
67b915a5 720#endif
ea2384d3
FB
721 bs->opaque = NULL;
722 bs->drv = NULL;
b338082b 723
66f82cee
KW
724 if (bs->file != NULL) {
725 bdrv_close(bs->file);
726 }
727
7d4b4ba5 728 bdrv_dev_change_media_cb(bs, false);
b338082b
FB
729 }
730}
731
2bc93fed
MK
732void bdrv_close_all(void)
733{
734 BlockDriverState *bs;
735
736 QTAILQ_FOREACH(bs, &bdrv_states, list) {
737 bdrv_close(bs);
738 }
739}
740
d22b2f41
RH
741/* make a BlockDriverState anonymous by removing from bdrv_state list.
742 Also, NULL terminate the device_name to prevent double remove */
743void bdrv_make_anon(BlockDriverState *bs)
744{
745 if (bs->device_name[0] != '\0') {
746 QTAILQ_REMOVE(&bdrv_states, bs, list);
747 }
748 bs->device_name[0] = '\0';
749}
750
b338082b
FB
751void bdrv_delete(BlockDriverState *bs)
752{
fa879d62 753 assert(!bs->dev);
18846dee 754
1b7bdbc1 755 /* remove from list, if necessary */
d22b2f41 756 bdrv_make_anon(bs);
34c6f050 757
b338082b 758 bdrv_close(bs);
66f82cee
KW
759 if (bs->file != NULL) {
760 bdrv_delete(bs->file);
761 }
762
f9092b10 763 assert(bs != bs_snapshots);
7267c094 764 g_free(bs);
fc01f7e7
FB
765}
766
fa879d62
MA
767int bdrv_attach_dev(BlockDriverState *bs, void *dev)
768/* TODO change to DeviceState *dev when all users are qdevified */
18846dee 769{
fa879d62 770 if (bs->dev) {
18846dee
MA
771 return -EBUSY;
772 }
fa879d62 773 bs->dev = dev;
28a7282a 774 bdrv_iostatus_reset(bs);
18846dee
MA
775 return 0;
776}
777
fa879d62
MA
778/* TODO qdevified devices don't use this, remove when devices are qdevified */
779void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
18846dee 780{
fa879d62
MA
781 if (bdrv_attach_dev(bs, dev) < 0) {
782 abort();
783 }
784}
785
786void bdrv_detach_dev(BlockDriverState *bs, void *dev)
787/* TODO change to DeviceState *dev when all users are qdevified */
788{
789 assert(bs->dev == dev);
790 bs->dev = NULL;
0e49de52
MA
791 bs->dev_ops = NULL;
792 bs->dev_opaque = NULL;
29e05f20 793 bs->buffer_alignment = 512;
18846dee
MA
794}
795
fa879d62
MA
796/* TODO change to return DeviceState * when all users are qdevified */
797void *bdrv_get_attached_dev(BlockDriverState *bs)
18846dee 798{
fa879d62 799 return bs->dev;
18846dee
MA
800}
801
0e49de52
MA
802void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
803 void *opaque)
804{
805 bs->dev_ops = ops;
806 bs->dev_opaque = opaque;
2c6942fa
MA
807 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
808 bs_snapshots = NULL;
809 }
0e49de52
MA
810}
811
7d4b4ba5 812static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
0e49de52 813{
145feb17 814 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
7d4b4ba5 815 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
145feb17
MA
816 }
817}
818
2c6942fa
MA
819bool bdrv_dev_has_removable_media(BlockDriverState *bs)
820{
821 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
822}
823
e4def80b
MA
824bool bdrv_dev_is_tray_open(BlockDriverState *bs)
825{
826 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
827 return bs->dev_ops->is_tray_open(bs->dev_opaque);
828 }
829 return false;
830}
831
145feb17
MA
832static void bdrv_dev_resize_cb(BlockDriverState *bs)
833{
834 if (bs->dev_ops && bs->dev_ops->resize_cb) {
835 bs->dev_ops->resize_cb(bs->dev_opaque);
0e49de52
MA
836 }
837}
838
f107639a
MA
839bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
840{
841 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
842 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
843 }
844 return false;
845}
846
e97fc193
AL
847/*
848 * Run consistency checks on an image
849 *
e076f338 850 * Returns 0 if the check could be completed (it doesn't mean that the image is
a1c7273b 851 * free of errors) or -errno when an internal error occurred. The results of the
e076f338 852 * check are stored in res.
e97fc193 853 */
e076f338 854int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
e97fc193
AL
855{
856 if (bs->drv->bdrv_check == NULL) {
857 return -ENOTSUP;
858 }
859
e076f338 860 memset(res, 0, sizeof(*res));
9ac228e0 861 return bs->drv->bdrv_check(bs, res);
e97fc193
AL
862}
863
8a426614
KW
864#define COMMIT_BUF_SECTORS 2048
865
33e3963e
FB
866/* commit COW file into the raw image */
867int bdrv_commit(BlockDriverState *bs)
868{
19cb3738 869 BlockDriver *drv = bs->drv;
ee181196 870 BlockDriver *backing_drv;
8a426614
KW
871 int64_t sector, total_sectors;
872 int n, ro, open_flags;
4dca4b63 873 int ret = 0, rw_ret = 0;
8a426614 874 uint8_t *buf;
4dca4b63
NS
875 char filename[1024];
876 BlockDriverState *bs_rw, *bs_ro;
33e3963e 877
19cb3738
FB
878 if (!drv)
879 return -ENOMEDIUM;
4dca4b63
NS
880
881 if (!bs->backing_hd) {
882 return -ENOTSUP;
33e3963e
FB
883 }
884
4dca4b63
NS
885 if (bs->backing_hd->keep_read_only) {
886 return -EACCES;
887 }
ee181196
KW
888
889 backing_drv = bs->backing_hd->drv;
4dca4b63
NS
890 ro = bs->backing_hd->read_only;
891 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
892 open_flags = bs->backing_hd->open_flags;
893
894 if (ro) {
895 /* re-open as RW */
896 bdrv_delete(bs->backing_hd);
897 bs->backing_hd = NULL;
898 bs_rw = bdrv_new("");
ee181196
KW
899 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
900 backing_drv);
4dca4b63
NS
901 if (rw_ret < 0) {
902 bdrv_delete(bs_rw);
903 /* try to re-open read-only */
904 bs_ro = bdrv_new("");
ee181196
KW
905 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
906 backing_drv);
4dca4b63
NS
907 if (ret < 0) {
908 bdrv_delete(bs_ro);
909 /* drive not functional anymore */
910 bs->drv = NULL;
911 return ret;
912 }
913 bs->backing_hd = bs_ro;
914 return rw_ret;
915 }
916 bs->backing_hd = bs_rw;
ea2384d3 917 }
33e3963e 918
6ea44308 919 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
7267c094 920 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
8a426614
KW
921
922 for (sector = 0; sector < total_sectors; sector += n) {
923 if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
924
925 if (bdrv_read(bs, sector, buf, n) != 0) {
926 ret = -EIO;
927 goto ro_cleanup;
928 }
929
930 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
931 ret = -EIO;
932 goto ro_cleanup;
933 }
ea2384d3 934 }
33e3963e 935 }
95389c86 936
1d44952f
CH
937 if (drv->bdrv_make_empty) {
938 ret = drv->bdrv_make_empty(bs);
939 bdrv_flush(bs);
940 }
95389c86 941
3f5075ae
CH
942 /*
943 * Make sure all data we wrote to the backing device is actually
944 * stable on disk.
945 */
946 if (bs->backing_hd)
947 bdrv_flush(bs->backing_hd);
4dca4b63
NS
948
949ro_cleanup:
7267c094 950 g_free(buf);
4dca4b63
NS
951
952 if (ro) {
953 /* re-open as RO */
954 bdrv_delete(bs->backing_hd);
955 bs->backing_hd = NULL;
956 bs_ro = bdrv_new("");
ee181196
KW
957 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
958 backing_drv);
4dca4b63
NS
959 if (ret < 0) {
960 bdrv_delete(bs_ro);
961 /* drive not functional anymore */
962 bs->drv = NULL;
963 return ret;
964 }
965 bs->backing_hd = bs_ro;
966 bs->backing_hd->keep_read_only = 0;
967 }
968
1d44952f 969 return ret;
33e3963e
FB
970}
971
6ab4b5ab
MA
972void bdrv_commit_all(void)
973{
974 BlockDriverState *bs;
975
976 QTAILQ_FOREACH(bs, &bdrv_states, list) {
977 bdrv_commit(bs);
978 }
979}
980
756e6736
KW
981/*
982 * Return values:
983 * 0 - success
984 * -EINVAL - backing format specified, but no file
985 * -ENOSPC - can't update the backing file because no space is left in the
986 * image file header
987 * -ENOTSUP - format driver doesn't support changing the backing file
988 */
989int bdrv_change_backing_file(BlockDriverState *bs,
990 const char *backing_file, const char *backing_fmt)
991{
992 BlockDriver *drv = bs->drv;
993
994 if (drv->bdrv_change_backing_file != NULL) {
995 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
996 } else {
997 return -ENOTSUP;
998 }
999}
1000
71d0770c
AL
1001static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1002 size_t size)
1003{
1004 int64_t len;
1005
1006 if (!bdrv_is_inserted(bs))
1007 return -ENOMEDIUM;
1008
1009 if (bs->growable)
1010 return 0;
1011
1012 len = bdrv_getlength(bs);
1013
fbb7b4e0
KW
1014 if (offset < 0)
1015 return -EIO;
1016
1017 if ((offset > len) || (len - offset < size))
71d0770c
AL
1018 return -EIO;
1019
1020 return 0;
1021}
1022
1023static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1024 int nb_sectors)
1025{
eb5a3165
JS
1026 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1027 nb_sectors * BDRV_SECTOR_SIZE);
71d0770c
AL
1028}
1029
e7a8a783
KW
1030static inline bool bdrv_has_async_flush(BlockDriver *drv)
1031{
1032 return drv->bdrv_aio_flush != bdrv_aio_flush_em;
1033}
1034
1c9805a3
SH
1035typedef struct RwCo {
1036 BlockDriverState *bs;
1037 int64_t sector_num;
1038 int nb_sectors;
1039 QEMUIOVector *qiov;
1040 bool is_write;
1041 int ret;
1042} RwCo;
1043
1044static void coroutine_fn bdrv_rw_co_entry(void *opaque)
fc01f7e7 1045{
1c9805a3 1046 RwCo *rwco = opaque;
ea2384d3 1047
1c9805a3
SH
1048 if (!rwco->is_write) {
1049 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
1050 rwco->nb_sectors, rwco->qiov);
1051 } else {
1052 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
1053 rwco->nb_sectors, rwco->qiov);
1054 }
1055}
e7a8a783 1056
1c9805a3
SH
1057/*
1058 * Process a synchronous request using coroutines
1059 */
1060static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1061 int nb_sectors, bool is_write)
1062{
1063 QEMUIOVector qiov;
1064 struct iovec iov = {
1065 .iov_base = (void *)buf,
1066 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1067 };
1068 Coroutine *co;
1069 RwCo rwco = {
1070 .bs = bs,
1071 .sector_num = sector_num,
1072 .nb_sectors = nb_sectors,
1073 .qiov = &qiov,
1074 .is_write = is_write,
1075 .ret = NOT_DONE,
1076 };
e7a8a783 1077
1c9805a3 1078 qemu_iovec_init_external(&qiov, &iov, 1);
e7a8a783 1079
1c9805a3
SH
1080 if (qemu_in_coroutine()) {
1081 /* Fast-path if already in coroutine context */
1082 bdrv_rw_co_entry(&rwco);
1083 } else {
1084 co = qemu_coroutine_create(bdrv_rw_co_entry);
1085 qemu_coroutine_enter(co, &rwco);
1086 while (rwco.ret == NOT_DONE) {
1087 qemu_aio_wait();
1088 }
1089 }
1090 return rwco.ret;
1091}
b338082b 1092
1c9805a3
SH
1093/* return < 0 if error. See bdrv_write() for the return codes */
1094int bdrv_read(BlockDriverState *bs, int64_t sector_num,
1095 uint8_t *buf, int nb_sectors)
1096{
1097 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
fc01f7e7
FB
1098}
1099
7cd1e32a 1100static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
a55eb92c 1101 int nb_sectors, int dirty)
7cd1e32a
LS
1102{
1103 int64_t start, end;
c6d22830 1104 unsigned long val, idx, bit;
a55eb92c 1105
6ea44308 1106 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
c6d22830 1107 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c
JK
1108
1109 for (; start <= end; start++) {
c6d22830
JK
1110 idx = start / (sizeof(unsigned long) * 8);
1111 bit = start % (sizeof(unsigned long) * 8);
1112 val = bs->dirty_bitmap[idx];
1113 if (dirty) {
6d59fec1 1114 if (!(val & (1UL << bit))) {
aaa0eb75 1115 bs->dirty_count++;
6d59fec1 1116 val |= 1UL << bit;
aaa0eb75 1117 }
c6d22830 1118 } else {
6d59fec1 1119 if (val & (1UL << bit)) {
aaa0eb75 1120 bs->dirty_count--;
6d59fec1 1121 val &= ~(1UL << bit);
aaa0eb75 1122 }
c6d22830
JK
1123 }
1124 bs->dirty_bitmap[idx] = val;
7cd1e32a
LS
1125 }
1126}
1127
5fafdf24 1128/* Return < 0 if error. Important errors are:
19cb3738
FB
1129 -EIO generic I/O error (may happen for all errors)
1130 -ENOMEDIUM No media inserted.
1131 -EINVAL Invalid sector number or nb_sectors
1132 -EACCES Trying to write a read-only device
1133*/
5fafdf24 1134int bdrv_write(BlockDriverState *bs, int64_t sector_num,
fc01f7e7
FB
1135 const uint8_t *buf, int nb_sectors)
1136{
1c9805a3 1137 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
83f64091
FB
1138}
1139
eda578e5
AL
1140int bdrv_pread(BlockDriverState *bs, int64_t offset,
1141 void *buf, int count1)
83f64091 1142{
6ea44308 1143 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
83f64091
FB
1144 int len, nb_sectors, count;
1145 int64_t sector_num;
9a8c4cce 1146 int ret;
83f64091
FB
1147
1148 count = count1;
1149 /* first read to align to sector start */
6ea44308 1150 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
83f64091
FB
1151 if (len > count)
1152 len = count;
6ea44308 1153 sector_num = offset >> BDRV_SECTOR_BITS;
83f64091 1154 if (len > 0) {
9a8c4cce
KW
1155 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1156 return ret;
6ea44308 1157 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
83f64091
FB
1158 count -= len;
1159 if (count == 0)
1160 return count1;
1161 sector_num++;
1162 buf += len;
1163 }
1164
1165 /* read the sectors "in place" */
6ea44308 1166 nb_sectors = count >> BDRV_SECTOR_BITS;
83f64091 1167 if (nb_sectors > 0) {
9a8c4cce
KW
1168 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1169 return ret;
83f64091 1170 sector_num += nb_sectors;
6ea44308 1171 len = nb_sectors << BDRV_SECTOR_BITS;
83f64091
FB
1172 buf += len;
1173 count -= len;
1174 }
1175
1176 /* add data from the last sector */
1177 if (count > 0) {
9a8c4cce
KW
1178 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1179 return ret;
83f64091
FB
1180 memcpy(buf, tmp_buf, count);
1181 }
1182 return count1;
1183}
1184
eda578e5
AL
1185int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1186 const void *buf, int count1)
83f64091 1187{
6ea44308 1188 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
83f64091
FB
1189 int len, nb_sectors, count;
1190 int64_t sector_num;
9a8c4cce 1191 int ret;
83f64091
FB
1192
1193 count = count1;
1194 /* first write to align to sector start */
6ea44308 1195 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
83f64091
FB
1196 if (len > count)
1197 len = count;
6ea44308 1198 sector_num = offset >> BDRV_SECTOR_BITS;
83f64091 1199 if (len > 0) {
9a8c4cce
KW
1200 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1201 return ret;
6ea44308 1202 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
9a8c4cce
KW
1203 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1204 return ret;
83f64091
FB
1205 count -= len;
1206 if (count == 0)
1207 return count1;
1208 sector_num++;
1209 buf += len;
1210 }
1211
1212 /* write the sectors "in place" */
6ea44308 1213 nb_sectors = count >> BDRV_SECTOR_BITS;
83f64091 1214 if (nb_sectors > 0) {
9a8c4cce
KW
1215 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1216 return ret;
83f64091 1217 sector_num += nb_sectors;
6ea44308 1218 len = nb_sectors << BDRV_SECTOR_BITS;
83f64091
FB
1219 buf += len;
1220 count -= len;
1221 }
1222
1223 /* add data from the last sector */
1224 if (count > 0) {
9a8c4cce
KW
1225 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1226 return ret;
83f64091 1227 memcpy(tmp_buf, buf, count);
9a8c4cce
KW
1228 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1229 return ret;
83f64091
FB
1230 }
1231 return count1;
1232}
83f64091 1233
f08145fe
KW
1234/*
1235 * Writes to the file and ensures that no writes are reordered across this
1236 * request (acts as a barrier)
1237 *
1238 * Returns 0 on success, -errno in error cases.
1239 */
1240int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1241 const void *buf, int count)
1242{
1243 int ret;
1244
1245 ret = bdrv_pwrite(bs, offset, buf, count);
1246 if (ret < 0) {
1247 return ret;
1248 }
1249
92196b2f
SH
1250 /* No flush needed for cache modes that use O_DSYNC */
1251 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
f08145fe
KW
1252 bdrv_flush(bs);
1253 }
1254
1255 return 0;
1256}
1257
c5fbe571
SH
1258/*
1259 * Handle a read request in coroutine context
1260 */
1261static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
1262 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
da1fa91d
KW
1263{
1264 BlockDriver *drv = bs->drv;
1265
da1fa91d
KW
1266 if (!drv) {
1267 return -ENOMEDIUM;
1268 }
1269 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1270 return -EIO;
1271 }
1272
1273 return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1274}
1275
c5fbe571 1276int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
da1fa91d
KW
1277 int nb_sectors, QEMUIOVector *qiov)
1278{
c5fbe571 1279 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
da1fa91d 1280
c5fbe571
SH
1281 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
1282}
1283
1284/*
1285 * Handle a write request in coroutine context
1286 */
1287static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
1288 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1289{
1290 BlockDriver *drv = bs->drv;
6b7cb247 1291 int ret;
da1fa91d
KW
1292
1293 if (!bs->drv) {
1294 return -ENOMEDIUM;
1295 }
1296 if (bs->read_only) {
1297 return -EACCES;
1298 }
1299 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1300 return -EIO;
1301 }
1302
6b7cb247
SH
1303 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1304
da1fa91d
KW
1305 if (bs->dirty_bitmap) {
1306 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1307 }
1308
1309 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1310 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1311 }
1312
6b7cb247 1313 return ret;
da1fa91d
KW
1314}
1315
c5fbe571
SH
1316int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1317 int nb_sectors, QEMUIOVector *qiov)
1318{
1319 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1320
1321 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
1322}
1323
83f64091
FB
1324/**
1325 * Truncate file to 'offset' bytes (needed only for file protocols)
1326 */
1327int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1328{
1329 BlockDriver *drv = bs->drv;
51762288 1330 int ret;
83f64091 1331 if (!drv)
19cb3738 1332 return -ENOMEDIUM;
83f64091
FB
1333 if (!drv->bdrv_truncate)
1334 return -ENOTSUP;
59f2689d
NS
1335 if (bs->read_only)
1336 return -EACCES;
8591675f
MT
1337 if (bdrv_in_use(bs))
1338 return -EBUSY;
51762288
SH
1339 ret = drv->bdrv_truncate(bs, offset);
1340 if (ret == 0) {
1341 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
145feb17 1342 bdrv_dev_resize_cb(bs);
51762288
SH
1343 }
1344 return ret;
83f64091
FB
1345}
1346
4a1d5e1f
FZ
1347/**
1348 * Length of a allocated file in bytes. Sparse files are counted by actual
1349 * allocated space. Return < 0 if error or unknown.
1350 */
1351int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1352{
1353 BlockDriver *drv = bs->drv;
1354 if (!drv) {
1355 return -ENOMEDIUM;
1356 }
1357 if (drv->bdrv_get_allocated_file_size) {
1358 return drv->bdrv_get_allocated_file_size(bs);
1359 }
1360 if (bs->file) {
1361 return bdrv_get_allocated_file_size(bs->file);
1362 }
1363 return -ENOTSUP;
1364}
1365
83f64091
FB
1366/**
1367 * Length of a file in bytes. Return < 0 if error or unknown.
1368 */
1369int64_t bdrv_getlength(BlockDriverState *bs)
1370{
1371 BlockDriver *drv = bs->drv;
1372 if (!drv)
19cb3738 1373 return -ENOMEDIUM;
51762288 1374
2c6942fa 1375 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
46a4e4e6
SH
1376 if (drv->bdrv_getlength) {
1377 return drv->bdrv_getlength(bs);
1378 }
83f64091 1379 }
46a4e4e6 1380 return bs->total_sectors * BDRV_SECTOR_SIZE;
fc01f7e7
FB
1381}
1382
19cb3738 1383/* return 0 as number of sectors if no device present or error */
96b8f136 1384void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
fc01f7e7 1385{
19cb3738
FB
1386 int64_t length;
1387 length = bdrv_getlength(bs);
1388 if (length < 0)
1389 length = 0;
1390 else
6ea44308 1391 length = length >> BDRV_SECTOR_BITS;
19cb3738 1392 *nb_sectors_ptr = length;
fc01f7e7 1393}
cf98951b 1394
f3d54fc4
AL
/* On-disk layout of one MSDOS partition table entry (packed). */
struct partition {
    uint8_t boot_ind;       /* 0x80 - active */
    uint8_t head;           /* starting head */
    uint8_t sector;         /* starting sector */
    uint8_t cyl;            /* starting cylinder */
    uint8_t sys_ind;        /* What partition type */
    uint8_t end_head;       /* end head */
    uint8_t end_sector;     /* end sector */
    uint8_t end_cyl;        /* end cylinder */
    uint32_t start_sect;    /* starting sector counting from 0 */
    uint32_t nr_sects;      /* nr of sectors in partition */
} QEMU_PACKED;
f3d54fc4
AL
1407
1408/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1409static int guess_disk_lchs(BlockDriverState *bs,
1410 int *pcylinders, int *pheads, int *psectors)
1411{
eb5a3165 1412 uint8_t buf[BDRV_SECTOR_SIZE];
f3d54fc4
AL
1413 int ret, i, heads, sectors, cylinders;
1414 struct partition *p;
1415 uint32_t nr_sects;
a38131b6 1416 uint64_t nb_sectors;
f3d54fc4
AL
1417
1418 bdrv_get_geometry(bs, &nb_sectors);
1419
1420 ret = bdrv_read(bs, 0, buf, 1);
1421 if (ret < 0)
1422 return -1;
1423 /* test msdos magic */
1424 if (buf[510] != 0x55 || buf[511] != 0xaa)
1425 return -1;
1426 for(i = 0; i < 4; i++) {
1427 p = ((struct partition *)(buf + 0x1be)) + i;
1428 nr_sects = le32_to_cpu(p->nr_sects);
1429 if (nr_sects && p->end_head) {
1430 /* We make the assumption that the partition terminates on
1431 a cylinder boundary */
1432 heads = p->end_head + 1;
1433 sectors = p->end_sector & 63;
1434 if (sectors == 0)
1435 continue;
1436 cylinders = nb_sectors / (heads * sectors);
1437 if (cylinders < 1 || cylinders > 16383)
1438 continue;
1439 *pheads = heads;
1440 *psectors = sectors;
1441 *pcylinders = cylinders;
1442#if 0
1443 printf("guessed geometry: LCHS=%d %d %d\n",
1444 cylinders, heads, sectors);
1445#endif
1446 return 0;
1447 }
1448 }
1449 return -1;
1450}
1451
1452void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1453{
1454 int translation, lba_detected = 0;
1455 int cylinders, heads, secs;
a38131b6 1456 uint64_t nb_sectors;
f3d54fc4
AL
1457
1458 /* if a geometry hint is available, use it */
1459 bdrv_get_geometry(bs, &nb_sectors);
1460 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1461 translation = bdrv_get_translation_hint(bs);
1462 if (cylinders != 0) {
1463 *pcyls = cylinders;
1464 *pheads = heads;
1465 *psecs = secs;
1466 } else {
1467 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1468 if (heads > 16) {
1469 /* if heads > 16, it means that a BIOS LBA
1470 translation was active, so the default
1471 hardware geometry is OK */
1472 lba_detected = 1;
1473 goto default_geometry;
1474 } else {
1475 *pcyls = cylinders;
1476 *pheads = heads;
1477 *psecs = secs;
1478 /* disable any translation to be in sync with
1479 the logical geometry */
1480 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1481 bdrv_set_translation_hint(bs,
1482 BIOS_ATA_TRANSLATION_NONE);
1483 }
1484 }
1485 } else {
1486 default_geometry:
1487 /* if no geometry, use a standard physical disk geometry */
1488 cylinders = nb_sectors / (16 * 63);
1489
1490 if (cylinders > 16383)
1491 cylinders = 16383;
1492 else if (cylinders < 2)
1493 cylinders = 2;
1494 *pcyls = cylinders;
1495 *pheads = 16;
1496 *psecs = 63;
1497 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1498 if ((*pcyls * *pheads) <= 131072) {
1499 bdrv_set_translation_hint(bs,
1500 BIOS_ATA_TRANSLATION_LARGE);
1501 } else {
1502 bdrv_set_translation_hint(bs,
1503 BIOS_ATA_TRANSLATION_LBA);
1504 }
1505 }
1506 }
1507 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1508 }
1509}
1510
5fafdf24 1511void bdrv_set_geometry_hint(BlockDriverState *bs,
b338082b
FB
1512 int cyls, int heads, int secs)
1513{
1514 bs->cyls = cyls;
1515 bs->heads = heads;
1516 bs->secs = secs;
1517}
1518
46d4767d
FB
1519void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1520{
1521 bs->translation = translation;
1522}
1523
5fafdf24 1524void bdrv_get_geometry_hint(BlockDriverState *bs,
b338082b
FB
1525 int *pcyls, int *pheads, int *psecs)
1526{
1527 *pcyls = bs->cyls;
1528 *pheads = bs->heads;
1529 *psecs = bs->secs;
1530}
1531
5bbdbb46
BS
1532/* Recognize floppy formats */
1533typedef struct FDFormat {
1534 FDriveType drive;
1535 uint8_t last_sect;
1536 uint8_t max_track;
1537 uint8_t max_head;
1538} FDFormat;
1539
1540static const FDFormat fd_formats[] = {
1541 /* First entry is default format */
1542 /* 1.44 MB 3"1/2 floppy disks */
1543 { FDRIVE_DRV_144, 18, 80, 1, },
1544 { FDRIVE_DRV_144, 20, 80, 1, },
1545 { FDRIVE_DRV_144, 21, 80, 1, },
1546 { FDRIVE_DRV_144, 21, 82, 1, },
1547 { FDRIVE_DRV_144, 21, 83, 1, },
1548 { FDRIVE_DRV_144, 22, 80, 1, },
1549 { FDRIVE_DRV_144, 23, 80, 1, },
1550 { FDRIVE_DRV_144, 24, 80, 1, },
1551 /* 2.88 MB 3"1/2 floppy disks */
1552 { FDRIVE_DRV_288, 36, 80, 1, },
1553 { FDRIVE_DRV_288, 39, 80, 1, },
1554 { FDRIVE_DRV_288, 40, 80, 1, },
1555 { FDRIVE_DRV_288, 44, 80, 1, },
1556 { FDRIVE_DRV_288, 48, 80, 1, },
1557 /* 720 kB 3"1/2 floppy disks */
1558 { FDRIVE_DRV_144, 9, 80, 1, },
1559 { FDRIVE_DRV_144, 10, 80, 1, },
1560 { FDRIVE_DRV_144, 10, 82, 1, },
1561 { FDRIVE_DRV_144, 10, 83, 1, },
1562 { FDRIVE_DRV_144, 13, 80, 1, },
1563 { FDRIVE_DRV_144, 14, 80, 1, },
1564 /* 1.2 MB 5"1/4 floppy disks */
1565 { FDRIVE_DRV_120, 15, 80, 1, },
1566 { FDRIVE_DRV_120, 18, 80, 1, },
1567 { FDRIVE_DRV_120, 18, 82, 1, },
1568 { FDRIVE_DRV_120, 18, 83, 1, },
1569 { FDRIVE_DRV_120, 20, 80, 1, },
1570 /* 720 kB 5"1/4 floppy disks */
1571 { FDRIVE_DRV_120, 9, 80, 1, },
1572 { FDRIVE_DRV_120, 11, 80, 1, },
1573 /* 360 kB 5"1/4 floppy disks */
1574 { FDRIVE_DRV_120, 9, 40, 1, },
1575 { FDRIVE_DRV_120, 9, 40, 0, },
1576 { FDRIVE_DRV_120, 10, 41, 1, },
1577 { FDRIVE_DRV_120, 10, 42, 1, },
1578 /* 320 kB 5"1/4 floppy disks */
1579 { FDRIVE_DRV_120, 8, 40, 1, },
1580 { FDRIVE_DRV_120, 8, 40, 0, },
1581 /* 360 kB must match 5"1/4 better than 3"1/2... */
1582 { FDRIVE_DRV_144, 9, 80, 0, },
1583 /* end */
1584 { FDRIVE_DRV_NONE, -1, -1, 0, },
1585};
1586
1587void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1588 int *max_track, int *last_sect,
1589 FDriveType drive_in, FDriveType *drive)
1590{
1591 const FDFormat *parse;
1592 uint64_t nb_sectors, size;
1593 int i, first_match, match;
1594
1595 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1596 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1597 /* User defined disk */
1598 } else {
1599 bdrv_get_geometry(bs, &nb_sectors);
1600 match = -1;
1601 first_match = -1;
1602 for (i = 0; ; i++) {
1603 parse = &fd_formats[i];
1604 if (parse->drive == FDRIVE_DRV_NONE) {
1605 break;
1606 }
1607 if (drive_in == parse->drive ||
1608 drive_in == FDRIVE_DRV_NONE) {
1609 size = (parse->max_head + 1) * parse->max_track *
1610 parse->last_sect;
1611 if (nb_sectors == size) {
1612 match = i;
1613 break;
1614 }
1615 if (first_match == -1) {
1616 first_match = i;
1617 }
1618 }
1619 }
1620 if (match == -1) {
1621 if (first_match == -1) {
1622 match = 1;
1623 } else {
1624 match = first_match;
1625 }
1626 parse = &fd_formats[match];
1627 }
1628 *nb_heads = parse->max_head + 1;
1629 *max_track = parse->max_track;
1630 *last_sect = parse->last_sect;
1631 *drive = parse->drive;
1632 }
1633}
1634
46d4767d
FB
1635int bdrv_get_translation_hint(BlockDriverState *bs)
1636{
1637 return bs->translation;
1638}
1639
abd7f68d
MA
1640void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1641 BlockErrorAction on_write_error)
1642{
1643 bs->on_read_error = on_read_error;
1644 bs->on_write_error = on_write_error;
1645}
1646
1647BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1648{
1649 return is_read ? bs->on_read_error : bs->on_write_error;
1650}
1651
b338082b
FB
1652int bdrv_is_read_only(BlockDriverState *bs)
1653{
1654 return bs->read_only;
1655}
1656
985a03b0
TS
1657int bdrv_is_sg(BlockDriverState *bs)
1658{
1659 return bs->sg;
1660}
1661
e900a7b7
CH
1662int bdrv_enable_write_cache(BlockDriverState *bs)
1663{
1664 return bs->enable_write_cache;
1665}
1666
ea2384d3
FB
1667int bdrv_is_encrypted(BlockDriverState *bs)
1668{
1669 if (bs->backing_hd && bs->backing_hd->encrypted)
1670 return 1;
1671 return bs->encrypted;
1672}
1673
c0f4ce77
AL
1674int bdrv_key_required(BlockDriverState *bs)
1675{
1676 BlockDriverState *backing_hd = bs->backing_hd;
1677
1678 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1679 return 1;
1680 return (bs->encrypted && !bs->valid_key);
1681}
1682
ea2384d3
FB
1683int bdrv_set_key(BlockDriverState *bs, const char *key)
1684{
1685 int ret;
1686 if (bs->backing_hd && bs->backing_hd->encrypted) {
1687 ret = bdrv_set_key(bs->backing_hd, key);
1688 if (ret < 0)
1689 return ret;
1690 if (!bs->encrypted)
1691 return 0;
1692 }
fd04a2ae
SH
1693 if (!bs->encrypted) {
1694 return -EINVAL;
1695 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1696 return -ENOMEDIUM;
1697 }
c0f4ce77 1698 ret = bs->drv->bdrv_set_key(bs, key);
bb5fc20f
AL
1699 if (ret < 0) {
1700 bs->valid_key = 0;
1701 } else if (!bs->valid_key) {
1702 bs->valid_key = 1;
1703 /* call the change callback now, we skipped it on open */
7d4b4ba5 1704 bdrv_dev_change_media_cb(bs, true);
bb5fc20f 1705 }
c0f4ce77 1706 return ret;
ea2384d3
FB
1707}
1708
1709void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1710{
19cb3738 1711 if (!bs->drv) {
ea2384d3
FB
1712 buf[0] = '\0';
1713 } else {
1714 pstrcpy(buf, buf_size, bs->drv->format_name);
1715 }
1716}
1717
5fafdf24 1718void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
ea2384d3
FB
1719 void *opaque)
1720{
1721 BlockDriver *drv;
1722
8a22f02a 1723 QLIST_FOREACH(drv, &bdrv_drivers, list) {
ea2384d3
FB
1724 it(opaque, drv->format_name);
1725 }
1726}
1727
b338082b
FB
1728BlockDriverState *bdrv_find(const char *name)
1729{
1730 BlockDriverState *bs;
1731
1b7bdbc1
SH
1732 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1733 if (!strcmp(name, bs->device_name)) {
b338082b 1734 return bs;
1b7bdbc1 1735 }
b338082b
FB
1736 }
1737 return NULL;
1738}
1739
2f399b0a
MA
1740BlockDriverState *bdrv_next(BlockDriverState *bs)
1741{
1742 if (!bs) {
1743 return QTAILQ_FIRST(&bdrv_states);
1744 }
1745 return QTAILQ_NEXT(bs, list);
1746}
1747
51de9760 1748void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
81d0912d
FB
1749{
1750 BlockDriverState *bs;
1751
1b7bdbc1 1752 QTAILQ_FOREACH(bs, &bdrv_states, list) {
51de9760 1753 it(opaque, bs);
81d0912d
FB
1754 }
1755}
1756
ea2384d3
FB
1757const char *bdrv_get_device_name(BlockDriverState *bs)
1758{
1759 return bs->device_name;
1760}
1761
205ef796 1762int bdrv_flush(BlockDriverState *bs)
7a6cba61 1763{
016f5cf6 1764 if (bs->open_flags & BDRV_O_NO_FLUSH) {
205ef796
KW
1765 return 0;
1766 }
1767
e7a8a783
KW
1768 if (bs->drv && bdrv_has_async_flush(bs->drv) && qemu_in_coroutine()) {
1769 return bdrv_co_flush_em(bs);
1770 }
1771
205ef796
KW
1772 if (bs->drv && bs->drv->bdrv_flush) {
1773 return bs->drv->bdrv_flush(bs);
016f5cf6
AG
1774 }
1775
205ef796
KW
1776 /*
1777 * Some block drivers always operate in either writethrough or unsafe mode
1778 * and don't support bdrv_flush therefore. Usually qemu doesn't know how
1779 * the server works (because the behaviour is hardcoded or depends on
1780 * server-side configuration), so we can't ensure that everything is safe
1781 * on disk. Returning an error doesn't work because that would break guests
1782 * even if the server operates in writethrough mode.
1783 *
1784 * Let's hope the user knows what he's doing.
1785 */
1786 return 0;
7a6cba61
PB
1787}
1788
c6ca28d6
AL
1789void bdrv_flush_all(void)
1790{
1791 BlockDriverState *bs;
1792
1b7bdbc1 1793 QTAILQ_FOREACH(bs, &bdrv_states, list) {
c602a489 1794 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
c6ca28d6 1795 bdrv_flush(bs);
1b7bdbc1
SH
1796 }
1797 }
c6ca28d6
AL
1798}
1799
f2feebbd
KW
1800int bdrv_has_zero_init(BlockDriverState *bs)
1801{
1802 assert(bs->drv);
1803
336c1c12
KW
1804 if (bs->drv->bdrv_has_zero_init) {
1805 return bs->drv->bdrv_has_zero_init(bs);
f2feebbd
KW
1806 }
1807
1808 return 1;
1809}
1810
bb8bf76f
CH
1811int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
1812{
1813 if (!bs->drv) {
1814 return -ENOMEDIUM;
1815 }
1816 if (!bs->drv->bdrv_discard) {
1817 return 0;
1818 }
1819 return bs->drv->bdrv_discard(bs, sector_num, nb_sectors);
1820}
1821
f58c7b35
TS
1822/*
1823 * Returns true iff the specified sector is present in the disk image. Drivers
1824 * not implementing the functionality are assumed to not support backing files,
1825 * hence all their sectors are reported as allocated.
1826 *
1827 * 'pnum' is set to the number of sectors (including and immediately following
1828 * the specified sector) that are known to be in the same
1829 * allocated/unallocated state.
1830 *
1831 * 'nb_sectors' is the max value 'pnum' should be set to.
1832 */
1833int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1834 int *pnum)
1835{
1836 int64_t n;
1837 if (!bs->drv->bdrv_is_allocated) {
1838 if (sector_num >= bs->total_sectors) {
1839 *pnum = 0;
1840 return 0;
1841 }
1842 n = bs->total_sectors - sector_num;
1843 *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1844 return 1;
1845 }
1846 return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1847}
1848
2582bfed
LC
1849void bdrv_mon_event(const BlockDriverState *bdrv,
1850 BlockMonEventAction action, int is_read)
1851{
1852 QObject *data;
1853 const char *action_str;
1854
1855 switch (action) {
1856 case BDRV_ACTION_REPORT:
1857 action_str = "report";
1858 break;
1859 case BDRV_ACTION_IGNORE:
1860 action_str = "ignore";
1861 break;
1862 case BDRV_ACTION_STOP:
1863 action_str = "stop";
1864 break;
1865 default:
1866 abort();
1867 }
1868
1869 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1870 bdrv->device_name,
1871 action_str,
1872 is_read ? "read" : "write");
1873 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1874
1875 qobject_decref(data);
1876}
1877
d15e5465 1878static void bdrv_print_dict(QObject *obj, void *opaque)
b338082b 1879{
d15e5465
LC
1880 QDict *bs_dict;
1881 Monitor *mon = opaque;
1882
1883 bs_dict = qobject_to_qdict(obj);
1884
d8aeeb31 1885 monitor_printf(mon, "%s: removable=%d",
d15e5465 1886 qdict_get_str(bs_dict, "device"),
d15e5465
LC
1887 qdict_get_bool(bs_dict, "removable"));
1888
1889 if (qdict_get_bool(bs_dict, "removable")) {
1890 monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
e4def80b
MA
1891 monitor_printf(mon, " tray-open=%d",
1892 qdict_get_bool(bs_dict, "tray-open"));
d15e5465 1893 }
d2078cc2
LC
1894
1895 if (qdict_haskey(bs_dict, "io-status")) {
1896 monitor_printf(mon, " io-status=%s", qdict_get_str(bs_dict, "io-status"));
1897 }
1898
d15e5465
LC
1899 if (qdict_haskey(bs_dict, "inserted")) {
1900 QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted"));
1901
1902 monitor_printf(mon, " file=");
1903 monitor_print_filename(mon, qdict_get_str(qdict, "file"));
1904 if (qdict_haskey(qdict, "backing_file")) {
1905 monitor_printf(mon, " backing_file=");
1906 monitor_print_filename(mon, qdict_get_str(qdict, "backing_file"));
1907 }
1908 monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
1909 qdict_get_bool(qdict, "ro"),
1910 qdict_get_str(qdict, "drv"),
1911 qdict_get_bool(qdict, "encrypted"));
1912 } else {
1913 monitor_printf(mon, " [not inserted]");
1914 }
1915
1916 monitor_printf(mon, "\n");
1917}
1918
1919void bdrv_info_print(Monitor *mon, const QObject *data)
1920{
1921 qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon);
1922}
1923
f04ef601
LC
1924static const char *const io_status_name[BDRV_IOS_MAX] = {
1925 [BDRV_IOS_OK] = "ok",
1926 [BDRV_IOS_FAILED] = "failed",
1927 [BDRV_IOS_ENOSPC] = "nospace",
1928};
1929
d15e5465
LC
1930void bdrv_info(Monitor *mon, QObject **ret_data)
1931{
1932 QList *bs_list;
b338082b
FB
1933 BlockDriverState *bs;
1934
d15e5465
LC
1935 bs_list = qlist_new();
1936
1b7bdbc1 1937 QTAILQ_FOREACH(bs, &bdrv_states, list) {
d15e5465 1938 QObject *bs_obj;
e4def80b 1939 QDict *bs_dict;
d15e5465 1940
d8aeeb31 1941 bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': 'unknown', "
d15e5465 1942 "'removable': %i, 'locked': %i }",
2c6942fa
MA
1943 bs->device_name,
1944 bdrv_dev_has_removable_media(bs),
f107639a 1945 bdrv_dev_is_medium_locked(bs));
e4def80b 1946 bs_dict = qobject_to_qdict(bs_obj);
d15e5465 1947
e4def80b
MA
1948 if (bdrv_dev_has_removable_media(bs)) {
1949 qdict_put(bs_dict, "tray-open",
1950 qbool_from_int(bdrv_dev_is_tray_open(bs)));
1951 }
f04ef601
LC
1952
1953 if (bdrv_iostatus_is_enabled(bs)) {
1954 qdict_put(bs_dict, "io-status",
1955 qstring_from_str(io_status_name[bs->iostatus]));
1956 }
1957
19cb3738 1958 if (bs->drv) {
d15e5465 1959 QObject *obj;
d15e5465
LC
1960
1961 obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
1962 "'encrypted': %i }",
1963 bs->filename, bs->read_only,
1964 bs->drv->format_name,
1965 bdrv_is_encrypted(bs));
fef30743 1966 if (bs->backing_file[0] != '\0') {
d15e5465
LC
1967 QDict *qdict = qobject_to_qdict(obj);
1968 qdict_put(qdict, "backing_file",
1969 qstring_from_str(bs->backing_file));
376253ec 1970 }
d15e5465
LC
1971
1972 qdict_put_obj(bs_dict, "inserted", obj);
b338082b 1973 }
d15e5465 1974 qlist_append_obj(bs_list, bs_obj);
b338082b 1975 }
d15e5465
LC
1976
1977 *ret_data = QOBJECT(bs_list);
b338082b 1978}
a36e69dd 1979
218a536a 1980static void bdrv_stats_iter(QObject *data, void *opaque)
a36e69dd 1981{
218a536a
LC
1982 QDict *qdict;
1983 Monitor *mon = opaque;
1984
1985 qdict = qobject_to_qdict(data);
1986 monitor_printf(mon, "%s:", qdict_get_str(qdict, "device"));
1987
1988 qdict = qobject_to_qdict(qdict_get(qdict, "stats"));
1989 monitor_printf(mon, " rd_bytes=%" PRId64
1990 " wr_bytes=%" PRId64
1991 " rd_operations=%" PRId64
1992 " wr_operations=%" PRId64
e8045d67 1993 " flush_operations=%" PRId64
c488c7f6
CH
1994 " wr_total_time_ns=%" PRId64
1995 " rd_total_time_ns=%" PRId64
1996 " flush_total_time_ns=%" PRId64
218a536a
LC
1997 "\n",
1998 qdict_get_int(qdict, "rd_bytes"),
1999 qdict_get_int(qdict, "wr_bytes"),
2000 qdict_get_int(qdict, "rd_operations"),
e8045d67 2001 qdict_get_int(qdict, "wr_operations"),
c488c7f6
CH
2002 qdict_get_int(qdict, "flush_operations"),
2003 qdict_get_int(qdict, "wr_total_time_ns"),
2004 qdict_get_int(qdict, "rd_total_time_ns"),
2005 qdict_get_int(qdict, "flush_total_time_ns"));
218a536a
LC
2006}
2007
2008void bdrv_stats_print(Monitor *mon, const QObject *data)
2009{
2010 qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon);
2011}
2012
294cc35f
KW
2013static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
2014{
2015 QObject *res;
2016 QDict *dict;
2017
2018 res = qobject_from_jsonf("{ 'stats': {"
2019 "'rd_bytes': %" PRId64 ","
2020 "'wr_bytes': %" PRId64 ","
2021 "'rd_operations': %" PRId64 ","
2022 "'wr_operations': %" PRId64 ","
e8045d67 2023 "'wr_highest_offset': %" PRId64 ","
c488c7f6
CH
2024 "'flush_operations': %" PRId64 ","
2025 "'wr_total_time_ns': %" PRId64 ","
2026 "'rd_total_time_ns': %" PRId64 ","
2027 "'flush_total_time_ns': %" PRId64
294cc35f 2028 "} }",
a597e79c
CH
2029 bs->nr_bytes[BDRV_ACCT_READ],
2030 bs->nr_bytes[BDRV_ACCT_WRITE],
2031 bs->nr_ops[BDRV_ACCT_READ],
2032 bs->nr_ops[BDRV_ACCT_WRITE],
5ffbbc67 2033 bs->wr_highest_sector *
e8045d67 2034 (uint64_t)BDRV_SECTOR_SIZE,
c488c7f6
CH
2035 bs->nr_ops[BDRV_ACCT_FLUSH],
2036 bs->total_time_ns[BDRV_ACCT_WRITE],
2037 bs->total_time_ns[BDRV_ACCT_READ],
2038 bs->total_time_ns[BDRV_ACCT_FLUSH]);
294cc35f
KW
2039 dict = qobject_to_qdict(res);
2040
2041 if (*bs->device_name) {
2042 qdict_put(dict, "device", qstring_from_str(bs->device_name));
2043 }
2044
2045 if (bs->file) {
2046 QObject *parent = bdrv_info_stats_bs(bs->file);
2047 qdict_put_obj(dict, "parent", parent);
2048 }
2049
2050 return res;
2051}
2052
218a536a
LC
2053void bdrv_info_stats(Monitor *mon, QObject **ret_data)
2054{
2055 QObject *obj;
2056 QList *devices;
a36e69dd
TS
2057 BlockDriverState *bs;
2058
218a536a
LC
2059 devices = qlist_new();
2060
1b7bdbc1 2061 QTAILQ_FOREACH(bs, &bdrv_states, list) {
294cc35f 2062 obj = bdrv_info_stats_bs(bs);
218a536a 2063 qlist_append_obj(devices, obj);
a36e69dd 2064 }
218a536a
LC
2065
2066 *ret_data = QOBJECT(devices);
a36e69dd 2067}
ea2384d3 2068
045df330
AL
2069const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2070{
2071 if (bs->backing_hd && bs->backing_hd->encrypted)
2072 return bs->backing_file;
2073 else if (bs->encrypted)
2074 return bs->filename;
2075 else
2076 return NULL;
2077}
2078
5fafdf24 2079void bdrv_get_backing_filename(BlockDriverState *bs,
83f64091
FB
2080 char *filename, int filename_size)
2081{
b783e409 2082 if (!bs->backing_file) {
83f64091
FB
2083 pstrcpy(filename, filename_size, "");
2084 } else {
2085 pstrcpy(filename, filename_size, bs->backing_file);
2086 }
2087}
2088
5fafdf24 2089int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
faea38e7
FB
2090 const uint8_t *buf, int nb_sectors)
2091{
2092 BlockDriver *drv = bs->drv;
2093 if (!drv)
19cb3738 2094 return -ENOMEDIUM;
faea38e7
FB
2095 if (!drv->bdrv_write_compressed)
2096 return -ENOTSUP;
fbb7b4e0
KW
2097 if (bdrv_check_request(bs, sector_num, nb_sectors))
2098 return -EIO;
a55eb92c 2099
c6d22830 2100 if (bs->dirty_bitmap) {
7cd1e32a
LS
2101 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2102 }
a55eb92c 2103
faea38e7
FB
2104 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2105}
3b46e624 2106
faea38e7
FB
2107int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2108{
2109 BlockDriver *drv = bs->drv;
2110 if (!drv)
19cb3738 2111 return -ENOMEDIUM;
faea38e7
FB
2112 if (!drv->bdrv_get_info)
2113 return -ENOTSUP;
2114 memset(bdi, 0, sizeof(*bdi));
2115 return drv->bdrv_get_info(bs, bdi);
2116}
2117
45566e9c
CH
2118int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2119 int64_t pos, int size)
178e08a5
AL
2120{
2121 BlockDriver *drv = bs->drv;
2122 if (!drv)
2123 return -ENOMEDIUM;
7cdb1f6d
MK
2124 if (drv->bdrv_save_vmstate)
2125 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2126 if (bs->file)
2127 return bdrv_save_vmstate(bs->file, buf, pos, size);
2128 return -ENOTSUP;
178e08a5
AL
2129}
2130
45566e9c
CH
2131int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2132 int64_t pos, int size)
178e08a5
AL
2133{
2134 BlockDriver *drv = bs->drv;
2135 if (!drv)
2136 return -ENOMEDIUM;
7cdb1f6d
MK
2137 if (drv->bdrv_load_vmstate)
2138 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2139 if (bs->file)
2140 return bdrv_load_vmstate(bs->file, buf, pos, size);
2141 return -ENOTSUP;
178e08a5
AL
2142}
2143
8b9b0cc2
KW
2144void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2145{
2146 BlockDriver *drv = bs->drv;
2147
2148 if (!drv || !drv->bdrv_debug_event) {
2149 return;
2150 }
2151
2152 return drv->bdrv_debug_event(bs, event);
2153
2154}
2155
faea38e7
FB
2156/**************************************************************/
2157/* handling of snapshots */
2158
feeee5ac
MDCF
2159int bdrv_can_snapshot(BlockDriverState *bs)
2160{
2161 BlockDriver *drv = bs->drv;
07b70bfb 2162 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
feeee5ac
MDCF
2163 return 0;
2164 }
2165
2166 if (!drv->bdrv_snapshot_create) {
2167 if (bs->file != NULL) {
2168 return bdrv_can_snapshot(bs->file);
2169 }
2170 return 0;
2171 }
2172
2173 return 1;
2174}
2175
199630b6
BS
2176int bdrv_is_snapshot(BlockDriverState *bs)
2177{
2178 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2179}
2180
f9092b10
MA
2181BlockDriverState *bdrv_snapshots(void)
2182{
2183 BlockDriverState *bs;
2184
3ac906f7 2185 if (bs_snapshots) {
f9092b10 2186 return bs_snapshots;
3ac906f7 2187 }
f9092b10
MA
2188
2189 bs = NULL;
2190 while ((bs = bdrv_next(bs))) {
2191 if (bdrv_can_snapshot(bs)) {
3ac906f7
MA
2192 bs_snapshots = bs;
2193 return bs;
f9092b10
MA
2194 }
2195 }
2196 return NULL;
f9092b10
MA
2197}
2198
5fafdf24 2199int bdrv_snapshot_create(BlockDriverState *bs,
faea38e7
FB
2200 QEMUSnapshotInfo *sn_info)
2201{
2202 BlockDriver *drv = bs->drv;
2203 if (!drv)
19cb3738 2204 return -ENOMEDIUM;
7cdb1f6d
MK
2205 if (drv->bdrv_snapshot_create)
2206 return drv->bdrv_snapshot_create(bs, sn_info);
2207 if (bs->file)
2208 return bdrv_snapshot_create(bs->file, sn_info);
2209 return -ENOTSUP;
faea38e7
FB
2210}
2211
5fafdf24 2212int bdrv_snapshot_goto(BlockDriverState *bs,
faea38e7
FB
2213 const char *snapshot_id)
2214{
2215 BlockDriver *drv = bs->drv;
7cdb1f6d
MK
2216 int ret, open_ret;
2217
faea38e7 2218 if (!drv)
19cb3738 2219 return -ENOMEDIUM;
7cdb1f6d
MK
2220 if (drv->bdrv_snapshot_goto)
2221 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2222
2223 if (bs->file) {
2224 drv->bdrv_close(bs);
2225 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2226 open_ret = drv->bdrv_open(bs, bs->open_flags);
2227 if (open_ret < 0) {
2228 bdrv_delete(bs->file);
2229 bs->drv = NULL;
2230 return open_ret;
2231 }
2232 return ret;
2233 }
2234
2235 return -ENOTSUP;
faea38e7
FB
2236}
2237
2238int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2239{
2240 BlockDriver *drv = bs->drv;
2241 if (!drv)
19cb3738 2242 return -ENOMEDIUM;
7cdb1f6d
MK
2243 if (drv->bdrv_snapshot_delete)
2244 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2245 if (bs->file)
2246 return bdrv_snapshot_delete(bs->file, snapshot_id);
2247 return -ENOTSUP;
faea38e7
FB
2248}
2249
5fafdf24 2250int bdrv_snapshot_list(BlockDriverState *bs,
faea38e7
FB
2251 QEMUSnapshotInfo **psn_info)
2252{
2253 BlockDriver *drv = bs->drv;
2254 if (!drv)
19cb3738 2255 return -ENOMEDIUM;
7cdb1f6d
MK
2256 if (drv->bdrv_snapshot_list)
2257 return drv->bdrv_snapshot_list(bs, psn_info);
2258 if (bs->file)
2259 return bdrv_snapshot_list(bs->file, psn_info);
2260 return -ENOTSUP;
faea38e7
FB
2261}
2262
51ef6727 2263int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2264 const char *snapshot_name)
2265{
2266 BlockDriver *drv = bs->drv;
2267 if (!drv) {
2268 return -ENOMEDIUM;
2269 }
2270 if (!bs->read_only) {
2271 return -EINVAL;
2272 }
2273 if (drv->bdrv_snapshot_load_tmp) {
2274 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2275 }
2276 return -ENOTSUP;
2277}
2278
faea38e7
FB
#define NB_SUFFIXES 4

/*
 * Format size into buf (at most buf_size bytes, via snprintf) in a short
 * human readable form and return buf.
 *
 * Values up to 999 are printed as plain integers. Larger values are scaled
 * by powers of 1024 and suffixed with K, M, G or T: one decimal is printed
 * while the scaled value is below 10, a rounded integer otherwise. Values
 * too large for T are still printed with the T suffix.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (idx = 0; idx < NB_SUFFIXES; idx++, unit *= 1024) {
        if (size < 10 * unit) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit, suffixes[idx]);
            return buf;
        }
        if (size < 1000 * unit || idx == NB_SUFFIXES - 1) {
            /* Round to nearest by adding half a unit before dividing. */
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     (size + (unit >> 1)) / unit, suffixes[idx]);
            return buf;
        }
    }
    return buf;
}
2308
2309char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2310{
2311 char buf1[128], date_buf[128], clock_buf[128];
3b9f94e1
FB
2312#ifdef _WIN32
2313 struct tm *ptm;
2314#else
faea38e7 2315 struct tm tm;
3b9f94e1 2316#endif
faea38e7
FB
2317 time_t ti;
2318 int64_t secs;
2319
2320 if (!sn) {
5fafdf24
TS
2321 snprintf(buf, buf_size,
2322 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
2323 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2324 } else {
2325 ti = sn->date_sec;
3b9f94e1
FB
2326#ifdef _WIN32
2327 ptm = localtime(&ti);
2328 strftime(date_buf, sizeof(date_buf),
2329 "%Y-%m-%d %H:%M:%S", ptm);
2330#else
faea38e7
FB
2331 localtime_r(&ti, &tm);
2332 strftime(date_buf, sizeof(date_buf),
2333 "%Y-%m-%d %H:%M:%S", &tm);
3b9f94e1 2334#endif
faea38e7
FB
2335 secs = sn->vm_clock_nsec / 1000000000;
2336 snprintf(clock_buf, sizeof(clock_buf),
2337 "%02d:%02d:%02d.%03d",
2338 (int)(secs / 3600),
2339 (int)((secs / 60) % 60),
5fafdf24 2340 (int)(secs % 60),
faea38e7
FB
2341 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2342 snprintf(buf, buf_size,
5fafdf24 2343 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
2344 sn->id_str, sn->name,
2345 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2346 date_buf,
2347 clock_buf);
2348 }
2349 return buf;
2350}
2351
ea2384d3 2352/**************************************************************/
83f64091 2353/* async I/Os */
ea2384d3 2354
3b69e4b9 2355BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
f141eafe 2356 QEMUIOVector *qiov, int nb_sectors,
3b69e4b9 2357 BlockDriverCompletionFunc *cb, void *opaque)
83f64091 2358{
bbf0a440
SH
2359 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2360
b2a61371 2361 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
8c5873d6 2362 cb, opaque, false);
ea2384d3
FB
2363}
2364
f141eafe
AL
2365BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2366 QEMUIOVector *qiov, int nb_sectors,
2367 BlockDriverCompletionFunc *cb, void *opaque)
ea2384d3 2368{
bbf0a440
SH
2369 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2370
1a6e115b 2371 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
8c5873d6 2372 cb, opaque, true);
83f64091
FB
2373}
2374
40b4f539
KW
2375
2376typedef struct MultiwriteCB {
2377 int error;
2378 int num_requests;
2379 int num_callbacks;
2380 struct {
2381 BlockDriverCompletionFunc *cb;
2382 void *opaque;
2383 QEMUIOVector *free_qiov;
2384 void *free_buf;
2385 } callbacks[];
2386} MultiwriteCB;
2387
2388static void multiwrite_user_cb(MultiwriteCB *mcb)
2389{
2390 int i;
2391
2392 for (i = 0; i < mcb->num_callbacks; i++) {
2393 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1e1ea48d
SH
2394 if (mcb->callbacks[i].free_qiov) {
2395 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2396 }
7267c094 2397 g_free(mcb->callbacks[i].free_qiov);
f8a83245 2398 qemu_vfree(mcb->callbacks[i].free_buf);
40b4f539
KW
2399 }
2400}
2401
2402static void multiwrite_cb(void *opaque, int ret)
2403{
2404 MultiwriteCB *mcb = opaque;
2405
6d519a5f
SH
2406 trace_multiwrite_cb(mcb, ret);
2407
cb6d3ca0 2408 if (ret < 0 && !mcb->error) {
40b4f539 2409 mcb->error = ret;
40b4f539
KW
2410 }
2411
2412 mcb->num_requests--;
2413 if (mcb->num_requests == 0) {
de189a1b 2414 multiwrite_user_cb(mcb);
7267c094 2415 g_free(mcb);
40b4f539
KW
2416 }
2417}
2418
2419static int multiwrite_req_compare(const void *a, const void *b)
2420{
77be4366
CH
2421 const BlockRequest *req1 = a, *req2 = b;
2422
2423 /*
2424 * Note that we can't simply subtract req2->sector from req1->sector
2425 * here as that could overflow the return value.
2426 */
2427 if (req1->sector > req2->sector) {
2428 return 1;
2429 } else if (req1->sector < req2->sector) {
2430 return -1;
2431 } else {
2432 return 0;
2433 }
40b4f539
KW
2434}
2435
2436/*
2437 * Takes a bunch of requests and tries to merge them. Returns the number of
2438 * requests that remain after merging.
2439 */
2440static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2441 int num_reqs, MultiwriteCB *mcb)
2442{
2443 int i, outidx;
2444
2445 // Sort requests by start sector
2446 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2447
2448 // Check if adjacent requests touch the same clusters. If so, combine them,
2449 // filling up gaps with zero sectors.
2450 outidx = 0;
2451 for (i = 1; i < num_reqs; i++) {
2452 int merge = 0;
2453 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2454
2455 // This handles the cases that are valid for all block drivers, namely
2456 // exactly sequential writes and overlapping writes.
2457 if (reqs[i].sector <= oldreq_last) {
2458 merge = 1;
2459 }
2460
2461 // The block driver may decide that it makes sense to combine requests
2462 // even if there is a gap of some sectors between them. In this case,
2463 // the gap is filled with zeros (therefore only applicable for yet
2464 // unused space in format like qcow2).
2465 if (!merge && bs->drv->bdrv_merge_requests) {
2466 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2467 }
2468
e2a305fb
CH
2469 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2470 merge = 0;
2471 }
2472
40b4f539
KW
2473 if (merge) {
2474 size_t size;
7267c094 2475 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
40b4f539
KW
2476 qemu_iovec_init(qiov,
2477 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2478
2479 // Add the first request to the merged one. If the requests are
2480 // overlapping, drop the last sectors of the first request.
2481 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2482 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2483
2484 // We might need to add some zeros between the two requests
2485 if (reqs[i].sector > oldreq_last) {
2486 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2487 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2488 memset(buf, 0, zero_bytes);
2489 qemu_iovec_add(qiov, buf, zero_bytes);
2490 mcb->callbacks[i].free_buf = buf;
2491 }
2492
2493 // Add the second request
2494 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2495
cbf1dff2 2496 reqs[outidx].nb_sectors = qiov->size >> 9;
40b4f539
KW
2497 reqs[outidx].qiov = qiov;
2498
2499 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2500 } else {
2501 outidx++;
2502 reqs[outidx].sector = reqs[i].sector;
2503 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2504 reqs[outidx].qiov = reqs[i].qiov;
2505 }
2506 }
2507
2508 return outidx + 1;
2509}
2510
2511/*
2512 * Submit multiple AIO write requests at once.
2513 *
2514 * On success, the function returns 0 and all requests in the reqs array have
2515 * been submitted. In error case this function returns -1, and any of the
2516 * requests may or may not be submitted yet. In particular, this means that the
2517 * callback will be called for some of the requests, for others it won't. The
2518 * caller must check the error field of the BlockRequest to wait for the right
2519 * callbacks (if error != 0, no callback will be called).
2520 *
2521 * The implementation may modify the contents of the reqs array, e.g. to merge
2522 * requests. However, the fields opaque and error are left unmodified as they
2523 * are used to signal failure for a single request to the caller.
2524 */
2525int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2526{
2527 BlockDriverAIOCB *acb;
2528 MultiwriteCB *mcb;
2529 int i;
2530
301db7c2
RH
2531 /* don't submit writes if we don't have a medium */
2532 if (bs->drv == NULL) {
2533 for (i = 0; i < num_reqs; i++) {
2534 reqs[i].error = -ENOMEDIUM;
2535 }
2536 return -1;
2537 }
2538
40b4f539
KW
2539 if (num_reqs == 0) {
2540 return 0;
2541 }
2542
2543 // Create MultiwriteCB structure
7267c094 2544 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
40b4f539
KW
2545 mcb->num_requests = 0;
2546 mcb->num_callbacks = num_reqs;
2547
2548 for (i = 0; i < num_reqs; i++) {
2549 mcb->callbacks[i].cb = reqs[i].cb;
2550 mcb->callbacks[i].opaque = reqs[i].opaque;
2551 }
2552
2553 // Check for mergable requests
2554 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2555
6d519a5f
SH
2556 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2557
453f9a16
KW
2558 /*
2559 * Run the aio requests. As soon as one request can't be submitted
2560 * successfully, fail all requests that are not yet submitted (we must
2561 * return failure for all requests anyway)
2562 *
2563 * num_requests cannot be set to the right value immediately: If
2564 * bdrv_aio_writev fails for some request, num_requests would be too high
2565 * and therefore multiwrite_cb() would never recognize the multiwrite
2566 * request as completed. We also cannot use the loop variable i to set it
2567 * when the first request fails because the callback may already have been
2568 * called for previously submitted requests. Thus, num_requests must be
2569 * incremented for each request that is submitted.
2570 *
2571 * The problem that callbacks may be called early also means that we need
2572 * to take care that num_requests doesn't become 0 before all requests are
2573 * submitted - multiwrite_cb() would consider the multiwrite request
2574 * completed. A dummy request that is "completed" by a manual call to
2575 * multiwrite_cb() takes care of this.
2576 */
2577 mcb->num_requests = 1;
2578
6d519a5f 2579 // Run the aio requests
40b4f539 2580 for (i = 0; i < num_reqs; i++) {
453f9a16 2581 mcb->num_requests++;
40b4f539
KW
2582 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2583 reqs[i].nb_sectors, multiwrite_cb, mcb);
2584
2585 if (acb == NULL) {
2586 // We can only fail the whole thing if no request has been
2587 // submitted yet. Otherwise we'll wait for the submitted AIOs to
2588 // complete and report the error in the callback.
453f9a16 2589 if (i == 0) {
6d519a5f 2590 trace_bdrv_aio_multiwrite_earlyfail(mcb);
40b4f539
KW
2591 goto fail;
2592 } else {
6d519a5f 2593 trace_bdrv_aio_multiwrite_latefail(mcb, i);
7eb58a6c 2594 multiwrite_cb(mcb, -EIO);
40b4f539
KW
2595 break;
2596 }
40b4f539
KW
2597 }
2598 }
2599
453f9a16
KW
2600 /* Complete the dummy request */
2601 multiwrite_cb(mcb, 0);
2602
40b4f539
KW
2603 return 0;
2604
2605fail:
453f9a16
KW
2606 for (i = 0; i < mcb->num_callbacks; i++) {
2607 reqs[i].error = -EIO;
2608 }
7267c094 2609 g_free(mcb);
40b4f539
KW
2610 return -1;
2611}
2612
b2e12bc6
CH
2613BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2614 BlockDriverCompletionFunc *cb, void *opaque)
2615{
2616 BlockDriver *drv = bs->drv;
2617
a13aac04
SH
2618 trace_bdrv_aio_flush(bs, opaque);
2619
016f5cf6
AG
2620 if (bs->open_flags & BDRV_O_NO_FLUSH) {
2621 return bdrv_aio_noop_em(bs, cb, opaque);
2622 }
2623
b2e12bc6
CH
2624 if (!drv)
2625 return NULL;
b2e12bc6
CH
2626 return drv->bdrv_aio_flush(bs, cb, opaque);
2627}
2628
83f64091 2629void bdrv_aio_cancel(BlockDriverAIOCB *acb)
83f64091 2630{
6bbff9a0 2631 acb->pool->cancel(acb);
83f64091
FB
2632}
2633
ce1a14dc 2634
83f64091
FB
2635/**************************************************************/
2636/* async block device emulation */
2637
c16b5a2c
CH
2638typedef struct BlockDriverAIOCBSync {
2639 BlockDriverAIOCB common;
2640 QEMUBH *bh;
2641 int ret;
2642 /* vector translation state */
2643 QEMUIOVector *qiov;
2644 uint8_t *bounce;
2645 int is_write;
2646} BlockDriverAIOCBSync;
2647
2648static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2649{
b666d239
KW
2650 BlockDriverAIOCBSync *acb =
2651 container_of(blockacb, BlockDriverAIOCBSync, common);
6a7ad299 2652 qemu_bh_delete(acb->bh);
36afc451 2653 acb->bh = NULL;
c16b5a2c
CH
2654 qemu_aio_release(acb);
2655}
2656
2657static AIOPool bdrv_em_aio_pool = {
2658 .aiocb_size = sizeof(BlockDriverAIOCBSync),
2659 .cancel = bdrv_aio_cancel_em,
2660};
2661
ce1a14dc 2662static void bdrv_aio_bh_cb(void *opaque)
83f64091 2663{
ce1a14dc 2664 BlockDriverAIOCBSync *acb = opaque;
f141eafe 2665
f141eafe
AL
2666 if (!acb->is_write)
2667 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
ceb42de8 2668 qemu_vfree(acb->bounce);
ce1a14dc 2669 acb->common.cb(acb->common.opaque, acb->ret);
6a7ad299 2670 qemu_bh_delete(acb->bh);
36afc451 2671 acb->bh = NULL;
ce1a14dc 2672 qemu_aio_release(acb);
83f64091 2673}
beac80cd 2674
f141eafe
AL
2675static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2676 int64_t sector_num,
2677 QEMUIOVector *qiov,
2678 int nb_sectors,
2679 BlockDriverCompletionFunc *cb,
2680 void *opaque,
2681 int is_write)
2682
83f64091 2683{
ce1a14dc 2684 BlockDriverAIOCBSync *acb;
ce1a14dc 2685
c16b5a2c 2686 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
f141eafe
AL
2687 acb->is_write = is_write;
2688 acb->qiov = qiov;
e268ca52 2689 acb->bounce = qemu_blockalign(bs, qiov->size);
f141eafe 2690
ce1a14dc
PB
2691 if (!acb->bh)
2692 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
f141eafe
AL
2693
2694 if (is_write) {
2695 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
1ed20acf 2696 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
f141eafe 2697 } else {
1ed20acf 2698 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
f141eafe
AL
2699 }
2700
ce1a14dc 2701 qemu_bh_schedule(acb->bh);
f141eafe 2702
ce1a14dc 2703 return &acb->common;
beac80cd
FB
2704}
2705
f141eafe
AL
2706static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2707 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 2708 BlockDriverCompletionFunc *cb, void *opaque)
beac80cd 2709{
f141eafe
AL
2710 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2711}
83f64091 2712
f141eafe
AL
2713static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2714 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2715 BlockDriverCompletionFunc *cb, void *opaque)
2716{
2717 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
beac80cd 2718}
beac80cd 2719
68485420
KW
2720
2721typedef struct BlockDriverAIOCBCoroutine {
2722 BlockDriverAIOCB common;
2723 BlockRequest req;
2724 bool is_write;
2725 QEMUBH* bh;
2726} BlockDriverAIOCBCoroutine;
2727
2728static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
2729{
2730 qemu_aio_flush();
2731}
2732
2733static AIOPool bdrv_em_co_aio_pool = {
2734 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
2735 .cancel = bdrv_aio_co_cancel_em,
2736};
2737
35246a68 2738static void bdrv_co_em_bh(void *opaque)
68485420
KW
2739{
2740 BlockDriverAIOCBCoroutine *acb = opaque;
2741
2742 acb->common.cb(acb->common.opaque, acb->req.error);
2743 qemu_bh_delete(acb->bh);
2744 qemu_aio_release(acb);
2745}
2746
b2a61371
SH
2747/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
2748static void coroutine_fn bdrv_co_do_rw(void *opaque)
2749{
2750 BlockDriverAIOCBCoroutine *acb = opaque;
2751 BlockDriverState *bs = acb->common.bs;
2752
2753 if (!acb->is_write) {
2754 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
2755 acb->req.nb_sectors, acb->req.qiov);
2756 } else {
2757 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
2758 acb->req.nb_sectors, acb->req.qiov);
2759 }
2760
35246a68 2761 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
b2a61371
SH
2762 qemu_bh_schedule(acb->bh);
2763}
2764
68485420
KW
2765static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
2766 int64_t sector_num,
2767 QEMUIOVector *qiov,
2768 int nb_sectors,
2769 BlockDriverCompletionFunc *cb,
2770 void *opaque,
8c5873d6 2771 bool is_write)
68485420
KW
2772{
2773 Coroutine *co;
2774 BlockDriverAIOCBCoroutine *acb;
2775
2776 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2777 acb->req.sector = sector_num;
2778 acb->req.nb_sectors = nb_sectors;
2779 acb->req.qiov = qiov;
2780 acb->is_write = is_write;
2781
8c5873d6 2782 co = qemu_coroutine_create(bdrv_co_do_rw);
68485420
KW
2783 qemu_coroutine_enter(co, acb);
2784
2785 return &acb->common;
2786}
2787
b2e12bc6
CH
2788static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
2789 BlockDriverCompletionFunc *cb, void *opaque)
2790{
2791 BlockDriverAIOCBSync *acb;
2792
2793 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2794 acb->is_write = 1; /* don't bounce in the completion hadler */
2795 acb->qiov = NULL;
2796 acb->bounce = NULL;
2797 acb->ret = 0;
2798
2799 if (!acb->bh)
2800 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2801
2802 bdrv_flush(bs);
2803 qemu_bh_schedule(acb->bh);
2804 return &acb->common;
2805}
2806
016f5cf6
AG
2807static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
2808 BlockDriverCompletionFunc *cb, void *opaque)
2809{
2810 BlockDriverAIOCBSync *acb;
2811
2812 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2813 acb->is_write = 1; /* don't bounce in the completion handler */
2814 acb->qiov = NULL;
2815 acb->bounce = NULL;
2816 acb->ret = 0;
2817
2818 if (!acb->bh) {
2819 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2820 }
2821
2822 qemu_bh_schedule(acb->bh);
2823 return &acb->common;
2824}
2825
ea2384d3
FB
2826void bdrv_init(void)
2827{
5efa9d5a 2828 module_call_init(MODULE_INIT_BLOCK);
ea2384d3 2829}
ce1a14dc 2830
eb852011
MA
2831void bdrv_init_with_whitelist(void)
2832{
2833 use_bdrv_whitelist = 1;
2834 bdrv_init();
2835}
2836
c16b5a2c
CH
2837void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2838 BlockDriverCompletionFunc *cb, void *opaque)
ce1a14dc 2839{
ce1a14dc
PB
2840 BlockDriverAIOCB *acb;
2841
6bbff9a0
AL
2842 if (pool->free_aiocb) {
2843 acb = pool->free_aiocb;
2844 pool->free_aiocb = acb->next;
ce1a14dc 2845 } else {
7267c094 2846 acb = g_malloc0(pool->aiocb_size);
6bbff9a0 2847 acb->pool = pool;
ce1a14dc
PB
2848 }
2849 acb->bs = bs;
2850 acb->cb = cb;
2851 acb->opaque = opaque;
2852 return acb;
2853}
2854
2855void qemu_aio_release(void *p)
2856{
6bbff9a0
AL
2857 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2858 AIOPool *pool = acb->pool;
2859 acb->next = pool->free_aiocb;
2860 pool->free_aiocb = acb;
ce1a14dc 2861}
19cb3738 2862
f9f05dc5
KW
2863/**************************************************************/
2864/* Coroutine block device emulation */
2865
2866typedef struct CoroutineIOCompletion {
2867 Coroutine *coroutine;
2868 int ret;
2869} CoroutineIOCompletion;
2870
2871static void bdrv_co_io_em_complete(void *opaque, int ret)
2872{
2873 CoroutineIOCompletion *co = opaque;
2874
2875 co->ret = ret;
2876 qemu_coroutine_enter(co->coroutine, NULL);
2877}
2878
2879static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
2880 int nb_sectors, QEMUIOVector *iov,
2881 bool is_write)
2882{
2883 CoroutineIOCompletion co = {
2884 .coroutine = qemu_coroutine_self(),
2885 };
2886 BlockDriverAIOCB *acb;
2887
2888 if (is_write) {
a652d160
SH
2889 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
2890 bdrv_co_io_em_complete, &co);
f9f05dc5 2891 } else {
a652d160
SH
2892 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
2893 bdrv_co_io_em_complete, &co);
f9f05dc5
KW
2894 }
2895
59370aaa 2896 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
f9f05dc5
KW
2897 if (!acb) {
2898 return -EIO;
2899 }
2900 qemu_coroutine_yield();
2901
2902 return co.ret;
2903}
2904
2905static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
2906 int64_t sector_num, int nb_sectors,
2907 QEMUIOVector *iov)
2908{
2909 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
2910}
2911
2912static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
2913 int64_t sector_num, int nb_sectors,
2914 QEMUIOVector *iov)
2915{
2916 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
2917}
2918
e7a8a783
KW
2919static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs)
2920{
2921 CoroutineIOCompletion co = {
2922 .coroutine = qemu_coroutine_self(),
2923 };
2924 BlockDriverAIOCB *acb;
2925
2926 acb = bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
2927 if (!acb) {
2928 return -EIO;
2929 }
2930 qemu_coroutine_yield();
2931 return co.ret;
2932}
2933
19cb3738
FB
2934/**************************************************************/
2935/* removable device support */
2936
2937/**
2938 * Return TRUE if the media is present
2939 */
2940int bdrv_is_inserted(BlockDriverState *bs)
2941{
2942 BlockDriver *drv = bs->drv;
a1aff5bf 2943
19cb3738
FB
2944 if (!drv)
2945 return 0;
2946 if (!drv->bdrv_is_inserted)
a1aff5bf
MA
2947 return 1;
2948 return drv->bdrv_is_inserted(bs);
19cb3738
FB
2949}
2950
2951/**
8e49ca46
MA
2952 * Return whether the media changed since the last call to this
2953 * function, or -ENOTSUP if we don't know. Most drivers don't know.
19cb3738
FB
2954 */
2955int bdrv_media_changed(BlockDriverState *bs)
2956{
2957 BlockDriver *drv = bs->drv;
19cb3738 2958
8e49ca46
MA
2959 if (drv && drv->bdrv_media_changed) {
2960 return drv->bdrv_media_changed(bs);
2961 }
2962 return -ENOTSUP;
19cb3738
FB
2963}
2964
2965/**
2966 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
2967 */
fdec4404 2968void bdrv_eject(BlockDriverState *bs, int eject_flag)
19cb3738
FB
2969{
2970 BlockDriver *drv = bs->drv;
19cb3738 2971
822e1cd1
MA
2972 if (drv && drv->bdrv_eject) {
2973 drv->bdrv_eject(bs, eject_flag);
19cb3738
FB
2974 }
2975}
2976
19cb3738
FB
2977/**
2978 * Lock or unlock the media (if it is locked, the user won't be able
2979 * to eject it manually).
2980 */
025e849a 2981void bdrv_lock_medium(BlockDriverState *bs, bool locked)
19cb3738
FB
2982{
2983 BlockDriver *drv = bs->drv;
2984
025e849a 2985 trace_bdrv_lock_medium(bs, locked);
b8c6d095 2986
025e849a
MA
2987 if (drv && drv->bdrv_lock_medium) {
2988 drv->bdrv_lock_medium(bs, locked);
19cb3738
FB
2989 }
2990}
985a03b0
TS
2991
2992/* needed for generic scsi interface */
2993
2994int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
2995{
2996 BlockDriver *drv = bs->drv;
2997
2998 if (drv && drv->bdrv_ioctl)
2999 return drv->bdrv_ioctl(bs, req, buf);
3000 return -ENOTSUP;
3001}
7d780669 3002
221f715d
AL
3003BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3004 unsigned long int req, void *buf,
3005 BlockDriverCompletionFunc *cb, void *opaque)
7d780669 3006{
221f715d 3007 BlockDriver *drv = bs->drv;
7d780669 3008
221f715d
AL
3009 if (drv && drv->bdrv_aio_ioctl)
3010 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3011 return NULL;
7d780669 3012}
e268ca52 3013
7b6f9300
MA
3014void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3015{
3016 bs->buffer_alignment = align;
3017}
7cd1e32a 3018
e268ca52
AL
3019void *qemu_blockalign(BlockDriverState *bs, size_t size)
3020{
3021 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3022}
7cd1e32a
LS
3023
3024void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3025{
3026 int64_t bitmap_size;
a55eb92c 3027
aaa0eb75 3028 bs->dirty_count = 0;
a55eb92c 3029 if (enable) {
c6d22830
JK
3030 if (!bs->dirty_bitmap) {
3031 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3032 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3033 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
a55eb92c 3034
7267c094 3035 bs->dirty_bitmap = g_malloc0(bitmap_size);
a55eb92c 3036 }
7cd1e32a 3037 } else {
c6d22830 3038 if (bs->dirty_bitmap) {
7267c094 3039 g_free(bs->dirty_bitmap);
c6d22830 3040 bs->dirty_bitmap = NULL;
a55eb92c 3041 }
7cd1e32a
LS
3042 }
3043}
3044
3045int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3046{
6ea44308 3047 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c 3048
c6d22830
JK
3049 if (bs->dirty_bitmap &&
3050 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
6d59fec1
MT
3051 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3052 (1UL << (chunk % (sizeof(unsigned long) * 8))));
7cd1e32a
LS
3053 } else {
3054 return 0;
3055 }
3056}
3057
a55eb92c
JK
3058void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3059 int nr_sectors)
7cd1e32a
LS
3060{
3061 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3062}
aaa0eb75
LS
3063
3064int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3065{
3066 return bs->dirty_count;
3067}
f88e1a42 3068
db593f25
MT
3069void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3070{
3071 assert(bs->in_use != in_use);
3072 bs->in_use = in_use;
3073}
3074
3075int bdrv_in_use(BlockDriverState *bs)
3076{
3077 return bs->in_use;
3078}
3079
28a7282a
LC
3080void bdrv_iostatus_enable(BlockDriverState *bs)
3081{
3082 bs->iostatus = BDRV_IOS_OK;
3083}
3084
3085/* The I/O status is only enabled if the drive explicitly
3086 * enables it _and_ the VM is configured to stop on errors */
3087bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3088{
3089 return (bs->iostatus != BDRV_IOS_INVAL &&
3090 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3091 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3092 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3093}
3094
3095void bdrv_iostatus_disable(BlockDriverState *bs)
3096{
3097 bs->iostatus = BDRV_IOS_INVAL;
3098}
3099
3100void bdrv_iostatus_reset(BlockDriverState *bs)
3101{
3102 if (bdrv_iostatus_is_enabled(bs)) {
3103 bs->iostatus = BDRV_IOS_OK;
3104 }
3105}
3106
3107/* XXX: Today this is set by device models because it makes the implementation
3108 quite simple. However, the block layer knows about the error, so it's
3109 possible to implement this without device models being involved */
3110void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3111{
3112 if (bdrv_iostatus_is_enabled(bs) && bs->iostatus == BDRV_IOS_OK) {
3113 assert(error >= 0);
3114 bs->iostatus = error == ENOSPC ? BDRV_IOS_ENOSPC : BDRV_IOS_FAILED;
3115 }
3116}
3117
a597e79c
CH
3118void
3119bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3120 enum BlockAcctType type)
3121{
3122 assert(type < BDRV_MAX_IOTYPE);
3123
3124 cookie->bytes = bytes;
c488c7f6 3125 cookie->start_time_ns = get_clock();
a597e79c
CH
3126 cookie->type = type;
3127}
3128
3129void
3130bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3131{
3132 assert(cookie->type < BDRV_MAX_IOTYPE);
3133
3134 bs->nr_bytes[cookie->type] += cookie->bytes;
3135 bs->nr_ops[cookie->type]++;
c488c7f6 3136 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
a597e79c
CH
3137}
3138
f88e1a42
JS
3139int bdrv_img_create(const char *filename, const char *fmt,
3140 const char *base_filename, const char *base_fmt,
3141 char *options, uint64_t img_size, int flags)
3142{
3143 QEMUOptionParameter *param = NULL, *create_options = NULL;
d220894e 3144 QEMUOptionParameter *backing_fmt, *backing_file, *size;
f88e1a42
JS
3145 BlockDriverState *bs = NULL;
3146 BlockDriver *drv, *proto_drv;
96df67d1 3147 BlockDriver *backing_drv = NULL;
f88e1a42
JS
3148 int ret = 0;
3149
3150 /* Find driver and parse its options */
3151 drv = bdrv_find_format(fmt);
3152 if (!drv) {
3153 error_report("Unknown file format '%s'", fmt);
4f70f249 3154 ret = -EINVAL;
f88e1a42
JS
3155 goto out;
3156 }
3157
3158 proto_drv = bdrv_find_protocol(filename);
3159 if (!proto_drv) {
3160 error_report("Unknown protocol '%s'", filename);
4f70f249 3161 ret = -EINVAL;
f88e1a42
JS
3162 goto out;
3163 }
3164
3165 create_options = append_option_parameters(create_options,
3166 drv->create_options);
3167 create_options = append_option_parameters(create_options,
3168 proto_drv->create_options);
3169
3170 /* Create parameter list with default values */
3171 param = parse_option_parameters("", create_options, param);
3172
3173 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3174
3175 /* Parse -o options */
3176 if (options) {
3177 param = parse_option_parameters(options, create_options, param);
3178 if (param == NULL) {
3179 error_report("Invalid options for file format '%s'.", fmt);
4f70f249 3180 ret = -EINVAL;
f88e1a42
JS
3181 goto out;
3182 }
3183 }
3184
3185 if (base_filename) {
3186 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3187 base_filename)) {
3188 error_report("Backing file not supported for file format '%s'",
3189 fmt);
4f70f249 3190 ret = -EINVAL;
f88e1a42
JS
3191 goto out;
3192 }
3193 }
3194
3195 if (base_fmt) {
3196 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3197 error_report("Backing file format not supported for file "
3198 "format '%s'", fmt);
4f70f249 3199 ret = -EINVAL;
f88e1a42
JS
3200 goto out;
3201 }
3202 }
3203
792da93a
JS
3204 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3205 if (backing_file && backing_file->value.s) {
3206 if (!strcmp(filename, backing_file->value.s)) {
3207 error_report("Error: Trying to create an image with the "
3208 "same filename as the backing file");
4f70f249 3209 ret = -EINVAL;
792da93a
JS
3210 goto out;
3211 }
3212 }
3213
f88e1a42
JS
3214 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3215 if (backing_fmt && backing_fmt->value.s) {
96df67d1
SH
3216 backing_drv = bdrv_find_format(backing_fmt->value.s);
3217 if (!backing_drv) {
f88e1a42
JS
3218 error_report("Unknown backing file format '%s'",
3219 backing_fmt->value.s);
4f70f249 3220 ret = -EINVAL;
f88e1a42
JS
3221 goto out;
3222 }
3223 }
3224
3225 // The size for the image must always be specified, with one exception:
3226 // If we are using a backing file, we can obtain the size from there
d220894e
KW
3227 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3228 if (size && size->value.n == -1) {
f88e1a42
JS
3229 if (backing_file && backing_file->value.s) {
3230 uint64_t size;
f88e1a42
JS
3231 char buf[32];
3232
f88e1a42
JS
3233 bs = bdrv_new("");
3234
96df67d1 3235 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
f88e1a42 3236 if (ret < 0) {
96df67d1 3237 error_report("Could not open '%s'", backing_file->value.s);
f88e1a42
JS
3238 goto out;
3239 }
3240 bdrv_get_geometry(bs, &size);
3241 size *= 512;
3242
3243 snprintf(buf, sizeof(buf), "%" PRId64, size);
3244 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3245 } else {
3246 error_report("Image creation needs a size parameter");
4f70f249 3247 ret = -EINVAL;
f88e1a42
JS
3248 goto out;
3249 }
3250 }
3251
3252 printf("Formatting '%s', fmt=%s ", filename, fmt);
3253 print_option_parameters(param);
3254 puts("");
3255
3256 ret = bdrv_create(drv, filename, param);
3257
3258 if (ret < 0) {
3259 if (ret == -ENOTSUP) {
3260 error_report("Formatting or formatting option not supported for "
3261 "file format '%s'", fmt);
3262 } else if (ret == -EFBIG) {
3263 error_report("The image size is too large for file format '%s'",
3264 fmt);
3265 } else {
3266 error_report("%s: error while creating %s: %s", filename, fmt,
3267 strerror(-ret));
3268 }
3269 }
3270
3271out:
3272 free_option_parameters(create_options);
3273 free_option_parameters(param);
3274
3275 if (bs) {
3276 bdrv_delete(bs);
3277 }
4f70f249
JS
3278
3279 return ret;
f88e1a42 3280}