]> git.proxmox.com Git - qemu.git/blame - block.c
block: split out bdrv_co_do_readv() and bdrv_co_do_writev()
[qemu.git] / block.c
CommitLineData
fc01f7e7
FB
1/*
2 * QEMU System Emulator block driver
5fafdf24 3 *
fc01f7e7 4 * Copyright (c) 2003 Fabrice Bellard
5fafdf24 5 *
fc01f7e7
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
3990d09a 24#include "config-host.h"
faf07963 25#include "qemu-common.h"
6d519a5f 26#include "trace.h"
376253ec 27#include "monitor.h"
ea2384d3 28#include "block_int.h"
5efa9d5a 29#include "module.h"
d15e5465 30#include "qemu-objects.h"
68485420 31#include "qemu-coroutine.h"
fc01f7e7 32
71e72a19 33#ifdef CONFIG_BSD
7674e7bf
FB
34#include <sys/types.h>
35#include <sys/stat.h>
36#include <sys/ioctl.h>
72cf2d4f 37#include <sys/queue.h>
c5e97233 38#ifndef __DragonFly__
7674e7bf
FB
39#include <sys/disk.h>
40#endif
c5e97233 41#endif
7674e7bf 42
49dc768d
AL
43#ifdef _WIN32
44#include <windows.h>
45#endif
46
7d4b4ba5 47static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
f141eafe
AL
48static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
49 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
c87c0672 50 BlockDriverCompletionFunc *cb, void *opaque);
f141eafe
AL
51static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
52 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 53 BlockDriverCompletionFunc *cb, void *opaque);
b2e12bc6
CH
54static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
55 BlockDriverCompletionFunc *cb, void *opaque);
016f5cf6
AG
56static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
57 BlockDriverCompletionFunc *cb, void *opaque);
5fafdf24 58static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
83f64091
FB
59 uint8_t *buf, int nb_sectors);
60static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
61 const uint8_t *buf, int nb_sectors);
68485420
KW
62static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
63 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
64 BlockDriverCompletionFunc *cb, void *opaque);
65static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
66 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
67 BlockDriverCompletionFunc *cb, void *opaque);
f9f05dc5
KW
68static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
69 int64_t sector_num, int nb_sectors,
70 QEMUIOVector *iov);
71static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
72 int64_t sector_num, int nb_sectors,
73 QEMUIOVector *iov);
e7a8a783 74static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs);
c5fbe571
SH
75static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
76 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
ec530c81 77
1b7bdbc1
SH
78static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
79 QTAILQ_HEAD_INITIALIZER(bdrv_states);
7ee930d0 80
8a22f02a
SH
81static QLIST_HEAD(, BlockDriver) bdrv_drivers =
82 QLIST_HEAD_INITIALIZER(bdrv_drivers);
ea2384d3 83
f9092b10
MA
84/* The device to use for VM snapshots */
85static BlockDriverState *bs_snapshots;
86
eb852011
MA
87/* If non-zero, use only whitelisted block drivers */
88static int use_bdrv_whitelist;
89
9e0b22f4
SH
90#ifdef _WIN32
/*
 * Return non-zero when filename begins with an ASCII letter that is
 * immediately followed by ':' (a drive prefix such as "c:").
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_alpha = (letter >= 'a' && letter <= 'z') ||
                         (letter >= 'A' && letter <= 'Z');

    if (!is_alpha) {
        /* Do not look at filename[1]: the string may be empty. */
        return 0;
    }
    return filename[1] == ':';
}
97
/*
 * Return 1 when filename is exactly a drive prefix ("c:") or starts with
 * one of the device path prefixes "\\.\" or "//./"; return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    const int bare_drive = is_windows_drive_prefix(filename) &&
                           filename[2] == '\0';

    if (bare_drive) {
        return 1;
    }
    return (strstart(filename, "\\\\.\\", NULL) ||
            strstart(filename, "//./", NULL)) ? 1 : 0;
}
108#endif
109
/*
 * Report whether path carries a "<protocol>:" prefix.
 *
 * The check is simply whether path contains a ':' character. On Windows,
 * drive names and paths with a drive prefix are excluded first.
 */
static int path_has_protocol(const char *path)
{
    const char *colon;

#ifdef _WIN32
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    colon = strchr(path, ':');
    return colon ? 1 : 0;
}
122
/*
 * Return non-zero when path is absolute.
 *
 * Anything up to and including the first ':' is skipped; the path is
 * absolute when the character that follows is '/' (or '\\' on Windows).
 * On Windows a path that itself starts with '/' or '\\' is always absolute.
 */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif
    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;
#ifdef _WIN32
    return (*start == '/' || *start == '\\');
#else
    return (*start == '/');
#endif
}
142
83f64091
FB
/*
 * Build a path to filename in dest (at most dest_size bytes, always
 * NUL-terminated when dest_size > 0).
 *
 * If filename is absolute it is copied as is. Otherwise the directory part
 * of base_path (everything up to and including the last path separator,
 * or the "<protocol>:" prefix if that extends further) is copied and
 * filename is appended to it. URLs are supported.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_proto;
    const char *after_sep;
    const char *prefix_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* End of an optional "<protocol>:" prefix. */
    after_proto = strchr(base_path, ':');
    after_proto = after_proto ? after_proto + 1 : base_path;

    /* End of the directory part (last separator). */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* Keep whichever prefix reaches further into base_path. */
    prefix_end = (after_sep > after_proto) ? after_sep : after_proto;
    prefix_len = prefix_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
186
5efa9d5a 187void bdrv_register(BlockDriver *bdrv)
ea2384d3 188{
68485420
KW
189 if (bdrv->bdrv_co_readv) {
190 /* Emulate AIO by coroutines, and sync by AIO */
191 bdrv->bdrv_aio_readv = bdrv_co_aio_readv_em;
192 bdrv->bdrv_aio_writev = bdrv_co_aio_writev_em;
193 bdrv->bdrv_read = bdrv_read_em;
194 bdrv->bdrv_write = bdrv_write_em;
f9f05dc5
KW
195 } else {
196 bdrv->bdrv_co_readv = bdrv_co_readv_em;
197 bdrv->bdrv_co_writev = bdrv_co_writev_em;
198
199 if (!bdrv->bdrv_aio_readv) {
200 /* add AIO emulation layer */
201 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
202 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
203 } else if (!bdrv->bdrv_read) {
204 /* add synchronous IO emulation layer */
205 bdrv->bdrv_read = bdrv_read_em;
206 bdrv->bdrv_write = bdrv_write_em;
207 }
83f64091 208 }
b2e12bc6
CH
209
210 if (!bdrv->bdrv_aio_flush)
211 bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
212
8a22f02a 213 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
ea2384d3 214}
b338082b
FB
215
216/* create a new block device (by default it is empty) */
217BlockDriverState *bdrv_new(const char *device_name)
218{
1b7bdbc1 219 BlockDriverState *bs;
b338082b 220
7267c094 221 bs = g_malloc0(sizeof(BlockDriverState));
b338082b 222 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
ea2384d3 223 if (device_name[0] != '\0') {
1b7bdbc1 224 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
ea2384d3 225 }
28a7282a 226 bdrv_iostatus_disable(bs);
b338082b
FB
227 return bs;
228}
229
ea2384d3
FB
230BlockDriver *bdrv_find_format(const char *format_name)
231{
232 BlockDriver *drv1;
8a22f02a
SH
233 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
234 if (!strcmp(drv1->format_name, format_name)) {
ea2384d3 235 return drv1;
8a22f02a 236 }
ea2384d3
FB
237 }
238 return NULL;
239}
240
eb852011
MA
241static int bdrv_is_whitelisted(BlockDriver *drv)
242{
243 static const char *whitelist[] = {
244 CONFIG_BDRV_WHITELIST
245 };
246 const char **p;
247
248 if (!whitelist[0])
249 return 1; /* no whitelist, anything goes */
250
251 for (p = whitelist; *p; p++) {
252 if (!strcmp(drv->format_name, *p)) {
253 return 1;
254 }
255 }
256 return 0;
257}
258
259BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
260{
261 BlockDriver *drv = bdrv_find_format(format_name);
262 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
263}
264
0e7e1989
KW
265int bdrv_create(BlockDriver *drv, const char* filename,
266 QEMUOptionParameter *options)
ea2384d3
FB
267{
268 if (!drv->bdrv_create)
269 return -ENOTSUP;
0e7e1989
KW
270
271 return drv->bdrv_create(filename, options);
ea2384d3
FB
272}
273
84a12e66
CH
274int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
275{
276 BlockDriver *drv;
277
b50cbabc 278 drv = bdrv_find_protocol(filename);
84a12e66 279 if (drv == NULL) {
16905d71 280 return -ENOENT;
84a12e66
CH
281 }
282
283 return bdrv_create(drv, filename, options);
284}
285
#ifdef _WIN32
/*
 * Store the name of a freshly created temporary file in filename, which
 * holds size bytes.
 *
 * GetTempFileName() writes up to MAX_PATH characters regardless of the
 * caller's buffer, so the name is produced in a local MAX_PATH buffer and
 * then copied with the caller's size limit. filename is left empty if no
 * name could be generated.
 */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];
    char temp_name[MAX_PATH];

    temp_name[0] = '\0';
    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, temp_name);
    pstrcpy(filename, size, temp_name);
}
#else
/*
 * Store the name of a freshly created temporary file in filename, which
 * holds size bytes.
 *
 * The file is created with mkstemp() under $TMPDIR (or /tmp when TMPDIR is
 * unset) and closed again right away; only its name is handed back.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;

    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    /* mkstemp() returns -1 on failure; never pass that to close(). */
    if (fd >= 0) {
        close(fd);
    }
}
#endif
fc01f7e7 308
84a12e66
CH
309/*
310 * Detect host devices. By convention, /dev/cdrom[N] is always
311 * recognized as a host CDROM.
312 */
313static BlockDriver *find_hdev_driver(const char *filename)
314{
315 int score_max = 0, score;
316 BlockDriver *drv = NULL, *d;
317
318 QLIST_FOREACH(d, &bdrv_drivers, list) {
319 if (d->bdrv_probe_device) {
320 score = d->bdrv_probe_device(filename);
321 if (score > score_max) {
322 score_max = score;
323 drv = d;
324 }
325 }
326 }
327
328 return drv;
329}
330
b50cbabc 331BlockDriver *bdrv_find_protocol(const char *filename)
83f64091
FB
332{
333 BlockDriver *drv1;
334 char protocol[128];
1cec71e3 335 int len;
83f64091 336 const char *p;
19cb3738 337
66f82cee
KW
338 /* TODO Drivers without bdrv_file_open must be specified explicitly */
339
39508e7a
CH
340 /*
341 * XXX(hch): we really should not let host device detection
342 * override an explicit protocol specification, but moving this
343 * later breaks access to device names with colons in them.
344 * Thanks to the brain-dead persistent naming schemes on udev-
345 * based Linux systems those actually are quite common.
346 */
347 drv1 = find_hdev_driver(filename);
348 if (drv1) {
349 return drv1;
350 }
351
9e0b22f4 352 if (!path_has_protocol(filename)) {
39508e7a 353 return bdrv_find_format("file");
84a12e66 354 }
9e0b22f4
SH
355 p = strchr(filename, ':');
356 assert(p != NULL);
1cec71e3
AL
357 len = p - filename;
358 if (len > sizeof(protocol) - 1)
359 len = sizeof(protocol) - 1;
360 memcpy(protocol, filename, len);
361 protocol[len] = '\0';
8a22f02a 362 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
5fafdf24 363 if (drv1->protocol_name &&
8a22f02a 364 !strcmp(drv1->protocol_name, protocol)) {
83f64091 365 return drv1;
8a22f02a 366 }
83f64091
FB
367 }
368 return NULL;
369}
370
c98ac35d 371static int find_image_format(const char *filename, BlockDriver **pdrv)
f3a5d3f8
CH
372{
373 int ret, score, score_max;
374 BlockDriver *drv1, *drv;
375 uint8_t buf[2048];
376 BlockDriverState *bs;
377
f5edb014 378 ret = bdrv_file_open(&bs, filename, 0);
c98ac35d
SW
379 if (ret < 0) {
380 *pdrv = NULL;
381 return ret;
382 }
f8ea0b00 383
08a00559
KW
384 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
385 if (bs->sg || !bdrv_is_inserted(bs)) {
1a396859 386 bdrv_delete(bs);
c98ac35d
SW
387 drv = bdrv_find_format("raw");
388 if (!drv) {
389 ret = -ENOENT;
390 }
391 *pdrv = drv;
392 return ret;
1a396859 393 }
f8ea0b00 394
83f64091
FB
395 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
396 bdrv_delete(bs);
397 if (ret < 0) {
c98ac35d
SW
398 *pdrv = NULL;
399 return ret;
83f64091
FB
400 }
401
ea2384d3 402 score_max = 0;
84a12e66 403 drv = NULL;
8a22f02a 404 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
83f64091
FB
405 if (drv1->bdrv_probe) {
406 score = drv1->bdrv_probe(buf, ret, filename);
407 if (score > score_max) {
408 score_max = score;
409 drv = drv1;
410 }
0849bf08 411 }
fc01f7e7 412 }
c98ac35d
SW
413 if (!drv) {
414 ret = -ENOENT;
415 }
416 *pdrv = drv;
417 return ret;
ea2384d3
FB
418}
419
51762288
SH
420/**
421 * Set the current 'total_sectors' value
422 */
423static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
424{
425 BlockDriver *drv = bs->drv;
426
396759ad
NB
427 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
428 if (bs->sg)
429 return 0;
430
51762288
SH
431 /* query actual device if possible, otherwise just trust the hint */
432 if (drv->bdrv_getlength) {
433 int64_t length = drv->bdrv_getlength(bs);
434 if (length < 0) {
435 return length;
436 }
437 hint = length >> BDRV_SECTOR_BITS;
438 }
439
440 bs->total_sectors = hint;
441 return 0;
442}
443
c3993cdc
SH
444/**
445 * Set open flags for a given cache mode
446 *
447 * Return 0 on success, -1 if the cache mode was invalid.
448 */
449int bdrv_parse_cache_flags(const char *mode, int *flags)
450{
451 *flags &= ~BDRV_O_CACHE_MASK;
452
453 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
454 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
92196b2f
SH
455 } else if (!strcmp(mode, "directsync")) {
456 *flags |= BDRV_O_NOCACHE;
c3993cdc
SH
457 } else if (!strcmp(mode, "writeback")) {
458 *flags |= BDRV_O_CACHE_WB;
459 } else if (!strcmp(mode, "unsafe")) {
460 *flags |= BDRV_O_CACHE_WB;
461 *flags |= BDRV_O_NO_FLUSH;
462 } else if (!strcmp(mode, "writethrough")) {
463 /* this is the default */
464 } else {
465 return -1;
466 }
467
468 return 0;
469}
470
57915332
KW
471/*
472 * Common part for opening disk images and files
473 */
474static int bdrv_open_common(BlockDriverState *bs, const char *filename,
475 int flags, BlockDriver *drv)
476{
477 int ret, open_flags;
478
479 assert(drv != NULL);
480
28dcee10
SH
481 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
482
66f82cee 483 bs->file = NULL;
51762288 484 bs->total_sectors = 0;
57915332
KW
485 bs->encrypted = 0;
486 bs->valid_key = 0;
487 bs->open_flags = flags;
57915332
KW
488 bs->buffer_alignment = 512;
489
490 pstrcpy(bs->filename, sizeof(bs->filename), filename);
491
492 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
493 return -ENOTSUP;
494 }
495
496 bs->drv = drv;
7267c094 497 bs->opaque = g_malloc0(drv->instance_size);
57915332 498
a6599793 499 if (flags & BDRV_O_CACHE_WB)
57915332
KW
500 bs->enable_write_cache = 1;
501
502 /*
503 * Clear flags that are internal to the block layer before opening the
504 * image.
505 */
506 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
507
508 /*
ebabb67a 509 * Snapshots should be writable.
57915332
KW
510 */
511 if (bs->is_temporary) {
512 open_flags |= BDRV_O_RDWR;
513 }
514
66f82cee
KW
515 /* Open the image, either directly or using a protocol */
516 if (drv->bdrv_file_open) {
517 ret = drv->bdrv_file_open(bs, filename, open_flags);
518 } else {
519 ret = bdrv_file_open(&bs->file, filename, open_flags);
520 if (ret >= 0) {
521 ret = drv->bdrv_open(bs, open_flags);
522 }
523 }
524
57915332
KW
525 if (ret < 0) {
526 goto free_and_fail;
527 }
528
529 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
51762288
SH
530
531 ret = refresh_total_sectors(bs, bs->total_sectors);
532 if (ret < 0) {
533 goto free_and_fail;
57915332 534 }
51762288 535
57915332
KW
536#ifndef _WIN32
537 if (bs->is_temporary) {
538 unlink(filename);
539 }
540#endif
541 return 0;
542
543free_and_fail:
66f82cee
KW
544 if (bs->file) {
545 bdrv_delete(bs->file);
546 bs->file = NULL;
547 }
7267c094 548 g_free(bs->opaque);
57915332
KW
549 bs->opaque = NULL;
550 bs->drv = NULL;
551 return ret;
552}
553
b6ce07aa
KW
554/*
555 * Opens a file using a protocol (file, host_device, nbd, ...)
556 */
83f64091 557int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
ea2384d3 558{
83f64091 559 BlockDriverState *bs;
6db95603 560 BlockDriver *drv;
83f64091
FB
561 int ret;
562
b50cbabc 563 drv = bdrv_find_protocol(filename);
6db95603
CH
564 if (!drv) {
565 return -ENOENT;
566 }
567
83f64091 568 bs = bdrv_new("");
b6ce07aa 569 ret = bdrv_open_common(bs, filename, flags, drv);
83f64091
FB
570 if (ret < 0) {
571 bdrv_delete(bs);
572 return ret;
3b0d4f61 573 }
71d0770c 574 bs->growable = 1;
83f64091
FB
575 *pbs = bs;
576 return 0;
577}
578
b6ce07aa
KW
579/*
580 * Opens a disk image (raw, qcow2, vmdk, ...)
581 */
d6e9098e
KW
582int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
583 BlockDriver *drv)
ea2384d3 584{
b6ce07aa 585 int ret;
712e7874 586
83f64091 587 if (flags & BDRV_O_SNAPSHOT) {
ea2384d3
FB
588 BlockDriverState *bs1;
589 int64_t total_size;
7c96d46e 590 int is_protocol = 0;
91a073a9
KW
591 BlockDriver *bdrv_qcow2;
592 QEMUOptionParameter *options;
b6ce07aa
KW
593 char tmp_filename[PATH_MAX];
594 char backing_filename[PATH_MAX];
3b46e624 595
ea2384d3
FB
596 /* if snapshot, we create a temporary backing file and open it
597 instead of opening 'filename' directly */
33e3963e 598
ea2384d3
FB
599 /* if there is a backing file, use it */
600 bs1 = bdrv_new("");
d6e9098e 601 ret = bdrv_open(bs1, filename, 0, drv);
51d7c00c 602 if (ret < 0) {
ea2384d3 603 bdrv_delete(bs1);
51d7c00c 604 return ret;
ea2384d3 605 }
3e82990b 606 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
7c96d46e
AL
607
608 if (bs1->drv && bs1->drv->protocol_name)
609 is_protocol = 1;
610
ea2384d3 611 bdrv_delete(bs1);
3b46e624 612
ea2384d3 613 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
7c96d46e
AL
614
615 /* Real path is meaningless for protocols */
616 if (is_protocol)
617 snprintf(backing_filename, sizeof(backing_filename),
618 "%s", filename);
114cdfa9
KS
619 else if (!realpath(filename, backing_filename))
620 return -errno;
7c96d46e 621
91a073a9
KW
622 bdrv_qcow2 = bdrv_find_format("qcow2");
623 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
624
3e82990b 625 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
91a073a9
KW
626 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
627 if (drv) {
628 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
629 drv->format_name);
630 }
631
632 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
d748768c 633 free_option_parameters(options);
51d7c00c
AL
634 if (ret < 0) {
635 return ret;
ea2384d3 636 }
91a073a9 637
ea2384d3 638 filename = tmp_filename;
91a073a9 639 drv = bdrv_qcow2;
ea2384d3
FB
640 bs->is_temporary = 1;
641 }
712e7874 642
b6ce07aa 643 /* Find the right image format driver */
6db95603 644 if (!drv) {
c98ac35d 645 ret = find_image_format(filename, &drv);
51d7c00c 646 }
6987307c 647
51d7c00c 648 if (!drv) {
51d7c00c 649 goto unlink_and_fail;
ea2384d3 650 }
b6ce07aa
KW
651
652 /* Open the image */
653 ret = bdrv_open_common(bs, filename, flags, drv);
654 if (ret < 0) {
6987307c
CH
655 goto unlink_and_fail;
656 }
657
b6ce07aa
KW
658 /* If there is a backing file, use it */
659 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
660 char backing_filename[PATH_MAX];
661 int back_flags;
662 BlockDriver *back_drv = NULL;
663
664 bs->backing_hd = bdrv_new("");
df2dbb4a
SH
665
666 if (path_has_protocol(bs->backing_file)) {
667 pstrcpy(backing_filename, sizeof(backing_filename),
668 bs->backing_file);
669 } else {
670 path_combine(backing_filename, sizeof(backing_filename),
671 filename, bs->backing_file);
672 }
673
674 if (bs->backing_format[0] != '\0') {
b6ce07aa 675 back_drv = bdrv_find_format(bs->backing_format);
df2dbb4a 676 }
b6ce07aa
KW
677
678 /* backing files always opened read-only */
679 back_flags =
680 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
681
682 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
683 if (ret < 0) {
684 bdrv_close(bs);
685 return ret;
686 }
687 if (bs->is_temporary) {
688 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
689 } else {
690 /* base image inherits from "parent" */
691 bs->backing_hd->keep_read_only = bs->keep_read_only;
692 }
693 }
694
695 if (!bdrv_key_required(bs)) {
7d4b4ba5 696 bdrv_dev_change_media_cb(bs, true);
b6ce07aa
KW
697 }
698
699 return 0;
700
701unlink_and_fail:
702 if (bs->is_temporary) {
703 unlink(filename);
704 }
705 return ret;
706}
707
fc01f7e7
FB
708void bdrv_close(BlockDriverState *bs)
709{
19cb3738 710 if (bs->drv) {
f9092b10
MA
711 if (bs == bs_snapshots) {
712 bs_snapshots = NULL;
713 }
557df6ac 714 if (bs->backing_hd) {
ea2384d3 715 bdrv_delete(bs->backing_hd);
557df6ac
SH
716 bs->backing_hd = NULL;
717 }
ea2384d3 718 bs->drv->bdrv_close(bs);
7267c094 719 g_free(bs->opaque);
ea2384d3
FB
720#ifdef _WIN32
721 if (bs->is_temporary) {
722 unlink(bs->filename);
723 }
67b915a5 724#endif
ea2384d3
FB
725 bs->opaque = NULL;
726 bs->drv = NULL;
b338082b 727
66f82cee
KW
728 if (bs->file != NULL) {
729 bdrv_close(bs->file);
730 }
731
7d4b4ba5 732 bdrv_dev_change_media_cb(bs, false);
b338082b
FB
733 }
734}
735
2bc93fed
MK
736void bdrv_close_all(void)
737{
738 BlockDriverState *bs;
739
740 QTAILQ_FOREACH(bs, &bdrv_states, list) {
741 bdrv_close(bs);
742 }
743}
744
d22b2f41
RH
745/* make a BlockDriverState anonymous by removing from bdrv_state list.
746 Also, NULL terminate the device_name to prevent double remove */
747void bdrv_make_anon(BlockDriverState *bs)
748{
749 if (bs->device_name[0] != '\0') {
750 QTAILQ_REMOVE(&bdrv_states, bs, list);
751 }
752 bs->device_name[0] = '\0';
753}
754
b338082b
FB
755void bdrv_delete(BlockDriverState *bs)
756{
fa879d62 757 assert(!bs->dev);
18846dee 758
1b7bdbc1 759 /* remove from list, if necessary */
d22b2f41 760 bdrv_make_anon(bs);
34c6f050 761
b338082b 762 bdrv_close(bs);
66f82cee
KW
763 if (bs->file != NULL) {
764 bdrv_delete(bs->file);
765 }
766
f9092b10 767 assert(bs != bs_snapshots);
7267c094 768 g_free(bs);
fc01f7e7
FB
769}
770
fa879d62
MA
771int bdrv_attach_dev(BlockDriverState *bs, void *dev)
772/* TODO change to DeviceState *dev when all users are qdevified */
18846dee 773{
fa879d62 774 if (bs->dev) {
18846dee
MA
775 return -EBUSY;
776 }
fa879d62 777 bs->dev = dev;
28a7282a 778 bdrv_iostatus_reset(bs);
18846dee
MA
779 return 0;
780}
781
fa879d62
MA
782/* TODO qdevified devices don't use this, remove when devices are qdevified */
783void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
18846dee 784{
fa879d62
MA
785 if (bdrv_attach_dev(bs, dev) < 0) {
786 abort();
787 }
788}
789
790void bdrv_detach_dev(BlockDriverState *bs, void *dev)
791/* TODO change to DeviceState *dev when all users are qdevified */
792{
793 assert(bs->dev == dev);
794 bs->dev = NULL;
0e49de52
MA
795 bs->dev_ops = NULL;
796 bs->dev_opaque = NULL;
29e05f20 797 bs->buffer_alignment = 512;
18846dee
MA
798}
799
fa879d62
MA
800/* TODO change to return DeviceState * when all users are qdevified */
801void *bdrv_get_attached_dev(BlockDriverState *bs)
18846dee 802{
fa879d62 803 return bs->dev;
18846dee
MA
804}
805
0e49de52
MA
806void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
807 void *opaque)
808{
809 bs->dev_ops = ops;
810 bs->dev_opaque = opaque;
2c6942fa
MA
811 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
812 bs_snapshots = NULL;
813 }
0e49de52
MA
814}
815
7d4b4ba5 816static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
0e49de52 817{
145feb17 818 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
7d4b4ba5 819 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
145feb17
MA
820 }
821}
822
2c6942fa
MA
823bool bdrv_dev_has_removable_media(BlockDriverState *bs)
824{
825 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
826}
827
e4def80b
MA
828bool bdrv_dev_is_tray_open(BlockDriverState *bs)
829{
830 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
831 return bs->dev_ops->is_tray_open(bs->dev_opaque);
832 }
833 return false;
834}
835
145feb17
MA
836static void bdrv_dev_resize_cb(BlockDriverState *bs)
837{
838 if (bs->dev_ops && bs->dev_ops->resize_cb) {
839 bs->dev_ops->resize_cb(bs->dev_opaque);
0e49de52
MA
840 }
841}
842
f107639a
MA
843bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
844{
845 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
846 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
847 }
848 return false;
849}
850
e97fc193
AL
851/*
852 * Run consistency checks on an image
853 *
e076f338 854 * Returns 0 if the check could be completed (it doesn't mean that the image is
a1c7273b 855 * free of errors) or -errno when an internal error occurred. The results of the
e076f338 856 * check are stored in res.
e97fc193 857 */
e076f338 858int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
e97fc193
AL
859{
860 if (bs->drv->bdrv_check == NULL) {
861 return -ENOTSUP;
862 }
863
e076f338 864 memset(res, 0, sizeof(*res));
9ac228e0 865 return bs->drv->bdrv_check(bs, res);
e97fc193
AL
866}
867
8a426614
KW
868#define COMMIT_BUF_SECTORS 2048
869
33e3963e
FB
870/* commit COW file into the raw image */
871int bdrv_commit(BlockDriverState *bs)
872{
19cb3738 873 BlockDriver *drv = bs->drv;
ee181196 874 BlockDriver *backing_drv;
8a426614
KW
875 int64_t sector, total_sectors;
876 int n, ro, open_flags;
4dca4b63 877 int ret = 0, rw_ret = 0;
8a426614 878 uint8_t *buf;
4dca4b63
NS
879 char filename[1024];
880 BlockDriverState *bs_rw, *bs_ro;
33e3963e 881
19cb3738
FB
882 if (!drv)
883 return -ENOMEDIUM;
4dca4b63
NS
884
885 if (!bs->backing_hd) {
886 return -ENOTSUP;
33e3963e
FB
887 }
888
4dca4b63
NS
889 if (bs->backing_hd->keep_read_only) {
890 return -EACCES;
891 }
ee181196
KW
892
893 backing_drv = bs->backing_hd->drv;
4dca4b63
NS
894 ro = bs->backing_hd->read_only;
895 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
896 open_flags = bs->backing_hd->open_flags;
897
898 if (ro) {
899 /* re-open as RW */
900 bdrv_delete(bs->backing_hd);
901 bs->backing_hd = NULL;
902 bs_rw = bdrv_new("");
ee181196
KW
903 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
904 backing_drv);
4dca4b63
NS
905 if (rw_ret < 0) {
906 bdrv_delete(bs_rw);
907 /* try to re-open read-only */
908 bs_ro = bdrv_new("");
ee181196
KW
909 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
910 backing_drv);
4dca4b63
NS
911 if (ret < 0) {
912 bdrv_delete(bs_ro);
913 /* drive not functional anymore */
914 bs->drv = NULL;
915 return ret;
916 }
917 bs->backing_hd = bs_ro;
918 return rw_ret;
919 }
920 bs->backing_hd = bs_rw;
ea2384d3 921 }
33e3963e 922
6ea44308 923 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
7267c094 924 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
8a426614
KW
925
926 for (sector = 0; sector < total_sectors; sector += n) {
927 if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
928
929 if (bdrv_read(bs, sector, buf, n) != 0) {
930 ret = -EIO;
931 goto ro_cleanup;
932 }
933
934 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
935 ret = -EIO;
936 goto ro_cleanup;
937 }
ea2384d3 938 }
33e3963e 939 }
95389c86 940
1d44952f
CH
941 if (drv->bdrv_make_empty) {
942 ret = drv->bdrv_make_empty(bs);
943 bdrv_flush(bs);
944 }
95389c86 945
3f5075ae
CH
946 /*
947 * Make sure all data we wrote to the backing device is actually
948 * stable on disk.
949 */
950 if (bs->backing_hd)
951 bdrv_flush(bs->backing_hd);
4dca4b63
NS
952
953ro_cleanup:
7267c094 954 g_free(buf);
4dca4b63
NS
955
956 if (ro) {
957 /* re-open as RO */
958 bdrv_delete(bs->backing_hd);
959 bs->backing_hd = NULL;
960 bs_ro = bdrv_new("");
ee181196
KW
961 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
962 backing_drv);
4dca4b63
NS
963 if (ret < 0) {
964 bdrv_delete(bs_ro);
965 /* drive not functional anymore */
966 bs->drv = NULL;
967 return ret;
968 }
969 bs->backing_hd = bs_ro;
970 bs->backing_hd->keep_read_only = 0;
971 }
972
1d44952f 973 return ret;
33e3963e
FB
974}
975
6ab4b5ab
MA
976void bdrv_commit_all(void)
977{
978 BlockDriverState *bs;
979
980 QTAILQ_FOREACH(bs, &bdrv_states, list) {
981 bdrv_commit(bs);
982 }
983}
984
756e6736
KW
985/*
986 * Return values:
987 * 0 - success
988 * -EINVAL - backing format specified, but no file
989 * -ENOSPC - can't update the backing file because no space is left in the
990 * image file header
991 * -ENOTSUP - format driver doesn't support changing the backing file
992 */
993int bdrv_change_backing_file(BlockDriverState *bs,
994 const char *backing_file, const char *backing_fmt)
995{
996 BlockDriver *drv = bs->drv;
997
998 if (drv->bdrv_change_backing_file != NULL) {
999 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
1000 } else {
1001 return -ENOTSUP;
1002 }
1003}
1004
71d0770c
AL
1005static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1006 size_t size)
1007{
1008 int64_t len;
1009
1010 if (!bdrv_is_inserted(bs))
1011 return -ENOMEDIUM;
1012
1013 if (bs->growable)
1014 return 0;
1015
1016 len = bdrv_getlength(bs);
1017
fbb7b4e0
KW
1018 if (offset < 0)
1019 return -EIO;
1020
1021 if ((offset > len) || (len - offset < size))
71d0770c
AL
1022 return -EIO;
1023
1024 return 0;
1025}
1026
1027static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1028 int nb_sectors)
1029{
eb5a3165
JS
1030 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1031 nb_sectors * BDRV_SECTOR_SIZE);
71d0770c
AL
1032}
1033
e7a8a783
KW
1034static inline bool bdrv_has_async_rw(BlockDriver *drv)
1035{
1036 return drv->bdrv_co_readv != bdrv_co_readv_em
1037 || drv->bdrv_aio_readv != bdrv_aio_readv_em;
1038}
1039
1040static inline bool bdrv_has_async_flush(BlockDriver *drv)
1041{
1042 return drv->bdrv_aio_flush != bdrv_aio_flush_em;
1043}
1044
19cb3738 1045/* return < 0 if error. See bdrv_write() for the return codes */
5fafdf24 1046int bdrv_read(BlockDriverState *bs, int64_t sector_num,
fc01f7e7
FB
1047 uint8_t *buf, int nb_sectors)
1048{
ea2384d3
FB
1049 BlockDriver *drv = bs->drv;
1050
19cb3738
FB
1051 if (!drv)
1052 return -ENOMEDIUM;
e7a8a783
KW
1053
1054 if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
1055 QEMUIOVector qiov;
1056 struct iovec iov = {
1057 .iov_base = (void *)buf,
1058 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1059 };
1060
1061 qemu_iovec_init_external(&qiov, &iov, 1);
1062 return bdrv_co_readv(bs, sector_num, nb_sectors, &qiov);
1063 }
1064
71d0770c
AL
1065 if (bdrv_check_request(bs, sector_num, nb_sectors))
1066 return -EIO;
b338082b 1067
eda578e5 1068 return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
fc01f7e7
FB
1069}
1070
7cd1e32a 1071static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
a55eb92c 1072 int nb_sectors, int dirty)
7cd1e32a 1073{
1074 int64_t start, end;
c6d22830 1075 unsigned long val, idx, bit;
a55eb92c 1076
6ea44308 1077 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
c6d22830 1078 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c
JK
1079
1080 for (; start <= end; start++) {
c6d22830
JK
1081 idx = start / (sizeof(unsigned long) * 8);
1082 bit = start % (sizeof(unsigned long) * 8);
1083 val = bs->dirty_bitmap[idx];
1084 if (dirty) {
6d59fec1 1085 if (!(val & (1UL << bit))) {
aaa0eb75 1086 bs->dirty_count++;
6d59fec1 1087 val |= 1UL << bit;
aaa0eb75 1088 }
c6d22830 1089 } else {
6d59fec1 1090 if (val & (1UL << bit)) {
aaa0eb75 1091 bs->dirty_count--;
6d59fec1 1092 val &= ~(1UL << bit);
aaa0eb75 1093 }
c6d22830
JK
1094 }
1095 bs->dirty_bitmap[idx] = val;
7cd1e32a 1096 }
1097}
1098
5fafdf24 1099/* Return < 0 if error. Important errors are:
19cb3738
FB
1100 -EIO generic I/O error (may happen for all errors)
1101 -ENOMEDIUM No media inserted.
1102 -EINVAL Invalid sector number or nb_sectors
1103 -EACCES Trying to write a read-only device
1104*/
5fafdf24 1105int bdrv_write(BlockDriverState *bs, int64_t sector_num,
fc01f7e7
FB
1106 const uint8_t *buf, int nb_sectors)
1107{
83f64091 1108 BlockDriver *drv = bs->drv;
e7a8a783 1109
19cb3738
FB
1110 if (!bs->drv)
1111 return -ENOMEDIUM;
e7a8a783
KW
1112
1113 if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
1114 QEMUIOVector qiov;
1115 struct iovec iov = {
1116 .iov_base = (void *)buf,
1117 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1118 };
1119
1120 qemu_iovec_init_external(&qiov, &iov, 1);
1121 return bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
1122 }
1123
0849bf08 1124 if (bs->read_only)
19cb3738 1125 return -EACCES;
71d0770c
AL
1126 if (bdrv_check_request(bs, sector_num, nb_sectors))
1127 return -EIO;
a55eb92c 1128
c6d22830 1129 if (bs->dirty_bitmap) {
7cd1e32a 1130 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1131 }
a55eb92c 1132
294cc35f
KW
1133 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1134 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1135 }
1136
42fb2807 1137 return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
83f64091
FB
1138}
1139
eda578e5
AL
1140int bdrv_pread(BlockDriverState *bs, int64_t offset,
1141 void *buf, int count1)
83f64091 1142{
6ea44308 1143 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
83f64091
FB
1144 int len, nb_sectors, count;
1145 int64_t sector_num;
9a8c4cce 1146 int ret;
83f64091
FB
1147
1148 count = count1;
1149 /* first read to align to sector start */
6ea44308 1150 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
83f64091
FB
1151 if (len > count)
1152 len = count;
6ea44308 1153 sector_num = offset >> BDRV_SECTOR_BITS;
83f64091 1154 if (len > 0) {
9a8c4cce
KW
1155 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1156 return ret;
6ea44308 1157 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
83f64091
FB
1158 count -= len;
1159 if (count == 0)
1160 return count1;
1161 sector_num++;
1162 buf += len;
1163 }
1164
1165 /* read the sectors "in place" */
6ea44308 1166 nb_sectors = count >> BDRV_SECTOR_BITS;
83f64091 1167 if (nb_sectors > 0) {
9a8c4cce
KW
1168 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1169 return ret;
83f64091 1170 sector_num += nb_sectors;
6ea44308 1171 len = nb_sectors << BDRV_SECTOR_BITS;
83f64091
FB
1172 buf += len;
1173 count -= len;
1174 }
1175
1176 /* add data from the last sector */
1177 if (count > 0) {
9a8c4cce
KW
1178 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1179 return ret;
83f64091
FB
1180 memcpy(buf, tmp_buf, count);
1181 }
1182 return count1;
1183}
1184
eda578e5
AL
1185int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1186 const void *buf, int count1)
83f64091 1187{
6ea44308 1188 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
83f64091
FB
1189 int len, nb_sectors, count;
1190 int64_t sector_num;
9a8c4cce 1191 int ret;
83f64091
FB
1192
1193 count = count1;
1194 /* first write to align to sector start */
6ea44308 1195 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
83f64091
FB
1196 if (len > count)
1197 len = count;
6ea44308 1198 sector_num = offset >> BDRV_SECTOR_BITS;
83f64091 1199 if (len > 0) {
9a8c4cce
KW
1200 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1201 return ret;
6ea44308 1202 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
9a8c4cce
KW
1203 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1204 return ret;
83f64091
FB
1205 count -= len;
1206 if (count == 0)
1207 return count1;
1208 sector_num++;
1209 buf += len;
1210 }
1211
1212 /* write the sectors "in place" */
6ea44308 1213 nb_sectors = count >> BDRV_SECTOR_BITS;
83f64091 1214 if (nb_sectors > 0) {
9a8c4cce
KW
1215 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1216 return ret;
83f64091 1217 sector_num += nb_sectors;
6ea44308 1218 len = nb_sectors << BDRV_SECTOR_BITS;
83f64091
FB
1219 buf += len;
1220 count -= len;
1221 }
1222
1223 /* add data from the last sector */
1224 if (count > 0) {
9a8c4cce
KW
1225 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1226 return ret;
83f64091 1227 memcpy(tmp_buf, buf, count);
9a8c4cce
KW
1228 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1229 return ret;
83f64091
FB
1230 }
1231 return count1;
1232}
83f64091 1233
f08145fe
KW
1234/*
1235 * Writes to the file and ensures that no writes are reordered across this
1236 * request (acts as a barrier)
1237 *
1238 * Returns 0 on success, -errno in error cases.
1239 */
1240int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1241 const void *buf, int count)
1242{
1243 int ret;
1244
1245 ret = bdrv_pwrite(bs, offset, buf, count);
1246 if (ret < 0) {
1247 return ret;
1248 }
1249
92196b2f
SH
1250 /* No flush needed for cache modes that use O_DSYNC */
1251 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
f08145fe
KW
1252 bdrv_flush(bs);
1253 }
1254
1255 return 0;
1256}
1257
c5fbe571
SH
1258/*
1259 * Handle a read request in coroutine context
1260 */
1261static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
1262 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
da1fa91d
KW
1263{
1264 BlockDriver *drv = bs->drv;
1265
da1fa91d
KW
1266 if (!drv) {
1267 return -ENOMEDIUM;
1268 }
1269 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1270 return -EIO;
1271 }
1272
1273 return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1274}
1275
c5fbe571 1276int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
da1fa91d
KW
1277 int nb_sectors, QEMUIOVector *qiov)
1278{
c5fbe571 1279 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
da1fa91d 1280
c5fbe571
SH
1281 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
1282}
1283
1284/*
1285 * Handle a write request in coroutine context
1286 */
1287static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
1288 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1289{
1290 BlockDriver *drv = bs->drv;
da1fa91d
KW
1291
1292 if (!bs->drv) {
1293 return -ENOMEDIUM;
1294 }
1295 if (bs->read_only) {
1296 return -EACCES;
1297 }
1298 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1299 return -EIO;
1300 }
1301
1302 if (bs->dirty_bitmap) {
1303 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1304 }
1305
1306 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1307 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1308 }
1309
1310 return drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1311}
1312
c5fbe571
SH
1313int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1314 int nb_sectors, QEMUIOVector *qiov)
1315{
1316 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1317
1318 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
1319}
1320
83f64091
FB
1321/**
1322 * Truncate file to 'offset' bytes (needed only for file protocols)
1323 */
1324int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1325{
1326 BlockDriver *drv = bs->drv;
51762288 1327 int ret;
83f64091 1328 if (!drv)
19cb3738 1329 return -ENOMEDIUM;
83f64091
FB
1330 if (!drv->bdrv_truncate)
1331 return -ENOTSUP;
59f2689d
NS
1332 if (bs->read_only)
1333 return -EACCES;
8591675f
MT
1334 if (bdrv_in_use(bs))
1335 return -EBUSY;
51762288
SH
1336 ret = drv->bdrv_truncate(bs, offset);
1337 if (ret == 0) {
1338 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
145feb17 1339 bdrv_dev_resize_cb(bs);
51762288
SH
1340 }
1341 return ret;
83f64091
FB
1342}
1343
4a1d5e1f
FZ
1344/**
1345 * Length of a allocated file in bytes. Sparse files are counted by actual
1346 * allocated space. Return < 0 if error or unknown.
1347 */
1348int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1349{
1350 BlockDriver *drv = bs->drv;
1351 if (!drv) {
1352 return -ENOMEDIUM;
1353 }
1354 if (drv->bdrv_get_allocated_file_size) {
1355 return drv->bdrv_get_allocated_file_size(bs);
1356 }
1357 if (bs->file) {
1358 return bdrv_get_allocated_file_size(bs->file);
1359 }
1360 return -ENOTSUP;
1361}
1362
83f64091
FB
1363/**
1364 * Length of a file in bytes. Return < 0 if error or unknown.
1365 */
1366int64_t bdrv_getlength(BlockDriverState *bs)
1367{
1368 BlockDriver *drv = bs->drv;
1369 if (!drv)
19cb3738 1370 return -ENOMEDIUM;
51762288 1371
2c6942fa 1372 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
46a4e4e6
SH
1373 if (drv->bdrv_getlength) {
1374 return drv->bdrv_getlength(bs);
1375 }
83f64091 1376 }
46a4e4e6 1377 return bs->total_sectors * BDRV_SECTOR_SIZE;
fc01f7e7
FB
1378}
1379
19cb3738 1380/* return 0 as number of sectors if no device present or error */
96b8f136 1381void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
fc01f7e7 1382{
19cb3738
FB
1383 int64_t length;
1384 length = bdrv_getlength(bs);
1385 if (length < 0)
1386 length = 0;
1387 else
6ea44308 1388 length = length >> BDRV_SECTOR_BITS;
19cb3738 1389 *nb_sectors_ptr = length;
fc01f7e7 1390}
cf98951b 1391
f3d54fc4
AL
/* One entry of the MSDOS partition table, as laid out on disk. */
struct partition {
    uint8_t boot_ind;       /* 0x80 - active */
    uint8_t head;           /* starting head */
    uint8_t sector;         /* starting sector */
    uint8_t cyl;            /* starting cylinder */
    uint8_t sys_ind;        /* partition type */
    uint8_t end_head;       /* end head */
    uint8_t end_sector;     /* end sector */
    uint8_t end_cyl;        /* end cylinder */
    uint32_t start_sect;    /* starting sector counting from 0 */
    uint32_t nr_sects;      /* number of sectors in partition */
} QEMU_PACKED;
f3d54fc4
AL
1404
1405/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1406static int guess_disk_lchs(BlockDriverState *bs,
1407 int *pcylinders, int *pheads, int *psectors)
1408{
eb5a3165 1409 uint8_t buf[BDRV_SECTOR_SIZE];
f3d54fc4
AL
1410 int ret, i, heads, sectors, cylinders;
1411 struct partition *p;
1412 uint32_t nr_sects;
a38131b6 1413 uint64_t nb_sectors;
f3d54fc4
AL
1414
1415 bdrv_get_geometry(bs, &nb_sectors);
1416
1417 ret = bdrv_read(bs, 0, buf, 1);
1418 if (ret < 0)
1419 return -1;
1420 /* test msdos magic */
1421 if (buf[510] != 0x55 || buf[511] != 0xaa)
1422 return -1;
1423 for(i = 0; i < 4; i++) {
1424 p = ((struct partition *)(buf + 0x1be)) + i;
1425 nr_sects = le32_to_cpu(p->nr_sects);
1426 if (nr_sects && p->end_head) {
1427 /* We make the assumption that the partition terminates on
1428 a cylinder boundary */
1429 heads = p->end_head + 1;
1430 sectors = p->end_sector & 63;
1431 if (sectors == 0)
1432 continue;
1433 cylinders = nb_sectors / (heads * sectors);
1434 if (cylinders < 1 || cylinders > 16383)
1435 continue;
1436 *pheads = heads;
1437 *psectors = sectors;
1438 *pcylinders = cylinders;
1439#if 0
1440 printf("guessed geometry: LCHS=%d %d %d\n",
1441 cylinders, heads, sectors);
1442#endif
1443 return 0;
1444 }
1445 }
1446 return -1;
1447}
1448
1449void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1450{
1451 int translation, lba_detected = 0;
1452 int cylinders, heads, secs;
a38131b6 1453 uint64_t nb_sectors;
f3d54fc4
AL
1454
1455 /* if a geometry hint is available, use it */
1456 bdrv_get_geometry(bs, &nb_sectors);
1457 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1458 translation = bdrv_get_translation_hint(bs);
1459 if (cylinders != 0) {
1460 *pcyls = cylinders;
1461 *pheads = heads;
1462 *psecs = secs;
1463 } else {
1464 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1465 if (heads > 16) {
1466 /* if heads > 16, it means that a BIOS LBA
1467 translation was active, so the default
1468 hardware geometry is OK */
1469 lba_detected = 1;
1470 goto default_geometry;
1471 } else {
1472 *pcyls = cylinders;
1473 *pheads = heads;
1474 *psecs = secs;
1475 /* disable any translation to be in sync with
1476 the logical geometry */
1477 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1478 bdrv_set_translation_hint(bs,
1479 BIOS_ATA_TRANSLATION_NONE);
1480 }
1481 }
1482 } else {
1483 default_geometry:
1484 /* if no geometry, use a standard physical disk geometry */
1485 cylinders = nb_sectors / (16 * 63);
1486
1487 if (cylinders > 16383)
1488 cylinders = 16383;
1489 else if (cylinders < 2)
1490 cylinders = 2;
1491 *pcyls = cylinders;
1492 *pheads = 16;
1493 *psecs = 63;
1494 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1495 if ((*pcyls * *pheads) <= 131072) {
1496 bdrv_set_translation_hint(bs,
1497 BIOS_ATA_TRANSLATION_LARGE);
1498 } else {
1499 bdrv_set_translation_hint(bs,
1500 BIOS_ATA_TRANSLATION_LBA);
1501 }
1502 }
1503 }
1504 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1505 }
1506}
1507
5fafdf24 1508void bdrv_set_geometry_hint(BlockDriverState *bs,
b338082b
FB
1509 int cyls, int heads, int secs)
1510{
1511 bs->cyls = cyls;
1512 bs->heads = heads;
1513 bs->secs = secs;
1514}
1515
46d4767d
FB
1516void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1517{
1518 bs->translation = translation;
1519}
1520
5fafdf24 1521void bdrv_get_geometry_hint(BlockDriverState *bs,
b338082b
FB
1522 int *pcyls, int *pheads, int *psecs)
1523{
1524 *pcyls = bs->cyls;
1525 *pheads = bs->heads;
1526 *psecs = bs->secs;
1527}
1528
5bbdbb46
BS
1529/* Recognize floppy formats */
1530typedef struct FDFormat {
1531 FDriveType drive;
1532 uint8_t last_sect;
1533 uint8_t max_track;
1534 uint8_t max_head;
1535} FDFormat;
1536
1537static const FDFormat fd_formats[] = {
1538 /* First entry is default format */
1539 /* 1.44 MB 3"1/2 floppy disks */
1540 { FDRIVE_DRV_144, 18, 80, 1, },
1541 { FDRIVE_DRV_144, 20, 80, 1, },
1542 { FDRIVE_DRV_144, 21, 80, 1, },
1543 { FDRIVE_DRV_144, 21, 82, 1, },
1544 { FDRIVE_DRV_144, 21, 83, 1, },
1545 { FDRIVE_DRV_144, 22, 80, 1, },
1546 { FDRIVE_DRV_144, 23, 80, 1, },
1547 { FDRIVE_DRV_144, 24, 80, 1, },
1548 /* 2.88 MB 3"1/2 floppy disks */
1549 { FDRIVE_DRV_288, 36, 80, 1, },
1550 { FDRIVE_DRV_288, 39, 80, 1, },
1551 { FDRIVE_DRV_288, 40, 80, 1, },
1552 { FDRIVE_DRV_288, 44, 80, 1, },
1553 { FDRIVE_DRV_288, 48, 80, 1, },
1554 /* 720 kB 3"1/2 floppy disks */
1555 { FDRIVE_DRV_144, 9, 80, 1, },
1556 { FDRIVE_DRV_144, 10, 80, 1, },
1557 { FDRIVE_DRV_144, 10, 82, 1, },
1558 { FDRIVE_DRV_144, 10, 83, 1, },
1559 { FDRIVE_DRV_144, 13, 80, 1, },
1560 { FDRIVE_DRV_144, 14, 80, 1, },
1561 /* 1.2 MB 5"1/4 floppy disks */
1562 { FDRIVE_DRV_120, 15, 80, 1, },
1563 { FDRIVE_DRV_120, 18, 80, 1, },
1564 { FDRIVE_DRV_120, 18, 82, 1, },
1565 { FDRIVE_DRV_120, 18, 83, 1, },
1566 { FDRIVE_DRV_120, 20, 80, 1, },
1567 /* 720 kB 5"1/4 floppy disks */
1568 { FDRIVE_DRV_120, 9, 80, 1, },
1569 { FDRIVE_DRV_120, 11, 80, 1, },
1570 /* 360 kB 5"1/4 floppy disks */
1571 { FDRIVE_DRV_120, 9, 40, 1, },
1572 { FDRIVE_DRV_120, 9, 40, 0, },
1573 { FDRIVE_DRV_120, 10, 41, 1, },
1574 { FDRIVE_DRV_120, 10, 42, 1, },
1575 /* 320 kB 5"1/4 floppy disks */
1576 { FDRIVE_DRV_120, 8, 40, 1, },
1577 { FDRIVE_DRV_120, 8, 40, 0, },
1578 /* 360 kB must match 5"1/4 better than 3"1/2... */
1579 { FDRIVE_DRV_144, 9, 80, 0, },
1580 /* end */
1581 { FDRIVE_DRV_NONE, -1, -1, 0, },
1582};
1583
1584void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1585 int *max_track, int *last_sect,
1586 FDriveType drive_in, FDriveType *drive)
1587{
1588 const FDFormat *parse;
1589 uint64_t nb_sectors, size;
1590 int i, first_match, match;
1591
1592 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1593 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1594 /* User defined disk */
1595 } else {
1596 bdrv_get_geometry(bs, &nb_sectors);
1597 match = -1;
1598 first_match = -1;
1599 for (i = 0; ; i++) {
1600 parse = &fd_formats[i];
1601 if (parse->drive == FDRIVE_DRV_NONE) {
1602 break;
1603 }
1604 if (drive_in == parse->drive ||
1605 drive_in == FDRIVE_DRV_NONE) {
1606 size = (parse->max_head + 1) * parse->max_track *
1607 parse->last_sect;
1608 if (nb_sectors == size) {
1609 match = i;
1610 break;
1611 }
1612 if (first_match == -1) {
1613 first_match = i;
1614 }
1615 }
1616 }
1617 if (match == -1) {
1618 if (first_match == -1) {
1619 match = 1;
1620 } else {
1621 match = first_match;
1622 }
1623 parse = &fd_formats[match];
1624 }
1625 *nb_heads = parse->max_head + 1;
1626 *max_track = parse->max_track;
1627 *last_sect = parse->last_sect;
1628 *drive = parse->drive;
1629 }
1630}
1631
46d4767d
FB
1632int bdrv_get_translation_hint(BlockDriverState *bs)
1633{
1634 return bs->translation;
1635}
1636
abd7f68d
MA
1637void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1638 BlockErrorAction on_write_error)
1639{
1640 bs->on_read_error = on_read_error;
1641 bs->on_write_error = on_write_error;
1642}
1643
1644BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1645{
1646 return is_read ? bs->on_read_error : bs->on_write_error;
1647}
1648
b338082b
FB
1649int bdrv_is_read_only(BlockDriverState *bs)
1650{
1651 return bs->read_only;
1652}
1653
985a03b0
TS
1654int bdrv_is_sg(BlockDriverState *bs)
1655{
1656 return bs->sg;
1657}
1658
e900a7b7
CH
1659int bdrv_enable_write_cache(BlockDriverState *bs)
1660{
1661 return bs->enable_write_cache;
1662}
1663
ea2384d3
FB
1664int bdrv_is_encrypted(BlockDriverState *bs)
1665{
1666 if (bs->backing_hd && bs->backing_hd->encrypted)
1667 return 1;
1668 return bs->encrypted;
1669}
1670
c0f4ce77
AL
1671int bdrv_key_required(BlockDriverState *bs)
1672{
1673 BlockDriverState *backing_hd = bs->backing_hd;
1674
1675 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1676 return 1;
1677 return (bs->encrypted && !bs->valid_key);
1678}
1679
ea2384d3
FB
1680int bdrv_set_key(BlockDriverState *bs, const char *key)
1681{
1682 int ret;
1683 if (bs->backing_hd && bs->backing_hd->encrypted) {
1684 ret = bdrv_set_key(bs->backing_hd, key);
1685 if (ret < 0)
1686 return ret;
1687 if (!bs->encrypted)
1688 return 0;
1689 }
fd04a2ae
SH
1690 if (!bs->encrypted) {
1691 return -EINVAL;
1692 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1693 return -ENOMEDIUM;
1694 }
c0f4ce77 1695 ret = bs->drv->bdrv_set_key(bs, key);
bb5fc20f
AL
1696 if (ret < 0) {
1697 bs->valid_key = 0;
1698 } else if (!bs->valid_key) {
1699 bs->valid_key = 1;
1700 /* call the change callback now, we skipped it on open */
7d4b4ba5 1701 bdrv_dev_change_media_cb(bs, true);
bb5fc20f 1702 }
c0f4ce77 1703 return ret;
ea2384d3
FB
1704}
1705
1706void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1707{
19cb3738 1708 if (!bs->drv) {
ea2384d3
FB
1709 buf[0] = '\0';
1710 } else {
1711 pstrcpy(buf, buf_size, bs->drv->format_name);
1712 }
1713}
1714
5fafdf24 1715void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
ea2384d3
FB
1716 void *opaque)
1717{
1718 BlockDriver *drv;
1719
8a22f02a 1720 QLIST_FOREACH(drv, &bdrv_drivers, list) {
ea2384d3
FB
1721 it(opaque, drv->format_name);
1722 }
1723}
1724
b338082b
FB
1725BlockDriverState *bdrv_find(const char *name)
1726{
1727 BlockDriverState *bs;
1728
1b7bdbc1
SH
1729 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1730 if (!strcmp(name, bs->device_name)) {
b338082b 1731 return bs;
1b7bdbc1 1732 }
b338082b
FB
1733 }
1734 return NULL;
1735}
1736
2f399b0a
MA
1737BlockDriverState *bdrv_next(BlockDriverState *bs)
1738{
1739 if (!bs) {
1740 return QTAILQ_FIRST(&bdrv_states);
1741 }
1742 return QTAILQ_NEXT(bs, list);
1743}
1744
51de9760 1745void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
81d0912d
FB
1746{
1747 BlockDriverState *bs;
1748
1b7bdbc1 1749 QTAILQ_FOREACH(bs, &bdrv_states, list) {
51de9760 1750 it(opaque, bs);
81d0912d
FB
1751 }
1752}
1753
ea2384d3
FB
1754const char *bdrv_get_device_name(BlockDriverState *bs)
1755{
1756 return bs->device_name;
1757}
1758
205ef796 1759int bdrv_flush(BlockDriverState *bs)
7a6cba61 1760{
016f5cf6 1761 if (bs->open_flags & BDRV_O_NO_FLUSH) {
205ef796
KW
1762 return 0;
1763 }
1764
e7a8a783
KW
1765 if (bs->drv && bdrv_has_async_flush(bs->drv) && qemu_in_coroutine()) {
1766 return bdrv_co_flush_em(bs);
1767 }
1768
205ef796
KW
1769 if (bs->drv && bs->drv->bdrv_flush) {
1770 return bs->drv->bdrv_flush(bs);
016f5cf6
AG
1771 }
1772
205ef796
KW
1773 /*
1774 * Some block drivers always operate in either writethrough or unsafe mode
1775 * and don't support bdrv_flush therefore. Usually qemu doesn't know how
1776 * the server works (because the behaviour is hardcoded or depends on
1777 * server-side configuration), so we can't ensure that everything is safe
1778 * on disk. Returning an error doesn't work because that would break guests
1779 * even if the server operates in writethrough mode.
1780 *
1781 * Let's hope the user knows what he's doing.
1782 */
1783 return 0;
7a6cba61
PB
1784}
1785
c6ca28d6
AL
1786void bdrv_flush_all(void)
1787{
1788 BlockDriverState *bs;
1789
1b7bdbc1 1790 QTAILQ_FOREACH(bs, &bdrv_states, list) {
c602a489 1791 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
c6ca28d6 1792 bdrv_flush(bs);
1b7bdbc1
SH
1793 }
1794 }
c6ca28d6
AL
1795}
1796
f2feebbd
KW
1797int bdrv_has_zero_init(BlockDriverState *bs)
1798{
1799 assert(bs->drv);
1800
336c1c12
KW
1801 if (bs->drv->bdrv_has_zero_init) {
1802 return bs->drv->bdrv_has_zero_init(bs);
f2feebbd
KW
1803 }
1804
1805 return 1;
1806}
1807
bb8bf76f
CH
1808int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
1809{
1810 if (!bs->drv) {
1811 return -ENOMEDIUM;
1812 }
1813 if (!bs->drv->bdrv_discard) {
1814 return 0;
1815 }
1816 return bs->drv->bdrv_discard(bs, sector_num, nb_sectors);
1817}
1818
f58c7b35
TS
1819/*
1820 * Returns true iff the specified sector is present in the disk image. Drivers
1821 * not implementing the functionality are assumed to not support backing files,
1822 * hence all their sectors are reported as allocated.
1823 *
1824 * 'pnum' is set to the number of sectors (including and immediately following
1825 * the specified sector) that are known to be in the same
1826 * allocated/unallocated state.
1827 *
1828 * 'nb_sectors' is the max value 'pnum' should be set to.
1829 */
1830int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1831 int *pnum)
1832{
1833 int64_t n;
1834 if (!bs->drv->bdrv_is_allocated) {
1835 if (sector_num >= bs->total_sectors) {
1836 *pnum = 0;
1837 return 0;
1838 }
1839 n = bs->total_sectors - sector_num;
1840 *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1841 return 1;
1842 }
1843 return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1844}
1845
2582bfed
LC
1846void bdrv_mon_event(const BlockDriverState *bdrv,
1847 BlockMonEventAction action, int is_read)
1848{
1849 QObject *data;
1850 const char *action_str;
1851
1852 switch (action) {
1853 case BDRV_ACTION_REPORT:
1854 action_str = "report";
1855 break;
1856 case BDRV_ACTION_IGNORE:
1857 action_str = "ignore";
1858 break;
1859 case BDRV_ACTION_STOP:
1860 action_str = "stop";
1861 break;
1862 default:
1863 abort();
1864 }
1865
1866 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1867 bdrv->device_name,
1868 action_str,
1869 is_read ? "read" : "write");
1870 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1871
1872 qobject_decref(data);
1873}
1874
d15e5465 1875static void bdrv_print_dict(QObject *obj, void *opaque)
b338082b 1876{
d15e5465
LC
1877 QDict *bs_dict;
1878 Monitor *mon = opaque;
1879
1880 bs_dict = qobject_to_qdict(obj);
1881
d8aeeb31 1882 monitor_printf(mon, "%s: removable=%d",
d15e5465 1883 qdict_get_str(bs_dict, "device"),
d15e5465
LC
1884 qdict_get_bool(bs_dict, "removable"));
1885
1886 if (qdict_get_bool(bs_dict, "removable")) {
1887 monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
e4def80b
MA
1888 monitor_printf(mon, " tray-open=%d",
1889 qdict_get_bool(bs_dict, "tray-open"));
d15e5465 1890 }
d2078cc2
LC
1891
1892 if (qdict_haskey(bs_dict, "io-status")) {
1893 monitor_printf(mon, " io-status=%s", qdict_get_str(bs_dict, "io-status"));
1894 }
1895
d15e5465
LC
1896 if (qdict_haskey(bs_dict, "inserted")) {
1897 QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted"));
1898
1899 monitor_printf(mon, " file=");
1900 monitor_print_filename(mon, qdict_get_str(qdict, "file"));
1901 if (qdict_haskey(qdict, "backing_file")) {
1902 monitor_printf(mon, " backing_file=");
1903 monitor_print_filename(mon, qdict_get_str(qdict, "backing_file"));
1904 }
1905 monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
1906 qdict_get_bool(qdict, "ro"),
1907 qdict_get_str(qdict, "drv"),
1908 qdict_get_bool(qdict, "encrypted"));
1909 } else {
1910 monitor_printf(mon, " [not inserted]");
1911 }
1912
1913 monitor_printf(mon, "\n");
1914}
1915
1916void bdrv_info_print(Monitor *mon, const QObject *data)
1917{
1918 qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon);
1919}
1920
f04ef601
LC
1921static const char *const io_status_name[BDRV_IOS_MAX] = {
1922 [BDRV_IOS_OK] = "ok",
1923 [BDRV_IOS_FAILED] = "failed",
1924 [BDRV_IOS_ENOSPC] = "nospace",
1925};
1926
d15e5465
LC
1927void bdrv_info(Monitor *mon, QObject **ret_data)
1928{
1929 QList *bs_list;
b338082b
FB
1930 BlockDriverState *bs;
1931
d15e5465
LC
1932 bs_list = qlist_new();
1933
1b7bdbc1 1934 QTAILQ_FOREACH(bs, &bdrv_states, list) {
d15e5465 1935 QObject *bs_obj;
e4def80b 1936 QDict *bs_dict;
d15e5465 1937
d8aeeb31 1938 bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': 'unknown', "
d15e5465 1939 "'removable': %i, 'locked': %i }",
2c6942fa
MA
1940 bs->device_name,
1941 bdrv_dev_has_removable_media(bs),
f107639a 1942 bdrv_dev_is_medium_locked(bs));
e4def80b 1943 bs_dict = qobject_to_qdict(bs_obj);
d15e5465 1944
e4def80b
MA
1945 if (bdrv_dev_has_removable_media(bs)) {
1946 qdict_put(bs_dict, "tray-open",
1947 qbool_from_int(bdrv_dev_is_tray_open(bs)));
1948 }
f04ef601
LC
1949
1950 if (bdrv_iostatus_is_enabled(bs)) {
1951 qdict_put(bs_dict, "io-status",
1952 qstring_from_str(io_status_name[bs->iostatus]));
1953 }
1954
19cb3738 1955 if (bs->drv) {
d15e5465 1956 QObject *obj;
d15e5465
LC
1957
1958 obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
1959 "'encrypted': %i }",
1960 bs->filename, bs->read_only,
1961 bs->drv->format_name,
1962 bdrv_is_encrypted(bs));
fef30743 1963 if (bs->backing_file[0] != '\0') {
d15e5465
LC
1964 QDict *qdict = qobject_to_qdict(obj);
1965 qdict_put(qdict, "backing_file",
1966 qstring_from_str(bs->backing_file));
376253ec 1967 }
d15e5465
LC
1968
1969 qdict_put_obj(bs_dict, "inserted", obj);
b338082b 1970 }
d15e5465 1971 qlist_append_obj(bs_list, bs_obj);
b338082b 1972 }
d15e5465
LC
1973
1974 *ret_data = QOBJECT(bs_list);
b338082b 1975}
a36e69dd 1976
218a536a 1977static void bdrv_stats_iter(QObject *data, void *opaque)
a36e69dd 1978{
218a536a
LC
1979 QDict *qdict;
1980 Monitor *mon = opaque;
1981
1982 qdict = qobject_to_qdict(data);
1983 monitor_printf(mon, "%s:", qdict_get_str(qdict, "device"));
1984
1985 qdict = qobject_to_qdict(qdict_get(qdict, "stats"));
1986 monitor_printf(mon, " rd_bytes=%" PRId64
1987 " wr_bytes=%" PRId64
1988 " rd_operations=%" PRId64
1989 " wr_operations=%" PRId64
e8045d67 1990 " flush_operations=%" PRId64
c488c7f6
CH
1991 " wr_total_time_ns=%" PRId64
1992 " rd_total_time_ns=%" PRId64
1993 " flush_total_time_ns=%" PRId64
218a536a
LC
1994 "\n",
1995 qdict_get_int(qdict, "rd_bytes"),
1996 qdict_get_int(qdict, "wr_bytes"),
1997 qdict_get_int(qdict, "rd_operations"),
e8045d67 1998 qdict_get_int(qdict, "wr_operations"),
c488c7f6
CH
1999 qdict_get_int(qdict, "flush_operations"),
2000 qdict_get_int(qdict, "wr_total_time_ns"),
2001 qdict_get_int(qdict, "rd_total_time_ns"),
2002 qdict_get_int(qdict, "flush_total_time_ns"));
218a536a
LC
2003}
2004
2005void bdrv_stats_print(Monitor *mon, const QObject *data)
2006{
2007 qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon);
2008}
2009
294cc35f
KW
2010static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
2011{
2012 QObject *res;
2013 QDict *dict;
2014
2015 res = qobject_from_jsonf("{ 'stats': {"
2016 "'rd_bytes': %" PRId64 ","
2017 "'wr_bytes': %" PRId64 ","
2018 "'rd_operations': %" PRId64 ","
2019 "'wr_operations': %" PRId64 ","
e8045d67 2020 "'wr_highest_offset': %" PRId64 ","
c488c7f6
CH
2021 "'flush_operations': %" PRId64 ","
2022 "'wr_total_time_ns': %" PRId64 ","
2023 "'rd_total_time_ns': %" PRId64 ","
2024 "'flush_total_time_ns': %" PRId64
294cc35f 2025 "} }",
a597e79c
CH
2026 bs->nr_bytes[BDRV_ACCT_READ],
2027 bs->nr_bytes[BDRV_ACCT_WRITE],
2028 bs->nr_ops[BDRV_ACCT_READ],
2029 bs->nr_ops[BDRV_ACCT_WRITE],
5ffbbc67 2030 bs->wr_highest_sector *
e8045d67 2031 (uint64_t)BDRV_SECTOR_SIZE,
c488c7f6
CH
2032 bs->nr_ops[BDRV_ACCT_FLUSH],
2033 bs->total_time_ns[BDRV_ACCT_WRITE],
2034 bs->total_time_ns[BDRV_ACCT_READ],
2035 bs->total_time_ns[BDRV_ACCT_FLUSH]);
294cc35f
KW
2036 dict = qobject_to_qdict(res);
2037
2038 if (*bs->device_name) {
2039 qdict_put(dict, "device", qstring_from_str(bs->device_name));
2040 }
2041
2042 if (bs->file) {
2043 QObject *parent = bdrv_info_stats_bs(bs->file);
2044 qdict_put_obj(dict, "parent", parent);
2045 }
2046
2047 return res;
2048}
2049
218a536a
LC
2050void bdrv_info_stats(Monitor *mon, QObject **ret_data)
2051{
2052 QObject *obj;
2053 QList *devices;
a36e69dd
TS
2054 BlockDriverState *bs;
2055
218a536a
LC
2056 devices = qlist_new();
2057
1b7bdbc1 2058 QTAILQ_FOREACH(bs, &bdrv_states, list) {
294cc35f 2059 obj = bdrv_info_stats_bs(bs);
218a536a 2060 qlist_append_obj(devices, obj);
a36e69dd 2061 }
218a536a
LC
2062
2063 *ret_data = QOBJECT(devices);
a36e69dd 2064}
ea2384d3 2065
045df330
AL
2066const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2067{
2068 if (bs->backing_hd && bs->backing_hd->encrypted)
2069 return bs->backing_file;
2070 else if (bs->encrypted)
2071 return bs->filename;
2072 else
2073 return NULL;
2074}
2075
5fafdf24 2076void bdrv_get_backing_filename(BlockDriverState *bs,
83f64091
FB
2077 char *filename, int filename_size)
2078{
b783e409 2079 if (!bs->backing_file) {
83f64091
FB
2080 pstrcpy(filename, filename_size, "");
2081 } else {
2082 pstrcpy(filename, filename_size, bs->backing_file);
2083 }
2084}
2085
5fafdf24 2086int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
faea38e7
FB
2087 const uint8_t *buf, int nb_sectors)
2088{
2089 BlockDriver *drv = bs->drv;
2090 if (!drv)
19cb3738 2091 return -ENOMEDIUM;
faea38e7
FB
2092 if (!drv->bdrv_write_compressed)
2093 return -ENOTSUP;
fbb7b4e0
KW
2094 if (bdrv_check_request(bs, sector_num, nb_sectors))
2095 return -EIO;
a55eb92c 2096
c6d22830 2097 if (bs->dirty_bitmap) {
7cd1e32a 2098 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2099 }
a55eb92c 2100
faea38e7
FB
2101 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2102}
3b46e624 2103
faea38e7
FB
2104int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2105{
2106 BlockDriver *drv = bs->drv;
2107 if (!drv)
19cb3738 2108 return -ENOMEDIUM;
faea38e7
FB
2109 if (!drv->bdrv_get_info)
2110 return -ENOTSUP;
2111 memset(bdi, 0, sizeof(*bdi));
2112 return drv->bdrv_get_info(bs, bdi);
2113}
2114
45566e9c
CH
2115int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2116 int64_t pos, int size)
178e08a5
AL
2117{
2118 BlockDriver *drv = bs->drv;
2119 if (!drv)
2120 return -ENOMEDIUM;
7cdb1f6d
MK
2121 if (drv->bdrv_save_vmstate)
2122 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2123 if (bs->file)
2124 return bdrv_save_vmstate(bs->file, buf, pos, size);
2125 return -ENOTSUP;
178e08a5
AL
2126}
2127
45566e9c
CH
2128int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2129 int64_t pos, int size)
178e08a5
AL
2130{
2131 BlockDriver *drv = bs->drv;
2132 if (!drv)
2133 return -ENOMEDIUM;
7cdb1f6d
MK
2134 if (drv->bdrv_load_vmstate)
2135 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2136 if (bs->file)
2137 return bdrv_load_vmstate(bs->file, buf, pos, size);
2138 return -ENOTSUP;
178e08a5
AL
2139}
2140
8b9b0cc2
KW
2141void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2142{
2143 BlockDriver *drv = bs->drv;
2144
2145 if (!drv || !drv->bdrv_debug_event) {
2146 return;
2147 }
2148
2149 return drv->bdrv_debug_event(bs, event);
2150
2151}
2152
faea38e7
FB
2153/**************************************************************/
2154/* handling of snapshots */
2155
feeee5ac
MDCF
2156int bdrv_can_snapshot(BlockDriverState *bs)
2157{
2158 BlockDriver *drv = bs->drv;
07b70bfb 2159 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
feeee5ac
MDCF
2160 return 0;
2161 }
2162
2163 if (!drv->bdrv_snapshot_create) {
2164 if (bs->file != NULL) {
2165 return bdrv_can_snapshot(bs->file);
2166 }
2167 return 0;
2168 }
2169
2170 return 1;
2171}
2172
199630b6
BS
2173int bdrv_is_snapshot(BlockDriverState *bs)
2174{
2175 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2176}
2177
f9092b10
MA
2178BlockDriverState *bdrv_snapshots(void)
2179{
2180 BlockDriverState *bs;
2181
3ac906f7 2182 if (bs_snapshots) {
f9092b10 2183 return bs_snapshots;
3ac906f7 2184 }
f9092b10
MA
2185
2186 bs = NULL;
2187 while ((bs = bdrv_next(bs))) {
2188 if (bdrv_can_snapshot(bs)) {
3ac906f7
MA
2189 bs_snapshots = bs;
2190 return bs;
f9092b10
MA
2191 }
2192 }
2193 return NULL;
f9092b10
MA
2194}
2195
5fafdf24 2196int bdrv_snapshot_create(BlockDriverState *bs,
faea38e7
FB
2197 QEMUSnapshotInfo *sn_info)
2198{
2199 BlockDriver *drv = bs->drv;
2200 if (!drv)
19cb3738 2201 return -ENOMEDIUM;
7cdb1f6d
MK
2202 if (drv->bdrv_snapshot_create)
2203 return drv->bdrv_snapshot_create(bs, sn_info);
2204 if (bs->file)
2205 return bdrv_snapshot_create(bs->file, sn_info);
2206 return -ENOTSUP;
faea38e7
FB
2207}
2208
5fafdf24 2209int bdrv_snapshot_goto(BlockDriverState *bs,
faea38e7
FB
2210 const char *snapshot_id)
2211{
2212 BlockDriver *drv = bs->drv;
7cdb1f6d
MK
2213 int ret, open_ret;
2214
faea38e7 2215 if (!drv)
19cb3738 2216 return -ENOMEDIUM;
7cdb1f6d
MK
2217 if (drv->bdrv_snapshot_goto)
2218 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2219
2220 if (bs->file) {
2221 drv->bdrv_close(bs);
2222 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2223 open_ret = drv->bdrv_open(bs, bs->open_flags);
2224 if (open_ret < 0) {
2225 bdrv_delete(bs->file);
2226 bs->drv = NULL;
2227 return open_ret;
2228 }
2229 return ret;
2230 }
2231
2232 return -ENOTSUP;
faea38e7
FB
2233}
2234
2235int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2236{
2237 BlockDriver *drv = bs->drv;
2238 if (!drv)
19cb3738 2239 return -ENOMEDIUM;
7cdb1f6d
MK
2240 if (drv->bdrv_snapshot_delete)
2241 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2242 if (bs->file)
2243 return bdrv_snapshot_delete(bs->file, snapshot_id);
2244 return -ENOTSUP;
faea38e7
FB
2245}
2246
5fafdf24 2247int bdrv_snapshot_list(BlockDriverState *bs,
faea38e7
FB
2248 QEMUSnapshotInfo **psn_info)
2249{
2250 BlockDriver *drv = bs->drv;
2251 if (!drv)
19cb3738 2252 return -ENOMEDIUM;
7cdb1f6d
MK
2253 if (drv->bdrv_snapshot_list)
2254 return drv->bdrv_snapshot_list(bs, psn_info);
2255 if (bs->file)
2256 return bdrv_snapshot_list(bs->file, psn_info);
2257 return -ENOTSUP;
faea38e7
FB
2258}
2259
51ef6727 2260int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2261 const char *snapshot_name)
2262{
2263 BlockDriver *drv = bs->drv;
2264 if (!drv) {
2265 return -ENOMEDIUM;
2266 }
2267 if (!bs->read_only) {
2268 return -EINVAL;
2269 }
2270 if (drv->bdrv_snapshot_load_tmp) {
2271 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2272 }
2273 return -ENOTSUP;
2274}
2275
faea38e7
FB
#define NB_SUFFIXES 4

/*
 * Format size into buf (at most buf_size bytes) in a short human-readable
 * form and return buf.
 *
 * Values up to 999 are printed as plain integers.  Larger values are scaled
 * by powers of 1024 and suffixed with K, M, G or T: one decimal digit while
 * the scaled value is below 10, a rounded integer otherwise.  Values too
 * large for the other rules are expressed in T.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t base = 1024;
    int i;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (i = 0; i < NB_SUFFIXES; i++, base *= 1024) {
        if (size < 10 * base) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / base,
                     suffixes[i]);
            break;
        }
        if (size < 1000 * base || i == NB_SUFFIXES - 1) {
            /*
             * Round to nearest without computing size + base / 2, which
             * overflows int64_t for sizes close to INT64_MAX.
             */
            int64_t rounded = size / base + (size % base >= base / 2);

            snprintf(buf, buf_size, "%" PRId64 "%c",
                     rounded,
                     suffixes[i]);
            break;
        }
    }
    return buf;
}
2305
2306char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2307{
2308 char buf1[128], date_buf[128], clock_buf[128];
3b9f94e1
FB
2309#ifdef _WIN32
2310 struct tm *ptm;
2311#else
faea38e7 2312 struct tm tm;
3b9f94e1 2313#endif
faea38e7
FB
2314 time_t ti;
2315 int64_t secs;
2316
2317 if (!sn) {
5fafdf24
TS
2318 snprintf(buf, buf_size,
2319 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
2320 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2321 } else {
2322 ti = sn->date_sec;
3b9f94e1
FB
2323#ifdef _WIN32
2324 ptm = localtime(&ti);
2325 strftime(date_buf, sizeof(date_buf),
2326 "%Y-%m-%d %H:%M:%S", ptm);
2327#else
faea38e7
FB
2328 localtime_r(&ti, &tm);
2329 strftime(date_buf, sizeof(date_buf),
2330 "%Y-%m-%d %H:%M:%S", &tm);
3b9f94e1 2331#endif
faea38e7
FB
2332 secs = sn->vm_clock_nsec / 1000000000;
2333 snprintf(clock_buf, sizeof(clock_buf),
2334 "%02d:%02d:%02d.%03d",
2335 (int)(secs / 3600),
2336 (int)((secs / 60) % 60),
5fafdf24 2337 (int)(secs % 60),
faea38e7
FB
2338 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2339 snprintf(buf, buf_size,
5fafdf24 2340 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
2341 sn->id_str, sn->name,
2342 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2343 date_buf,
2344 clock_buf);
2345 }
2346 return buf;
2347}
2348
ea2384d3 2349/**************************************************************/
83f64091 2350/* async I/Os */
ea2384d3 2351
3b69e4b9 2352BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
f141eafe 2353 QEMUIOVector *qiov, int nb_sectors,
3b69e4b9 2354 BlockDriverCompletionFunc *cb, void *opaque)
83f64091
FB
2355{
2356 BlockDriver *drv = bs->drv;
83f64091 2357
bbf0a440
SH
2358 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2359
19cb3738 2360 if (!drv)
ce1a14dc 2361 return NULL;
71d0770c
AL
2362 if (bdrv_check_request(bs, sector_num, nb_sectors))
2363 return NULL;
3b46e624 2364
a597e79c
CH
2365 return drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
2366 cb, opaque);
ea2384d3
FB
2367}
2368
4dcafbb1
MT
2369typedef struct BlockCompleteData {
2370 BlockDriverCompletionFunc *cb;
2371 void *opaque;
2372 BlockDriverState *bs;
2373 int64_t sector_num;
2374 int nb_sectors;
2375} BlockCompleteData;
2376
2377static void block_complete_cb(void *opaque, int ret)
2378{
2379 BlockCompleteData *b = opaque;
2380
2381 if (b->bs->dirty_bitmap) {
2382 set_dirty_bitmap(b->bs, b->sector_num, b->nb_sectors, 1);
2383 }
2384 b->cb(b->opaque, ret);
7267c094 2385 g_free(b);
4dcafbb1
MT
2386}
2387
2388static BlockCompleteData *blk_dirty_cb_alloc(BlockDriverState *bs,
2389 int64_t sector_num,
2390 int nb_sectors,
2391 BlockDriverCompletionFunc *cb,
2392 void *opaque)
2393{
7267c094 2394 BlockCompleteData *blkdata = g_malloc0(sizeof(BlockCompleteData));
4dcafbb1
MT
2395
2396 blkdata->bs = bs;
2397 blkdata->cb = cb;
2398 blkdata->opaque = opaque;
2399 blkdata->sector_num = sector_num;
2400 blkdata->nb_sectors = nb_sectors;
2401
2402 return blkdata;
2403}
2404
f141eafe
AL
2405BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2406 QEMUIOVector *qiov, int nb_sectors,
2407 BlockDriverCompletionFunc *cb, void *opaque)
ea2384d3 2408{
83f64091 2409 BlockDriver *drv = bs->drv;
a36e69dd 2410 BlockDriverAIOCB *ret;
4dcafbb1 2411 BlockCompleteData *blk_cb_data;
ea2384d3 2412
bbf0a440
SH
2413 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2414
19cb3738 2415 if (!drv)
ce1a14dc 2416 return NULL;
83f64091 2417 if (bs->read_only)
ce1a14dc 2418 return NULL;
71d0770c
AL
2419 if (bdrv_check_request(bs, sector_num, nb_sectors))
2420 return NULL;
83f64091 2421
c6d22830 2422 if (bs->dirty_bitmap) {
4dcafbb1
MT
2423 blk_cb_data = blk_dirty_cb_alloc(bs, sector_num, nb_sectors, cb,
2424 opaque);
2425 cb = &block_complete_cb;
2426 opaque = blk_cb_data;
7cd1e32a 2427 }
a55eb92c 2428
f141eafe
AL
2429 ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
2430 cb, opaque);
a36e69dd
TS
2431
2432 if (ret) {
294cc35f
KW
2433 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
2434 bs->wr_highest_sector = sector_num + nb_sectors - 1;
2435 }
a36e69dd
TS
2436 }
2437
2438 return ret;
83f64091
FB
2439}
2440
40b4f539
KW
2441
2442typedef struct MultiwriteCB {
2443 int error;
2444 int num_requests;
2445 int num_callbacks;
2446 struct {
2447 BlockDriverCompletionFunc *cb;
2448 void *opaque;
2449 QEMUIOVector *free_qiov;
2450 void *free_buf;
2451 } callbacks[];
2452} MultiwriteCB;
2453
2454static void multiwrite_user_cb(MultiwriteCB *mcb)
2455{
2456 int i;
2457
2458 for (i = 0; i < mcb->num_callbacks; i++) {
2459 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1e1ea48d
SH
2460 if (mcb->callbacks[i].free_qiov) {
2461 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2462 }
7267c094 2463 g_free(mcb->callbacks[i].free_qiov);
f8a83245 2464 qemu_vfree(mcb->callbacks[i].free_buf);
40b4f539
KW
2465 }
2466}
2467
2468static void multiwrite_cb(void *opaque, int ret)
2469{
2470 MultiwriteCB *mcb = opaque;
2471
6d519a5f
SH
2472 trace_multiwrite_cb(mcb, ret);
2473
cb6d3ca0 2474 if (ret < 0 && !mcb->error) {
40b4f539 2475 mcb->error = ret;
40b4f539
KW
2476 }
2477
2478 mcb->num_requests--;
2479 if (mcb->num_requests == 0) {
de189a1b 2480 multiwrite_user_cb(mcb);
7267c094 2481 g_free(mcb);
40b4f539
KW
2482 }
2483}
2484
2485static int multiwrite_req_compare(const void *a, const void *b)
2486{
77be4366
CH
2487 const BlockRequest *req1 = a, *req2 = b;
2488
2489 /*
2490 * Note that we can't simply subtract req2->sector from req1->sector
2491 * here as that could overflow the return value.
2492 */
2493 if (req1->sector > req2->sector) {
2494 return 1;
2495 } else if (req1->sector < req2->sector) {
2496 return -1;
2497 } else {
2498 return 0;
2499 }
40b4f539
KW
2500}
2501
2502/*
2503 * Takes a bunch of requests and tries to merge them. Returns the number of
2504 * requests that remain after merging.
2505 */
2506static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2507 int num_reqs, MultiwriteCB *mcb)
2508{
2509 int i, outidx;
2510
2511 // Sort requests by start sector
2512 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2513
2514 // Check if adjacent requests touch the same clusters. If so, combine them,
2515 // filling up gaps with zero sectors.
2516 outidx = 0;
2517 for (i = 1; i < num_reqs; i++) {
2518 int merge = 0;
2519 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2520
2521 // This handles the cases that are valid for all block drivers, namely
2522 // exactly sequential writes and overlapping writes.
2523 if (reqs[i].sector <= oldreq_last) {
2524 merge = 1;
2525 }
2526
2527 // The block driver may decide that it makes sense to combine requests
2528 // even if there is a gap of some sectors between them. In this case,
2529 // the gap is filled with zeros (therefore only applicable for yet
2530 // unused space in format like qcow2).
2531 if (!merge && bs->drv->bdrv_merge_requests) {
2532 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2533 }
2534
e2a305fb
CH
2535 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2536 merge = 0;
2537 }
2538
40b4f539
KW
2539 if (merge) {
2540 size_t size;
7267c094 2541 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
40b4f539
KW
2542 qemu_iovec_init(qiov,
2543 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2544
2545 // Add the first request to the merged one. If the requests are
2546 // overlapping, drop the last sectors of the first request.
2547 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2548 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2549
2550 // We might need to add some zeros between the two requests
2551 if (reqs[i].sector > oldreq_last) {
2552 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2553 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2554 memset(buf, 0, zero_bytes);
2555 qemu_iovec_add(qiov, buf, zero_bytes);
2556 mcb->callbacks[i].free_buf = buf;
2557 }
2558
2559 // Add the second request
2560 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2561
cbf1dff2 2562 reqs[outidx].nb_sectors = qiov->size >> 9;
40b4f539
KW
2563 reqs[outidx].qiov = qiov;
2564
2565 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2566 } else {
2567 outidx++;
2568 reqs[outidx].sector = reqs[i].sector;
2569 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2570 reqs[outidx].qiov = reqs[i].qiov;
2571 }
2572 }
2573
2574 return outidx + 1;
2575}
2576
2577/*
2578 * Submit multiple AIO write requests at once.
2579 *
2580 * On success, the function returns 0 and all requests in the reqs array have
2581 * been submitted. In error case this function returns -1, and any of the
2582 * requests may or may not be submitted yet. In particular, this means that the
2583 * callback will be called for some of the requests, for others it won't. The
2584 * caller must check the error field of the BlockRequest to wait for the right
2585 * callbacks (if error != 0, no callback will be called).
2586 *
2587 * The implementation may modify the contents of the reqs array, e.g. to merge
2588 * requests. However, the fields opaque and error are left unmodified as they
2589 * are used to signal failure for a single request to the caller.
2590 */
2591int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2592{
2593 BlockDriverAIOCB *acb;
2594 MultiwriteCB *mcb;
2595 int i;
2596
301db7c2
RH
2597 /* don't submit writes if we don't have a medium */
2598 if (bs->drv == NULL) {
2599 for (i = 0; i < num_reqs; i++) {
2600 reqs[i].error = -ENOMEDIUM;
2601 }
2602 return -1;
2603 }
2604
40b4f539
KW
2605 if (num_reqs == 0) {
2606 return 0;
2607 }
2608
2609 // Create MultiwriteCB structure
7267c094 2610 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
40b4f539
KW
2611 mcb->num_requests = 0;
2612 mcb->num_callbacks = num_reqs;
2613
2614 for (i = 0; i < num_reqs; i++) {
2615 mcb->callbacks[i].cb = reqs[i].cb;
2616 mcb->callbacks[i].opaque = reqs[i].opaque;
2617 }
2618
2619 // Check for mergable requests
2620 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2621
6d519a5f
SH
2622 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2623
453f9a16
KW
2624 /*
2625 * Run the aio requests. As soon as one request can't be submitted
2626 * successfully, fail all requests that are not yet submitted (we must
2627 * return failure for all requests anyway)
2628 *
2629 * num_requests cannot be set to the right value immediately: If
2630 * bdrv_aio_writev fails for some request, num_requests would be too high
2631 * and therefore multiwrite_cb() would never recognize the multiwrite
2632 * request as completed. We also cannot use the loop variable i to set it
2633 * when the first request fails because the callback may already have been
2634 * called for previously submitted requests. Thus, num_requests must be
2635 * incremented for each request that is submitted.
2636 *
2637 * The problem that callbacks may be called early also means that we need
2638 * to take care that num_requests doesn't become 0 before all requests are
2639 * submitted - multiwrite_cb() would consider the multiwrite request
2640 * completed. A dummy request that is "completed" by a manual call to
2641 * multiwrite_cb() takes care of this.
2642 */
2643 mcb->num_requests = 1;
2644
6d519a5f 2645 // Run the aio requests
40b4f539 2646 for (i = 0; i < num_reqs; i++) {
453f9a16 2647 mcb->num_requests++;
40b4f539
KW
2648 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2649 reqs[i].nb_sectors, multiwrite_cb, mcb);
2650
2651 if (acb == NULL) {
2652 // We can only fail the whole thing if no request has been
2653 // submitted yet. Otherwise we'll wait for the submitted AIOs to
2654 // complete and report the error in the callback.
453f9a16 2655 if (i == 0) {
6d519a5f 2656 trace_bdrv_aio_multiwrite_earlyfail(mcb);
40b4f539
KW
2657 goto fail;
2658 } else {
6d519a5f 2659 trace_bdrv_aio_multiwrite_latefail(mcb, i);
7eb58a6c 2660 multiwrite_cb(mcb, -EIO);
40b4f539
KW
2661 break;
2662 }
40b4f539
KW
2663 }
2664 }
2665
453f9a16
KW
2666 /* Complete the dummy request */
2667 multiwrite_cb(mcb, 0);
2668
40b4f539
KW
2669 return 0;
2670
2671fail:
453f9a16
KW
2672 for (i = 0; i < mcb->num_callbacks; i++) {
2673 reqs[i].error = -EIO;
2674 }
7267c094 2675 g_free(mcb);
40b4f539
KW
2676 return -1;
2677}
2678
b2e12bc6
CH
2679BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2680 BlockDriverCompletionFunc *cb, void *opaque)
2681{
2682 BlockDriver *drv = bs->drv;
2683
a13aac04
SH
2684 trace_bdrv_aio_flush(bs, opaque);
2685
016f5cf6
AG
2686 if (bs->open_flags & BDRV_O_NO_FLUSH) {
2687 return bdrv_aio_noop_em(bs, cb, opaque);
2688 }
2689
b2e12bc6
CH
2690 if (!drv)
2691 return NULL;
b2e12bc6
CH
2692 return drv->bdrv_aio_flush(bs, cb, opaque);
2693}
2694
83f64091 2695void bdrv_aio_cancel(BlockDriverAIOCB *acb)
83f64091 2696{
6bbff9a0 2697 acb->pool->cancel(acb);
83f64091
FB
2698}
2699
ce1a14dc 2700
83f64091
FB
2701/**************************************************************/
2702/* async block device emulation */
2703
c16b5a2c
CH
2704typedef struct BlockDriverAIOCBSync {
2705 BlockDriverAIOCB common;
2706 QEMUBH *bh;
2707 int ret;
2708 /* vector translation state */
2709 QEMUIOVector *qiov;
2710 uint8_t *bounce;
2711 int is_write;
2712} BlockDriverAIOCBSync;
2713
2714static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2715{
b666d239
KW
2716 BlockDriverAIOCBSync *acb =
2717 container_of(blockacb, BlockDriverAIOCBSync, common);
6a7ad299 2718 qemu_bh_delete(acb->bh);
36afc451 2719 acb->bh = NULL;
c16b5a2c
CH
2720 qemu_aio_release(acb);
2721}
2722
2723static AIOPool bdrv_em_aio_pool = {
2724 .aiocb_size = sizeof(BlockDriverAIOCBSync),
2725 .cancel = bdrv_aio_cancel_em,
2726};
2727
ce1a14dc 2728static void bdrv_aio_bh_cb(void *opaque)
83f64091 2729{
ce1a14dc 2730 BlockDriverAIOCBSync *acb = opaque;
f141eafe 2731
f141eafe
AL
2732 if (!acb->is_write)
2733 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
ceb42de8 2734 qemu_vfree(acb->bounce);
ce1a14dc 2735 acb->common.cb(acb->common.opaque, acb->ret);
6a7ad299 2736 qemu_bh_delete(acb->bh);
36afc451 2737 acb->bh = NULL;
ce1a14dc 2738 qemu_aio_release(acb);
83f64091 2739}
beac80cd 2740
f141eafe
AL
2741static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2742 int64_t sector_num,
2743 QEMUIOVector *qiov,
2744 int nb_sectors,
2745 BlockDriverCompletionFunc *cb,
2746 void *opaque,
2747 int is_write)
2748
83f64091 2749{
ce1a14dc 2750 BlockDriverAIOCBSync *acb;
ce1a14dc 2751
c16b5a2c 2752 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
f141eafe
AL
2753 acb->is_write = is_write;
2754 acb->qiov = qiov;
e268ca52 2755 acb->bounce = qemu_blockalign(bs, qiov->size);
f141eafe 2756
ce1a14dc
PB
2757 if (!acb->bh)
2758 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
f141eafe
AL
2759
2760 if (is_write) {
2761 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
1ed20acf 2762 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
f141eafe 2763 } else {
1ed20acf 2764 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
f141eafe
AL
2765 }
2766
ce1a14dc 2767 qemu_bh_schedule(acb->bh);
f141eafe 2768
ce1a14dc 2769 return &acb->common;
beac80cd
FB
2770}
2771
f141eafe
AL
2772static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2773 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 2774 BlockDriverCompletionFunc *cb, void *opaque)
beac80cd 2775{
f141eafe
AL
2776 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2777}
83f64091 2778
f141eafe
AL
2779static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2780 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2781 BlockDriverCompletionFunc *cb, void *opaque)
2782{
2783 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
beac80cd 2784}
beac80cd 2785
68485420
KW
2786
2787typedef struct BlockDriverAIOCBCoroutine {
2788 BlockDriverAIOCB common;
2789 BlockRequest req;
2790 bool is_write;
2791 QEMUBH* bh;
2792} BlockDriverAIOCBCoroutine;
2793
2794static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
2795{
2796 qemu_aio_flush();
2797}
2798
2799static AIOPool bdrv_em_co_aio_pool = {
2800 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
2801 .cancel = bdrv_aio_co_cancel_em,
2802};
2803
2804static void bdrv_co_rw_bh(void *opaque)
2805{
2806 BlockDriverAIOCBCoroutine *acb = opaque;
2807
2808 acb->common.cb(acb->common.opaque, acb->req.error);
2809 qemu_bh_delete(acb->bh);
2810 qemu_aio_release(acb);
2811}
2812
2813static void coroutine_fn bdrv_co_rw(void *opaque)
2814{
2815 BlockDriverAIOCBCoroutine *acb = opaque;
2816 BlockDriverState *bs = acb->common.bs;
2817
2818 if (!acb->is_write) {
2819 acb->req.error = bs->drv->bdrv_co_readv(bs, acb->req.sector,
2820 acb->req.nb_sectors, acb->req.qiov);
2821 } else {
2822 acb->req.error = bs->drv->bdrv_co_writev(bs, acb->req.sector,
2823 acb->req.nb_sectors, acb->req.qiov);
2824 }
2825
2826 acb->bh = qemu_bh_new(bdrv_co_rw_bh, acb);
2827 qemu_bh_schedule(acb->bh);
2828}
2829
2830static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
2831 int64_t sector_num,
2832 QEMUIOVector *qiov,
2833 int nb_sectors,
2834 BlockDriverCompletionFunc *cb,
2835 void *opaque,
2836 bool is_write)
2837{
2838 Coroutine *co;
2839 BlockDriverAIOCBCoroutine *acb;
2840
2841 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2842 acb->req.sector = sector_num;
2843 acb->req.nb_sectors = nb_sectors;
2844 acb->req.qiov = qiov;
2845 acb->is_write = is_write;
2846
2847 co = qemu_coroutine_create(bdrv_co_rw);
2848 qemu_coroutine_enter(co, acb);
2849
2850 return &acb->common;
2851}
2852
2853static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
2854 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2855 BlockDriverCompletionFunc *cb, void *opaque)
2856{
2857 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
2858 false);
2859}
2860
2861static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
2862 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2863 BlockDriverCompletionFunc *cb, void *opaque)
2864{
2865 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
2866 true);
2867}
2868
b2e12bc6
CH
2869static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
2870 BlockDriverCompletionFunc *cb, void *opaque)
2871{
2872 BlockDriverAIOCBSync *acb;
2873
2874 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2875 acb->is_write = 1; /* don't bounce in the completion hadler */
2876 acb->qiov = NULL;
2877 acb->bounce = NULL;
2878 acb->ret = 0;
2879
2880 if (!acb->bh)
2881 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2882
2883 bdrv_flush(bs);
2884 qemu_bh_schedule(acb->bh);
2885 return &acb->common;
2886}
2887
016f5cf6
AG
2888static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
2889 BlockDriverCompletionFunc *cb, void *opaque)
2890{
2891 BlockDriverAIOCBSync *acb;
2892
2893 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2894 acb->is_write = 1; /* don't bounce in the completion handler */
2895 acb->qiov = NULL;
2896 acb->bounce = NULL;
2897 acb->ret = 0;
2898
2899 if (!acb->bh) {
2900 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2901 }
2902
2903 qemu_bh_schedule(acb->bh);
2904 return &acb->common;
2905}
2906
83f64091
FB
2907/**************************************************************/
2908/* sync block device emulation */
ea2384d3 2909
83f64091
FB
/* Completion callback that stores ret in the int pointed to by opaque. */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
2914
83f64091
FB
2915#define NOT_DONE 0x7fffffff
2916
5fafdf24 2917static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
83f64091 2918 uint8_t *buf, int nb_sectors)
7a6cba61 2919{
ce1a14dc
PB
2920 int async_ret;
2921 BlockDriverAIOCB *acb;
f141eafe
AL
2922 struct iovec iov;
2923 QEMUIOVector qiov;
83f64091 2924
83f64091 2925 async_ret = NOT_DONE;
3f4cb3d3 2926 iov.iov_base = (void *)buf;
eb5a3165 2927 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
f141eafe 2928 qemu_iovec_init_external(&qiov, &iov, 1);
1ed20acf
SH
2929
2930 acb = bs->drv->bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
2931 bdrv_rw_em_cb, &async_ret);
65d6b3d8
KW
2932 if (acb == NULL) {
2933 async_ret = -1;
2934 goto fail;
2935 }
baf35cb9 2936
83f64091
FB
2937 while (async_ret == NOT_DONE) {
2938 qemu_aio_wait();
2939 }
baf35cb9 2940
65d6b3d8
KW
2941
2942fail:
83f64091 2943 return async_ret;
7a6cba61
PB
2944}
2945
83f64091
FB
2946static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
2947 const uint8_t *buf, int nb_sectors)
2948{
ce1a14dc
PB
2949 int async_ret;
2950 BlockDriverAIOCB *acb;
f141eafe
AL
2951 struct iovec iov;
2952 QEMUIOVector qiov;
83f64091 2953
83f64091 2954 async_ret = NOT_DONE;
f141eafe 2955 iov.iov_base = (void *)buf;
eb5a3165 2956 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
f141eafe 2957 qemu_iovec_init_external(&qiov, &iov, 1);
1ed20acf
SH
2958
2959 acb = bs->drv->bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
2960 bdrv_rw_em_cb, &async_ret);
65d6b3d8
KW
2961 if (acb == NULL) {
2962 async_ret = -1;
2963 goto fail;
2964 }
83f64091
FB
2965 while (async_ret == NOT_DONE) {
2966 qemu_aio_wait();
2967 }
65d6b3d8
KW
2968
2969fail:
83f64091
FB
2970 return async_ret;
2971}
ea2384d3
FB
2972
2973void bdrv_init(void)
2974{
5efa9d5a 2975 module_call_init(MODULE_INIT_BLOCK);
ea2384d3 2976}
ce1a14dc 2977
eb852011
MA
2978void bdrv_init_with_whitelist(void)
2979{
2980 use_bdrv_whitelist = 1;
2981 bdrv_init();
2982}
2983
c16b5a2c
CH
2984void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2985 BlockDriverCompletionFunc *cb, void *opaque)
ce1a14dc 2986{
ce1a14dc
PB
2987 BlockDriverAIOCB *acb;
2988
6bbff9a0
AL
2989 if (pool->free_aiocb) {
2990 acb = pool->free_aiocb;
2991 pool->free_aiocb = acb->next;
ce1a14dc 2992 } else {
7267c094 2993 acb = g_malloc0(pool->aiocb_size);
6bbff9a0 2994 acb->pool = pool;
ce1a14dc
PB
2995 }
2996 acb->bs = bs;
2997 acb->cb = cb;
2998 acb->opaque = opaque;
2999 return acb;
3000}
3001
3002void qemu_aio_release(void *p)
3003{
6bbff9a0
AL
3004 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
3005 AIOPool *pool = acb->pool;
3006 acb->next = pool->free_aiocb;
3007 pool->free_aiocb = acb;
ce1a14dc 3008}
19cb3738 3009
f9f05dc5
KW
3010/**************************************************************/
3011/* Coroutine block device emulation */
3012
3013typedef struct CoroutineIOCompletion {
3014 Coroutine *coroutine;
3015 int ret;
3016} CoroutineIOCompletion;
3017
3018static void bdrv_co_io_em_complete(void *opaque, int ret)
3019{
3020 CoroutineIOCompletion *co = opaque;
3021
3022 co->ret = ret;
3023 qemu_coroutine_enter(co->coroutine, NULL);
3024}
3025
3026static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
3027 int nb_sectors, QEMUIOVector *iov,
3028 bool is_write)
3029{
3030 CoroutineIOCompletion co = {
3031 .coroutine = qemu_coroutine_self(),
3032 };
3033 BlockDriverAIOCB *acb;
3034
3035 if (is_write) {
a652d160
SH
3036 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
3037 bdrv_co_io_em_complete, &co);
f9f05dc5 3038 } else {
a652d160
SH
3039 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3040 bdrv_co_io_em_complete, &co);
f9f05dc5
KW
3041 }
3042
59370aaa 3043 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
f9f05dc5
KW
3044 if (!acb) {
3045 return -EIO;
3046 }
3047 qemu_coroutine_yield();
3048
3049 return co.ret;
3050}
3051
3052static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3053 int64_t sector_num, int nb_sectors,
3054 QEMUIOVector *iov)
3055{
3056 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3057}
3058
3059static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3060 int64_t sector_num, int nb_sectors,
3061 QEMUIOVector *iov)
3062{
3063 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3064}
3065
e7a8a783
KW
3066static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs)
3067{
3068 CoroutineIOCompletion co = {
3069 .coroutine = qemu_coroutine_self(),
3070 };
3071 BlockDriverAIOCB *acb;
3072
3073 acb = bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3074 if (!acb) {
3075 return -EIO;
3076 }
3077 qemu_coroutine_yield();
3078 return co.ret;
3079}
3080
19cb3738
FB
3081/**************************************************************/
3082/* removable device support */
3083
3084/**
3085 * Return TRUE if the media is present
3086 */
3087int bdrv_is_inserted(BlockDriverState *bs)
3088{
3089 BlockDriver *drv = bs->drv;
a1aff5bf 3090
19cb3738
FB
3091 if (!drv)
3092 return 0;
3093 if (!drv->bdrv_is_inserted)
a1aff5bf
MA
3094 return 1;
3095 return drv->bdrv_is_inserted(bs);
19cb3738
FB
3096}
3097
3098/**
8e49ca46
MA
3099 * Return whether the media changed since the last call to this
3100 * function, or -ENOTSUP if we don't know. Most drivers don't know.
19cb3738
FB
3101 */
3102int bdrv_media_changed(BlockDriverState *bs)
3103{
3104 BlockDriver *drv = bs->drv;
19cb3738 3105
8e49ca46
MA
3106 if (drv && drv->bdrv_media_changed) {
3107 return drv->bdrv_media_changed(bs);
3108 }
3109 return -ENOTSUP;
19cb3738
FB
3110}
3111
3112/**
3113 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3114 */
fdec4404 3115void bdrv_eject(BlockDriverState *bs, int eject_flag)
19cb3738
FB
3116{
3117 BlockDriver *drv = bs->drv;
19cb3738 3118
822e1cd1
MA
3119 if (drv && drv->bdrv_eject) {
3120 drv->bdrv_eject(bs, eject_flag);
19cb3738
FB
3121 }
3122}
3123
19cb3738
FB
3124/**
3125 * Lock or unlock the media (if it is locked, the user won't be able
3126 * to eject it manually).
3127 */
025e849a 3128void bdrv_lock_medium(BlockDriverState *bs, bool locked)
19cb3738
FB
3129{
3130 BlockDriver *drv = bs->drv;
3131
025e849a 3132 trace_bdrv_lock_medium(bs, locked);
b8c6d095 3133
025e849a
MA
3134 if (drv && drv->bdrv_lock_medium) {
3135 drv->bdrv_lock_medium(bs, locked);
19cb3738
FB
3136 }
3137}
985a03b0
TS
3138
3139/* needed for generic scsi interface */
3140
3141int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3142{
3143 BlockDriver *drv = bs->drv;
3144
3145 if (drv && drv->bdrv_ioctl)
3146 return drv->bdrv_ioctl(bs, req, buf);
3147 return -ENOTSUP;
3148}
7d780669 3149
221f715d
AL
3150BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3151 unsigned long int req, void *buf,
3152 BlockDriverCompletionFunc *cb, void *opaque)
7d780669 3153{
221f715d 3154 BlockDriver *drv = bs->drv;
7d780669 3155
221f715d
AL
3156 if (drv && drv->bdrv_aio_ioctl)
3157 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3158 return NULL;
7d780669 3159}
e268ca52 3160
7b6f9300
MA
3161void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3162{
3163 bs->buffer_alignment = align;
3164}
7cd1e32a 3165
e268ca52
AL
3166void *qemu_blockalign(BlockDriverState *bs, size_t size)
3167{
3168 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3169}
7cd1e32a 3170
3171void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3172{
3173 int64_t bitmap_size;
a55eb92c 3174
aaa0eb75 3175 bs->dirty_count = 0;
a55eb92c 3176 if (enable) {
c6d22830
JK
3177 if (!bs->dirty_bitmap) {
3178 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3179 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3180 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
a55eb92c 3181
7267c094 3182 bs->dirty_bitmap = g_malloc0(bitmap_size);
a55eb92c 3183 }
7cd1e32a 3184 } else {
c6d22830 3185 if (bs->dirty_bitmap) {
7267c094 3186 g_free(bs->dirty_bitmap);
c6d22830 3187 bs->dirty_bitmap = NULL;
a55eb92c 3188 }
7cd1e32a 3189 }
3190}
3191
3192int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3193{
6ea44308 3194 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c 3195
c6d22830
JK
3196 if (bs->dirty_bitmap &&
3197 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
6d59fec1
MT
3198 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3199 (1UL << (chunk % (sizeof(unsigned long) * 8))));
7cd1e32a 3200 } else {
3201 return 0;
3202 }
3203}
3204
a55eb92c
JK
3205void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3206 int nr_sectors)
7cd1e32a 3207{
3208 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3209}
aaa0eb75
LS
3210
3211int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3212{
3213 return bs->dirty_count;
3214}
f88e1a42 3215
db593f25
MT
3216void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3217{
3218 assert(bs->in_use != in_use);
3219 bs->in_use = in_use;
3220}
3221
3222int bdrv_in_use(BlockDriverState *bs)
3223{
3224 return bs->in_use;
3225}
3226
28a7282a
LC
3227void bdrv_iostatus_enable(BlockDriverState *bs)
3228{
3229 bs->iostatus = BDRV_IOS_OK;
3230}
3231
3232/* The I/O status is only enabled if the drive explicitly
3233 * enables it _and_ the VM is configured to stop on errors */
3234bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3235{
3236 return (bs->iostatus != BDRV_IOS_INVAL &&
3237 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3238 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3239 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3240}
3241
3242void bdrv_iostatus_disable(BlockDriverState *bs)
3243{
3244 bs->iostatus = BDRV_IOS_INVAL;
3245}
3246
3247void bdrv_iostatus_reset(BlockDriverState *bs)
3248{
3249 if (bdrv_iostatus_is_enabled(bs)) {
3250 bs->iostatus = BDRV_IOS_OK;
3251 }
3252}
3253
3254/* XXX: Today this is set by device models because it makes the implementation
3255 quite simple. However, the block layer knows about the error, so it's
3256 possible to implement this without device models being involved */
3257void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3258{
3259 if (bdrv_iostatus_is_enabled(bs) && bs->iostatus == BDRV_IOS_OK) {
3260 assert(error >= 0);
3261 bs->iostatus = error == ENOSPC ? BDRV_IOS_ENOSPC : BDRV_IOS_FAILED;
3262 }
3263}
3264
a597e79c
CH
3265void
3266bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3267 enum BlockAcctType type)
3268{
3269 assert(type < BDRV_MAX_IOTYPE);
3270
3271 cookie->bytes = bytes;
c488c7f6 3272 cookie->start_time_ns = get_clock();
a597e79c
CH
3273 cookie->type = type;
3274}
3275
3276void
3277bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3278{
3279 assert(cookie->type < BDRV_MAX_IOTYPE);
3280
3281 bs->nr_bytes[cookie->type] += cookie->bytes;
3282 bs->nr_ops[cookie->type]++;
c488c7f6 3283 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
a597e79c
CH
3284}
3285
f88e1a42
JS
3286int bdrv_img_create(const char *filename, const char *fmt,
3287 const char *base_filename, const char *base_fmt,
3288 char *options, uint64_t img_size, int flags)
3289{
3290 QEMUOptionParameter *param = NULL, *create_options = NULL;
d220894e 3291 QEMUOptionParameter *backing_fmt, *backing_file, *size;
f88e1a42
JS
3292 BlockDriverState *bs = NULL;
3293 BlockDriver *drv, *proto_drv;
96df67d1 3294 BlockDriver *backing_drv = NULL;
f88e1a42
JS
3295 int ret = 0;
3296
3297 /* Find driver and parse its options */
3298 drv = bdrv_find_format(fmt);
3299 if (!drv) {
3300 error_report("Unknown file format '%s'", fmt);
4f70f249 3301 ret = -EINVAL;
f88e1a42
JS
3302 goto out;
3303 }
3304
3305 proto_drv = bdrv_find_protocol(filename);
3306 if (!proto_drv) {
3307 error_report("Unknown protocol '%s'", filename);
4f70f249 3308 ret = -EINVAL;
f88e1a42
JS
3309 goto out;
3310 }
3311
3312 create_options = append_option_parameters(create_options,
3313 drv->create_options);
3314 create_options = append_option_parameters(create_options,
3315 proto_drv->create_options);
3316
3317 /* Create parameter list with default values */
3318 param = parse_option_parameters("", create_options, param);
3319
3320 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3321
3322 /* Parse -o options */
3323 if (options) {
3324 param = parse_option_parameters(options, create_options, param);
3325 if (param == NULL) {
3326 error_report("Invalid options for file format '%s'.", fmt);
4f70f249 3327 ret = -EINVAL;
f88e1a42
JS
3328 goto out;
3329 }
3330 }
3331
3332 if (base_filename) {
3333 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3334 base_filename)) {
3335 error_report("Backing file not supported for file format '%s'",
3336 fmt);
4f70f249 3337 ret = -EINVAL;
f88e1a42
JS
3338 goto out;
3339 }
3340 }
3341
3342 if (base_fmt) {
3343 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3344 error_report("Backing file format not supported for file "
3345 "format '%s'", fmt);
4f70f249 3346 ret = -EINVAL;
f88e1a42
JS
3347 goto out;
3348 }
3349 }
3350
792da93a
JS
3351 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3352 if (backing_file && backing_file->value.s) {
3353 if (!strcmp(filename, backing_file->value.s)) {
3354 error_report("Error: Trying to create an image with the "
3355 "same filename as the backing file");
4f70f249 3356 ret = -EINVAL;
792da93a
JS
3357 goto out;
3358 }
3359 }
3360
f88e1a42
JS
3361 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3362 if (backing_fmt && backing_fmt->value.s) {
96df67d1
SH
3363 backing_drv = bdrv_find_format(backing_fmt->value.s);
3364 if (!backing_drv) {
f88e1a42
JS
3365 error_report("Unknown backing file format '%s'",
3366 backing_fmt->value.s);
4f70f249 3367 ret = -EINVAL;
f88e1a42
JS
3368 goto out;
3369 }
3370 }
3371
3372 // The size for the image must always be specified, with one exception:
3373 // If we are using a backing file, we can obtain the size from there
d220894e
KW
3374 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3375 if (size && size->value.n == -1) {
f88e1a42
JS
3376 if (backing_file && backing_file->value.s) {
3377 uint64_t size;
f88e1a42
JS
3378 char buf[32];
3379
f88e1a42
JS
3380 bs = bdrv_new("");
3381
96df67d1 3382 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
f88e1a42 3383 if (ret < 0) {
96df67d1 3384 error_report("Could not open '%s'", backing_file->value.s);
f88e1a42
JS
3385 goto out;
3386 }
3387 bdrv_get_geometry(bs, &size);
3388 size *= 512;
3389
3390 snprintf(buf, sizeof(buf), "%" PRId64, size);
3391 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3392 } else {
3393 error_report("Image creation needs a size parameter");
4f70f249 3394 ret = -EINVAL;
f88e1a42
JS
3395 goto out;
3396 }
3397 }
3398
3399 printf("Formatting '%s', fmt=%s ", filename, fmt);
3400 print_option_parameters(param);
3401 puts("");
3402
3403 ret = bdrv_create(drv, filename, param);
3404
3405 if (ret < 0) {
3406 if (ret == -ENOTSUP) {
3407 error_report("Formatting or formatting option not supported for "
3408 "file format '%s'", fmt);
3409 } else if (ret == -EFBIG) {
3410 error_report("The image size is too large for file format '%s'",
3411 fmt);
3412 } else {
3413 error_report("%s: error while creating %s: %s", filename, fmt,
3414 strerror(-ret));
3415 }
3416 }
3417
3418out:
3419 free_option_parameters(create_options);
3420 free_option_parameters(param);
3421
3422 if (bs) {
3423 bdrv_delete(bs);
3424 }
4f70f249
JS
3425
3426 return ret;
f88e1a42 3427}