]> git.proxmox.com Git - mirror_qemu.git/blame - block.c
qcow2: fix some errors and typo in qcow2.txt
[mirror_qemu.git] / block.c
CommitLineData
fc01f7e7
FB
1/*
2 * QEMU System Emulator block driver
5fafdf24 3 *
fc01f7e7 4 * Copyright (c) 2003 Fabrice Bellard
5fafdf24 5 *
fc01f7e7
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
3990d09a 24#include "config-host.h"
faf07963 25#include "qemu-common.h"
6d519a5f 26#include "trace.h"
376253ec 27#include "monitor.h"
ea2384d3 28#include "block_int.h"
5efa9d5a 29#include "module.h"
d15e5465 30#include "qemu-objects.h"
68485420 31#include "qemu-coroutine.h"
fc01f7e7 32
71e72a19 33#ifdef CONFIG_BSD
7674e7bf
FB
34#include <sys/types.h>
35#include <sys/stat.h>
36#include <sys/ioctl.h>
72cf2d4f 37#include <sys/queue.h>
c5e97233 38#ifndef __DragonFly__
7674e7bf
FB
39#include <sys/disk.h>
40#endif
c5e97233 41#endif
7674e7bf 42
49dc768d
AL
43#ifdef _WIN32
44#include <windows.h>
45#endif
46
1c9805a3
SH
47#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
48
7d4b4ba5 49static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
f141eafe
AL
50static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
51 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
c87c0672 52 BlockDriverCompletionFunc *cb, void *opaque);
f141eafe
AL
53static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
54 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 55 BlockDriverCompletionFunc *cb, void *opaque);
f9f05dc5
KW
56static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
57 int64_t sector_num, int nb_sectors,
58 QEMUIOVector *iov);
59static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
60 int64_t sector_num, int nb_sectors,
61 QEMUIOVector *iov);
c5fbe571
SH
62static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
63 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
1c9805a3
SH
64static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
65 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
b2a61371
SH
66static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
67 int64_t sector_num,
68 QEMUIOVector *qiov,
69 int nb_sectors,
70 BlockDriverCompletionFunc *cb,
71 void *opaque,
8c5873d6 72 bool is_write);
b2a61371 73static void coroutine_fn bdrv_co_do_rw(void *opaque);
ec530c81 74
1b7bdbc1
SH
75static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
76 QTAILQ_HEAD_INITIALIZER(bdrv_states);
7ee930d0 77
8a22f02a
SH
78static QLIST_HEAD(, BlockDriver) bdrv_drivers =
79 QLIST_HEAD_INITIALIZER(bdrv_drivers);
ea2384d3 80
f9092b10
MA
81/* The device to use for VM snapshots */
82static BlockDriverState *bs_snapshots;
83
eb852011
MA
84/* If non-zero, use only whitelisted block drivers */
85static int use_bdrv_whitelist;
86
9e0b22f4
SH
87#ifdef _WIN32
/* Return non-zero when filename begins with an ASCII letter followed by ':' */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_letter = (letter >= 'a' && letter <= 'z') ||
                    (letter >= 'A' && letter <= 'Z');

    if (!is_letter) {
        /* filename[1] is deliberately not inspected in this case */
        return 0;
    }
    return filename[1] == ':';
}
94
/*
 * Return 1 when filename is exactly a drive specification ("d:") or starts
 * with one of the prefixes "\\.\" or "//./", 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    int bare_drive = is_windows_drive_prefix(filename) && filename[2] == '\0';

    if (bare_drive) {
        return 1;
    }
    if (strstart(filename, "\\\\.\\", NULL)) {
        return 1;
    }
    if (strstart(filename, "//./", NULL)) {
        return 1;
    }
    return 0;
}
105#endif
106
/*
 * Check whether the path carries a "<protocol>:" part.  The test is simply
 * whether a ':' occurs anywhere in the path; on Windows, drive names and
 * drive-letter prefixes are excluded first.
 */
static int path_has_protocol(const char *path)
{
    const char *colon;

#ifdef _WIN32
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    colon = strchr(path, ':');
    return colon != NULL;
}
119
/*
 * Return non-zero if the path is absolute.  Everything up to and including
 * the first ':' (if any) is skipped before looking for a leading '/'
 * (or '\\' on Windows).
 */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif
    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;
#ifdef _WIN32
    return (*start == '/' || *start == '\\');
#else
    return (*start == '/');
#endif
}
139
83f64091
FB
/*
 * Build in dest (at most dest_size bytes, always NUL-terminated) the path
 * designated by filename.  An absolute filename is copied unchanged;
 * otherwise it is appended to the directory part of base_path, i.e.
 * everything up to the later of the first ':' and the last path separator.
 * URLs are supported.  Nothing is written when dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *cut;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just past the protocol separator, if there is one */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* position just past the last directory separator, if there is one */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* keep whichever prefix is longer */
    cut = (after_sep > after_colon) ? after_sep : after_colon;
    prefix_len = cut - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
183
5efa9d5a 184void bdrv_register(BlockDriver *bdrv)
ea2384d3 185{
8c5873d6
SH
186 /* Block drivers without coroutine functions need emulation */
187 if (!bdrv->bdrv_co_readv) {
f9f05dc5
KW
188 bdrv->bdrv_co_readv = bdrv_co_readv_em;
189 bdrv->bdrv_co_writev = bdrv_co_writev_em;
190
f8c35c1d
SH
191 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
192 * the block driver lacks aio we need to emulate that too.
193 */
f9f05dc5
KW
194 if (!bdrv->bdrv_aio_readv) {
195 /* add AIO emulation layer */
196 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
197 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
f9f05dc5 198 }
83f64091 199 }
b2e12bc6 200
8a22f02a 201 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
ea2384d3 202}
b338082b
FB
203
204/* create a new block device (by default it is empty) */
205BlockDriverState *bdrv_new(const char *device_name)
206{
1b7bdbc1 207 BlockDriverState *bs;
b338082b 208
7267c094 209 bs = g_malloc0(sizeof(BlockDriverState));
b338082b 210 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
ea2384d3 211 if (device_name[0] != '\0') {
1b7bdbc1 212 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
ea2384d3 213 }
28a7282a 214 bdrv_iostatus_disable(bs);
b338082b
FB
215 return bs;
216}
217
ea2384d3
FB
218BlockDriver *bdrv_find_format(const char *format_name)
219{
220 BlockDriver *drv1;
8a22f02a
SH
221 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
222 if (!strcmp(drv1->format_name, format_name)) {
ea2384d3 223 return drv1;
8a22f02a 224 }
ea2384d3
FB
225 }
226 return NULL;
227}
228
eb852011
MA
229static int bdrv_is_whitelisted(BlockDriver *drv)
230{
231 static const char *whitelist[] = {
232 CONFIG_BDRV_WHITELIST
233 };
234 const char **p;
235
236 if (!whitelist[0])
237 return 1; /* no whitelist, anything goes */
238
239 for (p = whitelist; *p; p++) {
240 if (!strcmp(drv->format_name, *p)) {
241 return 1;
242 }
243 }
244 return 0;
245}
246
247BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
248{
249 BlockDriver *drv = bdrv_find_format(format_name);
250 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
251}
252
0e7e1989
KW
253int bdrv_create(BlockDriver *drv, const char* filename,
254 QEMUOptionParameter *options)
ea2384d3
FB
255{
256 if (!drv->bdrv_create)
257 return -ENOTSUP;
0e7e1989
KW
258
259 return drv->bdrv_create(filename, options);
ea2384d3
FB
260}
261
84a12e66
CH
262int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
263{
264 BlockDriver *drv;
265
b50cbabc 266 drv = bdrv_find_protocol(filename);
84a12e66 267 if (drv == NULL) {
16905d71 268 return -ENOENT;
84a12e66
CH
269 }
270
271 return bdrv_create(drv, filename, options);
272}
273
#ifdef _WIN32
/*
 * Store the name of a freshly created temporary file in filename.
 * The buffer must hold at least MAX_PATH characters.
 */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    /* GetTempFileName() may write up to MAX_PATH characters to filename */
    assert(size >= MAX_PATH);
    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
/*
 * Store the name of a freshly created temporary file in filename (at most
 * size bytes).  The file is created in $TMPDIR, or /tmp when TMPDIR is
 * unset, and closed again before returning.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;

    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    /* mkstemp() returns -1 on failure; never hand that to close() */
    if (fd >= 0) {
        close(fd);
    }
}
#endif
fc01f7e7 296
84a12e66
CH
297/*
298 * Detect host devices. By convention, /dev/cdrom[N] is always
299 * recognized as a host CDROM.
300 */
301static BlockDriver *find_hdev_driver(const char *filename)
302{
303 int score_max = 0, score;
304 BlockDriver *drv = NULL, *d;
305
306 QLIST_FOREACH(d, &bdrv_drivers, list) {
307 if (d->bdrv_probe_device) {
308 score = d->bdrv_probe_device(filename);
309 if (score > score_max) {
310 score_max = score;
311 drv = d;
312 }
313 }
314 }
315
316 return drv;
317}
318
b50cbabc 319BlockDriver *bdrv_find_protocol(const char *filename)
83f64091
FB
320{
321 BlockDriver *drv1;
322 char protocol[128];
1cec71e3 323 int len;
83f64091 324 const char *p;
19cb3738 325
66f82cee
KW
326 /* TODO Drivers without bdrv_file_open must be specified explicitly */
327
39508e7a
CH
328 /*
329 * XXX(hch): we really should not let host device detection
330 * override an explicit protocol specification, but moving this
331 * later breaks access to device names with colons in them.
332 * Thanks to the brain-dead persistent naming schemes on udev-
333 * based Linux systems those actually are quite common.
334 */
335 drv1 = find_hdev_driver(filename);
336 if (drv1) {
337 return drv1;
338 }
339
9e0b22f4 340 if (!path_has_protocol(filename)) {
39508e7a 341 return bdrv_find_format("file");
84a12e66 342 }
9e0b22f4
SH
343 p = strchr(filename, ':');
344 assert(p != NULL);
1cec71e3
AL
345 len = p - filename;
346 if (len > sizeof(protocol) - 1)
347 len = sizeof(protocol) - 1;
348 memcpy(protocol, filename, len);
349 protocol[len] = '\0';
8a22f02a 350 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
5fafdf24 351 if (drv1->protocol_name &&
8a22f02a 352 !strcmp(drv1->protocol_name, protocol)) {
83f64091 353 return drv1;
8a22f02a 354 }
83f64091
FB
355 }
356 return NULL;
357}
358
c98ac35d 359static int find_image_format(const char *filename, BlockDriver **pdrv)
f3a5d3f8
CH
360{
361 int ret, score, score_max;
362 BlockDriver *drv1, *drv;
363 uint8_t buf[2048];
364 BlockDriverState *bs;
365
f5edb014 366 ret = bdrv_file_open(&bs, filename, 0);
c98ac35d
SW
367 if (ret < 0) {
368 *pdrv = NULL;
369 return ret;
370 }
f8ea0b00 371
08a00559
KW
372 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
373 if (bs->sg || !bdrv_is_inserted(bs)) {
1a396859 374 bdrv_delete(bs);
c98ac35d
SW
375 drv = bdrv_find_format("raw");
376 if (!drv) {
377 ret = -ENOENT;
378 }
379 *pdrv = drv;
380 return ret;
1a396859 381 }
f8ea0b00 382
83f64091
FB
383 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
384 bdrv_delete(bs);
385 if (ret < 0) {
c98ac35d
SW
386 *pdrv = NULL;
387 return ret;
83f64091
FB
388 }
389
ea2384d3 390 score_max = 0;
84a12e66 391 drv = NULL;
8a22f02a 392 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
83f64091
FB
393 if (drv1->bdrv_probe) {
394 score = drv1->bdrv_probe(buf, ret, filename);
395 if (score > score_max) {
396 score_max = score;
397 drv = drv1;
398 }
0849bf08 399 }
fc01f7e7 400 }
c98ac35d
SW
401 if (!drv) {
402 ret = -ENOENT;
403 }
404 *pdrv = drv;
405 return ret;
ea2384d3
FB
406}
407
51762288
SH
408/**
409 * Set the current 'total_sectors' value
410 */
411static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
412{
413 BlockDriver *drv = bs->drv;
414
396759ad
NB
415 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
416 if (bs->sg)
417 return 0;
418
51762288
SH
419 /* query actual device if possible, otherwise just trust the hint */
420 if (drv->bdrv_getlength) {
421 int64_t length = drv->bdrv_getlength(bs);
422 if (length < 0) {
423 return length;
424 }
425 hint = length >> BDRV_SECTOR_BITS;
426 }
427
428 bs->total_sectors = hint;
429 return 0;
430}
431
c3993cdc
SH
432/**
433 * Set open flags for a given cache mode
434 *
435 * Return 0 on success, -1 if the cache mode was invalid.
436 */
437int bdrv_parse_cache_flags(const char *mode, int *flags)
438{
439 *flags &= ~BDRV_O_CACHE_MASK;
440
441 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
442 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
92196b2f
SH
443 } else if (!strcmp(mode, "directsync")) {
444 *flags |= BDRV_O_NOCACHE;
c3993cdc
SH
445 } else if (!strcmp(mode, "writeback")) {
446 *flags |= BDRV_O_CACHE_WB;
447 } else if (!strcmp(mode, "unsafe")) {
448 *flags |= BDRV_O_CACHE_WB;
449 *flags |= BDRV_O_NO_FLUSH;
450 } else if (!strcmp(mode, "writethrough")) {
451 /* this is the default */
452 } else {
453 return -1;
454 }
455
456 return 0;
457}
458
57915332
KW
459/*
460 * Common part for opening disk images and files
461 */
462static int bdrv_open_common(BlockDriverState *bs, const char *filename,
463 int flags, BlockDriver *drv)
464{
465 int ret, open_flags;
466
467 assert(drv != NULL);
468
28dcee10
SH
469 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
470
66f82cee 471 bs->file = NULL;
51762288 472 bs->total_sectors = 0;
57915332
KW
473 bs->encrypted = 0;
474 bs->valid_key = 0;
475 bs->open_flags = flags;
57915332
KW
476 bs->buffer_alignment = 512;
477
478 pstrcpy(bs->filename, sizeof(bs->filename), filename);
479
480 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
481 return -ENOTSUP;
482 }
483
484 bs->drv = drv;
7267c094 485 bs->opaque = g_malloc0(drv->instance_size);
57915332 486
a6599793 487 if (flags & BDRV_O_CACHE_WB)
57915332
KW
488 bs->enable_write_cache = 1;
489
490 /*
491 * Clear flags that are internal to the block layer before opening the
492 * image.
493 */
494 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
495
496 /*
ebabb67a 497 * Snapshots should be writable.
57915332
KW
498 */
499 if (bs->is_temporary) {
500 open_flags |= BDRV_O_RDWR;
501 }
502
66f82cee
KW
503 /* Open the image, either directly or using a protocol */
504 if (drv->bdrv_file_open) {
505 ret = drv->bdrv_file_open(bs, filename, open_flags);
506 } else {
507 ret = bdrv_file_open(&bs->file, filename, open_flags);
508 if (ret >= 0) {
509 ret = drv->bdrv_open(bs, open_flags);
510 }
511 }
512
57915332
KW
513 if (ret < 0) {
514 goto free_and_fail;
515 }
516
517 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
51762288
SH
518
519 ret = refresh_total_sectors(bs, bs->total_sectors);
520 if (ret < 0) {
521 goto free_and_fail;
57915332 522 }
51762288 523
57915332
KW
524#ifndef _WIN32
525 if (bs->is_temporary) {
526 unlink(filename);
527 }
528#endif
529 return 0;
530
531free_and_fail:
66f82cee
KW
532 if (bs->file) {
533 bdrv_delete(bs->file);
534 bs->file = NULL;
535 }
7267c094 536 g_free(bs->opaque);
57915332
KW
537 bs->opaque = NULL;
538 bs->drv = NULL;
539 return ret;
540}
541
b6ce07aa
KW
542/*
543 * Opens a file using a protocol (file, host_device, nbd, ...)
544 */
83f64091 545int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
ea2384d3 546{
83f64091 547 BlockDriverState *bs;
6db95603 548 BlockDriver *drv;
83f64091
FB
549 int ret;
550
b50cbabc 551 drv = bdrv_find_protocol(filename);
6db95603
CH
552 if (!drv) {
553 return -ENOENT;
554 }
555
83f64091 556 bs = bdrv_new("");
b6ce07aa 557 ret = bdrv_open_common(bs, filename, flags, drv);
83f64091
FB
558 if (ret < 0) {
559 bdrv_delete(bs);
560 return ret;
3b0d4f61 561 }
71d0770c 562 bs->growable = 1;
83f64091
FB
563 *pbs = bs;
564 return 0;
565}
566
b6ce07aa
KW
567/*
568 * Opens a disk image (raw, qcow2, vmdk, ...)
569 */
d6e9098e
KW
570int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
571 BlockDriver *drv)
ea2384d3 572{
b6ce07aa 573 int ret;
712e7874 574
83f64091 575 if (flags & BDRV_O_SNAPSHOT) {
ea2384d3
FB
576 BlockDriverState *bs1;
577 int64_t total_size;
7c96d46e 578 int is_protocol = 0;
91a073a9
KW
579 BlockDriver *bdrv_qcow2;
580 QEMUOptionParameter *options;
b6ce07aa
KW
581 char tmp_filename[PATH_MAX];
582 char backing_filename[PATH_MAX];
3b46e624 583
ea2384d3
FB
584 /* if snapshot, we create a temporary backing file and open it
585 instead of opening 'filename' directly */
33e3963e 586
ea2384d3
FB
587 /* if there is a backing file, use it */
588 bs1 = bdrv_new("");
d6e9098e 589 ret = bdrv_open(bs1, filename, 0, drv);
51d7c00c 590 if (ret < 0) {
ea2384d3 591 bdrv_delete(bs1);
51d7c00c 592 return ret;
ea2384d3 593 }
3e82990b 594 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
7c96d46e
AL
595
596 if (bs1->drv && bs1->drv->protocol_name)
597 is_protocol = 1;
598
ea2384d3 599 bdrv_delete(bs1);
3b46e624 600
ea2384d3 601 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
7c96d46e
AL
602
603 /* Real path is meaningless for protocols */
604 if (is_protocol)
605 snprintf(backing_filename, sizeof(backing_filename),
606 "%s", filename);
114cdfa9
KS
607 else if (!realpath(filename, backing_filename))
608 return -errno;
7c96d46e 609
91a073a9
KW
610 bdrv_qcow2 = bdrv_find_format("qcow2");
611 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
612
3e82990b 613 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
91a073a9
KW
614 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
615 if (drv) {
616 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
617 drv->format_name);
618 }
619
620 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
d748768c 621 free_option_parameters(options);
51d7c00c
AL
622 if (ret < 0) {
623 return ret;
ea2384d3 624 }
91a073a9 625
ea2384d3 626 filename = tmp_filename;
91a073a9 627 drv = bdrv_qcow2;
ea2384d3
FB
628 bs->is_temporary = 1;
629 }
712e7874 630
b6ce07aa 631 /* Find the right image format driver */
6db95603 632 if (!drv) {
c98ac35d 633 ret = find_image_format(filename, &drv);
51d7c00c 634 }
6987307c 635
51d7c00c 636 if (!drv) {
51d7c00c 637 goto unlink_and_fail;
ea2384d3 638 }
b6ce07aa
KW
639
640 /* Open the image */
641 ret = bdrv_open_common(bs, filename, flags, drv);
642 if (ret < 0) {
6987307c
CH
643 goto unlink_and_fail;
644 }
645
b6ce07aa
KW
646 /* If there is a backing file, use it */
647 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
648 char backing_filename[PATH_MAX];
649 int back_flags;
650 BlockDriver *back_drv = NULL;
651
652 bs->backing_hd = bdrv_new("");
df2dbb4a
SH
653
654 if (path_has_protocol(bs->backing_file)) {
655 pstrcpy(backing_filename, sizeof(backing_filename),
656 bs->backing_file);
657 } else {
658 path_combine(backing_filename, sizeof(backing_filename),
659 filename, bs->backing_file);
660 }
661
662 if (bs->backing_format[0] != '\0') {
b6ce07aa 663 back_drv = bdrv_find_format(bs->backing_format);
df2dbb4a 664 }
b6ce07aa
KW
665
666 /* backing files always opened read-only */
667 back_flags =
668 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
669
670 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
671 if (ret < 0) {
672 bdrv_close(bs);
673 return ret;
674 }
675 if (bs->is_temporary) {
676 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
677 } else {
678 /* base image inherits from "parent" */
679 bs->backing_hd->keep_read_only = bs->keep_read_only;
680 }
681 }
682
683 if (!bdrv_key_required(bs)) {
7d4b4ba5 684 bdrv_dev_change_media_cb(bs, true);
b6ce07aa
KW
685 }
686
687 return 0;
688
689unlink_and_fail:
690 if (bs->is_temporary) {
691 unlink(filename);
692 }
693 return ret;
694}
695
fc01f7e7
FB
696void bdrv_close(BlockDriverState *bs)
697{
19cb3738 698 if (bs->drv) {
f9092b10
MA
699 if (bs == bs_snapshots) {
700 bs_snapshots = NULL;
701 }
557df6ac 702 if (bs->backing_hd) {
ea2384d3 703 bdrv_delete(bs->backing_hd);
557df6ac
SH
704 bs->backing_hd = NULL;
705 }
ea2384d3 706 bs->drv->bdrv_close(bs);
7267c094 707 g_free(bs->opaque);
ea2384d3
FB
708#ifdef _WIN32
709 if (bs->is_temporary) {
710 unlink(bs->filename);
711 }
67b915a5 712#endif
ea2384d3
FB
713 bs->opaque = NULL;
714 bs->drv = NULL;
b338082b 715
66f82cee
KW
716 if (bs->file != NULL) {
717 bdrv_close(bs->file);
718 }
719
7d4b4ba5 720 bdrv_dev_change_media_cb(bs, false);
b338082b
FB
721 }
722}
723
2bc93fed
MK
724void bdrv_close_all(void)
725{
726 BlockDriverState *bs;
727
728 QTAILQ_FOREACH(bs, &bdrv_states, list) {
729 bdrv_close(bs);
730 }
731}
732
d22b2f41
RH
733/* make a BlockDriverState anonymous by removing from bdrv_state list.
734 Also, NULL terminate the device_name to prevent double remove */
735void bdrv_make_anon(BlockDriverState *bs)
736{
737 if (bs->device_name[0] != '\0') {
738 QTAILQ_REMOVE(&bdrv_states, bs, list);
739 }
740 bs->device_name[0] = '\0';
741}
742
b338082b
FB
743void bdrv_delete(BlockDriverState *bs)
744{
fa879d62 745 assert(!bs->dev);
18846dee 746
1b7bdbc1 747 /* remove from list, if necessary */
d22b2f41 748 bdrv_make_anon(bs);
34c6f050 749
b338082b 750 bdrv_close(bs);
66f82cee
KW
751 if (bs->file != NULL) {
752 bdrv_delete(bs->file);
753 }
754
f9092b10 755 assert(bs != bs_snapshots);
7267c094 756 g_free(bs);
fc01f7e7
FB
757}
758
fa879d62
MA
759int bdrv_attach_dev(BlockDriverState *bs, void *dev)
760/* TODO change to DeviceState *dev when all users are qdevified */
18846dee 761{
fa879d62 762 if (bs->dev) {
18846dee
MA
763 return -EBUSY;
764 }
fa879d62 765 bs->dev = dev;
28a7282a 766 bdrv_iostatus_reset(bs);
18846dee
MA
767 return 0;
768}
769
fa879d62
MA
770/* TODO qdevified devices don't use this, remove when devices are qdevified */
771void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
18846dee 772{
fa879d62
MA
773 if (bdrv_attach_dev(bs, dev) < 0) {
774 abort();
775 }
776}
777
778void bdrv_detach_dev(BlockDriverState *bs, void *dev)
779/* TODO change to DeviceState *dev when all users are qdevified */
780{
781 assert(bs->dev == dev);
782 bs->dev = NULL;
0e49de52
MA
783 bs->dev_ops = NULL;
784 bs->dev_opaque = NULL;
29e05f20 785 bs->buffer_alignment = 512;
18846dee
MA
786}
787
fa879d62
MA
788/* TODO change to return DeviceState * when all users are qdevified */
789void *bdrv_get_attached_dev(BlockDriverState *bs)
18846dee 790{
fa879d62 791 return bs->dev;
18846dee
MA
792}
793
0e49de52
MA
794void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
795 void *opaque)
796{
797 bs->dev_ops = ops;
798 bs->dev_opaque = opaque;
2c6942fa
MA
799 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
800 bs_snapshots = NULL;
801 }
0e49de52
MA
802}
803
7d4b4ba5 804static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
0e49de52 805{
145feb17 806 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
7d4b4ba5 807 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
145feb17
MA
808 }
809}
810
2c6942fa
MA
811bool bdrv_dev_has_removable_media(BlockDriverState *bs)
812{
813 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
814}
815
e4def80b
MA
816bool bdrv_dev_is_tray_open(BlockDriverState *bs)
817{
818 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
819 return bs->dev_ops->is_tray_open(bs->dev_opaque);
820 }
821 return false;
822}
823
145feb17
MA
824static void bdrv_dev_resize_cb(BlockDriverState *bs)
825{
826 if (bs->dev_ops && bs->dev_ops->resize_cb) {
827 bs->dev_ops->resize_cb(bs->dev_opaque);
0e49de52
MA
828 }
829}
830
f107639a
MA
831bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
832{
833 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
834 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
835 }
836 return false;
837}
838
e97fc193
AL
839/*
840 * Run consistency checks on an image
841 *
e076f338 842 * Returns 0 if the check could be completed (it doesn't mean that the image is
a1c7273b 843 * free of errors) or -errno when an internal error occurred. The results of the
e076f338 844 * check are stored in res.
e97fc193 845 */
e076f338 846int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
e97fc193
AL
847{
848 if (bs->drv->bdrv_check == NULL) {
849 return -ENOTSUP;
850 }
851
e076f338 852 memset(res, 0, sizeof(*res));
9ac228e0 853 return bs->drv->bdrv_check(bs, res);
e97fc193
AL
854}
855
8a426614
KW
856#define COMMIT_BUF_SECTORS 2048
857
33e3963e
FB
858/* commit COW file into the raw image */
859int bdrv_commit(BlockDriverState *bs)
860{
19cb3738 861 BlockDriver *drv = bs->drv;
ee181196 862 BlockDriver *backing_drv;
8a426614
KW
863 int64_t sector, total_sectors;
864 int n, ro, open_flags;
4dca4b63 865 int ret = 0, rw_ret = 0;
8a426614 866 uint8_t *buf;
4dca4b63
NS
867 char filename[1024];
868 BlockDriverState *bs_rw, *bs_ro;
33e3963e 869
19cb3738
FB
870 if (!drv)
871 return -ENOMEDIUM;
4dca4b63
NS
872
873 if (!bs->backing_hd) {
874 return -ENOTSUP;
33e3963e
FB
875 }
876
4dca4b63
NS
877 if (bs->backing_hd->keep_read_only) {
878 return -EACCES;
879 }
ee181196
KW
880
881 backing_drv = bs->backing_hd->drv;
4dca4b63
NS
882 ro = bs->backing_hd->read_only;
883 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
884 open_flags = bs->backing_hd->open_flags;
885
886 if (ro) {
887 /* re-open as RW */
888 bdrv_delete(bs->backing_hd);
889 bs->backing_hd = NULL;
890 bs_rw = bdrv_new("");
ee181196
KW
891 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
892 backing_drv);
4dca4b63
NS
893 if (rw_ret < 0) {
894 bdrv_delete(bs_rw);
895 /* try to re-open read-only */
896 bs_ro = bdrv_new("");
ee181196
KW
897 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
898 backing_drv);
4dca4b63
NS
899 if (ret < 0) {
900 bdrv_delete(bs_ro);
901 /* drive not functional anymore */
902 bs->drv = NULL;
903 return ret;
904 }
905 bs->backing_hd = bs_ro;
906 return rw_ret;
907 }
908 bs->backing_hd = bs_rw;
ea2384d3 909 }
33e3963e 910
6ea44308 911 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
7267c094 912 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
8a426614
KW
913
914 for (sector = 0; sector < total_sectors; sector += n) {
915 if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
916
917 if (bdrv_read(bs, sector, buf, n) != 0) {
918 ret = -EIO;
919 goto ro_cleanup;
920 }
921
922 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
923 ret = -EIO;
924 goto ro_cleanup;
925 }
ea2384d3 926 }
33e3963e 927 }
95389c86 928
1d44952f
CH
929 if (drv->bdrv_make_empty) {
930 ret = drv->bdrv_make_empty(bs);
931 bdrv_flush(bs);
932 }
95389c86 933
3f5075ae
CH
934 /*
935 * Make sure all data we wrote to the backing device is actually
936 * stable on disk.
937 */
938 if (bs->backing_hd)
939 bdrv_flush(bs->backing_hd);
4dca4b63
NS
940
941ro_cleanup:
7267c094 942 g_free(buf);
4dca4b63
NS
943
944 if (ro) {
945 /* re-open as RO */
946 bdrv_delete(bs->backing_hd);
947 bs->backing_hd = NULL;
948 bs_ro = bdrv_new("");
ee181196
KW
949 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
950 backing_drv);
4dca4b63
NS
951 if (ret < 0) {
952 bdrv_delete(bs_ro);
953 /* drive not functional anymore */
954 bs->drv = NULL;
955 return ret;
956 }
957 bs->backing_hd = bs_ro;
958 bs->backing_hd->keep_read_only = 0;
959 }
960
1d44952f 961 return ret;
33e3963e
FB
962}
963
6ab4b5ab
MA
964void bdrv_commit_all(void)
965{
966 BlockDriverState *bs;
967
968 QTAILQ_FOREACH(bs, &bdrv_states, list) {
969 bdrv_commit(bs);
970 }
971}
972
756e6736
KW
973/*
974 * Return values:
975 * 0 - success
976 * -EINVAL - backing format specified, but no file
977 * -ENOSPC - can't update the backing file because no space is left in the
978 * image file header
979 * -ENOTSUP - format driver doesn't support changing the backing file
980 */
981int bdrv_change_backing_file(BlockDriverState *bs,
982 const char *backing_file, const char *backing_fmt)
983{
984 BlockDriver *drv = bs->drv;
985
986 if (drv->bdrv_change_backing_file != NULL) {
987 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
988 } else {
989 return -ENOTSUP;
990 }
991}
992
71d0770c
AL
993static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
994 size_t size)
995{
996 int64_t len;
997
998 if (!bdrv_is_inserted(bs))
999 return -ENOMEDIUM;
1000
1001 if (bs->growable)
1002 return 0;
1003
1004 len = bdrv_getlength(bs);
1005
fbb7b4e0
KW
1006 if (offset < 0)
1007 return -EIO;
1008
1009 if ((offset > len) || (len - offset < size))
71d0770c
AL
1010 return -EIO;
1011
1012 return 0;
1013}
1014
1015static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1016 int nb_sectors)
1017{
eb5a3165
JS
1018 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1019 nb_sectors * BDRV_SECTOR_SIZE);
71d0770c
AL
1020}
1021
1c9805a3
SH
1022typedef struct RwCo {
1023 BlockDriverState *bs;
1024 int64_t sector_num;
1025 int nb_sectors;
1026 QEMUIOVector *qiov;
1027 bool is_write;
1028 int ret;
1029} RwCo;
1030
1031static void coroutine_fn bdrv_rw_co_entry(void *opaque)
fc01f7e7 1032{
1c9805a3 1033 RwCo *rwco = opaque;
ea2384d3 1034
1c9805a3
SH
1035 if (!rwco->is_write) {
1036 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
1037 rwco->nb_sectors, rwco->qiov);
1038 } else {
1039 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
1040 rwco->nb_sectors, rwco->qiov);
1041 }
1042}
e7a8a783 1043
1c9805a3
SH
1044/*
1045 * Process a synchronous request using coroutines
1046 */
1047static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1048 int nb_sectors, bool is_write)
1049{
1050 QEMUIOVector qiov;
1051 struct iovec iov = {
1052 .iov_base = (void *)buf,
1053 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1054 };
1055 Coroutine *co;
1056 RwCo rwco = {
1057 .bs = bs,
1058 .sector_num = sector_num,
1059 .nb_sectors = nb_sectors,
1060 .qiov = &qiov,
1061 .is_write = is_write,
1062 .ret = NOT_DONE,
1063 };
e7a8a783 1064
1c9805a3 1065 qemu_iovec_init_external(&qiov, &iov, 1);
e7a8a783 1066
1c9805a3
SH
1067 if (qemu_in_coroutine()) {
1068 /* Fast-path if already in coroutine context */
1069 bdrv_rw_co_entry(&rwco);
1070 } else {
1071 co = qemu_coroutine_create(bdrv_rw_co_entry);
1072 qemu_coroutine_enter(co, &rwco);
1073 while (rwco.ret == NOT_DONE) {
1074 qemu_aio_wait();
1075 }
1076 }
1077 return rwco.ret;
1078}
b338082b 1079
1c9805a3
SH
1080/* return < 0 if error. See bdrv_write() for the return codes */
1081int bdrv_read(BlockDriverState *bs, int64_t sector_num,
1082 uint8_t *buf, int nb_sectors)
1083{
1084 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
fc01f7e7
FB
1085}
1086
7cd1e32a 1087static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
a55eb92c 1088 int nb_sectors, int dirty)
7cd1e32a
LS
1089{
1090 int64_t start, end;
c6d22830 1091 unsigned long val, idx, bit;
a55eb92c 1092
6ea44308 1093 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
c6d22830 1094 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c
JK
1095
1096 for (; start <= end; start++) {
c6d22830
JK
1097 idx = start / (sizeof(unsigned long) * 8);
1098 bit = start % (sizeof(unsigned long) * 8);
1099 val = bs->dirty_bitmap[idx];
1100 if (dirty) {
6d59fec1 1101 if (!(val & (1UL << bit))) {
aaa0eb75 1102 bs->dirty_count++;
6d59fec1 1103 val |= 1UL << bit;
aaa0eb75 1104 }
c6d22830 1105 } else {
6d59fec1 1106 if (val & (1UL << bit)) {
aaa0eb75 1107 bs->dirty_count--;
6d59fec1 1108 val &= ~(1UL << bit);
aaa0eb75 1109 }
c6d22830
JK
1110 }
1111 bs->dirty_bitmap[idx] = val;
7cd1e32a
LS
1112 }
1113}
1114
5fafdf24 1115/* Return < 0 if error. Important errors are:
19cb3738
FB
1116 -EIO generic I/O error (may happen for all errors)
1117 -ENOMEDIUM No media inserted.
1118 -EINVAL Invalid sector number or nb_sectors
1119 -EACCES Trying to write a read-only device
1120*/
5fafdf24 1121int bdrv_write(BlockDriverState *bs, int64_t sector_num,
fc01f7e7
FB
1122 const uint8_t *buf, int nb_sectors)
1123{
1c9805a3 1124 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
83f64091
FB
1125}
1126
eda578e5
AL
1127int bdrv_pread(BlockDriverState *bs, int64_t offset,
1128 void *buf, int count1)
83f64091 1129{
6ea44308 1130 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
83f64091
FB
1131 int len, nb_sectors, count;
1132 int64_t sector_num;
9a8c4cce 1133 int ret;
83f64091
FB
1134
1135 count = count1;
1136 /* first read to align to sector start */
6ea44308 1137 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
83f64091
FB
1138 if (len > count)
1139 len = count;
6ea44308 1140 sector_num = offset >> BDRV_SECTOR_BITS;
83f64091 1141 if (len > 0) {
9a8c4cce
KW
1142 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1143 return ret;
6ea44308 1144 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
83f64091
FB
1145 count -= len;
1146 if (count == 0)
1147 return count1;
1148 sector_num++;
1149 buf += len;
1150 }
1151
1152 /* read the sectors "in place" */
6ea44308 1153 nb_sectors = count >> BDRV_SECTOR_BITS;
83f64091 1154 if (nb_sectors > 0) {
9a8c4cce
KW
1155 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1156 return ret;
83f64091 1157 sector_num += nb_sectors;
6ea44308 1158 len = nb_sectors << BDRV_SECTOR_BITS;
83f64091
FB
1159 buf += len;
1160 count -= len;
1161 }
1162
1163 /* add data from the last sector */
1164 if (count > 0) {
9a8c4cce
KW
1165 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1166 return ret;
83f64091
FB
1167 memcpy(buf, tmp_buf, count);
1168 }
1169 return count1;
1170}
1171
eda578e5
AL
1172int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1173 const void *buf, int count1)
83f64091 1174{
6ea44308 1175 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
83f64091
FB
1176 int len, nb_sectors, count;
1177 int64_t sector_num;
9a8c4cce 1178 int ret;
83f64091
FB
1179
1180 count = count1;
1181 /* first write to align to sector start */
6ea44308 1182 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
83f64091
FB
1183 if (len > count)
1184 len = count;
6ea44308 1185 sector_num = offset >> BDRV_SECTOR_BITS;
83f64091 1186 if (len > 0) {
9a8c4cce
KW
1187 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1188 return ret;
6ea44308 1189 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
9a8c4cce
KW
1190 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1191 return ret;
83f64091
FB
1192 count -= len;
1193 if (count == 0)
1194 return count1;
1195 sector_num++;
1196 buf += len;
1197 }
1198
1199 /* write the sectors "in place" */
6ea44308 1200 nb_sectors = count >> BDRV_SECTOR_BITS;
83f64091 1201 if (nb_sectors > 0) {
9a8c4cce
KW
1202 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1203 return ret;
83f64091 1204 sector_num += nb_sectors;
6ea44308 1205 len = nb_sectors << BDRV_SECTOR_BITS;
83f64091
FB
1206 buf += len;
1207 count -= len;
1208 }
1209
1210 /* add data from the last sector */
1211 if (count > 0) {
9a8c4cce
KW
1212 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1213 return ret;
83f64091 1214 memcpy(tmp_buf, buf, count);
9a8c4cce
KW
1215 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1216 return ret;
83f64091
FB
1217 }
1218 return count1;
1219}
83f64091 1220
f08145fe
KW
1221/*
1222 * Writes to the file and ensures that no writes are reordered across this
1223 * request (acts as a barrier)
1224 *
1225 * Returns 0 on success, -errno in error cases.
1226 */
1227int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1228 const void *buf, int count)
1229{
1230 int ret;
1231
1232 ret = bdrv_pwrite(bs, offset, buf, count);
1233 if (ret < 0) {
1234 return ret;
1235 }
1236
92196b2f
SH
1237 /* No flush needed for cache modes that use O_DSYNC */
1238 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
f08145fe
KW
1239 bdrv_flush(bs);
1240 }
1241
1242 return 0;
1243}
1244
c5fbe571
SH
1245/*
1246 * Handle a read request in coroutine context
1247 */
1248static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
1249 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
da1fa91d
KW
1250{
1251 BlockDriver *drv = bs->drv;
1252
da1fa91d
KW
1253 if (!drv) {
1254 return -ENOMEDIUM;
1255 }
1256 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1257 return -EIO;
1258 }
1259
1260 return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1261}
1262
c5fbe571 1263int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
da1fa91d
KW
1264 int nb_sectors, QEMUIOVector *qiov)
1265{
c5fbe571 1266 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
da1fa91d 1267
c5fbe571
SH
1268 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
1269}
1270
1271/*
1272 * Handle a write request in coroutine context
1273 */
1274static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
1275 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1276{
1277 BlockDriver *drv = bs->drv;
6b7cb247 1278 int ret;
da1fa91d
KW
1279
1280 if (!bs->drv) {
1281 return -ENOMEDIUM;
1282 }
1283 if (bs->read_only) {
1284 return -EACCES;
1285 }
1286 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1287 return -EIO;
1288 }
1289
6b7cb247
SH
1290 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1291
da1fa91d
KW
1292 if (bs->dirty_bitmap) {
1293 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1294 }
1295
1296 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1297 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1298 }
1299
6b7cb247 1300 return ret;
da1fa91d
KW
1301}
1302
c5fbe571
SH
1303int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1304 int nb_sectors, QEMUIOVector *qiov)
1305{
1306 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1307
1308 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
1309}
1310
83f64091
FB
1311/**
1312 * Truncate file to 'offset' bytes (needed only for file protocols)
1313 */
1314int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1315{
1316 BlockDriver *drv = bs->drv;
51762288 1317 int ret;
83f64091 1318 if (!drv)
19cb3738 1319 return -ENOMEDIUM;
83f64091
FB
1320 if (!drv->bdrv_truncate)
1321 return -ENOTSUP;
59f2689d
NS
1322 if (bs->read_only)
1323 return -EACCES;
8591675f
MT
1324 if (bdrv_in_use(bs))
1325 return -EBUSY;
51762288
SH
1326 ret = drv->bdrv_truncate(bs, offset);
1327 if (ret == 0) {
1328 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
145feb17 1329 bdrv_dev_resize_cb(bs);
51762288
SH
1330 }
1331 return ret;
83f64091
FB
1332}
1333
4a1d5e1f
FZ
1334/**
1335 * Length of a allocated file in bytes. Sparse files are counted by actual
1336 * allocated space. Return < 0 if error or unknown.
1337 */
1338int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1339{
1340 BlockDriver *drv = bs->drv;
1341 if (!drv) {
1342 return -ENOMEDIUM;
1343 }
1344 if (drv->bdrv_get_allocated_file_size) {
1345 return drv->bdrv_get_allocated_file_size(bs);
1346 }
1347 if (bs->file) {
1348 return bdrv_get_allocated_file_size(bs->file);
1349 }
1350 return -ENOTSUP;
1351}
1352
83f64091
FB
1353/**
1354 * Length of a file in bytes. Return < 0 if error or unknown.
1355 */
1356int64_t bdrv_getlength(BlockDriverState *bs)
1357{
1358 BlockDriver *drv = bs->drv;
1359 if (!drv)
19cb3738 1360 return -ENOMEDIUM;
51762288 1361
2c6942fa 1362 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
46a4e4e6
SH
1363 if (drv->bdrv_getlength) {
1364 return drv->bdrv_getlength(bs);
1365 }
83f64091 1366 }
46a4e4e6 1367 return bs->total_sectors * BDRV_SECTOR_SIZE;
fc01f7e7
FB
1368}
1369
19cb3738 1370/* return 0 as number of sectors if no device present or error */
96b8f136 1371void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
fc01f7e7 1372{
19cb3738
FB
1373 int64_t length;
1374 length = bdrv_getlength(bs);
1375 if (length < 0)
1376 length = 0;
1377 else
6ea44308 1378 length = length >> BDRV_SECTOR_BITS;
19cb3738 1379 *nb_sectors_ptr = length;
fc01f7e7 1380}
cf98951b 1381
f3d54fc4
AL
/* One entry of the MSDOS partition table, as parsed by guess_disk_lchs() */
struct partition {
    uint8_t boot_ind;     /* 0x80 - active */
    uint8_t head;         /* starting head */
    uint8_t sector;       /* starting sector */
    uint8_t cyl;          /* starting cylinder */
    uint8_t sys_ind;      /* what partition type */
    uint8_t end_head;     /* end head */
    uint8_t end_sector;   /* end sector */
    uint8_t end_cyl;      /* end cylinder */
    uint32_t start_sect;  /* starting sector counting from 0 */
    uint32_t nr_sects;    /* number of sectors in partition */
} QEMU_PACKED;
f3d54fc4
AL
1394
1395/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1396static int guess_disk_lchs(BlockDriverState *bs,
1397 int *pcylinders, int *pheads, int *psectors)
1398{
eb5a3165 1399 uint8_t buf[BDRV_SECTOR_SIZE];
f3d54fc4
AL
1400 int ret, i, heads, sectors, cylinders;
1401 struct partition *p;
1402 uint32_t nr_sects;
a38131b6 1403 uint64_t nb_sectors;
f3d54fc4
AL
1404
1405 bdrv_get_geometry(bs, &nb_sectors);
1406
1407 ret = bdrv_read(bs, 0, buf, 1);
1408 if (ret < 0)
1409 return -1;
1410 /* test msdos magic */
1411 if (buf[510] != 0x55 || buf[511] != 0xaa)
1412 return -1;
1413 for(i = 0; i < 4; i++) {
1414 p = ((struct partition *)(buf + 0x1be)) + i;
1415 nr_sects = le32_to_cpu(p->nr_sects);
1416 if (nr_sects && p->end_head) {
1417 /* We make the assumption that the partition terminates on
1418 a cylinder boundary */
1419 heads = p->end_head + 1;
1420 sectors = p->end_sector & 63;
1421 if (sectors == 0)
1422 continue;
1423 cylinders = nb_sectors / (heads * sectors);
1424 if (cylinders < 1 || cylinders > 16383)
1425 continue;
1426 *pheads = heads;
1427 *psectors = sectors;
1428 *pcylinders = cylinders;
1429#if 0
1430 printf("guessed geometry: LCHS=%d %d %d\n",
1431 cylinders, heads, sectors);
1432#endif
1433 return 0;
1434 }
1435 }
1436 return -1;
1437}
1438
1439void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1440{
1441 int translation, lba_detected = 0;
1442 int cylinders, heads, secs;
a38131b6 1443 uint64_t nb_sectors;
f3d54fc4
AL
1444
1445 /* if a geometry hint is available, use it */
1446 bdrv_get_geometry(bs, &nb_sectors);
1447 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1448 translation = bdrv_get_translation_hint(bs);
1449 if (cylinders != 0) {
1450 *pcyls = cylinders;
1451 *pheads = heads;
1452 *psecs = secs;
1453 } else {
1454 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1455 if (heads > 16) {
1456 /* if heads > 16, it means that a BIOS LBA
1457 translation was active, so the default
1458 hardware geometry is OK */
1459 lba_detected = 1;
1460 goto default_geometry;
1461 } else {
1462 *pcyls = cylinders;
1463 *pheads = heads;
1464 *psecs = secs;
1465 /* disable any translation to be in sync with
1466 the logical geometry */
1467 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1468 bdrv_set_translation_hint(bs,
1469 BIOS_ATA_TRANSLATION_NONE);
1470 }
1471 }
1472 } else {
1473 default_geometry:
1474 /* if no geometry, use a standard physical disk geometry */
1475 cylinders = nb_sectors / (16 * 63);
1476
1477 if (cylinders > 16383)
1478 cylinders = 16383;
1479 else if (cylinders < 2)
1480 cylinders = 2;
1481 *pcyls = cylinders;
1482 *pheads = 16;
1483 *psecs = 63;
1484 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1485 if ((*pcyls * *pheads) <= 131072) {
1486 bdrv_set_translation_hint(bs,
1487 BIOS_ATA_TRANSLATION_LARGE);
1488 } else {
1489 bdrv_set_translation_hint(bs,
1490 BIOS_ATA_TRANSLATION_LBA);
1491 }
1492 }
1493 }
1494 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1495 }
1496}
1497
5fafdf24 1498void bdrv_set_geometry_hint(BlockDriverState *bs,
b338082b
FB
1499 int cyls, int heads, int secs)
1500{
1501 bs->cyls = cyls;
1502 bs->heads = heads;
1503 bs->secs = secs;
1504}
1505
46d4767d
FB
1506void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1507{
1508 bs->translation = translation;
1509}
1510
5fafdf24 1511void bdrv_get_geometry_hint(BlockDriverState *bs,
b338082b
FB
1512 int *pcyls, int *pheads, int *psecs)
1513{
1514 *pcyls = bs->cyls;
1515 *pheads = bs->heads;
1516 *psecs = bs->secs;
1517}
1518
5bbdbb46
BS
1519/* Recognize floppy formats */
1520typedef struct FDFormat {
1521 FDriveType drive;
1522 uint8_t last_sect;
1523 uint8_t max_track;
1524 uint8_t max_head;
1525} FDFormat;
1526
1527static const FDFormat fd_formats[] = {
1528 /* First entry is default format */
1529 /* 1.44 MB 3"1/2 floppy disks */
1530 { FDRIVE_DRV_144, 18, 80, 1, },
1531 { FDRIVE_DRV_144, 20, 80, 1, },
1532 { FDRIVE_DRV_144, 21, 80, 1, },
1533 { FDRIVE_DRV_144, 21, 82, 1, },
1534 { FDRIVE_DRV_144, 21, 83, 1, },
1535 { FDRIVE_DRV_144, 22, 80, 1, },
1536 { FDRIVE_DRV_144, 23, 80, 1, },
1537 { FDRIVE_DRV_144, 24, 80, 1, },
1538 /* 2.88 MB 3"1/2 floppy disks */
1539 { FDRIVE_DRV_288, 36, 80, 1, },
1540 { FDRIVE_DRV_288, 39, 80, 1, },
1541 { FDRIVE_DRV_288, 40, 80, 1, },
1542 { FDRIVE_DRV_288, 44, 80, 1, },
1543 { FDRIVE_DRV_288, 48, 80, 1, },
1544 /* 720 kB 3"1/2 floppy disks */
1545 { FDRIVE_DRV_144, 9, 80, 1, },
1546 { FDRIVE_DRV_144, 10, 80, 1, },
1547 { FDRIVE_DRV_144, 10, 82, 1, },
1548 { FDRIVE_DRV_144, 10, 83, 1, },
1549 { FDRIVE_DRV_144, 13, 80, 1, },
1550 { FDRIVE_DRV_144, 14, 80, 1, },
1551 /* 1.2 MB 5"1/4 floppy disks */
1552 { FDRIVE_DRV_120, 15, 80, 1, },
1553 { FDRIVE_DRV_120, 18, 80, 1, },
1554 { FDRIVE_DRV_120, 18, 82, 1, },
1555 { FDRIVE_DRV_120, 18, 83, 1, },
1556 { FDRIVE_DRV_120, 20, 80, 1, },
1557 /* 720 kB 5"1/4 floppy disks */
1558 { FDRIVE_DRV_120, 9, 80, 1, },
1559 { FDRIVE_DRV_120, 11, 80, 1, },
1560 /* 360 kB 5"1/4 floppy disks */
1561 { FDRIVE_DRV_120, 9, 40, 1, },
1562 { FDRIVE_DRV_120, 9, 40, 0, },
1563 { FDRIVE_DRV_120, 10, 41, 1, },
1564 { FDRIVE_DRV_120, 10, 42, 1, },
1565 /* 320 kB 5"1/4 floppy disks */
1566 { FDRIVE_DRV_120, 8, 40, 1, },
1567 { FDRIVE_DRV_120, 8, 40, 0, },
1568 /* 360 kB must match 5"1/4 better than 3"1/2... */
1569 { FDRIVE_DRV_144, 9, 80, 0, },
1570 /* end */
1571 { FDRIVE_DRV_NONE, -1, -1, 0, },
1572};
1573
1574void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1575 int *max_track, int *last_sect,
1576 FDriveType drive_in, FDriveType *drive)
1577{
1578 const FDFormat *parse;
1579 uint64_t nb_sectors, size;
1580 int i, first_match, match;
1581
1582 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1583 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1584 /* User defined disk */
1585 } else {
1586 bdrv_get_geometry(bs, &nb_sectors);
1587 match = -1;
1588 first_match = -1;
1589 for (i = 0; ; i++) {
1590 parse = &fd_formats[i];
1591 if (parse->drive == FDRIVE_DRV_NONE) {
1592 break;
1593 }
1594 if (drive_in == parse->drive ||
1595 drive_in == FDRIVE_DRV_NONE) {
1596 size = (parse->max_head + 1) * parse->max_track *
1597 parse->last_sect;
1598 if (nb_sectors == size) {
1599 match = i;
1600 break;
1601 }
1602 if (first_match == -1) {
1603 first_match = i;
1604 }
1605 }
1606 }
1607 if (match == -1) {
1608 if (first_match == -1) {
1609 match = 1;
1610 } else {
1611 match = first_match;
1612 }
1613 parse = &fd_formats[match];
1614 }
1615 *nb_heads = parse->max_head + 1;
1616 *max_track = parse->max_track;
1617 *last_sect = parse->last_sect;
1618 *drive = parse->drive;
1619 }
1620}
1621
46d4767d
FB
1622int bdrv_get_translation_hint(BlockDriverState *bs)
1623{
1624 return bs->translation;
1625}
1626
abd7f68d
MA
1627void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1628 BlockErrorAction on_write_error)
1629{
1630 bs->on_read_error = on_read_error;
1631 bs->on_write_error = on_write_error;
1632}
1633
1634BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1635{
1636 return is_read ? bs->on_read_error : bs->on_write_error;
1637}
1638
b338082b
FB
1639int bdrv_is_read_only(BlockDriverState *bs)
1640{
1641 return bs->read_only;
1642}
1643
985a03b0
TS
1644int bdrv_is_sg(BlockDriverState *bs)
1645{
1646 return bs->sg;
1647}
1648
e900a7b7
CH
1649int bdrv_enable_write_cache(BlockDriverState *bs)
1650{
1651 return bs->enable_write_cache;
1652}
1653
ea2384d3
FB
1654int bdrv_is_encrypted(BlockDriverState *bs)
1655{
1656 if (bs->backing_hd && bs->backing_hd->encrypted)
1657 return 1;
1658 return bs->encrypted;
1659}
1660
c0f4ce77
AL
1661int bdrv_key_required(BlockDriverState *bs)
1662{
1663 BlockDriverState *backing_hd = bs->backing_hd;
1664
1665 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1666 return 1;
1667 return (bs->encrypted && !bs->valid_key);
1668}
1669
ea2384d3
FB
1670int bdrv_set_key(BlockDriverState *bs, const char *key)
1671{
1672 int ret;
1673 if (bs->backing_hd && bs->backing_hd->encrypted) {
1674 ret = bdrv_set_key(bs->backing_hd, key);
1675 if (ret < 0)
1676 return ret;
1677 if (!bs->encrypted)
1678 return 0;
1679 }
fd04a2ae
SH
1680 if (!bs->encrypted) {
1681 return -EINVAL;
1682 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1683 return -ENOMEDIUM;
1684 }
c0f4ce77 1685 ret = bs->drv->bdrv_set_key(bs, key);
bb5fc20f
AL
1686 if (ret < 0) {
1687 bs->valid_key = 0;
1688 } else if (!bs->valid_key) {
1689 bs->valid_key = 1;
1690 /* call the change callback now, we skipped it on open */
7d4b4ba5 1691 bdrv_dev_change_media_cb(bs, true);
bb5fc20f 1692 }
c0f4ce77 1693 return ret;
ea2384d3
FB
1694}
1695
1696void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1697{
19cb3738 1698 if (!bs->drv) {
ea2384d3
FB
1699 buf[0] = '\0';
1700 } else {
1701 pstrcpy(buf, buf_size, bs->drv->format_name);
1702 }
1703}
1704
5fafdf24 1705void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
ea2384d3
FB
1706 void *opaque)
1707{
1708 BlockDriver *drv;
1709
8a22f02a 1710 QLIST_FOREACH(drv, &bdrv_drivers, list) {
ea2384d3
FB
1711 it(opaque, drv->format_name);
1712 }
1713}
1714
b338082b
FB
1715BlockDriverState *bdrv_find(const char *name)
1716{
1717 BlockDriverState *bs;
1718
1b7bdbc1
SH
1719 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1720 if (!strcmp(name, bs->device_name)) {
b338082b 1721 return bs;
1b7bdbc1 1722 }
b338082b
FB
1723 }
1724 return NULL;
1725}
1726
2f399b0a
MA
1727BlockDriverState *bdrv_next(BlockDriverState *bs)
1728{
1729 if (!bs) {
1730 return QTAILQ_FIRST(&bdrv_states);
1731 }
1732 return QTAILQ_NEXT(bs, list);
1733}
1734
51de9760 1735void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
81d0912d
FB
1736{
1737 BlockDriverState *bs;
1738
1b7bdbc1 1739 QTAILQ_FOREACH(bs, &bdrv_states, list) {
51de9760 1740 it(opaque, bs);
81d0912d
FB
1741 }
1742}
1743
ea2384d3
FB
1744const char *bdrv_get_device_name(BlockDriverState *bs)
1745{
1746 return bs->device_name;
1747}
1748
c6ca28d6
AL
1749void bdrv_flush_all(void)
1750{
1751 BlockDriverState *bs;
1752
1b7bdbc1 1753 QTAILQ_FOREACH(bs, &bdrv_states, list) {
c602a489 1754 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
c6ca28d6 1755 bdrv_flush(bs);
1b7bdbc1
SH
1756 }
1757 }
c6ca28d6
AL
1758}
1759
f2feebbd
KW
1760int bdrv_has_zero_init(BlockDriverState *bs)
1761{
1762 assert(bs->drv);
1763
336c1c12
KW
1764 if (bs->drv->bdrv_has_zero_init) {
1765 return bs->drv->bdrv_has_zero_init(bs);
f2feebbd
KW
1766 }
1767
1768 return 1;
1769}
1770
f58c7b35
TS
1771/*
1772 * Returns true iff the specified sector is present in the disk image. Drivers
1773 * not implementing the functionality are assumed to not support backing files,
1774 * hence all their sectors are reported as allocated.
1775 *
1776 * 'pnum' is set to the number of sectors (including and immediately following
1777 * the specified sector) that are known to be in the same
1778 * allocated/unallocated state.
1779 *
1780 * 'nb_sectors' is the max value 'pnum' should be set to.
1781 */
1782int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1783 int *pnum)
1784{
1785 int64_t n;
1786 if (!bs->drv->bdrv_is_allocated) {
1787 if (sector_num >= bs->total_sectors) {
1788 *pnum = 0;
1789 return 0;
1790 }
1791 n = bs->total_sectors - sector_num;
1792 *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1793 return 1;
1794 }
1795 return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1796}
1797
2582bfed
LC
1798void bdrv_mon_event(const BlockDriverState *bdrv,
1799 BlockMonEventAction action, int is_read)
1800{
1801 QObject *data;
1802 const char *action_str;
1803
1804 switch (action) {
1805 case BDRV_ACTION_REPORT:
1806 action_str = "report";
1807 break;
1808 case BDRV_ACTION_IGNORE:
1809 action_str = "ignore";
1810 break;
1811 case BDRV_ACTION_STOP:
1812 action_str = "stop";
1813 break;
1814 default:
1815 abort();
1816 }
1817
1818 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1819 bdrv->device_name,
1820 action_str,
1821 is_read ? "read" : "write");
1822 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1823
1824 qobject_decref(data);
1825}
1826
d15e5465 1827static void bdrv_print_dict(QObject *obj, void *opaque)
b338082b 1828{
d15e5465
LC
1829 QDict *bs_dict;
1830 Monitor *mon = opaque;
1831
1832 bs_dict = qobject_to_qdict(obj);
1833
d8aeeb31 1834 monitor_printf(mon, "%s: removable=%d",
d15e5465 1835 qdict_get_str(bs_dict, "device"),
d15e5465
LC
1836 qdict_get_bool(bs_dict, "removable"));
1837
1838 if (qdict_get_bool(bs_dict, "removable")) {
1839 monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
e4def80b
MA
1840 monitor_printf(mon, " tray-open=%d",
1841 qdict_get_bool(bs_dict, "tray-open"));
d15e5465 1842 }
d2078cc2
LC
1843
1844 if (qdict_haskey(bs_dict, "io-status")) {
1845 monitor_printf(mon, " io-status=%s", qdict_get_str(bs_dict, "io-status"));
1846 }
1847
d15e5465
LC
1848 if (qdict_haskey(bs_dict, "inserted")) {
1849 QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted"));
1850
1851 monitor_printf(mon, " file=");
1852 monitor_print_filename(mon, qdict_get_str(qdict, "file"));
1853 if (qdict_haskey(qdict, "backing_file")) {
1854 monitor_printf(mon, " backing_file=");
1855 monitor_print_filename(mon, qdict_get_str(qdict, "backing_file"));
1856 }
1857 monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
1858 qdict_get_bool(qdict, "ro"),
1859 qdict_get_str(qdict, "drv"),
1860 qdict_get_bool(qdict, "encrypted"));
1861 } else {
1862 monitor_printf(mon, " [not inserted]");
1863 }
1864
1865 monitor_printf(mon, "\n");
1866}
1867
1868void bdrv_info_print(Monitor *mon, const QObject *data)
1869{
1870 qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon);
1871}
1872
f04ef601
LC
1873static const char *const io_status_name[BDRV_IOS_MAX] = {
1874 [BDRV_IOS_OK] = "ok",
1875 [BDRV_IOS_FAILED] = "failed",
1876 [BDRV_IOS_ENOSPC] = "nospace",
1877};
1878
d15e5465
LC
1879void bdrv_info(Monitor *mon, QObject **ret_data)
1880{
1881 QList *bs_list;
b338082b
FB
1882 BlockDriverState *bs;
1883
d15e5465
LC
1884 bs_list = qlist_new();
1885
1b7bdbc1 1886 QTAILQ_FOREACH(bs, &bdrv_states, list) {
d15e5465 1887 QObject *bs_obj;
e4def80b 1888 QDict *bs_dict;
d15e5465 1889
d8aeeb31 1890 bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': 'unknown', "
d15e5465 1891 "'removable': %i, 'locked': %i }",
2c6942fa
MA
1892 bs->device_name,
1893 bdrv_dev_has_removable_media(bs),
f107639a 1894 bdrv_dev_is_medium_locked(bs));
e4def80b 1895 bs_dict = qobject_to_qdict(bs_obj);
d15e5465 1896
e4def80b
MA
1897 if (bdrv_dev_has_removable_media(bs)) {
1898 qdict_put(bs_dict, "tray-open",
1899 qbool_from_int(bdrv_dev_is_tray_open(bs)));
1900 }
f04ef601
LC
1901
1902 if (bdrv_iostatus_is_enabled(bs)) {
1903 qdict_put(bs_dict, "io-status",
1904 qstring_from_str(io_status_name[bs->iostatus]));
1905 }
1906
19cb3738 1907 if (bs->drv) {
d15e5465 1908 QObject *obj;
d15e5465
LC
1909
1910 obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
1911 "'encrypted': %i }",
1912 bs->filename, bs->read_only,
1913 bs->drv->format_name,
1914 bdrv_is_encrypted(bs));
fef30743 1915 if (bs->backing_file[0] != '\0') {
d15e5465
LC
1916 QDict *qdict = qobject_to_qdict(obj);
1917 qdict_put(qdict, "backing_file",
1918 qstring_from_str(bs->backing_file));
376253ec 1919 }
d15e5465
LC
1920
1921 qdict_put_obj(bs_dict, "inserted", obj);
b338082b 1922 }
d15e5465 1923 qlist_append_obj(bs_list, bs_obj);
b338082b 1924 }
d15e5465
LC
1925
1926 *ret_data = QOBJECT(bs_list);
b338082b 1927}
a36e69dd 1928
218a536a 1929static void bdrv_stats_iter(QObject *data, void *opaque)
a36e69dd 1930{
218a536a
LC
1931 QDict *qdict;
1932 Monitor *mon = opaque;
1933
1934 qdict = qobject_to_qdict(data);
1935 monitor_printf(mon, "%s:", qdict_get_str(qdict, "device"));
1936
1937 qdict = qobject_to_qdict(qdict_get(qdict, "stats"));
1938 monitor_printf(mon, " rd_bytes=%" PRId64
1939 " wr_bytes=%" PRId64
1940 " rd_operations=%" PRId64
1941 " wr_operations=%" PRId64
e8045d67 1942 " flush_operations=%" PRId64
c488c7f6
CH
1943 " wr_total_time_ns=%" PRId64
1944 " rd_total_time_ns=%" PRId64
1945 " flush_total_time_ns=%" PRId64
218a536a
LC
1946 "\n",
1947 qdict_get_int(qdict, "rd_bytes"),
1948 qdict_get_int(qdict, "wr_bytes"),
1949 qdict_get_int(qdict, "rd_operations"),
e8045d67 1950 qdict_get_int(qdict, "wr_operations"),
c488c7f6
CH
1951 qdict_get_int(qdict, "flush_operations"),
1952 qdict_get_int(qdict, "wr_total_time_ns"),
1953 qdict_get_int(qdict, "rd_total_time_ns"),
1954 qdict_get_int(qdict, "flush_total_time_ns"));
218a536a
LC
1955}
1956
1957void bdrv_stats_print(Monitor *mon, const QObject *data)
1958{
1959 qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon);
1960}
1961
294cc35f
KW
1962static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
1963{
1964 QObject *res;
1965 QDict *dict;
1966
1967 res = qobject_from_jsonf("{ 'stats': {"
1968 "'rd_bytes': %" PRId64 ","
1969 "'wr_bytes': %" PRId64 ","
1970 "'rd_operations': %" PRId64 ","
1971 "'wr_operations': %" PRId64 ","
e8045d67 1972 "'wr_highest_offset': %" PRId64 ","
c488c7f6
CH
1973 "'flush_operations': %" PRId64 ","
1974 "'wr_total_time_ns': %" PRId64 ","
1975 "'rd_total_time_ns': %" PRId64 ","
1976 "'flush_total_time_ns': %" PRId64
294cc35f 1977 "} }",
a597e79c
CH
1978 bs->nr_bytes[BDRV_ACCT_READ],
1979 bs->nr_bytes[BDRV_ACCT_WRITE],
1980 bs->nr_ops[BDRV_ACCT_READ],
1981 bs->nr_ops[BDRV_ACCT_WRITE],
5ffbbc67 1982 bs->wr_highest_sector *
e8045d67 1983 (uint64_t)BDRV_SECTOR_SIZE,
c488c7f6
CH
1984 bs->nr_ops[BDRV_ACCT_FLUSH],
1985 bs->total_time_ns[BDRV_ACCT_WRITE],
1986 bs->total_time_ns[BDRV_ACCT_READ],
1987 bs->total_time_ns[BDRV_ACCT_FLUSH]);
294cc35f
KW
1988 dict = qobject_to_qdict(res);
1989
1990 if (*bs->device_name) {
1991 qdict_put(dict, "device", qstring_from_str(bs->device_name));
1992 }
1993
1994 if (bs->file) {
1995 QObject *parent = bdrv_info_stats_bs(bs->file);
1996 qdict_put_obj(dict, "parent", parent);
1997 }
1998
1999 return res;
2000}
2001
218a536a
LC
2002void bdrv_info_stats(Monitor *mon, QObject **ret_data)
2003{
2004 QObject *obj;
2005 QList *devices;
a36e69dd
TS
2006 BlockDriverState *bs;
2007
218a536a
LC
2008 devices = qlist_new();
2009
1b7bdbc1 2010 QTAILQ_FOREACH(bs, &bdrv_states, list) {
294cc35f 2011 obj = bdrv_info_stats_bs(bs);
218a536a 2012 qlist_append_obj(devices, obj);
a36e69dd 2013 }
218a536a
LC
2014
2015 *ret_data = QOBJECT(devices);
a36e69dd 2016}
ea2384d3 2017
045df330
AL
2018const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2019{
2020 if (bs->backing_hd && bs->backing_hd->encrypted)
2021 return bs->backing_file;
2022 else if (bs->encrypted)
2023 return bs->filename;
2024 else
2025 return NULL;
2026}
2027
5fafdf24 2028void bdrv_get_backing_filename(BlockDriverState *bs,
83f64091
FB
2029 char *filename, int filename_size)
2030{
b783e409 2031 if (!bs->backing_file) {
83f64091
FB
2032 pstrcpy(filename, filename_size, "");
2033 } else {
2034 pstrcpy(filename, filename_size, bs->backing_file);
2035 }
2036}
2037
5fafdf24 2038int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
faea38e7
FB
2039 const uint8_t *buf, int nb_sectors)
2040{
2041 BlockDriver *drv = bs->drv;
2042 if (!drv)
19cb3738 2043 return -ENOMEDIUM;
faea38e7
FB
2044 if (!drv->bdrv_write_compressed)
2045 return -ENOTSUP;
fbb7b4e0
KW
2046 if (bdrv_check_request(bs, sector_num, nb_sectors))
2047 return -EIO;
a55eb92c 2048
c6d22830 2049 if (bs->dirty_bitmap) {
7cd1e32a
LS
2050 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2051 }
a55eb92c 2052
faea38e7
FB
2053 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2054}
3b46e624 2055
faea38e7
FB
2056int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2057{
2058 BlockDriver *drv = bs->drv;
2059 if (!drv)
19cb3738 2060 return -ENOMEDIUM;
faea38e7
FB
2061 if (!drv->bdrv_get_info)
2062 return -ENOTSUP;
2063 memset(bdi, 0, sizeof(*bdi));
2064 return drv->bdrv_get_info(bs, bdi);
2065}
2066
45566e9c
CH
2067int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2068 int64_t pos, int size)
178e08a5
AL
2069{
2070 BlockDriver *drv = bs->drv;
2071 if (!drv)
2072 return -ENOMEDIUM;
7cdb1f6d
MK
2073 if (drv->bdrv_save_vmstate)
2074 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2075 if (bs->file)
2076 return bdrv_save_vmstate(bs->file, buf, pos, size);
2077 return -ENOTSUP;
178e08a5
AL
2078}
2079
45566e9c
CH
2080int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2081 int64_t pos, int size)
178e08a5
AL
2082{
2083 BlockDriver *drv = bs->drv;
2084 if (!drv)
2085 return -ENOMEDIUM;
7cdb1f6d
MK
2086 if (drv->bdrv_load_vmstate)
2087 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2088 if (bs->file)
2089 return bdrv_load_vmstate(bs->file, buf, pos, size);
2090 return -ENOTSUP;
178e08a5
AL
2091}
2092
8b9b0cc2
KW
2093void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2094{
2095 BlockDriver *drv = bs->drv;
2096
2097 if (!drv || !drv->bdrv_debug_event) {
2098 return;
2099 }
2100
2101 return drv->bdrv_debug_event(bs, event);
2102
2103}
2104
faea38e7
FB
2105/**************************************************************/
2106/* handling of snapshots */
2107
feeee5ac
MDCF
2108int bdrv_can_snapshot(BlockDriverState *bs)
2109{
2110 BlockDriver *drv = bs->drv;
07b70bfb 2111 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
feeee5ac
MDCF
2112 return 0;
2113 }
2114
2115 if (!drv->bdrv_snapshot_create) {
2116 if (bs->file != NULL) {
2117 return bdrv_can_snapshot(bs->file);
2118 }
2119 return 0;
2120 }
2121
2122 return 1;
2123}
2124
199630b6
BS
2125int bdrv_is_snapshot(BlockDriverState *bs)
2126{
2127 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2128}
2129
f9092b10
MA
2130BlockDriverState *bdrv_snapshots(void)
2131{
2132 BlockDriverState *bs;
2133
3ac906f7 2134 if (bs_snapshots) {
f9092b10 2135 return bs_snapshots;
3ac906f7 2136 }
f9092b10
MA
2137
2138 bs = NULL;
2139 while ((bs = bdrv_next(bs))) {
2140 if (bdrv_can_snapshot(bs)) {
3ac906f7
MA
2141 bs_snapshots = bs;
2142 return bs;
f9092b10
MA
2143 }
2144 }
2145 return NULL;
f9092b10
MA
2146}
2147
5fafdf24 2148int bdrv_snapshot_create(BlockDriverState *bs,
faea38e7
FB
2149 QEMUSnapshotInfo *sn_info)
2150{
2151 BlockDriver *drv = bs->drv;
2152 if (!drv)
19cb3738 2153 return -ENOMEDIUM;
7cdb1f6d
MK
2154 if (drv->bdrv_snapshot_create)
2155 return drv->bdrv_snapshot_create(bs, sn_info);
2156 if (bs->file)
2157 return bdrv_snapshot_create(bs->file, sn_info);
2158 return -ENOTSUP;
faea38e7
FB
2159}
2160
5fafdf24 2161int bdrv_snapshot_goto(BlockDriverState *bs,
faea38e7
FB
2162 const char *snapshot_id)
2163{
2164 BlockDriver *drv = bs->drv;
7cdb1f6d
MK
2165 int ret, open_ret;
2166
faea38e7 2167 if (!drv)
19cb3738 2168 return -ENOMEDIUM;
7cdb1f6d
MK
2169 if (drv->bdrv_snapshot_goto)
2170 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2171
2172 if (bs->file) {
2173 drv->bdrv_close(bs);
2174 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2175 open_ret = drv->bdrv_open(bs, bs->open_flags);
2176 if (open_ret < 0) {
2177 bdrv_delete(bs->file);
2178 bs->drv = NULL;
2179 return open_ret;
2180 }
2181 return ret;
2182 }
2183
2184 return -ENOTSUP;
faea38e7
FB
2185}
2186
2187int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2188{
2189 BlockDriver *drv = bs->drv;
2190 if (!drv)
19cb3738 2191 return -ENOMEDIUM;
7cdb1f6d
MK
2192 if (drv->bdrv_snapshot_delete)
2193 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2194 if (bs->file)
2195 return bdrv_snapshot_delete(bs->file, snapshot_id);
2196 return -ENOTSUP;
faea38e7
FB
2197}
2198
5fafdf24 2199int bdrv_snapshot_list(BlockDriverState *bs,
faea38e7
FB
2200 QEMUSnapshotInfo **psn_info)
2201{
2202 BlockDriver *drv = bs->drv;
2203 if (!drv)
19cb3738 2204 return -ENOMEDIUM;
7cdb1f6d
MK
2205 if (drv->bdrv_snapshot_list)
2206 return drv->bdrv_snapshot_list(bs, psn_info);
2207 if (bs->file)
2208 return bdrv_snapshot_list(bs->file, psn_info);
2209 return -ENOTSUP;
faea38e7
FB
2210}
2211
51ef6727 2212int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2213 const char *snapshot_name)
2214{
2215 BlockDriver *drv = bs->drv;
2216 if (!drv) {
2217 return -ENOMEDIUM;
2218 }
2219 if (!bs->read_only) {
2220 return -EINVAL;
2221 }
2222 if (drv->bdrv_snapshot_load_tmp) {
2223 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2224 }
2225 return -ENOTSUP;
2226}
2227
faea38e7
FB
#define NB_SUFFIXES 4

/*
 * Format @size into @buf (at most @buf_size bytes, via snprintf) and
 * return @buf.
 *
 * Values up to 999 are printed as plain integers.  Larger values are
 * scaled by powers of 1024 and tagged with K, M, G or T: one decimal is
 * shown while the scaled value is below 10, otherwise the value is rounded
 * to the nearest integer.  T is the largest unit used.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char unit_chars[NB_SUFFIXES] = { 'K', 'M', 'G', 'T' };
    int64_t unit = 1024;
    int idx = 0;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    /* Move to a bigger unit while the value needs four or more digits. */
    while (idx < NB_SUFFIXES - 1 && size >= 1000 * unit) {
        unit *= 1024;
        idx++;
    }

    if (size < 10 * unit) {
        snprintf(buf, buf_size, "%0.1f%c",
                 (double)size / unit,
                 unit_chars[idx]);
    } else {
        /* Adding half a unit before dividing rounds to nearest. */
        snprintf(buf, buf_size, "%" PRId64 "%c",
                 (size + (unit >> 1)) / unit,
                 unit_chars[idx]);
    }
    return buf;
}
2257
2258char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2259{
2260 char buf1[128], date_buf[128], clock_buf[128];
3b9f94e1
FB
2261#ifdef _WIN32
2262 struct tm *ptm;
2263#else
faea38e7 2264 struct tm tm;
3b9f94e1 2265#endif
faea38e7
FB
2266 time_t ti;
2267 int64_t secs;
2268
2269 if (!sn) {
5fafdf24
TS
2270 snprintf(buf, buf_size,
2271 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
2272 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2273 } else {
2274 ti = sn->date_sec;
3b9f94e1
FB
2275#ifdef _WIN32
2276 ptm = localtime(&ti);
2277 strftime(date_buf, sizeof(date_buf),
2278 "%Y-%m-%d %H:%M:%S", ptm);
2279#else
faea38e7
FB
2280 localtime_r(&ti, &tm);
2281 strftime(date_buf, sizeof(date_buf),
2282 "%Y-%m-%d %H:%M:%S", &tm);
3b9f94e1 2283#endif
faea38e7
FB
2284 secs = sn->vm_clock_nsec / 1000000000;
2285 snprintf(clock_buf, sizeof(clock_buf),
2286 "%02d:%02d:%02d.%03d",
2287 (int)(secs / 3600),
2288 (int)((secs / 60) % 60),
5fafdf24 2289 (int)(secs % 60),
faea38e7
FB
2290 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2291 snprintf(buf, buf_size,
5fafdf24 2292 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
2293 sn->id_str, sn->name,
2294 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2295 date_buf,
2296 clock_buf);
2297 }
2298 return buf;
2299}
2300
ea2384d3 2301/**************************************************************/
83f64091 2302/* async I/Os */
ea2384d3 2303
3b69e4b9 2304BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
f141eafe 2305 QEMUIOVector *qiov, int nb_sectors,
3b69e4b9 2306 BlockDriverCompletionFunc *cb, void *opaque)
83f64091 2307{
bbf0a440
SH
2308 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2309
b2a61371 2310 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
8c5873d6 2311 cb, opaque, false);
ea2384d3
FB
2312}
2313
f141eafe
AL
2314BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2315 QEMUIOVector *qiov, int nb_sectors,
2316 BlockDriverCompletionFunc *cb, void *opaque)
ea2384d3 2317{
bbf0a440
SH
2318 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2319
1a6e115b 2320 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
8c5873d6 2321 cb, opaque, true);
83f64091
FB
2322}
2323
40b4f539
KW
2324
2325typedef struct MultiwriteCB {
2326 int error;
2327 int num_requests;
2328 int num_callbacks;
2329 struct {
2330 BlockDriverCompletionFunc *cb;
2331 void *opaque;
2332 QEMUIOVector *free_qiov;
2333 void *free_buf;
2334 } callbacks[];
2335} MultiwriteCB;
2336
2337static void multiwrite_user_cb(MultiwriteCB *mcb)
2338{
2339 int i;
2340
2341 for (i = 0; i < mcb->num_callbacks; i++) {
2342 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1e1ea48d
SH
2343 if (mcb->callbacks[i].free_qiov) {
2344 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2345 }
7267c094 2346 g_free(mcb->callbacks[i].free_qiov);
f8a83245 2347 qemu_vfree(mcb->callbacks[i].free_buf);
40b4f539
KW
2348 }
2349}
2350
2351static void multiwrite_cb(void *opaque, int ret)
2352{
2353 MultiwriteCB *mcb = opaque;
2354
6d519a5f
SH
2355 trace_multiwrite_cb(mcb, ret);
2356
cb6d3ca0 2357 if (ret < 0 && !mcb->error) {
40b4f539 2358 mcb->error = ret;
40b4f539
KW
2359 }
2360
2361 mcb->num_requests--;
2362 if (mcb->num_requests == 0) {
de189a1b 2363 multiwrite_user_cb(mcb);
7267c094 2364 g_free(mcb);
40b4f539
KW
2365 }
2366}
2367
2368static int multiwrite_req_compare(const void *a, const void *b)
2369{
77be4366
CH
2370 const BlockRequest *req1 = a, *req2 = b;
2371
2372 /*
2373 * Note that we can't simply subtract req2->sector from req1->sector
2374 * here as that could overflow the return value.
2375 */
2376 if (req1->sector > req2->sector) {
2377 return 1;
2378 } else if (req1->sector < req2->sector) {
2379 return -1;
2380 } else {
2381 return 0;
2382 }
40b4f539
KW
2383}
2384
2385/*
2386 * Takes a bunch of requests and tries to merge them. Returns the number of
2387 * requests that remain after merging.
2388 */
2389static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2390 int num_reqs, MultiwriteCB *mcb)
2391{
2392 int i, outidx;
2393
2394 // Sort requests by start sector
2395 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2396
2397 // Check if adjacent requests touch the same clusters. If so, combine them,
2398 // filling up gaps with zero sectors.
2399 outidx = 0;
2400 for (i = 1; i < num_reqs; i++) {
2401 int merge = 0;
2402 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2403
2404 // This handles the cases that are valid for all block drivers, namely
2405 // exactly sequential writes and overlapping writes.
2406 if (reqs[i].sector <= oldreq_last) {
2407 merge = 1;
2408 }
2409
2410 // The block driver may decide that it makes sense to combine requests
2411 // even if there is a gap of some sectors between them. In this case,
2412 // the gap is filled with zeros (therefore only applicable for yet
2413 // unused space in format like qcow2).
2414 if (!merge && bs->drv->bdrv_merge_requests) {
2415 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2416 }
2417
e2a305fb
CH
2418 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2419 merge = 0;
2420 }
2421
40b4f539
KW
2422 if (merge) {
2423 size_t size;
7267c094 2424 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
40b4f539
KW
2425 qemu_iovec_init(qiov,
2426 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2427
2428 // Add the first request to the merged one. If the requests are
2429 // overlapping, drop the last sectors of the first request.
2430 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2431 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2432
2433 // We might need to add some zeros between the two requests
2434 if (reqs[i].sector > oldreq_last) {
2435 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2436 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2437 memset(buf, 0, zero_bytes);
2438 qemu_iovec_add(qiov, buf, zero_bytes);
2439 mcb->callbacks[i].free_buf = buf;
2440 }
2441
2442 // Add the second request
2443 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2444
cbf1dff2 2445 reqs[outidx].nb_sectors = qiov->size >> 9;
40b4f539
KW
2446 reqs[outidx].qiov = qiov;
2447
2448 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2449 } else {
2450 outidx++;
2451 reqs[outidx].sector = reqs[i].sector;
2452 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2453 reqs[outidx].qiov = reqs[i].qiov;
2454 }
2455 }
2456
2457 return outidx + 1;
2458}
2459
2460/*
2461 * Submit multiple AIO write requests at once.
2462 *
2463 * On success, the function returns 0 and all requests in the reqs array have
2464 * been submitted. In error case this function returns -1, and any of the
2465 * requests may or may not be submitted yet. In particular, this means that the
2466 * callback will be called for some of the requests, for others it won't. The
2467 * caller must check the error field of the BlockRequest to wait for the right
2468 * callbacks (if error != 0, no callback will be called).
2469 *
2470 * The implementation may modify the contents of the reqs array, e.g. to merge
2471 * requests. However, the fields opaque and error are left unmodified as they
2472 * are used to signal failure for a single request to the caller.
2473 */
2474int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2475{
2476 BlockDriverAIOCB *acb;
2477 MultiwriteCB *mcb;
2478 int i;
2479
301db7c2
RH
2480 /* don't submit writes if we don't have a medium */
2481 if (bs->drv == NULL) {
2482 for (i = 0; i < num_reqs; i++) {
2483 reqs[i].error = -ENOMEDIUM;
2484 }
2485 return -1;
2486 }
2487
40b4f539
KW
2488 if (num_reqs == 0) {
2489 return 0;
2490 }
2491
2492 // Create MultiwriteCB structure
7267c094 2493 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
40b4f539
KW
2494 mcb->num_requests = 0;
2495 mcb->num_callbacks = num_reqs;
2496
2497 for (i = 0; i < num_reqs; i++) {
2498 mcb->callbacks[i].cb = reqs[i].cb;
2499 mcb->callbacks[i].opaque = reqs[i].opaque;
2500 }
2501
2502 // Check for mergable requests
2503 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2504
6d519a5f
SH
2505 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2506
453f9a16
KW
2507 /*
2508 * Run the aio requests. As soon as one request can't be submitted
2509 * successfully, fail all requests that are not yet submitted (we must
2510 * return failure for all requests anyway)
2511 *
2512 * num_requests cannot be set to the right value immediately: If
2513 * bdrv_aio_writev fails for some request, num_requests would be too high
2514 * and therefore multiwrite_cb() would never recognize the multiwrite
2515 * request as completed. We also cannot use the loop variable i to set it
2516 * when the first request fails because the callback may already have been
2517 * called for previously submitted requests. Thus, num_requests must be
2518 * incremented for each request that is submitted.
2519 *
2520 * The problem that callbacks may be called early also means that we need
2521 * to take care that num_requests doesn't become 0 before all requests are
2522 * submitted - multiwrite_cb() would consider the multiwrite request
2523 * completed. A dummy request that is "completed" by a manual call to
2524 * multiwrite_cb() takes care of this.
2525 */
2526 mcb->num_requests = 1;
2527
6d519a5f 2528 // Run the aio requests
40b4f539 2529 for (i = 0; i < num_reqs; i++) {
453f9a16 2530 mcb->num_requests++;
40b4f539
KW
2531 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2532 reqs[i].nb_sectors, multiwrite_cb, mcb);
2533
2534 if (acb == NULL) {
2535 // We can only fail the whole thing if no request has been
2536 // submitted yet. Otherwise we'll wait for the submitted AIOs to
2537 // complete and report the error in the callback.
453f9a16 2538 if (i == 0) {
6d519a5f 2539 trace_bdrv_aio_multiwrite_earlyfail(mcb);
40b4f539
KW
2540 goto fail;
2541 } else {
6d519a5f 2542 trace_bdrv_aio_multiwrite_latefail(mcb, i);
7eb58a6c 2543 multiwrite_cb(mcb, -EIO);
40b4f539
KW
2544 break;
2545 }
40b4f539
KW
2546 }
2547 }
2548
453f9a16
KW
2549 /* Complete the dummy request */
2550 multiwrite_cb(mcb, 0);
2551
40b4f539
KW
2552 return 0;
2553
2554fail:
453f9a16
KW
2555 for (i = 0; i < mcb->num_callbacks; i++) {
2556 reqs[i].error = -EIO;
2557 }
7267c094 2558 g_free(mcb);
40b4f539
KW
2559 return -1;
2560}
2561
83f64091 2562void bdrv_aio_cancel(BlockDriverAIOCB *acb)
83f64091 2563{
6bbff9a0 2564 acb->pool->cancel(acb);
83f64091
FB
2565}
2566
ce1a14dc 2567
83f64091
FB
2568/**************************************************************/
2569/* async block device emulation */
2570
c16b5a2c
CH
2571typedef struct BlockDriverAIOCBSync {
2572 BlockDriverAIOCB common;
2573 QEMUBH *bh;
2574 int ret;
2575 /* vector translation state */
2576 QEMUIOVector *qiov;
2577 uint8_t *bounce;
2578 int is_write;
2579} BlockDriverAIOCBSync;
2580
2581static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2582{
b666d239
KW
2583 BlockDriverAIOCBSync *acb =
2584 container_of(blockacb, BlockDriverAIOCBSync, common);
6a7ad299 2585 qemu_bh_delete(acb->bh);
36afc451 2586 acb->bh = NULL;
c16b5a2c
CH
2587 qemu_aio_release(acb);
2588}
2589
2590static AIOPool bdrv_em_aio_pool = {
2591 .aiocb_size = sizeof(BlockDriverAIOCBSync),
2592 .cancel = bdrv_aio_cancel_em,
2593};
2594
ce1a14dc 2595static void bdrv_aio_bh_cb(void *opaque)
83f64091 2596{
ce1a14dc 2597 BlockDriverAIOCBSync *acb = opaque;
f141eafe 2598
f141eafe
AL
2599 if (!acb->is_write)
2600 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
ceb42de8 2601 qemu_vfree(acb->bounce);
ce1a14dc 2602 acb->common.cb(acb->common.opaque, acb->ret);
6a7ad299 2603 qemu_bh_delete(acb->bh);
36afc451 2604 acb->bh = NULL;
ce1a14dc 2605 qemu_aio_release(acb);
83f64091 2606}
beac80cd 2607
f141eafe
AL
2608static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2609 int64_t sector_num,
2610 QEMUIOVector *qiov,
2611 int nb_sectors,
2612 BlockDriverCompletionFunc *cb,
2613 void *opaque,
2614 int is_write)
2615
83f64091 2616{
ce1a14dc 2617 BlockDriverAIOCBSync *acb;
ce1a14dc 2618
c16b5a2c 2619 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
f141eafe
AL
2620 acb->is_write = is_write;
2621 acb->qiov = qiov;
e268ca52 2622 acb->bounce = qemu_blockalign(bs, qiov->size);
f141eafe 2623
ce1a14dc
PB
2624 if (!acb->bh)
2625 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
f141eafe
AL
2626
2627 if (is_write) {
2628 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
1ed20acf 2629 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
f141eafe 2630 } else {
1ed20acf 2631 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
f141eafe
AL
2632 }
2633
ce1a14dc 2634 qemu_bh_schedule(acb->bh);
f141eafe 2635
ce1a14dc 2636 return &acb->common;
beac80cd
FB
2637}
2638
f141eafe
AL
2639static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2640 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 2641 BlockDriverCompletionFunc *cb, void *opaque)
beac80cd 2642{
f141eafe
AL
2643 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2644}
83f64091 2645
f141eafe
AL
2646static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2647 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2648 BlockDriverCompletionFunc *cb, void *opaque)
2649{
2650 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
beac80cd 2651}
beac80cd 2652
68485420
KW
2653
2654typedef struct BlockDriverAIOCBCoroutine {
2655 BlockDriverAIOCB common;
2656 BlockRequest req;
2657 bool is_write;
2658 QEMUBH* bh;
2659} BlockDriverAIOCBCoroutine;
2660
2661static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
2662{
2663 qemu_aio_flush();
2664}
2665
2666static AIOPool bdrv_em_co_aio_pool = {
2667 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
2668 .cancel = bdrv_aio_co_cancel_em,
2669};
2670
35246a68 2671static void bdrv_co_em_bh(void *opaque)
68485420
KW
2672{
2673 BlockDriverAIOCBCoroutine *acb = opaque;
2674
2675 acb->common.cb(acb->common.opaque, acb->req.error);
2676 qemu_bh_delete(acb->bh);
2677 qemu_aio_release(acb);
2678}
2679
b2a61371
SH
2680/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
2681static void coroutine_fn bdrv_co_do_rw(void *opaque)
2682{
2683 BlockDriverAIOCBCoroutine *acb = opaque;
2684 BlockDriverState *bs = acb->common.bs;
2685
2686 if (!acb->is_write) {
2687 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
2688 acb->req.nb_sectors, acb->req.qiov);
2689 } else {
2690 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
2691 acb->req.nb_sectors, acb->req.qiov);
2692 }
2693
35246a68 2694 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
b2a61371
SH
2695 qemu_bh_schedule(acb->bh);
2696}
2697
68485420
KW
2698static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
2699 int64_t sector_num,
2700 QEMUIOVector *qiov,
2701 int nb_sectors,
2702 BlockDriverCompletionFunc *cb,
2703 void *opaque,
8c5873d6 2704 bool is_write)
68485420
KW
2705{
2706 Coroutine *co;
2707 BlockDriverAIOCBCoroutine *acb;
2708
2709 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2710 acb->req.sector = sector_num;
2711 acb->req.nb_sectors = nb_sectors;
2712 acb->req.qiov = qiov;
2713 acb->is_write = is_write;
2714
8c5873d6 2715 co = qemu_coroutine_create(bdrv_co_do_rw);
68485420
KW
2716 qemu_coroutine_enter(co, acb);
2717
2718 return &acb->common;
2719}
2720
07f07615 2721static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
b2e12bc6 2722{
07f07615
PB
2723 BlockDriverAIOCBCoroutine *acb = opaque;
2724 BlockDriverState *bs = acb->common.bs;
b2e12bc6 2725
07f07615
PB
2726 acb->req.error = bdrv_co_flush(bs);
2727 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
b2e12bc6 2728 qemu_bh_schedule(acb->bh);
b2e12bc6
CH
2729}
2730
07f07615 2731BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
016f5cf6
AG
2732 BlockDriverCompletionFunc *cb, void *opaque)
2733{
07f07615 2734 trace_bdrv_aio_flush(bs, opaque);
016f5cf6 2735
07f07615
PB
2736 Coroutine *co;
2737 BlockDriverAIOCBCoroutine *acb;
016f5cf6 2738
07f07615
PB
2739 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2740 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
2741 qemu_coroutine_enter(co, acb);
016f5cf6 2742
016f5cf6
AG
2743 return &acb->common;
2744}
2745
4265d620
PB
2746static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
2747{
2748 BlockDriverAIOCBCoroutine *acb = opaque;
2749 BlockDriverState *bs = acb->common.bs;
2750
2751 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
2752 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
2753 qemu_bh_schedule(acb->bh);
2754}
2755
2756BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
2757 int64_t sector_num, int nb_sectors,
2758 BlockDriverCompletionFunc *cb, void *opaque)
2759{
2760 Coroutine *co;
2761 BlockDriverAIOCBCoroutine *acb;
2762
2763 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
2764
2765 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2766 acb->req.sector = sector_num;
2767 acb->req.nb_sectors = nb_sectors;
2768 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
2769 qemu_coroutine_enter(co, acb);
2770
2771 return &acb->common;
2772}
2773
ea2384d3
FB
2774void bdrv_init(void)
2775{
5efa9d5a 2776 module_call_init(MODULE_INIT_BLOCK);
ea2384d3 2777}
ce1a14dc 2778
eb852011
MA
2779void bdrv_init_with_whitelist(void)
2780{
2781 use_bdrv_whitelist = 1;
2782 bdrv_init();
2783}
2784
c16b5a2c
CH
2785void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2786 BlockDriverCompletionFunc *cb, void *opaque)
ce1a14dc 2787{
ce1a14dc
PB
2788 BlockDriverAIOCB *acb;
2789
6bbff9a0
AL
2790 if (pool->free_aiocb) {
2791 acb = pool->free_aiocb;
2792 pool->free_aiocb = acb->next;
ce1a14dc 2793 } else {
7267c094 2794 acb = g_malloc0(pool->aiocb_size);
6bbff9a0 2795 acb->pool = pool;
ce1a14dc
PB
2796 }
2797 acb->bs = bs;
2798 acb->cb = cb;
2799 acb->opaque = opaque;
2800 return acb;
2801}
2802
2803void qemu_aio_release(void *p)
2804{
6bbff9a0
AL
2805 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2806 AIOPool *pool = acb->pool;
2807 acb->next = pool->free_aiocb;
2808 pool->free_aiocb = acb;
ce1a14dc 2809}
19cb3738 2810
f9f05dc5
KW
2811/**************************************************************/
2812/* Coroutine block device emulation */
2813
2814typedef struct CoroutineIOCompletion {
2815 Coroutine *coroutine;
2816 int ret;
2817} CoroutineIOCompletion;
2818
2819static void bdrv_co_io_em_complete(void *opaque, int ret)
2820{
2821 CoroutineIOCompletion *co = opaque;
2822
2823 co->ret = ret;
2824 qemu_coroutine_enter(co->coroutine, NULL);
2825}
2826
2827static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
2828 int nb_sectors, QEMUIOVector *iov,
2829 bool is_write)
2830{
2831 CoroutineIOCompletion co = {
2832 .coroutine = qemu_coroutine_self(),
2833 };
2834 BlockDriverAIOCB *acb;
2835
2836 if (is_write) {
a652d160
SH
2837 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
2838 bdrv_co_io_em_complete, &co);
f9f05dc5 2839 } else {
a652d160
SH
2840 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
2841 bdrv_co_io_em_complete, &co);
f9f05dc5
KW
2842 }
2843
59370aaa 2844 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
f9f05dc5
KW
2845 if (!acb) {
2846 return -EIO;
2847 }
2848 qemu_coroutine_yield();
2849
2850 return co.ret;
2851}
2852
2853static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
2854 int64_t sector_num, int nb_sectors,
2855 QEMUIOVector *iov)
2856{
2857 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
2858}
2859
2860static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
2861 int64_t sector_num, int nb_sectors,
2862 QEMUIOVector *iov)
2863{
2864 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
2865}
2866
07f07615 2867static void coroutine_fn bdrv_flush_co_entry(void *opaque)
e7a8a783 2868{
07f07615
PB
2869 RwCo *rwco = opaque;
2870
2871 rwco->ret = bdrv_co_flush(rwco->bs);
2872}
2873
2874int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
2875{
2876 if (bs->open_flags & BDRV_O_NO_FLUSH) {
2877 return 0;
2878 } else if (!bs->drv) {
2879 return 0;
2880 } else if (bs->drv->bdrv_co_flush) {
2881 return bs->drv->bdrv_co_flush(bs);
2882 } else if (bs->drv->bdrv_aio_flush) {
2883 BlockDriverAIOCB *acb;
2884 CoroutineIOCompletion co = {
2885 .coroutine = qemu_coroutine_self(),
2886 };
2887
2888 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
2889 if (acb == NULL) {
2890 return -EIO;
2891 } else {
2892 qemu_coroutine_yield();
2893 return co.ret;
2894 }
07f07615
PB
2895 } else {
2896 /*
2897 * Some block drivers always operate in either writethrough or unsafe
2898 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
2899 * know how the server works (because the behaviour is hardcoded or
2900 * depends on server-side configuration), so we can't ensure that
2901 * everything is safe on disk. Returning an error doesn't work because
2902 * that would break guests even if the server operates in writethrough
2903 * mode.
2904 *
2905 * Let's hope the user knows what he's doing.
2906 */
2907 return 0;
2908 }
2909}
2910
2911int bdrv_flush(BlockDriverState *bs)
2912{
2913 Coroutine *co;
2914 RwCo rwco = {
2915 .bs = bs,
2916 .ret = NOT_DONE,
e7a8a783 2917 };
e7a8a783 2918
07f07615
PB
2919 if (qemu_in_coroutine()) {
2920 /* Fast-path if already in coroutine context */
2921 bdrv_flush_co_entry(&rwco);
2922 } else {
2923 co = qemu_coroutine_create(bdrv_flush_co_entry);
2924 qemu_coroutine_enter(co, &rwco);
2925 while (rwco.ret == NOT_DONE) {
2926 qemu_aio_wait();
2927 }
e7a8a783 2928 }
07f07615
PB
2929
2930 return rwco.ret;
e7a8a783
KW
2931}
2932
4265d620
PB
2933static void coroutine_fn bdrv_discard_co_entry(void *opaque)
2934{
2935 RwCo *rwco = opaque;
2936
2937 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
2938}
2939
2940int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
2941 int nb_sectors)
2942{
2943 if (!bs->drv) {
2944 return -ENOMEDIUM;
2945 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
2946 return -EIO;
2947 } else if (bs->read_only) {
2948 return -EROFS;
2949 } else if (bs->drv->bdrv_co_discard) {
2950 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
2951 } else if (bs->drv->bdrv_aio_discard) {
2952 BlockDriverAIOCB *acb;
2953 CoroutineIOCompletion co = {
2954 .coroutine = qemu_coroutine_self(),
2955 };
2956
2957 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
2958 bdrv_co_io_em_complete, &co);
2959 if (acb == NULL) {
2960 return -EIO;
2961 } else {
2962 qemu_coroutine_yield();
2963 return co.ret;
2964 }
4265d620
PB
2965 } else {
2966 return 0;
2967 }
2968}
2969
2970int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
2971{
2972 Coroutine *co;
2973 RwCo rwco = {
2974 .bs = bs,
2975 .sector_num = sector_num,
2976 .nb_sectors = nb_sectors,
2977 .ret = NOT_DONE,
2978 };
2979
2980 if (qemu_in_coroutine()) {
2981 /* Fast-path if already in coroutine context */
2982 bdrv_discard_co_entry(&rwco);
2983 } else {
2984 co = qemu_coroutine_create(bdrv_discard_co_entry);
2985 qemu_coroutine_enter(co, &rwco);
2986 while (rwco.ret == NOT_DONE) {
2987 qemu_aio_wait();
2988 }
2989 }
2990
2991 return rwco.ret;
2992}
2993
19cb3738
FB
2994/**************************************************************/
2995/* removable device support */
2996
2997/**
2998 * Return TRUE if the media is present
2999 */
3000int bdrv_is_inserted(BlockDriverState *bs)
3001{
3002 BlockDriver *drv = bs->drv;
a1aff5bf 3003
19cb3738
FB
3004 if (!drv)
3005 return 0;
3006 if (!drv->bdrv_is_inserted)
a1aff5bf
MA
3007 return 1;
3008 return drv->bdrv_is_inserted(bs);
19cb3738
FB
3009}
3010
3011/**
8e49ca46
MA
3012 * Return whether the media changed since the last call to this
3013 * function, or -ENOTSUP if we don't know. Most drivers don't know.
19cb3738
FB
3014 */
3015int bdrv_media_changed(BlockDriverState *bs)
3016{
3017 BlockDriver *drv = bs->drv;
19cb3738 3018
8e49ca46
MA
3019 if (drv && drv->bdrv_media_changed) {
3020 return drv->bdrv_media_changed(bs);
3021 }
3022 return -ENOTSUP;
19cb3738
FB
3023}
3024
3025/**
3026 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3027 */
fdec4404 3028void bdrv_eject(BlockDriverState *bs, int eject_flag)
19cb3738
FB
3029{
3030 BlockDriver *drv = bs->drv;
19cb3738 3031
822e1cd1
MA
3032 if (drv && drv->bdrv_eject) {
3033 drv->bdrv_eject(bs, eject_flag);
19cb3738
FB
3034 }
3035}
3036
19cb3738
FB
3037/**
3038 * Lock or unlock the media (if it is locked, the user won't be able
3039 * to eject it manually).
3040 */
025e849a 3041void bdrv_lock_medium(BlockDriverState *bs, bool locked)
19cb3738
FB
3042{
3043 BlockDriver *drv = bs->drv;
3044
025e849a 3045 trace_bdrv_lock_medium(bs, locked);
b8c6d095 3046
025e849a
MA
3047 if (drv && drv->bdrv_lock_medium) {
3048 drv->bdrv_lock_medium(bs, locked);
19cb3738
FB
3049 }
3050}
985a03b0
TS
3051
3052/* needed for generic scsi interface */
3053
3054int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3055{
3056 BlockDriver *drv = bs->drv;
3057
3058 if (drv && drv->bdrv_ioctl)
3059 return drv->bdrv_ioctl(bs, req, buf);
3060 return -ENOTSUP;
3061}
7d780669 3062
221f715d
AL
3063BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3064 unsigned long int req, void *buf,
3065 BlockDriverCompletionFunc *cb, void *opaque)
7d780669 3066{
221f715d 3067 BlockDriver *drv = bs->drv;
7d780669 3068
221f715d
AL
3069 if (drv && drv->bdrv_aio_ioctl)
3070 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3071 return NULL;
7d780669 3072}
e268ca52 3073
7b6f9300
MA
3074void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3075{
3076 bs->buffer_alignment = align;
3077}
7cd1e32a 3078
e268ca52
AL
3079void *qemu_blockalign(BlockDriverState *bs, size_t size)
3080{
3081 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3082}
7cd1e32a
LS
3083
3084void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3085{
3086 int64_t bitmap_size;
a55eb92c 3087
aaa0eb75 3088 bs->dirty_count = 0;
a55eb92c 3089 if (enable) {
c6d22830
JK
3090 if (!bs->dirty_bitmap) {
3091 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3092 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3093 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
a55eb92c 3094
7267c094 3095 bs->dirty_bitmap = g_malloc0(bitmap_size);
a55eb92c 3096 }
7cd1e32a 3097 } else {
c6d22830 3098 if (bs->dirty_bitmap) {
7267c094 3099 g_free(bs->dirty_bitmap);
c6d22830 3100 bs->dirty_bitmap = NULL;
a55eb92c 3101 }
7cd1e32a
LS
3102 }
3103}
3104
3105int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3106{
6ea44308 3107 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c 3108
c6d22830
JK
3109 if (bs->dirty_bitmap &&
3110 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
6d59fec1
MT
3111 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3112 (1UL << (chunk % (sizeof(unsigned long) * 8))));
7cd1e32a
LS
3113 } else {
3114 return 0;
3115 }
3116}
3117
a55eb92c
JK
3118void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3119 int nr_sectors)
7cd1e32a
LS
3120{
3121 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3122}
aaa0eb75
LS
3123
3124int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3125{
3126 return bs->dirty_count;
3127}
f88e1a42 3128
db593f25
MT
3129void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3130{
3131 assert(bs->in_use != in_use);
3132 bs->in_use = in_use;
3133}
3134
3135int bdrv_in_use(BlockDriverState *bs)
3136{
3137 return bs->in_use;
3138}
3139
28a7282a
LC
3140void bdrv_iostatus_enable(BlockDriverState *bs)
3141{
3142 bs->iostatus = BDRV_IOS_OK;
3143}
3144
3145/* The I/O status is only enabled if the drive explicitly
3146 * enables it _and_ the VM is configured to stop on errors */
3147bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3148{
3149 return (bs->iostatus != BDRV_IOS_INVAL &&
3150 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3151 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3152 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3153}
3154
3155void bdrv_iostatus_disable(BlockDriverState *bs)
3156{
3157 bs->iostatus = BDRV_IOS_INVAL;
3158}
3159
3160void bdrv_iostatus_reset(BlockDriverState *bs)
3161{
3162 if (bdrv_iostatus_is_enabled(bs)) {
3163 bs->iostatus = BDRV_IOS_OK;
3164 }
3165}
3166
3167/* XXX: Today this is set by device models because it makes the implementation
3168 quite simple. However, the block layer knows about the error, so it's
3169 possible to implement this without device models being involved */
3170void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3171{
3172 if (bdrv_iostatus_is_enabled(bs) && bs->iostatus == BDRV_IOS_OK) {
3173 assert(error >= 0);
3174 bs->iostatus = error == ENOSPC ? BDRV_IOS_ENOSPC : BDRV_IOS_FAILED;
3175 }
3176}
3177
a597e79c
CH
3178void
3179bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3180 enum BlockAcctType type)
3181{
3182 assert(type < BDRV_MAX_IOTYPE);
3183
3184 cookie->bytes = bytes;
c488c7f6 3185 cookie->start_time_ns = get_clock();
a597e79c
CH
3186 cookie->type = type;
3187}
3188
3189void
3190bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3191{
3192 assert(cookie->type < BDRV_MAX_IOTYPE);
3193
3194 bs->nr_bytes[cookie->type] += cookie->bytes;
3195 bs->nr_ops[cookie->type]++;
c488c7f6 3196 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
a597e79c
CH
3197}
3198
f88e1a42
JS
3199int bdrv_img_create(const char *filename, const char *fmt,
3200 const char *base_filename, const char *base_fmt,
3201 char *options, uint64_t img_size, int flags)
3202{
3203 QEMUOptionParameter *param = NULL, *create_options = NULL;
d220894e 3204 QEMUOptionParameter *backing_fmt, *backing_file, *size;
f88e1a42
JS
3205 BlockDriverState *bs = NULL;
3206 BlockDriver *drv, *proto_drv;
96df67d1 3207 BlockDriver *backing_drv = NULL;
f88e1a42
JS
3208 int ret = 0;
3209
3210 /* Find driver and parse its options */
3211 drv = bdrv_find_format(fmt);
3212 if (!drv) {
3213 error_report("Unknown file format '%s'", fmt);
4f70f249 3214 ret = -EINVAL;
f88e1a42
JS
3215 goto out;
3216 }
3217
3218 proto_drv = bdrv_find_protocol(filename);
3219 if (!proto_drv) {
3220 error_report("Unknown protocol '%s'", filename);
4f70f249 3221 ret = -EINVAL;
f88e1a42
JS
3222 goto out;
3223 }
3224
3225 create_options = append_option_parameters(create_options,
3226 drv->create_options);
3227 create_options = append_option_parameters(create_options,
3228 proto_drv->create_options);
3229
3230 /* Create parameter list with default values */
3231 param = parse_option_parameters("", create_options, param);
3232
3233 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3234
3235 /* Parse -o options */
3236 if (options) {
3237 param = parse_option_parameters(options, create_options, param);
3238 if (param == NULL) {
3239 error_report("Invalid options for file format '%s'.", fmt);
4f70f249 3240 ret = -EINVAL;
f88e1a42
JS
3241 goto out;
3242 }
3243 }
3244
3245 if (base_filename) {
3246 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3247 base_filename)) {
3248 error_report("Backing file not supported for file format '%s'",
3249 fmt);
4f70f249 3250 ret = -EINVAL;
f88e1a42
JS
3251 goto out;
3252 }
3253 }
3254
3255 if (base_fmt) {
3256 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3257 error_report("Backing file format not supported for file "
3258 "format '%s'", fmt);
4f70f249 3259 ret = -EINVAL;
f88e1a42
JS
3260 goto out;
3261 }
3262 }
3263
792da93a
JS
3264 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3265 if (backing_file && backing_file->value.s) {
3266 if (!strcmp(filename, backing_file->value.s)) {
3267 error_report("Error: Trying to create an image with the "
3268 "same filename as the backing file");
4f70f249 3269 ret = -EINVAL;
792da93a
JS
3270 goto out;
3271 }
3272 }
3273
f88e1a42
JS
3274 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3275 if (backing_fmt && backing_fmt->value.s) {
96df67d1
SH
3276 backing_drv = bdrv_find_format(backing_fmt->value.s);
3277 if (!backing_drv) {
f88e1a42
JS
3278 error_report("Unknown backing file format '%s'",
3279 backing_fmt->value.s);
4f70f249 3280 ret = -EINVAL;
f88e1a42
JS
3281 goto out;
3282 }
3283 }
3284
3285 // The size for the image must always be specified, with one exception:
3286 // If we are using a backing file, we can obtain the size from there
d220894e
KW
3287 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3288 if (size && size->value.n == -1) {
f88e1a42
JS
3289 if (backing_file && backing_file->value.s) {
3290 uint64_t size;
f88e1a42
JS
3291 char buf[32];
3292
f88e1a42
JS
3293 bs = bdrv_new("");
3294
96df67d1 3295 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
f88e1a42 3296 if (ret < 0) {
96df67d1 3297 error_report("Could not open '%s'", backing_file->value.s);
f88e1a42
JS
3298 goto out;
3299 }
3300 bdrv_get_geometry(bs, &size);
3301 size *= 512;
3302
3303 snprintf(buf, sizeof(buf), "%" PRId64, size);
3304 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3305 } else {
3306 error_report("Image creation needs a size parameter");
4f70f249 3307 ret = -EINVAL;
f88e1a42
JS
3308 goto out;
3309 }
3310 }
3311
3312 printf("Formatting '%s', fmt=%s ", filename, fmt);
3313 print_option_parameters(param);
3314 puts("");
3315
3316 ret = bdrv_create(drv, filename, param);
3317
3318 if (ret < 0) {
3319 if (ret == -ENOTSUP) {
3320 error_report("Formatting or formatting option not supported for "
3321 "file format '%s'", fmt);
3322 } else if (ret == -EFBIG) {
3323 error_report("The image size is too large for file format '%s'",
3324 fmt);
3325 } else {
3326 error_report("%s: error while creating %s: %s", filename, fmt,
3327 strerror(-ret));
3328 }
3329 }
3330
3331out:
3332 free_option_parameters(create_options);
3333 free_option_parameters(param);
3334
3335 if (bs) {
3336 bdrv_delete(bs);
3337 }
4f70f249
JS
3338
3339 return ret;
f88e1a42 3340}