]> git.proxmox.com Git - qemu.git/blame - block.c
block: add the blockio limits command line support
[qemu.git] / block.c
CommitLineData
fc01f7e7
FB
1/*
2 * QEMU System Emulator block driver
5fafdf24 3 *
fc01f7e7 4 * Copyright (c) 2003 Fabrice Bellard
5fafdf24 5 *
fc01f7e7
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
3990d09a 24#include "config-host.h"
faf07963 25#include "qemu-common.h"
6d519a5f 26#include "trace.h"
376253ec 27#include "monitor.h"
ea2384d3 28#include "block_int.h"
5efa9d5a 29#include "module.h"
f795e743 30#include "qjson.h"
68485420 31#include "qemu-coroutine.h"
b2023818 32#include "qmp-commands.h"
0563e191 33#include "qemu-timer.h"
fc01f7e7 34
71e72a19 35#ifdef CONFIG_BSD
7674e7bf
FB
36#include <sys/types.h>
37#include <sys/stat.h>
38#include <sys/ioctl.h>
72cf2d4f 39#include <sys/queue.h>
c5e97233 40#ifndef __DragonFly__
7674e7bf
FB
41#include <sys/disk.h>
42#endif
c5e97233 43#endif
7674e7bf 44
49dc768d
AL
45#ifdef _WIN32
46#include <windows.h>
47#endif
48
1c9805a3
SH
49#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
50
7d4b4ba5 51static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
f141eafe
AL
52static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
53 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
c87c0672 54 BlockDriverCompletionFunc *cb, void *opaque);
f141eafe
AL
55static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
56 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 57 BlockDriverCompletionFunc *cb, void *opaque);
f9f05dc5
KW
58static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
59 int64_t sector_num, int nb_sectors,
60 QEMUIOVector *iov);
61static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
62 int64_t sector_num, int nb_sectors,
63 QEMUIOVector *iov);
c5fbe571
SH
64static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
65 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
1c9805a3
SH
66static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
67 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
b2a61371
SH
68static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
69 int64_t sector_num,
70 QEMUIOVector *qiov,
71 int nb_sectors,
72 BlockDriverCompletionFunc *cb,
73 void *opaque,
8c5873d6 74 bool is_write);
b2a61371 75static void coroutine_fn bdrv_co_do_rw(void *opaque);
ec530c81 76
1b7bdbc1
SH
77static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
78 QTAILQ_HEAD_INITIALIZER(bdrv_states);
7ee930d0 79
8a22f02a
SH
80static QLIST_HEAD(, BlockDriver) bdrv_drivers =
81 QLIST_HEAD_INITIALIZER(bdrv_drivers);
ea2384d3 82
f9092b10
MA
83/* The device to use for VM snapshots */
84static BlockDriverState *bs_snapshots;
85
eb852011
MA
86/* If non-zero, use only whitelisted block drivers */
87static int use_bdrv_whitelist;
88
9e0b22f4
SH
89#ifdef _WIN32
/*
 * Return 1 when filename starts with an ASCII letter immediately followed
 * by ':' (for example "c:" or "D:\dir"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_lower = (letter >= 'a' && letter <= 'z');
    int is_upper = (letter >= 'A' && letter <= 'Z');

    if (!is_lower && !is_upper) {
        /* filename[1] is deliberately not read here: filename may be "" */
        return 0;
    }
    return filename[1] == ':';
}
96
97int is_windows_drive(const char *filename)
98{
99 if (is_windows_drive_prefix(filename) &&
100 filename[2] == '\0')
101 return 1;
102 if (strstart(filename, "\\\\.\\", NULL) ||
103 strstart(filename, "//./", NULL))
104 return 1;
105 return 0;
106}
107#endif
108
0563e191
ZYW
109/* throttling disk I/O limits */
110static void bdrv_block_timer(void *opaque)
111{
112 BlockDriverState *bs = opaque;
113
114 qemu_co_queue_next(&bs->throttled_reqs);
115}
116
117void bdrv_io_limits_enable(BlockDriverState *bs)
118{
119 qemu_co_queue_init(&bs->throttled_reqs);
120 bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
121 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
122 bs->slice_start = qemu_get_clock_ns(vm_clock);
123 bs->slice_end = bs->slice_start + bs->slice_time;
124 memset(&bs->io_base, 0, sizeof(bs->io_base));
125 bs->io_limits_enabled = true;
126}
127
128bool bdrv_io_limits_enabled(BlockDriverState *bs)
129{
130 BlockIOLimit *io_limits = &bs->io_limits;
131 return io_limits->bps[BLOCK_IO_LIMIT_READ]
132 || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
133 || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
134 || io_limits->iops[BLOCK_IO_LIMIT_READ]
135 || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
136 || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
137}
138
9e0b22f4
SH
/*
 * Check whether path looks like "<protocol>:...".
 *
 * Returns 1 when path contains a ':' anywhere, 0 otherwise.  On Windows,
 * names recognised by is_windows_drive() or is_windows_drive_prefix() are
 * never treated as having a protocol.
 */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    if (strchr(path, ':') == NULL) {
        return 0;
    }
    return 1;
}
151
/*
 * Return 1 when path is absolute, 0 otherwise.
 *
 * Everything up to and including the first ':' (if any) is skipped, and
 * the remainder is absolute when it starts with '/' (or '\\' on Windows).
 * On Windows a path that itself starts with '/' or '\\' is absolute too.
 */
int path_is_absolute(const char *path)
{
    const char *rest;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif

    rest = strchr(path, ':');
    rest = rest ? rest + 1 : path;

#ifdef _WIN32
    return (*rest == '/' || *rest == '\\');
#else
    return (*rest == '/');
#endif
}
171
83f64091
FB
/*
 * Build in dest (at most dest_size bytes, always NUL-terminated) the path
 * of filename interpreted relative to base_path.
 *
 * If filename is absolute it is copied unchanged.  Otherwise the prefix of
 * base_path up to and including its last '/' (or '\\' on Windows), or up to
 * and including its first ':' if that comes later, is kept and filename is
 * appended to it.  URLs are supported.  Nothing is written when
 * dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_proto, *after_sep;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just past the "<protocol>:" part, if there is one */
    after_proto = strchr(base_path, ':');
    after_proto = after_proto ? after_proto + 1 : base_path;

    /* position just past the last directory separator, if there is one */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* keep the longer of the two prefixes */
    if (after_sep > after_proto) {
        after_proto = after_sep;
    }

    prefix_len = after_proto - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
215
5efa9d5a 216void bdrv_register(BlockDriver *bdrv)
ea2384d3 217{
8c5873d6
SH
218 /* Block drivers without coroutine functions need emulation */
219 if (!bdrv->bdrv_co_readv) {
f9f05dc5
KW
220 bdrv->bdrv_co_readv = bdrv_co_readv_em;
221 bdrv->bdrv_co_writev = bdrv_co_writev_em;
222
f8c35c1d
SH
223 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
224 * the block driver lacks aio we need to emulate that too.
225 */
f9f05dc5
KW
226 if (!bdrv->bdrv_aio_readv) {
227 /* add AIO emulation layer */
228 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
229 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
f9f05dc5 230 }
83f64091 231 }
b2e12bc6 232
8a22f02a 233 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
ea2384d3 234}
b338082b
FB
235
236/* create a new block device (by default it is empty) */
237BlockDriverState *bdrv_new(const char *device_name)
238{
1b7bdbc1 239 BlockDriverState *bs;
b338082b 240
7267c094 241 bs = g_malloc0(sizeof(BlockDriverState));
b338082b 242 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
ea2384d3 243 if (device_name[0] != '\0') {
1b7bdbc1 244 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
ea2384d3 245 }
28a7282a 246 bdrv_iostatus_disable(bs);
b338082b
FB
247 return bs;
248}
249
ea2384d3
FB
250BlockDriver *bdrv_find_format(const char *format_name)
251{
252 BlockDriver *drv1;
8a22f02a
SH
253 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
254 if (!strcmp(drv1->format_name, format_name)) {
ea2384d3 255 return drv1;
8a22f02a 256 }
ea2384d3
FB
257 }
258 return NULL;
259}
260
eb852011
MA
261static int bdrv_is_whitelisted(BlockDriver *drv)
262{
263 static const char *whitelist[] = {
264 CONFIG_BDRV_WHITELIST
265 };
266 const char **p;
267
268 if (!whitelist[0])
269 return 1; /* no whitelist, anything goes */
270
271 for (p = whitelist; *p; p++) {
272 if (!strcmp(drv->format_name, *p)) {
273 return 1;
274 }
275 }
276 return 0;
277}
278
279BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
280{
281 BlockDriver *drv = bdrv_find_format(format_name);
282 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
283}
284
0e7e1989
KW
285int bdrv_create(BlockDriver *drv, const char* filename,
286 QEMUOptionParameter *options)
ea2384d3
FB
287{
288 if (!drv->bdrv_create)
289 return -ENOTSUP;
0e7e1989
KW
290
291 return drv->bdrv_create(filename, options);
ea2384d3
FB
292}
293
84a12e66
CH
294int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
295{
296 BlockDriver *drv;
297
b50cbabc 298 drv = bdrv_find_protocol(filename);
84a12e66 299 if (drv == NULL) {
16905d71 300 return -ENOENT;
84a12e66
CH
301 }
302
303 return bdrv_create(drv, filename, options);
304}
305
#ifdef _WIN32
/*
 * Store the name of a new temporary file in filename.
 *
 * size is the capacity of filename and must be at least MAX_PATH, because
 * GetTempFileName() may write that many characters.  The function cannot
 * report errors through its return type, so on failure filename is set to
 * the empty string; later attempts to use it then fail instead of
 * operating on garbage.
 */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];
    DWORD dir_len;

    /* GetTempFileName() does not take a buffer size */
    assert(size >= MAX_PATH);

    dir_len = GetTempPath(MAX_PATH, temp_dir);
    if (dir_len == 0 || dir_len > MAX_PATH ||
        GetTempFileName(temp_dir, "qem", 0, filename) == 0) {
        filename[0] = '\0';
    }
}
#else
/*
 * Store the name of a newly created temporary file in filename.
 *
 * The file is created by mkstemp() under $TMPDIR (or /tmp when TMPDIR is
 * unset) and closed again; only its name is handed back.  size is the
 * capacity of filename.  The function cannot report errors through its
 * return type, so when the name does not fit or mkstemp() fails, filename
 * is set to the empty string rather than left holding an unexpanded
 * template.
 */
void get_tmp_filename(char *filename, int size)
{
    const char *tmpdir;
    int fd, len;

    if (size <= 0) {
        return;
    }

    /* XXX: race condition possible (file is closed, then reopened by name) */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }

    len = snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    if (len < 0 || len >= size) {
        /* template was truncated; mkstemp() could not use it reliably */
        filename[0] = '\0';
        return;
    }

    fd = mkstemp(filename);
    if (fd < 0) {
        filename[0] = '\0';
        return;
    }
    close(fd);
}
#endif
fc01f7e7 328
84a12e66
CH
329/*
330 * Detect host devices. By convention, /dev/cdrom[N] is always
331 * recognized as a host CDROM.
332 */
333static BlockDriver *find_hdev_driver(const char *filename)
334{
335 int score_max = 0, score;
336 BlockDriver *drv = NULL, *d;
337
338 QLIST_FOREACH(d, &bdrv_drivers, list) {
339 if (d->bdrv_probe_device) {
340 score = d->bdrv_probe_device(filename);
341 if (score > score_max) {
342 score_max = score;
343 drv = d;
344 }
345 }
346 }
347
348 return drv;
349}
350
b50cbabc 351BlockDriver *bdrv_find_protocol(const char *filename)
83f64091
FB
352{
353 BlockDriver *drv1;
354 char protocol[128];
1cec71e3 355 int len;
83f64091 356 const char *p;
19cb3738 357
66f82cee
KW
358 /* TODO Drivers without bdrv_file_open must be specified explicitly */
359
39508e7a
CH
360 /*
361 * XXX(hch): we really should not let host device detection
362 * override an explicit protocol specification, but moving this
363 * later breaks access to device names with colons in them.
364 * Thanks to the brain-dead persistent naming schemes on udev-
365 * based Linux systems those actually are quite common.
366 */
367 drv1 = find_hdev_driver(filename);
368 if (drv1) {
369 return drv1;
370 }
371
9e0b22f4 372 if (!path_has_protocol(filename)) {
39508e7a 373 return bdrv_find_format("file");
84a12e66 374 }
9e0b22f4
SH
375 p = strchr(filename, ':');
376 assert(p != NULL);
1cec71e3
AL
377 len = p - filename;
378 if (len > sizeof(protocol) - 1)
379 len = sizeof(protocol) - 1;
380 memcpy(protocol, filename, len);
381 protocol[len] = '\0';
8a22f02a 382 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
5fafdf24 383 if (drv1->protocol_name &&
8a22f02a 384 !strcmp(drv1->protocol_name, protocol)) {
83f64091 385 return drv1;
8a22f02a 386 }
83f64091
FB
387 }
388 return NULL;
389}
390
c98ac35d 391static int find_image_format(const char *filename, BlockDriver **pdrv)
f3a5d3f8
CH
392{
393 int ret, score, score_max;
394 BlockDriver *drv1, *drv;
395 uint8_t buf[2048];
396 BlockDriverState *bs;
397
f5edb014 398 ret = bdrv_file_open(&bs, filename, 0);
c98ac35d
SW
399 if (ret < 0) {
400 *pdrv = NULL;
401 return ret;
402 }
f8ea0b00 403
08a00559
KW
404 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
405 if (bs->sg || !bdrv_is_inserted(bs)) {
1a396859 406 bdrv_delete(bs);
c98ac35d
SW
407 drv = bdrv_find_format("raw");
408 if (!drv) {
409 ret = -ENOENT;
410 }
411 *pdrv = drv;
412 return ret;
1a396859 413 }
f8ea0b00 414
83f64091
FB
415 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
416 bdrv_delete(bs);
417 if (ret < 0) {
c98ac35d
SW
418 *pdrv = NULL;
419 return ret;
83f64091
FB
420 }
421
ea2384d3 422 score_max = 0;
84a12e66 423 drv = NULL;
8a22f02a 424 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
83f64091
FB
425 if (drv1->bdrv_probe) {
426 score = drv1->bdrv_probe(buf, ret, filename);
427 if (score > score_max) {
428 score_max = score;
429 drv = drv1;
430 }
0849bf08 431 }
fc01f7e7 432 }
c98ac35d
SW
433 if (!drv) {
434 ret = -ENOENT;
435 }
436 *pdrv = drv;
437 return ret;
ea2384d3
FB
438}
439
51762288
SH
440/**
441 * Set the current 'total_sectors' value
442 */
443static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
444{
445 BlockDriver *drv = bs->drv;
446
396759ad
NB
447 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
448 if (bs->sg)
449 return 0;
450
51762288
SH
451 /* query actual device if possible, otherwise just trust the hint */
452 if (drv->bdrv_getlength) {
453 int64_t length = drv->bdrv_getlength(bs);
454 if (length < 0) {
455 return length;
456 }
457 hint = length >> BDRV_SECTOR_BITS;
458 }
459
460 bs->total_sectors = hint;
461 return 0;
462}
463
c3993cdc
SH
464/**
465 * Set open flags for a given cache mode
466 *
467 * Return 0 on success, -1 if the cache mode was invalid.
468 */
469int bdrv_parse_cache_flags(const char *mode, int *flags)
470{
471 *flags &= ~BDRV_O_CACHE_MASK;
472
473 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
474 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
92196b2f
SH
475 } else if (!strcmp(mode, "directsync")) {
476 *flags |= BDRV_O_NOCACHE;
c3993cdc
SH
477 } else if (!strcmp(mode, "writeback")) {
478 *flags |= BDRV_O_CACHE_WB;
479 } else if (!strcmp(mode, "unsafe")) {
480 *flags |= BDRV_O_CACHE_WB;
481 *flags |= BDRV_O_NO_FLUSH;
482 } else if (!strcmp(mode, "writethrough")) {
483 /* this is the default */
484 } else {
485 return -1;
486 }
487
488 return 0;
489}
490
57915332
KW
491/*
492 * Common part for opening disk images and files
493 */
494static int bdrv_open_common(BlockDriverState *bs, const char *filename,
495 int flags, BlockDriver *drv)
496{
497 int ret, open_flags;
498
499 assert(drv != NULL);
500
28dcee10
SH
501 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
502
66f82cee 503 bs->file = NULL;
51762288 504 bs->total_sectors = 0;
57915332
KW
505 bs->encrypted = 0;
506 bs->valid_key = 0;
03f541bd 507 bs->sg = 0;
57915332 508 bs->open_flags = flags;
03f541bd 509 bs->growable = 0;
57915332
KW
510 bs->buffer_alignment = 512;
511
512 pstrcpy(bs->filename, sizeof(bs->filename), filename);
03f541bd 513 bs->backing_file[0] = '\0';
57915332
KW
514
515 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
516 return -ENOTSUP;
517 }
518
519 bs->drv = drv;
7267c094 520 bs->opaque = g_malloc0(drv->instance_size);
57915332 521
03f541bd 522 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
57915332
KW
523
524 /*
525 * Clear flags that are internal to the block layer before opening the
526 * image.
527 */
528 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
529
530 /*
ebabb67a 531 * Snapshots should be writable.
57915332
KW
532 */
533 if (bs->is_temporary) {
534 open_flags |= BDRV_O_RDWR;
535 }
536
e7c63796
SH
537 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
538
66f82cee
KW
539 /* Open the image, either directly or using a protocol */
540 if (drv->bdrv_file_open) {
541 ret = drv->bdrv_file_open(bs, filename, open_flags);
542 } else {
543 ret = bdrv_file_open(&bs->file, filename, open_flags);
544 if (ret >= 0) {
545 ret = drv->bdrv_open(bs, open_flags);
546 }
547 }
548
57915332
KW
549 if (ret < 0) {
550 goto free_and_fail;
551 }
552
51762288
SH
553 ret = refresh_total_sectors(bs, bs->total_sectors);
554 if (ret < 0) {
555 goto free_and_fail;
57915332 556 }
51762288 557
57915332
KW
558#ifndef _WIN32
559 if (bs->is_temporary) {
560 unlink(filename);
561 }
562#endif
563 return 0;
564
565free_and_fail:
66f82cee
KW
566 if (bs->file) {
567 bdrv_delete(bs->file);
568 bs->file = NULL;
569 }
7267c094 570 g_free(bs->opaque);
57915332
KW
571 bs->opaque = NULL;
572 bs->drv = NULL;
573 return ret;
574}
575
b6ce07aa
KW
576/*
577 * Opens a file using a protocol (file, host_device, nbd, ...)
578 */
83f64091 579int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
ea2384d3 580{
83f64091 581 BlockDriverState *bs;
6db95603 582 BlockDriver *drv;
83f64091
FB
583 int ret;
584
b50cbabc 585 drv = bdrv_find_protocol(filename);
6db95603
CH
586 if (!drv) {
587 return -ENOENT;
588 }
589
83f64091 590 bs = bdrv_new("");
b6ce07aa 591 ret = bdrv_open_common(bs, filename, flags, drv);
83f64091
FB
592 if (ret < 0) {
593 bdrv_delete(bs);
594 return ret;
3b0d4f61 595 }
71d0770c 596 bs->growable = 1;
83f64091
FB
597 *pbs = bs;
598 return 0;
599}
600
b6ce07aa
KW
601/*
602 * Opens a disk image (raw, qcow2, vmdk, ...)
603 */
d6e9098e
KW
604int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
605 BlockDriver *drv)
ea2384d3 606{
b6ce07aa 607 int ret;
2b572816 608 char tmp_filename[PATH_MAX];
712e7874 609
83f64091 610 if (flags & BDRV_O_SNAPSHOT) {
ea2384d3
FB
611 BlockDriverState *bs1;
612 int64_t total_size;
7c96d46e 613 int is_protocol = 0;
91a073a9
KW
614 BlockDriver *bdrv_qcow2;
615 QEMUOptionParameter *options;
b6ce07aa 616 char backing_filename[PATH_MAX];
3b46e624 617
ea2384d3
FB
618 /* if snapshot, we create a temporary backing file and open it
619 instead of opening 'filename' directly */
33e3963e 620
ea2384d3
FB
621 /* if there is a backing file, use it */
622 bs1 = bdrv_new("");
d6e9098e 623 ret = bdrv_open(bs1, filename, 0, drv);
51d7c00c 624 if (ret < 0) {
ea2384d3 625 bdrv_delete(bs1);
51d7c00c 626 return ret;
ea2384d3 627 }
3e82990b 628 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
7c96d46e
AL
629
630 if (bs1->drv && bs1->drv->protocol_name)
631 is_protocol = 1;
632
ea2384d3 633 bdrv_delete(bs1);
3b46e624 634
ea2384d3 635 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
7c96d46e
AL
636
637 /* Real path is meaningless for protocols */
638 if (is_protocol)
639 snprintf(backing_filename, sizeof(backing_filename),
640 "%s", filename);
114cdfa9
KS
641 else if (!realpath(filename, backing_filename))
642 return -errno;
7c96d46e 643
91a073a9
KW
644 bdrv_qcow2 = bdrv_find_format("qcow2");
645 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
646
3e82990b 647 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
91a073a9
KW
648 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
649 if (drv) {
650 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
651 drv->format_name);
652 }
653
654 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
d748768c 655 free_option_parameters(options);
51d7c00c
AL
656 if (ret < 0) {
657 return ret;
ea2384d3 658 }
91a073a9 659
ea2384d3 660 filename = tmp_filename;
91a073a9 661 drv = bdrv_qcow2;
ea2384d3
FB
662 bs->is_temporary = 1;
663 }
712e7874 664
b6ce07aa 665 /* Find the right image format driver */
6db95603 666 if (!drv) {
c98ac35d 667 ret = find_image_format(filename, &drv);
51d7c00c 668 }
6987307c 669
51d7c00c 670 if (!drv) {
51d7c00c 671 goto unlink_and_fail;
ea2384d3 672 }
b6ce07aa
KW
673
674 /* Open the image */
675 ret = bdrv_open_common(bs, filename, flags, drv);
676 if (ret < 0) {
6987307c
CH
677 goto unlink_and_fail;
678 }
679
b6ce07aa
KW
680 /* If there is a backing file, use it */
681 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
682 char backing_filename[PATH_MAX];
683 int back_flags;
684 BlockDriver *back_drv = NULL;
685
686 bs->backing_hd = bdrv_new("");
df2dbb4a
SH
687
688 if (path_has_protocol(bs->backing_file)) {
689 pstrcpy(backing_filename, sizeof(backing_filename),
690 bs->backing_file);
691 } else {
692 path_combine(backing_filename, sizeof(backing_filename),
693 filename, bs->backing_file);
694 }
695
696 if (bs->backing_format[0] != '\0') {
b6ce07aa 697 back_drv = bdrv_find_format(bs->backing_format);
df2dbb4a 698 }
b6ce07aa
KW
699
700 /* backing files always opened read-only */
701 back_flags =
702 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
703
704 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
705 if (ret < 0) {
706 bdrv_close(bs);
707 return ret;
708 }
709 if (bs->is_temporary) {
710 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
711 } else {
712 /* base image inherits from "parent" */
713 bs->backing_hd->keep_read_only = bs->keep_read_only;
714 }
715 }
716
717 if (!bdrv_key_required(bs)) {
7d4b4ba5 718 bdrv_dev_change_media_cb(bs, true);
b6ce07aa
KW
719 }
720
721 return 0;
722
723unlink_and_fail:
724 if (bs->is_temporary) {
725 unlink(filename);
726 }
727 return ret;
728}
729
fc01f7e7
FB
730void bdrv_close(BlockDriverState *bs)
731{
19cb3738 732 if (bs->drv) {
f9092b10
MA
733 if (bs == bs_snapshots) {
734 bs_snapshots = NULL;
735 }
557df6ac 736 if (bs->backing_hd) {
ea2384d3 737 bdrv_delete(bs->backing_hd);
557df6ac
SH
738 bs->backing_hd = NULL;
739 }
ea2384d3 740 bs->drv->bdrv_close(bs);
7267c094 741 g_free(bs->opaque);
ea2384d3
FB
742#ifdef _WIN32
743 if (bs->is_temporary) {
744 unlink(bs->filename);
745 }
67b915a5 746#endif
ea2384d3
FB
747 bs->opaque = NULL;
748 bs->drv = NULL;
b338082b 749
66f82cee
KW
750 if (bs->file != NULL) {
751 bdrv_close(bs->file);
752 }
753
7d4b4ba5 754 bdrv_dev_change_media_cb(bs, false);
b338082b
FB
755 }
756}
757
2bc93fed
MK
758void bdrv_close_all(void)
759{
760 BlockDriverState *bs;
761
762 QTAILQ_FOREACH(bs, &bdrv_states, list) {
763 bdrv_close(bs);
764 }
765}
766
d22b2f41
RH
767/* make a BlockDriverState anonymous by removing from bdrv_state list.
768 Also, NULL terminate the device_name to prevent double remove */
769void bdrv_make_anon(BlockDriverState *bs)
770{
771 if (bs->device_name[0] != '\0') {
772 QTAILQ_REMOVE(&bdrv_states, bs, list);
773 }
774 bs->device_name[0] = '\0';
775}
776
b338082b
FB
777void bdrv_delete(BlockDriverState *bs)
778{
fa879d62 779 assert(!bs->dev);
18846dee 780
1b7bdbc1 781 /* remove from list, if necessary */
d22b2f41 782 bdrv_make_anon(bs);
34c6f050 783
b338082b 784 bdrv_close(bs);
66f82cee
KW
785 if (bs->file != NULL) {
786 bdrv_delete(bs->file);
787 }
788
f9092b10 789 assert(bs != bs_snapshots);
7267c094 790 g_free(bs);
fc01f7e7
FB
791}
792
fa879d62
MA
793int bdrv_attach_dev(BlockDriverState *bs, void *dev)
794/* TODO change to DeviceState *dev when all users are qdevified */
18846dee 795{
fa879d62 796 if (bs->dev) {
18846dee
MA
797 return -EBUSY;
798 }
fa879d62 799 bs->dev = dev;
28a7282a 800 bdrv_iostatus_reset(bs);
18846dee
MA
801 return 0;
802}
803
fa879d62
MA
804/* TODO qdevified devices don't use this, remove when devices are qdevified */
805void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
18846dee 806{
fa879d62
MA
807 if (bdrv_attach_dev(bs, dev) < 0) {
808 abort();
809 }
810}
811
812void bdrv_detach_dev(BlockDriverState *bs, void *dev)
813/* TODO change to DeviceState *dev when all users are qdevified */
814{
815 assert(bs->dev == dev);
816 bs->dev = NULL;
0e49de52
MA
817 bs->dev_ops = NULL;
818 bs->dev_opaque = NULL;
29e05f20 819 bs->buffer_alignment = 512;
18846dee
MA
820}
821
fa879d62
MA
822/* TODO change to return DeviceState * when all users are qdevified */
823void *bdrv_get_attached_dev(BlockDriverState *bs)
18846dee 824{
fa879d62 825 return bs->dev;
18846dee
MA
826}
827
0e49de52
MA
828void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
829 void *opaque)
830{
831 bs->dev_ops = ops;
832 bs->dev_opaque = opaque;
2c6942fa
MA
833 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
834 bs_snapshots = NULL;
835 }
0e49de52
MA
836}
837
7d4b4ba5 838static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
0e49de52 839{
145feb17 840 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
7d4b4ba5 841 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
145feb17
MA
842 }
843}
844
2c6942fa
MA
845bool bdrv_dev_has_removable_media(BlockDriverState *bs)
846{
847 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
848}
849
025ccaa7
PB
850void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
851{
852 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
853 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
854 }
855}
856
e4def80b
MA
857bool bdrv_dev_is_tray_open(BlockDriverState *bs)
858{
859 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
860 return bs->dev_ops->is_tray_open(bs->dev_opaque);
861 }
862 return false;
863}
864
145feb17
MA
865static void bdrv_dev_resize_cb(BlockDriverState *bs)
866{
867 if (bs->dev_ops && bs->dev_ops->resize_cb) {
868 bs->dev_ops->resize_cb(bs->dev_opaque);
0e49de52
MA
869 }
870}
871
f107639a
MA
872bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
873{
874 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
875 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
876 }
877 return false;
878}
879
e97fc193
AL
880/*
881 * Run consistency checks on an image
882 *
e076f338 883 * Returns 0 if the check could be completed (it doesn't mean that the image is
a1c7273b 884 * free of errors) or -errno when an internal error occurred. The results of the
e076f338 885 * check are stored in res.
e97fc193 886 */
e076f338 887int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
e97fc193
AL
888{
889 if (bs->drv->bdrv_check == NULL) {
890 return -ENOTSUP;
891 }
892
e076f338 893 memset(res, 0, sizeof(*res));
9ac228e0 894 return bs->drv->bdrv_check(bs, res);
e97fc193
AL
895}
896
8a426614
KW
897#define COMMIT_BUF_SECTORS 2048
898
33e3963e
FB
899/* commit COW file into the raw image */
900int bdrv_commit(BlockDriverState *bs)
901{
19cb3738 902 BlockDriver *drv = bs->drv;
ee181196 903 BlockDriver *backing_drv;
8a426614
KW
904 int64_t sector, total_sectors;
905 int n, ro, open_flags;
4dca4b63 906 int ret = 0, rw_ret = 0;
8a426614 907 uint8_t *buf;
4dca4b63
NS
908 char filename[1024];
909 BlockDriverState *bs_rw, *bs_ro;
33e3963e 910
19cb3738
FB
911 if (!drv)
912 return -ENOMEDIUM;
4dca4b63
NS
913
914 if (!bs->backing_hd) {
915 return -ENOTSUP;
33e3963e
FB
916 }
917
4dca4b63
NS
918 if (bs->backing_hd->keep_read_only) {
919 return -EACCES;
920 }
ee181196
KW
921
922 backing_drv = bs->backing_hd->drv;
4dca4b63
NS
923 ro = bs->backing_hd->read_only;
924 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
925 open_flags = bs->backing_hd->open_flags;
926
927 if (ro) {
928 /* re-open as RW */
929 bdrv_delete(bs->backing_hd);
930 bs->backing_hd = NULL;
931 bs_rw = bdrv_new("");
ee181196
KW
932 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
933 backing_drv);
4dca4b63
NS
934 if (rw_ret < 0) {
935 bdrv_delete(bs_rw);
936 /* try to re-open read-only */
937 bs_ro = bdrv_new("");
ee181196
KW
938 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
939 backing_drv);
4dca4b63
NS
940 if (ret < 0) {
941 bdrv_delete(bs_ro);
942 /* drive not functional anymore */
943 bs->drv = NULL;
944 return ret;
945 }
946 bs->backing_hd = bs_ro;
947 return rw_ret;
948 }
949 bs->backing_hd = bs_rw;
ea2384d3 950 }
33e3963e 951
6ea44308 952 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
7267c094 953 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
8a426614
KW
954
955 for (sector = 0; sector < total_sectors; sector += n) {
956 if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
957
958 if (bdrv_read(bs, sector, buf, n) != 0) {
959 ret = -EIO;
960 goto ro_cleanup;
961 }
962
963 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
964 ret = -EIO;
965 goto ro_cleanup;
966 }
ea2384d3 967 }
33e3963e 968 }
95389c86 969
1d44952f
CH
970 if (drv->bdrv_make_empty) {
971 ret = drv->bdrv_make_empty(bs);
972 bdrv_flush(bs);
973 }
95389c86 974
3f5075ae
CH
975 /*
976 * Make sure all data we wrote to the backing device is actually
977 * stable on disk.
978 */
979 if (bs->backing_hd)
980 bdrv_flush(bs->backing_hd);
4dca4b63
NS
981
982ro_cleanup:
7267c094 983 g_free(buf);
4dca4b63
NS
984
985 if (ro) {
986 /* re-open as RO */
987 bdrv_delete(bs->backing_hd);
988 bs->backing_hd = NULL;
989 bs_ro = bdrv_new("");
ee181196
KW
990 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
991 backing_drv);
4dca4b63
NS
992 if (ret < 0) {
993 bdrv_delete(bs_ro);
994 /* drive not functional anymore */
995 bs->drv = NULL;
996 return ret;
997 }
998 bs->backing_hd = bs_ro;
999 bs->backing_hd->keep_read_only = 0;
1000 }
1001
1d44952f 1002 return ret;
33e3963e
FB
1003}
1004
6ab4b5ab
MA
1005void bdrv_commit_all(void)
1006{
1007 BlockDriverState *bs;
1008
1009 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1010 bdrv_commit(bs);
1011 }
1012}
1013
756e6736
KW
1014/*
1015 * Return values:
1016 * 0 - success
1017 * -EINVAL - backing format specified, but no file
1018 * -ENOSPC - can't update the backing file because no space is left in the
1019 * image file header
1020 * -ENOTSUP - format driver doesn't support changing the backing file
1021 */
1022int bdrv_change_backing_file(BlockDriverState *bs,
1023 const char *backing_file, const char *backing_fmt)
1024{
1025 BlockDriver *drv = bs->drv;
1026
1027 if (drv->bdrv_change_backing_file != NULL) {
1028 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
1029 } else {
1030 return -ENOTSUP;
1031 }
1032}
1033
71d0770c
AL
1034static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1035 size_t size)
1036{
1037 int64_t len;
1038
1039 if (!bdrv_is_inserted(bs))
1040 return -ENOMEDIUM;
1041
1042 if (bs->growable)
1043 return 0;
1044
1045 len = bdrv_getlength(bs);
1046
fbb7b4e0
KW
1047 if (offset < 0)
1048 return -EIO;
1049
1050 if ((offset > len) || (len - offset < size))
71d0770c
AL
1051 return -EIO;
1052
1053 return 0;
1054}
1055
1056static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1057 int nb_sectors)
1058{
eb5a3165
JS
1059 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1060 nb_sectors * BDRV_SECTOR_SIZE);
71d0770c
AL
1061}
1062
1c9805a3
SH
1063typedef struct RwCo {
1064 BlockDriverState *bs;
1065 int64_t sector_num;
1066 int nb_sectors;
1067 QEMUIOVector *qiov;
1068 bool is_write;
1069 int ret;
1070} RwCo;
1071
1072static void coroutine_fn bdrv_rw_co_entry(void *opaque)
fc01f7e7 1073{
1c9805a3 1074 RwCo *rwco = opaque;
ea2384d3 1075
1c9805a3
SH
1076 if (!rwco->is_write) {
1077 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
1078 rwco->nb_sectors, rwco->qiov);
1079 } else {
1080 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
1081 rwco->nb_sectors, rwco->qiov);
1082 }
1083}
e7a8a783 1084
1c9805a3
SH
1085/*
1086 * Process a synchronous request using coroutines
1087 */
1088static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1089 int nb_sectors, bool is_write)
1090{
1091 QEMUIOVector qiov;
1092 struct iovec iov = {
1093 .iov_base = (void *)buf,
1094 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1095 };
1096 Coroutine *co;
1097 RwCo rwco = {
1098 .bs = bs,
1099 .sector_num = sector_num,
1100 .nb_sectors = nb_sectors,
1101 .qiov = &qiov,
1102 .is_write = is_write,
1103 .ret = NOT_DONE,
1104 };
e7a8a783 1105
1c9805a3 1106 qemu_iovec_init_external(&qiov, &iov, 1);
e7a8a783 1107
1c9805a3
SH
1108 if (qemu_in_coroutine()) {
1109 /* Fast-path if already in coroutine context */
1110 bdrv_rw_co_entry(&rwco);
1111 } else {
1112 co = qemu_coroutine_create(bdrv_rw_co_entry);
1113 qemu_coroutine_enter(co, &rwco);
1114 while (rwco.ret == NOT_DONE) {
1115 qemu_aio_wait();
1116 }
1117 }
1118 return rwco.ret;
1119}
b338082b 1120
1c9805a3
SH
1121/* return < 0 if error. See bdrv_write() for the return codes */
1122int bdrv_read(BlockDriverState *bs, int64_t sector_num,
1123 uint8_t *buf, int nb_sectors)
1124{
1125 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
fc01f7e7
FB
1126}
1127
7cd1e32a 1128static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
a55eb92c 1129 int nb_sectors, int dirty)
7cd1e32a 1130{
1131 int64_t start, end;
c6d22830 1132 unsigned long val, idx, bit;
a55eb92c 1133
6ea44308 1134 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
c6d22830 1135 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c
JK
1136
1137 for (; start <= end; start++) {
c6d22830
JK
1138 idx = start / (sizeof(unsigned long) * 8);
1139 bit = start % (sizeof(unsigned long) * 8);
1140 val = bs->dirty_bitmap[idx];
1141 if (dirty) {
6d59fec1 1142 if (!(val & (1UL << bit))) {
aaa0eb75 1143 bs->dirty_count++;
6d59fec1 1144 val |= 1UL << bit;
aaa0eb75 1145 }
c6d22830 1146 } else {
6d59fec1 1147 if (val & (1UL << bit)) {
aaa0eb75 1148 bs->dirty_count--;
6d59fec1 1149 val &= ~(1UL << bit);
aaa0eb75 1150 }
c6d22830
JK
1151 }
1152 bs->dirty_bitmap[idx] = val;
7cd1e32a 1153 }
1154}
1155
5fafdf24 1156/* Return < 0 if error. Important errors are:
19cb3738
FB
1157 -EIO generic I/O error (may happen for all errors)
1158 -ENOMEDIUM No media inserted.
1159 -EINVAL Invalid sector number or nb_sectors
1160 -EACCES Trying to write a read-only device
1161*/
5fafdf24 1162int bdrv_write(BlockDriverState *bs, int64_t sector_num,
fc01f7e7
FB
1163 const uint8_t *buf, int nb_sectors)
1164{
1c9805a3 1165 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
83f64091
FB
1166}
1167
eda578e5
AL
1168int bdrv_pread(BlockDriverState *bs, int64_t offset,
1169 void *buf, int count1)
83f64091 1170{
6ea44308 1171 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
83f64091
FB
1172 int len, nb_sectors, count;
1173 int64_t sector_num;
9a8c4cce 1174 int ret;
83f64091
FB
1175
1176 count = count1;
1177 /* first read to align to sector start */
6ea44308 1178 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
83f64091
FB
1179 if (len > count)
1180 len = count;
6ea44308 1181 sector_num = offset >> BDRV_SECTOR_BITS;
83f64091 1182 if (len > 0) {
9a8c4cce
KW
1183 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1184 return ret;
6ea44308 1185 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
83f64091
FB
1186 count -= len;
1187 if (count == 0)
1188 return count1;
1189 sector_num++;
1190 buf += len;
1191 }
1192
1193 /* read the sectors "in place" */
6ea44308 1194 nb_sectors = count >> BDRV_SECTOR_BITS;
83f64091 1195 if (nb_sectors > 0) {
9a8c4cce
KW
1196 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1197 return ret;
83f64091 1198 sector_num += nb_sectors;
6ea44308 1199 len = nb_sectors << BDRV_SECTOR_BITS;
83f64091
FB
1200 buf += len;
1201 count -= len;
1202 }
1203
1204 /* add data from the last sector */
1205 if (count > 0) {
9a8c4cce
KW
1206 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1207 return ret;
83f64091
FB
1208 memcpy(buf, tmp_buf, count);
1209 }
1210 return count1;
1211}
1212
eda578e5
AL
1213int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1214 const void *buf, int count1)
83f64091 1215{
6ea44308 1216 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
83f64091
FB
1217 int len, nb_sectors, count;
1218 int64_t sector_num;
9a8c4cce 1219 int ret;
83f64091
FB
1220
1221 count = count1;
1222 /* first write to align to sector start */
6ea44308 1223 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
83f64091
FB
1224 if (len > count)
1225 len = count;
6ea44308 1226 sector_num = offset >> BDRV_SECTOR_BITS;
83f64091 1227 if (len > 0) {
9a8c4cce
KW
1228 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1229 return ret;
6ea44308 1230 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
9a8c4cce
KW
1231 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1232 return ret;
83f64091
FB
1233 count -= len;
1234 if (count == 0)
1235 return count1;
1236 sector_num++;
1237 buf += len;
1238 }
1239
1240 /* write the sectors "in place" */
6ea44308 1241 nb_sectors = count >> BDRV_SECTOR_BITS;
83f64091 1242 if (nb_sectors > 0) {
9a8c4cce
KW
1243 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1244 return ret;
83f64091 1245 sector_num += nb_sectors;
6ea44308 1246 len = nb_sectors << BDRV_SECTOR_BITS;
83f64091
FB
1247 buf += len;
1248 count -= len;
1249 }
1250
1251 /* add data from the last sector */
1252 if (count > 0) {
9a8c4cce
KW
1253 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1254 return ret;
83f64091 1255 memcpy(tmp_buf, buf, count);
9a8c4cce
KW
1256 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1257 return ret;
83f64091
FB
1258 }
1259 return count1;
1260}
83f64091 1261
f08145fe
KW
1262/*
1263 * Writes to the file and ensures that no writes are reordered across this
1264 * request (acts as a barrier)
1265 *
1266 * Returns 0 on success, -errno in error cases.
1267 */
1268int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1269 const void *buf, int count)
1270{
1271 int ret;
1272
1273 ret = bdrv_pwrite(bs, offset, buf, count);
1274 if (ret < 0) {
1275 return ret;
1276 }
1277
92196b2f
SH
1278 /* No flush needed for cache modes that use O_DSYNC */
1279 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
f08145fe
KW
1280 bdrv_flush(bs);
1281 }
1282
1283 return 0;
1284}
1285
c5fbe571
SH
1286/*
1287 * Handle a read request in coroutine context
1288 */
1289static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
1290 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
da1fa91d
KW
1291{
1292 BlockDriver *drv = bs->drv;
1293
da1fa91d
KW
1294 if (!drv) {
1295 return -ENOMEDIUM;
1296 }
1297 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1298 return -EIO;
1299 }
1300
1301 return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1302}
1303
c5fbe571 1304int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
da1fa91d
KW
1305 int nb_sectors, QEMUIOVector *qiov)
1306{
c5fbe571 1307 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
da1fa91d 1308
c5fbe571
SH
1309 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
1310}
1311
1312/*
1313 * Handle a write request in coroutine context
1314 */
1315static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
1316 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1317{
1318 BlockDriver *drv = bs->drv;
6b7cb247 1319 int ret;
da1fa91d
KW
1320
1321 if (!bs->drv) {
1322 return -ENOMEDIUM;
1323 }
1324 if (bs->read_only) {
1325 return -EACCES;
1326 }
1327 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1328 return -EIO;
1329 }
1330
6b7cb247
SH
1331 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1332
da1fa91d
KW
1333 if (bs->dirty_bitmap) {
1334 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1335 }
1336
1337 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1338 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1339 }
1340
6b7cb247 1341 return ret;
da1fa91d
KW
1342}
1343
c5fbe571
SH
1344int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1345 int nb_sectors, QEMUIOVector *qiov)
1346{
1347 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1348
1349 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
1350}
1351
83f64091
FB
1352/**
1353 * Truncate file to 'offset' bytes (needed only for file protocols)
1354 */
1355int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1356{
1357 BlockDriver *drv = bs->drv;
51762288 1358 int ret;
83f64091 1359 if (!drv)
19cb3738 1360 return -ENOMEDIUM;
83f64091
FB
1361 if (!drv->bdrv_truncate)
1362 return -ENOTSUP;
59f2689d
NS
1363 if (bs->read_only)
1364 return -EACCES;
8591675f
MT
1365 if (bdrv_in_use(bs))
1366 return -EBUSY;
51762288
SH
1367 ret = drv->bdrv_truncate(bs, offset);
1368 if (ret == 0) {
1369 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
145feb17 1370 bdrv_dev_resize_cb(bs);
51762288
SH
1371 }
1372 return ret;
83f64091
FB
1373}
1374
4a1d5e1f
FZ
1375/**
1376 * Length of a allocated file in bytes. Sparse files are counted by actual
1377 * allocated space. Return < 0 if error or unknown.
1378 */
1379int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1380{
1381 BlockDriver *drv = bs->drv;
1382 if (!drv) {
1383 return -ENOMEDIUM;
1384 }
1385 if (drv->bdrv_get_allocated_file_size) {
1386 return drv->bdrv_get_allocated_file_size(bs);
1387 }
1388 if (bs->file) {
1389 return bdrv_get_allocated_file_size(bs->file);
1390 }
1391 return -ENOTSUP;
1392}
1393
83f64091
FB
1394/**
1395 * Length of a file in bytes. Return < 0 if error or unknown.
1396 */
1397int64_t bdrv_getlength(BlockDriverState *bs)
1398{
1399 BlockDriver *drv = bs->drv;
1400 if (!drv)
19cb3738 1401 return -ENOMEDIUM;
51762288 1402
2c6942fa 1403 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
46a4e4e6
SH
1404 if (drv->bdrv_getlength) {
1405 return drv->bdrv_getlength(bs);
1406 }
83f64091 1407 }
46a4e4e6 1408 return bs->total_sectors * BDRV_SECTOR_SIZE;
fc01f7e7
FB
1409}
1410
19cb3738 1411/* return 0 as number of sectors if no device present or error */
96b8f136 1412void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
fc01f7e7 1413{
19cb3738
FB
1414 int64_t length;
1415 length = bdrv_getlength(bs);
1416 if (length < 0)
1417 length = 0;
1418 else
6ea44308 1419 length = length >> BDRV_SECTOR_BITS;
19cb3738 1420 *nb_sectors_ptr = length;
fc01f7e7 1421}
cf98951b 1422
f3d54fc4
AL
/*
 * One entry of the MSDOS partition table, overlaid by guess_disk_lchs() on
 * the boot sector starting at byte 0x1be.  Packed: the layout must match the
 * on-disk bytes exactly.
 */
struct partition {
    uint8_t boot_ind;       /* 0x80 - active */
    uint8_t head;           /* starting head */
    uint8_t sector;         /* starting sector */
    uint8_t cyl;            /* starting cylinder */
    uint8_t sys_ind;        /* What partition type */
    uint8_t end_head;       /* end head */
    uint8_t end_sector;     /* end sector */
    uint8_t end_cyl;        /* end cylinder */
    uint32_t start_sect;    /* starting sector counting from 0 */
    uint32_t nr_sects;      /* nr of sectors in partition */
} QEMU_PACKED;
f3d54fc4
AL
1435
1436/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1437static int guess_disk_lchs(BlockDriverState *bs,
1438 int *pcylinders, int *pheads, int *psectors)
1439{
eb5a3165 1440 uint8_t buf[BDRV_SECTOR_SIZE];
f3d54fc4
AL
1441 int ret, i, heads, sectors, cylinders;
1442 struct partition *p;
1443 uint32_t nr_sects;
a38131b6 1444 uint64_t nb_sectors;
f3d54fc4
AL
1445
1446 bdrv_get_geometry(bs, &nb_sectors);
1447
1448 ret = bdrv_read(bs, 0, buf, 1);
1449 if (ret < 0)
1450 return -1;
1451 /* test msdos magic */
1452 if (buf[510] != 0x55 || buf[511] != 0xaa)
1453 return -1;
1454 for(i = 0; i < 4; i++) {
1455 p = ((struct partition *)(buf + 0x1be)) + i;
1456 nr_sects = le32_to_cpu(p->nr_sects);
1457 if (nr_sects && p->end_head) {
1458 /* We make the assumption that the partition terminates on
1459 a cylinder boundary */
1460 heads = p->end_head + 1;
1461 sectors = p->end_sector & 63;
1462 if (sectors == 0)
1463 continue;
1464 cylinders = nb_sectors / (heads * sectors);
1465 if (cylinders < 1 || cylinders > 16383)
1466 continue;
1467 *pheads = heads;
1468 *psectors = sectors;
1469 *pcylinders = cylinders;
1470#if 0
1471 printf("guessed geometry: LCHS=%d %d %d\n",
1472 cylinders, heads, sectors);
1473#endif
1474 return 0;
1475 }
1476 }
1477 return -1;
1478}
1479
1480void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1481{
1482 int translation, lba_detected = 0;
1483 int cylinders, heads, secs;
a38131b6 1484 uint64_t nb_sectors;
f3d54fc4
AL
1485
1486 /* if a geometry hint is available, use it */
1487 bdrv_get_geometry(bs, &nb_sectors);
1488 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1489 translation = bdrv_get_translation_hint(bs);
1490 if (cylinders != 0) {
1491 *pcyls = cylinders;
1492 *pheads = heads;
1493 *psecs = secs;
1494 } else {
1495 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1496 if (heads > 16) {
1497 /* if heads > 16, it means that a BIOS LBA
1498 translation was active, so the default
1499 hardware geometry is OK */
1500 lba_detected = 1;
1501 goto default_geometry;
1502 } else {
1503 *pcyls = cylinders;
1504 *pheads = heads;
1505 *psecs = secs;
1506 /* disable any translation to be in sync with
1507 the logical geometry */
1508 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1509 bdrv_set_translation_hint(bs,
1510 BIOS_ATA_TRANSLATION_NONE);
1511 }
1512 }
1513 } else {
1514 default_geometry:
1515 /* if no geometry, use a standard physical disk geometry */
1516 cylinders = nb_sectors / (16 * 63);
1517
1518 if (cylinders > 16383)
1519 cylinders = 16383;
1520 else if (cylinders < 2)
1521 cylinders = 2;
1522 *pcyls = cylinders;
1523 *pheads = 16;
1524 *psecs = 63;
1525 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1526 if ((*pcyls * *pheads) <= 131072) {
1527 bdrv_set_translation_hint(bs,
1528 BIOS_ATA_TRANSLATION_LARGE);
1529 } else {
1530 bdrv_set_translation_hint(bs,
1531 BIOS_ATA_TRANSLATION_LBA);
1532 }
1533 }
1534 }
1535 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1536 }
1537}
1538
5fafdf24 1539void bdrv_set_geometry_hint(BlockDriverState *bs,
b338082b
FB
1540 int cyls, int heads, int secs)
1541{
1542 bs->cyls = cyls;
1543 bs->heads = heads;
1544 bs->secs = secs;
1545}
1546
46d4767d
FB
1547void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1548{
1549 bs->translation = translation;
1550}
1551
5fafdf24 1552void bdrv_get_geometry_hint(BlockDriverState *bs,
b338082b
FB
1553 int *pcyls, int *pheads, int *psecs)
1554{
1555 *pcyls = bs->cyls;
1556 *pheads = bs->heads;
1557 *psecs = bs->secs;
1558}
1559
0563e191
ZYW
1560/* throttling disk io limits */
1561void bdrv_set_io_limits(BlockDriverState *bs,
1562 BlockIOLimit *io_limits)
1563{
1564 bs->io_limits = *io_limits;
1565 bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
1566}
1567
5bbdbb46
BS
1568/* Recognize floppy formats */
1569typedef struct FDFormat {
1570 FDriveType drive;
1571 uint8_t last_sect;
1572 uint8_t max_track;
1573 uint8_t max_head;
1574} FDFormat;
1575
1576static const FDFormat fd_formats[] = {
1577 /* First entry is default format */
1578 /* 1.44 MB 3"1/2 floppy disks */
1579 { FDRIVE_DRV_144, 18, 80, 1, },
1580 { FDRIVE_DRV_144, 20, 80, 1, },
1581 { FDRIVE_DRV_144, 21, 80, 1, },
1582 { FDRIVE_DRV_144, 21, 82, 1, },
1583 { FDRIVE_DRV_144, 21, 83, 1, },
1584 { FDRIVE_DRV_144, 22, 80, 1, },
1585 { FDRIVE_DRV_144, 23, 80, 1, },
1586 { FDRIVE_DRV_144, 24, 80, 1, },
1587 /* 2.88 MB 3"1/2 floppy disks */
1588 { FDRIVE_DRV_288, 36, 80, 1, },
1589 { FDRIVE_DRV_288, 39, 80, 1, },
1590 { FDRIVE_DRV_288, 40, 80, 1, },
1591 { FDRIVE_DRV_288, 44, 80, 1, },
1592 { FDRIVE_DRV_288, 48, 80, 1, },
1593 /* 720 kB 3"1/2 floppy disks */
1594 { FDRIVE_DRV_144, 9, 80, 1, },
1595 { FDRIVE_DRV_144, 10, 80, 1, },
1596 { FDRIVE_DRV_144, 10, 82, 1, },
1597 { FDRIVE_DRV_144, 10, 83, 1, },
1598 { FDRIVE_DRV_144, 13, 80, 1, },
1599 { FDRIVE_DRV_144, 14, 80, 1, },
1600 /* 1.2 MB 5"1/4 floppy disks */
1601 { FDRIVE_DRV_120, 15, 80, 1, },
1602 { FDRIVE_DRV_120, 18, 80, 1, },
1603 { FDRIVE_DRV_120, 18, 82, 1, },
1604 { FDRIVE_DRV_120, 18, 83, 1, },
1605 { FDRIVE_DRV_120, 20, 80, 1, },
1606 /* 720 kB 5"1/4 floppy disks */
1607 { FDRIVE_DRV_120, 9, 80, 1, },
1608 { FDRIVE_DRV_120, 11, 80, 1, },
1609 /* 360 kB 5"1/4 floppy disks */
1610 { FDRIVE_DRV_120, 9, 40, 1, },
1611 { FDRIVE_DRV_120, 9, 40, 0, },
1612 { FDRIVE_DRV_120, 10, 41, 1, },
1613 { FDRIVE_DRV_120, 10, 42, 1, },
1614 /* 320 kB 5"1/4 floppy disks */
1615 { FDRIVE_DRV_120, 8, 40, 1, },
1616 { FDRIVE_DRV_120, 8, 40, 0, },
1617 /* 360 kB must match 5"1/4 better than 3"1/2... */
1618 { FDRIVE_DRV_144, 9, 80, 0, },
1619 /* end */
1620 { FDRIVE_DRV_NONE, -1, -1, 0, },
1621};
1622
1623void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1624 int *max_track, int *last_sect,
1625 FDriveType drive_in, FDriveType *drive)
1626{
1627 const FDFormat *parse;
1628 uint64_t nb_sectors, size;
1629 int i, first_match, match;
1630
1631 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1632 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1633 /* User defined disk */
1634 } else {
1635 bdrv_get_geometry(bs, &nb_sectors);
1636 match = -1;
1637 first_match = -1;
1638 for (i = 0; ; i++) {
1639 parse = &fd_formats[i];
1640 if (parse->drive == FDRIVE_DRV_NONE) {
1641 break;
1642 }
1643 if (drive_in == parse->drive ||
1644 drive_in == FDRIVE_DRV_NONE) {
1645 size = (parse->max_head + 1) * parse->max_track *
1646 parse->last_sect;
1647 if (nb_sectors == size) {
1648 match = i;
1649 break;
1650 }
1651 if (first_match == -1) {
1652 first_match = i;
1653 }
1654 }
1655 }
1656 if (match == -1) {
1657 if (first_match == -1) {
1658 match = 1;
1659 } else {
1660 match = first_match;
1661 }
1662 parse = &fd_formats[match];
1663 }
1664 *nb_heads = parse->max_head + 1;
1665 *max_track = parse->max_track;
1666 *last_sect = parse->last_sect;
1667 *drive = parse->drive;
1668 }
1669}
1670
46d4767d
FB
1671int bdrv_get_translation_hint(BlockDriverState *bs)
1672{
1673 return bs->translation;
1674}
1675
abd7f68d
MA
1676void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1677 BlockErrorAction on_write_error)
1678{
1679 bs->on_read_error = on_read_error;
1680 bs->on_write_error = on_write_error;
1681}
1682
1683BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1684{
1685 return is_read ? bs->on_read_error : bs->on_write_error;
1686}
1687
b338082b
FB
1688int bdrv_is_read_only(BlockDriverState *bs)
1689{
1690 return bs->read_only;
1691}
1692
985a03b0
TS
1693int bdrv_is_sg(BlockDriverState *bs)
1694{
1695 return bs->sg;
1696}
1697
e900a7b7
CH
1698int bdrv_enable_write_cache(BlockDriverState *bs)
1699{
1700 return bs->enable_write_cache;
1701}
1702
ea2384d3
FB
1703int bdrv_is_encrypted(BlockDriverState *bs)
1704{
1705 if (bs->backing_hd && bs->backing_hd->encrypted)
1706 return 1;
1707 return bs->encrypted;
1708}
1709
c0f4ce77
AL
1710int bdrv_key_required(BlockDriverState *bs)
1711{
1712 BlockDriverState *backing_hd = bs->backing_hd;
1713
1714 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1715 return 1;
1716 return (bs->encrypted && !bs->valid_key);
1717}
1718
ea2384d3
FB
1719int bdrv_set_key(BlockDriverState *bs, const char *key)
1720{
1721 int ret;
1722 if (bs->backing_hd && bs->backing_hd->encrypted) {
1723 ret = bdrv_set_key(bs->backing_hd, key);
1724 if (ret < 0)
1725 return ret;
1726 if (!bs->encrypted)
1727 return 0;
1728 }
fd04a2ae
SH
1729 if (!bs->encrypted) {
1730 return -EINVAL;
1731 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1732 return -ENOMEDIUM;
1733 }
c0f4ce77 1734 ret = bs->drv->bdrv_set_key(bs, key);
bb5fc20f
AL
1735 if (ret < 0) {
1736 bs->valid_key = 0;
1737 } else if (!bs->valid_key) {
1738 bs->valid_key = 1;
1739 /* call the change callback now, we skipped it on open */
7d4b4ba5 1740 bdrv_dev_change_media_cb(bs, true);
bb5fc20f 1741 }
c0f4ce77 1742 return ret;
ea2384d3
FB
1743}
1744
1745void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1746{
19cb3738 1747 if (!bs->drv) {
ea2384d3
FB
1748 buf[0] = '\0';
1749 } else {
1750 pstrcpy(buf, buf_size, bs->drv->format_name);
1751 }
1752}
1753
5fafdf24 1754void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
ea2384d3
FB
1755 void *opaque)
1756{
1757 BlockDriver *drv;
1758
8a22f02a 1759 QLIST_FOREACH(drv, &bdrv_drivers, list) {
ea2384d3
FB
1760 it(opaque, drv->format_name);
1761 }
1762}
1763
b338082b
FB
1764BlockDriverState *bdrv_find(const char *name)
1765{
1766 BlockDriverState *bs;
1767
1b7bdbc1
SH
1768 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1769 if (!strcmp(name, bs->device_name)) {
b338082b 1770 return bs;
1b7bdbc1 1771 }
b338082b
FB
1772 }
1773 return NULL;
1774}
1775
2f399b0a
MA
1776BlockDriverState *bdrv_next(BlockDriverState *bs)
1777{
1778 if (!bs) {
1779 return QTAILQ_FIRST(&bdrv_states);
1780 }
1781 return QTAILQ_NEXT(bs, list);
1782}
1783
51de9760 1784void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
81d0912d
FB
1785{
1786 BlockDriverState *bs;
1787
1b7bdbc1 1788 QTAILQ_FOREACH(bs, &bdrv_states, list) {
51de9760 1789 it(opaque, bs);
81d0912d
FB
1790 }
1791}
1792
ea2384d3
FB
1793const char *bdrv_get_device_name(BlockDriverState *bs)
1794{
1795 return bs->device_name;
1796}
1797
c6ca28d6
AL
1798void bdrv_flush_all(void)
1799{
1800 BlockDriverState *bs;
1801
1b7bdbc1 1802 QTAILQ_FOREACH(bs, &bdrv_states, list) {
c602a489 1803 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
c6ca28d6 1804 bdrv_flush(bs);
1b7bdbc1
SH
1805 }
1806 }
c6ca28d6
AL
1807}
1808
f2feebbd
KW
1809int bdrv_has_zero_init(BlockDriverState *bs)
1810{
1811 assert(bs->drv);
1812
336c1c12
KW
1813 if (bs->drv->bdrv_has_zero_init) {
1814 return bs->drv->bdrv_has_zero_init(bs);
f2feebbd
KW
1815 }
1816
1817 return 1;
1818}
1819
f58c7b35
TS
1820/*
1821 * Returns true iff the specified sector is present in the disk image. Drivers
1822 * not implementing the functionality are assumed to not support backing files,
1823 * hence all their sectors are reported as allocated.
1824 *
1825 * 'pnum' is set to the number of sectors (including and immediately following
1826 * the specified sector) that are known to be in the same
1827 * allocated/unallocated state.
1828 *
1829 * 'nb_sectors' is the max value 'pnum' should be set to.
1830 */
1831int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1832 int *pnum)
1833{
1834 int64_t n;
1835 if (!bs->drv->bdrv_is_allocated) {
1836 if (sector_num >= bs->total_sectors) {
1837 *pnum = 0;
1838 return 0;
1839 }
1840 n = bs->total_sectors - sector_num;
1841 *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1842 return 1;
1843 }
1844 return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1845}
1846
2582bfed
LC
1847void bdrv_mon_event(const BlockDriverState *bdrv,
1848 BlockMonEventAction action, int is_read)
1849{
1850 QObject *data;
1851 const char *action_str;
1852
1853 switch (action) {
1854 case BDRV_ACTION_REPORT:
1855 action_str = "report";
1856 break;
1857 case BDRV_ACTION_IGNORE:
1858 action_str = "ignore";
1859 break;
1860 case BDRV_ACTION_STOP:
1861 action_str = "stop";
1862 break;
1863 default:
1864 abort();
1865 }
1866
1867 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1868 bdrv->device_name,
1869 action_str,
1870 is_read ? "read" : "write");
1871 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1872
1873 qobject_decref(data);
1874}
1875
b2023818 1876BlockInfoList *qmp_query_block(Error **errp)
b338082b 1877{
b2023818 1878 BlockInfoList *head = NULL, *cur_item = NULL;
b338082b
FB
1879 BlockDriverState *bs;
1880
1b7bdbc1 1881 QTAILQ_FOREACH(bs, &bdrv_states, list) {
b2023818 1882 BlockInfoList *info = g_malloc0(sizeof(*info));
d15e5465 1883
b2023818
LC
1884 info->value = g_malloc0(sizeof(*info->value));
1885 info->value->device = g_strdup(bs->device_name);
1886 info->value->type = g_strdup("unknown");
1887 info->value->locked = bdrv_dev_is_medium_locked(bs);
1888 info->value->removable = bdrv_dev_has_removable_media(bs);
d15e5465 1889
e4def80b 1890 if (bdrv_dev_has_removable_media(bs)) {
b2023818
LC
1891 info->value->has_tray_open = true;
1892 info->value->tray_open = bdrv_dev_is_tray_open(bs);
e4def80b 1893 }
f04ef601
LC
1894
1895 if (bdrv_iostatus_is_enabled(bs)) {
b2023818
LC
1896 info->value->has_io_status = true;
1897 info->value->io_status = bs->iostatus;
f04ef601
LC
1898 }
1899
19cb3738 1900 if (bs->drv) {
b2023818
LC
1901 info->value->has_inserted = true;
1902 info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
1903 info->value->inserted->file = g_strdup(bs->filename);
1904 info->value->inserted->ro = bs->read_only;
1905 info->value->inserted->drv = g_strdup(bs->drv->format_name);
1906 info->value->inserted->encrypted = bs->encrypted;
1907 if (bs->backing_file[0]) {
1908 info->value->inserted->has_backing_file = true;
1909 info->value->inserted->backing_file = g_strdup(bs->backing_file);
376253ec 1910 }
b2023818 1911 }
d15e5465 1912
b2023818
LC
1913 /* XXX: waiting for the qapi to support GSList */
1914 if (!cur_item) {
1915 head = cur_item = info;
1916 } else {
1917 cur_item->next = info;
1918 cur_item = info;
b338082b 1919 }
b338082b 1920 }
d15e5465 1921
b2023818 1922 return head;
b338082b 1923}
a36e69dd 1924
f11f57e4
LC
1925/* Consider exposing this as a full fledged QMP command */
1926static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
1927{
1928 BlockStats *s;
1929
1930 s = g_malloc0(sizeof(*s));
1931
1932 if (bs->device_name[0]) {
1933 s->has_device = true;
1934 s->device = g_strdup(bs->device_name);
294cc35f
KW
1935 }
1936
f11f57e4
LC
1937 s->stats = g_malloc0(sizeof(*s->stats));
1938 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
1939 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
1940 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
1941 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
1942 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
1943 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
1944 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
1945 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
1946 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
1947
294cc35f 1948 if (bs->file) {
f11f57e4
LC
1949 s->has_parent = true;
1950 s->parent = qmp_query_blockstat(bs->file, NULL);
294cc35f
KW
1951 }
1952
f11f57e4 1953 return s;
294cc35f
KW
1954}
1955
f11f57e4 1956BlockStatsList *qmp_query_blockstats(Error **errp)
218a536a 1957{
f11f57e4 1958 BlockStatsList *head = NULL, *cur_item = NULL;
a36e69dd
TS
1959 BlockDriverState *bs;
1960
1b7bdbc1 1961 QTAILQ_FOREACH(bs, &bdrv_states, list) {
f11f57e4
LC
1962 BlockStatsList *info = g_malloc0(sizeof(*info));
1963 info->value = qmp_query_blockstat(bs, NULL);
1964
1965 /* XXX: waiting for the qapi to support GSList */
1966 if (!cur_item) {
1967 head = cur_item = info;
1968 } else {
1969 cur_item->next = info;
1970 cur_item = info;
1971 }
a36e69dd 1972 }
218a536a 1973
f11f57e4 1974 return head;
a36e69dd 1975}
ea2384d3 1976
045df330
AL
1977const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
1978{
1979 if (bs->backing_hd && bs->backing_hd->encrypted)
1980 return bs->backing_file;
1981 else if (bs->encrypted)
1982 return bs->filename;
1983 else
1984 return NULL;
1985}
1986
5fafdf24 1987void bdrv_get_backing_filename(BlockDriverState *bs,
83f64091
FB
1988 char *filename, int filename_size)
1989{
3574c608 1990 pstrcpy(filename, filename_size, bs->backing_file);
83f64091
FB
1991}
1992
5fafdf24 1993int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
faea38e7
FB
1994 const uint8_t *buf, int nb_sectors)
1995{
1996 BlockDriver *drv = bs->drv;
1997 if (!drv)
19cb3738 1998 return -ENOMEDIUM;
faea38e7
FB
1999 if (!drv->bdrv_write_compressed)
2000 return -ENOTSUP;
fbb7b4e0
KW
2001 if (bdrv_check_request(bs, sector_num, nb_sectors))
2002 return -EIO;
a55eb92c 2003
c6d22830 2004 if (bs->dirty_bitmap) {
7cd1e32a 2005 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2006 }
a55eb92c 2007
faea38e7
FB
2008 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2009}
3b46e624 2010
faea38e7
FB
2011int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2012{
2013 BlockDriver *drv = bs->drv;
2014 if (!drv)
19cb3738 2015 return -ENOMEDIUM;
faea38e7
FB
2016 if (!drv->bdrv_get_info)
2017 return -ENOTSUP;
2018 memset(bdi, 0, sizeof(*bdi));
2019 return drv->bdrv_get_info(bs, bdi);
2020}
2021
45566e9c
CH
2022int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2023 int64_t pos, int size)
178e08a5
AL
2024{
2025 BlockDriver *drv = bs->drv;
2026 if (!drv)
2027 return -ENOMEDIUM;
7cdb1f6d
MK
2028 if (drv->bdrv_save_vmstate)
2029 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2030 if (bs->file)
2031 return bdrv_save_vmstate(bs->file, buf, pos, size);
2032 return -ENOTSUP;
178e08a5
AL
2033}
2034
45566e9c
CH
2035int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2036 int64_t pos, int size)
178e08a5
AL
2037{
2038 BlockDriver *drv = bs->drv;
2039 if (!drv)
2040 return -ENOMEDIUM;
7cdb1f6d
MK
2041 if (drv->bdrv_load_vmstate)
2042 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2043 if (bs->file)
2044 return bdrv_load_vmstate(bs->file, buf, pos, size);
2045 return -ENOTSUP;
178e08a5
AL
2046}
2047
8b9b0cc2
KW
2048void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2049{
2050 BlockDriver *drv = bs->drv;
2051
2052 if (!drv || !drv->bdrv_debug_event) {
2053 return;
2054 }
2055
2056 return drv->bdrv_debug_event(bs, event);
2057
2058}
2059
faea38e7
FB
2060/**************************************************************/
2061/* handling of snapshots */
2062
feeee5ac
MDCF
2063int bdrv_can_snapshot(BlockDriverState *bs)
2064{
2065 BlockDriver *drv = bs->drv;
07b70bfb 2066 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
feeee5ac
MDCF
2067 return 0;
2068 }
2069
2070 if (!drv->bdrv_snapshot_create) {
2071 if (bs->file != NULL) {
2072 return bdrv_can_snapshot(bs->file);
2073 }
2074 return 0;
2075 }
2076
2077 return 1;
2078}
2079
199630b6
BS
2080int bdrv_is_snapshot(BlockDriverState *bs)
2081{
2082 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2083}
2084
f9092b10
MA
2085BlockDriverState *bdrv_snapshots(void)
2086{
2087 BlockDriverState *bs;
2088
3ac906f7 2089 if (bs_snapshots) {
f9092b10 2090 return bs_snapshots;
3ac906f7 2091 }
f9092b10
MA
2092
2093 bs = NULL;
2094 while ((bs = bdrv_next(bs))) {
2095 if (bdrv_can_snapshot(bs)) {
3ac906f7
MA
2096 bs_snapshots = bs;
2097 return bs;
f9092b10
MA
2098 }
2099 }
2100 return NULL;
f9092b10
MA
2101}
2102
5fafdf24 2103int bdrv_snapshot_create(BlockDriverState *bs,
faea38e7
FB
2104 QEMUSnapshotInfo *sn_info)
2105{
2106 BlockDriver *drv = bs->drv;
2107 if (!drv)
19cb3738 2108 return -ENOMEDIUM;
7cdb1f6d
MK
2109 if (drv->bdrv_snapshot_create)
2110 return drv->bdrv_snapshot_create(bs, sn_info);
2111 if (bs->file)
2112 return bdrv_snapshot_create(bs->file, sn_info);
2113 return -ENOTSUP;
faea38e7
FB
2114}
2115
5fafdf24 2116int bdrv_snapshot_goto(BlockDriverState *bs,
faea38e7
FB
2117 const char *snapshot_id)
2118{
2119 BlockDriver *drv = bs->drv;
7cdb1f6d
MK
2120 int ret, open_ret;
2121
faea38e7 2122 if (!drv)
19cb3738 2123 return -ENOMEDIUM;
7cdb1f6d
MK
2124 if (drv->bdrv_snapshot_goto)
2125 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2126
2127 if (bs->file) {
2128 drv->bdrv_close(bs);
2129 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2130 open_ret = drv->bdrv_open(bs, bs->open_flags);
2131 if (open_ret < 0) {
2132 bdrv_delete(bs->file);
2133 bs->drv = NULL;
2134 return open_ret;
2135 }
2136 return ret;
2137 }
2138
2139 return -ENOTSUP;
faea38e7
FB
2140}
2141
2142int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2143{
2144 BlockDriver *drv = bs->drv;
2145 if (!drv)
19cb3738 2146 return -ENOMEDIUM;
7cdb1f6d
MK
2147 if (drv->bdrv_snapshot_delete)
2148 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2149 if (bs->file)
2150 return bdrv_snapshot_delete(bs->file, snapshot_id);
2151 return -ENOTSUP;
faea38e7
FB
2152}
2153
5fafdf24 2154int bdrv_snapshot_list(BlockDriverState *bs,
faea38e7
FB
2155 QEMUSnapshotInfo **psn_info)
2156{
2157 BlockDriver *drv = bs->drv;
2158 if (!drv)
19cb3738 2159 return -ENOMEDIUM;
7cdb1f6d
MK
2160 if (drv->bdrv_snapshot_list)
2161 return drv->bdrv_snapshot_list(bs, psn_info);
2162 if (bs->file)
2163 return bdrv_snapshot_list(bs->file, psn_info);
2164 return -ENOTSUP;
faea38e7
FB
2165}
2166
51ef6727 2167int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2168 const char *snapshot_name)
2169{
2170 BlockDriver *drv = bs->drv;
2171 if (!drv) {
2172 return -ENOMEDIUM;
2173 }
2174 if (!bs->read_only) {
2175 return -EINVAL;
2176 }
2177 if (drv->bdrv_snapshot_load_tmp) {
2178 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2179 }
2180 return -ENOTSUP;
2181}
2182
faea38e7
FB
#define NB_SUFFIXES 4

/*
 * Format size into buf (at most buf_size bytes) in a short human-readable
 * form and return buf.
 *
 * Values up to 999 are printed as plain integers.  Larger values are scaled
 * by powers of 1024 and suffixed with K, M, G or T: one decimal is shown
 * while the scaled value is below 10, otherwise the value is rounded to the
 * nearest integer.  Anything too large for the T range is still printed
 * in T.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t base = 1024;
    int i;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (i = 0; i < NB_SUFFIXES; i++, base *= 1024) {
        if (size < (10 * base)) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / base,
                     suffixes[i]);
            break;
        }
        if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
            /*
             * Round to nearest using quotient and remainder.  Adding
             * base / 2 to size first would overflow int64_t for sizes
             * close to INT64_MAX.
             */
            int64_t rounded = size / base + (size % base >= (base >> 1));

            snprintf(buf, buf_size, "%" PRId64 "%c",
                     rounded,
                     suffixes[i]);
            break;
        }
    }
    return buf;
}
2212
2213char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2214{
2215 char buf1[128], date_buf[128], clock_buf[128];
3b9f94e1
FB
2216#ifdef _WIN32
2217 struct tm *ptm;
2218#else
faea38e7 2219 struct tm tm;
3b9f94e1 2220#endif
faea38e7
FB
2221 time_t ti;
2222 int64_t secs;
2223
2224 if (!sn) {
5fafdf24
TS
2225 snprintf(buf, buf_size,
2226 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
2227 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2228 } else {
2229 ti = sn->date_sec;
3b9f94e1
FB
2230#ifdef _WIN32
2231 ptm = localtime(&ti);
2232 strftime(date_buf, sizeof(date_buf),
2233 "%Y-%m-%d %H:%M:%S", ptm);
2234#else
faea38e7
FB
2235 localtime_r(&ti, &tm);
2236 strftime(date_buf, sizeof(date_buf),
2237 "%Y-%m-%d %H:%M:%S", &tm);
3b9f94e1 2238#endif
faea38e7
FB
2239 secs = sn->vm_clock_nsec / 1000000000;
2240 snprintf(clock_buf, sizeof(clock_buf),
2241 "%02d:%02d:%02d.%03d",
2242 (int)(secs / 3600),
2243 (int)((secs / 60) % 60),
5fafdf24 2244 (int)(secs % 60),
faea38e7
FB
2245 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2246 snprintf(buf, buf_size,
5fafdf24 2247 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
2248 sn->id_str, sn->name,
2249 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2250 date_buf,
2251 clock_buf);
2252 }
2253 return buf;
2254}
2255
ea2384d3 2256/**************************************************************/
83f64091 2257/* async I/Os */
ea2384d3 2258
3b69e4b9 2259BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
f141eafe 2260 QEMUIOVector *qiov, int nb_sectors,
3b69e4b9 2261 BlockDriverCompletionFunc *cb, void *opaque)
83f64091 2262{
bbf0a440
SH
2263 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2264
b2a61371 2265 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
8c5873d6 2266 cb, opaque, false);
ea2384d3
FB
2267}
2268
f141eafe
AL
2269BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2270 QEMUIOVector *qiov, int nb_sectors,
2271 BlockDriverCompletionFunc *cb, void *opaque)
ea2384d3 2272{
bbf0a440
SH
2273 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2274
1a6e115b 2275 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
8c5873d6 2276 cb, opaque, true);
83f64091
FB
2277}
2278
40b4f539
KW
2279
/* Shared completion state for one bdrv_aio_multiwrite() call. */
typedef struct MultiwriteCB {
    int error;          /* first negative result seen by multiwrite_cb() */
    int num_requests;   /* submitted requests still awaiting completion */
    int num_callbacks;  /* number of entries in callbacks[] */
    struct {
        BlockDriverCompletionFunc *cb;  /* caller's completion callback */
        void *opaque;                   /* argument passed to cb */
        QEMUIOVector *free_qiov;        /* merged qiov to destroy and free */
        void *free_buf;                 /* zero-fill buffer to free */
    } callbacks[];
} MultiwriteCB;
2291
2292static void multiwrite_user_cb(MultiwriteCB *mcb)
2293{
2294 int i;
2295
2296 for (i = 0; i < mcb->num_callbacks; i++) {
2297 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1e1ea48d
SH
2298 if (mcb->callbacks[i].free_qiov) {
2299 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2300 }
7267c094 2301 g_free(mcb->callbacks[i].free_qiov);
f8a83245 2302 qemu_vfree(mcb->callbacks[i].free_buf);
40b4f539
KW
2303 }
2304}
2305
2306static void multiwrite_cb(void *opaque, int ret)
2307{
2308 MultiwriteCB *mcb = opaque;
2309
6d519a5f
SH
2310 trace_multiwrite_cb(mcb, ret);
2311
cb6d3ca0 2312 if (ret < 0 && !mcb->error) {
40b4f539 2313 mcb->error = ret;
40b4f539
KW
2314 }
2315
2316 mcb->num_requests--;
2317 if (mcb->num_requests == 0) {
de189a1b 2318 multiwrite_user_cb(mcb);
7267c094 2319 g_free(mcb);
40b4f539
KW
2320 }
2321}
2322
2323static int multiwrite_req_compare(const void *a, const void *b)
2324{
77be4366
CH
2325 const BlockRequest *req1 = a, *req2 = b;
2326
2327 /*
2328 * Note that we can't simply subtract req2->sector from req1->sector
2329 * here as that could overflow the return value.
2330 */
2331 if (req1->sector > req2->sector) {
2332 return 1;
2333 } else if (req1->sector < req2->sector) {
2334 return -1;
2335 } else {
2336 return 0;
2337 }
40b4f539
KW
2338}
2339
2340/*
2341 * Takes a bunch of requests and tries to merge them. Returns the number of
2342 * requests that remain after merging.
2343 */
2344static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2345 int num_reqs, MultiwriteCB *mcb)
2346{
2347 int i, outidx;
2348
2349 // Sort requests by start sector
2350 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2351
2352 // Check if adjacent requests touch the same clusters. If so, combine them,
2353 // filling up gaps with zero sectors.
2354 outidx = 0;
2355 for (i = 1; i < num_reqs; i++) {
2356 int merge = 0;
2357 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2358
2359 // This handles the cases that are valid for all block drivers, namely
2360 // exactly sequential writes and overlapping writes.
2361 if (reqs[i].sector <= oldreq_last) {
2362 merge = 1;
2363 }
2364
2365 // The block driver may decide that it makes sense to combine requests
2366 // even if there is a gap of some sectors between them. In this case,
2367 // the gap is filled with zeros (therefore only applicable for yet
2368 // unused space in format like qcow2).
2369 if (!merge && bs->drv->bdrv_merge_requests) {
2370 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2371 }
2372
e2a305fb
CH
2373 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2374 merge = 0;
2375 }
2376
40b4f539
KW
2377 if (merge) {
2378 size_t size;
7267c094 2379 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
40b4f539
KW
2380 qemu_iovec_init(qiov,
2381 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2382
2383 // Add the first request to the merged one. If the requests are
2384 // overlapping, drop the last sectors of the first request.
2385 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2386 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2387
2388 // We might need to add some zeros between the two requests
2389 if (reqs[i].sector > oldreq_last) {
2390 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2391 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2392 memset(buf, 0, zero_bytes);
2393 qemu_iovec_add(qiov, buf, zero_bytes);
2394 mcb->callbacks[i].free_buf = buf;
2395 }
2396
2397 // Add the second request
2398 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2399
cbf1dff2 2400 reqs[outidx].nb_sectors = qiov->size >> 9;
40b4f539
KW
2401 reqs[outidx].qiov = qiov;
2402
2403 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2404 } else {
2405 outidx++;
2406 reqs[outidx].sector = reqs[i].sector;
2407 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2408 reqs[outidx].qiov = reqs[i].qiov;
2409 }
2410 }
2411
2412 return outidx + 1;
2413}
2414
2415/*
2416 * Submit multiple AIO write requests at once.
2417 *
2418 * On success, the function returns 0 and all requests in the reqs array have
2419 * been submitted. In error case this function returns -1, and any of the
2420 * requests may or may not be submitted yet. In particular, this means that the
2421 * callback will be called for some of the requests, for others it won't. The
2422 * caller must check the error field of the BlockRequest to wait for the right
2423 * callbacks (if error != 0, no callback will be called).
2424 *
2425 * The implementation may modify the contents of the reqs array, e.g. to merge
2426 * requests. However, the fields opaque and error are left unmodified as they
2427 * are used to signal failure for a single request to the caller.
2428 */
2429int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2430{
2431 BlockDriverAIOCB *acb;
2432 MultiwriteCB *mcb;
2433 int i;
2434
301db7c2
RH
2435 /* don't submit writes if we don't have a medium */
2436 if (bs->drv == NULL) {
2437 for (i = 0; i < num_reqs; i++) {
2438 reqs[i].error = -ENOMEDIUM;
2439 }
2440 return -1;
2441 }
2442
40b4f539
KW
2443 if (num_reqs == 0) {
2444 return 0;
2445 }
2446
2447 // Create MultiwriteCB structure
7267c094 2448 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
40b4f539
KW
2449 mcb->num_requests = 0;
2450 mcb->num_callbacks = num_reqs;
2451
2452 for (i = 0; i < num_reqs; i++) {
2453 mcb->callbacks[i].cb = reqs[i].cb;
2454 mcb->callbacks[i].opaque = reqs[i].opaque;
2455 }
2456
2457 // Check for mergable requests
2458 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2459
6d519a5f
SH
2460 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2461
453f9a16
KW
2462 /*
2463 * Run the aio requests. As soon as one request can't be submitted
2464 * successfully, fail all requests that are not yet submitted (we must
2465 * return failure for all requests anyway)
2466 *
2467 * num_requests cannot be set to the right value immediately: If
2468 * bdrv_aio_writev fails for some request, num_requests would be too high
2469 * and therefore multiwrite_cb() would never recognize the multiwrite
2470 * request as completed. We also cannot use the loop variable i to set it
2471 * when the first request fails because the callback may already have been
2472 * called for previously submitted requests. Thus, num_requests must be
2473 * incremented for each request that is submitted.
2474 *
2475 * The problem that callbacks may be called early also means that we need
2476 * to take care that num_requests doesn't become 0 before all requests are
2477 * submitted - multiwrite_cb() would consider the multiwrite request
2478 * completed. A dummy request that is "completed" by a manual call to
2479 * multiwrite_cb() takes care of this.
2480 */
2481 mcb->num_requests = 1;
2482
6d519a5f 2483 // Run the aio requests
40b4f539 2484 for (i = 0; i < num_reqs; i++) {
453f9a16 2485 mcb->num_requests++;
40b4f539
KW
2486 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2487 reqs[i].nb_sectors, multiwrite_cb, mcb);
2488
2489 if (acb == NULL) {
2490 // We can only fail the whole thing if no request has been
2491 // submitted yet. Otherwise we'll wait for the submitted AIOs to
2492 // complete and report the error in the callback.
453f9a16 2493 if (i == 0) {
6d519a5f 2494 trace_bdrv_aio_multiwrite_earlyfail(mcb);
40b4f539
KW
2495 goto fail;
2496 } else {
6d519a5f 2497 trace_bdrv_aio_multiwrite_latefail(mcb, i);
7eb58a6c 2498 multiwrite_cb(mcb, -EIO);
40b4f539
KW
2499 break;
2500 }
40b4f539
KW
2501 }
2502 }
2503
453f9a16
KW
2504 /* Complete the dummy request */
2505 multiwrite_cb(mcb, 0);
2506
40b4f539
KW
2507 return 0;
2508
2509fail:
453f9a16
KW
2510 for (i = 0; i < mcb->num_callbacks; i++) {
2511 reqs[i].error = -EIO;
2512 }
7267c094 2513 g_free(mcb);
40b4f539
KW
2514 return -1;
2515}
2516
83f64091 2517void bdrv_aio_cancel(BlockDriverAIOCB *acb)
83f64091 2518{
6bbff9a0 2519 acb->pool->cancel(acb);
83f64091
FB
2520}
2521
ce1a14dc 2522
83f64091
FB
2523/**************************************************************/
2524/* async block device emulation */
2525
c16b5a2c
CH
/* AIOCB used to emulate async I/O with the driver's synchronous calls. */
typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;    /* generic AIOCB header */
    QEMUBH *bh;                 /* bottom half that delivers the completion */
    int ret;                    /* result of the synchronous read/write */
    /* vector translation state */
    QEMUIOVector *qiov;         /* caller's I/O vector */
    uint8_t *bounce;            /* linear buffer used for the actual I/O */
    int is_write;               /* non-zero for writes */
} BlockDriverAIOCBSync;
2535
2536static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2537{
b666d239
KW
2538 BlockDriverAIOCBSync *acb =
2539 container_of(blockacb, BlockDriverAIOCBSync, common);
6a7ad299 2540 qemu_bh_delete(acb->bh);
36afc451 2541 acb->bh = NULL;
c16b5a2c
CH
2542 qemu_aio_release(acb);
2543}
2544
/* Pool of BlockDriverAIOCBSync objects used by bdrv_aio_rw_vector(). */
static AIOPool bdrv_em_aio_pool = {
    .aiocb_size         = sizeof(BlockDriverAIOCBSync),
    .cancel             = bdrv_aio_cancel_em,
};
2549
ce1a14dc 2550static void bdrv_aio_bh_cb(void *opaque)
83f64091 2551{
ce1a14dc 2552 BlockDriverAIOCBSync *acb = opaque;
f141eafe 2553
f141eafe
AL
2554 if (!acb->is_write)
2555 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
ceb42de8 2556 qemu_vfree(acb->bounce);
ce1a14dc 2557 acb->common.cb(acb->common.opaque, acb->ret);
6a7ad299 2558 qemu_bh_delete(acb->bh);
36afc451 2559 acb->bh = NULL;
ce1a14dc 2560 qemu_aio_release(acb);
83f64091 2561}
beac80cd 2562
f141eafe
AL
2563static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2564 int64_t sector_num,
2565 QEMUIOVector *qiov,
2566 int nb_sectors,
2567 BlockDriverCompletionFunc *cb,
2568 void *opaque,
2569 int is_write)
2570
83f64091 2571{
ce1a14dc 2572 BlockDriverAIOCBSync *acb;
ce1a14dc 2573
c16b5a2c 2574 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
f141eafe
AL
2575 acb->is_write = is_write;
2576 acb->qiov = qiov;
e268ca52 2577 acb->bounce = qemu_blockalign(bs, qiov->size);
f141eafe 2578
ce1a14dc
PB
2579 if (!acb->bh)
2580 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
f141eafe
AL
2581
2582 if (is_write) {
2583 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
1ed20acf 2584 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
f141eafe 2585 } else {
1ed20acf 2586 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
f141eafe
AL
2587 }
2588
ce1a14dc 2589 qemu_bh_schedule(acb->bh);
f141eafe 2590
ce1a14dc 2591 return &acb->common;
beac80cd
FB
2592}
2593
f141eafe
AL
/* Emulated async read: bdrv_aio_rw_vector() with is_write = 0. */
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
}
83f64091 2600
f141eafe
AL
/* Emulated async write: bdrv_aio_rw_vector() with is_write = 1. */
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
}
beac80cd 2607
68485420
KW
2608
/* AIOCB for requests that are carried out inside a coroutine. */
typedef struct BlockDriverAIOCBCoroutine {
    BlockDriverAIOCB common;    /* generic AIOCB header */
    BlockRequest req;           /* sector, nb_sectors, qiov and result (error) */
    bool is_write;              /* true for writes, false for reads */
    QEMUBH* bh;                 /* bottom half that delivers the completion */
} BlockDriverAIOCBCoroutine;
2615
/*
 * Cancel hook for bdrv_em_co_aio_pool.  The given AIOCB is not touched;
 * the function only calls qemu_aio_flush().
 */
static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
{
    qemu_aio_flush();
}
2620
/* Pool of BlockDriverAIOCBCoroutine objects for coroutine-backed AIO. */
static AIOPool bdrv_em_co_aio_pool = {
    .aiocb_size         = sizeof(BlockDriverAIOCBCoroutine),
    .cancel             = bdrv_aio_co_cancel_em,
};
2625
35246a68 2626static void bdrv_co_em_bh(void *opaque)
68485420
KW
2627{
2628 BlockDriverAIOCBCoroutine *acb = opaque;
2629
2630 acb->common.cb(acb->common.opaque, acb->req.error);
2631 qemu_bh_delete(acb->bh);
2632 qemu_aio_release(acb);
2633}
2634
b2a61371
SH
2635/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
2636static void coroutine_fn bdrv_co_do_rw(void *opaque)
2637{
2638 BlockDriverAIOCBCoroutine *acb = opaque;
2639 BlockDriverState *bs = acb->common.bs;
2640
2641 if (!acb->is_write) {
2642 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
2643 acb->req.nb_sectors, acb->req.qiov);
2644 } else {
2645 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
2646 acb->req.nb_sectors, acb->req.qiov);
2647 }
2648
35246a68 2649 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
b2a61371
SH
2650 qemu_bh_schedule(acb->bh);
2651}
2652
68485420
KW
2653static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
2654 int64_t sector_num,
2655 QEMUIOVector *qiov,
2656 int nb_sectors,
2657 BlockDriverCompletionFunc *cb,
2658 void *opaque,
8c5873d6 2659 bool is_write)
68485420
KW
2660{
2661 Coroutine *co;
2662 BlockDriverAIOCBCoroutine *acb;
2663
2664 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2665 acb->req.sector = sector_num;
2666 acb->req.nb_sectors = nb_sectors;
2667 acb->req.qiov = qiov;
2668 acb->is_write = is_write;
2669
8c5873d6 2670 co = qemu_coroutine_create(bdrv_co_do_rw);
68485420
KW
2671 qemu_coroutine_enter(co, acb);
2672
2673 return &acb->common;
2674}
2675
07f07615 2676static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
b2e12bc6 2677{
07f07615
PB
2678 BlockDriverAIOCBCoroutine *acb = opaque;
2679 BlockDriverState *bs = acb->common.bs;
b2e12bc6 2680
07f07615
PB
2681 acb->req.error = bdrv_co_flush(bs);
2682 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
b2e12bc6 2683 qemu_bh_schedule(acb->bh);
b2e12bc6
CH
2684}
2685
07f07615 2686BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
016f5cf6
AG
2687 BlockDriverCompletionFunc *cb, void *opaque)
2688{
07f07615 2689 trace_bdrv_aio_flush(bs, opaque);
016f5cf6 2690
07f07615
PB
2691 Coroutine *co;
2692 BlockDriverAIOCBCoroutine *acb;
016f5cf6 2693
07f07615
PB
2694 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2695 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
2696 qemu_coroutine_enter(co, acb);
016f5cf6 2697
016f5cf6
AG
2698 return &acb->common;
2699}
2700
4265d620
PB
2701static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
2702{
2703 BlockDriverAIOCBCoroutine *acb = opaque;
2704 BlockDriverState *bs = acb->common.bs;
2705
2706 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
2707 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
2708 qemu_bh_schedule(acb->bh);
2709}
2710
2711BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
2712 int64_t sector_num, int nb_sectors,
2713 BlockDriverCompletionFunc *cb, void *opaque)
2714{
2715 Coroutine *co;
2716 BlockDriverAIOCBCoroutine *acb;
2717
2718 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
2719
2720 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2721 acb->req.sector = sector_num;
2722 acb->req.nb_sectors = nb_sectors;
2723 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
2724 qemu_coroutine_enter(co, acb);
2725
2726 return &acb->common;
2727}
2728
ea2384d3
FB
/* Run the module initializers registered for MODULE_INIT_BLOCK. */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}
ce1a14dc 2733
eb852011
MA
/* Like bdrv_init(), but sets use_bdrv_whitelist first. */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
2739
c16b5a2c
CH
2740void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2741 BlockDriverCompletionFunc *cb, void *opaque)
ce1a14dc 2742{
ce1a14dc
PB
2743 BlockDriverAIOCB *acb;
2744
6bbff9a0
AL
2745 if (pool->free_aiocb) {
2746 acb = pool->free_aiocb;
2747 pool->free_aiocb = acb->next;
ce1a14dc 2748 } else {
7267c094 2749 acb = g_malloc0(pool->aiocb_size);
6bbff9a0 2750 acb->pool = pool;
ce1a14dc
PB
2751 }
2752 acb->bs = bs;
2753 acb->cb = cb;
2754 acb->opaque = opaque;
2755 return acb;
2756}
2757
2758void qemu_aio_release(void *p)
2759{
6bbff9a0
AL
2760 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2761 AIOPool *pool = acb->pool;
2762 acb->next = pool->free_aiocb;
2763 pool->free_aiocb = acb;
ce1a14dc 2764}
19cb3738 2765
f9f05dc5
KW
2766/**************************************************************/
2767/* Coroutine block device emulation */
2768
/* State shared between a yielding coroutine and its AIO completion. */
typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;   /* coroutine to re-enter on completion */
    int ret;                /* result reported by the AIO callback */
} CoroutineIOCompletion;
2773
2774static void bdrv_co_io_em_complete(void *opaque, int ret)
2775{
2776 CoroutineIOCompletion *co = opaque;
2777
2778 co->ret = ret;
2779 qemu_coroutine_enter(co->coroutine, NULL);
2780}
2781
2782static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
2783 int nb_sectors, QEMUIOVector *iov,
2784 bool is_write)
2785{
2786 CoroutineIOCompletion co = {
2787 .coroutine = qemu_coroutine_self(),
2788 };
2789 BlockDriverAIOCB *acb;
2790
2791 if (is_write) {
a652d160
SH
2792 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
2793 bdrv_co_io_em_complete, &co);
f9f05dc5 2794 } else {
a652d160
SH
2795 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
2796 bdrv_co_io_em_complete, &co);
f9f05dc5
KW
2797 }
2798
59370aaa 2799 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
f9f05dc5
KW
2800 if (!acb) {
2801 return -EIO;
2802 }
2803 qemu_coroutine_yield();
2804
2805 return co.ret;
2806}
2807
/* Coroutine read emulated on top of AIO: bdrv_co_io_em() as a read. */
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
}
2814
/* Coroutine write emulated on top of AIO: bdrv_co_io_em() as a write. */
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
}
2821
/* Coroutine entry for bdrv_flush(): stores bdrv_co_flush()'s result in rwco. */
static void coroutine_fn bdrv_flush_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    rwco->ret = bdrv_co_flush(rwco->bs);
}
2828
2829int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
2830{
eb489bb1
KW
2831 int ret;
2832
ca716364 2833 if (!bs->drv) {
07f07615 2834 return 0;
eb489bb1
KW
2835 }
2836
ca716364 2837 /* Write back cached data to the OS even with cache=unsafe */
eb489bb1
KW
2838 if (bs->drv->bdrv_co_flush_to_os) {
2839 ret = bs->drv->bdrv_co_flush_to_os(bs);
2840 if (ret < 0) {
2841 return ret;
2842 }
2843 }
2844
ca716364
KW
2845 /* But don't actually force it to the disk with cache=unsafe */
2846 if (bs->open_flags & BDRV_O_NO_FLUSH) {
2847 return 0;
2848 }
2849
eb489bb1 2850 if (bs->drv->bdrv_co_flush_to_disk) {
c68b89ac 2851 return bs->drv->bdrv_co_flush_to_disk(bs);
07f07615
PB
2852 } else if (bs->drv->bdrv_aio_flush) {
2853 BlockDriverAIOCB *acb;
2854 CoroutineIOCompletion co = {
2855 .coroutine = qemu_coroutine_self(),
2856 };
2857
2858 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
2859 if (acb == NULL) {
2860 return -EIO;
2861 } else {
2862 qemu_coroutine_yield();
2863 return co.ret;
2864 }
07f07615
PB
2865 } else {
2866 /*
2867 * Some block drivers always operate in either writethrough or unsafe
2868 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
2869 * know how the server works (because the behaviour is hardcoded or
2870 * depends on server-side configuration), so we can't ensure that
2871 * everything is safe on disk. Returning an error doesn't work because
2872 * that would break guests even if the server operates in writethrough
2873 * mode.
2874 *
2875 * Let's hope the user knows what he's doing.
2876 */
2877 return 0;
2878 }
2879}
2880
0f15423c
AL
2881void bdrv_invalidate_cache(BlockDriverState *bs)
2882{
2883 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
2884 bs->drv->bdrv_invalidate_cache(bs);
2885 }
2886}
2887
/* Call bdrv_invalidate_cache() on every device in the bdrv_states list. */
void bdrv_invalidate_cache_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_invalidate_cache(bs);
    }
}
2896
07f07615
PB
2897int bdrv_flush(BlockDriverState *bs)
2898{
2899 Coroutine *co;
2900 RwCo rwco = {
2901 .bs = bs,
2902 .ret = NOT_DONE,
e7a8a783 2903 };
e7a8a783 2904
07f07615
PB
2905 if (qemu_in_coroutine()) {
2906 /* Fast-path if already in coroutine context */
2907 bdrv_flush_co_entry(&rwco);
2908 } else {
2909 co = qemu_coroutine_create(bdrv_flush_co_entry);
2910 qemu_coroutine_enter(co, &rwco);
2911 while (rwco.ret == NOT_DONE) {
2912 qemu_aio_wait();
2913 }
e7a8a783 2914 }
07f07615
PB
2915
2916 return rwco.ret;
e7a8a783
KW
2917}
2918
4265d620
PB
/* Coroutine entry for bdrv_discard(): stores bdrv_co_discard()'s result. */
static void coroutine_fn bdrv_discard_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
}
2925
2926int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
2927 int nb_sectors)
2928{
2929 if (!bs->drv) {
2930 return -ENOMEDIUM;
2931 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
2932 return -EIO;
2933 } else if (bs->read_only) {
2934 return -EROFS;
2935 } else if (bs->drv->bdrv_co_discard) {
2936 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
2937 } else if (bs->drv->bdrv_aio_discard) {
2938 BlockDriverAIOCB *acb;
2939 CoroutineIOCompletion co = {
2940 .coroutine = qemu_coroutine_self(),
2941 };
2942
2943 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
2944 bdrv_co_io_em_complete, &co);
2945 if (acb == NULL) {
2946 return -EIO;
2947 } else {
2948 qemu_coroutine_yield();
2949 return co.ret;
2950 }
4265d620
PB
2951 } else {
2952 return 0;
2953 }
2954}
2955
2956int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
2957{
2958 Coroutine *co;
2959 RwCo rwco = {
2960 .bs = bs,
2961 .sector_num = sector_num,
2962 .nb_sectors = nb_sectors,
2963 .ret = NOT_DONE,
2964 };
2965
2966 if (qemu_in_coroutine()) {
2967 /* Fast-path if already in coroutine context */
2968 bdrv_discard_co_entry(&rwco);
2969 } else {
2970 co = qemu_coroutine_create(bdrv_discard_co_entry);
2971 qemu_coroutine_enter(co, &rwco);
2972 while (rwco.ret == NOT_DONE) {
2973 qemu_aio_wait();
2974 }
2975 }
2976
2977 return rwco.ret;
2978}
2979
19cb3738
FB
2980/**************************************************************/
2981/* removable device support */
2982
2983/**
2984 * Return TRUE if the media is present
2985 */
2986int bdrv_is_inserted(BlockDriverState *bs)
2987{
2988 BlockDriver *drv = bs->drv;
a1aff5bf 2989
19cb3738
FB
2990 if (!drv)
2991 return 0;
2992 if (!drv->bdrv_is_inserted)
a1aff5bf
MA
2993 return 1;
2994 return drv->bdrv_is_inserted(bs);
19cb3738
FB
2995}
2996
2997/**
8e49ca46
MA
2998 * Return whether the media changed since the last call to this
2999 * function, or -ENOTSUP if we don't know. Most drivers don't know.
19cb3738
FB
3000 */
3001int bdrv_media_changed(BlockDriverState *bs)
3002{
3003 BlockDriver *drv = bs->drv;
19cb3738 3004
8e49ca46
MA
3005 if (drv && drv->bdrv_media_changed) {
3006 return drv->bdrv_media_changed(bs);
3007 }
3008 return -ENOTSUP;
19cb3738
FB
3009}
3010
3011/**
3012 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3013 */
fdec4404 3014void bdrv_eject(BlockDriverState *bs, int eject_flag)
19cb3738
FB
3015{
3016 BlockDriver *drv = bs->drv;
19cb3738 3017
822e1cd1
MA
3018 if (drv && drv->bdrv_eject) {
3019 drv->bdrv_eject(bs, eject_flag);
19cb3738
FB
3020 }
3021}
3022
19cb3738
FB
3023/**
3024 * Lock or unlock the media (if it is locked, the user won't be able
3025 * to eject it manually).
3026 */
025e849a 3027void bdrv_lock_medium(BlockDriverState *bs, bool locked)
19cb3738
FB
3028{
3029 BlockDriver *drv = bs->drv;
3030
025e849a 3031 trace_bdrv_lock_medium(bs, locked);
b8c6d095 3032
025e849a
MA
3033 if (drv && drv->bdrv_lock_medium) {
3034 drv->bdrv_lock_medium(bs, locked);
19cb3738
FB
3035 }
3036}
985a03b0
TS
3037
3038/* needed for generic scsi interface */
3039
3040int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3041{
3042 BlockDriver *drv = bs->drv;
3043
3044 if (drv && drv->bdrv_ioctl)
3045 return drv->bdrv_ioctl(bs, req, buf);
3046 return -ENOTSUP;
3047}
7d780669 3048
221f715d
AL
3049BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3050 unsigned long int req, void *buf,
3051 BlockDriverCompletionFunc *cb, void *opaque)
7d780669 3052{
221f715d 3053 BlockDriver *drv = bs->drv;
7d780669 3054
221f715d
AL
3055 if (drv && drv->bdrv_aio_ioctl)
3056 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3057 return NULL;
7d780669 3058}
e268ca52 3059
7b6f9300
MA
/* Set the buffer alignment that qemu_blockalign() uses for bs. */
void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
{
    bs->buffer_alignment = align;
}
7cd1e32a 3064
e268ca52
AL
3065void *qemu_blockalign(BlockDriverState *bs, size_t size)
3066{
3067 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3068}
7cd1e32a 3069
3070void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3071{
3072 int64_t bitmap_size;
a55eb92c 3073
aaa0eb75 3074 bs->dirty_count = 0;
a55eb92c 3075 if (enable) {
c6d22830
JK
3076 if (!bs->dirty_bitmap) {
3077 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3078 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3079 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
a55eb92c 3080
7267c094 3081 bs->dirty_bitmap = g_malloc0(bitmap_size);
a55eb92c 3082 }
7cd1e32a 3083 } else {
c6d22830 3084 if (bs->dirty_bitmap) {
7267c094 3085 g_free(bs->dirty_bitmap);
c6d22830 3086 bs->dirty_bitmap = NULL;
a55eb92c 3087 }
7cd1e32a 3088 }
3089}
3090
3091int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3092{
6ea44308 3093 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c 3094
c6d22830
JK
3095 if (bs->dirty_bitmap &&
3096 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
6d59fec1
MT
3097 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3098 (1UL << (chunk % (sizeof(unsigned long) * 8))));
7cd1e32a 3099 } else {
3100 return 0;
3101 }
3102}
3103
a55eb92c
JK
/*
 * Clear the dirty state of nr_sectors sectors starting at cur_sector by
 * calling set_dirty_bitmap() with a dirty value of 0.
 */
void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                      int nr_sectors)
{
    set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
}
aaa0eb75
LS
3109
/* Return the current value of bs->dirty_count. */
int64_t bdrv_get_dirty_count(BlockDriverState *bs)
{
    return bs->dirty_count;
}
f88e1a42 3114
db593f25
MT
/*
 * Set the in_use flag of bs.  Asserts that the flag actually changes, so
 * setting it to its current value is a programming error.
 */
void bdrv_set_in_use(BlockDriverState *bs, int in_use)
{
    assert(bs->in_use != in_use);
    bs->in_use = in_use;
}
3120
/* Return the in_use flag of bs. */
int bdrv_in_use(BlockDriverState *bs)
{
    return bs->in_use;
}
3125
28a7282a
LC
/* Turn on I/O status tracking for bs and reset the status to OK. */
void bdrv_iostatus_enable(BlockDriverState *bs)
{
    bs->iostatus_enabled = true;
    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}
3131
3132/* The I/O status is only enabled if the drive explicitly
3133 * enables it _and_ the VM is configured to stop on errors */
3134bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3135{
d6bf279e 3136 return (bs->iostatus_enabled &&
28a7282a
LC
3137 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3138 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3139 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3140}
3141
/* Turn off I/O status tracking for bs. */
void bdrv_iostatus_disable(BlockDriverState *bs)
{
    bs->iostatus_enabled = false;
}
3146
3147void bdrv_iostatus_reset(BlockDriverState *bs)
3148{
3149 if (bdrv_iostatus_is_enabled(bs)) {
58e21ef5 3150 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
28a7282a
LC
3151 }
3152}
3153
3154/* XXX: Today this is set by device models because it makes the implementation
3155 quite simple. However, the block layer knows about the error, so it's
3156 possible to implement this without device models being involved */
3157void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3158{
58e21ef5
LC
3159 if (bdrv_iostatus_is_enabled(bs) &&
3160 bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
28a7282a 3161 assert(error >= 0);
58e21ef5
LC
3162 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3163 BLOCK_DEVICE_IO_STATUS_FAILED;
28a7282a
LC
3164 }
3165}
3166
a597e79c
CH
3167void
3168bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3169 enum BlockAcctType type)
3170{
3171 assert(type < BDRV_MAX_IOTYPE);
3172
3173 cookie->bytes = bytes;
c488c7f6 3174 cookie->start_time_ns = get_clock();
a597e79c
CH
3175 cookie->type = type;
3176}
3177
3178void
3179bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3180{
3181 assert(cookie->type < BDRV_MAX_IOTYPE);
3182
3183 bs->nr_bytes[cookie->type] += cookie->bytes;
3184 bs->nr_ops[cookie->type]++;
c488c7f6 3185 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
a597e79c
CH
3186}
3187
f88e1a42
JS
3188int bdrv_img_create(const char *filename, const char *fmt,
3189 const char *base_filename, const char *base_fmt,
3190 char *options, uint64_t img_size, int flags)
3191{
3192 QEMUOptionParameter *param = NULL, *create_options = NULL;
d220894e 3193 QEMUOptionParameter *backing_fmt, *backing_file, *size;
f88e1a42
JS
3194 BlockDriverState *bs = NULL;
3195 BlockDriver *drv, *proto_drv;
96df67d1 3196 BlockDriver *backing_drv = NULL;
f88e1a42
JS
3197 int ret = 0;
3198
3199 /* Find driver and parse its options */
3200 drv = bdrv_find_format(fmt);
3201 if (!drv) {
3202 error_report("Unknown file format '%s'", fmt);
4f70f249 3203 ret = -EINVAL;
f88e1a42
JS
3204 goto out;
3205 }
3206
3207 proto_drv = bdrv_find_protocol(filename);
3208 if (!proto_drv) {
3209 error_report("Unknown protocol '%s'", filename);
4f70f249 3210 ret = -EINVAL;
f88e1a42
JS
3211 goto out;
3212 }
3213
3214 create_options = append_option_parameters(create_options,
3215 drv->create_options);
3216 create_options = append_option_parameters(create_options,
3217 proto_drv->create_options);
3218
3219 /* Create parameter list with default values */
3220 param = parse_option_parameters("", create_options, param);
3221
3222 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3223
3224 /* Parse -o options */
3225 if (options) {
3226 param = parse_option_parameters(options, create_options, param);
3227 if (param == NULL) {
3228 error_report("Invalid options for file format '%s'.", fmt);
4f70f249 3229 ret = -EINVAL;
f88e1a42
JS
3230 goto out;
3231 }
3232 }
3233
3234 if (base_filename) {
3235 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3236 base_filename)) {
3237 error_report("Backing file not supported for file format '%s'",
3238 fmt);
4f70f249 3239 ret = -EINVAL;
f88e1a42
JS
3240 goto out;
3241 }
3242 }
3243
3244 if (base_fmt) {
3245 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3246 error_report("Backing file format not supported for file "
3247 "format '%s'", fmt);
4f70f249 3248 ret = -EINVAL;
f88e1a42
JS
3249 goto out;
3250 }
3251 }
3252
792da93a
JS
3253 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3254 if (backing_file && backing_file->value.s) {
3255 if (!strcmp(filename, backing_file->value.s)) {
3256 error_report("Error: Trying to create an image with the "
3257 "same filename as the backing file");
4f70f249 3258 ret = -EINVAL;
792da93a
JS
3259 goto out;
3260 }
3261 }
3262
f88e1a42
JS
3263 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3264 if (backing_fmt && backing_fmt->value.s) {
96df67d1
SH
3265 backing_drv = bdrv_find_format(backing_fmt->value.s);
3266 if (!backing_drv) {
f88e1a42
JS
3267 error_report("Unknown backing file format '%s'",
3268 backing_fmt->value.s);
4f70f249 3269 ret = -EINVAL;
f88e1a42
JS
3270 goto out;
3271 }
3272 }
3273
3274 // The size for the image must always be specified, with one exception:
3275 // If we are using a backing file, we can obtain the size from there
d220894e
KW
3276 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3277 if (size && size->value.n == -1) {
f88e1a42
JS
3278 if (backing_file && backing_file->value.s) {
3279 uint64_t size;
f88e1a42
JS
3280 char buf[32];
3281
f88e1a42
JS
3282 bs = bdrv_new("");
3283
96df67d1 3284 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
f88e1a42 3285 if (ret < 0) {
96df67d1 3286 error_report("Could not open '%s'", backing_file->value.s);
f88e1a42
JS
3287 goto out;
3288 }
3289 bdrv_get_geometry(bs, &size);
3290 size *= 512;
3291
3292 snprintf(buf, sizeof(buf), "%" PRId64, size);
3293 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3294 } else {
3295 error_report("Image creation needs a size parameter");
4f70f249 3296 ret = -EINVAL;
f88e1a42
JS
3297 goto out;
3298 }
3299 }
3300
3301 printf("Formatting '%s', fmt=%s ", filename, fmt);
3302 print_option_parameters(param);
3303 puts("");
3304
3305 ret = bdrv_create(drv, filename, param);
3306
3307 if (ret < 0) {
3308 if (ret == -ENOTSUP) {
3309 error_report("Formatting or formatting option not supported for "
3310 "file format '%s'", fmt);
3311 } else if (ret == -EFBIG) {
3312 error_report("The image size is too large for file format '%s'",
3313 fmt);
3314 } else {
3315 error_report("%s: error while creating %s: %s", filename, fmt,
3316 strerror(-ret));
3317 }
3318 }
3319
3320out:
3321 free_option_parameters(create_options);
3322 free_option_parameters(param);
3323
3324 if (bs) {
3325 bdrv_delete(bs);
3326 }
4f70f249
JS
3327
3328 return ret;
f88e1a42 3329}