]> git.proxmox.com Git - qemu.git/blame - block.c
sun4u: give ISA bus to ISA methods
[qemu.git] / block.c
CommitLineData
fc01f7e7
FB
1/*
2 * QEMU System Emulator block driver
5fafdf24 3 *
fc01f7e7 4 * Copyright (c) 2003 Fabrice Bellard
5fafdf24 5 *
fc01f7e7
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
3990d09a 24#include "config-host.h"
faf07963 25#include "qemu-common.h"
6d519a5f 26#include "trace.h"
376253ec 27#include "monitor.h"
ea2384d3 28#include "block_int.h"
5efa9d5a 29#include "module.h"
f795e743 30#include "qjson.h"
68485420 31#include "qemu-coroutine.h"
b2023818 32#include "qmp-commands.h"
0563e191 33#include "qemu-timer.h"
fc01f7e7 34
71e72a19 35#ifdef CONFIG_BSD
7674e7bf
FB
36#include <sys/types.h>
37#include <sys/stat.h>
38#include <sys/ioctl.h>
72cf2d4f 39#include <sys/queue.h>
c5e97233 40#ifndef __DragonFly__
7674e7bf
FB
41#include <sys/disk.h>
42#endif
c5e97233 43#endif
7674e7bf 44
49dc768d
AL
45#ifdef _WIN32
46#include <windows.h>
47#endif
48
1c9805a3
SH
49#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
50
7d4b4ba5 51static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
f141eafe
AL
52static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
53 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
c87c0672 54 BlockDriverCompletionFunc *cb, void *opaque);
f141eafe
AL
55static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
56 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 57 BlockDriverCompletionFunc *cb, void *opaque);
f9f05dc5
KW
58static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
59 int64_t sector_num, int nb_sectors,
60 QEMUIOVector *iov);
61static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
62 int64_t sector_num, int nb_sectors,
63 QEMUIOVector *iov);
c5fbe571
SH
64static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
65 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
1c9805a3
SH
66static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
67 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
b2a61371
SH
68static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
69 int64_t sector_num,
70 QEMUIOVector *qiov,
71 int nb_sectors,
72 BlockDriverCompletionFunc *cb,
73 void *opaque,
8c5873d6 74 bool is_write);
b2a61371 75static void coroutine_fn bdrv_co_do_rw(void *opaque);
ec530c81 76
98f90dba
ZYW
77static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
78 bool is_write, double elapsed_time, uint64_t *wait);
79static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
80 double elapsed_time, uint64_t *wait);
81static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
82 bool is_write, int64_t *wait);
83
1b7bdbc1
SH
84static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
85 QTAILQ_HEAD_INITIALIZER(bdrv_states);
7ee930d0 86
8a22f02a
SH
87static QLIST_HEAD(, BlockDriver) bdrv_drivers =
88 QLIST_HEAD_INITIALIZER(bdrv_drivers);
ea2384d3 89
f9092b10
MA
90/* The device to use for VM snapshots */
91static BlockDriverState *bs_snapshots;
92
eb852011
MA
93/* If non-zero, use only whitelisted block drivers */
94static int use_bdrv_whitelist;
95
9e0b22f4
SH
96#ifdef _WIN32
97static int is_windows_drive_prefix(const char *filename)
98{
99 return (((filename[0] >= 'a' && filename[0] <= 'z') ||
100 (filename[0] >= 'A' && filename[0] <= 'Z')) &&
101 filename[1] == ':');
102}
103
104int is_windows_drive(const char *filename)
105{
106 if (is_windows_drive_prefix(filename) &&
107 filename[2] == '\0')
108 return 1;
109 if (strstart(filename, "\\\\.\\", NULL) ||
110 strstart(filename, "//./", NULL))
111 return 1;
112 return 0;
113}
114#endif
115
0563e191 116/* throttling disk I/O limits */
98f90dba
ZYW
117void bdrv_io_limits_disable(BlockDriverState *bs)
118{
119 bs->io_limits_enabled = false;
120
121 while (qemu_co_queue_next(&bs->throttled_reqs));
122
123 if (bs->block_timer) {
124 qemu_del_timer(bs->block_timer);
125 qemu_free_timer(bs->block_timer);
126 bs->block_timer = NULL;
127 }
128
129 bs->slice_start = 0;
130 bs->slice_end = 0;
131 bs->slice_time = 0;
132 memset(&bs->io_base, 0, sizeof(bs->io_base));
133}
134
0563e191
ZYW
135static void bdrv_block_timer(void *opaque)
136{
137 BlockDriverState *bs = opaque;
138
139 qemu_co_queue_next(&bs->throttled_reqs);
140}
141
142void bdrv_io_limits_enable(BlockDriverState *bs)
143{
144 qemu_co_queue_init(&bs->throttled_reqs);
145 bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
146 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
147 bs->slice_start = qemu_get_clock_ns(vm_clock);
148 bs->slice_end = bs->slice_start + bs->slice_time;
149 memset(&bs->io_base, 0, sizeof(bs->io_base));
150 bs->io_limits_enabled = true;
151}
152
153bool bdrv_io_limits_enabled(BlockDriverState *bs)
154{
155 BlockIOLimit *io_limits = &bs->io_limits;
156 return io_limits->bps[BLOCK_IO_LIMIT_READ]
157 || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
158 || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
159 || io_limits->iops[BLOCK_IO_LIMIT_READ]
160 || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
161 || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
162}
163
98f90dba
ZYW
164static void bdrv_io_limits_intercept(BlockDriverState *bs,
165 bool is_write, int nb_sectors)
166{
167 int64_t wait_time = -1;
168
169 if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
170 qemu_co_queue_wait(&bs->throttled_reqs);
171 }
172
173 /* In fact, we hope to keep each request's timing, in FIFO mode. The next
174 * throttled requests will not be dequeued until the current request is
175 * allowed to be serviced. So if the current request still exceeds the
176 * limits, it will be inserted to the head. All requests followed it will
177 * be still in throttled_reqs queue.
178 */
179
180 while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
181 qemu_mod_timer(bs->block_timer,
182 wait_time + qemu_get_clock_ns(vm_clock));
183 qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
184 }
185
186 qemu_co_queue_next(&bs->throttled_reqs);
187}
188
9e0b22f4
SH
189/* check if the path starts with "<protocol>:" */
190static int path_has_protocol(const char *path)
191{
192#ifdef _WIN32
193 if (is_windows_drive(path) ||
194 is_windows_drive_prefix(path)) {
195 return 0;
196 }
197#endif
198
199 return strchr(path, ':') != NULL;
200}
201
83f64091 202int path_is_absolute(const char *path)
3b0d4f61 203{
83f64091 204 const char *p;
21664424
FB
205#ifdef _WIN32
206 /* specific case for names like: "\\.\d:" */
207 if (*path == '/' || *path == '\\')
208 return 1;
209#endif
83f64091
FB
210 p = strchr(path, ':');
211 if (p)
212 p++;
213 else
214 p = path;
3b9f94e1
FB
215#ifdef _WIN32
216 return (*p == '/' || *p == '\\');
217#else
218 return (*p == '/');
219#endif
3b0d4f61
FB
220}
221
83f64091
FB
222/* if filename is absolute, just copy it to dest. Otherwise, build a
223 path to it by considering it is relative to base_path. URL are
224 supported. */
225void path_combine(char *dest, int dest_size,
226 const char *base_path,
227 const char *filename)
3b0d4f61 228{
83f64091
FB
229 const char *p, *p1;
230 int len;
231
232 if (dest_size <= 0)
233 return;
234 if (path_is_absolute(filename)) {
235 pstrcpy(dest, dest_size, filename);
236 } else {
237 p = strchr(base_path, ':');
238 if (p)
239 p++;
240 else
241 p = base_path;
3b9f94e1
FB
242 p1 = strrchr(base_path, '/');
243#ifdef _WIN32
244 {
245 const char *p2;
246 p2 = strrchr(base_path, '\\');
247 if (!p1 || p2 > p1)
248 p1 = p2;
249 }
250#endif
83f64091
FB
251 if (p1)
252 p1++;
253 else
254 p1 = base_path;
255 if (p1 > p)
256 p = p1;
257 len = p - base_path;
258 if (len > dest_size - 1)
259 len = dest_size - 1;
260 memcpy(dest, base_path, len);
261 dest[len] = '\0';
262 pstrcat(dest, dest_size, filename);
3b0d4f61 263 }
3b0d4f61
FB
264}
265
5efa9d5a 266void bdrv_register(BlockDriver *bdrv)
ea2384d3 267{
8c5873d6
SH
268 /* Block drivers without coroutine functions need emulation */
269 if (!bdrv->bdrv_co_readv) {
f9f05dc5
KW
270 bdrv->bdrv_co_readv = bdrv_co_readv_em;
271 bdrv->bdrv_co_writev = bdrv_co_writev_em;
272
f8c35c1d
SH
273 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
274 * the block driver lacks aio we need to emulate that too.
275 */
f9f05dc5
KW
276 if (!bdrv->bdrv_aio_readv) {
277 /* add AIO emulation layer */
278 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
279 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
f9f05dc5 280 }
83f64091 281 }
b2e12bc6 282
8a22f02a 283 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
ea2384d3 284}
b338082b
FB
285
286/* create a new block device (by default it is empty) */
287BlockDriverState *bdrv_new(const char *device_name)
288{
1b7bdbc1 289 BlockDriverState *bs;
b338082b 290
7267c094 291 bs = g_malloc0(sizeof(BlockDriverState));
b338082b 292 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
ea2384d3 293 if (device_name[0] != '\0') {
1b7bdbc1 294 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
ea2384d3 295 }
28a7282a 296 bdrv_iostatus_disable(bs);
b338082b
FB
297 return bs;
298}
299
ea2384d3
FB
300BlockDriver *bdrv_find_format(const char *format_name)
301{
302 BlockDriver *drv1;
8a22f02a
SH
303 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
304 if (!strcmp(drv1->format_name, format_name)) {
ea2384d3 305 return drv1;
8a22f02a 306 }
ea2384d3
FB
307 }
308 return NULL;
309}
310
eb852011
MA
311static int bdrv_is_whitelisted(BlockDriver *drv)
312{
313 static const char *whitelist[] = {
314 CONFIG_BDRV_WHITELIST
315 };
316 const char **p;
317
318 if (!whitelist[0])
319 return 1; /* no whitelist, anything goes */
320
321 for (p = whitelist; *p; p++) {
322 if (!strcmp(drv->format_name, *p)) {
323 return 1;
324 }
325 }
326 return 0;
327}
328
329BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
330{
331 BlockDriver *drv = bdrv_find_format(format_name);
332 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
333}
334
0e7e1989
KW
335int bdrv_create(BlockDriver *drv, const char* filename,
336 QEMUOptionParameter *options)
ea2384d3
FB
337{
338 if (!drv->bdrv_create)
339 return -ENOTSUP;
0e7e1989
KW
340
341 return drv->bdrv_create(filename, options);
ea2384d3
FB
342}
343
84a12e66
CH
344int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
345{
346 BlockDriver *drv;
347
b50cbabc 348 drv = bdrv_find_protocol(filename);
84a12e66 349 if (drv == NULL) {
16905d71 350 return -ENOENT;
84a12e66
CH
351 }
352
353 return bdrv_create(drv, filename, options);
354}
355
d5249393 356#ifdef _WIN32
95389c86 357void get_tmp_filename(char *filename, int size)
d5249393 358{
3b9f94e1 359 char temp_dir[MAX_PATH];
3b46e624 360
3b9f94e1
FB
361 GetTempPath(MAX_PATH, temp_dir);
362 GetTempFileName(temp_dir, "qem", 0, filename);
d5249393
FB
363}
364#else
95389c86 365void get_tmp_filename(char *filename, int size)
fc01f7e7 366{
67b915a5 367 int fd;
7ccfb2eb 368 const char *tmpdir;
d5249393 369 /* XXX: race condition possible */
0badc1ee
AJ
370 tmpdir = getenv("TMPDIR");
371 if (!tmpdir)
372 tmpdir = "/tmp";
373 snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
ea2384d3
FB
374 fd = mkstemp(filename);
375 close(fd);
376}
d5249393 377#endif
fc01f7e7 378
84a12e66
CH
379/*
380 * Detect host devices. By convention, /dev/cdrom[N] is always
381 * recognized as a host CDROM.
382 */
383static BlockDriver *find_hdev_driver(const char *filename)
384{
385 int score_max = 0, score;
386 BlockDriver *drv = NULL, *d;
387
388 QLIST_FOREACH(d, &bdrv_drivers, list) {
389 if (d->bdrv_probe_device) {
390 score = d->bdrv_probe_device(filename);
391 if (score > score_max) {
392 score_max = score;
393 drv = d;
394 }
395 }
396 }
397
398 return drv;
399}
400
b50cbabc 401BlockDriver *bdrv_find_protocol(const char *filename)
83f64091
FB
402{
403 BlockDriver *drv1;
404 char protocol[128];
1cec71e3 405 int len;
83f64091 406 const char *p;
19cb3738 407
66f82cee
KW
408 /* TODO Drivers without bdrv_file_open must be specified explicitly */
409
39508e7a
CH
410 /*
411 * XXX(hch): we really should not let host device detection
412 * override an explicit protocol specification, but moving this
413 * later breaks access to device names with colons in them.
414 * Thanks to the brain-dead persistent naming schemes on udev-
415 * based Linux systems those actually are quite common.
416 */
417 drv1 = find_hdev_driver(filename);
418 if (drv1) {
419 return drv1;
420 }
421
9e0b22f4 422 if (!path_has_protocol(filename)) {
39508e7a 423 return bdrv_find_format("file");
84a12e66 424 }
9e0b22f4
SH
425 p = strchr(filename, ':');
426 assert(p != NULL);
1cec71e3
AL
427 len = p - filename;
428 if (len > sizeof(protocol) - 1)
429 len = sizeof(protocol) - 1;
430 memcpy(protocol, filename, len);
431 protocol[len] = '\0';
8a22f02a 432 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
5fafdf24 433 if (drv1->protocol_name &&
8a22f02a 434 !strcmp(drv1->protocol_name, protocol)) {
83f64091 435 return drv1;
8a22f02a 436 }
83f64091
FB
437 }
438 return NULL;
439}
440
c98ac35d 441static int find_image_format(const char *filename, BlockDriver **pdrv)
f3a5d3f8
CH
442{
443 int ret, score, score_max;
444 BlockDriver *drv1, *drv;
445 uint8_t buf[2048];
446 BlockDriverState *bs;
447
f5edb014 448 ret = bdrv_file_open(&bs, filename, 0);
c98ac35d
SW
449 if (ret < 0) {
450 *pdrv = NULL;
451 return ret;
452 }
f8ea0b00 453
08a00559
KW
454 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
455 if (bs->sg || !bdrv_is_inserted(bs)) {
1a396859 456 bdrv_delete(bs);
c98ac35d
SW
457 drv = bdrv_find_format("raw");
458 if (!drv) {
459 ret = -ENOENT;
460 }
461 *pdrv = drv;
462 return ret;
1a396859 463 }
f8ea0b00 464
83f64091
FB
465 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
466 bdrv_delete(bs);
467 if (ret < 0) {
c98ac35d
SW
468 *pdrv = NULL;
469 return ret;
83f64091
FB
470 }
471
ea2384d3 472 score_max = 0;
84a12e66 473 drv = NULL;
8a22f02a 474 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
83f64091
FB
475 if (drv1->bdrv_probe) {
476 score = drv1->bdrv_probe(buf, ret, filename);
477 if (score > score_max) {
478 score_max = score;
479 drv = drv1;
480 }
0849bf08 481 }
fc01f7e7 482 }
c98ac35d
SW
483 if (!drv) {
484 ret = -ENOENT;
485 }
486 *pdrv = drv;
487 return ret;
ea2384d3
FB
488}
489
51762288
SH
490/**
491 * Set the current 'total_sectors' value
492 */
493static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
494{
495 BlockDriver *drv = bs->drv;
496
396759ad
NB
497 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
498 if (bs->sg)
499 return 0;
500
51762288
SH
501 /* query actual device if possible, otherwise just trust the hint */
502 if (drv->bdrv_getlength) {
503 int64_t length = drv->bdrv_getlength(bs);
504 if (length < 0) {
505 return length;
506 }
507 hint = length >> BDRV_SECTOR_BITS;
508 }
509
510 bs->total_sectors = hint;
511 return 0;
512}
513
c3993cdc
SH
514/**
515 * Set open flags for a given cache mode
516 *
517 * Return 0 on success, -1 if the cache mode was invalid.
518 */
519int bdrv_parse_cache_flags(const char *mode, int *flags)
520{
521 *flags &= ~BDRV_O_CACHE_MASK;
522
523 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
524 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
92196b2f
SH
525 } else if (!strcmp(mode, "directsync")) {
526 *flags |= BDRV_O_NOCACHE;
c3993cdc
SH
527 } else if (!strcmp(mode, "writeback")) {
528 *flags |= BDRV_O_CACHE_WB;
529 } else if (!strcmp(mode, "unsafe")) {
530 *flags |= BDRV_O_CACHE_WB;
531 *flags |= BDRV_O_NO_FLUSH;
532 } else if (!strcmp(mode, "writethrough")) {
533 /* this is the default */
534 } else {
535 return -1;
536 }
537
538 return 0;
539}
540
53fec9d3
SH
541/**
542 * The copy-on-read flag is actually a reference count so multiple users may
543 * use the feature without worrying about clobbering its previous state.
544 * Copy-on-read stays enabled until all users have called to disable it.
545 */
546void bdrv_enable_copy_on_read(BlockDriverState *bs)
547{
548 bs->copy_on_read++;
549}
550
551void bdrv_disable_copy_on_read(BlockDriverState *bs)
552{
553 assert(bs->copy_on_read > 0);
554 bs->copy_on_read--;
555}
556
57915332
KW
557/*
558 * Common part for opening disk images and files
559 */
560static int bdrv_open_common(BlockDriverState *bs, const char *filename,
561 int flags, BlockDriver *drv)
562{
563 int ret, open_flags;
564
565 assert(drv != NULL);
566
28dcee10
SH
567 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
568
66f82cee 569 bs->file = NULL;
51762288 570 bs->total_sectors = 0;
57915332
KW
571 bs->encrypted = 0;
572 bs->valid_key = 0;
03f541bd 573 bs->sg = 0;
57915332 574 bs->open_flags = flags;
03f541bd 575 bs->growable = 0;
57915332
KW
576 bs->buffer_alignment = 512;
577
53fec9d3
SH
578 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
579 if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
580 bdrv_enable_copy_on_read(bs);
581 }
582
57915332 583 pstrcpy(bs->filename, sizeof(bs->filename), filename);
03f541bd 584 bs->backing_file[0] = '\0';
57915332
KW
585
586 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
587 return -ENOTSUP;
588 }
589
590 bs->drv = drv;
7267c094 591 bs->opaque = g_malloc0(drv->instance_size);
57915332 592
03f541bd 593 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
57915332
KW
594
595 /*
596 * Clear flags that are internal to the block layer before opening the
597 * image.
598 */
599 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
600
601 /*
ebabb67a 602 * Snapshots should be writable.
57915332
KW
603 */
604 if (bs->is_temporary) {
605 open_flags |= BDRV_O_RDWR;
606 }
607
e7c63796
SH
608 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
609
66f82cee
KW
610 /* Open the image, either directly or using a protocol */
611 if (drv->bdrv_file_open) {
612 ret = drv->bdrv_file_open(bs, filename, open_flags);
613 } else {
614 ret = bdrv_file_open(&bs->file, filename, open_flags);
615 if (ret >= 0) {
616 ret = drv->bdrv_open(bs, open_flags);
617 }
618 }
619
57915332
KW
620 if (ret < 0) {
621 goto free_and_fail;
622 }
623
51762288
SH
624 ret = refresh_total_sectors(bs, bs->total_sectors);
625 if (ret < 0) {
626 goto free_and_fail;
57915332 627 }
51762288 628
57915332
KW
629#ifndef _WIN32
630 if (bs->is_temporary) {
631 unlink(filename);
632 }
633#endif
634 return 0;
635
636free_and_fail:
66f82cee
KW
637 if (bs->file) {
638 bdrv_delete(bs->file);
639 bs->file = NULL;
640 }
7267c094 641 g_free(bs->opaque);
57915332
KW
642 bs->opaque = NULL;
643 bs->drv = NULL;
644 return ret;
645}
646
b6ce07aa
KW
647/*
648 * Opens a file using a protocol (file, host_device, nbd, ...)
649 */
83f64091 650int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
ea2384d3 651{
83f64091 652 BlockDriverState *bs;
6db95603 653 BlockDriver *drv;
83f64091
FB
654 int ret;
655
b50cbabc 656 drv = bdrv_find_protocol(filename);
6db95603
CH
657 if (!drv) {
658 return -ENOENT;
659 }
660
83f64091 661 bs = bdrv_new("");
b6ce07aa 662 ret = bdrv_open_common(bs, filename, flags, drv);
83f64091
FB
663 if (ret < 0) {
664 bdrv_delete(bs);
665 return ret;
3b0d4f61 666 }
71d0770c 667 bs->growable = 1;
83f64091
FB
668 *pbs = bs;
669 return 0;
670}
671
b6ce07aa
KW
672/*
673 * Opens a disk image (raw, qcow2, vmdk, ...)
674 */
d6e9098e
KW
675int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
676 BlockDriver *drv)
ea2384d3 677{
b6ce07aa 678 int ret;
2b572816 679 char tmp_filename[PATH_MAX];
712e7874 680
83f64091 681 if (flags & BDRV_O_SNAPSHOT) {
ea2384d3
FB
682 BlockDriverState *bs1;
683 int64_t total_size;
7c96d46e 684 int is_protocol = 0;
91a073a9
KW
685 BlockDriver *bdrv_qcow2;
686 QEMUOptionParameter *options;
b6ce07aa 687 char backing_filename[PATH_MAX];
3b46e624 688
ea2384d3
FB
689 /* if snapshot, we create a temporary backing file and open it
690 instead of opening 'filename' directly */
33e3963e 691
ea2384d3
FB
692 /* if there is a backing file, use it */
693 bs1 = bdrv_new("");
d6e9098e 694 ret = bdrv_open(bs1, filename, 0, drv);
51d7c00c 695 if (ret < 0) {
ea2384d3 696 bdrv_delete(bs1);
51d7c00c 697 return ret;
ea2384d3 698 }
3e82990b 699 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
7c96d46e
AL
700
701 if (bs1->drv && bs1->drv->protocol_name)
702 is_protocol = 1;
703
ea2384d3 704 bdrv_delete(bs1);
3b46e624 705
ea2384d3 706 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
7c96d46e
AL
707
708 /* Real path is meaningless for protocols */
709 if (is_protocol)
710 snprintf(backing_filename, sizeof(backing_filename),
711 "%s", filename);
114cdfa9
KS
712 else if (!realpath(filename, backing_filename))
713 return -errno;
7c96d46e 714
91a073a9
KW
715 bdrv_qcow2 = bdrv_find_format("qcow2");
716 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
717
3e82990b 718 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
91a073a9
KW
719 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
720 if (drv) {
721 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
722 drv->format_name);
723 }
724
725 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
d748768c 726 free_option_parameters(options);
51d7c00c
AL
727 if (ret < 0) {
728 return ret;
ea2384d3 729 }
91a073a9 730
ea2384d3 731 filename = tmp_filename;
91a073a9 732 drv = bdrv_qcow2;
ea2384d3
FB
733 bs->is_temporary = 1;
734 }
712e7874 735
b6ce07aa 736 /* Find the right image format driver */
6db95603 737 if (!drv) {
c98ac35d 738 ret = find_image_format(filename, &drv);
51d7c00c 739 }
6987307c 740
51d7c00c 741 if (!drv) {
51d7c00c 742 goto unlink_and_fail;
ea2384d3 743 }
b6ce07aa
KW
744
745 /* Open the image */
746 ret = bdrv_open_common(bs, filename, flags, drv);
747 if (ret < 0) {
6987307c
CH
748 goto unlink_and_fail;
749 }
750
b6ce07aa
KW
751 /* If there is a backing file, use it */
752 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
753 char backing_filename[PATH_MAX];
754 int back_flags;
755 BlockDriver *back_drv = NULL;
756
757 bs->backing_hd = bdrv_new("");
df2dbb4a
SH
758
759 if (path_has_protocol(bs->backing_file)) {
760 pstrcpy(backing_filename, sizeof(backing_filename),
761 bs->backing_file);
762 } else {
763 path_combine(backing_filename, sizeof(backing_filename),
764 filename, bs->backing_file);
765 }
766
767 if (bs->backing_format[0] != '\0') {
b6ce07aa 768 back_drv = bdrv_find_format(bs->backing_format);
df2dbb4a 769 }
b6ce07aa
KW
770
771 /* backing files always opened read-only */
772 back_flags =
773 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
774
775 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
776 if (ret < 0) {
777 bdrv_close(bs);
778 return ret;
779 }
780 if (bs->is_temporary) {
781 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
782 } else {
783 /* base image inherits from "parent" */
784 bs->backing_hd->keep_read_only = bs->keep_read_only;
785 }
786 }
787
788 if (!bdrv_key_required(bs)) {
7d4b4ba5 789 bdrv_dev_change_media_cb(bs, true);
b6ce07aa
KW
790 }
791
98f90dba
ZYW
792 /* throttling disk I/O limits */
793 if (bs->io_limits_enabled) {
794 bdrv_io_limits_enable(bs);
795 }
796
b6ce07aa
KW
797 return 0;
798
799unlink_and_fail:
800 if (bs->is_temporary) {
801 unlink(filename);
802 }
803 return ret;
804}
805
fc01f7e7
FB
806void bdrv_close(BlockDriverState *bs)
807{
19cb3738 808 if (bs->drv) {
f9092b10
MA
809 if (bs == bs_snapshots) {
810 bs_snapshots = NULL;
811 }
557df6ac 812 if (bs->backing_hd) {
ea2384d3 813 bdrv_delete(bs->backing_hd);
557df6ac
SH
814 bs->backing_hd = NULL;
815 }
ea2384d3 816 bs->drv->bdrv_close(bs);
7267c094 817 g_free(bs->opaque);
ea2384d3
FB
818#ifdef _WIN32
819 if (bs->is_temporary) {
820 unlink(bs->filename);
821 }
67b915a5 822#endif
ea2384d3
FB
823 bs->opaque = NULL;
824 bs->drv = NULL;
53fec9d3 825 bs->copy_on_read = 0;
b338082b 826
66f82cee
KW
827 if (bs->file != NULL) {
828 bdrv_close(bs->file);
829 }
830
7d4b4ba5 831 bdrv_dev_change_media_cb(bs, false);
b338082b 832 }
98f90dba
ZYW
833
834 /*throttling disk I/O limits*/
835 if (bs->io_limits_enabled) {
836 bdrv_io_limits_disable(bs);
837 }
b338082b
FB
838}
839
2bc93fed
MK
840void bdrv_close_all(void)
841{
842 BlockDriverState *bs;
843
844 QTAILQ_FOREACH(bs, &bdrv_states, list) {
845 bdrv_close(bs);
846 }
847}
848
922453bc
SH
849/*
850 * Wait for pending requests to complete across all BlockDriverStates
851 *
852 * This function does not flush data to disk, use bdrv_flush_all() for that
853 * after calling this function.
854 */
855void bdrv_drain_all(void)
856{
857 BlockDriverState *bs;
858
859 qemu_aio_flush();
860
861 /* If requests are still pending there is a bug somewhere */
862 QTAILQ_FOREACH(bs, &bdrv_states, list) {
863 assert(QLIST_EMPTY(&bs->tracked_requests));
864 assert(qemu_co_queue_empty(&bs->throttled_reqs));
865 }
866}
867
d22b2f41
RH
868/* make a BlockDriverState anonymous by removing from bdrv_state list.
869 Also, NULL terminate the device_name to prevent double remove */
870void bdrv_make_anon(BlockDriverState *bs)
871{
872 if (bs->device_name[0] != '\0') {
873 QTAILQ_REMOVE(&bdrv_states, bs, list);
874 }
875 bs->device_name[0] = '\0';
876}
877
b338082b
FB
878void bdrv_delete(BlockDriverState *bs)
879{
fa879d62 880 assert(!bs->dev);
18846dee 881
1b7bdbc1 882 /* remove from list, if necessary */
d22b2f41 883 bdrv_make_anon(bs);
34c6f050 884
b338082b 885 bdrv_close(bs);
66f82cee
KW
886 if (bs->file != NULL) {
887 bdrv_delete(bs->file);
888 }
889
f9092b10 890 assert(bs != bs_snapshots);
7267c094 891 g_free(bs);
fc01f7e7
FB
892}
893
fa879d62
MA
894int bdrv_attach_dev(BlockDriverState *bs, void *dev)
895/* TODO change to DeviceState *dev when all users are qdevified */
18846dee 896{
fa879d62 897 if (bs->dev) {
18846dee
MA
898 return -EBUSY;
899 }
fa879d62 900 bs->dev = dev;
28a7282a 901 bdrv_iostatus_reset(bs);
18846dee
MA
902 return 0;
903}
904
fa879d62
MA
905/* TODO qdevified devices don't use this, remove when devices are qdevified */
906void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
18846dee 907{
fa879d62
MA
908 if (bdrv_attach_dev(bs, dev) < 0) {
909 abort();
910 }
911}
912
913void bdrv_detach_dev(BlockDriverState *bs, void *dev)
914/* TODO change to DeviceState *dev when all users are qdevified */
915{
916 assert(bs->dev == dev);
917 bs->dev = NULL;
0e49de52
MA
918 bs->dev_ops = NULL;
919 bs->dev_opaque = NULL;
29e05f20 920 bs->buffer_alignment = 512;
18846dee
MA
921}
922
fa879d62
MA
923/* TODO change to return DeviceState * when all users are qdevified */
924void *bdrv_get_attached_dev(BlockDriverState *bs)
18846dee 925{
fa879d62 926 return bs->dev;
18846dee
MA
927}
928
0e49de52
MA
929void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
930 void *opaque)
931{
932 bs->dev_ops = ops;
933 bs->dev_opaque = opaque;
2c6942fa
MA
934 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
935 bs_snapshots = NULL;
936 }
0e49de52
MA
937}
938
7d4b4ba5 939static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
0e49de52 940{
145feb17 941 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
7d4b4ba5 942 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
145feb17
MA
943 }
944}
945
2c6942fa
MA
946bool bdrv_dev_has_removable_media(BlockDriverState *bs)
947{
948 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
949}
950
025ccaa7
PB
951void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
952{
953 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
954 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
955 }
956}
957
e4def80b
MA
958bool bdrv_dev_is_tray_open(BlockDriverState *bs)
959{
960 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
961 return bs->dev_ops->is_tray_open(bs->dev_opaque);
962 }
963 return false;
964}
965
145feb17
MA
966static void bdrv_dev_resize_cb(BlockDriverState *bs)
967{
968 if (bs->dev_ops && bs->dev_ops->resize_cb) {
969 bs->dev_ops->resize_cb(bs->dev_opaque);
0e49de52
MA
970 }
971}
972
f107639a
MA
973bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
974{
975 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
976 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
977 }
978 return false;
979}
980
e97fc193
AL
981/*
982 * Run consistency checks on an image
983 *
e076f338 984 * Returns 0 if the check could be completed (it doesn't mean that the image is
a1c7273b 985 * free of errors) or -errno when an internal error occurred. The results of the
e076f338 986 * check are stored in res.
e97fc193 987 */
e076f338 988int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
e97fc193
AL
989{
990 if (bs->drv->bdrv_check == NULL) {
991 return -ENOTSUP;
992 }
993
e076f338 994 memset(res, 0, sizeof(*res));
9ac228e0 995 return bs->drv->bdrv_check(bs, res);
e97fc193
AL
996}
997
8a426614
KW
998#define COMMIT_BUF_SECTORS 2048
999
33e3963e
FB
1000/* commit COW file into the raw image */
1001int bdrv_commit(BlockDriverState *bs)
1002{
19cb3738 1003 BlockDriver *drv = bs->drv;
ee181196 1004 BlockDriver *backing_drv;
8a426614
KW
1005 int64_t sector, total_sectors;
1006 int n, ro, open_flags;
4dca4b63 1007 int ret = 0, rw_ret = 0;
8a426614 1008 uint8_t *buf;
4dca4b63
NS
1009 char filename[1024];
1010 BlockDriverState *bs_rw, *bs_ro;
33e3963e 1011
19cb3738
FB
1012 if (!drv)
1013 return -ENOMEDIUM;
4dca4b63
NS
1014
1015 if (!bs->backing_hd) {
1016 return -ENOTSUP;
33e3963e
FB
1017 }
1018
4dca4b63
NS
1019 if (bs->backing_hd->keep_read_only) {
1020 return -EACCES;
1021 }
ee181196
KW
1022
1023 backing_drv = bs->backing_hd->drv;
4dca4b63
NS
1024 ro = bs->backing_hd->read_only;
1025 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
1026 open_flags = bs->backing_hd->open_flags;
1027
1028 if (ro) {
1029 /* re-open as RW */
1030 bdrv_delete(bs->backing_hd);
1031 bs->backing_hd = NULL;
1032 bs_rw = bdrv_new("");
ee181196
KW
1033 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
1034 backing_drv);
4dca4b63
NS
1035 if (rw_ret < 0) {
1036 bdrv_delete(bs_rw);
1037 /* try to re-open read-only */
1038 bs_ro = bdrv_new("");
ee181196
KW
1039 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1040 backing_drv);
4dca4b63
NS
1041 if (ret < 0) {
1042 bdrv_delete(bs_ro);
1043 /* drive not functional anymore */
1044 bs->drv = NULL;
1045 return ret;
1046 }
1047 bs->backing_hd = bs_ro;
1048 return rw_ret;
1049 }
1050 bs->backing_hd = bs_rw;
ea2384d3 1051 }
33e3963e 1052
6ea44308 1053 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
7267c094 1054 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
8a426614
KW
1055
1056 for (sector = 0; sector < total_sectors; sector += n) {
05c4af54 1057 if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
8a426614
KW
1058
1059 if (bdrv_read(bs, sector, buf, n) != 0) {
1060 ret = -EIO;
1061 goto ro_cleanup;
1062 }
1063
1064 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
1065 ret = -EIO;
1066 goto ro_cleanup;
1067 }
ea2384d3 1068 }
33e3963e 1069 }
95389c86 1070
1d44952f
CH
1071 if (drv->bdrv_make_empty) {
1072 ret = drv->bdrv_make_empty(bs);
1073 bdrv_flush(bs);
1074 }
95389c86 1075
3f5075ae
CH
1076 /*
1077 * Make sure all data we wrote to the backing device is actually
1078 * stable on disk.
1079 */
1080 if (bs->backing_hd)
1081 bdrv_flush(bs->backing_hd);
4dca4b63
NS
1082
1083ro_cleanup:
7267c094 1084 g_free(buf);
4dca4b63
NS
1085
1086 if (ro) {
1087 /* re-open as RO */
1088 bdrv_delete(bs->backing_hd);
1089 bs->backing_hd = NULL;
1090 bs_ro = bdrv_new("");
ee181196
KW
1091 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1092 backing_drv);
4dca4b63
NS
1093 if (ret < 0) {
1094 bdrv_delete(bs_ro);
1095 /* drive not functional anymore */
1096 bs->drv = NULL;
1097 return ret;
1098 }
1099 bs->backing_hd = bs_ro;
1100 bs->backing_hd->keep_read_only = 0;
1101 }
1102
1d44952f 1103 return ret;
33e3963e
FB
1104}
1105
6ab4b5ab
MA
1106void bdrv_commit_all(void)
1107{
1108 BlockDriverState *bs;
1109
1110 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1111 bdrv_commit(bs);
1112 }
1113}
1114
dbffbdcf
SH
1115struct BdrvTrackedRequest {
1116 BlockDriverState *bs;
1117 int64_t sector_num;
1118 int nb_sectors;
1119 bool is_write;
1120 QLIST_ENTRY(BdrvTrackedRequest) list;
5f8b6491 1121 Coroutine *co; /* owner, used for deadlock detection */
f4658285 1122 CoQueue wait_queue; /* coroutines blocked on this request */
dbffbdcf
SH
1123};
1124
1125/**
1126 * Remove an active request from the tracked requests list
1127 *
1128 * This function should be called when a tracked request is completing.
1129 */
1130static void tracked_request_end(BdrvTrackedRequest *req)
1131{
1132 QLIST_REMOVE(req, list);
f4658285 1133 qemu_co_queue_restart_all(&req->wait_queue);
dbffbdcf
SH
1134}
1135
1136/**
1137 * Add an active request to the tracked requests list
1138 */
1139static void tracked_request_begin(BdrvTrackedRequest *req,
1140 BlockDriverState *bs,
1141 int64_t sector_num,
1142 int nb_sectors, bool is_write)
1143{
1144 *req = (BdrvTrackedRequest){
1145 .bs = bs,
1146 .sector_num = sector_num,
1147 .nb_sectors = nb_sectors,
1148 .is_write = is_write,
5f8b6491 1149 .co = qemu_coroutine_self(),
dbffbdcf
SH
1150 };
1151
f4658285
SH
1152 qemu_co_queue_init(&req->wait_queue);
1153
dbffbdcf
SH
1154 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
1155}
1156
d83947ac
SH
1157/**
1158 * Round a region to cluster boundaries
1159 */
1160static void round_to_clusters(BlockDriverState *bs,
1161 int64_t sector_num, int nb_sectors,
1162 int64_t *cluster_sector_num,
1163 int *cluster_nb_sectors)
1164{
1165 BlockDriverInfo bdi;
1166
1167 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
1168 *cluster_sector_num = sector_num;
1169 *cluster_nb_sectors = nb_sectors;
1170 } else {
1171 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
1172 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
1173 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
1174 nb_sectors, c);
1175 }
1176}
1177
f4658285
SH
1178static bool tracked_request_overlaps(BdrvTrackedRequest *req,
1179 int64_t sector_num, int nb_sectors) {
d83947ac
SH
1180 /* aaaa bbbb */
1181 if (sector_num >= req->sector_num + req->nb_sectors) {
1182 return false;
1183 }
1184 /* bbbb aaaa */
1185 if (req->sector_num >= sector_num + nb_sectors) {
1186 return false;
1187 }
1188 return true;
f4658285
SH
1189}
1190
1191static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
1192 int64_t sector_num, int nb_sectors)
1193{
1194 BdrvTrackedRequest *req;
d83947ac
SH
1195 int64_t cluster_sector_num;
1196 int cluster_nb_sectors;
f4658285
SH
1197 bool retry;
1198
d83947ac
SH
1199 /* If we touch the same cluster it counts as an overlap. This guarantees
1200 * that allocating writes will be serialized and not race with each other
1201 * for the same cluster. For example, in copy-on-read it ensures that the
1202 * CoR read and write operations are atomic and guest writes cannot
1203 * interleave between them.
1204 */
1205 round_to_clusters(bs, sector_num, nb_sectors,
1206 &cluster_sector_num, &cluster_nb_sectors);
1207
f4658285
SH
1208 do {
1209 retry = false;
1210 QLIST_FOREACH(req, &bs->tracked_requests, list) {
d83947ac
SH
1211 if (tracked_request_overlaps(req, cluster_sector_num,
1212 cluster_nb_sectors)) {
5f8b6491
SH
1213 /* Hitting this means there was a reentrant request, for
1214 * example, a block driver issuing nested requests. This must
1215 * never happen since it means deadlock.
1216 */
1217 assert(qemu_coroutine_self() != req->co);
1218
f4658285
SH
1219 qemu_co_queue_wait(&req->wait_queue);
1220 retry = true;
1221 break;
1222 }
1223 }
1224 } while (retry);
1225}
1226
756e6736
KW
1227/*
1228 * Return values:
1229 * 0 - success
1230 * -EINVAL - backing format specified, but no file
1231 * -ENOSPC - can't update the backing file because no space is left in the
1232 * image file header
1233 * -ENOTSUP - format driver doesn't support changing the backing file
1234 */
1235int bdrv_change_backing_file(BlockDriverState *bs,
1236 const char *backing_file, const char *backing_fmt)
1237{
1238 BlockDriver *drv = bs->drv;
1239
1240 if (drv->bdrv_change_backing_file != NULL) {
1241 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
1242 } else {
1243 return -ENOTSUP;
1244 }
1245}
1246
71d0770c
AL
1247static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1248 size_t size)
1249{
1250 int64_t len;
1251
1252 if (!bdrv_is_inserted(bs))
1253 return -ENOMEDIUM;
1254
1255 if (bs->growable)
1256 return 0;
1257
1258 len = bdrv_getlength(bs);
1259
fbb7b4e0
KW
1260 if (offset < 0)
1261 return -EIO;
1262
1263 if ((offset > len) || (len - offset < size))
71d0770c
AL
1264 return -EIO;
1265
1266 return 0;
1267}
1268
1269static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1270 int nb_sectors)
1271{
eb5a3165
JS
1272 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1273 nb_sectors * BDRV_SECTOR_SIZE);
71d0770c
AL
1274}
1275
1c9805a3
SH
1276typedef struct RwCo {
1277 BlockDriverState *bs;
1278 int64_t sector_num;
1279 int nb_sectors;
1280 QEMUIOVector *qiov;
1281 bool is_write;
1282 int ret;
1283} RwCo;
1284
1285static void coroutine_fn bdrv_rw_co_entry(void *opaque)
fc01f7e7 1286{
1c9805a3 1287 RwCo *rwco = opaque;
ea2384d3 1288
1c9805a3
SH
1289 if (!rwco->is_write) {
1290 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
1291 rwco->nb_sectors, rwco->qiov);
1292 } else {
1293 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
1294 rwco->nb_sectors, rwco->qiov);
1295 }
1296}
e7a8a783 1297
1c9805a3
SH
1298/*
1299 * Process a synchronous request using coroutines
1300 */
1301static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1302 int nb_sectors, bool is_write)
1303{
1304 QEMUIOVector qiov;
1305 struct iovec iov = {
1306 .iov_base = (void *)buf,
1307 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1308 };
1309 Coroutine *co;
1310 RwCo rwco = {
1311 .bs = bs,
1312 .sector_num = sector_num,
1313 .nb_sectors = nb_sectors,
1314 .qiov = &qiov,
1315 .is_write = is_write,
1316 .ret = NOT_DONE,
1317 };
e7a8a783 1318
1c9805a3 1319 qemu_iovec_init_external(&qiov, &iov, 1);
e7a8a783 1320
1c9805a3
SH
1321 if (qemu_in_coroutine()) {
1322 /* Fast-path if already in coroutine context */
1323 bdrv_rw_co_entry(&rwco);
1324 } else {
1325 co = qemu_coroutine_create(bdrv_rw_co_entry);
1326 qemu_coroutine_enter(co, &rwco);
1327 while (rwco.ret == NOT_DONE) {
1328 qemu_aio_wait();
1329 }
1330 }
1331 return rwco.ret;
1332}
b338082b 1333
1c9805a3
SH
1334/* return < 0 if error. See bdrv_write() for the return codes */
1335int bdrv_read(BlockDriverState *bs, int64_t sector_num,
1336 uint8_t *buf, int nb_sectors)
1337{
1338 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
fc01f7e7
FB
1339}
1340
7cd1e32a 1341static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
a55eb92c 1342 int nb_sectors, int dirty)
7cd1e32a 1343{
1344 int64_t start, end;
c6d22830 1345 unsigned long val, idx, bit;
a55eb92c 1346
6ea44308 1347 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
c6d22830 1348 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c
JK
1349
1350 for (; start <= end; start++) {
c6d22830
JK
1351 idx = start / (sizeof(unsigned long) * 8);
1352 bit = start % (sizeof(unsigned long) * 8);
1353 val = bs->dirty_bitmap[idx];
1354 if (dirty) {
6d59fec1 1355 if (!(val & (1UL << bit))) {
aaa0eb75 1356 bs->dirty_count++;
6d59fec1 1357 val |= 1UL << bit;
aaa0eb75 1358 }
c6d22830 1359 } else {
6d59fec1 1360 if (val & (1UL << bit)) {
aaa0eb75 1361 bs->dirty_count--;
6d59fec1 1362 val &= ~(1UL << bit);
aaa0eb75 1363 }
c6d22830
JK
1364 }
1365 bs->dirty_bitmap[idx] = val;
7cd1e32a 1366 }
1367}
1368
5fafdf24 1369/* Return < 0 if error. Important errors are:
19cb3738
FB
1370 -EIO generic I/O error (may happen for all errors)
1371 -ENOMEDIUM No media inserted.
1372 -EINVAL Invalid sector number or nb_sectors
1373 -EACCES Trying to write a read-only device
1374*/
5fafdf24 1375int bdrv_write(BlockDriverState *bs, int64_t sector_num,
fc01f7e7
FB
1376 const uint8_t *buf, int nb_sectors)
1377{
1c9805a3 1378 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
83f64091
FB
1379}
1380
eda578e5
AL
1381int bdrv_pread(BlockDriverState *bs, int64_t offset,
1382 void *buf, int count1)
83f64091 1383{
6ea44308 1384 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
83f64091
FB
1385 int len, nb_sectors, count;
1386 int64_t sector_num;
9a8c4cce 1387 int ret;
83f64091
FB
1388
1389 count = count1;
1390 /* first read to align to sector start */
6ea44308 1391 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
83f64091
FB
1392 if (len > count)
1393 len = count;
6ea44308 1394 sector_num = offset >> BDRV_SECTOR_BITS;
83f64091 1395 if (len > 0) {
9a8c4cce
KW
1396 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1397 return ret;
6ea44308 1398 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
83f64091
FB
1399 count -= len;
1400 if (count == 0)
1401 return count1;
1402 sector_num++;
1403 buf += len;
1404 }
1405
1406 /* read the sectors "in place" */
6ea44308 1407 nb_sectors = count >> BDRV_SECTOR_BITS;
83f64091 1408 if (nb_sectors > 0) {
9a8c4cce
KW
1409 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1410 return ret;
83f64091 1411 sector_num += nb_sectors;
6ea44308 1412 len = nb_sectors << BDRV_SECTOR_BITS;
83f64091
FB
1413 buf += len;
1414 count -= len;
1415 }
1416
1417 /* add data from the last sector */
1418 if (count > 0) {
9a8c4cce
KW
1419 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1420 return ret;
83f64091
FB
1421 memcpy(buf, tmp_buf, count);
1422 }
1423 return count1;
1424}
1425
eda578e5
AL
1426int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1427 const void *buf, int count1)
83f64091 1428{
6ea44308 1429 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
83f64091
FB
1430 int len, nb_sectors, count;
1431 int64_t sector_num;
9a8c4cce 1432 int ret;
83f64091
FB
1433
1434 count = count1;
1435 /* first write to align to sector start */
6ea44308 1436 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
83f64091
FB
1437 if (len > count)
1438 len = count;
6ea44308 1439 sector_num = offset >> BDRV_SECTOR_BITS;
83f64091 1440 if (len > 0) {
9a8c4cce
KW
1441 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1442 return ret;
6ea44308 1443 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
9a8c4cce
KW
1444 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1445 return ret;
83f64091
FB
1446 count -= len;
1447 if (count == 0)
1448 return count1;
1449 sector_num++;
1450 buf += len;
1451 }
1452
1453 /* write the sectors "in place" */
6ea44308 1454 nb_sectors = count >> BDRV_SECTOR_BITS;
83f64091 1455 if (nb_sectors > 0) {
9a8c4cce
KW
1456 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1457 return ret;
83f64091 1458 sector_num += nb_sectors;
6ea44308 1459 len = nb_sectors << BDRV_SECTOR_BITS;
83f64091
FB
1460 buf += len;
1461 count -= len;
1462 }
1463
1464 /* add data from the last sector */
1465 if (count > 0) {
9a8c4cce
KW
1466 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1467 return ret;
83f64091 1468 memcpy(tmp_buf, buf, count);
9a8c4cce
KW
1469 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1470 return ret;
83f64091
FB
1471 }
1472 return count1;
1473}
83f64091 1474
f08145fe
KW
1475/*
1476 * Writes to the file and ensures that no writes are reordered across this
1477 * request (acts as a barrier)
1478 *
1479 * Returns 0 on success, -errno in error cases.
1480 */
1481int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1482 const void *buf, int count)
1483{
1484 int ret;
1485
1486 ret = bdrv_pwrite(bs, offset, buf, count);
1487 if (ret < 0) {
1488 return ret;
1489 }
1490
92196b2f
SH
1491 /* No flush needed for cache modes that use O_DSYNC */
1492 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
f08145fe
KW
1493 bdrv_flush(bs);
1494 }
1495
1496 return 0;
1497}
1498
ab185921
SH
1499static int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
1500 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1501{
1502 /* Perform I/O through a temporary buffer so that users who scribble over
1503 * their read buffer while the operation is in progress do not end up
1504 * modifying the image file. This is critical for zero-copy guest I/O
1505 * where anything might happen inside guest memory.
1506 */
1507 void *bounce_buffer;
1508
1509 struct iovec iov;
1510 QEMUIOVector bounce_qiov;
1511 int64_t cluster_sector_num;
1512 int cluster_nb_sectors;
1513 size_t skip_bytes;
1514 int ret;
1515
1516 /* Cover entire cluster so no additional backing file I/O is required when
1517 * allocating cluster in the image file.
1518 */
1519 round_to_clusters(bs, sector_num, nb_sectors,
1520 &cluster_sector_num, &cluster_nb_sectors);
1521
1522 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors,
1523 cluster_sector_num, cluster_nb_sectors);
1524
1525 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
1526 iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
1527 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
1528
1529 ret = bs->drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
1530 &bounce_qiov);
1531 if (ret < 0) {
1532 goto err;
1533 }
1534
1535 ret = bs->drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
1536 &bounce_qiov);
1537 if (ret < 0) {
1538 /* It might be okay to ignore write errors for guest requests. If this
1539 * is a deliberate copy-on-read then we don't want to ignore the error.
1540 * Simply report it in all cases.
1541 */
1542 goto err;
1543 }
1544
1545 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
1546 qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
1547 nb_sectors * BDRV_SECTOR_SIZE);
1548
1549err:
1550 qemu_vfree(bounce_buffer);
1551 return ret;
1552}
1553
c5fbe571
SH
1554/*
1555 * Handle a read request in coroutine context
1556 */
1557static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
1558 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
da1fa91d
KW
1559{
1560 BlockDriver *drv = bs->drv;
dbffbdcf
SH
1561 BdrvTrackedRequest req;
1562 int ret;
da1fa91d 1563
da1fa91d
KW
1564 if (!drv) {
1565 return -ENOMEDIUM;
1566 }
1567 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1568 return -EIO;
1569 }
1570
98f90dba
ZYW
1571 /* throttling disk read I/O */
1572 if (bs->io_limits_enabled) {
1573 bdrv_io_limits_intercept(bs, false, nb_sectors);
1574 }
1575
f4658285
SH
1576 if (bs->copy_on_read) {
1577 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1578 }
1579
dbffbdcf 1580 tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
ab185921
SH
1581
1582 if (bs->copy_on_read) {
1583 int pnum;
1584
1585 ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
1586 if (ret < 0) {
1587 goto out;
1588 }
1589
1590 if (!ret || pnum != nb_sectors) {
1591 ret = bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, qiov);
1592 goto out;
1593 }
1594 }
1595
dbffbdcf 1596 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
ab185921
SH
1597
1598out:
dbffbdcf
SH
1599 tracked_request_end(&req);
1600 return ret;
da1fa91d
KW
1601}
1602
c5fbe571 1603int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
da1fa91d
KW
1604 int nb_sectors, QEMUIOVector *qiov)
1605{
c5fbe571 1606 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
da1fa91d 1607
c5fbe571
SH
1608 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
1609}
1610
1611/*
1612 * Handle a write request in coroutine context
1613 */
1614static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
1615 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1616{
1617 BlockDriver *drv = bs->drv;
dbffbdcf 1618 BdrvTrackedRequest req;
6b7cb247 1619 int ret;
da1fa91d
KW
1620
1621 if (!bs->drv) {
1622 return -ENOMEDIUM;
1623 }
1624 if (bs->read_only) {
1625 return -EACCES;
1626 }
1627 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1628 return -EIO;
1629 }
1630
98f90dba
ZYW
1631 /* throttling disk write I/O */
1632 if (bs->io_limits_enabled) {
1633 bdrv_io_limits_intercept(bs, true, nb_sectors);
1634 }
1635
f4658285
SH
1636 if (bs->copy_on_read) {
1637 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1638 }
1639
dbffbdcf
SH
1640 tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
1641
6b7cb247
SH
1642 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1643
da1fa91d
KW
1644 if (bs->dirty_bitmap) {
1645 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1646 }
1647
1648 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1649 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1650 }
1651
dbffbdcf
SH
1652 tracked_request_end(&req);
1653
6b7cb247 1654 return ret;
da1fa91d
KW
1655}
1656
c5fbe571
SH
1657int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1658 int nb_sectors, QEMUIOVector *qiov)
1659{
1660 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1661
1662 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
1663}
1664
83f64091
FB
1665/**
1666 * Truncate file to 'offset' bytes (needed only for file protocols)
1667 */
1668int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1669{
1670 BlockDriver *drv = bs->drv;
51762288 1671 int ret;
83f64091 1672 if (!drv)
19cb3738 1673 return -ENOMEDIUM;
83f64091
FB
1674 if (!drv->bdrv_truncate)
1675 return -ENOTSUP;
59f2689d
NS
1676 if (bs->read_only)
1677 return -EACCES;
8591675f
MT
1678 if (bdrv_in_use(bs))
1679 return -EBUSY;
51762288
SH
1680 ret = drv->bdrv_truncate(bs, offset);
1681 if (ret == 0) {
1682 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
145feb17 1683 bdrv_dev_resize_cb(bs);
51762288
SH
1684 }
1685 return ret;
83f64091
FB
1686}
1687
4a1d5e1f
FZ
1688/**
1689 * Length of a allocated file in bytes. Sparse files are counted by actual
1690 * allocated space. Return < 0 if error or unknown.
1691 */
1692int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1693{
1694 BlockDriver *drv = bs->drv;
1695 if (!drv) {
1696 return -ENOMEDIUM;
1697 }
1698 if (drv->bdrv_get_allocated_file_size) {
1699 return drv->bdrv_get_allocated_file_size(bs);
1700 }
1701 if (bs->file) {
1702 return bdrv_get_allocated_file_size(bs->file);
1703 }
1704 return -ENOTSUP;
1705}
1706
83f64091
FB
1707/**
1708 * Length of a file in bytes. Return < 0 if error or unknown.
1709 */
1710int64_t bdrv_getlength(BlockDriverState *bs)
1711{
1712 BlockDriver *drv = bs->drv;
1713 if (!drv)
19cb3738 1714 return -ENOMEDIUM;
51762288 1715
2c6942fa 1716 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
46a4e4e6
SH
1717 if (drv->bdrv_getlength) {
1718 return drv->bdrv_getlength(bs);
1719 }
83f64091 1720 }
46a4e4e6 1721 return bs->total_sectors * BDRV_SECTOR_SIZE;
fc01f7e7
FB
1722}
1723
19cb3738 1724/* return 0 as number of sectors if no device present or error */
96b8f136 1725void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
fc01f7e7 1726{
19cb3738
FB
1727 int64_t length;
1728 length = bdrv_getlength(bs);
1729 if (length < 0)
1730 length = 0;
1731 else
6ea44308 1732 length = length >> BDRV_SECTOR_BITS;
19cb3738 1733 *nb_sectors_ptr = length;
fc01f7e7 1734}
cf98951b 1735
/* Legacy MBR partition table entry, exactly as laid out on disk. */
struct partition {
    uint8_t boot_ind;       /* 0x80 - active */
    uint8_t head;           /* starting head */
    uint8_t sector;         /* starting sector */
    uint8_t cyl;            /* starting cylinder */
    uint8_t sys_ind;        /* What partition type */
    uint8_t end_head;       /* end head */
    uint8_t end_sector;     /* end sector */
    uint8_t end_cyl;        /* end cylinder */
    uint32_t start_sect;    /* starting sector counting from 0 */
    uint32_t nr_sects;      /* nr of sectors in partition */
} QEMU_PACKED;
f3d54fc4
AL
1748
1749/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1750static int guess_disk_lchs(BlockDriverState *bs,
1751 int *pcylinders, int *pheads, int *psectors)
1752{
eb5a3165 1753 uint8_t buf[BDRV_SECTOR_SIZE];
f3d54fc4
AL
1754 int ret, i, heads, sectors, cylinders;
1755 struct partition *p;
1756 uint32_t nr_sects;
a38131b6 1757 uint64_t nb_sectors;
f3d54fc4
AL
1758
1759 bdrv_get_geometry(bs, &nb_sectors);
1760
1761 ret = bdrv_read(bs, 0, buf, 1);
1762 if (ret < 0)
1763 return -1;
1764 /* test msdos magic */
1765 if (buf[510] != 0x55 || buf[511] != 0xaa)
1766 return -1;
1767 for(i = 0; i < 4; i++) {
1768 p = ((struct partition *)(buf + 0x1be)) + i;
1769 nr_sects = le32_to_cpu(p->nr_sects);
1770 if (nr_sects && p->end_head) {
1771 /* We make the assumption that the partition terminates on
1772 a cylinder boundary */
1773 heads = p->end_head + 1;
1774 sectors = p->end_sector & 63;
1775 if (sectors == 0)
1776 continue;
1777 cylinders = nb_sectors / (heads * sectors);
1778 if (cylinders < 1 || cylinders > 16383)
1779 continue;
1780 *pheads = heads;
1781 *psectors = sectors;
1782 *pcylinders = cylinders;
1783#if 0
1784 printf("guessed geometry: LCHS=%d %d %d\n",
1785 cylinders, heads, sectors);
1786#endif
1787 return 0;
1788 }
1789 }
1790 return -1;
1791}
1792
1793void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1794{
1795 int translation, lba_detected = 0;
1796 int cylinders, heads, secs;
a38131b6 1797 uint64_t nb_sectors;
f3d54fc4
AL
1798
1799 /* if a geometry hint is available, use it */
1800 bdrv_get_geometry(bs, &nb_sectors);
1801 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1802 translation = bdrv_get_translation_hint(bs);
1803 if (cylinders != 0) {
1804 *pcyls = cylinders;
1805 *pheads = heads;
1806 *psecs = secs;
1807 } else {
1808 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1809 if (heads > 16) {
1810 /* if heads > 16, it means that a BIOS LBA
1811 translation was active, so the default
1812 hardware geometry is OK */
1813 lba_detected = 1;
1814 goto default_geometry;
1815 } else {
1816 *pcyls = cylinders;
1817 *pheads = heads;
1818 *psecs = secs;
1819 /* disable any translation to be in sync with
1820 the logical geometry */
1821 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1822 bdrv_set_translation_hint(bs,
1823 BIOS_ATA_TRANSLATION_NONE);
1824 }
1825 }
1826 } else {
1827 default_geometry:
1828 /* if no geometry, use a standard physical disk geometry */
1829 cylinders = nb_sectors / (16 * 63);
1830
1831 if (cylinders > 16383)
1832 cylinders = 16383;
1833 else if (cylinders < 2)
1834 cylinders = 2;
1835 *pcyls = cylinders;
1836 *pheads = 16;
1837 *psecs = 63;
1838 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1839 if ((*pcyls * *pheads) <= 131072) {
1840 bdrv_set_translation_hint(bs,
1841 BIOS_ATA_TRANSLATION_LARGE);
1842 } else {
1843 bdrv_set_translation_hint(bs,
1844 BIOS_ATA_TRANSLATION_LBA);
1845 }
1846 }
1847 }
1848 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1849 }
1850}
1851
5fafdf24 1852void bdrv_set_geometry_hint(BlockDriverState *bs,
b338082b
FB
1853 int cyls, int heads, int secs)
1854{
1855 bs->cyls = cyls;
1856 bs->heads = heads;
1857 bs->secs = secs;
1858}
1859
46d4767d
FB
1860void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1861{
1862 bs->translation = translation;
1863}
1864
5fafdf24 1865void bdrv_get_geometry_hint(BlockDriverState *bs,
b338082b
FB
1866 int *pcyls, int *pheads, int *psecs)
1867{
1868 *pcyls = bs->cyls;
1869 *pheads = bs->heads;
1870 *psecs = bs->secs;
1871}
1872
0563e191
ZYW
1873/* throttling disk io limits */
1874void bdrv_set_io_limits(BlockDriverState *bs,
1875 BlockIOLimit *io_limits)
1876{
1877 bs->io_limits = *io_limits;
1878 bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
1879}
1880
5bbdbb46
BS
1881/* Recognize floppy formats */
1882typedef struct FDFormat {
1883 FDriveType drive;
1884 uint8_t last_sect;
1885 uint8_t max_track;
1886 uint8_t max_head;
1887} FDFormat;
1888
1889static const FDFormat fd_formats[] = {
1890 /* First entry is default format */
1891 /* 1.44 MB 3"1/2 floppy disks */
1892 { FDRIVE_DRV_144, 18, 80, 1, },
1893 { FDRIVE_DRV_144, 20, 80, 1, },
1894 { FDRIVE_DRV_144, 21, 80, 1, },
1895 { FDRIVE_DRV_144, 21, 82, 1, },
1896 { FDRIVE_DRV_144, 21, 83, 1, },
1897 { FDRIVE_DRV_144, 22, 80, 1, },
1898 { FDRIVE_DRV_144, 23, 80, 1, },
1899 { FDRIVE_DRV_144, 24, 80, 1, },
1900 /* 2.88 MB 3"1/2 floppy disks */
1901 { FDRIVE_DRV_288, 36, 80, 1, },
1902 { FDRIVE_DRV_288, 39, 80, 1, },
1903 { FDRIVE_DRV_288, 40, 80, 1, },
1904 { FDRIVE_DRV_288, 44, 80, 1, },
1905 { FDRIVE_DRV_288, 48, 80, 1, },
1906 /* 720 kB 3"1/2 floppy disks */
1907 { FDRIVE_DRV_144, 9, 80, 1, },
1908 { FDRIVE_DRV_144, 10, 80, 1, },
1909 { FDRIVE_DRV_144, 10, 82, 1, },
1910 { FDRIVE_DRV_144, 10, 83, 1, },
1911 { FDRIVE_DRV_144, 13, 80, 1, },
1912 { FDRIVE_DRV_144, 14, 80, 1, },
1913 /* 1.2 MB 5"1/4 floppy disks */
1914 { FDRIVE_DRV_120, 15, 80, 1, },
1915 { FDRIVE_DRV_120, 18, 80, 1, },
1916 { FDRIVE_DRV_120, 18, 82, 1, },
1917 { FDRIVE_DRV_120, 18, 83, 1, },
1918 { FDRIVE_DRV_120, 20, 80, 1, },
1919 /* 720 kB 5"1/4 floppy disks */
1920 { FDRIVE_DRV_120, 9, 80, 1, },
1921 { FDRIVE_DRV_120, 11, 80, 1, },
1922 /* 360 kB 5"1/4 floppy disks */
1923 { FDRIVE_DRV_120, 9, 40, 1, },
1924 { FDRIVE_DRV_120, 9, 40, 0, },
1925 { FDRIVE_DRV_120, 10, 41, 1, },
1926 { FDRIVE_DRV_120, 10, 42, 1, },
1927 /* 320 kB 5"1/4 floppy disks */
1928 { FDRIVE_DRV_120, 8, 40, 1, },
1929 { FDRIVE_DRV_120, 8, 40, 0, },
1930 /* 360 kB must match 5"1/4 better than 3"1/2... */
1931 { FDRIVE_DRV_144, 9, 80, 0, },
1932 /* end */
1933 { FDRIVE_DRV_NONE, -1, -1, 0, },
1934};
1935
1936void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1937 int *max_track, int *last_sect,
1938 FDriveType drive_in, FDriveType *drive)
1939{
1940 const FDFormat *parse;
1941 uint64_t nb_sectors, size;
1942 int i, first_match, match;
1943
1944 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1945 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1946 /* User defined disk */
1947 } else {
1948 bdrv_get_geometry(bs, &nb_sectors);
1949 match = -1;
1950 first_match = -1;
1951 for (i = 0; ; i++) {
1952 parse = &fd_formats[i];
1953 if (parse->drive == FDRIVE_DRV_NONE) {
1954 break;
1955 }
1956 if (drive_in == parse->drive ||
1957 drive_in == FDRIVE_DRV_NONE) {
1958 size = (parse->max_head + 1) * parse->max_track *
1959 parse->last_sect;
1960 if (nb_sectors == size) {
1961 match = i;
1962 break;
1963 }
1964 if (first_match == -1) {
1965 first_match = i;
1966 }
1967 }
1968 }
1969 if (match == -1) {
1970 if (first_match == -1) {
1971 match = 1;
1972 } else {
1973 match = first_match;
1974 }
1975 parse = &fd_formats[match];
1976 }
1977 *nb_heads = parse->max_head + 1;
1978 *max_track = parse->max_track;
1979 *last_sect = parse->last_sect;
1980 *drive = parse->drive;
1981 }
1982}
1983
46d4767d
FB
1984int bdrv_get_translation_hint(BlockDriverState *bs)
1985{
1986 return bs->translation;
1987}
1988
abd7f68d
MA
1989void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1990 BlockErrorAction on_write_error)
1991{
1992 bs->on_read_error = on_read_error;
1993 bs->on_write_error = on_write_error;
1994}
1995
1996BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1997{
1998 return is_read ? bs->on_read_error : bs->on_write_error;
1999}
2000
b338082b
FB
2001int bdrv_is_read_only(BlockDriverState *bs)
2002{
2003 return bs->read_only;
2004}
2005
985a03b0
TS
2006int bdrv_is_sg(BlockDriverState *bs)
2007{
2008 return bs->sg;
2009}
2010
e900a7b7
CH
2011int bdrv_enable_write_cache(BlockDriverState *bs)
2012{
2013 return bs->enable_write_cache;
2014}
2015
ea2384d3
FB
2016int bdrv_is_encrypted(BlockDriverState *bs)
2017{
2018 if (bs->backing_hd && bs->backing_hd->encrypted)
2019 return 1;
2020 return bs->encrypted;
2021}
2022
c0f4ce77
AL
2023int bdrv_key_required(BlockDriverState *bs)
2024{
2025 BlockDriverState *backing_hd = bs->backing_hd;
2026
2027 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2028 return 1;
2029 return (bs->encrypted && !bs->valid_key);
2030}
2031
ea2384d3
FB
2032int bdrv_set_key(BlockDriverState *bs, const char *key)
2033{
2034 int ret;
2035 if (bs->backing_hd && bs->backing_hd->encrypted) {
2036 ret = bdrv_set_key(bs->backing_hd, key);
2037 if (ret < 0)
2038 return ret;
2039 if (!bs->encrypted)
2040 return 0;
2041 }
fd04a2ae
SH
2042 if (!bs->encrypted) {
2043 return -EINVAL;
2044 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2045 return -ENOMEDIUM;
2046 }
c0f4ce77 2047 ret = bs->drv->bdrv_set_key(bs, key);
bb5fc20f
AL
2048 if (ret < 0) {
2049 bs->valid_key = 0;
2050 } else if (!bs->valid_key) {
2051 bs->valid_key = 1;
2052 /* call the change callback now, we skipped it on open */
7d4b4ba5 2053 bdrv_dev_change_media_cb(bs, true);
bb5fc20f 2054 }
c0f4ce77 2055 return ret;
ea2384d3
FB
2056}
2057
2058void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
2059{
19cb3738 2060 if (!bs->drv) {
ea2384d3
FB
2061 buf[0] = '\0';
2062 } else {
2063 pstrcpy(buf, buf_size, bs->drv->format_name);
2064 }
2065}
2066
5fafdf24 2067void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
ea2384d3
FB
2068 void *opaque)
2069{
2070 BlockDriver *drv;
2071
8a22f02a 2072 QLIST_FOREACH(drv, &bdrv_drivers, list) {
ea2384d3
FB
2073 it(opaque, drv->format_name);
2074 }
2075}
2076
b338082b
FB
2077BlockDriverState *bdrv_find(const char *name)
2078{
2079 BlockDriverState *bs;
2080
1b7bdbc1
SH
2081 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2082 if (!strcmp(name, bs->device_name)) {
b338082b 2083 return bs;
1b7bdbc1 2084 }
b338082b
FB
2085 }
2086 return NULL;
2087}
2088
2f399b0a
MA
2089BlockDriverState *bdrv_next(BlockDriverState *bs)
2090{
2091 if (!bs) {
2092 return QTAILQ_FIRST(&bdrv_states);
2093 }
2094 return QTAILQ_NEXT(bs, list);
2095}
2096
51de9760 2097void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
81d0912d
FB
2098{
2099 BlockDriverState *bs;
2100
1b7bdbc1 2101 QTAILQ_FOREACH(bs, &bdrv_states, list) {
51de9760 2102 it(opaque, bs);
81d0912d
FB
2103 }
2104}
2105
ea2384d3
FB
2106const char *bdrv_get_device_name(BlockDriverState *bs)
2107{
2108 return bs->device_name;
2109}
2110
c6ca28d6
AL
2111void bdrv_flush_all(void)
2112{
2113 BlockDriverState *bs;
2114
1b7bdbc1 2115 QTAILQ_FOREACH(bs, &bdrv_states, list) {
c602a489 2116 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
c6ca28d6 2117 bdrv_flush(bs);
1b7bdbc1
SH
2118 }
2119 }
c6ca28d6
AL
2120}
2121
f2feebbd
KW
2122int bdrv_has_zero_init(BlockDriverState *bs)
2123{
2124 assert(bs->drv);
2125
336c1c12
KW
2126 if (bs->drv->bdrv_has_zero_init) {
2127 return bs->drv->bdrv_has_zero_init(bs);
f2feebbd
KW
2128 }
2129
2130 return 1;
2131}
2132
376ae3f1
SH
2133typedef struct BdrvCoIsAllocatedData {
2134 BlockDriverState *bs;
2135 int64_t sector_num;
2136 int nb_sectors;
2137 int *pnum;
2138 int ret;
2139 bool done;
2140} BdrvCoIsAllocatedData;
2141
/*
 * Returns true iff the specified sector is present in the disk image. Drivers
 * not implementing the functionality are assumed to not support backing files,
 * hence all their sectors are reported as allocated.
 *
 * If 'sector_num' is beyond the end of the disk image the return value is 0
 * and 'pnum' is set to 0.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
 * beyond the end of the disk image it will be clamped.
 */
int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, int *pnum)
{
    int64_t n;

    /* Query past EOF: report nothing allocated. */
    if (sector_num >= bs->total_sectors) {
        *pnum = 0;
        return 0;
    }

    /* Clamp the request so it does not run past the end of the image. */
    n = bs->total_sectors - sector_num;
    if (n < nb_sectors) {
        nb_sectors = n;
    }

    /* NOTE(review): bs->drv is dereferenced unconditionally here — callers
     * are expected to ensure a medium is present. */
    if (!bs->drv->bdrv_co_is_allocated) {
        *pnum = nb_sectors;
        return 1;
    }

    return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
}
2179
/* Coroutine wrapper for bdrv_is_allocated() */
static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
{
    BdrvCoIsAllocatedData *data = opaque;
    BlockDriverState *bs = data->bs;

    data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
                                     data->pnum);
    /* Tell the synchronous caller spinning in qemu_aio_wait() we finished. */
    data->done = true;
}
2190
/*
 * Synchronous wrapper around bdrv_co_is_allocated().
 *
 * See bdrv_co_is_allocated() for details.
 */
int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
                      int *pnum)
{
    Coroutine *co;
    BdrvCoIsAllocatedData data = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .pnum = pnum,
        .done = false,
    };

    co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
    qemu_coroutine_enter(co, &data);
    /* Pump AIO completions until the coroutine signals it is done. */
    while (!data.done) {
        qemu_aio_wait();
    }
    return data.ret;
}
2215
/* Emit a QMP BLOCK_IO_ERROR event describing how an I/O error on 'bdrv'
 * was handled ('report'/'ignore'/'stop') and whether it was a read or write. */
void bdrv_mon_event(const BlockDriverState *bdrv,
                    BlockMonEventAction action, int is_read)
{
    QObject *data;
    const char *action_str;

    switch (action) {
    case BDRV_ACTION_REPORT:
        action_str = "report";
        break;
    case BDRV_ACTION_IGNORE:
        action_str = "ignore";
        break;
    case BDRV_ACTION_STOP:
        action_str = "stop";
        break;
    default:
        /* Unknown action is a programming error. */
        abort();
    }

    data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
                              bdrv->device_name,
                              action_str,
                              is_read ? "read" : "write");
    monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);

    qobject_decref(data);
}
2244
/* QMP 'query-block' handler: build a list describing every registered
 * block device (medium state, inserted image, and I/O limits if enabled). */
BlockInfoList *qmp_query_block(Error **errp)
{
    BlockInfoList *head = NULL, *cur_item = NULL;
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        BlockInfoList *info = g_malloc0(sizeof(*info));

        info->value = g_malloc0(sizeof(*info->value));
        info->value->device = g_strdup(bs->device_name);
        info->value->type = g_strdup("unknown");
        info->value->locked = bdrv_dev_is_medium_locked(bs);
        info->value->removable = bdrv_dev_has_removable_media(bs);

        if (bdrv_dev_has_removable_media(bs)) {
            info->value->has_tray_open = true;
            info->value->tray_open = bdrv_dev_is_tray_open(bs);
        }

        if (bdrv_iostatus_is_enabled(bs)) {
            info->value->has_io_status = true;
            info->value->io_status = bs->iostatus;
        }

        /* A non-NULL driver means a medium is inserted. */
        if (bs->drv) {
            info->value->has_inserted = true;
            info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
            info->value->inserted->file = g_strdup(bs->filename);
            info->value->inserted->ro = bs->read_only;
            info->value->inserted->drv = g_strdup(bs->drv->format_name);
            info->value->inserted->encrypted = bs->encrypted;
            if (bs->backing_file[0]) {
                info->value->inserted->has_backing_file = true;
                info->value->inserted->backing_file = g_strdup(bs->backing_file);
            }

            if (bs->io_limits_enabled) {
                info->value->inserted->bps =
                               bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
                info->value->inserted->bps_rd =
                               bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
                info->value->inserted->bps_wr =
                               bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
                info->value->inserted->iops =
                               bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
                info->value->inserted->iops_rd =
                               bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
                info->value->inserted->iops_wr =
                               bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
            }
        }

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}
a36e69dd 2308
/* Consider exposing this as a full fledged QMP command */
/* Build the accounting statistics for one device; recurses into bs->file so
 * the protocol layer's stats appear as 'parent'. */
static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
{
    BlockStats *s;

    s = g_malloc0(sizeof(*s));

    /* Anonymous internal nodes have an empty device name; omit the field. */
    if (bs->device_name[0]) {
        s->has_device = true;
        s->device = g_strdup(bs->device_name);
    }

    s->stats = g_malloc0(sizeof(*s->stats));
    s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
    s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
    s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
    s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
    s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
    s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
    s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
    s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
    s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];

    if (bs->file) {
        s->has_parent = true;
        s->parent = qmp_query_blockstat(bs->file, NULL);
    }

    return s;
}
2339
/* QMP 'query-blockstats' handler: collect per-device I/O statistics for
 * every registered block device. */
BlockStatsList *qmp_query_blockstats(Error **errp)
{
    BlockStatsList *head = NULL, *cur_item = NULL;
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        BlockStatsList *info = g_malloc0(sizeof(*info));
        info->value = qmp_query_blockstat(bs, NULL);

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}
ea2384d3 2360
/* Return the filename of the first encrypted image in the chain (backing
 * file first, then the image itself), or NULL if nothing is encrypted. */
const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
{
    if (bs->backing_hd && bs->backing_hd->encrypted)
        return bs->backing_file;
    else if (bs->encrypted)
        return bs->filename;
    else
        return NULL;
}
2370
/* Copy the backing file name into 'filename' (always NUL-terminated,
 * truncated to filename_size if necessary). */
void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size)
{
    pstrcpy(filename, filename_size, bs->backing_file);
}
2376
/* Write 'nb_sectors' of compressed data at 'sector_num'.
 * Returns 0 on success, -ENOMEDIUM without a medium, -ENOTSUP if the driver
 * has no compressed-write support, -EIO on an out-of-range request. */
int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
                          const uint8_t *buf, int nb_sectors)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (!drv->bdrv_write_compressed)
        return -ENOTSUP;
    if (bdrv_check_request(bs, sector_num, nb_sectors))
        return -EIO;

    /* Keep the dirty bitmap (used by block migration) in sync. */
    if (bs->dirty_bitmap) {
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
    }

    return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
}
3b46e624 2394
/* Fill *bdi with driver-specific image information (zeroed first).
 * Returns -ENOMEDIUM / -ENOTSUP on failure. */
int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (!drv->bdrv_get_info)
        return -ENOTSUP;
    memset(bdi, 0, sizeof(*bdi));
    return drv->bdrv_get_info(bs, bdi);
}
2405
/* Save 'size' bytes of VM state at offset 'pos'. If the format driver does
 * not implement it, delegate to the underlying protocol (bs->file). */
int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
                      int64_t pos, int size)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_save_vmstate)
        return drv->bdrv_save_vmstate(bs, buf, pos, size);
    if (bs->file)
        return bdrv_save_vmstate(bs->file, buf, pos, size);
    return -ENOTSUP;
}
2418
/* Load 'size' bytes of VM state from offset 'pos'; mirror image of
 * bdrv_save_vmstate(), including the bs->file fallback. */
int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
                      int64_t pos, int size)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_load_vmstate)
        return drv->bdrv_load_vmstate(bs, buf, pos, size);
    if (bs->file)
        return bdrv_load_vmstate(bs->file, buf, pos, size);
    return -ENOTSUP;
}
2431
/* Forward a blkdebug event to the driver; silently ignored when the driver
 * does not support debug events. */
void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
{
    BlockDriver *drv = bs->drv;

    if (!drv || !drv->bdrv_debug_event) {
        return;
    }

    return drv->bdrv_debug_event(bs, event);

}
2443
/**************************************************************/
/* handling of snapshots */

/* Return 1 if internal snapshots are possible on this device: needs an
 * inserted, writable medium and snapshot support in the format driver or
 * (recursively) in the underlying protocol layer. */
int bdrv_can_snapshot(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
        return 0;
    }

    if (!drv->bdrv_snapshot_create) {
        if (bs->file != NULL) {
            return bdrv_can_snapshot(bs->file);
        }
        return 0;
    }

    return 1;
}
2463
/* Return non-zero if this device was opened with BDRV_O_SNAPSHOT
 * (i.e. it is a temporary snapshot overlay). */
int bdrv_is_snapshot(BlockDriverState *bs)
{
    return !!(bs->open_flags & BDRV_O_SNAPSHOT);
}
2468
/* Return the device used for VM snapshots: the first snapshot-capable
 * device found. The result is cached in bs_snapshots. */
BlockDriverState *bdrv_snapshots(void)
{
    BlockDriverState *bs;

    if (bs_snapshots) {
        return bs_snapshots;
    }

    bs = NULL;
    while ((bs = bdrv_next(bs))) {
        if (bdrv_can_snapshot(bs)) {
            bs_snapshots = bs;
            return bs;
        }
    }
    return NULL;
}
2486
/* Create an internal snapshot described by sn_info; falls back to the
 * protocol layer (bs->file) when the format driver lacks support. */
int bdrv_snapshot_create(BlockDriverState *bs,
                         QEMUSnapshotInfo *sn_info)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_snapshot_create)
        return drv->bdrv_snapshot_create(bs, sn_info);
    if (bs->file)
        return bdrv_snapshot_create(bs->file, sn_info);
    return -ENOTSUP;
}
2499
5fafdf24 2500int bdrv_snapshot_goto(BlockDriverState *bs,
faea38e7
FB
2501 const char *snapshot_id)
2502{
2503 BlockDriver *drv = bs->drv;
7cdb1f6d
MK
2504 int ret, open_ret;
2505
faea38e7 2506 if (!drv)
19cb3738 2507 return -ENOMEDIUM;
7cdb1f6d
MK
2508 if (drv->bdrv_snapshot_goto)
2509 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2510
2511 if (bs->file) {
2512 drv->bdrv_close(bs);
2513 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2514 open_ret = drv->bdrv_open(bs, bs->open_flags);
2515 if (open_ret < 0) {
2516 bdrv_delete(bs->file);
2517 bs->drv = NULL;
2518 return open_ret;
2519 }
2520 return ret;
2521 }
2522
2523 return -ENOTSUP;
faea38e7
FB
2524}
2525
/* Delete internal snapshot 'snapshot_id'; falls back to bs->file when the
 * format driver lacks support. */
int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_snapshot_delete)
        return drv->bdrv_snapshot_delete(bs, snapshot_id);
    if (bs->file)
        return bdrv_snapshot_delete(bs->file, snapshot_id);
    return -ENOTSUP;
}
2537
/* List internal snapshots into *psn_info; falls back to bs->file when the
 * format driver lacks support. Returns the count or a negative errno. */
int bdrv_snapshot_list(BlockDriverState *bs,
                       QEMUSnapshotInfo **psn_info)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_snapshot_list)
        return drv->bdrv_snapshot_list(bs, psn_info);
    if (bs->file)
        return bdrv_snapshot_list(bs->file, psn_info);
    return -ENOTSUP;
}
2550
/* Load an internal snapshot as a temporary, read-only view. The device
 * must already be read-only; no fallback to bs->file here. */
int bdrv_snapshot_load_tmp(BlockDriverState *bs,
        const char *snapshot_name)
{
    BlockDriver *drv = bs->drv;
    if (!drv) {
        return -ENOMEDIUM;
    }
    if (!bs->read_only) {
        return -EINVAL;
    }
    if (drv->bdrv_snapshot_load_tmp) {
        return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
    }
    return -ENOTSUP;
}
2566
#define NB_SUFFIXES 4

/* Format 'size' (bytes) into 'buf' in human-readable form: plain decimal up
 * to 999, then K/M/G/T with one decimal place below 10 units and a rounded
 * integer otherwise. Returns 'buf' for convenient chaining. */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t base = 1024;
    int i;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (i = 0; i < NB_SUFFIXES; i++, base *= 1024) {
        if (size < (10 * base)) {
            /* Small multiple of this unit: show one decimal place. */
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / base, suffixes[i]);
            break;
        }
        if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
            /* Round to the nearest whole unit (adding half the base). */
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     (size + (base >> 1)) / base, suffixes[i]);
            break;
        }
    }
    return buf;
}
2596
/* Format one snapshot as a table row into 'buf'; with sn == NULL, format
 * the table header instead. Returns 'buf'. */
char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
{
    char buf1[128], date_buf[128], clock_buf[128];
#ifdef _WIN32
    struct tm *ptm;
#else
    struct tm tm;
#endif
    time_t ti;
    int64_t secs;

    if (!sn) {
        snprintf(buf, buf_size,
                 "%-10s%-20s%7s%20s%15s",
                 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
    } else {
        ti = sn->date_sec;
        /* Windows has no localtime_r; plain localtime is used there. */
#ifdef _WIN32
        ptm = localtime(&ti);
        strftime(date_buf, sizeof(date_buf),
                 "%Y-%m-%d %H:%M:%S", ptm);
#else
        localtime_r(&ti, &tm);
        strftime(date_buf, sizeof(date_buf),
                 "%Y-%m-%d %H:%M:%S", &tm);
#endif
        /* Render the VM clock (nanoseconds) as HH:MM:SS.mmm. */
        secs = sn->vm_clock_nsec / 1000000000;
        snprintf(clock_buf, sizeof(clock_buf),
                 "%02d:%02d:%02d.%03d",
                 (int)(secs / 3600),
                 (int)((secs / 60) % 60),
                 (int)(secs % 60),
                 (int)((sn->vm_clock_nsec / 1000000) % 1000));
        snprintf(buf, buf_size,
                 "%-10s%-20s%7s%20s%15s",
                 sn->id_str, sn->name,
                 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
                 date_buf,
                 clock_buf);
    }
    return buf;
}
2639
/**************************************************************/
/* async I/Os */

/* Submit an asynchronous vectored read; implemented on top of the
 * coroutine-based request path. */
BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
                                 QEMUIOVector *qiov, int nb_sectors,
                                 BlockDriverCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);

    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
                                 cb, opaque, false);
}
2652
/* Submit an asynchronous vectored write; coroutine-backed like
 * bdrv_aio_readv(). */
BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
                                  QEMUIOVector *qiov, int nb_sectors,
                                  BlockDriverCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);

    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
                                 cb, opaque, true);
}
2662
/* Shared completion state for one bdrv_aio_multiwrite() batch: the original
 * callbacks plus any buffers/qiovs allocated while merging requests. */
typedef struct MultiwriteCB {
    int error;                       /* first error seen, 0 if none */
    int num_requests;                /* merged requests still in flight */
    int num_callbacks;               /* original (pre-merge) request count */
    struct {
        BlockDriverCompletionFunc *cb;
        void *opaque;
        QEMUIOVector *free_qiov;     /* merge-allocated qiov to destroy */
        void *free_buf;              /* merge-allocated zero-fill buffer */
    } callbacks[];
} MultiwriteCB;

/* Deliver the batch result to every original caller and release the
 * resources allocated during merging. */
static void multiwrite_user_cb(MultiwriteCB *mcb)
{
    int i;

    for (i = 0; i < mcb->num_callbacks; i++) {
        mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
        if (mcb->callbacks[i].free_qiov) {
            qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
        }
        g_free(mcb->callbacks[i].free_qiov);
        qemu_vfree(mcb->callbacks[i].free_buf);
    }
}
2689
/* Per-request completion for a multiwrite batch: record the first error and
 * fire the user callbacks once the last merged request finishes. */
static void multiwrite_cb(void *opaque, int ret)
{
    MultiwriteCB *mcb = opaque;

    trace_multiwrite_cb(mcb, ret);

    if (ret < 0 && !mcb->error) {
        mcb->error = ret;
    }

    mcb->num_requests--;
    if (mcb->num_requests == 0) {
        multiwrite_user_cb(mcb);
        g_free(mcb);
    }
}
2706
/* qsort comparator ordering block requests by start sector. */
static int multiwrite_req_compare(const void *a, const void *b)
{
    const BlockRequest *req1 = a, *req2 = b;

    /*
     * Note that we can't simply subtract req2->sector from req1->sector
     * here as that could overflow the return value.
     */
    if (req1->sector > req2->sector) {
        return 1;
    } else if (req1->sector < req2->sector) {
        return -1;
    } else {
        return 0;
    }
}
2723
/*
 * Takes a bunch of requests and tries to merge them. Returns the number of
 * requests that remain after merging.
 */
static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
    int num_reqs, MultiwriteCB *mcb)
{
    int i, outidx;

    // Sort requests by start sector
    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);

    // Check if adjacent requests touch the same clusters. If so, combine them,
    // filling up gaps with zero sectors.
    outidx = 0;
    for (i = 1; i < num_reqs; i++) {
        int merge = 0;
        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;

        // This handles the cases that are valid for all block drivers, namely
        // exactly sequential writes and overlapping writes.
        if (reqs[i].sector <= oldreq_last) {
            merge = 1;
        }

        // The block driver may decide that it makes sense to combine requests
        // even if there is a gap of some sectors between them. In this case,
        // the gap is filled with zeros (therefore only applicable for yet
        // unused space in format like qcow2).
        if (!merge && bs->drv->bdrv_merge_requests) {
            merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
        }

        // Never merge past the host's per-call iovec limit.
        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
            merge = 0;
        }

        if (merge) {
            size_t size;
            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
            qemu_iovec_init(qiov,
                    reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);

            // Add the first request to the merged one. If the requests are
            // overlapping, drop the last sectors of the first request.
            size = (reqs[i].sector - reqs[outidx].sector) << 9;
            qemu_iovec_concat(qiov, reqs[outidx].qiov, size);

            // We might need to add some zeros between the two requests
            if (reqs[i].sector > oldreq_last) {
                size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
                uint8_t *buf = qemu_blockalign(bs, zero_bytes);
                memset(buf, 0, zero_bytes);
                qemu_iovec_add(qiov, buf, zero_bytes);
                mcb->callbacks[i].free_buf = buf;
            }

            // Add the second request
            qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);

            reqs[outidx].nb_sectors = qiov->size >> 9;
            reqs[outidx].qiov = qiov;

            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
        } else {
            outidx++;
            reqs[outidx].sector = reqs[i].sector;
            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
            reqs[outidx].qiov = reqs[i].qiov;
        }
    }

    return outidx + 1;
}
2798
/*
 * Submit multiple AIO write requests at once.
 *
 * On success, the function returns 0 and all requests in the reqs array have
 * been submitted. In error case this function returns -1, and any of the
 * requests may or may not be submitted yet. In particular, this means that the
 * callback will be called for some of the requests, for others it won't. The
 * caller must check the error field of the BlockRequest to wait for the right
 * callbacks (if error != 0, no callback will be called).
 *
 * The implementation may modify the contents of the reqs array, e.g. to merge
 * requests. However, the fields opaque and error are left unmodified as they
 * are used to signal failure for a single request to the caller.
 */
int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
{
    MultiwriteCB *mcb;
    int i;

    /* don't submit writes if we don't have a medium */
    if (bs->drv == NULL) {
        for (i = 0; i < num_reqs; i++) {
            reqs[i].error = -ENOMEDIUM;
        }
        return -1;
    }

    if (num_reqs == 0) {
        return 0;
    }

    // Create MultiwriteCB structure
    mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
    mcb->num_requests = 0;
    mcb->num_callbacks = num_reqs;

    /* Remember every original callback before merging rewrites reqs[]. */
    for (i = 0; i < num_reqs; i++) {
        mcb->callbacks[i].cb = reqs[i].cb;
        mcb->callbacks[i].opaque = reqs[i].opaque;
    }

    // Check for mergable requests
    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);

    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);

    /* Run the aio requests. */
    mcb->num_requests = num_reqs;
    for (i = 0; i < num_reqs; i++) {
        bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
                        reqs[i].nb_sectors, multiwrite_cb, mcb);
    }

    return 0;
}
2854
/* Cancel an in-flight AIO request via its pool's cancel hook. */
void bdrv_aio_cancel(BlockDriverAIOCB *acb)
{
    acb->pool->cancel(acb);
}
2859
/* block I/O throttling */

/* Decide whether this request would exceed the bytes-per-second limit in the
 * current time slice. Returns true (with *wait set to the estimated delay in
 * ns) when the request must be throttled, false otherwise. */
static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
                 bool is_write, double elapsed_time, uint64_t *wait)
{
    uint64_t bps_limit = 0;
    double   bytes_limit, bytes_base, bytes_res;
    double   slice_time, wait_time;

    /* A total limit takes precedence over per-direction limits. */
    if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
        bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
    } else if (bs->io_limits.bps[is_write]) {
        bps_limit = bs->io_limits.bps[is_write];
    } else {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    slice_time = bs->slice_end - bs->slice_start;
    slice_time /= (NANOSECONDS_PER_SECOND);
    bytes_limit = bps_limit * slice_time;
    bytes_base  = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
    if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
        /* Total limit counts traffic in both directions. */
        bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
    }

    /* bytes_base: the bytes of data which have been read/written; and
     * it is obtained from the history statistic info.
     * bytes_res: the remaining bytes of data which need to be read/written.
     * (bytes_base + bytes_res) / bps_limit: used to calcuate
     * the total time for completing reading/writting all data.
     */
    bytes_res   = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;

    if (bytes_base + bytes_res <= bytes_limit) {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    /* Calc approx time to dispatch */
    wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;

    /* When the I/O rate at runtime exceeds the limits,
     * bs->slice_end need to be extended in order that the current statistic
     * info can be kept until the timer fire, so it is increased and tuned
     * based on the result of experiment.
     */
    bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
    bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
    if (wait) {
        *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
    }

    return true;
}
2920
/* Decide whether one more operation would exceed the IOPS limit in the
 * current slice; same contract as bdrv_exceed_bps_limits(). */
static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
                             double elapsed_time, uint64_t *wait)
{
    uint64_t iops_limit = 0;
    double   ios_limit, ios_base;
    double   slice_time, wait_time;

    /* A total limit takes precedence over per-direction limits. */
    if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
        iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
    } else if (bs->io_limits.iops[is_write]) {
        iops_limit = bs->io_limits.iops[is_write];
    } else {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    slice_time = bs->slice_end - bs->slice_start;
    slice_time /= (NANOSECONDS_PER_SECOND);
    ios_limit  = iops_limit * slice_time;
    ios_base   = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
    if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
        ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
    }

    if (ios_base + 1 <= ios_limit) {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    /* Calc approx time to dispatch */
    wait_time = (ios_base + 1) / iops_limit;
    if (wait_time > elapsed_time) {
        wait_time = wait_time - elapsed_time;
    } else {
        wait_time = 0;
    }

    /* Extend the slice so the statistics survive until the timer fires;
     * the scale factor is empirically tuned (see bps variant). */
    bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
    bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
    if (wait) {
        *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
    }

    return true;
}
2972
/* Combine the bps and iops checks: returns true if the request must wait,
 * storing the larger of the two estimated delays in *wait. Also starts a new
 * accounting slice when the previous one has expired. */
static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
                           bool is_write, int64_t *wait)
{
    int64_t  now, max_wait;
    uint64_t bps_wait = 0, iops_wait = 0;
    double   elapsed_time;
    int      bps_ret, iops_ret;

    now = qemu_get_clock_ns(vm_clock);
    if ((bs->slice_start < now)
        && (bs->slice_end > now)) {
        /* Still inside the current slice: just push its end out. */
        bs->slice_end = now + bs->slice_time;
    } else {
        /* Slice expired: start a fresh one and snapshot the counters that
         * serve as the baseline for this slice's accounting. */
        bs->slice_time  =  5 * BLOCK_IO_SLICE_TIME;
        bs->slice_start = now;
        bs->slice_end   = now + bs->slice_time;

        bs->io_base.bytes[is_write]  = bs->nr_bytes[is_write];
        bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];

        bs->io_base.ios[is_write]    = bs->nr_ops[is_write];
        bs->io_base.ios[!is_write]   = bs->nr_ops[!is_write];
    }

    elapsed_time  = now - bs->slice_start;
    elapsed_time  /= (NANOSECONDS_PER_SECOND);

    bps_ret  = bdrv_exceed_bps_limits(bs, nb_sectors,
                                      is_write, elapsed_time, &bps_wait);
    iops_ret = bdrv_exceed_iops_limits(bs, is_write,
                                      elapsed_time, &iops_wait);
    if (bps_ret || iops_ret) {
        /* Throttle for the longer of the two delays. */
        max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
        if (wait) {
            *wait = max_wait;
        }

        now = qemu_get_clock_ns(vm_clock);
        if (bs->slice_end < now + max_wait) {
            bs->slice_end = now + max_wait;
        }

        return true;
    }

    if (wait) {
        *wait = 0;
    }

    return false;
}
ce1a14dc 3024
/**************************************************************/
/* async block device emulation */

/* AIOCB used to emulate AIO on top of synchronous bdrv_read/bdrv_write:
 * the request runs synchronously and completion is deferred to a BH. */
typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;
    QEMUBH *bh;
    int ret;
    /* vector translation state */
    QEMUIOVector *qiov;
    uint8_t *bounce;    /* linear bounce buffer for the scattered qiov */
    int is_write;
} BlockDriverAIOCBSync;

/* Cancel an emulated AIO request by dropping its pending completion BH. */
static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
{
    BlockDriverAIOCBSync *acb =
        container_of(blockacb, BlockDriverAIOCBSync, common);
    qemu_bh_delete(acb->bh);
    acb->bh = NULL;
    qemu_aio_release(acb);
}

static AIOPool bdrv_em_aio_pool = {
    .aiocb_size         = sizeof(BlockDriverAIOCBSync),
    .cancel             = bdrv_aio_cancel_em,
};
3051
/* Completion BH for emulated AIO: copy read data back from the bounce
 * buffer, invoke the user callback, and release the request. */
static void bdrv_aio_bh_cb(void *opaque)
{
    BlockDriverAIOCBSync *acb = opaque;

    if (!acb->is_write)
        qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
    qemu_vfree(acb->bounce);
    acb->common.cb(acb->common.opaque, acb->ret);
    qemu_bh_delete(acb->bh);
    acb->bh = NULL;
    qemu_aio_release(acb);
}
beac80cd 3064
/* Emulate vectored AIO with the driver's synchronous bdrv_read/bdrv_write:
 * perform the I/O immediately through a bounce buffer, then schedule a BH
 * so the caller still sees asynchronous completion semantics. */
static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
                                            int64_t sector_num,
                                            QEMUIOVector *qiov,
                                            int nb_sectors,
                                            BlockDriverCompletionFunc *cb,
                                            void *opaque,
                                            int is_write)

{
    BlockDriverAIOCBSync *acb;

    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
    acb->is_write = is_write;
    acb->qiov = qiov;
    acb->bounce = qemu_blockalign(bs, qiov->size);
    acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);

    if (is_write) {
        /* Gather the scattered source data, then write it in one call. */
        qemu_iovec_to_buffer(acb->qiov, acb->bounce);
        acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
    } else {
        /* Data is scattered back into qiov by bdrv_aio_bh_cb(). */
        acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
    }

    qemu_bh_schedule(acb->bh);

    return &acb->common;
}
3093
/* Emulated-AIO read entry point (thin wrapper over bdrv_aio_rw_vector). */
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
}
83f64091 3100
/* Emulated-AIO write entry point (thin wrapper over bdrv_aio_rw_vector). */
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
}
beac80cd 3107
/* AIOCB backing the coroutine-based AIO emulation; the request parameters
 * and result travel in 'req', completion is delivered via BH. */
typedef struct BlockDriverAIOCBCoroutine {
    BlockDriverAIOCB common;
    BlockRequest req;
    bool is_write;
    QEMUBH* bh;
} BlockDriverAIOCBCoroutine;

/* "Cancel" by draining all outstanding AIO, which forces the request's
 * coroutine (and its completion) to finish. */
static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
{
    qemu_aio_flush();
}

static AIOPool bdrv_em_co_aio_pool = {
    .aiocb_size         = sizeof(BlockDriverAIOCBCoroutine),
    .cancel             = bdrv_aio_co_cancel_em,
};
3125
/* Completion BH shared by the coroutine AIO paths: report req.error to the
 * user callback and release the AIOCB. */
static void bdrv_co_em_bh(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;

    acb->common.cb(acb->common.opaque, acb->req.error);
    qemu_bh_delete(acb->bh);
    qemu_aio_release(acb);
}
3134
/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
static void coroutine_fn bdrv_co_do_rw(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    if (!acb->is_write) {
        acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov);
    } else {
        acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov);
    }

    /* Defer completion to a BH so the callback runs outside coroutine
     * context. */
    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}
3152
/* Common implementation of bdrv_aio_readv/bdrv_aio_writev: package the
 * request into an AIOCB and run it in a new coroutine. */
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
                                               bool is_write)
{
    Coroutine *co;
    BlockDriverAIOCBCoroutine *acb;

    acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
    acb->req.sector = sector_num;
    acb->req.nb_sectors = nb_sectors;
    acb->req.qiov = qiov;
    acb->is_write = is_write;

    co = qemu_coroutine_create(bdrv_co_do_rw);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
}
3175
/* Coroutine body for bdrv_aio_flush(): run the flush, then complete via BH. */
static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    acb->req.error = bdrv_co_flush(bs);
    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}
3185
07f07615 3186BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
016f5cf6
AG
3187 BlockDriverCompletionFunc *cb, void *opaque)
3188{
07f07615 3189 trace_bdrv_aio_flush(bs, opaque);
016f5cf6 3190
07f07615
PB
3191 Coroutine *co;
3192 BlockDriverAIOCBCoroutine *acb;
016f5cf6 3193
07f07615
PB
3194 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3195 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
3196 qemu_coroutine_enter(co, acb);
016f5cf6 3197
016f5cf6
AG
3198 return &acb->common;
3199}
3200
4265d620
PB
3201static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
3202{
3203 BlockDriverAIOCBCoroutine *acb = opaque;
3204 BlockDriverState *bs = acb->common.bs;
3205
3206 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
3207 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3208 qemu_bh_schedule(acb->bh);
3209}
3210
3211BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
3212 int64_t sector_num, int nb_sectors,
3213 BlockDriverCompletionFunc *cb, void *opaque)
3214{
3215 Coroutine *co;
3216 BlockDriverAIOCBCoroutine *acb;
3217
3218 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
3219
3220 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3221 acb->req.sector = sector_num;
3222 acb->req.nb_sectors = nb_sectors;
3223 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
3224 qemu_coroutine_enter(co, acb);
3225
3226 return &acb->common;
3227}
3228
ea2384d3
FB
3229void bdrv_init(void)
3230{
5efa9d5a 3231 module_call_init(MODULE_INIT_BLOCK);
ea2384d3 3232}
ce1a14dc 3233
eb852011
MA
3234void bdrv_init_with_whitelist(void)
3235{
3236 use_bdrv_whitelist = 1;
3237 bdrv_init();
3238}
3239
c16b5a2c
CH
3240void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
3241 BlockDriverCompletionFunc *cb, void *opaque)
ce1a14dc 3242{
ce1a14dc
PB
3243 BlockDriverAIOCB *acb;
3244
6bbff9a0
AL
3245 if (pool->free_aiocb) {
3246 acb = pool->free_aiocb;
3247 pool->free_aiocb = acb->next;
ce1a14dc 3248 } else {
7267c094 3249 acb = g_malloc0(pool->aiocb_size);
6bbff9a0 3250 acb->pool = pool;
ce1a14dc
PB
3251 }
3252 acb->bs = bs;
3253 acb->cb = cb;
3254 acb->opaque = opaque;
3255 return acb;
3256}
3257
3258void qemu_aio_release(void *p)
3259{
6bbff9a0
AL
3260 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
3261 AIOPool *pool = acb->pool;
3262 acb->next = pool->free_aiocb;
3263 pool->free_aiocb = acb;
ce1a14dc 3264}
19cb3738 3265
f9f05dc5
KW
3266/**************************************************************/
3267/* Coroutine block device emulation */
3268
3269typedef struct CoroutineIOCompletion {
3270 Coroutine *coroutine;
3271 int ret;
3272} CoroutineIOCompletion;
3273
3274static void bdrv_co_io_em_complete(void *opaque, int ret)
3275{
3276 CoroutineIOCompletion *co = opaque;
3277
3278 co->ret = ret;
3279 qemu_coroutine_enter(co->coroutine, NULL);
3280}
3281
3282static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
3283 int nb_sectors, QEMUIOVector *iov,
3284 bool is_write)
3285{
3286 CoroutineIOCompletion co = {
3287 .coroutine = qemu_coroutine_self(),
3288 };
3289 BlockDriverAIOCB *acb;
3290
3291 if (is_write) {
a652d160
SH
3292 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
3293 bdrv_co_io_em_complete, &co);
f9f05dc5 3294 } else {
a652d160
SH
3295 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3296 bdrv_co_io_em_complete, &co);
f9f05dc5
KW
3297 }
3298
59370aaa 3299 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
f9f05dc5
KW
3300 if (!acb) {
3301 return -EIO;
3302 }
3303 qemu_coroutine_yield();
3304
3305 return co.ret;
3306}
3307
3308static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3309 int64_t sector_num, int nb_sectors,
3310 QEMUIOVector *iov)
3311{
3312 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3313}
3314
3315static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3316 int64_t sector_num, int nb_sectors,
3317 QEMUIOVector *iov)
3318{
3319 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3320}
3321
07f07615 3322static void coroutine_fn bdrv_flush_co_entry(void *opaque)
e7a8a783 3323{
07f07615
PB
3324 RwCo *rwco = opaque;
3325
3326 rwco->ret = bdrv_co_flush(rwco->bs);
3327}
3328
/*
 * Flush @bs from within coroutine context.
 *
 * Ordering matters: data cached by the format driver is first written
 * back to the OS (even with cache=unsafe), and only then forced to the
 * disk, preferring the driver's coroutine flush hook over its AIO hook.
 *
 * Returns 0 on success (including when there is nothing to do) or a
 * negative errno from the driver.
 */
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    int ret;

    /* No medium / no driver: nothing to flush. */
    if (!bs->drv) {
        return 0;
    }

    /* Write back cached data to the OS even with cache=unsafe */
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            return ret;
        }
    }

    /* But don't actually force it to the disk with cache=unsafe */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        return 0;
    }

    if (bs->drv->bdrv_co_flush_to_disk) {
        /* Native coroutine flush hook. */
        return bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        /* Emulate the coroutine flush on top of the driver's AIO flush:
         * submit, then yield until bdrv_co_io_em_complete wakes us. */
        BlockDriverAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            return -EIO;
        } else {
            qemu_coroutine_yield();
            return co.ret;
        }
    } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work because
         * that would break guests even if the server operates in writethrough
         * mode.
         *
         * Let's hope the user knows what he's doing.
         */
        return 0;
    }
}
3380
0f15423c
AL
3381void bdrv_invalidate_cache(BlockDriverState *bs)
3382{
3383 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3384 bs->drv->bdrv_invalidate_cache(bs);
3385 }
3386}
3387
3388void bdrv_invalidate_cache_all(void)
3389{
3390 BlockDriverState *bs;
3391
3392 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3393 bdrv_invalidate_cache(bs);
3394 }
3395}
3396
07f07615
PB
3397int bdrv_flush(BlockDriverState *bs)
3398{
3399 Coroutine *co;
3400 RwCo rwco = {
3401 .bs = bs,
3402 .ret = NOT_DONE,
e7a8a783 3403 };
e7a8a783 3404
07f07615
PB
3405 if (qemu_in_coroutine()) {
3406 /* Fast-path if already in coroutine context */
3407 bdrv_flush_co_entry(&rwco);
3408 } else {
3409 co = qemu_coroutine_create(bdrv_flush_co_entry);
3410 qemu_coroutine_enter(co, &rwco);
3411 while (rwco.ret == NOT_DONE) {
3412 qemu_aio_wait();
3413 }
e7a8a783 3414 }
07f07615
PB
3415
3416 return rwco.ret;
e7a8a783
KW
3417}
3418
4265d620
PB
3419static void coroutine_fn bdrv_discard_co_entry(void *opaque)
3420{
3421 RwCo *rwco = opaque;
3422
3423 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
3424}
3425
3426int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
3427 int nb_sectors)
3428{
3429 if (!bs->drv) {
3430 return -ENOMEDIUM;
3431 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
3432 return -EIO;
3433 } else if (bs->read_only) {
3434 return -EROFS;
3435 } else if (bs->drv->bdrv_co_discard) {
3436 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
3437 } else if (bs->drv->bdrv_aio_discard) {
3438 BlockDriverAIOCB *acb;
3439 CoroutineIOCompletion co = {
3440 .coroutine = qemu_coroutine_self(),
3441 };
3442
3443 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
3444 bdrv_co_io_em_complete, &co);
3445 if (acb == NULL) {
3446 return -EIO;
3447 } else {
3448 qemu_coroutine_yield();
3449 return co.ret;
3450 }
4265d620
PB
3451 } else {
3452 return 0;
3453 }
3454}
3455
3456int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
3457{
3458 Coroutine *co;
3459 RwCo rwco = {
3460 .bs = bs,
3461 .sector_num = sector_num,
3462 .nb_sectors = nb_sectors,
3463 .ret = NOT_DONE,
3464 };
3465
3466 if (qemu_in_coroutine()) {
3467 /* Fast-path if already in coroutine context */
3468 bdrv_discard_co_entry(&rwco);
3469 } else {
3470 co = qemu_coroutine_create(bdrv_discard_co_entry);
3471 qemu_coroutine_enter(co, &rwco);
3472 while (rwco.ret == NOT_DONE) {
3473 qemu_aio_wait();
3474 }
3475 }
3476
3477 return rwco.ret;
3478}
3479
19cb3738
FB
3480/**************************************************************/
3481/* removable device support */
3482
3483/**
3484 * Return TRUE if the media is present
3485 */
3486int bdrv_is_inserted(BlockDriverState *bs)
3487{
3488 BlockDriver *drv = bs->drv;
a1aff5bf 3489
19cb3738
FB
3490 if (!drv)
3491 return 0;
3492 if (!drv->bdrv_is_inserted)
a1aff5bf
MA
3493 return 1;
3494 return drv->bdrv_is_inserted(bs);
19cb3738
FB
3495}
3496
3497/**
8e49ca46
MA
3498 * Return whether the media changed since the last call to this
3499 * function, or -ENOTSUP if we don't know. Most drivers don't know.
19cb3738
FB
3500 */
3501int bdrv_media_changed(BlockDriverState *bs)
3502{
3503 BlockDriver *drv = bs->drv;
19cb3738 3504
8e49ca46
MA
3505 if (drv && drv->bdrv_media_changed) {
3506 return drv->bdrv_media_changed(bs);
3507 }
3508 return -ENOTSUP;
19cb3738
FB
3509}
3510
3511/**
3512 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3513 */
fdec4404 3514void bdrv_eject(BlockDriverState *bs, int eject_flag)
19cb3738
FB
3515{
3516 BlockDriver *drv = bs->drv;
19cb3738 3517
822e1cd1
MA
3518 if (drv && drv->bdrv_eject) {
3519 drv->bdrv_eject(bs, eject_flag);
19cb3738
FB
3520 }
3521}
3522
19cb3738
FB
3523/**
3524 * Lock or unlock the media (if it is locked, the user won't be able
3525 * to eject it manually).
3526 */
025e849a 3527void bdrv_lock_medium(BlockDriverState *bs, bool locked)
19cb3738
FB
3528{
3529 BlockDriver *drv = bs->drv;
3530
025e849a 3531 trace_bdrv_lock_medium(bs, locked);
b8c6d095 3532
025e849a
MA
3533 if (drv && drv->bdrv_lock_medium) {
3534 drv->bdrv_lock_medium(bs, locked);
19cb3738
FB
3535 }
3536}
985a03b0
TS
3537
3538/* needed for generic scsi interface */
3539
3540int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3541{
3542 BlockDriver *drv = bs->drv;
3543
3544 if (drv && drv->bdrv_ioctl)
3545 return drv->bdrv_ioctl(bs, req, buf);
3546 return -ENOTSUP;
3547}
7d780669 3548
221f715d
AL
3549BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3550 unsigned long int req, void *buf,
3551 BlockDriverCompletionFunc *cb, void *opaque)
7d780669 3552{
221f715d 3553 BlockDriver *drv = bs->drv;
7d780669 3554
221f715d
AL
3555 if (drv && drv->bdrv_aio_ioctl)
3556 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3557 return NULL;
7d780669 3558}
e268ca52 3559
7b6f9300
MA
3560void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3561{
3562 bs->buffer_alignment = align;
3563}
7cd1e32a 3564
e268ca52
AL
3565void *qemu_blockalign(BlockDriverState *bs, size_t size)
3566{
3567 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3568}
7cd1e32a 3569
3570void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3571{
3572 int64_t bitmap_size;
a55eb92c 3573
aaa0eb75 3574 bs->dirty_count = 0;
a55eb92c 3575 if (enable) {
c6d22830
JK
3576 if (!bs->dirty_bitmap) {
3577 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3578 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3579 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
a55eb92c 3580
7267c094 3581 bs->dirty_bitmap = g_malloc0(bitmap_size);
a55eb92c 3582 }
7cd1e32a 3583 } else {
c6d22830 3584 if (bs->dirty_bitmap) {
7267c094 3585 g_free(bs->dirty_bitmap);
c6d22830 3586 bs->dirty_bitmap = NULL;
a55eb92c 3587 }
7cd1e32a 3588 }
3589}
3590
3591int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3592{
6ea44308 3593 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c 3594
c6d22830
JK
3595 if (bs->dirty_bitmap &&
3596 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
6d59fec1
MT
3597 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3598 (1UL << (chunk % (sizeof(unsigned long) * 8))));
7cd1e32a 3599 } else {
3600 return 0;
3601 }
3602}
3603
a55eb92c
JK
3604void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3605 int nr_sectors)
7cd1e32a 3606{
3607 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3608}
aaa0eb75
LS
3609
3610int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3611{
3612 return bs->dirty_count;
3613}
f88e1a42 3614
db593f25
MT
3615void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3616{
3617 assert(bs->in_use != in_use);
3618 bs->in_use = in_use;
3619}
3620
3621int bdrv_in_use(BlockDriverState *bs)
3622{
3623 return bs->in_use;
3624}
3625
28a7282a
LC
3626void bdrv_iostatus_enable(BlockDriverState *bs)
3627{
d6bf279e 3628 bs->iostatus_enabled = true;
58e21ef5 3629 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
28a7282a
LC
3630}
3631
3632/* The I/O status is only enabled if the drive explicitly
3633 * enables it _and_ the VM is configured to stop on errors */
3634bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3635{
d6bf279e 3636 return (bs->iostatus_enabled &&
28a7282a
LC
3637 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3638 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3639 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3640}
3641
3642void bdrv_iostatus_disable(BlockDriverState *bs)
3643{
d6bf279e 3644 bs->iostatus_enabled = false;
28a7282a
LC
3645}
3646
3647void bdrv_iostatus_reset(BlockDriverState *bs)
3648{
3649 if (bdrv_iostatus_is_enabled(bs)) {
58e21ef5 3650 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
28a7282a
LC
3651 }
3652}
3653
3654/* XXX: Today this is set by device models because it makes the implementation
3655 quite simple. However, the block layer knows about the error, so it's
3656 possible to implement this without device models being involved */
3657void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3658{
58e21ef5
LC
3659 if (bdrv_iostatus_is_enabled(bs) &&
3660 bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
28a7282a 3661 assert(error >= 0);
58e21ef5
LC
3662 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3663 BLOCK_DEVICE_IO_STATUS_FAILED;
28a7282a
LC
3664 }
3665}
3666
a597e79c
CH
3667void
3668bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3669 enum BlockAcctType type)
3670{
3671 assert(type < BDRV_MAX_IOTYPE);
3672
3673 cookie->bytes = bytes;
c488c7f6 3674 cookie->start_time_ns = get_clock();
a597e79c
CH
3675 cookie->type = type;
3676}
3677
3678void
3679bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3680{
3681 assert(cookie->type < BDRV_MAX_IOTYPE);
3682
3683 bs->nr_bytes[cookie->type] += cookie->bytes;
3684 bs->nr_ops[cookie->type]++;
c488c7f6 3685 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
a597e79c
CH
3686}
3687
f88e1a42
JS
3688int bdrv_img_create(const char *filename, const char *fmt,
3689 const char *base_filename, const char *base_fmt,
3690 char *options, uint64_t img_size, int flags)
3691{
3692 QEMUOptionParameter *param = NULL, *create_options = NULL;
d220894e 3693 QEMUOptionParameter *backing_fmt, *backing_file, *size;
f88e1a42
JS
3694 BlockDriverState *bs = NULL;
3695 BlockDriver *drv, *proto_drv;
96df67d1 3696 BlockDriver *backing_drv = NULL;
f88e1a42
JS
3697 int ret = 0;
3698
3699 /* Find driver and parse its options */
3700 drv = bdrv_find_format(fmt);
3701 if (!drv) {
3702 error_report("Unknown file format '%s'", fmt);
4f70f249 3703 ret = -EINVAL;
f88e1a42
JS
3704 goto out;
3705 }
3706
3707 proto_drv = bdrv_find_protocol(filename);
3708 if (!proto_drv) {
3709 error_report("Unknown protocol '%s'", filename);
4f70f249 3710 ret = -EINVAL;
f88e1a42
JS
3711 goto out;
3712 }
3713
3714 create_options = append_option_parameters(create_options,
3715 drv->create_options);
3716 create_options = append_option_parameters(create_options,
3717 proto_drv->create_options);
3718
3719 /* Create parameter list with default values */
3720 param = parse_option_parameters("", create_options, param);
3721
3722 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3723
3724 /* Parse -o options */
3725 if (options) {
3726 param = parse_option_parameters(options, create_options, param);
3727 if (param == NULL) {
3728 error_report("Invalid options for file format '%s'.", fmt);
4f70f249 3729 ret = -EINVAL;
f88e1a42
JS
3730 goto out;
3731 }
3732 }
3733
3734 if (base_filename) {
3735 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3736 base_filename)) {
3737 error_report("Backing file not supported for file format '%s'",
3738 fmt);
4f70f249 3739 ret = -EINVAL;
f88e1a42
JS
3740 goto out;
3741 }
3742 }
3743
3744 if (base_fmt) {
3745 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3746 error_report("Backing file format not supported for file "
3747 "format '%s'", fmt);
4f70f249 3748 ret = -EINVAL;
f88e1a42
JS
3749 goto out;
3750 }
3751 }
3752
792da93a
JS
3753 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3754 if (backing_file && backing_file->value.s) {
3755 if (!strcmp(filename, backing_file->value.s)) {
3756 error_report("Error: Trying to create an image with the "
3757 "same filename as the backing file");
4f70f249 3758 ret = -EINVAL;
792da93a
JS
3759 goto out;
3760 }
3761 }
3762
f88e1a42
JS
3763 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3764 if (backing_fmt && backing_fmt->value.s) {
96df67d1
SH
3765 backing_drv = bdrv_find_format(backing_fmt->value.s);
3766 if (!backing_drv) {
f88e1a42
JS
3767 error_report("Unknown backing file format '%s'",
3768 backing_fmt->value.s);
4f70f249 3769 ret = -EINVAL;
f88e1a42
JS
3770 goto out;
3771 }
3772 }
3773
3774 // The size for the image must always be specified, with one exception:
3775 // If we are using a backing file, we can obtain the size from there
d220894e
KW
3776 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3777 if (size && size->value.n == -1) {
f88e1a42
JS
3778 if (backing_file && backing_file->value.s) {
3779 uint64_t size;
f88e1a42
JS
3780 char buf[32];
3781
f88e1a42
JS
3782 bs = bdrv_new("");
3783
96df67d1 3784 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
f88e1a42 3785 if (ret < 0) {
96df67d1 3786 error_report("Could not open '%s'", backing_file->value.s);
f88e1a42
JS
3787 goto out;
3788 }
3789 bdrv_get_geometry(bs, &size);
3790 size *= 512;
3791
3792 snprintf(buf, sizeof(buf), "%" PRId64, size);
3793 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3794 } else {
3795 error_report("Image creation needs a size parameter");
4f70f249 3796 ret = -EINVAL;
f88e1a42
JS
3797 goto out;
3798 }
3799 }
3800
3801 printf("Formatting '%s', fmt=%s ", filename, fmt);
3802 print_option_parameters(param);
3803 puts("");
3804
3805 ret = bdrv_create(drv, filename, param);
3806
3807 if (ret < 0) {
3808 if (ret == -ENOTSUP) {
3809 error_report("Formatting or formatting option not supported for "
3810 "file format '%s'", fmt);
3811 } else if (ret == -EFBIG) {
3812 error_report("The image size is too large for file format '%s'",
3813 fmt);
3814 } else {
3815 error_report("%s: error while creating %s: %s", filename, fmt,
3816 strerror(-ret));
3817 }
3818 }
3819
3820out:
3821 free_option_parameters(create_options);
3822 free_option_parameters(param);
3823
3824 if (bs) {
3825 bdrv_delete(bs);
3826 }
4f70f249
JS
3827
3828 return ret;
f88e1a42 3829}