2 * QEMU System Emulator block driver
4 * Copyright (c) 2003 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 #include "config-host.h"
25 #include "qemu-common.h"
28 #include "block_int.h"
31 #include "qemu-coroutine.h"
32 #include "qmp-commands.h"
33 #include "qemu-timer.h"
36 #include <sys/types.h>
38 #include <sys/ioctl.h>
39 #include <sys/queue.h>
49 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
51 static void bdrv_dev_change_media_cb(BlockDriverState
*bs
, bool load
);
52 static BlockDriverAIOCB
*bdrv_aio_readv_em(BlockDriverState
*bs
,
53 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
54 BlockDriverCompletionFunc
*cb
, void *opaque
);
55 static BlockDriverAIOCB
*bdrv_aio_writev_em(BlockDriverState
*bs
,
56 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
57 BlockDriverCompletionFunc
*cb
, void *opaque
);
58 static int coroutine_fn
bdrv_co_readv_em(BlockDriverState
*bs
,
59 int64_t sector_num
, int nb_sectors
,
61 static int coroutine_fn
bdrv_co_writev_em(BlockDriverState
*bs
,
62 int64_t sector_num
, int nb_sectors
,
64 static int coroutine_fn
bdrv_co_do_readv(BlockDriverState
*bs
,
65 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
);
66 static int coroutine_fn
bdrv_co_do_writev(BlockDriverState
*bs
,
67 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
);
68 static BlockDriverAIOCB
*bdrv_co_aio_rw_vector(BlockDriverState
*bs
,
72 BlockDriverCompletionFunc
*cb
,
75 static void coroutine_fn
bdrv_co_do_rw(void *opaque
);
77 static bool bdrv_exceed_bps_limits(BlockDriverState
*bs
, int nb_sectors
,
78 bool is_write
, double elapsed_time
, uint64_t *wait
);
79 static bool bdrv_exceed_iops_limits(BlockDriverState
*bs
, bool is_write
,
80 double elapsed_time
, uint64_t *wait
);
81 static bool bdrv_exceed_io_limits(BlockDriverState
*bs
, int nb_sectors
,
82 bool is_write
, int64_t *wait
);
84 static QTAILQ_HEAD(, BlockDriverState
) bdrv_states
=
85 QTAILQ_HEAD_INITIALIZER(bdrv_states
);
87 static QLIST_HEAD(, BlockDriver
) bdrv_drivers
=
88 QLIST_HEAD_INITIALIZER(bdrv_drivers
);
90 /* The device to use for VM snapshots */
91 static BlockDriverState
*bs_snapshots
;
93 /* If non-zero, use only whitelisted block drivers */
94 static int use_bdrv_whitelist
;
/*
 * Return non-zero if @filename starts with a Windows drive-letter prefix:
 * an ASCII letter (either case) followed by a colon, e.g. "c:" or "Z:\".
 * NOTE(review): the trailing "filename[1] == ':'" comparison was truncated
 * in this copy and restored from upstream QEMU.
 */
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}
104 int is_windows_drive(const char *filename
)
106 if (is_windows_drive_prefix(filename
) &&
109 if (strstart(filename
, "\\\\.\\", NULL
) ||
110 strstart(filename
, "//./", NULL
))
116 /* throttling disk I/O limits */
117 void bdrv_io_limits_disable(BlockDriverState
*bs
)
119 bs
->io_limits_enabled
= false;
121 while (qemu_co_queue_next(&bs
->throttled_reqs
));
123 if (bs
->block_timer
) {
124 qemu_del_timer(bs
->block_timer
);
125 qemu_free_timer(bs
->block_timer
);
126 bs
->block_timer
= NULL
;
132 memset(&bs
->io_base
, 0, sizeof(bs
->io_base
));
135 static void bdrv_block_timer(void *opaque
)
137 BlockDriverState
*bs
= opaque
;
139 qemu_co_queue_next(&bs
->throttled_reqs
);
142 void bdrv_io_limits_enable(BlockDriverState
*bs
)
144 qemu_co_queue_init(&bs
->throttled_reqs
);
145 bs
->block_timer
= qemu_new_timer_ns(vm_clock
, bdrv_block_timer
, bs
);
146 bs
->slice_time
= 5 * BLOCK_IO_SLICE_TIME
;
147 bs
->slice_start
= qemu_get_clock_ns(vm_clock
);
148 bs
->slice_end
= bs
->slice_start
+ bs
->slice_time
;
149 memset(&bs
->io_base
, 0, sizeof(bs
->io_base
));
150 bs
->io_limits_enabled
= true;
153 bool bdrv_io_limits_enabled(BlockDriverState
*bs
)
155 BlockIOLimit
*io_limits
= &bs
->io_limits
;
156 return io_limits
->bps
[BLOCK_IO_LIMIT_READ
]
157 || io_limits
->bps
[BLOCK_IO_LIMIT_WRITE
]
158 || io_limits
->bps
[BLOCK_IO_LIMIT_TOTAL
]
159 || io_limits
->iops
[BLOCK_IO_LIMIT_READ
]
160 || io_limits
->iops
[BLOCK_IO_LIMIT_WRITE
]
161 || io_limits
->iops
[BLOCK_IO_LIMIT_TOTAL
];
164 static void bdrv_io_limits_intercept(BlockDriverState
*bs
,
165 bool is_write
, int nb_sectors
)
167 int64_t wait_time
= -1;
169 if (!qemu_co_queue_empty(&bs
->throttled_reqs
)) {
170 qemu_co_queue_wait(&bs
->throttled_reqs
);
173 /* In fact, we hope to keep each request's timing, in FIFO mode. The next
174 * throttled requests will not be dequeued until the current request is
175 * allowed to be serviced. So if the current request still exceeds the
176 * limits, it will be inserted to the head. All requests followed it will
177 * be still in throttled_reqs queue.
180 while (bdrv_exceed_io_limits(bs
, nb_sectors
, is_write
, &wait_time
)) {
181 qemu_mod_timer(bs
->block_timer
,
182 wait_time
+ qemu_get_clock_ns(vm_clock
));
183 qemu_co_queue_wait_insert_head(&bs
->throttled_reqs
);
186 qemu_co_queue_next(&bs
->throttled_reqs
);
/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    /* Windows drive letters ("c:", "c:\...") are not protocol prefixes */
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    return strchr(path, ':') != NULL;
}
202 int path_is_absolute(const char *path
)
206 /* specific case for names like: "\\.\d:" */
207 if (*path
== '/' || *path
== '\\')
210 p
= strchr(path
, ':');
216 return (*p
== '/' || *p
== '\\');
222 /* if filename is absolute, just copy it to dest. Otherwise, build a
223 path to it by considering it is relative to base_path. URL are
225 void path_combine(char *dest
, int dest_size
,
226 const char *base_path
,
227 const char *filename
)
234 if (path_is_absolute(filename
)) {
235 pstrcpy(dest
, dest_size
, filename
);
237 p
= strchr(base_path
, ':');
242 p1
= strrchr(base_path
, '/');
246 p2
= strrchr(base_path
, '\\');
258 if (len
> dest_size
- 1)
260 memcpy(dest
, base_path
, len
);
262 pstrcat(dest
, dest_size
, filename
);
266 void bdrv_register(BlockDriver
*bdrv
)
268 /* Block drivers without coroutine functions need emulation */
269 if (!bdrv
->bdrv_co_readv
) {
270 bdrv
->bdrv_co_readv
= bdrv_co_readv_em
;
271 bdrv
->bdrv_co_writev
= bdrv_co_writev_em
;
273 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
274 * the block driver lacks aio we need to emulate that too.
276 if (!bdrv
->bdrv_aio_readv
) {
277 /* add AIO emulation layer */
278 bdrv
->bdrv_aio_readv
= bdrv_aio_readv_em
;
279 bdrv
->bdrv_aio_writev
= bdrv_aio_writev_em
;
283 QLIST_INSERT_HEAD(&bdrv_drivers
, bdrv
, list
);
286 /* create a new block device (by default it is empty) */
287 BlockDriverState
*bdrv_new(const char *device_name
)
289 BlockDriverState
*bs
;
291 bs
= g_malloc0(sizeof(BlockDriverState
));
292 pstrcpy(bs
->device_name
, sizeof(bs
->device_name
), device_name
);
293 if (device_name
[0] != '\0') {
294 QTAILQ_INSERT_TAIL(&bdrv_states
, bs
, list
);
296 bdrv_iostatus_disable(bs
);
300 BlockDriver
*bdrv_find_format(const char *format_name
)
303 QLIST_FOREACH(drv1
, &bdrv_drivers
, list
) {
304 if (!strcmp(drv1
->format_name
, format_name
)) {
311 static int bdrv_is_whitelisted(BlockDriver
*drv
)
313 static const char *whitelist
[] = {
314 CONFIG_BDRV_WHITELIST
319 return 1; /* no whitelist, anything goes */
321 for (p
= whitelist
; *p
; p
++) {
322 if (!strcmp(drv
->format_name
, *p
)) {
329 BlockDriver
*bdrv_find_whitelisted_format(const char *format_name
)
331 BlockDriver
*drv
= bdrv_find_format(format_name
);
332 return drv
&& bdrv_is_whitelisted(drv
) ? drv
: NULL
;
335 int bdrv_create(BlockDriver
*drv
, const char* filename
,
336 QEMUOptionParameter
*options
)
338 if (!drv
->bdrv_create
)
341 return drv
->bdrv_create(filename
, options
);
344 int bdrv_create_file(const char* filename
, QEMUOptionParameter
*options
)
348 drv
= bdrv_find_protocol(filename
);
353 return bdrv_create(drv
, filename
, options
);
357 void get_tmp_filename(char *filename
, int size
)
359 char temp_dir
[MAX_PATH
];
361 GetTempPath(MAX_PATH
, temp_dir
);
362 GetTempFileName(temp_dir
, "qem", 0, filename
);
365 void get_tmp_filename(char *filename
, int size
)
369 /* XXX: race condition possible */
370 tmpdir
= getenv("TMPDIR");
373 snprintf(filename
, size
, "%s/vl.XXXXXX", tmpdir
);
374 fd
= mkstemp(filename
);
380 * Detect host devices. By convention, /dev/cdrom[N] is always
381 * recognized as a host CDROM.
383 static BlockDriver
*find_hdev_driver(const char *filename
)
385 int score_max
= 0, score
;
386 BlockDriver
*drv
= NULL
, *d
;
388 QLIST_FOREACH(d
, &bdrv_drivers
, list
) {
389 if (d
->bdrv_probe_device
) {
390 score
= d
->bdrv_probe_device(filename
);
391 if (score
> score_max
) {
401 BlockDriver
*bdrv_find_protocol(const char *filename
)
408 /* TODO Drivers without bdrv_file_open must be specified explicitly */
411 * XXX(hch): we really should not let host device detection
412 * override an explicit protocol specification, but moving this
413 * later breaks access to device names with colons in them.
414 * Thanks to the brain-dead persistent naming schemes on udev-
415 * based Linux systems those actually are quite common.
417 drv1
= find_hdev_driver(filename
);
422 if (!path_has_protocol(filename
)) {
423 return bdrv_find_format("file");
425 p
= strchr(filename
, ':');
428 if (len
> sizeof(protocol
) - 1)
429 len
= sizeof(protocol
) - 1;
430 memcpy(protocol
, filename
, len
);
431 protocol
[len
] = '\0';
432 QLIST_FOREACH(drv1
, &bdrv_drivers
, list
) {
433 if (drv1
->protocol_name
&&
434 !strcmp(drv1
->protocol_name
, protocol
)) {
441 static int find_image_format(const char *filename
, BlockDriver
**pdrv
)
443 int ret
, score
, score_max
;
444 BlockDriver
*drv1
, *drv
;
446 BlockDriverState
*bs
;
448 ret
= bdrv_file_open(&bs
, filename
, 0);
454 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
455 if (bs
->sg
|| !bdrv_is_inserted(bs
)) {
457 drv
= bdrv_find_format("raw");
465 ret
= bdrv_pread(bs
, 0, buf
, sizeof(buf
));
474 QLIST_FOREACH(drv1
, &bdrv_drivers
, list
) {
475 if (drv1
->bdrv_probe
) {
476 score
= drv1
->bdrv_probe(buf
, ret
, filename
);
477 if (score
> score_max
) {
491 * Set the current 'total_sectors' value
493 static int refresh_total_sectors(BlockDriverState
*bs
, int64_t hint
)
495 BlockDriver
*drv
= bs
->drv
;
497 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
501 /* query actual device if possible, otherwise just trust the hint */
502 if (drv
->bdrv_getlength
) {
503 int64_t length
= drv
->bdrv_getlength(bs
);
507 hint
= length
>> BDRV_SECTOR_BITS
;
510 bs
->total_sectors
= hint
;
515 * Set open flags for a given cache mode
517 * Return 0 on success, -1 if the cache mode was invalid.
519 int bdrv_parse_cache_flags(const char *mode
, int *flags
)
521 *flags
&= ~BDRV_O_CACHE_MASK
;
523 if (!strcmp(mode
, "off") || !strcmp(mode
, "none")) {
524 *flags
|= BDRV_O_NOCACHE
| BDRV_O_CACHE_WB
;
525 } else if (!strcmp(mode
, "directsync")) {
526 *flags
|= BDRV_O_NOCACHE
;
527 } else if (!strcmp(mode
, "writeback")) {
528 *flags
|= BDRV_O_CACHE_WB
;
529 } else if (!strcmp(mode
, "unsafe")) {
530 *flags
|= BDRV_O_CACHE_WB
;
531 *flags
|= BDRV_O_NO_FLUSH
;
532 } else if (!strcmp(mode
, "writethrough")) {
533 /* this is the default */
542 * The copy-on-read flag is actually a reference count so multiple users may
543 * use the feature without worrying about clobbering its previous state.
544 * Copy-on-read stays enabled until all users have called to disable it.
546 void bdrv_enable_copy_on_read(BlockDriverState
*bs
)
551 void bdrv_disable_copy_on_read(BlockDriverState
*bs
)
553 assert(bs
->copy_on_read
> 0);
558 * Common part for opening disk images and files
560 static int bdrv_open_common(BlockDriverState
*bs
, const char *filename
,
561 int flags
, BlockDriver
*drv
)
567 trace_bdrv_open_common(bs
, filename
, flags
, drv
->format_name
);
570 bs
->total_sectors
= 0;
574 bs
->open_flags
= flags
;
576 bs
->buffer_alignment
= 512;
578 assert(bs
->copy_on_read
== 0); /* bdrv_new() and bdrv_close() make it so */
579 if ((flags
& BDRV_O_RDWR
) && (flags
& BDRV_O_COPY_ON_READ
)) {
580 bdrv_enable_copy_on_read(bs
);
583 pstrcpy(bs
->filename
, sizeof(bs
->filename
), filename
);
584 bs
->backing_file
[0] = '\0';
586 if (use_bdrv_whitelist
&& !bdrv_is_whitelisted(drv
)) {
591 bs
->opaque
= g_malloc0(drv
->instance_size
);
593 bs
->enable_write_cache
= !!(flags
& BDRV_O_CACHE_WB
);
596 * Clear flags that are internal to the block layer before opening the
599 open_flags
= flags
& ~(BDRV_O_SNAPSHOT
| BDRV_O_NO_BACKING
);
602 * Snapshots should be writable.
604 if (bs
->is_temporary
) {
605 open_flags
|= BDRV_O_RDWR
;
608 bs
->keep_read_only
= bs
->read_only
= !(open_flags
& BDRV_O_RDWR
);
610 /* Open the image, either directly or using a protocol */
611 if (drv
->bdrv_file_open
) {
612 ret
= drv
->bdrv_file_open(bs
, filename
, open_flags
);
614 ret
= bdrv_file_open(&bs
->file
, filename
, open_flags
);
616 ret
= drv
->bdrv_open(bs
, open_flags
);
624 ret
= refresh_total_sectors(bs
, bs
->total_sectors
);
630 if (bs
->is_temporary
) {
638 bdrv_delete(bs
->file
);
648 * Opens a file using a protocol (file, host_device, nbd, ...)
650 int bdrv_file_open(BlockDriverState
**pbs
, const char *filename
, int flags
)
652 BlockDriverState
*bs
;
656 drv
= bdrv_find_protocol(filename
);
662 ret
= bdrv_open_common(bs
, filename
, flags
, drv
);
673 * Opens a disk image (raw, qcow2, vmdk, ...)
675 int bdrv_open(BlockDriverState
*bs
, const char *filename
, int flags
,
679 char tmp_filename
[PATH_MAX
];
681 if (flags
& BDRV_O_SNAPSHOT
) {
682 BlockDriverState
*bs1
;
685 BlockDriver
*bdrv_qcow2
;
686 QEMUOptionParameter
*options
;
687 char backing_filename
[PATH_MAX
];
689 /* if snapshot, we create a temporary backing file and open it
690 instead of opening 'filename' directly */
692 /* if there is a backing file, use it */
694 ret
= bdrv_open(bs1
, filename
, 0, drv
);
699 total_size
= bdrv_getlength(bs1
) & BDRV_SECTOR_MASK
;
701 if (bs1
->drv
&& bs1
->drv
->protocol_name
)
706 get_tmp_filename(tmp_filename
, sizeof(tmp_filename
));
708 /* Real path is meaningless for protocols */
710 snprintf(backing_filename
, sizeof(backing_filename
),
712 else if (!realpath(filename
, backing_filename
))
715 bdrv_qcow2
= bdrv_find_format("qcow2");
716 options
= parse_option_parameters("", bdrv_qcow2
->create_options
, NULL
);
718 set_option_parameter_int(options
, BLOCK_OPT_SIZE
, total_size
);
719 set_option_parameter(options
, BLOCK_OPT_BACKING_FILE
, backing_filename
);
721 set_option_parameter(options
, BLOCK_OPT_BACKING_FMT
,
725 ret
= bdrv_create(bdrv_qcow2
, tmp_filename
, options
);
726 free_option_parameters(options
);
731 filename
= tmp_filename
;
733 bs
->is_temporary
= 1;
736 /* Find the right image format driver */
738 ret
= find_image_format(filename
, &drv
);
742 goto unlink_and_fail
;
746 ret
= bdrv_open_common(bs
, filename
, flags
, drv
);
748 goto unlink_and_fail
;
751 /* If there is a backing file, use it */
752 if ((flags
& BDRV_O_NO_BACKING
) == 0 && bs
->backing_file
[0] != '\0') {
753 char backing_filename
[PATH_MAX
];
755 BlockDriver
*back_drv
= NULL
;
757 bs
->backing_hd
= bdrv_new("");
759 if (path_has_protocol(bs
->backing_file
)) {
760 pstrcpy(backing_filename
, sizeof(backing_filename
),
763 path_combine(backing_filename
, sizeof(backing_filename
),
764 filename
, bs
->backing_file
);
767 if (bs
->backing_format
[0] != '\0') {
768 back_drv
= bdrv_find_format(bs
->backing_format
);
771 /* backing files always opened read-only */
773 flags
& ~(BDRV_O_RDWR
| BDRV_O_SNAPSHOT
| BDRV_O_NO_BACKING
);
775 ret
= bdrv_open(bs
->backing_hd
, backing_filename
, back_flags
, back_drv
);
780 if (bs
->is_temporary
) {
781 bs
->backing_hd
->keep_read_only
= !(flags
& BDRV_O_RDWR
);
783 /* base image inherits from "parent" */
784 bs
->backing_hd
->keep_read_only
= bs
->keep_read_only
;
788 if (!bdrv_key_required(bs
)) {
789 bdrv_dev_change_media_cb(bs
, true);
792 /* throttling disk I/O limits */
793 if (bs
->io_limits_enabled
) {
794 bdrv_io_limits_enable(bs
);
800 if (bs
->is_temporary
) {
806 void bdrv_close(BlockDriverState
*bs
)
809 if (bs
== bs_snapshots
) {
812 if (bs
->backing_hd
) {
813 bdrv_delete(bs
->backing_hd
);
814 bs
->backing_hd
= NULL
;
816 bs
->drv
->bdrv_close(bs
);
819 if (bs
->is_temporary
) {
820 unlink(bs
->filename
);
825 bs
->copy_on_read
= 0;
827 if (bs
->file
!= NULL
) {
828 bdrv_close(bs
->file
);
831 bdrv_dev_change_media_cb(bs
, false);
834 /*throttling disk I/O limits*/
835 if (bs
->io_limits_enabled
) {
836 bdrv_io_limits_disable(bs
);
840 void bdrv_close_all(void)
842 BlockDriverState
*bs
;
844 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
850 * Wait for pending requests to complete across all BlockDriverStates
852 * This function does not flush data to disk, use bdrv_flush_all() for that
853 * after calling this function.
855 void bdrv_drain_all(void)
857 BlockDriverState
*bs
;
861 /* If requests are still pending there is a bug somewhere */
862 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
863 assert(QLIST_EMPTY(&bs
->tracked_requests
));
864 assert(qemu_co_queue_empty(&bs
->throttled_reqs
));
868 /* make a BlockDriverState anonymous by removing from bdrv_state list.
869 Also, NULL terminate the device_name to prevent double remove */
870 void bdrv_make_anon(BlockDriverState
*bs
)
872 if (bs
->device_name
[0] != '\0') {
873 QTAILQ_REMOVE(&bdrv_states
, bs
, list
);
875 bs
->device_name
[0] = '\0';
878 void bdrv_delete(BlockDriverState
*bs
)
882 /* remove from list, if necessary */
886 if (bs
->file
!= NULL
) {
887 bdrv_delete(bs
->file
);
890 assert(bs
!= bs_snapshots
);
894 int bdrv_attach_dev(BlockDriverState
*bs
, void *dev
)
895 /* TODO change to DeviceState *dev when all users are qdevified */
901 bdrv_iostatus_reset(bs
);
905 /* TODO qdevified devices don't use this, remove when devices are qdevified */
906 void bdrv_attach_dev_nofail(BlockDriverState
*bs
, void *dev
)
908 if (bdrv_attach_dev(bs
, dev
) < 0) {
913 void bdrv_detach_dev(BlockDriverState
*bs
, void *dev
)
914 /* TODO change to DeviceState *dev when all users are qdevified */
916 assert(bs
->dev
== dev
);
919 bs
->dev_opaque
= NULL
;
920 bs
->buffer_alignment
= 512;
923 /* TODO change to return DeviceState * when all users are qdevified */
924 void *bdrv_get_attached_dev(BlockDriverState
*bs
)
929 void bdrv_set_dev_ops(BlockDriverState
*bs
, const BlockDevOps
*ops
,
933 bs
->dev_opaque
= opaque
;
934 if (bdrv_dev_has_removable_media(bs
) && bs
== bs_snapshots
) {
939 static void bdrv_dev_change_media_cb(BlockDriverState
*bs
, bool load
)
941 if (bs
->dev_ops
&& bs
->dev_ops
->change_media_cb
) {
942 bs
->dev_ops
->change_media_cb(bs
->dev_opaque
, load
);
946 bool bdrv_dev_has_removable_media(BlockDriverState
*bs
)
948 return !bs
->dev
|| (bs
->dev_ops
&& bs
->dev_ops
->change_media_cb
);
951 void bdrv_dev_eject_request(BlockDriverState
*bs
, bool force
)
953 if (bs
->dev_ops
&& bs
->dev_ops
->eject_request_cb
) {
954 bs
->dev_ops
->eject_request_cb(bs
->dev_opaque
, force
);
958 bool bdrv_dev_is_tray_open(BlockDriverState
*bs
)
960 if (bs
->dev_ops
&& bs
->dev_ops
->is_tray_open
) {
961 return bs
->dev_ops
->is_tray_open(bs
->dev_opaque
);
966 static void bdrv_dev_resize_cb(BlockDriverState
*bs
)
968 if (bs
->dev_ops
&& bs
->dev_ops
->resize_cb
) {
969 bs
->dev_ops
->resize_cb(bs
->dev_opaque
);
973 bool bdrv_dev_is_medium_locked(BlockDriverState
*bs
)
975 if (bs
->dev_ops
&& bs
->dev_ops
->is_medium_locked
) {
976 return bs
->dev_ops
->is_medium_locked(bs
->dev_opaque
);
982 * Run consistency checks on an image
984 * Returns 0 if the check could be completed (it doesn't mean that the image is
985 * free of errors) or -errno when an internal error occurred. The results of the
986 * check are stored in res.
988 int bdrv_check(BlockDriverState
*bs
, BdrvCheckResult
*res
)
990 if (bs
->drv
->bdrv_check
== NULL
) {
994 memset(res
, 0, sizeof(*res
));
995 return bs
->drv
->bdrv_check(bs
, res
);
998 #define COMMIT_BUF_SECTORS 2048
1000 /* commit COW file into the raw image */
1001 int bdrv_commit(BlockDriverState
*bs
)
1003 BlockDriver
*drv
= bs
->drv
;
1004 BlockDriver
*backing_drv
;
1005 int64_t sector
, total_sectors
;
1006 int n
, ro
, open_flags
;
1007 int ret
= 0, rw_ret
= 0;
1009 char filename
[1024];
1010 BlockDriverState
*bs_rw
, *bs_ro
;
1015 if (!bs
->backing_hd
) {
1019 if (bs
->backing_hd
->keep_read_only
) {
1023 if (bdrv_in_use(bs
) || bdrv_in_use(bs
->backing_hd
)) {
1027 backing_drv
= bs
->backing_hd
->drv
;
1028 ro
= bs
->backing_hd
->read_only
;
1029 strncpy(filename
, bs
->backing_hd
->filename
, sizeof(filename
));
1030 open_flags
= bs
->backing_hd
->open_flags
;
1034 bdrv_delete(bs
->backing_hd
);
1035 bs
->backing_hd
= NULL
;
1036 bs_rw
= bdrv_new("");
1037 rw_ret
= bdrv_open(bs_rw
, filename
, open_flags
| BDRV_O_RDWR
,
1041 /* try to re-open read-only */
1042 bs_ro
= bdrv_new("");
1043 ret
= bdrv_open(bs_ro
, filename
, open_flags
& ~BDRV_O_RDWR
,
1047 /* drive not functional anymore */
1051 bs
->backing_hd
= bs_ro
;
1054 bs
->backing_hd
= bs_rw
;
1057 total_sectors
= bdrv_getlength(bs
) >> BDRV_SECTOR_BITS
;
1058 buf
= g_malloc(COMMIT_BUF_SECTORS
* BDRV_SECTOR_SIZE
);
1060 for (sector
= 0; sector
< total_sectors
; sector
+= n
) {
1061 if (bdrv_is_allocated(bs
, sector
, COMMIT_BUF_SECTORS
, &n
)) {
1063 if (bdrv_read(bs
, sector
, buf
, n
) != 0) {
1068 if (bdrv_write(bs
->backing_hd
, sector
, buf
, n
) != 0) {
1075 if (drv
->bdrv_make_empty
) {
1076 ret
= drv
->bdrv_make_empty(bs
);
1081 * Make sure all data we wrote to the backing device is actually
1085 bdrv_flush(bs
->backing_hd
);
1092 bdrv_delete(bs
->backing_hd
);
1093 bs
->backing_hd
= NULL
;
1094 bs_ro
= bdrv_new("");
1095 ret
= bdrv_open(bs_ro
, filename
, open_flags
& ~BDRV_O_RDWR
,
1099 /* drive not functional anymore */
1103 bs
->backing_hd
= bs_ro
;
1104 bs
->backing_hd
->keep_read_only
= 0;
1110 void bdrv_commit_all(void)
1112 BlockDriverState
*bs
;
1114 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
1119 struct BdrvTrackedRequest
{
1120 BlockDriverState
*bs
;
1124 QLIST_ENTRY(BdrvTrackedRequest
) list
;
1125 Coroutine
*co
; /* owner, used for deadlock detection */
1126 CoQueue wait_queue
; /* coroutines blocked on this request */
1130 * Remove an active request from the tracked requests list
1132 * This function should be called when a tracked request is completing.
1134 static void tracked_request_end(BdrvTrackedRequest
*req
)
1136 QLIST_REMOVE(req
, list
);
1137 qemu_co_queue_restart_all(&req
->wait_queue
);
1141 * Add an active request to the tracked requests list
1143 static void tracked_request_begin(BdrvTrackedRequest
*req
,
1144 BlockDriverState
*bs
,
1146 int nb_sectors
, bool is_write
)
1148 *req
= (BdrvTrackedRequest
){
1150 .sector_num
= sector_num
,
1151 .nb_sectors
= nb_sectors
,
1152 .is_write
= is_write
,
1153 .co
= qemu_coroutine_self(),
1156 qemu_co_queue_init(&req
->wait_queue
);
1158 QLIST_INSERT_HEAD(&bs
->tracked_requests
, req
, list
);
1162 * Round a region to cluster boundaries
1164 static void round_to_clusters(BlockDriverState
*bs
,
1165 int64_t sector_num
, int nb_sectors
,
1166 int64_t *cluster_sector_num
,
1167 int *cluster_nb_sectors
)
1169 BlockDriverInfo bdi
;
1171 if (bdrv_get_info(bs
, &bdi
) < 0 || bdi
.cluster_size
== 0) {
1172 *cluster_sector_num
= sector_num
;
1173 *cluster_nb_sectors
= nb_sectors
;
1175 int64_t c
= bdi
.cluster_size
/ BDRV_SECTOR_SIZE
;
1176 *cluster_sector_num
= QEMU_ALIGN_DOWN(sector_num
, c
);
1177 *cluster_nb_sectors
= QEMU_ALIGN_UP(sector_num
- *cluster_sector_num
+
1182 static bool tracked_request_overlaps(BdrvTrackedRequest
*req
,
1183 int64_t sector_num
, int nb_sectors
) {
1185 if (sector_num
>= req
->sector_num
+ req
->nb_sectors
) {
1189 if (req
->sector_num
>= sector_num
+ nb_sectors
) {
1195 static void coroutine_fn
wait_for_overlapping_requests(BlockDriverState
*bs
,
1196 int64_t sector_num
, int nb_sectors
)
1198 BdrvTrackedRequest
*req
;
1199 int64_t cluster_sector_num
;
1200 int cluster_nb_sectors
;
1203 /* If we touch the same cluster it counts as an overlap. This guarantees
1204 * that allocating writes will be serialized and not race with each other
1205 * for the same cluster. For example, in copy-on-read it ensures that the
1206 * CoR read and write operations are atomic and guest writes cannot
1207 * interleave between them.
1209 round_to_clusters(bs
, sector_num
, nb_sectors
,
1210 &cluster_sector_num
, &cluster_nb_sectors
);
1214 QLIST_FOREACH(req
, &bs
->tracked_requests
, list
) {
1215 if (tracked_request_overlaps(req
, cluster_sector_num
,
1216 cluster_nb_sectors
)) {
1217 /* Hitting this means there was a reentrant request, for
1218 * example, a block driver issuing nested requests. This must
1219 * never happen since it means deadlock.
1221 assert(qemu_coroutine_self() != req
->co
);
1223 qemu_co_queue_wait(&req
->wait_queue
);
1234 * -EINVAL - backing format specified, but no file
1235 * -ENOSPC - can't update the backing file because no space is left in the
1237 * -ENOTSUP - format driver doesn't support changing the backing file
1239 int bdrv_change_backing_file(BlockDriverState
*bs
,
1240 const char *backing_file
, const char *backing_fmt
)
1242 BlockDriver
*drv
= bs
->drv
;
1244 if (drv
->bdrv_change_backing_file
!= NULL
) {
1245 return drv
->bdrv_change_backing_file(bs
, backing_file
, backing_fmt
);
1251 static int bdrv_check_byte_request(BlockDriverState
*bs
, int64_t offset
,
1256 if (!bdrv_is_inserted(bs
))
1262 len
= bdrv_getlength(bs
);
1267 if ((offset
> len
) || (len
- offset
< size
))
1273 static int bdrv_check_request(BlockDriverState
*bs
, int64_t sector_num
,
1276 return bdrv_check_byte_request(bs
, sector_num
* BDRV_SECTOR_SIZE
,
1277 nb_sectors
* BDRV_SECTOR_SIZE
);
1280 typedef struct RwCo
{
1281 BlockDriverState
*bs
;
1289 static void coroutine_fn
bdrv_rw_co_entry(void *opaque
)
1291 RwCo
*rwco
= opaque
;
1293 if (!rwco
->is_write
) {
1294 rwco
->ret
= bdrv_co_do_readv(rwco
->bs
, rwco
->sector_num
,
1295 rwco
->nb_sectors
, rwco
->qiov
);
1297 rwco
->ret
= bdrv_co_do_writev(rwco
->bs
, rwco
->sector_num
,
1298 rwco
->nb_sectors
, rwco
->qiov
);
1303 * Process a synchronous request using coroutines
1305 static int bdrv_rw_co(BlockDriverState
*bs
, int64_t sector_num
, uint8_t *buf
,
1306 int nb_sectors
, bool is_write
)
1309 struct iovec iov
= {
1310 .iov_base
= (void *)buf
,
1311 .iov_len
= nb_sectors
* BDRV_SECTOR_SIZE
,
1316 .sector_num
= sector_num
,
1317 .nb_sectors
= nb_sectors
,
1319 .is_write
= is_write
,
1323 qemu_iovec_init_external(&qiov
, &iov
, 1);
1325 if (qemu_in_coroutine()) {
1326 /* Fast-path if already in coroutine context */
1327 bdrv_rw_co_entry(&rwco
);
1329 co
= qemu_coroutine_create(bdrv_rw_co_entry
);
1330 qemu_coroutine_enter(co
, &rwco
);
1331 while (rwco
.ret
== NOT_DONE
) {
1338 /* return < 0 if error. See bdrv_write() for the return codes */
1339 int bdrv_read(BlockDriverState
*bs
, int64_t sector_num
,
1340 uint8_t *buf
, int nb_sectors
)
1342 return bdrv_rw_co(bs
, sector_num
, buf
, nb_sectors
, false);
1345 static void set_dirty_bitmap(BlockDriverState
*bs
, int64_t sector_num
,
1346 int nb_sectors
, int dirty
)
1349 unsigned long val
, idx
, bit
;
1351 start
= sector_num
/ BDRV_SECTORS_PER_DIRTY_CHUNK
;
1352 end
= (sector_num
+ nb_sectors
- 1) / BDRV_SECTORS_PER_DIRTY_CHUNK
;
1354 for (; start
<= end
; start
++) {
1355 idx
= start
/ (sizeof(unsigned long) * 8);
1356 bit
= start
% (sizeof(unsigned long) * 8);
1357 val
= bs
->dirty_bitmap
[idx
];
1359 if (!(val
& (1UL << bit
))) {
1364 if (val
& (1UL << bit
)) {
1366 val
&= ~(1UL << bit
);
1369 bs
->dirty_bitmap
[idx
] = val
;
1373 /* Return < 0 if error. Important errors are:
1374 -EIO generic I/O error (may happen for all errors)
1375 -ENOMEDIUM No media inserted.
1376 -EINVAL Invalid sector number or nb_sectors
1377 -EACCES Trying to write a read-only device
1379 int bdrv_write(BlockDriverState
*bs
, int64_t sector_num
,
1380 const uint8_t *buf
, int nb_sectors
)
1382 return bdrv_rw_co(bs
, sector_num
, (uint8_t *)buf
, nb_sectors
, true);
1385 int bdrv_pread(BlockDriverState
*bs
, int64_t offset
,
1386 void *buf
, int count1
)
1388 uint8_t tmp_buf
[BDRV_SECTOR_SIZE
];
1389 int len
, nb_sectors
, count
;
1394 /* first read to align to sector start */
1395 len
= (BDRV_SECTOR_SIZE
- offset
) & (BDRV_SECTOR_SIZE
- 1);
1398 sector_num
= offset
>> BDRV_SECTOR_BITS
;
1400 if ((ret
= bdrv_read(bs
, sector_num
, tmp_buf
, 1)) < 0)
1402 memcpy(buf
, tmp_buf
+ (offset
& (BDRV_SECTOR_SIZE
- 1)), len
);
1410 /* read the sectors "in place" */
1411 nb_sectors
= count
>> BDRV_SECTOR_BITS
;
1412 if (nb_sectors
> 0) {
1413 if ((ret
= bdrv_read(bs
, sector_num
, buf
, nb_sectors
)) < 0)
1415 sector_num
+= nb_sectors
;
1416 len
= nb_sectors
<< BDRV_SECTOR_BITS
;
1421 /* add data from the last sector */
1423 if ((ret
= bdrv_read(bs
, sector_num
, tmp_buf
, 1)) < 0)
1425 memcpy(buf
, tmp_buf
, count
);
1430 int bdrv_pwrite(BlockDriverState
*bs
, int64_t offset
,
1431 const void *buf
, int count1
)
1433 uint8_t tmp_buf
[BDRV_SECTOR_SIZE
];
1434 int len
, nb_sectors
, count
;
1439 /* first write to align to sector start */
1440 len
= (BDRV_SECTOR_SIZE
- offset
) & (BDRV_SECTOR_SIZE
- 1);
1443 sector_num
= offset
>> BDRV_SECTOR_BITS
;
1445 if ((ret
= bdrv_read(bs
, sector_num
, tmp_buf
, 1)) < 0)
1447 memcpy(tmp_buf
+ (offset
& (BDRV_SECTOR_SIZE
- 1)), buf
, len
);
1448 if ((ret
= bdrv_write(bs
, sector_num
, tmp_buf
, 1)) < 0)
1457 /* write the sectors "in place" */
1458 nb_sectors
= count
>> BDRV_SECTOR_BITS
;
1459 if (nb_sectors
> 0) {
1460 if ((ret
= bdrv_write(bs
, sector_num
, buf
, nb_sectors
)) < 0)
1462 sector_num
+= nb_sectors
;
1463 len
= nb_sectors
<< BDRV_SECTOR_BITS
;
1468 /* add data from the last sector */
1470 if ((ret
= bdrv_read(bs
, sector_num
, tmp_buf
, 1)) < 0)
1472 memcpy(tmp_buf
, buf
, count
);
1473 if ((ret
= bdrv_write(bs
, sector_num
, tmp_buf
, 1)) < 0)
1480 * Writes to the file and ensures that no writes are reordered across this
1481 * request (acts as a barrier)
1483 * Returns 0 on success, -errno in error cases.
1485 int bdrv_pwrite_sync(BlockDriverState
*bs
, int64_t offset
,
1486 const void *buf
, int count
)
1490 ret
= bdrv_pwrite(bs
, offset
, buf
, count
);
1495 /* No flush needed for cache modes that use O_DSYNC */
1496 if ((bs
->open_flags
& BDRV_O_CACHE_WB
) != 0) {
1503 static int coroutine_fn
bdrv_co_copy_on_readv(BlockDriverState
*bs
,
1504 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
)
1506 /* Perform I/O through a temporary buffer so that users who scribble over
1507 * their read buffer while the operation is in progress do not end up
1508 * modifying the image file. This is critical for zero-copy guest I/O
1509 * where anything might happen inside guest memory.
1511 void *bounce_buffer
;
1514 QEMUIOVector bounce_qiov
;
1515 int64_t cluster_sector_num
;
1516 int cluster_nb_sectors
;
1520 /* Cover entire cluster so no additional backing file I/O is required when
1521 * allocating cluster in the image file.
1523 round_to_clusters(bs
, sector_num
, nb_sectors
,
1524 &cluster_sector_num
, &cluster_nb_sectors
);
1526 trace_bdrv_co_copy_on_readv(bs
, sector_num
, nb_sectors
,
1527 cluster_sector_num
, cluster_nb_sectors
);
1529 iov
.iov_len
= cluster_nb_sectors
* BDRV_SECTOR_SIZE
;
1530 iov
.iov_base
= bounce_buffer
= qemu_blockalign(bs
, iov
.iov_len
);
1531 qemu_iovec_init_external(&bounce_qiov
, &iov
, 1);
1533 ret
= bs
->drv
->bdrv_co_readv(bs
, cluster_sector_num
, cluster_nb_sectors
,
1539 ret
= bs
->drv
->bdrv_co_writev(bs
, cluster_sector_num
, cluster_nb_sectors
,
1542 /* It might be okay to ignore write errors for guest requests. If this
1543 * is a deliberate copy-on-read then we don't want to ignore the error.
1544 * Simply report it in all cases.
1549 skip_bytes
= (sector_num
- cluster_sector_num
) * BDRV_SECTOR_SIZE
;
1550 qemu_iovec_from_buffer(qiov
, bounce_buffer
+ skip_bytes
,
1551 nb_sectors
* BDRV_SECTOR_SIZE
);
1554 qemu_vfree(bounce_buffer
);
1559 * Handle a read request in coroutine context
1561 static int coroutine_fn
bdrv_co_do_readv(BlockDriverState
*bs
,
1562 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
)
1564 BlockDriver
*drv
= bs
->drv
;
1565 BdrvTrackedRequest req
;
1571 if (bdrv_check_request(bs
, sector_num
, nb_sectors
)) {
1575 /* throttling disk read I/O */
1576 if (bs
->io_limits_enabled
) {
1577 bdrv_io_limits_intercept(bs
, false, nb_sectors
);
1580 if (bs
->copy_on_read
) {
1581 wait_for_overlapping_requests(bs
, sector_num
, nb_sectors
);
1584 tracked_request_begin(&req
, bs
, sector_num
, nb_sectors
, false);
1586 if (bs
->copy_on_read
) {
1589 ret
= bdrv_co_is_allocated(bs
, sector_num
, nb_sectors
, &pnum
);
1594 if (!ret
|| pnum
!= nb_sectors
) {
1595 ret
= bdrv_co_copy_on_readv(bs
, sector_num
, nb_sectors
, qiov
);
1600 ret
= drv
->bdrv_co_readv(bs
, sector_num
, nb_sectors
, qiov
);
1603 tracked_request_end(&req
);
1607 int coroutine_fn
bdrv_co_readv(BlockDriverState
*bs
, int64_t sector_num
,
1608 int nb_sectors
, QEMUIOVector
*qiov
)
1610 trace_bdrv_co_readv(bs
, sector_num
, nb_sectors
);
1612 return bdrv_co_do_readv(bs
, sector_num
, nb_sectors
, qiov
);
1616 * Handle a write request in coroutine context
1618 static int coroutine_fn
bdrv_co_do_writev(BlockDriverState
*bs
,
1619 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
)
1621 BlockDriver
*drv
= bs
->drv
;
1622 BdrvTrackedRequest req
;
1628 if (bs
->read_only
) {
1631 if (bdrv_check_request(bs
, sector_num
, nb_sectors
)) {
1635 /* throttling disk write I/O */
1636 if (bs
->io_limits_enabled
) {
1637 bdrv_io_limits_intercept(bs
, true, nb_sectors
);
1640 if (bs
->copy_on_read
) {
1641 wait_for_overlapping_requests(bs
, sector_num
, nb_sectors
);
1644 tracked_request_begin(&req
, bs
, sector_num
, nb_sectors
, true);
1646 ret
= drv
->bdrv_co_writev(bs
, sector_num
, nb_sectors
, qiov
);
1648 if (bs
->dirty_bitmap
) {
1649 set_dirty_bitmap(bs
, sector_num
, nb_sectors
, 1);
1652 if (bs
->wr_highest_sector
< sector_num
+ nb_sectors
- 1) {
1653 bs
->wr_highest_sector
= sector_num
+ nb_sectors
- 1;
1656 tracked_request_end(&req
);
1661 int coroutine_fn
bdrv_co_writev(BlockDriverState
*bs
, int64_t sector_num
,
1662 int nb_sectors
, QEMUIOVector
*qiov
)
1664 trace_bdrv_co_writev(bs
, sector_num
, nb_sectors
);
1666 return bdrv_co_do_writev(bs
, sector_num
, nb_sectors
, qiov
);
1670 * Truncate file to 'offset' bytes (needed only for file protocols)
1672 int bdrv_truncate(BlockDriverState
*bs
, int64_t offset
)
1674 BlockDriver
*drv
= bs
->drv
;
1678 if (!drv
->bdrv_truncate
)
1682 if (bdrv_in_use(bs
))
1684 ret
= drv
->bdrv_truncate(bs
, offset
);
1686 ret
= refresh_total_sectors(bs
, offset
>> BDRV_SECTOR_BITS
);
1687 bdrv_dev_resize_cb(bs
);
1693 * Length of a allocated file in bytes. Sparse files are counted by actual
1694 * allocated space. Return < 0 if error or unknown.
1696 int64_t bdrv_get_allocated_file_size(BlockDriverState
*bs
)
1698 BlockDriver
*drv
= bs
->drv
;
1702 if (drv
->bdrv_get_allocated_file_size
) {
1703 return drv
->bdrv_get_allocated_file_size(bs
);
1706 return bdrv_get_allocated_file_size(bs
->file
);
1712 * Length of a file in bytes. Return < 0 if error or unknown.
1714 int64_t bdrv_getlength(BlockDriverState
*bs
)
1716 BlockDriver
*drv
= bs
->drv
;
1720 if (bs
->growable
|| bdrv_dev_has_removable_media(bs
)) {
1721 if (drv
->bdrv_getlength
) {
1722 return drv
->bdrv_getlength(bs
);
1725 return bs
->total_sectors
* BDRV_SECTOR_SIZE
;
1728 /* return 0 as number of sectors if no device present or error */
1729 void bdrv_get_geometry(BlockDriverState
*bs
, uint64_t *nb_sectors_ptr
)
1732 length
= bdrv_getlength(bs
);
1736 length
= length
>> BDRV_SECTOR_BITS
;
1737 *nb_sectors_ptr
= length
;
1741 uint8_t boot_ind
; /* 0x80 - active */
1742 uint8_t head
; /* starting head */
1743 uint8_t sector
; /* starting sector */
1744 uint8_t cyl
; /* starting cylinder */
1745 uint8_t sys_ind
; /* What partition type */
1746 uint8_t end_head
; /* end head */
1747 uint8_t end_sector
; /* end sector */
1748 uint8_t end_cyl
; /* end cylinder */
1749 uint32_t start_sect
; /* starting sector counting from 0 */
1750 uint32_t nr_sects
; /* nr of sectors in partition */
1753 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1754 static int guess_disk_lchs(BlockDriverState
*bs
,
1755 int *pcylinders
, int *pheads
, int *psectors
)
1757 uint8_t buf
[BDRV_SECTOR_SIZE
];
1758 int ret
, i
, heads
, sectors
, cylinders
;
1759 struct partition
*p
;
1761 uint64_t nb_sectors
;
1763 bdrv_get_geometry(bs
, &nb_sectors
);
1765 ret
= bdrv_read(bs
, 0, buf
, 1);
1768 /* test msdos magic */
1769 if (buf
[510] != 0x55 || buf
[511] != 0xaa)
1771 for(i
= 0; i
< 4; i
++) {
1772 p
= ((struct partition
*)(buf
+ 0x1be)) + i
;
1773 nr_sects
= le32_to_cpu(p
->nr_sects
);
1774 if (nr_sects
&& p
->end_head
) {
1775 /* We make the assumption that the partition terminates on
1776 a cylinder boundary */
1777 heads
= p
->end_head
+ 1;
1778 sectors
= p
->end_sector
& 63;
1781 cylinders
= nb_sectors
/ (heads
* sectors
);
1782 if (cylinders
< 1 || cylinders
> 16383)
1785 *psectors
= sectors
;
1786 *pcylinders
= cylinders
;
1788 printf("guessed geometry: LCHS=%d %d %d\n",
1789 cylinders
, heads
, sectors
);
1797 void bdrv_guess_geometry(BlockDriverState
*bs
, int *pcyls
, int *pheads
, int *psecs
)
1799 int translation
, lba_detected
= 0;
1800 int cylinders
, heads
, secs
;
1801 uint64_t nb_sectors
;
1803 /* if a geometry hint is available, use it */
1804 bdrv_get_geometry(bs
, &nb_sectors
);
1805 bdrv_get_geometry_hint(bs
, &cylinders
, &heads
, &secs
);
1806 translation
= bdrv_get_translation_hint(bs
);
1807 if (cylinders
!= 0) {
1812 if (guess_disk_lchs(bs
, &cylinders
, &heads
, &secs
) == 0) {
1814 /* if heads > 16, it means that a BIOS LBA
1815 translation was active, so the default
1816 hardware geometry is OK */
1818 goto default_geometry
;
1823 /* disable any translation to be in sync with
1824 the logical geometry */
1825 if (translation
== BIOS_ATA_TRANSLATION_AUTO
) {
1826 bdrv_set_translation_hint(bs
,
1827 BIOS_ATA_TRANSLATION_NONE
);
1832 /* if no geometry, use a standard physical disk geometry */
1833 cylinders
= nb_sectors
/ (16 * 63);
1835 if (cylinders
> 16383)
1837 else if (cylinders
< 2)
1842 if ((lba_detected
== 1) && (translation
== BIOS_ATA_TRANSLATION_AUTO
)) {
1843 if ((*pcyls
* *pheads
) <= 131072) {
1844 bdrv_set_translation_hint(bs
,
1845 BIOS_ATA_TRANSLATION_LARGE
);
1847 bdrv_set_translation_hint(bs
,
1848 BIOS_ATA_TRANSLATION_LBA
);
1852 bdrv_set_geometry_hint(bs
, *pcyls
, *pheads
, *psecs
);
1856 void bdrv_set_geometry_hint(BlockDriverState
*bs
,
1857 int cyls
, int heads
, int secs
)
1864 void bdrv_set_translation_hint(BlockDriverState
*bs
, int translation
)
1866 bs
->translation
= translation
;
1869 void bdrv_get_geometry_hint(BlockDriverState
*bs
,
1870 int *pcyls
, int *pheads
, int *psecs
)
1873 *pheads
= bs
->heads
;
1877 /* throttling disk io limits */
1878 void bdrv_set_io_limits(BlockDriverState
*bs
,
1879 BlockIOLimit
*io_limits
)
1881 bs
->io_limits
= *io_limits
;
1882 bs
->io_limits_enabled
= bdrv_io_limits_enabled(bs
);
1885 /* Recognize floppy formats */
1886 typedef struct FDFormat
{
1893 static const FDFormat fd_formats
[] = {
1894 /* First entry is default format */
1895 /* 1.44 MB 3"1/2 floppy disks */
1896 { FDRIVE_DRV_144
, 18, 80, 1, },
1897 { FDRIVE_DRV_144
, 20, 80, 1, },
1898 { FDRIVE_DRV_144
, 21, 80, 1, },
1899 { FDRIVE_DRV_144
, 21, 82, 1, },
1900 { FDRIVE_DRV_144
, 21, 83, 1, },
1901 { FDRIVE_DRV_144
, 22, 80, 1, },
1902 { FDRIVE_DRV_144
, 23, 80, 1, },
1903 { FDRIVE_DRV_144
, 24, 80, 1, },
1904 /* 2.88 MB 3"1/2 floppy disks */
1905 { FDRIVE_DRV_288
, 36, 80, 1, },
1906 { FDRIVE_DRV_288
, 39, 80, 1, },
1907 { FDRIVE_DRV_288
, 40, 80, 1, },
1908 { FDRIVE_DRV_288
, 44, 80, 1, },
1909 { FDRIVE_DRV_288
, 48, 80, 1, },
1910 /* 720 kB 3"1/2 floppy disks */
1911 { FDRIVE_DRV_144
, 9, 80, 1, },
1912 { FDRIVE_DRV_144
, 10, 80, 1, },
1913 { FDRIVE_DRV_144
, 10, 82, 1, },
1914 { FDRIVE_DRV_144
, 10, 83, 1, },
1915 { FDRIVE_DRV_144
, 13, 80, 1, },
1916 { FDRIVE_DRV_144
, 14, 80, 1, },
1917 /* 1.2 MB 5"1/4 floppy disks */
1918 { FDRIVE_DRV_120
, 15, 80, 1, },
1919 { FDRIVE_DRV_120
, 18, 80, 1, },
1920 { FDRIVE_DRV_120
, 18, 82, 1, },
1921 { FDRIVE_DRV_120
, 18, 83, 1, },
1922 { FDRIVE_DRV_120
, 20, 80, 1, },
1923 /* 720 kB 5"1/4 floppy disks */
1924 { FDRIVE_DRV_120
, 9, 80, 1, },
1925 { FDRIVE_DRV_120
, 11, 80, 1, },
1926 /* 360 kB 5"1/4 floppy disks */
1927 { FDRIVE_DRV_120
, 9, 40, 1, },
1928 { FDRIVE_DRV_120
, 9, 40, 0, },
1929 { FDRIVE_DRV_120
, 10, 41, 1, },
1930 { FDRIVE_DRV_120
, 10, 42, 1, },
1931 /* 320 kB 5"1/4 floppy disks */
1932 { FDRIVE_DRV_120
, 8, 40, 1, },
1933 { FDRIVE_DRV_120
, 8, 40, 0, },
1934 /* 360 kB must match 5"1/4 better than 3"1/2... */
1935 { FDRIVE_DRV_144
, 9, 80, 0, },
1937 { FDRIVE_DRV_NONE
, -1, -1, 0, },
1940 void bdrv_get_floppy_geometry_hint(BlockDriverState
*bs
, int *nb_heads
,
1941 int *max_track
, int *last_sect
,
1942 FDriveType drive_in
, FDriveType
*drive
)
1944 const FDFormat
*parse
;
1945 uint64_t nb_sectors
, size
;
1946 int i
, first_match
, match
;
1948 bdrv_get_geometry_hint(bs
, nb_heads
, max_track
, last_sect
);
1949 if (*nb_heads
!= 0 && *max_track
!= 0 && *last_sect
!= 0) {
1950 /* User defined disk */
1952 bdrv_get_geometry(bs
, &nb_sectors
);
1955 for (i
= 0; ; i
++) {
1956 parse
= &fd_formats
[i
];
1957 if (parse
->drive
== FDRIVE_DRV_NONE
) {
1960 if (drive_in
== parse
->drive
||
1961 drive_in
== FDRIVE_DRV_NONE
) {
1962 size
= (parse
->max_head
+ 1) * parse
->max_track
*
1964 if (nb_sectors
== size
) {
1968 if (first_match
== -1) {
1974 if (first_match
== -1) {
1977 match
= first_match
;
1979 parse
= &fd_formats
[match
];
1981 *nb_heads
= parse
->max_head
+ 1;
1982 *max_track
= parse
->max_track
;
1983 *last_sect
= parse
->last_sect
;
1984 *drive
= parse
->drive
;
1988 int bdrv_get_translation_hint(BlockDriverState
*bs
)
1990 return bs
->translation
;
1993 void bdrv_set_on_error(BlockDriverState
*bs
, BlockErrorAction on_read_error
,
1994 BlockErrorAction on_write_error
)
1996 bs
->on_read_error
= on_read_error
;
1997 bs
->on_write_error
= on_write_error
;
2000 BlockErrorAction
bdrv_get_on_error(BlockDriverState
*bs
, int is_read
)
2002 return is_read
? bs
->on_read_error
: bs
->on_write_error
;
2005 int bdrv_is_read_only(BlockDriverState
*bs
)
2007 return bs
->read_only
;
2010 int bdrv_is_sg(BlockDriverState
*bs
)
2015 int bdrv_enable_write_cache(BlockDriverState
*bs
)
2017 return bs
->enable_write_cache
;
2020 int bdrv_is_encrypted(BlockDriverState
*bs
)
2022 if (bs
->backing_hd
&& bs
->backing_hd
->encrypted
)
2024 return bs
->encrypted
;
2027 int bdrv_key_required(BlockDriverState
*bs
)
2029 BlockDriverState
*backing_hd
= bs
->backing_hd
;
2031 if (backing_hd
&& backing_hd
->encrypted
&& !backing_hd
->valid_key
)
2033 return (bs
->encrypted
&& !bs
->valid_key
);
2036 int bdrv_set_key(BlockDriverState
*bs
, const char *key
)
2039 if (bs
->backing_hd
&& bs
->backing_hd
->encrypted
) {
2040 ret
= bdrv_set_key(bs
->backing_hd
, key
);
2046 if (!bs
->encrypted
) {
2048 } else if (!bs
->drv
|| !bs
->drv
->bdrv_set_key
) {
2051 ret
= bs
->drv
->bdrv_set_key(bs
, key
);
2054 } else if (!bs
->valid_key
) {
2056 /* call the change callback now, we skipped it on open */
2057 bdrv_dev_change_media_cb(bs
, true);
2062 void bdrv_get_format(BlockDriverState
*bs
, char *buf
, int buf_size
)
2067 pstrcpy(buf
, buf_size
, bs
->drv
->format_name
);
2071 void bdrv_iterate_format(void (*it
)(void *opaque
, const char *name
),
2076 QLIST_FOREACH(drv
, &bdrv_drivers
, list
) {
2077 it(opaque
, drv
->format_name
);
2081 BlockDriverState
*bdrv_find(const char *name
)
2083 BlockDriverState
*bs
;
2085 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
2086 if (!strcmp(name
, bs
->device_name
)) {
2093 BlockDriverState
*bdrv_next(BlockDriverState
*bs
)
2096 return QTAILQ_FIRST(&bdrv_states
);
2098 return QTAILQ_NEXT(bs
, list
);
2101 void bdrv_iterate(void (*it
)(void *opaque
, BlockDriverState
*bs
), void *opaque
)
2103 BlockDriverState
*bs
;
2105 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
2110 const char *bdrv_get_device_name(BlockDriverState
*bs
)
2112 return bs
->device_name
;
2115 void bdrv_flush_all(void)
2117 BlockDriverState
*bs
;
2119 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
2120 if (!bdrv_is_read_only(bs
) && bdrv_is_inserted(bs
)) {
2126 int bdrv_has_zero_init(BlockDriverState
*bs
)
2130 if (bs
->drv
->bdrv_has_zero_init
) {
2131 return bs
->drv
->bdrv_has_zero_init(bs
);
2137 typedef struct BdrvCoIsAllocatedData
{
2138 BlockDriverState
*bs
;
2144 } BdrvCoIsAllocatedData
;
2147 * Returns true iff the specified sector is present in the disk image. Drivers
2148 * not implementing the functionality are assumed to not support backing files,
2149 * hence all their sectors are reported as allocated.
2151 * If 'sector_num' is beyond the end of the disk image the return value is 0
2152 * and 'pnum' is set to 0.
2154 * 'pnum' is set to the number of sectors (including and immediately following
2155 * the specified sector) that are known to be in the same
2156 * allocated/unallocated state.
2158 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
2159 * beyond the end of the disk image it will be clamped.
2161 int coroutine_fn
bdrv_co_is_allocated(BlockDriverState
*bs
, int64_t sector_num
,
2162 int nb_sectors
, int *pnum
)
2166 if (sector_num
>= bs
->total_sectors
) {
2171 n
= bs
->total_sectors
- sector_num
;
2172 if (n
< nb_sectors
) {
2176 if (!bs
->drv
->bdrv_co_is_allocated
) {
2181 return bs
->drv
->bdrv_co_is_allocated(bs
, sector_num
, nb_sectors
, pnum
);
2184 /* Coroutine wrapper for bdrv_is_allocated() */
2185 static void coroutine_fn
bdrv_is_allocated_co_entry(void *opaque
)
2187 BdrvCoIsAllocatedData
*data
= opaque
;
2188 BlockDriverState
*bs
= data
->bs
;
2190 data
->ret
= bdrv_co_is_allocated(bs
, data
->sector_num
, data
->nb_sectors
,
2196 * Synchronous wrapper around bdrv_co_is_allocated().
2198 * See bdrv_co_is_allocated() for details.
2200 int bdrv_is_allocated(BlockDriverState
*bs
, int64_t sector_num
, int nb_sectors
,
2204 BdrvCoIsAllocatedData data
= {
2206 .sector_num
= sector_num
,
2207 .nb_sectors
= nb_sectors
,
2212 co
= qemu_coroutine_create(bdrv_is_allocated_co_entry
);
2213 qemu_coroutine_enter(co
, &data
);
2214 while (!data
.done
) {
2220 void bdrv_mon_event(const BlockDriverState
*bdrv
,
2221 BlockMonEventAction action
, int is_read
)
2224 const char *action_str
;
2227 case BDRV_ACTION_REPORT
:
2228 action_str
= "report";
2230 case BDRV_ACTION_IGNORE
:
2231 action_str
= "ignore";
2233 case BDRV_ACTION_STOP
:
2234 action_str
= "stop";
2240 data
= qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
2243 is_read
? "read" : "write");
2244 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR
, data
);
2246 qobject_decref(data
);
2249 BlockInfoList
*qmp_query_block(Error
**errp
)
2251 BlockInfoList
*head
= NULL
, *cur_item
= NULL
;
2252 BlockDriverState
*bs
;
2254 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
2255 BlockInfoList
*info
= g_malloc0(sizeof(*info
));
2257 info
->value
= g_malloc0(sizeof(*info
->value
));
2258 info
->value
->device
= g_strdup(bs
->device_name
);
2259 info
->value
->type
= g_strdup("unknown");
2260 info
->value
->locked
= bdrv_dev_is_medium_locked(bs
);
2261 info
->value
->removable
= bdrv_dev_has_removable_media(bs
);
2263 if (bdrv_dev_has_removable_media(bs
)) {
2264 info
->value
->has_tray_open
= true;
2265 info
->value
->tray_open
= bdrv_dev_is_tray_open(bs
);
2268 if (bdrv_iostatus_is_enabled(bs
)) {
2269 info
->value
->has_io_status
= true;
2270 info
->value
->io_status
= bs
->iostatus
;
2274 info
->value
->has_inserted
= true;
2275 info
->value
->inserted
= g_malloc0(sizeof(*info
->value
->inserted
));
2276 info
->value
->inserted
->file
= g_strdup(bs
->filename
);
2277 info
->value
->inserted
->ro
= bs
->read_only
;
2278 info
->value
->inserted
->drv
= g_strdup(bs
->drv
->format_name
);
2279 info
->value
->inserted
->encrypted
= bs
->encrypted
;
2280 if (bs
->backing_file
[0]) {
2281 info
->value
->inserted
->has_backing_file
= true;
2282 info
->value
->inserted
->backing_file
= g_strdup(bs
->backing_file
);
2285 if (bs
->io_limits_enabled
) {
2286 info
->value
->inserted
->bps
=
2287 bs
->io_limits
.bps
[BLOCK_IO_LIMIT_TOTAL
];
2288 info
->value
->inserted
->bps_rd
=
2289 bs
->io_limits
.bps
[BLOCK_IO_LIMIT_READ
];
2290 info
->value
->inserted
->bps_wr
=
2291 bs
->io_limits
.bps
[BLOCK_IO_LIMIT_WRITE
];
2292 info
->value
->inserted
->iops
=
2293 bs
->io_limits
.iops
[BLOCK_IO_LIMIT_TOTAL
];
2294 info
->value
->inserted
->iops_rd
=
2295 bs
->io_limits
.iops
[BLOCK_IO_LIMIT_READ
];
2296 info
->value
->inserted
->iops_wr
=
2297 bs
->io_limits
.iops
[BLOCK_IO_LIMIT_WRITE
];
2301 /* XXX: waiting for the qapi to support GSList */
2303 head
= cur_item
= info
;
2305 cur_item
->next
= info
;
2313 /* Consider exposing this as a full fledged QMP command */
2314 static BlockStats
*qmp_query_blockstat(const BlockDriverState
*bs
, Error
**errp
)
2318 s
= g_malloc0(sizeof(*s
));
2320 if (bs
->device_name
[0]) {
2321 s
->has_device
= true;
2322 s
->device
= g_strdup(bs
->device_name
);
2325 s
->stats
= g_malloc0(sizeof(*s
->stats
));
2326 s
->stats
->rd_bytes
= bs
->nr_bytes
[BDRV_ACCT_READ
];
2327 s
->stats
->wr_bytes
= bs
->nr_bytes
[BDRV_ACCT_WRITE
];
2328 s
->stats
->rd_operations
= bs
->nr_ops
[BDRV_ACCT_READ
];
2329 s
->stats
->wr_operations
= bs
->nr_ops
[BDRV_ACCT_WRITE
];
2330 s
->stats
->wr_highest_offset
= bs
->wr_highest_sector
* BDRV_SECTOR_SIZE
;
2331 s
->stats
->flush_operations
= bs
->nr_ops
[BDRV_ACCT_FLUSH
];
2332 s
->stats
->wr_total_time_ns
= bs
->total_time_ns
[BDRV_ACCT_WRITE
];
2333 s
->stats
->rd_total_time_ns
= bs
->total_time_ns
[BDRV_ACCT_READ
];
2334 s
->stats
->flush_total_time_ns
= bs
->total_time_ns
[BDRV_ACCT_FLUSH
];
2337 s
->has_parent
= true;
2338 s
->parent
= qmp_query_blockstat(bs
->file
, NULL
);
2344 BlockStatsList
*qmp_query_blockstats(Error
**errp
)
2346 BlockStatsList
*head
= NULL
, *cur_item
= NULL
;
2347 BlockDriverState
*bs
;
2349 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
2350 BlockStatsList
*info
= g_malloc0(sizeof(*info
));
2351 info
->value
= qmp_query_blockstat(bs
, NULL
);
2353 /* XXX: waiting for the qapi to support GSList */
2355 head
= cur_item
= info
;
2357 cur_item
->next
= info
;
2365 const char *bdrv_get_encrypted_filename(BlockDriverState
*bs
)
2367 if (bs
->backing_hd
&& bs
->backing_hd
->encrypted
)
2368 return bs
->backing_file
;
2369 else if (bs
->encrypted
)
2370 return bs
->filename
;
2375 void bdrv_get_backing_filename(BlockDriverState
*bs
,
2376 char *filename
, int filename_size
)
2378 pstrcpy(filename
, filename_size
, bs
->backing_file
);
2381 int bdrv_write_compressed(BlockDriverState
*bs
, int64_t sector_num
,
2382 const uint8_t *buf
, int nb_sectors
)
2384 BlockDriver
*drv
= bs
->drv
;
2387 if (!drv
->bdrv_write_compressed
)
2389 if (bdrv_check_request(bs
, sector_num
, nb_sectors
))
2392 if (bs
->dirty_bitmap
) {
2393 set_dirty_bitmap(bs
, sector_num
, nb_sectors
, 1);
2396 return drv
->bdrv_write_compressed(bs
, sector_num
, buf
, nb_sectors
);
2399 int bdrv_get_info(BlockDriverState
*bs
, BlockDriverInfo
*bdi
)
2401 BlockDriver
*drv
= bs
->drv
;
2404 if (!drv
->bdrv_get_info
)
2406 memset(bdi
, 0, sizeof(*bdi
));
2407 return drv
->bdrv_get_info(bs
, bdi
);
2410 int bdrv_save_vmstate(BlockDriverState
*bs
, const uint8_t *buf
,
2411 int64_t pos
, int size
)
2413 BlockDriver
*drv
= bs
->drv
;
2416 if (drv
->bdrv_save_vmstate
)
2417 return drv
->bdrv_save_vmstate(bs
, buf
, pos
, size
);
2419 return bdrv_save_vmstate(bs
->file
, buf
, pos
, size
);
2423 int bdrv_load_vmstate(BlockDriverState
*bs
, uint8_t *buf
,
2424 int64_t pos
, int size
)
2426 BlockDriver
*drv
= bs
->drv
;
2429 if (drv
->bdrv_load_vmstate
)
2430 return drv
->bdrv_load_vmstate(bs
, buf
, pos
, size
);
2432 return bdrv_load_vmstate(bs
->file
, buf
, pos
, size
);
2436 void bdrv_debug_event(BlockDriverState
*bs
, BlkDebugEvent event
)
2438 BlockDriver
*drv
= bs
->drv
;
2440 if (!drv
|| !drv
->bdrv_debug_event
) {
2444 return drv
->bdrv_debug_event(bs
, event
);
2448 /**************************************************************/
2449 /* handling of snapshots */
2451 int bdrv_can_snapshot(BlockDriverState
*bs
)
2453 BlockDriver
*drv
= bs
->drv
;
2454 if (!drv
|| !bdrv_is_inserted(bs
) || bdrv_is_read_only(bs
)) {
2458 if (!drv
->bdrv_snapshot_create
) {
2459 if (bs
->file
!= NULL
) {
2460 return bdrv_can_snapshot(bs
->file
);
2468 int bdrv_is_snapshot(BlockDriverState
*bs
)
2470 return !!(bs
->open_flags
& BDRV_O_SNAPSHOT
);
2473 BlockDriverState
*bdrv_snapshots(void)
2475 BlockDriverState
*bs
;
2478 return bs_snapshots
;
2482 while ((bs
= bdrv_next(bs
))) {
2483 if (bdrv_can_snapshot(bs
)) {
2491 int bdrv_snapshot_create(BlockDriverState
*bs
,
2492 QEMUSnapshotInfo
*sn_info
)
2494 BlockDriver
*drv
= bs
->drv
;
2497 if (drv
->bdrv_snapshot_create
)
2498 return drv
->bdrv_snapshot_create(bs
, sn_info
);
2500 return bdrv_snapshot_create(bs
->file
, sn_info
);
2504 int bdrv_snapshot_goto(BlockDriverState
*bs
,
2505 const char *snapshot_id
)
2507 BlockDriver
*drv
= bs
->drv
;
2512 if (drv
->bdrv_snapshot_goto
)
2513 return drv
->bdrv_snapshot_goto(bs
, snapshot_id
);
2516 drv
->bdrv_close(bs
);
2517 ret
= bdrv_snapshot_goto(bs
->file
, snapshot_id
);
2518 open_ret
= drv
->bdrv_open(bs
, bs
->open_flags
);
2520 bdrv_delete(bs
->file
);
2530 int bdrv_snapshot_delete(BlockDriverState
*bs
, const char *snapshot_id
)
2532 BlockDriver
*drv
= bs
->drv
;
2535 if (drv
->bdrv_snapshot_delete
)
2536 return drv
->bdrv_snapshot_delete(bs
, snapshot_id
);
2538 return bdrv_snapshot_delete(bs
->file
, snapshot_id
);
2542 int bdrv_snapshot_list(BlockDriverState
*bs
,
2543 QEMUSnapshotInfo
**psn_info
)
2545 BlockDriver
*drv
= bs
->drv
;
2548 if (drv
->bdrv_snapshot_list
)
2549 return drv
->bdrv_snapshot_list(bs
, psn_info
);
2551 return bdrv_snapshot_list(bs
->file
, psn_info
);
2555 int bdrv_snapshot_load_tmp(BlockDriverState
*bs
,
2556 const char *snapshot_name
)
2558 BlockDriver
*drv
= bs
->drv
;
2562 if (!bs
->read_only
) {
2565 if (drv
->bdrv_snapshot_load_tmp
) {
2566 return drv
->bdrv_snapshot_load_tmp(bs
, snapshot_name
);
2571 #define NB_SUFFIXES 4
2573 char *get_human_readable_size(char *buf
, int buf_size
, int64_t size
)
2575 static const char suffixes
[NB_SUFFIXES
] = "KMGT";
2580 snprintf(buf
, buf_size
, "%" PRId64
, size
);
2583 for(i
= 0; i
< NB_SUFFIXES
; i
++) {
2584 if (size
< (10 * base
)) {
2585 snprintf(buf
, buf_size
, "%0.1f%c",
2586 (double)size
/ base
,
2589 } else if (size
< (1000 * base
) || i
== (NB_SUFFIXES
- 1)) {
2590 snprintf(buf
, buf_size
, "%" PRId64
"%c",
2591 ((size
+ (base
>> 1)) / base
),
2601 char *bdrv_snapshot_dump(char *buf
, int buf_size
, QEMUSnapshotInfo
*sn
)
2603 char buf1
[128], date_buf
[128], clock_buf
[128];
2613 snprintf(buf
, buf_size
,
2614 "%-10s%-20s%7s%20s%15s",
2615 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2619 ptm
= localtime(&ti
);
2620 strftime(date_buf
, sizeof(date_buf
),
2621 "%Y-%m-%d %H:%M:%S", ptm
);
2623 localtime_r(&ti
, &tm
);
2624 strftime(date_buf
, sizeof(date_buf
),
2625 "%Y-%m-%d %H:%M:%S", &tm
);
2627 secs
= sn
->vm_clock_nsec
/ 1000000000;
2628 snprintf(clock_buf
, sizeof(clock_buf
),
2629 "%02d:%02d:%02d.%03d",
2631 (int)((secs
/ 60) % 60),
2633 (int)((sn
->vm_clock_nsec
/ 1000000) % 1000));
2634 snprintf(buf
, buf_size
,
2635 "%-10s%-20s%7s%20s%15s",
2636 sn
->id_str
, sn
->name
,
2637 get_human_readable_size(buf1
, sizeof(buf1
), sn
->vm_state_size
),
2644 /**************************************************************/
2647 BlockDriverAIOCB
*bdrv_aio_readv(BlockDriverState
*bs
, int64_t sector_num
,
2648 QEMUIOVector
*qiov
, int nb_sectors
,
2649 BlockDriverCompletionFunc
*cb
, void *opaque
)
2651 trace_bdrv_aio_readv(bs
, sector_num
, nb_sectors
, opaque
);
2653 return bdrv_co_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
,
2657 BlockDriverAIOCB
*bdrv_aio_writev(BlockDriverState
*bs
, int64_t sector_num
,
2658 QEMUIOVector
*qiov
, int nb_sectors
,
2659 BlockDriverCompletionFunc
*cb
, void *opaque
)
2661 trace_bdrv_aio_writev(bs
, sector_num
, nb_sectors
, opaque
);
2663 return bdrv_co_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
,
2668 typedef struct MultiwriteCB
{
2673 BlockDriverCompletionFunc
*cb
;
2675 QEMUIOVector
*free_qiov
;
2680 static void multiwrite_user_cb(MultiwriteCB
*mcb
)
2684 for (i
= 0; i
< mcb
->num_callbacks
; i
++) {
2685 mcb
->callbacks
[i
].cb(mcb
->callbacks
[i
].opaque
, mcb
->error
);
2686 if (mcb
->callbacks
[i
].free_qiov
) {
2687 qemu_iovec_destroy(mcb
->callbacks
[i
].free_qiov
);
2689 g_free(mcb
->callbacks
[i
].free_qiov
);
2690 qemu_vfree(mcb
->callbacks
[i
].free_buf
);
2694 static void multiwrite_cb(void *opaque
, int ret
)
2696 MultiwriteCB
*mcb
= opaque
;
2698 trace_multiwrite_cb(mcb
, ret
);
2700 if (ret
< 0 && !mcb
->error
) {
2704 mcb
->num_requests
--;
2705 if (mcb
->num_requests
== 0) {
2706 multiwrite_user_cb(mcb
);
2711 static int multiwrite_req_compare(const void *a
, const void *b
)
2713 const BlockRequest
*req1
= a
, *req2
= b
;
2716 * Note that we can't simply subtract req2->sector from req1->sector
2717 * here as that could overflow the return value.
2719 if (req1
->sector
> req2
->sector
) {
2721 } else if (req1
->sector
< req2
->sector
) {
2729 * Takes a bunch of requests and tries to merge them. Returns the number of
2730 * requests that remain after merging.
2732 static int multiwrite_merge(BlockDriverState
*bs
, BlockRequest
*reqs
,
2733 int num_reqs
, MultiwriteCB
*mcb
)
2737 // Sort requests by start sector
2738 qsort(reqs
, num_reqs
, sizeof(*reqs
), &multiwrite_req_compare
);
2740 // Check if adjacent requests touch the same clusters. If so, combine them,
2741 // filling up gaps with zero sectors.
2743 for (i
= 1; i
< num_reqs
; i
++) {
2745 int64_t oldreq_last
= reqs
[outidx
].sector
+ reqs
[outidx
].nb_sectors
;
2747 // This handles the cases that are valid for all block drivers, namely
2748 // exactly sequential writes and overlapping writes.
2749 if (reqs
[i
].sector
<= oldreq_last
) {
2753 // The block driver may decide that it makes sense to combine requests
2754 // even if there is a gap of some sectors between them. In this case,
2755 // the gap is filled with zeros (therefore only applicable for yet
2756 // unused space in format like qcow2).
2757 if (!merge
&& bs
->drv
->bdrv_merge_requests
) {
2758 merge
= bs
->drv
->bdrv_merge_requests(bs
, &reqs
[outidx
], &reqs
[i
]);
2761 if (reqs
[outidx
].qiov
->niov
+ reqs
[i
].qiov
->niov
+ 1 > IOV_MAX
) {
2767 QEMUIOVector
*qiov
= g_malloc0(sizeof(*qiov
));
2768 qemu_iovec_init(qiov
,
2769 reqs
[outidx
].qiov
->niov
+ reqs
[i
].qiov
->niov
+ 1);
2771 // Add the first request to the merged one. If the requests are
2772 // overlapping, drop the last sectors of the first request.
2773 size
= (reqs
[i
].sector
- reqs
[outidx
].sector
) << 9;
2774 qemu_iovec_concat(qiov
, reqs
[outidx
].qiov
, size
);
2776 // We might need to add some zeros between the two requests
2777 if (reqs
[i
].sector
> oldreq_last
) {
2778 size_t zero_bytes
= (reqs
[i
].sector
- oldreq_last
) << 9;
2779 uint8_t *buf
= qemu_blockalign(bs
, zero_bytes
);
2780 memset(buf
, 0, zero_bytes
);
2781 qemu_iovec_add(qiov
, buf
, zero_bytes
);
2782 mcb
->callbacks
[i
].free_buf
= buf
;
2785 // Add the second request
2786 qemu_iovec_concat(qiov
, reqs
[i
].qiov
, reqs
[i
].qiov
->size
);
2788 reqs
[outidx
].nb_sectors
= qiov
->size
>> 9;
2789 reqs
[outidx
].qiov
= qiov
;
2791 mcb
->callbacks
[i
].free_qiov
= reqs
[outidx
].qiov
;
2794 reqs
[outidx
].sector
= reqs
[i
].sector
;
2795 reqs
[outidx
].nb_sectors
= reqs
[i
].nb_sectors
;
2796 reqs
[outidx
].qiov
= reqs
[i
].qiov
;
2804 * Submit multiple AIO write requests at once.
2806 * On success, the function returns 0 and all requests in the reqs array have
2807 * been submitted. In error case this function returns -1, and any of the
2808 * requests may or may not be submitted yet. In particular, this means that the
2809 * callback will be called for some of the requests, for others it won't. The
2810 * caller must check the error field of the BlockRequest to wait for the right
2811 * callbacks (if error != 0, no callback will be called).
2813 * The implementation may modify the contents of the reqs array, e.g. to merge
2814 * requests. However, the fields opaque and error are left unmodified as they
2815 * are used to signal failure for a single request to the caller.
2817 int bdrv_aio_multiwrite(BlockDriverState
*bs
, BlockRequest
*reqs
, int num_reqs
)
2822 /* don't submit writes if we don't have a medium */
2823 if (bs
->drv
== NULL
) {
2824 for (i
= 0; i
< num_reqs
; i
++) {
2825 reqs
[i
].error
= -ENOMEDIUM
;
2830 if (num_reqs
== 0) {
2834 // Create MultiwriteCB structure
2835 mcb
= g_malloc0(sizeof(*mcb
) + num_reqs
* sizeof(*mcb
->callbacks
));
2836 mcb
->num_requests
= 0;
2837 mcb
->num_callbacks
= num_reqs
;
2839 for (i
= 0; i
< num_reqs
; i
++) {
2840 mcb
->callbacks
[i
].cb
= reqs
[i
].cb
;
2841 mcb
->callbacks
[i
].opaque
= reqs
[i
].opaque
;
2844 // Check for mergable requests
2845 num_reqs
= multiwrite_merge(bs
, reqs
, num_reqs
, mcb
);
2847 trace_bdrv_aio_multiwrite(mcb
, mcb
->num_callbacks
, num_reqs
);
2849 /* Run the aio requests. */
2850 mcb
->num_requests
= num_reqs
;
2851 for (i
= 0; i
< num_reqs
; i
++) {
2852 bdrv_aio_writev(bs
, reqs
[i
].sector
, reqs
[i
].qiov
,
2853 reqs
[i
].nb_sectors
, multiwrite_cb
, mcb
);
2859 void bdrv_aio_cancel(BlockDriverAIOCB
*acb
)
2861 acb
->pool
->cancel(acb
);
2864 /* block I/O throttling */
2865 static bool bdrv_exceed_bps_limits(BlockDriverState
*bs
, int nb_sectors
,
2866 bool is_write
, double elapsed_time
, uint64_t *wait
)
2868 uint64_t bps_limit
= 0;
2869 double bytes_limit
, bytes_base
, bytes_res
;
2870 double slice_time
, wait_time
;
2872 if (bs
->io_limits
.bps
[BLOCK_IO_LIMIT_TOTAL
]) {
2873 bps_limit
= bs
->io_limits
.bps
[BLOCK_IO_LIMIT_TOTAL
];
2874 } else if (bs
->io_limits
.bps
[is_write
]) {
2875 bps_limit
= bs
->io_limits
.bps
[is_write
];
2884 slice_time
= bs
->slice_end
- bs
->slice_start
;
2885 slice_time
/= (NANOSECONDS_PER_SECOND
);
2886 bytes_limit
= bps_limit
* slice_time
;
2887 bytes_base
= bs
->nr_bytes
[is_write
] - bs
->io_base
.bytes
[is_write
];
2888 if (bs
->io_limits
.bps
[BLOCK_IO_LIMIT_TOTAL
]) {
2889 bytes_base
+= bs
->nr_bytes
[!is_write
] - bs
->io_base
.bytes
[!is_write
];
2892 /* bytes_base: the bytes of data which have been read/written; and
2893 * it is obtained from the history statistic info.
2894 * bytes_res: the remaining bytes of data which need to be read/written.
2895 * (bytes_base + bytes_res) / bps_limit: used to calcuate
2896 * the total time for completing reading/writting all data.
2898 bytes_res
= (unsigned) nb_sectors
* BDRV_SECTOR_SIZE
;
2900 if (bytes_base
+ bytes_res
<= bytes_limit
) {
2908 /* Calc approx time to dispatch */
2909 wait_time
= (bytes_base
+ bytes_res
) / bps_limit
- elapsed_time
;
2911 /* When the I/O rate at runtime exceeds the limits,
2912 * bs->slice_end need to be extended in order that the current statistic
2913 * info can be kept until the timer fire, so it is increased and tuned
2914 * based on the result of experiment.
2916 bs
->slice_time
= wait_time
* BLOCK_IO_SLICE_TIME
* 10;
2917 bs
->slice_end
+= bs
->slice_time
- 3 * BLOCK_IO_SLICE_TIME
;
2919 *wait
= wait_time
* BLOCK_IO_SLICE_TIME
* 10;
2925 static bool bdrv_exceed_iops_limits(BlockDriverState
*bs
, bool is_write
,
2926 double elapsed_time
, uint64_t *wait
)
2928 uint64_t iops_limit
= 0;
2929 double ios_limit
, ios_base
;
2930 double slice_time
, wait_time
;
2932 if (bs
->io_limits
.iops
[BLOCK_IO_LIMIT_TOTAL
]) {
2933 iops_limit
= bs
->io_limits
.iops
[BLOCK_IO_LIMIT_TOTAL
];
2934 } else if (bs
->io_limits
.iops
[is_write
]) {
2935 iops_limit
= bs
->io_limits
.iops
[is_write
];
2944 slice_time
= bs
->slice_end
- bs
->slice_start
;
2945 slice_time
/= (NANOSECONDS_PER_SECOND
);
2946 ios_limit
= iops_limit
* slice_time
;
2947 ios_base
= bs
->nr_ops
[is_write
] - bs
->io_base
.ios
[is_write
];
2948 if (bs
->io_limits
.iops
[BLOCK_IO_LIMIT_TOTAL
]) {
2949 ios_base
+= bs
->nr_ops
[!is_write
] - bs
->io_base
.ios
[!is_write
];
2952 if (ios_base
+ 1 <= ios_limit
) {
2960 /* Calc approx time to dispatch */
2961 wait_time
= (ios_base
+ 1) / iops_limit
;
2962 if (wait_time
> elapsed_time
) {
2963 wait_time
= wait_time
- elapsed_time
;
2968 bs
->slice_time
= wait_time
* BLOCK_IO_SLICE_TIME
* 10;
2969 bs
->slice_end
+= bs
->slice_time
- 3 * BLOCK_IO_SLICE_TIME
;
2971 *wait
= wait_time
* BLOCK_IO_SLICE_TIME
* 10;
2977 static bool bdrv_exceed_io_limits(BlockDriverState
*bs
, int nb_sectors
,
2978 bool is_write
, int64_t *wait
)
2980 int64_t now
, max_wait
;
2981 uint64_t bps_wait
= 0, iops_wait
= 0;
2982 double elapsed_time
;
2983 int bps_ret
, iops_ret
;
2985 now
= qemu_get_clock_ns(vm_clock
);
2986 if ((bs
->slice_start
< now
)
2987 && (bs
->slice_end
> now
)) {
2988 bs
->slice_end
= now
+ bs
->slice_time
;
2990 bs
->slice_time
= 5 * BLOCK_IO_SLICE_TIME
;
2991 bs
->slice_start
= now
;
2992 bs
->slice_end
= now
+ bs
->slice_time
;
2994 bs
->io_base
.bytes
[is_write
] = bs
->nr_bytes
[is_write
];
2995 bs
->io_base
.bytes
[!is_write
] = bs
->nr_bytes
[!is_write
];
2997 bs
->io_base
.ios
[is_write
] = bs
->nr_ops
[is_write
];
2998 bs
->io_base
.ios
[!is_write
] = bs
->nr_ops
[!is_write
];
3001 elapsed_time
= now
- bs
->slice_start
;
3002 elapsed_time
/= (NANOSECONDS_PER_SECOND
);
3004 bps_ret
= bdrv_exceed_bps_limits(bs
, nb_sectors
,
3005 is_write
, elapsed_time
, &bps_wait
);
3006 iops_ret
= bdrv_exceed_iops_limits(bs
, is_write
,
3007 elapsed_time
, &iops_wait
);
3008 if (bps_ret
|| iops_ret
) {
3009 max_wait
= bps_wait
> iops_wait
? bps_wait
: iops_wait
;
3014 now
= qemu_get_clock_ns(vm_clock
);
3015 if (bs
->slice_end
< now
+ max_wait
) {
3016 bs
->slice_end
= now
+ max_wait
;
3029 /**************************************************************/
3030 /* async block device emulation */
3032 typedef struct BlockDriverAIOCBSync
{
3033 BlockDriverAIOCB common
;
3036 /* vector translation state */
3040 } BlockDriverAIOCBSync
;
3042 static void bdrv_aio_cancel_em(BlockDriverAIOCB
*blockacb
)
3044 BlockDriverAIOCBSync
*acb
=
3045 container_of(blockacb
, BlockDriverAIOCBSync
, common
);
3046 qemu_bh_delete(acb
->bh
);
3048 qemu_aio_release(acb
);
3051 static AIOPool bdrv_em_aio_pool
= {
3052 .aiocb_size
= sizeof(BlockDriverAIOCBSync
),
3053 .cancel
= bdrv_aio_cancel_em
,
3056 static void bdrv_aio_bh_cb(void *opaque
)
3058 BlockDriverAIOCBSync
*acb
= opaque
;
3061 qemu_iovec_from_buffer(acb
->qiov
, acb
->bounce
, acb
->qiov
->size
);
3062 qemu_vfree(acb
->bounce
);
3063 acb
->common
.cb(acb
->common
.opaque
, acb
->ret
);
3064 qemu_bh_delete(acb
->bh
);
3066 qemu_aio_release(acb
);
3069 static BlockDriverAIOCB
*bdrv_aio_rw_vector(BlockDriverState
*bs
,
3073 BlockDriverCompletionFunc
*cb
,
3078 BlockDriverAIOCBSync
*acb
;
3080 acb
= qemu_aio_get(&bdrv_em_aio_pool
, bs
, cb
, opaque
);
3081 acb
->is_write
= is_write
;
3083 acb
->bounce
= qemu_blockalign(bs
, qiov
->size
);
3084 acb
->bh
= qemu_bh_new(bdrv_aio_bh_cb
, acb
);
3087 qemu_iovec_to_buffer(acb
->qiov
, acb
->bounce
);
3088 acb
->ret
= bs
->drv
->bdrv_write(bs
, sector_num
, acb
->bounce
, nb_sectors
);
3090 acb
->ret
= bs
->drv
->bdrv_read(bs
, sector_num
, acb
->bounce
, nb_sectors
);
3093 qemu_bh_schedule(acb
->bh
);
3095 return &acb
->common
;
3098 static BlockDriverAIOCB
*bdrv_aio_readv_em(BlockDriverState
*bs
,
3099 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
3100 BlockDriverCompletionFunc
*cb
, void *opaque
)
3102 return bdrv_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
, cb
, opaque
, 0);
3105 static BlockDriverAIOCB
*bdrv_aio_writev_em(BlockDriverState
*bs
,
3106 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
3107 BlockDriverCompletionFunc
*cb
, void *opaque
)
3109 return bdrv_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
, cb
, opaque
, 1);
3113 typedef struct BlockDriverAIOCBCoroutine
{
3114 BlockDriverAIOCB common
;
3118 } BlockDriverAIOCBCoroutine
;
3120 static void bdrv_aio_co_cancel_em(BlockDriverAIOCB
*blockacb
)
3125 static AIOPool bdrv_em_co_aio_pool
= {
3126 .aiocb_size
= sizeof(BlockDriverAIOCBCoroutine
),
3127 .cancel
= bdrv_aio_co_cancel_em
,
3130 static void bdrv_co_em_bh(void *opaque
)
3132 BlockDriverAIOCBCoroutine
*acb
= opaque
;
3134 acb
->common
.cb(acb
->common
.opaque
, acb
->req
.error
);
3135 qemu_bh_delete(acb
->bh
);
3136 qemu_aio_release(acb
);
3139 /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
3140 static void coroutine_fn
bdrv_co_do_rw(void *opaque
)
3142 BlockDriverAIOCBCoroutine
*acb
= opaque
;
3143 BlockDriverState
*bs
= acb
->common
.bs
;
3145 if (!acb
->is_write
) {
3146 acb
->req
.error
= bdrv_co_do_readv(bs
, acb
->req
.sector
,
3147 acb
->req
.nb_sectors
, acb
->req
.qiov
);
3149 acb
->req
.error
= bdrv_co_do_writev(bs
, acb
->req
.sector
,
3150 acb
->req
.nb_sectors
, acb
->req
.qiov
);
3153 acb
->bh
= qemu_bh_new(bdrv_co_em_bh
, acb
);
3154 qemu_bh_schedule(acb
->bh
);
3157 static BlockDriverAIOCB
*bdrv_co_aio_rw_vector(BlockDriverState
*bs
,
3161 BlockDriverCompletionFunc
*cb
,
3166 BlockDriverAIOCBCoroutine
*acb
;
3168 acb
= qemu_aio_get(&bdrv_em_co_aio_pool
, bs
, cb
, opaque
);
3169 acb
->req
.sector
= sector_num
;
3170 acb
->req
.nb_sectors
= nb_sectors
;
3171 acb
->req
.qiov
= qiov
;
3172 acb
->is_write
= is_write
;
3174 co
= qemu_coroutine_create(bdrv_co_do_rw
);
3175 qemu_coroutine_enter(co
, acb
);
3177 return &acb
->common
;
3180 static void coroutine_fn
bdrv_aio_flush_co_entry(void *opaque
)
3182 BlockDriverAIOCBCoroutine
*acb
= opaque
;
3183 BlockDriverState
*bs
= acb
->common
.bs
;
3185 acb
->req
.error
= bdrv_co_flush(bs
);
3186 acb
->bh
= qemu_bh_new(bdrv_co_em_bh
, acb
);
3187 qemu_bh_schedule(acb
->bh
);
3190 BlockDriverAIOCB
*bdrv_aio_flush(BlockDriverState
*bs
,
3191 BlockDriverCompletionFunc
*cb
, void *opaque
)
3193 trace_bdrv_aio_flush(bs
, opaque
);
3196 BlockDriverAIOCBCoroutine
*acb
;
3198 acb
= qemu_aio_get(&bdrv_em_co_aio_pool
, bs
, cb
, opaque
);
3199 co
= qemu_coroutine_create(bdrv_aio_flush_co_entry
);
3200 qemu_coroutine_enter(co
, acb
);
3202 return &acb
->common
;
3205 static void coroutine_fn
bdrv_aio_discard_co_entry(void *opaque
)
3207 BlockDriverAIOCBCoroutine
*acb
= opaque
;
3208 BlockDriverState
*bs
= acb
->common
.bs
;
3210 acb
->req
.error
= bdrv_co_discard(bs
, acb
->req
.sector
, acb
->req
.nb_sectors
);
3211 acb
->bh
= qemu_bh_new(bdrv_co_em_bh
, acb
);
3212 qemu_bh_schedule(acb
->bh
);
3215 BlockDriverAIOCB
*bdrv_aio_discard(BlockDriverState
*bs
,
3216 int64_t sector_num
, int nb_sectors
,
3217 BlockDriverCompletionFunc
*cb
, void *opaque
)
3220 BlockDriverAIOCBCoroutine
*acb
;
3222 trace_bdrv_aio_discard(bs
, sector_num
, nb_sectors
, opaque
);
3224 acb
= qemu_aio_get(&bdrv_em_co_aio_pool
, bs
, cb
, opaque
);
3225 acb
->req
.sector
= sector_num
;
3226 acb
->req
.nb_sectors
= nb_sectors
;
3227 co
= qemu_coroutine_create(bdrv_aio_discard_co_entry
);
3228 qemu_coroutine_enter(co
, acb
);
3230 return &acb
->common
;
3233 void bdrv_init(void)
3235 module_call_init(MODULE_INIT_BLOCK
);
3238 void bdrv_init_with_whitelist(void)
3240 use_bdrv_whitelist
= 1;
3244 void *qemu_aio_get(AIOPool
*pool
, BlockDriverState
*bs
,
3245 BlockDriverCompletionFunc
*cb
, void *opaque
)
3247 BlockDriverAIOCB
*acb
;
3249 if (pool
->free_aiocb
) {
3250 acb
= pool
->free_aiocb
;
3251 pool
->free_aiocb
= acb
->next
;
3253 acb
= g_malloc0(pool
->aiocb_size
);
3258 acb
->opaque
= opaque
;
3262 void qemu_aio_release(void *p
)
3264 BlockDriverAIOCB
*acb
= (BlockDriverAIOCB
*)p
;
3265 AIOPool
*pool
= acb
->pool
;
3266 acb
->next
= pool
->free_aiocb
;
3267 pool
->free_aiocb
= acb
;
3270 /**************************************************************/
3271 /* Coroutine block device emulation */
3273 typedef struct CoroutineIOCompletion
{
3274 Coroutine
*coroutine
;
3276 } CoroutineIOCompletion
;
3278 static void bdrv_co_io_em_complete(void *opaque
, int ret
)
3280 CoroutineIOCompletion
*co
= opaque
;
3283 qemu_coroutine_enter(co
->coroutine
, NULL
);
3286 static int coroutine_fn
bdrv_co_io_em(BlockDriverState
*bs
, int64_t sector_num
,
3287 int nb_sectors
, QEMUIOVector
*iov
,
3290 CoroutineIOCompletion co
= {
3291 .coroutine
= qemu_coroutine_self(),
3293 BlockDriverAIOCB
*acb
;
3296 acb
= bs
->drv
->bdrv_aio_writev(bs
, sector_num
, iov
, nb_sectors
,
3297 bdrv_co_io_em_complete
, &co
);
3299 acb
= bs
->drv
->bdrv_aio_readv(bs
, sector_num
, iov
, nb_sectors
,
3300 bdrv_co_io_em_complete
, &co
);
3303 trace_bdrv_co_io_em(bs
, sector_num
, nb_sectors
, is_write
, acb
);
3307 qemu_coroutine_yield();
3312 static int coroutine_fn
bdrv_co_readv_em(BlockDriverState
*bs
,
3313 int64_t sector_num
, int nb_sectors
,
3316 return bdrv_co_io_em(bs
, sector_num
, nb_sectors
, iov
, false);
3319 static int coroutine_fn
bdrv_co_writev_em(BlockDriverState
*bs
,
3320 int64_t sector_num
, int nb_sectors
,
3323 return bdrv_co_io_em(bs
, sector_num
, nb_sectors
, iov
, true);
3326 static void coroutine_fn
bdrv_flush_co_entry(void *opaque
)
3328 RwCo
*rwco
= opaque
;
3330 rwco
->ret
= bdrv_co_flush(rwco
->bs
);
3333 int coroutine_fn
bdrv_co_flush(BlockDriverState
*bs
)
3341 /* Write back cached data to the OS even with cache=unsafe */
3342 if (bs
->drv
->bdrv_co_flush_to_os
) {
3343 ret
= bs
->drv
->bdrv_co_flush_to_os(bs
);
3349 /* But don't actually force it to the disk with cache=unsafe */
3350 if (bs
->open_flags
& BDRV_O_NO_FLUSH
) {
3354 if (bs
->drv
->bdrv_co_flush_to_disk
) {
3355 return bs
->drv
->bdrv_co_flush_to_disk(bs
);
3356 } else if (bs
->drv
->bdrv_aio_flush
) {
3357 BlockDriverAIOCB
*acb
;
3358 CoroutineIOCompletion co
= {
3359 .coroutine
= qemu_coroutine_self(),
3362 acb
= bs
->drv
->bdrv_aio_flush(bs
, bdrv_co_io_em_complete
, &co
);
3366 qemu_coroutine_yield();
3371 * Some block drivers always operate in either writethrough or unsafe
3372 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
3373 * know how the server works (because the behaviour is hardcoded or
3374 * depends on server-side configuration), so we can't ensure that
3375 * everything is safe on disk. Returning an error doesn't work because
3376 * that would break guests even if the server operates in writethrough
3379 * Let's hope the user knows what he's doing.
3385 void bdrv_invalidate_cache(BlockDriverState
*bs
)
3387 if (bs
->drv
&& bs
->drv
->bdrv_invalidate_cache
) {
3388 bs
->drv
->bdrv_invalidate_cache(bs
);
3392 void bdrv_invalidate_cache_all(void)
3394 BlockDriverState
*bs
;
3396 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
3397 bdrv_invalidate_cache(bs
);
3401 int bdrv_flush(BlockDriverState
*bs
)
3409 if (qemu_in_coroutine()) {
3410 /* Fast-path if already in coroutine context */
3411 bdrv_flush_co_entry(&rwco
);
3413 co
= qemu_coroutine_create(bdrv_flush_co_entry
);
3414 qemu_coroutine_enter(co
, &rwco
);
3415 while (rwco
.ret
== NOT_DONE
) {
3423 static void coroutine_fn
bdrv_discard_co_entry(void *opaque
)
3425 RwCo
*rwco
= opaque
;
3427 rwco
->ret
= bdrv_co_discard(rwco
->bs
, rwco
->sector_num
, rwco
->nb_sectors
);
3430 int coroutine_fn
bdrv_co_discard(BlockDriverState
*bs
, int64_t sector_num
,
3435 } else if (bdrv_check_request(bs
, sector_num
, nb_sectors
)) {
3437 } else if (bs
->read_only
) {
3439 } else if (bs
->drv
->bdrv_co_discard
) {
3440 return bs
->drv
->bdrv_co_discard(bs
, sector_num
, nb_sectors
);
3441 } else if (bs
->drv
->bdrv_aio_discard
) {
3442 BlockDriverAIOCB
*acb
;
3443 CoroutineIOCompletion co
= {
3444 .coroutine
= qemu_coroutine_self(),
3447 acb
= bs
->drv
->bdrv_aio_discard(bs
, sector_num
, nb_sectors
,
3448 bdrv_co_io_em_complete
, &co
);
3452 qemu_coroutine_yield();
3460 int bdrv_discard(BlockDriverState
*bs
, int64_t sector_num
, int nb_sectors
)
3465 .sector_num
= sector_num
,
3466 .nb_sectors
= nb_sectors
,
3470 if (qemu_in_coroutine()) {
3471 /* Fast-path if already in coroutine context */
3472 bdrv_discard_co_entry(&rwco
);
3474 co
= qemu_coroutine_create(bdrv_discard_co_entry
);
3475 qemu_coroutine_enter(co
, &rwco
);
3476 while (rwco
.ret
== NOT_DONE
) {
3484 /**************************************************************/
3485 /* removable device support */
3488 * Return TRUE if the media is present
3490 int bdrv_is_inserted(BlockDriverState
*bs
)
3492 BlockDriver
*drv
= bs
->drv
;
3496 if (!drv
->bdrv_is_inserted
)
3498 return drv
->bdrv_is_inserted(bs
);
3502 * Return whether the media changed since the last call to this
3503 * function, or -ENOTSUP if we don't know. Most drivers don't know.
3505 int bdrv_media_changed(BlockDriverState
*bs
)
3507 BlockDriver
*drv
= bs
->drv
;
3509 if (drv
&& drv
->bdrv_media_changed
) {
3510 return drv
->bdrv_media_changed(bs
);
3516 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3518 void bdrv_eject(BlockDriverState
*bs
, int eject_flag
)
3520 BlockDriver
*drv
= bs
->drv
;
3522 if (drv
&& drv
->bdrv_eject
) {
3523 drv
->bdrv_eject(bs
, eject_flag
);
3528 * Lock or unlock the media (if it is locked, the user won't be able
3529 * to eject it manually).
3531 void bdrv_lock_medium(BlockDriverState
*bs
, bool locked
)
3533 BlockDriver
*drv
= bs
->drv
;
3535 trace_bdrv_lock_medium(bs
, locked
);
3537 if (drv
&& drv
->bdrv_lock_medium
) {
3538 drv
->bdrv_lock_medium(bs
, locked
);
3542 /* needed for generic scsi interface */
3544 int bdrv_ioctl(BlockDriverState
*bs
, unsigned long int req
, void *buf
)
3546 BlockDriver
*drv
= bs
->drv
;
3548 if (drv
&& drv
->bdrv_ioctl
)
3549 return drv
->bdrv_ioctl(bs
, req
, buf
);
3553 BlockDriverAIOCB
*bdrv_aio_ioctl(BlockDriverState
*bs
,
3554 unsigned long int req
, void *buf
,
3555 BlockDriverCompletionFunc
*cb
, void *opaque
)
3557 BlockDriver
*drv
= bs
->drv
;
3559 if (drv
&& drv
->bdrv_aio_ioctl
)
3560 return drv
->bdrv_aio_ioctl(bs
, req
, buf
, cb
, opaque
);
3564 void bdrv_set_buffer_alignment(BlockDriverState
*bs
, int align
)
3566 bs
->buffer_alignment
= align
;
3569 void *qemu_blockalign(BlockDriverState
*bs
, size_t size
)
3571 return qemu_memalign((bs
&& bs
->buffer_alignment
) ? bs
->buffer_alignment
: 512, size
);
3574 void bdrv_set_dirty_tracking(BlockDriverState
*bs
, int enable
)
3576 int64_t bitmap_size
;
3578 bs
->dirty_count
= 0;
3580 if (!bs
->dirty_bitmap
) {
3581 bitmap_size
= (bdrv_getlength(bs
) >> BDRV_SECTOR_BITS
) +
3582 BDRV_SECTORS_PER_DIRTY_CHUNK
* 8 - 1;
3583 bitmap_size
/= BDRV_SECTORS_PER_DIRTY_CHUNK
* 8;
3585 bs
->dirty_bitmap
= g_malloc0(bitmap_size
);
3588 if (bs
->dirty_bitmap
) {
3589 g_free(bs
->dirty_bitmap
);
3590 bs
->dirty_bitmap
= NULL
;
3595 int bdrv_get_dirty(BlockDriverState
*bs
, int64_t sector
)
3597 int64_t chunk
= sector
/ (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK
;
3599 if (bs
->dirty_bitmap
&&
3600 (sector
<< BDRV_SECTOR_BITS
) < bdrv_getlength(bs
)) {
3601 return !!(bs
->dirty_bitmap
[chunk
/ (sizeof(unsigned long) * 8)] &
3602 (1UL << (chunk
% (sizeof(unsigned long) * 8))));
3608 void bdrv_reset_dirty(BlockDriverState
*bs
, int64_t cur_sector
,
3611 set_dirty_bitmap(bs
, cur_sector
, nr_sectors
, 0);
3614 int64_t bdrv_get_dirty_count(BlockDriverState
*bs
)
3616 return bs
->dirty_count
;
3619 void bdrv_set_in_use(BlockDriverState
*bs
, int in_use
)
3621 assert(bs
->in_use
!= in_use
);
3622 bs
->in_use
= in_use
;
3625 int bdrv_in_use(BlockDriverState
*bs
)
3630 void bdrv_iostatus_enable(BlockDriverState
*bs
)
3632 bs
->iostatus_enabled
= true;
3633 bs
->iostatus
= BLOCK_DEVICE_IO_STATUS_OK
;
3636 /* The I/O status is only enabled if the drive explicitly
3637 * enables it _and_ the VM is configured to stop on errors */
3638 bool bdrv_iostatus_is_enabled(const BlockDriverState
*bs
)
3640 return (bs
->iostatus_enabled
&&
3641 (bs
->on_write_error
== BLOCK_ERR_STOP_ENOSPC
||
3642 bs
->on_write_error
== BLOCK_ERR_STOP_ANY
||
3643 bs
->on_read_error
== BLOCK_ERR_STOP_ANY
));
3646 void bdrv_iostatus_disable(BlockDriverState
*bs
)
3648 bs
->iostatus_enabled
= false;
3651 void bdrv_iostatus_reset(BlockDriverState
*bs
)
3653 if (bdrv_iostatus_is_enabled(bs
)) {
3654 bs
->iostatus
= BLOCK_DEVICE_IO_STATUS_OK
;
3658 /* XXX: Today this is set by device models because it makes the implementation
3659 quite simple. However, the block layer knows about the error, so it's
3660 possible to implement this without device models being involved */
3661 void bdrv_iostatus_set_err(BlockDriverState
*bs
, int error
)
3663 if (bdrv_iostatus_is_enabled(bs
) &&
3664 bs
->iostatus
== BLOCK_DEVICE_IO_STATUS_OK
) {
3666 bs
->iostatus
= error
== ENOSPC
? BLOCK_DEVICE_IO_STATUS_NOSPACE
:
3667 BLOCK_DEVICE_IO_STATUS_FAILED
;
3672 bdrv_acct_start(BlockDriverState
*bs
, BlockAcctCookie
*cookie
, int64_t bytes
,
3673 enum BlockAcctType type
)
3675 assert(type
< BDRV_MAX_IOTYPE
);
3677 cookie
->bytes
= bytes
;
3678 cookie
->start_time_ns
= get_clock();
3679 cookie
->type
= type
;
3683 bdrv_acct_done(BlockDriverState
*bs
, BlockAcctCookie
*cookie
)
3685 assert(cookie
->type
< BDRV_MAX_IOTYPE
);
3687 bs
->nr_bytes
[cookie
->type
] += cookie
->bytes
;
3688 bs
->nr_ops
[cookie
->type
]++;
3689 bs
->total_time_ns
[cookie
->type
] += get_clock() - cookie
->start_time_ns
;
3692 int bdrv_img_create(const char *filename
, const char *fmt
,
3693 const char *base_filename
, const char *base_fmt
,
3694 char *options
, uint64_t img_size
, int flags
)
3696 QEMUOptionParameter
*param
= NULL
, *create_options
= NULL
;
3697 QEMUOptionParameter
*backing_fmt
, *backing_file
, *size
;
3698 BlockDriverState
*bs
= NULL
;
3699 BlockDriver
*drv
, *proto_drv
;
3700 BlockDriver
*backing_drv
= NULL
;
3703 /* Find driver and parse its options */
3704 drv
= bdrv_find_format(fmt
);
3706 error_report("Unknown file format '%s'", fmt
);
3711 proto_drv
= bdrv_find_protocol(filename
);
3713 error_report("Unknown protocol '%s'", filename
);
3718 create_options
= append_option_parameters(create_options
,
3719 drv
->create_options
);
3720 create_options
= append_option_parameters(create_options
,
3721 proto_drv
->create_options
);
3723 /* Create parameter list with default values */
3724 param
= parse_option_parameters("", create_options
, param
);
3726 set_option_parameter_int(param
, BLOCK_OPT_SIZE
, img_size
);
3728 /* Parse -o options */
3730 param
= parse_option_parameters(options
, create_options
, param
);
3731 if (param
== NULL
) {
3732 error_report("Invalid options for file format '%s'.", fmt
);
3738 if (base_filename
) {
3739 if (set_option_parameter(param
, BLOCK_OPT_BACKING_FILE
,
3741 error_report("Backing file not supported for file format '%s'",
3749 if (set_option_parameter(param
, BLOCK_OPT_BACKING_FMT
, base_fmt
)) {
3750 error_report("Backing file format not supported for file "
3751 "format '%s'", fmt
);
3757 backing_file
= get_option_parameter(param
, BLOCK_OPT_BACKING_FILE
);
3758 if (backing_file
&& backing_file
->value
.s
) {
3759 if (!strcmp(filename
, backing_file
->value
.s
)) {
3760 error_report("Error: Trying to create an image with the "
3761 "same filename as the backing file");
3767 backing_fmt
= get_option_parameter(param
, BLOCK_OPT_BACKING_FMT
);
3768 if (backing_fmt
&& backing_fmt
->value
.s
) {
3769 backing_drv
= bdrv_find_format(backing_fmt
->value
.s
);
3771 error_report("Unknown backing file format '%s'",
3772 backing_fmt
->value
.s
);
3778 // The size for the image must always be specified, with one exception:
3779 // If we are using a backing file, we can obtain the size from there
3780 size
= get_option_parameter(param
, BLOCK_OPT_SIZE
);
3781 if (size
&& size
->value
.n
== -1) {
3782 if (backing_file
&& backing_file
->value
.s
) {
3788 ret
= bdrv_open(bs
, backing_file
->value
.s
, flags
, backing_drv
);
3790 error_report("Could not open '%s'", backing_file
->value
.s
);
3793 bdrv_get_geometry(bs
, &size
);
3796 snprintf(buf
, sizeof(buf
), "%" PRId64
, size
);
3797 set_option_parameter(param
, BLOCK_OPT_SIZE
, buf
);
3799 error_report("Image creation needs a size parameter");
3805 printf("Formatting '%s', fmt=%s ", filename
, fmt
);
3806 print_option_parameters(param
);
3809 ret
= bdrv_create(drv
, filename
, param
);
3812 if (ret
== -ENOTSUP
) {
3813 error_report("Formatting or formatting option not supported for "
3814 "file format '%s'", fmt
);
3815 } else if (ret
== -EFBIG
) {
3816 error_report("The image size is too large for file format '%s'",
3819 error_report("%s: error while creating %s: %s", filename
, fmt
,
3825 free_option_parameters(create_options
);
3826 free_option_parameters(param
);