]>
Commit | Line | Data |
---|---|---|
ea2384d3 FB |
1 | /* |
2 | * QEMU System Emulator block driver | |
5fafdf24 | 3 | * |
ea2384d3 | 4 | * Copyright (c) 2003 Fabrice Bellard |
5fafdf24 | 5 | * |
ea2384d3 FB |
6 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
7 | * of this software and associated documentation files (the "Software"), to deal | |
8 | * in the Software without restriction, including without limitation the rights | |
9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
10 | * copies of the Software, and to permit persons to whom the Software is | |
11 | * furnished to do so, subject to the following conditions: | |
12 | * | |
13 | * The above copyright notice and this permission notice shall be included in | |
14 | * all copies or substantial portions of the Software. | |
15 | * | |
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
22 | * THE SOFTWARE. | |
23 | */ | |
24 | #ifndef BLOCK_INT_H | |
25 | #define BLOCK_INT_H | |
26 | ||
5e5a94b6 | 27 | #include "block/accounting.h" |
737e150e | 28 | #include "block/block.h" |
1de7afc9 PB |
29 | #include "qemu/option.h" |
30 | #include "qemu/queue.h" | |
10817bf0 | 31 | #include "qemu/coroutine.h" |
1de7afc9 | 32 | #include "qemu/timer.h" |
b2023818 | 33 | #include "qapi-types.h" |
8f0720ec | 34 | #include "qemu/hbitmap.h" |
f364ec65 | 35 | #include "block/snapshot.h" |
6a1751b7 | 36 | #include "qemu/main-loop.h" |
cc0681c4 | 37 | #include "qemu/throttle.h" |
faf07963 | 38 | |
bfe8043e | 39 | #define BLOCK_FLAG_ENCRYPT 1 |
bfe8043e | 40 | #define BLOCK_FLAG_LAZY_REFCOUNTS 8 |
ec36ba14 | 41 | |
bfe8043e SH |
42 | #define BLOCK_OPT_SIZE "size" |
43 | #define BLOCK_OPT_ENCRYPT "encryption" | |
44 | #define BLOCK_OPT_COMPAT6 "compat6" | |
f249924e | 45 | #define BLOCK_OPT_HWVERSION "hwversion" |
bfe8043e SH |
46 | #define BLOCK_OPT_BACKING_FILE "backing_file" |
47 | #define BLOCK_OPT_BACKING_FMT "backing_fmt" | |
48 | #define BLOCK_OPT_CLUSTER_SIZE "cluster_size" | |
49 | #define BLOCK_OPT_TABLE_SIZE "table_size" | |
50 | #define BLOCK_OPT_PREALLOC "preallocation" | |
51 | #define BLOCK_OPT_SUBFMT "subformat" | |
52 | #define BLOCK_OPT_COMPAT_LEVEL "compat" | |
53 | #define BLOCK_OPT_LAZY_REFCOUNTS "lazy_refcounts" | |
7f2039f6 | 54 | #define BLOCK_OPT_ADAPTER_TYPE "adapter_type" |
b3af018f | 55 | #define BLOCK_OPT_REDUNDANCY "redundancy" |
4ab15590 | 56 | #define BLOCK_OPT_NOCOW "nocow" |
876eb1b0 | 57 | #define BLOCK_OPT_OBJECT_SIZE "object_size" |
06d05fa7 | 58 | #define BLOCK_OPT_REFCOUNT_BITS "refcount_bits" |
0e7e1989 | 59 | |
7cddd372 KW |
60 | #define BLOCK_PROBE_BUF_SIZE 512 |
61 | ||
ebde595c FZ |
62 | enum BdrvTrackedRequestType { |
63 | BDRV_TRACKED_READ, | |
64 | BDRV_TRACKED_WRITE, | |
ebde595c FZ |
65 | BDRV_TRACKED_DISCARD, |
66 | }; | |
67 | ||
d616b224 SH |
68 | typedef struct BdrvTrackedRequest { |
69 | BlockDriverState *bs; | |
793ed47a KW |
70 | int64_t offset; |
71 | unsigned int bytes; | |
ebde595c | 72 | enum BdrvTrackedRequestType type; |
7327145f | 73 | |
2dbafdc0 | 74 | bool serialising; |
7327145f KW |
75 | int64_t overlap_offset; |
76 | unsigned int overlap_bytes; | |
77 | ||
d616b224 SH |
78 | QLIST_ENTRY(BdrvTrackedRequest) list; |
79 | Coroutine *co; /* owner, used for deadlock detection */ | |
80 | CoQueue wait_queue; /* coroutines blocked on this request */ | |
6460440f KW |
81 | |
82 | struct BdrvTrackedRequest *waiting_for; | |
d616b224 SH |
83 | } BdrvTrackedRequest; |
84 | ||
ea2384d3 FB |
85 | struct BlockDriver { |
86 | const char *format_name; | |
87 | int instance_size; | |
f6186f49 | 88 | |
b5042a36 BC |
89 | /* set to true if the BlockDriver is a block filter */ |
90 | bool is_filter; | |
91 | /* For snapshots, block filters like Quorum can implement the | |
92 | * following recursive callback. | |
212a5a8f BC |
93 | * Its purpose is to recurse on the filter children while calling |
94 | * bdrv_recurse_is_first_non_filter on them. | |
95 | * For a sample implementation look in the future Quorum block filter. | |
f6186f49 | 96 | */ |
212a5a8f BC |
97 | bool (*bdrv_recurse_is_first_non_filter)(BlockDriverState *bs, |
98 | BlockDriverState *candidate); | |
f6186f49 | 99 | |
ea2384d3 | 100 | int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename); |
508c7cb3 | 101 | int (*bdrv_probe_device)(const char *filename); |
c2ad1b0c KW |
102 | |
103 | /* Any driver implementing this callback is expected to be able to handle | |
104 | * NULL file names in its .bdrv_open() implementation */ | |
6963a30d | 105 | void (*bdrv_parse_filename)(const char *filename, QDict *options, Error **errp); |
030be321 BC |
106 | /* Drivers implementing neither bdrv_parse_filename nor bdrv_open should have |
107 | * this field set to true, except ones that are defined only by their | |
108 | * child's bs. | |
109 | * An example of the last type will be the quorum block driver. | |
110 | */ | |
111 | bool bdrv_needs_filename; | |
e971aa12 | 112 | |
8ee79e70 KW |
113 | /* Set if a driver can support backing files */ |
114 | bool supports_backing; | |
115 | ||
e971aa12 JC |
116 | /* For handling image reopen for split or non-split files */ |
117 | int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state, | |
118 | BlockReopenQueue *queue, Error **errp); | |
119 | void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state); | |
120 | void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state); | |
5365f44d | 121 | void (*bdrv_join_options)(QDict *options, QDict *old_options); |
e971aa12 | 122 | |
015a1036 HR |
123 | int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags, |
124 | Error **errp); | |
125 | int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags, | |
126 | Error **errp); | |
e2731add | 127 | void (*bdrv_close)(BlockDriverState *bs); |
c282e1fd | 128 | int (*bdrv_create)(const char *filename, QemuOpts *opts, Error **errp); |
ea2384d3 | 129 | int (*bdrv_set_key)(BlockDriverState *bs, const char *key); |
95389c86 | 130 | int (*bdrv_make_empty)(BlockDriverState *bs); |
91af7014 | 131 | |
4cdd01d3 | 132 | void (*bdrv_refresh_filename)(BlockDriverState *bs, QDict *options); |
91af7014 | 133 | |
83f64091 | 134 | /* aio */ |
7c84b1b8 | 135 | BlockAIOCB *(*bdrv_aio_readv)(BlockDriverState *bs, |
f141eafe | 136 | int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, |
097310b5 | 137 | BlockCompletionFunc *cb, void *opaque); |
7c84b1b8 | 138 | BlockAIOCB *(*bdrv_aio_writev)(BlockDriverState *bs, |
f141eafe | 139 | int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, |
097310b5 | 140 | BlockCompletionFunc *cb, void *opaque); |
7c84b1b8 | 141 | BlockAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs, |
097310b5 | 142 | BlockCompletionFunc *cb, void *opaque); |
4da444a0 EB |
143 | BlockAIOCB *(*bdrv_aio_pdiscard)(BlockDriverState *bs, |
144 | int64_t offset, int count, | |
097310b5 | 145 | BlockCompletionFunc *cb, void *opaque); |
83f64091 | 146 | |
da1fa91d KW |
147 | int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs, |
148 | int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); | |
3fb06697 KW |
149 | int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs, |
150 | uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags); | |
da1fa91d KW |
151 | int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs, |
152 | int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); | |
93f5e6d8 KW |
153 | int coroutine_fn (*bdrv_co_writev_flags)(BlockDriverState *bs, |
154 | int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int flags); | |
3fb06697 KW |
155 | int coroutine_fn (*bdrv_co_pwritev)(BlockDriverState *bs, |
156 | uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags); | |
93f5e6d8 | 157 | |
f08f2dda SH |
158 | /* |
159 | * Efficiently zero a region of the disk image. Typically an image format | |
160 | * would use a compact metadata representation to implement this. This | |
465fe887 EB |
161 | * function pointer may be NULL or return -ENOTSUP and .bdrv_co_writev() |
162 | * will be called instead. | |
f08f2dda | 163 | */ |
d05aa8bb EB |
164 | int coroutine_fn (*bdrv_co_pwrite_zeroes)(BlockDriverState *bs, |
165 | int64_t offset, int count, BdrvRequestFlags flags); | |
47a5486d EB |
166 | int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs, |
167 | int64_t offset, int count); | |
b6b8a333 | 168 | int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs, |
67a0fd2a FZ |
169 | int64_t sector_num, int nb_sectors, int *pnum, |
170 | BlockDriverState **file); | |
da1fa91d | 171 | |
0f15423c AL |
172 | /* |
173 | * Invalidate any cached meta-data. | |
174 | */ | |
5a8a30db | 175 | void (*bdrv_invalidate_cache)(BlockDriverState *bs, Error **errp); |
76b1c7fe | 176 | int (*bdrv_inactivate)(BlockDriverState *bs); |
0f15423c | 177 | |
c32b82af PD |
178 | /* |
179 | * Flushes all data for all layers by calling bdrv_co_flush for underlying | |
180 | * layers, if needed. This function is needed for deterministic | |
181 | * synchronization of the flush finishing callback. | |
182 | */ | |
183 | int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs); | |
184 | ||
c68b89ac KW |
185 | /* |
186 | * Flushes all data that was already written to the OS all the way down to | |
187 | * the disk (for example raw-posix calls fsync()). | |
188 | */ | |
189 | int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs); | |
190 | ||
eb489bb1 KW |
191 | /* |
192 | * Flushes all internal caches to the OS. The data may still sit in a | |
193 | * writeback cache of the host OS, but it will survive a crash of the qemu | |
194 | * process. | |
195 | */ | |
196 | int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs); | |
197 | ||
83f64091 | 198 | const char *protocol_name; |
83f64091 | 199 | int (*bdrv_truncate)(BlockDriverState *bs, int64_t offset); |
b94a2610 | 200 | |
83f64091 | 201 | int64_t (*bdrv_getlength)(BlockDriverState *bs); |
b94a2610 | 202 | bool has_variable_length; |
4a1d5e1f | 203 | int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs); |
b94a2610 | 204 | |
29a298af PB |
205 | int coroutine_fn (*bdrv_co_pwritev_compressed)(BlockDriverState *bs, |
206 | uint64_t offset, uint64_t bytes, QEMUIOVector *qiov); | |
207 | ||
5fafdf24 | 208 | int (*bdrv_snapshot_create)(BlockDriverState *bs, |
faea38e7 | 209 | QEMUSnapshotInfo *sn_info); |
5fafdf24 | 210 | int (*bdrv_snapshot_goto)(BlockDriverState *bs, |
faea38e7 | 211 | const char *snapshot_id); |
a89d89d3 WX |
212 | int (*bdrv_snapshot_delete)(BlockDriverState *bs, |
213 | const char *snapshot_id, | |
214 | const char *name, | |
215 | Error **errp); | |
5fafdf24 | 216 | int (*bdrv_snapshot_list)(BlockDriverState *bs, |
faea38e7 | 217 | QEMUSnapshotInfo **psn_info); |
51ef6727 | 218 | int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs, |
7b4c4781 WX |
219 | const char *snapshot_id, |
220 | const char *name, | |
221 | Error **errp); | |
faea38e7 | 222 | int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi); |
eae041fe | 223 | ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs); |
83f64091 | 224 | |
1a8ae822 KW |
225 | int coroutine_fn (*bdrv_save_vmstate)(BlockDriverState *bs, |
226 | QEMUIOVector *qiov, | |
227 | int64_t pos); | |
228 | int coroutine_fn (*bdrv_load_vmstate)(BlockDriverState *bs, | |
229 | QEMUIOVector *qiov, | |
230 | int64_t pos); | |
178e08a5 | 231 | |
756e6736 KW |
232 | int (*bdrv_change_backing_file)(BlockDriverState *bs, |
233 | const char *backing_file, const char *backing_fmt); | |
234 | ||
19cb3738 | 235 | /* removable device specific */ |
e031f750 | 236 | bool (*bdrv_is_inserted)(BlockDriverState *bs); |
19cb3738 | 237 | int (*bdrv_media_changed)(BlockDriverState *bs); |
f36f3949 | 238 | void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag); |
025e849a | 239 | void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked); |
3b46e624 | 240 | |
985a03b0 | 241 | /* to control generic scsi devices */ |
7c84b1b8 | 242 | BlockAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs, |
221f715d | 243 | unsigned long int req, void *buf, |
097310b5 | 244 | BlockCompletionFunc *cb, void *opaque); |
16a389dc KW |
245 | int coroutine_fn (*bdrv_co_ioctl)(BlockDriverState *bs, |
246 | unsigned long int req, void *buf); | |
985a03b0 | 247 | |
0e7e1989 | 248 | /* List of options for creating images, terminated by name == NULL */ |
83d0521a | 249 | QemuOptsList *create_opts; |
5eb45639 | 250 | |
9ac228e0 KW |
251 | /* |
252 | * Returns 0 for completed check, -errno for internal errors. | |
253 | * The check results are stored in result. | |
254 | */ | |
4534ff54 KW |
255 | int (*bdrv_check)(BlockDriverState* bs, BdrvCheckResult *result, |
256 | BdrvCheckMode fix); | |
e97fc193 | 257 | |
77485434 | 258 | int (*bdrv_amend_options)(BlockDriverState *bs, QemuOpts *opts, |
8b13976d HR |
259 | BlockDriverAmendStatusCB *status_cb, |
260 | void *cb_opaque); | |
6f176b48 | 261 | |
a31939e6 | 262 | void (*bdrv_debug_event)(BlockDriverState *bs, BlkdebugEvent event); |
8b9b0cc2 | 263 | |
41c695c7 KW |
264 | /* TODO Better pass an option string/QDict/QemuOpts to add any rule? */ |
265 | int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event, | |
266 | const char *tag); | |
4cc70e93 FZ |
267 | int (*bdrv_debug_remove_breakpoint)(BlockDriverState *bs, |
268 | const char *tag); | |
41c695c7 KW |
269 | int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag); |
270 | bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag); | |
271 | ||
3baca891 | 272 | void (*bdrv_refresh_limits)(BlockDriverState *bs, Error **errp); |
d34682cd | 273 | |
336c1c12 KW |
274 | /* |
275 | * Returns 1 if newly created images are guaranteed to contain only | |
276 | * zeros, 0 otherwise. | |
277 | */ | |
278 | int (*bdrv_has_zero_init)(BlockDriverState *bs); | |
12c09b8c | 279 | |
dcd04228 SH |
280 | /* Remove fd handlers, timers, and other event loop callbacks so the event |
281 | * loop is no longer in use. Called with no in-flight requests and in | |
282 | * depth-first traversal order with parents before child nodes. | |
283 | */ | |
284 | void (*bdrv_detach_aio_context)(BlockDriverState *bs); | |
285 | ||
286 | /* Add fd handlers, timers, and other event loop callbacks so I/O requests | |
287 | * can be processed again. Called with no in-flight requests and in | |
288 | * depth-first traversal order with child nodes before parent nodes. | |
289 | */ | |
290 | void (*bdrv_attach_aio_context)(BlockDriverState *bs, | |
291 | AioContext *new_context); | |
292 | ||
448ad91d ML |
293 | /* io queue for linux-aio */ |
294 | void (*bdrv_io_plug)(BlockDriverState *bs); | |
295 | void (*bdrv_io_unplug)(BlockDriverState *bs); | |
448ad91d | 296 | |
892b7de8 ET |
297 | /** |
298 | * Try to get @bs's logical and physical block size. | |
299 | * On success, store them in @bsz and return zero. | |
300 | * On failure, return negative errno. | |
301 | */ | |
302 | int (*bdrv_probe_blocksizes)(BlockDriverState *bs, BlockSizes *bsz); | |
303 | /** | |
304 | * Try to get @bs's geometry (cyls, heads, sectors) | |
305 | * On success, store them in @geo and return 0. | |
306 | * On failure return -errno. | |
307 | * Only drivers that want to override guest geometry implement this | |
308 | * callback; see hd_geometry_guess(). | |
309 | */ | |
310 | int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo); | |
311 | ||
67da1dc5 FZ |
312 | /** |
313 | * Drain and stop any internal sources of requests in the driver, and | |
314 | * remain so until next I/O callback (e.g. bdrv_co_writev) is called. | |
315 | */ | |
316 | void (*bdrv_drain)(BlockDriverState *bs); | |
317 | ||
e06018ad WC |
318 | void (*bdrv_add_child)(BlockDriverState *parent, BlockDriverState *child, |
319 | Error **errp); | |
320 | void (*bdrv_del_child)(BlockDriverState *parent, BdrvChild *child, | |
321 | Error **errp); | |
322 | ||
8a22f02a | 323 | QLIST_ENTRY(BlockDriver) list; |
ea2384d3 FB |
324 | }; |
325 | ||
fe81c2cc | 326 | typedef struct BlockLimits { |
a5b8dd2c EB |
327 | /* Alignment requirement, in bytes, for offset/length of I/O |
328 | * requests. Must be a power of 2 less than INT_MAX; defaults to | |
329 | * 1 for drivers with modern byte interfaces, and to 512 | |
330 | * otherwise. */ | |
331 | uint32_t request_alignment; | |
332 | ||
b8d0a980 EB |
333 | /* Maximum number of bytes that can be discarded at once (since it |
334 | * is signed, it must be < 2G, if set). Must be multiple of | |
b9f7855a EB |
335 | * pdiscard_alignment, but need not be power of 2. May be 0 if no |
336 | * inherent 32-bit limit */ | |
337 | int32_t max_pdiscard; | |
338 | ||
b8d0a980 EB |
339 | /* Optimal alignment for discard requests in bytes. A power of 2 |
340 | * is best but not mandatory. Must be a multiple of | |
341 | * bl.request_alignment, and must be less than max_pdiscard if | |
342 | * that is set. May be 0 if bl.request_alignment is good enough */ | |
b9f7855a | 343 | uint32_t pdiscard_alignment; |
fe81c2cc | 344 | |
b8d0a980 EB |
345 | /* Maximum number of bytes that can be zeroized at once (since it is |
346 | * signed, it must be < 2G, if set). Must be multiple of | |
29cc6a68 | 347 | * pwrite_zeroes_alignment. May be 0 if no inherent 32-bit limit */ |
cf081fca | 348 | int32_t max_pwrite_zeroes; |
fe81c2cc | 349 | |
b8d0a980 EB |
350 | /* Optimal alignment for write zeroes requests in bytes. A power |
351 | * of 2 is best but not mandatory. Must be a multiple of | |
352 | * bl.request_alignment, and must be less than max_pwrite_zeroes | |
353 | * if that is set. May be 0 if bl.request_alignment is good | |
354 | * enough */ | |
cf081fca | 355 | uint32_t pwrite_zeroes_alignment; |
7337acaf | 356 | |
b8d0a980 EB |
357 | /* Optimal transfer length in bytes. A power of 2 is best but not |
358 | * mandatory. Must be a multiple of bl.request_alignment, or 0 if | |
359 | * no preferred size */ | |
5def6b80 EB |
360 | uint32_t opt_transfer; |
361 | ||
b8d0a980 EB |
362 | /* Maximal transfer length in bytes. Need not be power of 2, but |
363 | * must be multiple of opt_transfer and bl.request_alignment, or 0 | |
364 | * for no 32-bit limit. For now, anything larger than INT_MAX is | |
365 | * clamped down. */ | |
5def6b80 | 366 | uint32_t max_transfer; |
2647fab5 | 367 | |
a5b8dd2c | 368 | /* memory alignment, in bytes so that no bounce buffer is needed */ |
4196d2f0 DL |
369 | size_t min_mem_alignment; |
370 | ||
a5b8dd2c | 371 | /* memory alignment, in bytes, for bounce buffer */ |
339064d5 | 372 | size_t opt_mem_alignment; |
bd44feb7 SH |
373 | |
374 | /* maximum number of iovec elements */ | |
375 | int max_iov; | |
fe81c2cc PL |
376 | } BlockLimits; |
377 | ||
fbe40ff7 FZ |
378 | typedef struct BdrvOpBlocker BdrvOpBlocker; |
379 | ||
33384421 HR |
380 | typedef struct BdrvAioNotifier { |
381 | void (*attached_aio_context)(AioContext *new_context, void *opaque); | |
382 | void (*detach_aio_context)(void *opaque); | |
383 | ||
384 | void *opaque; | |
e8a095da | 385 | bool deleted; |
33384421 HR |
386 | |
387 | QLIST_ENTRY(BdrvAioNotifier) list; | |
388 | } BdrvAioNotifier; | |
389 | ||
f3930ed0 | 390 | struct BdrvChildRole { |
8e2160e2 KW |
391 | void (*inherit_options)(int *child_flags, QDict *child_options, |
392 | int parent_flags, QDict *parent_options); | |
c2066af0 | 393 | |
5c8cab48 KW |
394 | void (*change_media)(BdrvChild *child, bool load); |
395 | void (*resize)(BdrvChild *child); | |
396 | ||
4c265bf9 KW |
397 | /* Returns a name that is supposedly more useful for human users than the |
398 | * node name for identifying the node in question (in particular, a BB | |
399 | * name), or NULL if the parent can't provide a better name. */ | |
400 | const char* (*get_name)(BdrvChild *child); | |
401 | ||
c2066af0 KW |
402 | /* |
403 | * If this pair of functions is implemented, the parent doesn't issue new | |
404 | * requests after returning from .drained_begin() until .drained_end() is | |
405 | * called. | |
406 | * | |
407 | * Note that this can be nested. If drained_begin() was called twice, new | |
408 | * I/O is allowed only after drained_end() was called twice, too. | |
409 | */ | |
410 | void (*drained_begin)(BdrvChild *child); | |
411 | void (*drained_end)(BdrvChild *child); | |
f3930ed0 KW |
412 | }; |
413 | ||
414 | extern const BdrvChildRole child_file; | |
415 | extern const BdrvChildRole child_format; | |
416 | ||
b4b059f6 | 417 | struct BdrvChild { |
6e93e7c4 | 418 | BlockDriverState *bs; |
260fecf1 | 419 | char *name; |
6e93e7c4 | 420 | const BdrvChildRole *role; |
22aa8b24 | 421 | void *opaque; |
6e93e7c4 | 422 | QLIST_ENTRY(BdrvChild) next; |
d42a8a93 | 423 | QLIST_ENTRY(BdrvChild) next_parent; |
b4b059f6 | 424 | }; |
6e93e7c4 | 425 | |
8802d1fd JC |
426 | /* |
427 | * Note: the function bdrv_append() copies and swaps contents of | |
428 | * BlockDriverStates, so if you add new fields to this struct, please | |
429 | * inspect bdrv_append() to determine if the new fields need to be | |
430 | * copied as well. | |
431 | */ | |
ea2384d3 | 432 | struct BlockDriverState { |
d15a771d FB |
433 | int64_t total_sectors; /* if we are reading a disk image, give its |
434 | size in sectors */ | |
4dca4b63 | 435 | int open_flags; /* flags used to open the file, re-used for re-open */ |
54115412 EB |
436 | bool read_only; /* if true, the media is read only */ |
437 | bool encrypted; /* if true, the media is encrypted */ | |
438 | bool valid_key; /* if true, a valid encryption key has been set */ | |
439 | bool sg; /* if true, the device is a /dev/sg* */ | |
440 | bool probed; /* if true, format was probed rather than specified */ | |
441 | ||
442 | int copy_on_read; /* if nonzero, copy read backing sectors into image. | |
53fec9d3 | 443 | note this is a reference count */ |
ea2384d3 | 444 | |
3ff2f67a | 445 | CoQueue flush_queue; /* Serializing flush queue */ |
99723548 | 446 | bool active_flush_req; /* Flush request in flight? */ |
3ff2f67a | 447 | unsigned int write_gen; /* Current data generation */ |
3ff2f67a EY |
448 | unsigned int flushed_gen; /* Flushed write generation */ |
449 | ||
19cb3738 | 450 | BlockDriver *drv; /* NULL means no media */ |
ea2384d3 FB |
451 | void *opaque; |
452 | ||
dcd04228 | 453 | AioContext *aio_context; /* event loop used for fd handlers, timers, etc */ |
33384421 HR |
454 | /* long-running tasks intended to always use the same AioContext as this |
455 | * BDS may register themselves in this list to be notified of changes | |
456 | * regarding this BDS's context */ | |
457 | QLIST_HEAD(, BdrvAioNotifier) aio_notifiers; | |
e8a095da | 458 | bool walking_aio_notifiers; /* to make removal during iteration safe */ |
dcd04228 | 459 | |
9a29e18f JC |
460 | char filename[PATH_MAX]; |
461 | char backing_file[PATH_MAX]; /* if non zero, the image is a diff of | |
462 | this file image */ | |
5eb45639 | 463 | char backing_format[16]; /* if non-zero and backing_file exists */ |
19cb3738 | 464 | |
91af7014 | 465 | QDict *full_open_options; |
9a29e18f | 466 | char exact_filename[PATH_MAX]; |
91af7014 | 467 | |
760e0063 | 468 | BdrvChild *backing; |
9a4f4c31 | 469 | BdrvChild *file; |
66f82cee | 470 | |
d616b224 SH |
471 | /* Callback before write request is processed */ |
472 | NotifierWithReturnList before_write_notifiers; | |
473 | ||
99723548 PB |
474 | /* number of in-flight requests; overall and serialising */ |
475 | unsigned int in_flight; | |
2dbafdc0 | 476 | unsigned int serialising_in_flight; |
470c0504 | 477 | |
c9d1a561 PB |
478 | bool wakeup; |
479 | ||
53d8f9d8 HR |
480 | /* Offset after the highest byte written to */ |
481 | uint64_t wr_highest_offset; | |
482 | ||
fe81c2cc PL |
483 | /* I/O Limits */ |
484 | BlockLimits bl; | |
485 | ||
4df863f3 EB |
486 | /* Flags honored during pwrite (so far: BDRV_REQ_FUA) */ |
487 | unsigned int supported_write_flags; | |
d05aa8bb | 488 | /* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA, |
465fe887 EB |
489 | * BDRV_REQ_MAY_UNMAP) */ |
490 | unsigned int supported_zero_flags; | |
c25f53b0 | 491 | |
dc364f4c BC |
492 | /* the following member gives a name to every node on the bs graph. */ |
493 | char node_name[32]; | |
494 | /* element of the list of named nodes building the graph */ | |
495 | QTAILQ_ENTRY(BlockDriverState) node_list; | |
2c1d04e0 HR |
496 | /* element of the list of all BlockDriverStates (all_bdrv_states) */ |
497 | QTAILQ_ENTRY(BlockDriverState) bs_list; | |
9c4218e9 HR |
498 | /* element of the list of monitor-owned BDS */ |
499 | QTAILQ_ENTRY(BlockDriverState) monitor_list; | |
e4654d2d | 500 | QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps; |
9fcb0251 | 501 | int refcnt; |
dbffbdcf SH |
502 | |
503 | QLIST_HEAD(, BdrvTrackedRequest) tracked_requests; | |
eeec61f2 | 504 | |
fbe40ff7 FZ |
505 | /* operation blockers */ |
506 | QLIST_HEAD(, BdrvOpBlocker) op_blockers[BLOCK_OP_TYPE_MAX]; | |
507 | ||
eeec61f2 SH |
508 | /* long-running background operation */ |
509 | BlockJob *job; | |
e971aa12 | 510 | |
bddcec37 KW |
511 | /* The node that this node inherited default options from (and a reopen on |
512 | * which can affect this node by changing these defaults). This is always a | |
513 | * parent node of this node. */ | |
514 | BlockDriverState *inherits_from; | |
6e93e7c4 | 515 | QLIST_HEAD(, BdrvChild) children; |
d42a8a93 | 516 | QLIST_HEAD(, BdrvChild) parents; |
6e93e7c4 | 517 | |
de9c0cec | 518 | QDict *options; |
145f598e | 519 | QDict *explicit_options; |
465bee1d | 520 | BlockdevDetectZeroesOptions detect_zeroes; |
826b6ca0 FZ |
521 | |
522 | /* The error object in use for blocking operations on backing_hd */ | |
523 | Error *backing_blocker; | |
e2462113 FR |
524 | |
525 | /* threshold limit for writes, in bytes. "High water mark". */ | |
526 | uint64_t write_threshold_offset; | |
527 | NotifierWithReturn write_threshold_notifier; | |
51288d79 | 528 | |
6b98bd64 PB |
529 | /* counters for nested bdrv_io_plug and bdrv_io_unplugged_begin */ |
530 | unsigned io_plugged; | |
531 | unsigned io_plug_disabled; | |
532 | ||
51288d79 | 533 | int quiesce_counter; |
ea2384d3 FB |
534 | }; |
535 | ||
281d22d8 HR |
536 | struct BlockBackendRootState { |
537 | int open_flags; | |
538 | bool read_only; | |
539 | BlockdevDetectZeroesOptions detect_zeroes; | |
281d22d8 HR |
540 | }; |
541 | ||
274fccee HR |
542 | typedef enum BlockMirrorBackingMode { |
543 | /* Reuse the existing backing chain from the source for the target. | |
544 | * - sync=full: Set backing BDS to NULL. | |
545 | * - sync=top: Use source's backing BDS. | |
546 | * - sync=none: Use source as the backing BDS. */ | |
547 | MIRROR_SOURCE_BACKING_CHAIN, | |
548 | ||
549 | /* Open the target's backing chain completely anew */ | |
550 | MIRROR_OPEN_BACKING_CHAIN, | |
551 | ||
552 | /* Do not change the target's backing BDS after job completion */ | |
553 | MIRROR_LEAVE_BACKING_CHAIN, | |
554 | } BlockMirrorBackingMode; | |
555 | ||
760e0063 KW |
556 | static inline BlockDriverState *backing_bs(BlockDriverState *bs) |
557 | { | |
558 | return bs->backing ? bs->backing->bs : NULL; | |
559 | } | |
560 | ||
5f535a94 HR |
561 | |
562 | /* Essential block drivers which must always be statically linked into qemu, and | |
563 | * which therefore can be accessed without using bdrv_find_format() */ | |
564 | extern BlockDriver bdrv_file; | |
565 | extern BlockDriver bdrv_raw; | |
566 | extern BlockDriver bdrv_qcow2; | |
567 | ||
a03ef88f | 568 | int coroutine_fn bdrv_co_preadv(BdrvChild *child, |
1bf1cbc9 KW |
569 | int64_t offset, unsigned int bytes, QEMUIOVector *qiov, |
570 | BdrvRequestFlags flags); | |
a03ef88f | 571 | int coroutine_fn bdrv_co_pwritev(BdrvChild *child, |
a8823a3b KW |
572 | int64_t offset, unsigned int bytes, QEMUIOVector *qiov, |
573 | BdrvRequestFlags flags); | |
1bf1cbc9 | 574 | |
eba25057 | 575 | int get_tmp_filename(char *filename, int size); |
38f3ef57 KW |
576 | BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, |
577 | const char *filename); | |
95389c86 | 578 | |
0563e191 | 579 | |
d616b224 SH |
580 | /** |
581 | * bdrv_add_before_write_notifier: | |
582 | * | |
583 | * Register a callback that is invoked before write requests are processed but | |
584 | * after any throttling or waiting for overlapping requests. | |
585 | */ | |
586 | void bdrv_add_before_write_notifier(BlockDriverState *bs, | |
587 | NotifierWithReturn *notifier); | |
588 | ||
dcd04228 SH |
589 | /** |
590 | * bdrv_detach_aio_context: | |
591 | * | |
592 | * May be called from .bdrv_detach_aio_context() to detach children from the | |
593 | * current #AioContext. This is only needed by block drivers that manage their | |
760e0063 | 594 | * own children. Both ->file and ->backing are automatically handled and |
dcd04228 SH |
595 | * block drivers should not call this function on them explicitly. |
596 | */ | |
597 | void bdrv_detach_aio_context(BlockDriverState *bs); | |
598 | ||
599 | /** | |
600 | * bdrv_attach_aio_context: | |
601 | * | |
602 | * May be called from .bdrv_attach_aio_context() to attach children to the new | |
603 | * #AioContext. This is only needed by block drivers that manage their own | |
760e0063 | 604 | * children. Both ->file and ->backing are automatically handled and block |
dcd04228 SH |
605 | * drivers should not call this function on them explicitly. |
606 | */ | |
607 | void bdrv_attach_aio_context(BlockDriverState *bs, | |
608 | AioContext *new_context); | |
609 | ||
33384421 HR |
610 | /** |
611 | * bdrv_add_aio_context_notifier: | |
612 | * | |
613 | * If a long-running job intends to be always run in the same AioContext as a | |
614 | * certain BDS, it may use this function to be notified of changes regarding the | |
615 | * association of the BDS to an AioContext. | |
616 | * | |
617 | * attached_aio_context() is called after the target BDS has been attached to a | |
618 | * new AioContext; detach_aio_context() is called before the target BDS is being | |
619 | * detached from its old AioContext. | |
620 | */ | |
621 | void bdrv_add_aio_context_notifier(BlockDriverState *bs, | |
622 | void (*attached_aio_context)(AioContext *new_context, void *opaque), | |
623 | void (*detach_aio_context)(void *opaque), void *opaque); | |
624 | ||
625 | /** | |
626 | * bdrv_remove_aio_context_notifier: | |
627 | * | |
628 | * Unsubscribe from change notifications regarding the BDS's AioContext. The | |
629 | * parameters given here have to be the same as those given to | |
630 | * bdrv_add_aio_context_notifier(). | |
631 | */ | |
632 | void bdrv_remove_aio_context_notifier(BlockDriverState *bs, | |
633 | void (*aio_context_attached)(AioContext *, | |
634 | void *), | |
635 | void (*aio_context_detached)(void *), | |
636 | void *opaque); | |
637 | ||
c9d1a561 PB |
638 | /** |
639 | * bdrv_wakeup: | |
640 | * @bs: The BlockDriverState for which an I/O operation has been completed. | |
641 | * | |
642 | * Wake up the main thread if it is waiting on BDRV_POLL_WHILE. During | |
643 | * synchronous I/O on a BlockDriverState that is attached to another | |
644 | * I/O thread, the main thread lets the I/O thread's event loop run, | |
645 | * waiting for the I/O operation to complete. A bdrv_wakeup will wake | |
646 | * up the main thread if necessary. | |
647 | * | |
648 | * Manual calls to bdrv_wakeup are rarely necessary, because | |
649 | * bdrv_dec_in_flight already calls it. | |
650 | */ | |
651 | void bdrv_wakeup(BlockDriverState *bs); | |
652 | ||
508c7cb3 CH |
653 | #ifdef _WIN32 |
654 | int is_windows_drive(const char *filename); | |
655 | #endif | |
656 | ||
dc534f8f PB |
657 | /** |
658 | * stream_start: | |
2323322e AG |
659 | * @job_id: The id of the newly-created job, or %NULL to use the |
660 | * device name of @bs. | |
dc534f8f PB |
661 | * @bs: Block device to operate on. |
662 | * @base: Block device that will become the new base, or %NULL to | |
663 | * flatten the whole backing file chain onto @bs. | |
29338003 AG |
664 | * @backing_file_str: The file name that will be written to @bs as the |
665 | * new backing file if the job completes. Ignored if @base is %NULL. | |
c83c66c3 | 666 | * @speed: The maximum speed, in bytes per second, or 0 for unlimited. |
1d809098 | 667 | * @on_error: The action to take upon error. |
fd7f8c65 | 668 | * @errp: Error object. |
dc534f8f PB |
669 | * |
670 | * Start a streaming operation on @bs. Clusters that are unallocated | |
671 | * in @bs, but allocated in any image between @base and @bs (both | |
672 | * exclusive) will be written to @bs. At the end of a successful | |
673 | * streaming job, the backing file of @bs will be changed to | |
29338003 AG |
674 | * @backing_file_str in the written image and to @base in the live |
675 | * BlockDriverState. | |
dc534f8f | 676 | */ |
2323322e AG |
677 | void stream_start(const char *job_id, BlockDriverState *bs, |
678 | BlockDriverState *base, const char *backing_file_str, | |
8254b6d9 | 679 | int64_t speed, BlockdevOnError on_error, Error **errp); |
4f1043b4 | 680 | |
747ff602 JC |
681 | /** |
682 | * commit_start: | |
fd62c609 AG |
683 | * @job_id: The id of the newly-created job, or %NULL to use the |
684 | * device name of @bs. | |
03544a6e FZ |
685 | * @bs: Active block device. |
686 | * @top: Top block device to be committed. | |
687 | * @base: Block device that will be written into, and become the new top. | |
747ff602 JC |
688 | * @speed: The maximum speed, in bytes per second, or 0 for unlimited. |
689 | * @on_error: The action to take upon error. | |
54e26900 | 690 | * @backing_file_str: String to use as the backing file in @top's overlay. |
747ff602 JC |
691 | * @errp: Error object. |
692 | * | |
693 | */ | |
fd62c609 AG |
694 | void commit_start(const char *job_id, BlockDriverState *bs, |
695 | BlockDriverState *base, BlockDriverState *top, int64_t speed, | |
8254b6d9 JS |
696 | BlockdevOnError on_error, const char *backing_file_str, |
697 | Error **errp); | |
03544a6e FZ |
698 | /** |
699 | * commit_active_start: | |
fd62c609 AG |
700 | * @job_id: The id of the newly-created job, or %NULL to use the |
701 | * device name of @bs. | |
03544a6e FZ |
702 | * @bs: Active block device to be committed. |
703 | * @base: Block device that will be written into, and become the new top. | |
47970dfb JS |
704 | * @creation_flags: Flags that control the behavior of the Job lifetime. |
705 | * See @BlockJobCreateFlags | |
03544a6e FZ |
706 | * @speed: The maximum speed, in bytes per second, or 0 for unlimited. |
707 | * @on_error: The action to take upon error. | |
708 | * @cb: Completion function for the job. | |
709 | * @opaque: Opaque pointer value passed to @cb. | |
710 | * @errp: Error object. | |
b49f7ead | 711 | * @auto_complete: Auto complete the job. |
03544a6e FZ |
712 | * |
713 | */ | |
fd62c609 | 714 | void commit_active_start(const char *job_id, BlockDriverState *bs, |
47970dfb JS |
715 | BlockDriverState *base, int creation_flags, |
716 | int64_t speed, BlockdevOnError on_error, | |
097310b5 | 717 | BlockCompletionFunc *cb, |
b49f7ead | 718 | void *opaque, Error **errp, bool auto_complete); |
893f7eba PB |
719 | /* |
720 | * mirror_start: | |
71aa9867 AG |
721 | * @job_id: The id of the newly-created job, or %NULL to use the |
722 | * device name of @bs. | |
893f7eba PB |
723 | * @bs: Block device to operate on. |
724 | * @target: Block device to write to. | |
09158f00 BC |
725 | * @replaces: Block graph node name to replace once the mirror is done. Can |
726 | * only be used when full mirroring is selected. | |
893f7eba | 727 | * @speed: The maximum speed, in bytes per second, or 0 for unlimited. |
eee13dfe | 728 | * @granularity: The chosen granularity for the dirty bitmap. |
08e4ed6c | 729 | * @buf_size: The amount of data that can be in flight at one time. |
893f7eba | 730 | * @mode: Whether to collapse all images in the chain to the target. |
274fccee | 731 | * @backing_mode: How to establish the target's backing chain after completion. |
b952b558 PB |
732 | * @on_source_error: The action to take upon error reading from the source. |
733 | * @on_target_error: The action to take upon error writing to the target. | |
0fc9f8ea | 734 | * @unmap: Whether to unmap target where source sectors only contain zeroes. |
893f7eba PB |
735 | * @errp: Error object. |
736 | * | |
737 | * Start a mirroring operation on @bs. Clusters that are allocated | |
e7e4f9f9 | 738 | * in @bs will be written to @target until the job is cancelled or |
893f7eba PB |
739 | * manually completed. At the end of a successful mirroring job, |
740 | * @bs will be switched to read from @target. | |
741 | */ | |
71aa9867 AG |
742 | void mirror_start(const char *job_id, BlockDriverState *bs, |
743 | BlockDriverState *target, const char *replaces, | |
5fba6c0e | 744 | int64_t speed, uint32_t granularity, int64_t buf_size, |
274fccee HR |
745 | MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, |
746 | BlockdevOnError on_source_error, | |
b952b558 | 747 | BlockdevOnError on_target_error, |
8254b6d9 | 748 | bool unmap, Error **errp); |
893f7eba | 749 | |
98d2c6f2 DM |
750 | /* |
751 | * backup_start: | |
70559d49 AG |
752 | * @job_id: The id of the newly-created job, or %NULL to use the |
753 | * device name of @bs. | |
98d2c6f2 DM |
754 | * @bs: Block device to operate on. |
755 | * @target: Block device to write to. | |
756 | * @speed: The maximum speed, in bytes per second, or 0 for unlimited. | |
fc5d3f84 | 757 | * @sync_mode: What parts of the disk image should be copied to the destination. |
4b80ab2b | 758 | * @sync_bitmap: The dirty bitmap if sync_mode is MIRROR_SYNC_MODE_INCREMENTAL. |
98d2c6f2 DM |
759 | * @on_source_error: The action to take upon error reading from the source. |
760 | * @on_target_error: The action to take upon error writing to the target. | |
47970dfb JS |
761 | * @creation_flags: Flags that control the behavior of the Job lifetime. |
762 | * See @BlockJobCreateFlags | |
98d2c6f2 DM |
763 | * @cb: Completion function for the job. |
764 | * @opaque: Opaque pointer value passed to @cb. | |
78f51fde | 765 | * @txn: Transaction that this job is part of (may be NULL). |
98d2c6f2 DM |
766 | * |
767 | * Start a backup operation on @bs. Clusters in @bs are written to @target | |
768 | * until the job is cancelled or manually completed. | |
769 | */ | |
70559d49 AG |
770 | void backup_start(const char *job_id, BlockDriverState *bs, |
771 | BlockDriverState *target, int64_t speed, | |
772 | MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap, | |
13b9414b | 773 | bool compress, |
fc5d3f84 | 774 | BlockdevOnError on_source_error, |
98d2c6f2 | 775 | BlockdevOnError on_target_error, |
47970dfb | 776 | int creation_flags, |
097310b5 | 777 | BlockCompletionFunc *cb, void *opaque, |
78f51fde | 778 | BlockJobTxn *txn, Error **errp); |
98d2c6f2 | 779 | |
abb21ac3 KW |
780 | void hmp_drive_add_node(Monitor *mon, const char *optstr); |
781 | ||
f21d96d0 KW |
782 | BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, |
783 | const char *child_name, | |
36fe1331 KW |
784 | const BdrvChildRole *child_role, |
785 | void *opaque); | |
f21d96d0 KW |
786 | void bdrv_root_unref_child(BdrvChild *child); |
787 | ||
1f0c461b | 788 | const char *bdrv_get_parent_name(const BlockDriverState *bs); |
a7f53e26 MA |
789 | void blk_dev_change_media_cb(BlockBackend *blk, bool load); |
790 | bool blk_dev_has_removable_media(BlockBackend *blk); | |
8f3a73bc | 791 | bool blk_dev_has_tray(BlockBackend *blk); |
a7f53e26 MA |
792 | void blk_dev_eject_request(BlockBackend *blk, bool force); |
793 | bool blk_dev_is_tray_open(BlockBackend *blk); | |
794 | bool blk_dev_is_medium_locked(BlockBackend *blk); | |
a7f53e26 | 795 | |
6d078599 | 796 | void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int64_t nr_sect); |
439db28c | 797 | bool bdrv_requests_pending(BlockDriverState *bs); |
e0c47b6c | 798 | |
df9a681d FZ |
799 | void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out); |
800 | void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in); | |
801 | ||
99723548 PB |
802 | void bdrv_inc_in_flight(BlockDriverState *bs); |
803 | void bdrv_dec_in_flight(BlockDriverState *bs); | |
804 | ||
9c4218e9 HR |
805 | void blockdev_close_all_bdrv_states(void); |
806 | ||
ea2384d3 | 807 | #endif /* BLOCK_INT_H */ |