]>
Commit | Line | Data |
---|---|---|
ebc2752b EGE |
1 | /* |
2 | * QEMU System Emulator block driver | |
3 | * | |
4 | * Copyright (c) 2003 Fabrice Bellard | |
5 | * | |
6 | * Permission is hereby granted, free of charge, to any person obtaining a copy | |
7 | * of this software and associated documentation files (the "Software"), to deal | |
8 | * in the Software without restriction, including without limitation the rights | |
9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
10 | * copies of the Software, and to permit persons to whom the Software is | |
11 | * furnished to do so, subject to the following conditions: | |
12 | * | |
13 | * The above copyright notice and this permission notice shall be included in | |
14 | * all copies or substantial portions of the Software. | |
15 | * | |
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
22 | * THE SOFTWARE. | |
23 | */ | |
24 | #ifndef BLOCK_INT_COMMON_H | |
25 | #define BLOCK_INT_COMMON_H | |
26 | ||
e2c1c34f MA |
27 | #include "block/aio.h" |
28 | #include "block/block-common.h" | |
29 | #include "block/block-global-state.h" | |
ebc2752b | 30 | #include "block/snapshot.h" |
e2c1c34f | 31 | #include "qemu/iov.h" |
ebc2752b | 32 | #include "qemu/rcu.h" |
e2c1c34f | 33 | #include "qemu/stats64.h" |
ebc2752b EGE |
34 | |
35 | #define BLOCK_FLAG_LAZY_REFCOUNTS 8 | |
36 | ||
37 | #define BLOCK_OPT_SIZE "size" | |
38 | #define BLOCK_OPT_ENCRYPT "encryption" | |
39 | #define BLOCK_OPT_ENCRYPT_FORMAT "encrypt.format" | |
40 | #define BLOCK_OPT_COMPAT6 "compat6" | |
41 | #define BLOCK_OPT_HWVERSION "hwversion" | |
42 | #define BLOCK_OPT_BACKING_FILE "backing_file" | |
43 | #define BLOCK_OPT_BACKING_FMT "backing_fmt" | |
44 | #define BLOCK_OPT_CLUSTER_SIZE "cluster_size" | |
45 | #define BLOCK_OPT_TABLE_SIZE "table_size" | |
46 | #define BLOCK_OPT_PREALLOC "preallocation" | |
47 | #define BLOCK_OPT_SUBFMT "subformat" | |
48 | #define BLOCK_OPT_COMPAT_LEVEL "compat" | |
49 | #define BLOCK_OPT_LAZY_REFCOUNTS "lazy_refcounts" | |
50 | #define BLOCK_OPT_ADAPTER_TYPE "adapter_type" | |
51 | #define BLOCK_OPT_REDUNDANCY "redundancy" | |
52 | #define BLOCK_OPT_NOCOW "nocow" | |
53 | #define BLOCK_OPT_EXTENT_SIZE_HINT "extent_size_hint" | |
54 | #define BLOCK_OPT_OBJECT_SIZE "object_size" | |
55 | #define BLOCK_OPT_REFCOUNT_BITS "refcount_bits" | |
56 | #define BLOCK_OPT_DATA_FILE "data_file" | |
57 | #define BLOCK_OPT_DATA_FILE_RAW "data_file_raw" | |
58 | #define BLOCK_OPT_COMPRESSION_TYPE "compression_type" | |
59 | #define BLOCK_OPT_EXTL2 "extended_l2" | |
60 | ||
61 | #define BLOCK_PROBE_BUF_SIZE 512 | |
62 | ||
63 | enum BdrvTrackedRequestType { /* kind of operation stored in BdrvTrackedRequest.type */ | |
64 | BDRV_TRACKED_READ, | |
65 | BDRV_TRACKED_WRITE, | |
66 | BDRV_TRACKED_DISCARD, | |
67 | BDRV_TRACKED_TRUNCATE, | |
68 | }; | |
69 | ||
70 | /* | |
71 | * It is unfortunate that the BdrvTrackedRequest structure is public, | |
72 | * as block/io.c is very careful about incoming offset/bytes being | |
73 | * correct. Be sure to assert that bdrv_check_request() succeeded after | |
74 | * any modification of a BdrvTrackedRequest object outside of block/io.c. | |
75 | */ | |
76 | typedef struct BdrvTrackedRequest { /* bookkeeping record for one tracked block request */ | |
77 | BlockDriverState *bs; /* NOTE(review): presumably the node the request was issued on - confirm */ | |
78 | int64_t offset; /* offset/bytes must keep passing bdrv_check_request() */ | |
79 | int64_t bytes; | |
80 | enum BdrvTrackedRequestType type; /* one of BDRV_TRACKED_* */ | |
81 | ||
82 | bool serialising; | |
83 | int64_t overlap_offset; /* NOTE(review): presumably the range compared against other requests - confirm in block/io.c */ | |
84 | int64_t overlap_bytes; | |
85 | ||
86 | QLIST_ENTRY(BdrvTrackedRequest) list; /* linkage for a QLIST of tracked requests */ | |
87 | Coroutine *co; /* owner, used for deadlock detection */ | |
88 | CoQueue wait_queue; /* coroutines blocked on this request */ | |
89 | ||
90 | struct BdrvTrackedRequest *waiting_for; /* NOTE(review): presumably the request this one is blocked on - confirm */ | |
91 | } BdrvTrackedRequest; | |
92 | ||
93 | ||
94 | struct BlockDriver { | |
69c0bf11 EGE |
95 | /* |
96 | * These fields are initialized when this object is created, | |
97 | * and are never changed afterwards. | |
98 | */ | |
99 | ||
ebc2752b EGE |
100 | const char *format_name; |
101 | int instance_size; | |
102 | ||
103 | /* | |
104 | * Set to true if the BlockDriver is a block filter. Block filters pass | |
105 | * certain callbacks that refer to data (see block.c) to their bs->file | |
106 | * or bs->backing (whichever one exists) if the driver doesn't implement | |
107 | * them. Drivers that do not wish to forward must implement them and return | |
108 | * -ENOTSUP. | |
109 | * Note that filters are not allowed to modify data. | |
110 | * | |
111 | * Filters generally cannot have more than a single filtered child, | |
112 | * because the data they present must at all times be the same as | |
113 | * that on their filtered child. That would be impossible to | |
114 | * achieve for multiple filtered children. | |
115 | * (And this filtered child must then be bs->file or bs->backing.) | |
116 | */ | |
117 | bool is_filter; | |
046fd84f VSO |
118 | /* |
119 | * Only makes sense for filter drivers; for all others it must be false. | |
120 | * If true, filtered child is bs->backing. Otherwise it's bs->file. | |
1921b4f7 VSO |
121 | * Two internal filters use bs->backing as filtered child and have this |
122 | * field set to true: mirror_top and commit_top. There are also two such | |
123 | * test filters in tests/unit/test-bdrv-graph-mod.c. | |
046fd84f VSO |
124 | * |
125 | * Never create any more such filters! | |
126 | * | |
127 | * TODO: imagine how to deprecate this behavior and make all filters work | |
128 | * similarly using bs->file as filtered child. | |
129 | */ | |
130 | bool filtered_child_is_backing; | |
131 | ||
ebc2752b EGE |
132 | /* |
133 | * Set to true if the BlockDriver is a format driver. Format nodes | |
134 | * generally do not expect their children to be other format nodes | |
135 | * (except for backing files), and so format probing is disabled | |
136 | * on those children. | |
137 | */ | |
138 | bool is_format; | |
139 | ||
774c726c SL |
140 | /* |
141 | * Set to true if the BlockDriver supports zoned children. | |
142 | */ | |
143 | bool supports_zoned_children; | |
144 | ||
69c0bf11 EGE |
145 | /* |
146 | * Drivers not implementing bdrv_parse_filename nor bdrv_open should have | |
147 | * this field set to true, except ones that are defined only by their | |
148 | * child's bs. | |
149 | * An example of the last type will be the quorum block driver. | |
150 | */ | |
151 | bool bdrv_needs_filename; | |
152 | ||
153 | /* | |
154 | * Set if a driver can support backing files. This also implies the | |
155 | * following semantics: | |
156 | * | |
157 | * - Return status 0 of .bdrv_co_block_status means that corresponding | |
158 | * blocks are not allocated in this layer of backing-chain | |
159 | * - For such (unallocated) blocks, read will: | |
160 | * - fill buffer with zeros if there is no backing file | |
161 | * - read from the backing file otherwise, where the block layer | |
162 | * takes care of reading zeros beyond EOF if backing file is short | |
163 | */ | |
164 | bool supports_backing; | |
165 | ||
69c0bf11 EGE |
166 | /* |
167 | * Drivers setting this field must be able to work with just a plain | |
168 | * filename with '<protocol_name>:' as a prefix, and no other options. | |
169 | * Options may be extracted from the filename by implementing | |
170 | * bdrv_parse_filename. | |
171 | */ | |
172 | const char *protocol_name; | |
173 | ||
174 | /* List of options for creating images, terminated by name == NULL */ | |
175 | QemuOptsList *create_opts; | |
176 | ||
177 | /* List of options for image amend */ | |
178 | QemuOptsList *amend_opts; | |
179 | ||
180 | /* | |
181 | * If this driver supports reopening images this contains a | |
182 | * NULL-terminated list of the runtime options that can be | |
183 | * modified. If an option in this list is unspecified during | |
184 | * reopen then it _must_ be reset to its default value or return | |
185 | * an error. | |
186 | */ | |
187 | const char *const *mutable_opts; | |
188 | ||
189 | /* | |
190 | * Pointer to a NULL-terminated array of names of strong options | |
191 | * that can be specified for bdrv_open(). A strong option is one | |
192 | * that changes the data of a BDS. | |
193 | * If this pointer is NULL, the array is considered empty. | |
194 | * "filename" and "driver" are always considered strong. | |
195 | */ | |
196 | const char *const *strong_runtime_opts; | |
197 | ||
198 | ||
199 | /* | |
200 | * Global state (GS) API. These functions run under the BQL. | |
201 | * | |
202 | * See include/block/block-global-state.h for more information about | |
203 | * the GS API. | |
204 | */ | |
205 | ||
ebc2752b EGE |
206 | /* |
207 | * This function is invoked under BQL before .bdrv_co_amend() | |
208 | * (which in contrast does not necessarily run under the BQL) | |
209 | * to allow driver-specific initialization code that requires | |
210 | * the BQL, like setting up specific permission flags. | |
211 | */ | |
840428a2 EGE |
212 | int GRAPH_RDLOCK_PTR (*bdrv_amend_pre_run)( |
213 | BlockDriverState *bs, Error **errp); | |
ebc2752b EGE |
214 | /* |
215 | * This function is invoked under BQL after .bdrv_co_amend() | |
216 | * to allow cleaning up what was done in .bdrv_amend_pre_run(). | |
217 | */ | |
840428a2 | 218 | void GRAPH_RDLOCK_PTR (*bdrv_amend_clean)(BlockDriverState *bs); |
ebc2752b EGE |
219 | |
220 | /* | |
221 | * Return true if @to_replace can be replaced by a BDS with the | |
222 | * same data as @bs without it affecting @bs's behavior (that is, | |
223 | * without it being visible to @bs's parents). | |
224 | */ | |
533c6e4e KW |
225 | bool GRAPH_RDLOCK_PTR (*bdrv_recurse_can_replace)( |
226 | BlockDriverState *bs, BlockDriverState *to_replace); | |
ebc2752b | 227 | |
ebc2752b EGE |
228 | int (*bdrv_probe_device)(const char *filename); |
229 | ||
230 | /* | |
231 | * Any driver implementing this callback is expected to be able to handle | |
232 | * NULL file names in its .bdrv_open() implementation. | |
233 | */ | |
234 | void (*bdrv_parse_filename)(const char *filename, QDict *options, | |
235 | Error **errp); | |
ebc2752b | 236 | |
69c0bf11 | 237 | /* For handling image reopen for split or non-split files. */ |
ce433d29 KW |
238 | int GRAPH_UNLOCKED_PTR (*bdrv_reopen_prepare)( |
239 | BDRVReopenState *reopen_state, BlockReopenQueue *queue, Error **errp); | |
240 | void GRAPH_UNLOCKED_PTR (*bdrv_reopen_commit)( | |
241 | BDRVReopenState *reopen_state); | |
242 | void GRAPH_UNLOCKED_PTR (*bdrv_reopen_commit_post)( | |
243 | BDRVReopenState *reopen_state); | |
244 | void GRAPH_UNLOCKED_PTR (*bdrv_reopen_abort)( | |
245 | BDRVReopenState *reopen_state); | |
ebc2752b EGE |
246 | void (*bdrv_join_options)(QDict *options, QDict *old_options); |
247 | ||
1a30b0f5 KW |
248 | int GRAPH_UNLOCKED_PTR (*bdrv_open)( |
249 | BlockDriverState *bs, QDict *options, int flags, Error **errp); | |
ebc2752b EGE |
250 | |
251 | /* Protocol drivers should implement this instead of bdrv_open */ | |
1a30b0f5 KW |
252 | int GRAPH_UNLOCKED_PTR (*bdrv_file_open)( |
253 | BlockDriverState *bs, QDict *options, int flags, Error **errp); | |
ebc2752b EGE |
254 | void (*bdrv_close)(BlockDriverState *bs); |
255 | ||
4db7ba3b | 256 | int coroutine_fn GRAPH_UNLOCKED_PTR (*bdrv_co_create)( |
4ec8df01 KW |
257 | BlockdevCreateOptions *opts, Error **errp); |
258 | ||
4db7ba3b | 259 | int coroutine_fn GRAPH_UNLOCKED_PTR (*bdrv_co_create_opts)( |
4ec8df01 | 260 | BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp); |
ebc2752b | 261 | |
bd131d67 KW |
262 | int GRAPH_RDLOCK_PTR (*bdrv_amend_options)( |
263 | BlockDriverState *bs, QemuOpts *opts, | |
264 | BlockDriverAmendStatusCB *status_cb, void *cb_opaque, | |
265 | bool force, Error **errp); | |
ebc2752b | 266 | |
0bb79c97 | 267 | int GRAPH_RDLOCK_PTR (*bdrv_make_empty)(BlockDriverState *bs); |
ebc2752b EGE |
268 | |
269 | /* | |
270 | * Refreshes the bs->exact_filename field. If that is impossible, | |
271 | * bs->exact_filename has to be left empty. | |
272 | */ | |
b7cfc7d5 | 273 | void GRAPH_RDLOCK_PTR (*bdrv_refresh_filename)(BlockDriverState *bs); |
ebc2752b EGE |
274 | |
275 | /* | |
276 | * Gathers the open options for all children into @target. | |
277 | * A simple format driver (without backing file support) might | |
278 | * implement this function like this: | |
279 | * | |
280 | * QINCREF(bs->file->bs->full_open_options); | |
281 | * qdict_put(target, "file", bs->file->bs->full_open_options); | |
282 | * | |
283 | * If not specified, the generic implementation will simply put | |
284 | * all children's options under their respective name. | |
285 | * | |
286 | * @backing_overridden is true when bs->backing seems not to be | |
287 | * the child that would result from opening bs->backing_file. | |
288 | * Therefore, if it is true, the backing child's options should be | |
289 | * gathered; otherwise, there is no need since the backing child | |
290 | * is the one implied by the image header. | |
291 | * | |
292 | * Note that ideally this function would not be needed. Every | |
293 | * block driver which implements it is probably doing something | |
294 | * shady regarding its runtime option structure. | |
295 | */ | |
b7cfc7d5 KW |
296 | void GRAPH_RDLOCK_PTR (*bdrv_gather_child_options)( |
297 | BlockDriverState *bs, QDict *target, bool backing_overridden); | |
ebc2752b EGE |
298 | |
299 | /* | |
300 | * Returns an allocated string which is the directory name of this BDS: It | |
301 | * will be used to make relative filenames absolute by prepending this | |
302 | * function's return value to them. | |
303 | */ | |
b7cfc7d5 | 304 | char * GRAPH_RDLOCK_PTR (*bdrv_dirname)(BlockDriverState *bs, Error **errp); |
ebc2752b | 305 | |
69c0bf11 EGE |
306 | /* |
307 | * This informs the driver that we are no longer interested in the result | |
308 | * of in-flight requests, so don't waste the time if possible. | |
309 | * | |
310 | * One example usage is to avoid waiting for an nbd target node reconnect | |
311 | * timeout during job-cancel with force=true. | |
312 | */ | |
79a55866 | 313 | void GRAPH_RDLOCK_PTR (*bdrv_cancel_in_flight)(BlockDriverState *bs); |
69c0bf11 | 314 | |
3804e3cf | 315 | int GRAPH_RDLOCK_PTR (*bdrv_inactivate)(BlockDriverState *bs); |
69c0bf11 | 316 | |
a32e7818 KW |
317 | int GRAPH_RDLOCK_PTR (*bdrv_snapshot_create)( |
318 | BlockDriverState *bs, QEMUSnapshotInfo *sn_info); | |
319 | ||
320 | int GRAPH_UNLOCKED_PTR (*bdrv_snapshot_goto)( | |
321 | BlockDriverState *bs, const char *snapshot_id); | |
322 | ||
323 | int GRAPH_RDLOCK_PTR (*bdrv_snapshot_delete)( | |
324 | BlockDriverState *bs, const char *snapshot_id, const char *name, | |
325 | Error **errp); | |
326 | ||
79a55866 KW |
327 | int GRAPH_RDLOCK_PTR (*bdrv_snapshot_list)( |
328 | BlockDriverState *bs, QEMUSnapshotInfo **psn_info); | |
329 | ||
330 | int GRAPH_RDLOCK_PTR (*bdrv_snapshot_load_tmp)( | |
331 | BlockDriverState *bs, const char *snapshot_id, const char *name, | |
332 | Error **errp); | |
69c0bf11 | 333 | |
e2dd2737 KW |
334 | int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_change_backing_file)( |
335 | BlockDriverState *bs, const char *backing_file, | |
336 | const char *backing_fmt); | |
69c0bf11 EGE |
337 | |
338 | /* TODO Better pass an option string/QDict/QemuOpts to add any rule? */ | |
339 | int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event, | |
340 | const char *tag); | |
341 | int (*bdrv_debug_remove_breakpoint)(BlockDriverState *bs, | |
342 | const char *tag); | |
343 | int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag); | |
344 | bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag); | |
345 | ||
e19b157f KW |
346 | void GRAPH_RDLOCK_PTR (*bdrv_refresh_limits)( |
347 | BlockDriverState *bs, Error **errp); | |
69c0bf11 EGE |
348 | |
349 | /* | |
350 | * Returns 1 if newly created images are guaranteed to contain only | |
351 | * zeros, 0 otherwise. | |
352 | */ | |
06717986 | 353 | int GRAPH_RDLOCK_PTR (*bdrv_has_zero_init)(BlockDriverState *bs); |
69c0bf11 EGE |
354 | |
355 | /* | |
356 | * Remove fd handlers, timers, and other event loop callbacks so the event | |
357 | * loop is no longer in use. Called with no in-flight requests and in | |
358 | * depth-first traversal order with parents before child nodes. | |
359 | */ | |
360 | void (*bdrv_detach_aio_context)(BlockDriverState *bs); | |
361 | ||
362 | /* | |
363 | * Add fd handlers, timers, and other event loop callbacks so I/O requests | |
364 | * can be processed again. Called with no in-flight requests and in | |
365 | * depth-first traversal order with child nodes before parent nodes. | |
366 | */ | |
367 | void (*bdrv_attach_aio_context)(BlockDriverState *bs, | |
368 | AioContext *new_context); | |
369 | ||
ab613350 SH |
370 | /** |
371 | * bdrv_drain_begin is called if implemented in the beginning of a | |
372 | * drain operation to drain and stop any internal sources of requests in | |
373 | * the driver. | |
374 | * bdrv_drain_end is called if implemented at the end of the drain. | |
375 | * | |
376 | * They should be used by the driver to e.g. manage scheduled I/O | |
377 | * requests, or toggle an internal state. After the end of the drain new | |
378 | * requests will continue normally. | |
379 | * | |
380 | * Implementations of both functions must not call aio_poll(). | |
381 | */ | |
382 | void (*bdrv_drain_begin)(BlockDriverState *bs); | |
383 | void (*bdrv_drain_end)(BlockDriverState *bs); | |
384 | ||
69c0bf11 EGE |
385 | /** |
386 | * Try to get @bs's logical and physical block size. | |
387 | * On success, store them in @bsz and return zero. | |
388 | * On failure, return negative errno. | |
389 | */ | |
221caadc KW |
390 | int GRAPH_RDLOCK_PTR (*bdrv_probe_blocksizes)( |
391 | BlockDriverState *bs, BlockSizes *bsz); | |
69c0bf11 EGE |
392 | /** |
393 | * Try to get @bs's geometry (cyls, heads, sectors) | |
394 | * On success, store them in @geo and return 0. | |
395 | * On failure return -errno. | |
396 | * Only drivers that want to override guest geometry implement this | |
397 | * callback; see hd_geometry_guess(). | |
398 | */ | |
79a55866 KW |
399 | int GRAPH_RDLOCK_PTR (*bdrv_probe_geometry)( |
400 | BlockDriverState *bs, HDGeometry *geo); | |
69c0bf11 | 401 | |
9def6082 KW |
402 | void GRAPH_WRLOCK_PTR (*bdrv_add_child)( |
403 | BlockDriverState *parent, BlockDriverState *child, Error **errp); | |
404 | ||
405 | void GRAPH_WRLOCK_PTR (*bdrv_del_child)( | |
406 | BlockDriverState *parent, BdrvChild *child, Error **errp); | |
69c0bf11 EGE |
407 | |
408 | /** | |
409 | * Informs the block driver that a permission change is intended. The | |
410 | * driver checks whether the change is permissible and may take other | |
411 | * preparations for the change (e.g. get file system locks). This operation | |
412 | * is always followed by a call to either .bdrv_set_perm or | |
413 | * .bdrv_abort_perm_update. | |
414 | * | |
415 | * Checks whether the requested set of cumulative permissions in @perm | |
416 | * can be granted for accessing @bs and whether no other users are using | |
417 | * permissions other than those given in @shared (both arguments take | |
418 | * BLK_PERM_* bitmasks). | |
419 | * | |
420 | * If both conditions are met, 0 is returned. Otherwise, -errno is returned | |
421 | * and errp is set to an error describing the conflict. | |
422 | */ | |
bce73bc2 KW |
423 | int GRAPH_RDLOCK_PTR (*bdrv_check_perm)(BlockDriverState *bs, uint64_t perm, |
424 | uint64_t shared, Error **errp); | |
69c0bf11 EGE |
425 | |
426 | /** | |
427 | * Called to inform the driver that the cumulative set of used | |
a1a62ced | 428 | * permissions for @bs has changed to @perm, and the set of shareable |
69c0bf11 EGE |
429 | * permissions to @shared. The driver can use this to propagate changes to |
430 | * its children (i.e. request permissions only if a parent actually needs | |
431 | * them). | |
432 | * | |
433 | * This function is only invoked after bdrv_check_perm(), so block drivers | |
434 | * may rely on preparations made in their .bdrv_check_perm implementation. | |
435 | */ | |
bce73bc2 KW |
436 | void GRAPH_RDLOCK_PTR (*bdrv_set_perm)( |
437 | BlockDriverState *bs, uint64_t perm, uint64_t shared); | |
69c0bf11 EGE |
438 | |
439 | /* | |
440 | * Called to inform the driver that after a previous bdrv_check_perm() | |
441 | * call, the permission update is not performed and any preparations made | |
442 | * for it (e.g. taken file locks) need to be undone. | |
443 | * | |
444 | * This function can be called even for nodes that never saw a | |
445 | * bdrv_check_perm() call. It is a no-op then. | |
446 | */ | |
bce73bc2 | 447 | void GRAPH_RDLOCK_PTR (*bdrv_abort_perm_update)(BlockDriverState *bs); |
69c0bf11 EGE |
448 | |
449 | /** | |
450 | * Returns in @nperm and @nshared the permissions that the driver for @bs | |
451 | * needs on its child @c, based on the cumulative permissions requested by | |
452 | * the parents in @parent_perm and @parent_shared. | |
453 | * | |
454 | * If @c is NULL, return the permissions for attaching a new child for the | |
455 | * given @child_class and @role. | |
456 | * | |
457 | * If @reopen_queue is non-NULL, don't return the currently needed | |
458 | * permissions, but those that will be needed after applying the | |
459 | * @reopen_queue. | |
460 | */ | |
c629b6d2 KW |
461 | void GRAPH_RDLOCK_PTR (*bdrv_child_perm)( |
462 | BlockDriverState *bs, BdrvChild *c, BdrvChildRole role, | |
463 | BlockReopenQueue *reopen_queue, | |
464 | uint64_t parent_perm, uint64_t parent_shared, | |
465 | uint64_t *nperm, uint64_t *nshared); | |
69c0bf11 EGE |
466 | |
467 | /** | |
468 | * Register/unregister a buffer for I/O. For example, when the driver is | |
469 | * interested to know the memory areas that will later be used in iovs, so | |
470 | * that it can do IOMMU mapping with VFIO etc., in order to get better | |
471 | * performance. In the case of VFIO drivers, this callback is used to do | |
472 | * DMA mapping for hot buffers. | |
f4ec04ba SH |
473 | * |
474 | * Returns: true on success, false on failure | |
69c0bf11 | 475 | */ |
d9249c25 KW |
476 | bool GRAPH_RDLOCK_PTR (*bdrv_register_buf)( |
477 | BlockDriverState *bs, void *host, size_t size, Error **errp); | |
478 | void GRAPH_RDLOCK_PTR (*bdrv_unregister_buf)( | |
479 | BlockDriverState *bs, void *host, size_t size); | |
69c0bf11 EGE |
480 | |
481 | /* | |
482 | * This field is modified only under the BQL, and is part of | |
483 | * the global state. | |
484 | */ | |
485 | QLIST_ENTRY(BlockDriver) list; | |
486 | ||
487 | /* | |
488 | * I/O API functions. These functions are thread-safe. | |
489 | * | |
490 | * See include/block/block-io.h for more information about | |
491 | * the I/O API. | |
492 | */ | |
493 | ||
494 | int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename); | |
495 | ||
840428a2 EGE |
496 | int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_amend)( |
497 | BlockDriverState *bs, BlockdevAmendOptions *opts, bool force, | |
498 | Error **errp); | |
69c0bf11 | 499 | |
ebc2752b | 500 | /* aio */ |
7b1fb72e | 501 | BlockAIOCB * GRAPH_RDLOCK_PTR (*bdrv_aio_preadv)(BlockDriverState *bs, |
ebc2752b EGE |
502 | int64_t offset, int64_t bytes, QEMUIOVector *qiov, |
503 | BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque); | |
7b1fb72e KW |
504 | |
505 | BlockAIOCB * GRAPH_RDLOCK_PTR (*bdrv_aio_pwritev)(BlockDriverState *bs, | |
ebc2752b EGE |
506 | int64_t offset, int64_t bytes, QEMUIOVector *qiov, |
507 | BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque); | |
7b1fb72e | 508 | |
88095349 EGE |
509 | BlockAIOCB * GRAPH_RDLOCK_PTR (*bdrv_aio_flush)( |
510 | BlockDriverState *bs, BlockCompletionFunc *cb, void *opaque); | |
9a5a1c62 EGE |
511 | |
512 | BlockAIOCB * GRAPH_RDLOCK_PTR (*bdrv_aio_pdiscard)( | |
513 | BlockDriverState *bs, int64_t offset, int bytes, | |
ebc2752b EGE |
514 | BlockCompletionFunc *cb, void *opaque); |
515 | ||
7b1fb72e | 516 | int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_readv)(BlockDriverState *bs, |
ebc2752b EGE |
517 | int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); |
518 | ||
519 | /** | |
520 | * @offset: position in bytes to read at | |
521 | * @bytes: number of bytes to read | |
522 | * @qiov: the buffers to fill with read data | |
523 | * @flags: currently unused, always 0 | |
524 | * | |
525 | * @offset and @bytes will be a multiple of 'request_alignment', | |
526 | * but the length of individual @qiov elements does not have to | |
527 | * be a multiple. | |
528 | * | |
529 | * @bytes will always equal the total size of @qiov, and will be | |
530 | * no larger than 'max_transfer'. | |
531 | * | |
532 | * The buffer in @qiov may point directly to guest memory. | |
533 | */ | |
7b1fb72e | 534 | int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_preadv)(BlockDriverState *bs, |
ebc2752b EGE |
535 | int64_t offset, int64_t bytes, QEMUIOVector *qiov, |
536 | BdrvRequestFlags flags); | |
537 | ||
7b1fb72e KW |
538 | int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_preadv_part)( |
539 | BlockDriverState *bs, int64_t offset, int64_t bytes, | |
ebc2752b EGE |
540 | QEMUIOVector *qiov, size_t qiov_offset, |
541 | BdrvRequestFlags flags); | |
542 | ||
7b1fb72e | 543 | int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_writev)(BlockDriverState *bs, |
ebc2752b EGE |
544 | int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, |
545 | int flags); | |
546 | /** | |
547 | * @offset: position in bytes to write at | |
548 | * @bytes: number of bytes to write | |
549 | * @qiov: the buffers containing data to write | |
550 | * @flags: zero or more bits allowed by 'supported_write_flags' | |
551 | * | |
552 | * @offset and @bytes will be a multiple of 'request_alignment', | |
553 | * but the length of individual @qiov elements does not have to | |
554 | * be a multiple. | |
555 | * | |
556 | * @bytes will always equal the total size of @qiov, and will be | |
557 | * no larger than 'max_transfer'. | |
558 | * | |
559 | * The buffer in @qiov may point directly to guest memory. | |
560 | */ | |
7b1fb72e KW |
561 | int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pwritev)( |
562 | BlockDriverState *bs, int64_t offset, int64_t bytes, QEMUIOVector *qiov, | |
ebc2752b | 563 | BdrvRequestFlags flags); |
7b1fb72e KW |
564 | int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pwritev_part)( |
565 | BlockDriverState *bs, int64_t offset, int64_t bytes, QEMUIOVector *qiov, | |
566 | size_t qiov_offset, BdrvRequestFlags flags); | |
ebc2752b EGE |
567 | |
568 | /* | |
569 | * Efficiently zero a region of the disk image. Typically an image format | |
570 | * would use a compact metadata representation to implement this. This | |
571 | * function pointer may be NULL or return -ENOTSUP and .bdrv_co_writev() | |
572 | * will be called instead. | |
573 | */ | |
abaf8b75 KW |
574 | int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pwrite_zeroes)( |
575 | BlockDriverState *bs, int64_t offset, int64_t bytes, | |
576 | BdrvRequestFlags flags); | |
9a5a1c62 EGE |
577 | |
578 | int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pdiscard)( | |
579 | BlockDriverState *bs, int64_t offset, int64_t bytes); | |
ebc2752b EGE |
580 | |
581 | /* | |
582 | * Map [offset, offset + bytes) range onto a child of @bs to copy from, | |
583 | * and invoke bdrv_co_copy_range_from(child, ...), or invoke | |
584 | * bdrv_co_copy_range_to() if @bs is the leaf child to copy data from. | |
585 | * | |
586 | * See the comment of bdrv_co_copy_range for the parameter and return value | |
587 | * semantics. | |
588 | */ | |
742bf09b EGE |
589 | int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_copy_range_from)( |
590 | BlockDriverState *bs, BdrvChild *src, int64_t offset, | |
591 | BdrvChild *dst, int64_t dst_offset, int64_t bytes, | |
592 | BdrvRequestFlags read_flags, BdrvRequestFlags write_flags); | |
ebc2752b EGE |
593 | |
594 | /* | |
595 | * Map [offset, offset + nbytes) range onto a child of bs to copy data to, | |
596 | * and invoke bdrv_co_copy_range_to(child, src, ...), or perform the copy | |
597 | * operation if @bs is the leaf and @src has the same BlockDriver. Return | |
598 | * -ENOTSUP if @bs is the leaf but @src has a different BlockDriver. | |
599 | * | |
600 | * See the comment of bdrv_co_copy_range for the parameter and return value | |
601 | * semantics. | |
602 | */ | |
742bf09b EGE |
603 | int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_copy_range_to)( |
604 | BlockDriverState *bs, BdrvChild *src, int64_t src_offset, | |
605 | BdrvChild *dst, int64_t dst_offset, int64_t bytes, | |
606 | BdrvRequestFlags read_flags, BdrvRequestFlags write_flags); | |
ebc2752b EGE |
607 | |
608 | /* | |
609 | * Building block for bdrv_block_status[_above] and | |
610 | * bdrv_is_allocated[_above]. The driver should answer only | |
611 | * according to the current layer, and should only need to set | |
612 | * BDRV_BLOCK_DATA, BDRV_BLOCK_ZERO, BDRV_BLOCK_OFFSET_VALID, | |
613 | * and/or BDRV_BLOCK_RAW; if the current layer defers to a backing | |
614 | * layer, the result should be 0 (and not BDRV_BLOCK_ZERO). See | |
615 | * block.h for the overall meaning of the bits. As a hint, the | |
616 | * flag want_zero is true if the caller cares more about precise | |
617 | * mappings (favor accurate _OFFSET_VALID/_ZERO) or false for | |
618 | * overall allocation (favor larger *pnum, perhaps by reporting | |
619 | * _DATA instead of _ZERO). The block layer guarantees input | |
620 | * clamped to bdrv_getlength() and aligned to request_alignment, | |
621 | * as well as non-NULL pnum, map, and file; in turn, the driver | |
622 | * must return an error or set pnum to an aligned non-zero value. | |
623 | * | |
624 | * Note that @bytes is just a hint on how big of a region the | |
625 | * caller wants to inspect. It is not a limit on *pnum. | |
626 | * Implementations are free to return larger values of *pnum if | |
627 | * doing so does not incur a performance penalty. | |
628 | * | |
629 | * block/io.c's bdrv_co_block_status() will utilize an unclamped | |
630 | * *pnum value for the block-status cache on protocol nodes, prior | |
631 | * to clamping *pnum for return to its caller. | |
632 | */ | |
7ff9579e KW |
633 | int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_block_status)( |
634 | BlockDriverState *bs, | |
ebc2752b EGE |
635 | bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum, |
636 | int64_t *map, BlockDriverState **file); | |
637 | ||
ce14f3b4 VSO |
638 | /* |
639 | * Snapshot-access API. | |
640 | * | |
641 | * Block-driver may provide snapshot-access API: special functions to access | |
642 | * some internal "snapshot". The functions are similar to the normal | |
643 | * read/block_status/discard handlers, but don't have any specific handling | |
644 | * in generic block-layer: no serializing, no alignment, no tracked | |
645 | * requests. So, block-driver that realizes these APIs is fully responsible | |
646 | * for synchronization between snapshot-access API and normal IO requests. | |
1c14eaab VSO |
647 | * |
648 | * TODO: To be able to support qcow2's internal snapshots, this API will | |
649 | * need to be extended to: | |
650 | * - be able to select a specific snapshot | |
651 | * - receive the snapshot's actual length (which may differ from bs's | |
652 | * length) | |
ce14f3b4 | 653 | */ |
7b9e8b22 KW |
654 | int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_preadv_snapshot)( |
655 | BlockDriverState *bs, int64_t offset, int64_t bytes, | |
656 | QEMUIOVector *qiov, size_t qiov_offset); | |
657 | ||
658 | int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_snapshot_block_status)( | |
659 | BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes, | |
660 | int64_t *pnum, int64_t *map, BlockDriverState **file); | |
9a5a1c62 EGE |
661 | |
662 | int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pdiscard_snapshot)( | |
663 | BlockDriverState *bs, int64_t offset, int64_t bytes); | |
ce14f3b4 | 664 | |
ebc2752b EGE |
665 | /* |
666 | * Invalidate any cached meta-data. | |
667 | */ | |
1b3ff9fe KW |
668 | void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_invalidate_cache)( |
669 | BlockDriverState *bs, Error **errp); | |
ebc2752b EGE |
670 | |
671 | /* | |
672 | * Flushes all data for all layers by calling bdrv_co_flush for underlying | |
673 | * layers, if needed. This function is needed for deterministic | |
674 | * synchronization of the flush finishing callback. | |
675 | */ | |
88095349 | 676 | int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_flush)(BlockDriverState *bs); |
ebc2752b EGE |
677 | |
678 | /* Delete a created file. */ | |
48aef794 KW |
679 | int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_delete_file)( |
680 | BlockDriverState *bs, Error **errp); | |
ebc2752b EGE |
681 | |
682 | /* | |
683 | * Flushes all data that was already written to the OS all the way down to | |
684 | * the disk (for example file-posix.c calls fsync()). | |
685 | */ | |
88095349 EGE |
686 | int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_flush_to_disk)( |
687 | BlockDriverState *bs); | |
ebc2752b EGE |
688 | |
689 | /* | |
690 | * Flushes all internal caches to the OS. The data may still sit in a | |
691 | * writeback cache of the host OS, but it will survive a crash of the qemu | |
692 | * process. | |
693 | */ | |
88095349 EGE |
694 | int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_flush_to_os)( |
695 | BlockDriverState *bs); | |
ebc2752b | 696 | |
ebc2752b EGE |
697 | /* |
698 | * Truncate @bs to @offset bytes using the given @prealloc mode | |
699 | * when growing. Modes other than PREALLOC_MODE_OFF should be | |
700 | * rejected when shrinking @bs. | |
701 | * | |
702 | * If @exact is true, @bs must be resized to exactly @offset. | |
703 | * Otherwise, it is sufficient for @bs (if it is a host block | |
704 | * device and thus there is no way to resize it) to be at least | |
705 | * @offset bytes in length. | |
706 | * | |
707 | * If @exact is true and this function fails but would succeed | |
708 | * with @exact = false, it should return -ENOTSUP. | |
709 | */ | |
c2b8e315 KW |
710 | int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_truncate)( |
711 | BlockDriverState *bs, int64_t offset, bool exact, | |
712 | PreallocMode prealloc, BdrvRequestFlags flags, Error **errp); | |
713 | ||
8ab8140a KW |
714 | int64_t coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_getlength)( |
715 | BlockDriverState *bs); | |
716 | ||
de335638 | 717 | int64_t coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_get_allocated_file_size)( |
82618d7b EGE |
718 | BlockDriverState *bs); |
719 | ||
ebc2752b EGE |
720 | BlockMeasureInfo *(*bdrv_measure)(QemuOpts *opts, BlockDriverState *in_bs, |
721 | Error **errp); | |
722 | ||
7b1fb72e KW |
723 | int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pwritev_compressed)( |
724 | BlockDriverState *bs, int64_t offset, int64_t bytes, | |
725 | QEMUIOVector *qiov); | |
726 | ||
727 | int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pwritev_compressed_part)( | |
728 | BlockDriverState *bs, int64_t offset, int64_t bytes, | |
729 | QEMUIOVector *qiov, size_t qiov_offset); | |
ebc2752b | 730 | |
a00e70c0 EGE |
731 | int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_get_info)( |
732 | BlockDriverState *bs, BlockDriverInfo *bdi); | |
ebc2752b | 733 | |
3574499a KW |
734 | ImageInfoSpecific * GRAPH_RDLOCK_PTR (*bdrv_get_specific_info)( |
735 | BlockDriverState *bs, Error **errp); | |
ebc2752b EGE |
736 | BlockStatsSpecific *(*bdrv_get_specific_stats)(BlockDriverState *bs); |
737 | ||
ca5e2ad9 | 738 | int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_save_vmstate)( |
1b3ff9fe KW |
739 | BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); |
740 | ||
ca5e2ad9 | 741 | int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_load_vmstate)( |
1b3ff9fe | 742 | BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); |
ebc2752b | 743 | |
6d43eaa3 SL |
744 | int coroutine_fn (*bdrv_co_zone_report)(BlockDriverState *bs, |
745 | int64_t offset, unsigned int *nr_zones, | |
746 | BlockZoneDescriptor *zones); | |
747 | int coroutine_fn (*bdrv_co_zone_mgmt)(BlockDriverState *bs, BlockZoneOp op, | |
748 | int64_t offset, int64_t len); | |
4751d09a SL |
749 | int coroutine_fn (*bdrv_co_zone_append)(BlockDriverState *bs, |
750 | int64_t *offset, QEMUIOVector *qiov, | |
751 | BdrvRequestFlags flags); | |
6d43eaa3 | 752 | |
ebc2752b | 753 | /* removable device specific */ |
c73ff92c EGE |
754 | bool coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_is_inserted)( |
755 | BlockDriverState *bs); | |
79a292e5 KW |
756 | void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_eject)( |
757 | BlockDriverState *bs, bool eject_flag); | |
758 | void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_lock_medium)( | |
759 | BlockDriverState *bs, bool locked); | |
ebc2752b EGE |
760 | |
761 | /* to control generic scsi devices */ | |
26c518ab KW |
762 | BlockAIOCB *coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_aio_ioctl)( |
763 | BlockDriverState *bs, unsigned long int req, void *buf, | |
ebc2752b | 764 | BlockCompletionFunc *cb, void *opaque); |
26c518ab KW |
765 | |
766 | int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_ioctl)( | |
767 | BlockDriverState *bs, unsigned long int req, void *buf); | |
ebc2752b | 768 | |
ebc2752b EGE |
769 | /* |
770 | * Returns 0 for completed check, -errno for internal errors. | |
771 | * The check results are stored in result. | |
772 | */ | |
1b3ff9fe KW |
773 | int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_check)( |
774 | BlockDriverState *bs, BdrvCheckResult *result, BdrvCheckMode fix); | |
ebc2752b | 775 | |
cb2bfaa4 EGE |
776 | void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_debug_event)( |
777 | BlockDriverState *bs, BlkdebugEvent event); | |
ebc2752b | 778 | |
ebc2752b | 779 | bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs); |
167f748d KW |
780 | |
781 | bool coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_can_store_new_dirty_bitmap)( | |
c2d76808 AF |
782 | BlockDriverState *bs, const char *name, uint32_t granularity, |
783 | Error **errp); | |
167f748d KW |
784 | |
785 | int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_remove_persistent_dirty_bitmap)( | |
c2d76808 | 786 | BlockDriverState *bs, const char *name, Error **errp); |
ebc2752b EGE |
787 | }; |
788 | ||
7b1fb72e | 789 | static inline bool TSA_NO_TSA block_driver_can_compress(BlockDriver *drv) |
ebc2752b EGE |
790 | { |
791 | return drv->bdrv_co_pwritev_compressed || | |
792 | drv->bdrv_co_pwritev_compressed_part; | |
793 | } | |
794 | ||
795 | typedef struct BlockLimits { | |
796 | /* | |
797 | * Alignment requirement, in bytes, for offset/length of I/O | |
798 | * requests. Must be a power of 2 less than INT_MAX; defaults to | |
799 | * 1 for drivers with modern byte interfaces, and to 512 | |
800 | * otherwise. | |
801 | */ | |
802 | uint32_t request_alignment; | |
803 | ||
804 | /* | |
805 | * Maximum number of bytes that can be discarded at once. Must be multiple | |
806 | * of pdiscard_alignment, but need not be power of 2. May be 0 if no | |
807 | * inherent 64-bit limit. | |
808 | */ | |
809 | int64_t max_pdiscard; | |
810 | ||
811 | /* | |
812 | * Optimal alignment for discard requests in bytes. A power of 2 | |
813 | * is best but not mandatory. Must be a multiple of | |
814 | * bl.request_alignment, and must be less than max_pdiscard if | |
815 | * that is set. May be 0 if bl.request_alignment is good enough | |
816 | */ | |
817 | uint32_t pdiscard_alignment; | |
818 | ||
819 | /* | |
820 | * Maximum number of bytes that can zeroized at once. Must be multiple of | |
821 | * pwrite_zeroes_alignment. 0 means no limit. | |
822 | */ | |
823 | int64_t max_pwrite_zeroes; | |
824 | ||
825 | /* | |
826 | * Optimal alignment for write zeroes requests in bytes. A power | |
827 | * of 2 is best but not mandatory. Must be a multiple of | |
828 | * bl.request_alignment, and must be less than max_pwrite_zeroes | |
829 | * if that is set. May be 0 if bl.request_alignment is good | |
830 | * enough | |
831 | */ | |
832 | uint32_t pwrite_zeroes_alignment; | |
833 | ||
834 | /* | |
835 | * Optimal transfer length in bytes. A power of 2 is best but not | |
836 | * mandatory. Must be a multiple of bl.request_alignment, or 0 if | |
837 | * no preferred size | |
838 | */ | |
839 | uint32_t opt_transfer; | |
840 | ||
841 | /* | |
842 | * Maximal transfer length in bytes. Need not be power of 2, but | |
843 | * must be multiple of opt_transfer and bl.request_alignment, or 0 | |
844 | * for no 32-bit limit. For now, anything larger than INT_MAX is | |
845 | * clamped down. | |
846 | */ | |
847 | uint32_t max_transfer; | |
848 | ||
849 | /* | |
850 | * Maximal hardware transfer length in bytes. Applies whenever | |
851 | * transfers to the device bypass the kernel I/O scheduler, for | |
852 | * example with SG_IO. If larger than max_transfer or if zero, | |
853 | * blk_get_max_hw_transfer will fall back to max_transfer. | |
854 | */ | |
855 | uint64_t max_hw_transfer; | |
856 | ||
857 | /* | |
858 | * Maximal number of scatter/gather elements allowed by the hardware. | |
859 | * Applies whenever transfers to the device bypass the kernel I/O | |
860 | * scheduler, for example with SG_IO. If larger than max_iov | |
861 | * or if zero, blk_get_max_hw_iov will fall back to max_iov. | |
862 | */ | |
863 | int max_hw_iov; | |
864 | ||
865 | ||
866 | /* memory alignment, in bytes so that no bounce buffer is needed */ | |
867 | size_t min_mem_alignment; | |
868 | ||
869 | /* memory alignment, in bytes, for bounce buffer */ | |
870 | size_t opt_mem_alignment; | |
871 | ||
872 | /* maximum number of iovec elements */ | |
873 | int max_iov; | |
160a29e2 PB |
874 | |
875 | /* | |
876 | * true if the length of the underlying file can change, and QEMU | |
877 | * is expected to adjust automatically. Mostly for CD-ROM drives, | |
878 | * whose length is zero when the tray is empty (they don't need | |
879 | * an explicit monitor command to load the disk inside the guest). | |
880 | */ | |
881 | bool has_variable_length; | |
a735b56e SL |
882 | |
883 | /* device zone model */ | |
884 | BlockZoneModel zoned; | |
6d43eaa3 SL |
885 | |
886 | /* zone size expressed in bytes */ | |
887 | uint32_t zone_size; | |
888 | ||
889 | /* total number of zones */ | |
890 | uint32_t nr_zones; | |
891 | ||
892 | /* maximum sectors of a zone append write operation */ | |
893 | uint32_t max_append_sectors; | |
894 | ||
895 | /* maximum number of open zones */ | |
896 | uint32_t max_open_zones; | |
897 | ||
898 | /* maximum number of active zones */ | |
899 | uint32_t max_active_zones; | |
a3c41f06 SL |
900 | |
901 | uint32_t write_granularity; | |
ebc2752b EGE |
902 | } BlockLimits; |
903 | ||
/* Forward declaration; element type of BlockDriverState.op_blockers[]. */
typedef struct BdrvOpBlocker BdrvOpBlocker;
905 | ||
906 | typedef struct BdrvAioNotifier { | |
907 | void (*attached_aio_context)(AioContext *new_context, void *opaque); | |
908 | void (*detach_aio_context)(void *opaque); | |
909 | ||
910 | void *opaque; | |
911 | bool deleted; | |
912 | ||
913 | QLIST_ENTRY(BdrvAioNotifier) list; | |
914 | } BdrvAioNotifier; | |
915 | ||
916 | struct BdrvChildClass { | |
917 | /* | |
918 | * If true, bdrv_replace_node() doesn't change the node this BdrvChild | |
919 | * points to. | |
920 | */ | |
921 | bool stay_at_node; | |
922 | ||
923 | /* | |
924 | * If true, the parent is a BlockDriverState and bdrv_next_all_states() | |
925 | * will return it. This information is used for drain_all, where every node | |
926 | * will be drained separately, so the drain only needs to be propagated to | |
927 | * non-BDS parents. | |
928 | */ | |
929 | bool parent_is_bds; | |
930 | ||
abc5a79c EGE |
931 | /* |
932 | * Global state (GS) API. These functions run under the BQL. | |
933 | * | |
934 | * See include/block/block-global-state.h for more information about | |
935 | * the GS API. | |
936 | */ | |
ebc2752b EGE |
937 | void (*inherit_options)(BdrvChildRole role, bool parent_is_format, |
938 | int *child_flags, QDict *child_options, | |
939 | int parent_flags, QDict *parent_options); | |
ebc2752b | 940 | void (*change_media)(BdrvChild *child, bool load); |
ebc2752b EGE |
941 | |
942 | /* | |
943 | * Returns a malloced string that describes the parent of the child for a | |
944 | * human reader. This could be a node-name, BlockBackend name, qdev ID or | |
945 | * QOM path of the device owning the BlockBackend, job type and ID etc. The | |
946 | * caller is responsible for freeing the memory. | |
947 | */ | |
948 | char *(*get_parent_desc)(BdrvChild *child); | |
949 | ||
abc5a79c EGE |
950 | /* |
951 | * Notifies the parent that the child has been activated/inactivated (e.g. | |
952 | * when migration is completing) and it can start/stop requesting | |
953 | * permissions and doing I/O on it. | |
954 | */ | |
3804e3cf KW |
955 | void GRAPH_RDLOCK_PTR (*activate)(BdrvChild *child, Error **errp); |
956 | int GRAPH_RDLOCK_PTR (*inactivate)(BdrvChild *child); | |
abc5a79c | 957 | |
303de47b KW |
958 | void GRAPH_WRLOCK_PTR (*attach)(BdrvChild *child); |
959 | void GRAPH_WRLOCK_PTR (*detach)(BdrvChild *child); | |
abc5a79c | 960 | |
ab613350 SH |
961 | /* |
962 | * If this pair of functions is implemented, the parent doesn't issue new | |
963 | * requests after returning from .drained_begin() until .drained_end() is | |
964 | * called. | |
965 | * | |
966 | * These functions must not change the graph (and therefore also must not | |
967 | * call aio_poll(), which could change the graph indirectly). | |
968 | * | |
969 | * Note that this can be nested. If drained_begin() was called twice, new | |
970 | * I/O is allowed only after drained_end() was called twice, too. | |
971 | */ | |
d05ab380 EGE |
972 | void GRAPH_RDLOCK_PTR (*drained_begin)(BdrvChild *child); |
973 | void GRAPH_RDLOCK_PTR (*drained_end)(BdrvChild *child); | |
ab613350 SH |
974 | |
975 | /* | |
976 | * Returns whether the parent has pending requests for the child. This | |
977 | * callback is polled after .drained_begin() has been called until all | |
978 | * activity on the child has stopped. | |
979 | */ | |
d05ab380 | 980 | bool GRAPH_RDLOCK_PTR (*drained_poll)(BdrvChild *child); |
ab613350 | 981 | |
abc5a79c EGE |
982 | /* |
983 | * Notifies the parent that the filename of its child has changed (e.g. | |
984 | * because the direct child was removed from the backing chain), so that it | |
985 | * can update its reference. | |
986 | */ | |
987 | int (*update_filename)(BdrvChild *child, BlockDriverState *new_base, | |
4b028cbe PK |
988 | const char *filename, |
989 | bool backing_mask_protocol, | |
990 | Error **errp); | |
abc5a79c | 991 | |
7e8c182f | 992 | bool (*change_aio_ctx)(BdrvChild *child, AioContext *ctx, |
e08cc001 EGE |
993 | GHashTable *visited, Transaction *tran, |
994 | Error **errp); | |
abc5a79c | 995 | |
abc5a79c EGE |
996 | /* |
997 | * I/O API functions. These functions are thread-safe. | |
998 | * | |
999 | * See include/block/block-io.h for more information about | |
1000 | * the I/O API. | |
1001 | */ | |
1002 | ||
1003 | void (*resize)(BdrvChild *child); | |
1004 | ||
1005 | /* | |
1006 | * Returns a name that is supposedly more useful for human users than the | |
1007 | * node name for identifying the node in question (in particular, a BB | |
1008 | * name), or NULL if the parent can't provide a better name. | |
1009 | */ | |
1010 | const char *(*get_name)(BdrvChild *child); | |
1011 | ||
d5f8d79c | 1012 | AioContext *(*get_parent_aio_context)(BdrvChild *child); |
ebc2752b EGE |
1013 | }; |
1014 | ||
1015 | extern const BdrvChildClass child_of_bds; | |
1016 | ||
1017 | struct BdrvChild { | |
1018 | BlockDriverState *bs; | |
1019 | char *name; | |
1020 | const BdrvChildClass *klass; | |
1021 | BdrvChildRole role; | |
1022 | void *opaque; | |
1023 | ||
1024 | /** | |
1025 | * Granted permissions for operating on this BdrvChild (BLK_PERM_* bitmask) | |
1026 | */ | |
1027 | uint64_t perm; | |
1028 | ||
1029 | /** | |
1030 | * Permissions that can still be granted to other users of @bs while this | |
1031 | * BdrvChild is still attached to it. (BLK_PERM_* bitmask) | |
1032 | */ | |
1033 | uint64_t shared_perm; | |
1034 | ||
1035 | /* | |
1036 | * This link is frozen: the child can neither be replaced nor | |
1037 | * detached from the parent. | |
1038 | */ | |
1039 | bool frozen; | |
1040 | ||
1041 | /* | |
57e05be3 | 1042 | * True if the parent of this child has been drained by this BdrvChild |
ebc2752b | 1043 | * (through klass->drained_*). |
57e05be3 KW |
1044 | * |
1045 | * It is generally true if bs->quiesce_counter > 0. It may differ while the | |
ebc2752b EGE |
1046 | * child is entering or leaving a drained section. |
1047 | */ | |
57e05be3 | 1048 | bool quiesced_parent; |
ebc2752b | 1049 | |
680e0cc4 | 1050 | QLIST_ENTRY(BdrvChild GRAPH_RDLOCK_PTR) next; |
b59b4660 | 1051 | QLIST_ENTRY(BdrvChild GRAPH_RDLOCK_PTR) next_parent; |
ebc2752b EGE |
1052 | }; |
1053 | ||
1054 | /* | |
1055 | * Allows bdrv_co_block_status() to cache one data region for a | |
1056 | * protocol node. | |
1057 | * | |
1058 | * @valid: Whether the cache is valid (should be accessed with atomic | |
1059 | * functions so this can be reset by RCU readers) | |
1060 | * @data_start: Offset where we know (or strongly assume) is data | |
1061 | * @data_end: Offset where the data region ends (which is not necessarily | |
1062 | * the start of a zeroed region) | |
1063 | */ | |
1064 | typedef struct BdrvBlockStatusCache { | |
1065 | struct rcu_head rcu; | |
1066 | ||
1067 | bool valid; | |
1068 | int64_t data_start; | |
1069 | int64_t data_end; | |
1070 | } BdrvBlockStatusCache; | |
1071 | ||
1072 | struct BlockDriverState { | |
1073 | /* | |
1074 | * Protected by big QEMU lock or read-only after opening. No special | |
1075 | * locking needed during I/O... | |
1076 | */ | |
1077 | int open_flags; /* flags used to open the file, re-used for re-open */ | |
1078 | bool encrypted; /* if true, the media is encrypted */ | |
1079 | bool sg; /* if true, the device is a /dev/sg* */ | |
1080 | bool probed; /* if true, format was probed rather than specified */ | |
1081 | bool force_share; /* if true, always allow all shared permissions */ | |
1082 | bool implicit; /* if true, this filter node was automatically inserted */ | |
1083 | ||
1084 | BlockDriver *drv; /* NULL means no media */ | |
1085 | void *opaque; | |
1086 | ||
1087 | AioContext *aio_context; /* event loop used for fd handlers, timers, etc */ | |
1088 | /* | |
1089 | * long-running tasks intended to always use the same AioContext as this | |
1090 | * BDS may register themselves in this list to be notified of changes | |
1091 | * regarding this BDS's context | |
1092 | */ | |
1093 | QLIST_HEAD(, BdrvAioNotifier) aio_notifiers; | |
1094 | bool walking_aio_notifiers; /* to make removal during iteration safe */ | |
1095 | ||
1096 | char filename[PATH_MAX]; | |
1097 | /* | |
1098 | * If not empty, this image is a diff in relation to backing_file. | |
1099 | * Note that this is the name given in the image header and | |
1100 | * therefore may or may not be equal to .backing->bs->filename. | |
1101 | * If this field contains a relative path, it is to be resolved | |
1102 | * relatively to the overlay's location. | |
1103 | */ | |
1104 | char backing_file[PATH_MAX]; | |
1105 | /* | |
1106 | * The backing filename indicated by the image header. Contrary | |
1107 | * to backing_file, if we ever open this file, auto_backing_file | |
1108 | * is replaced by the resulting BDS's filename (i.e. after a | |
1109 | * bdrv_refresh_filename() run). | |
1110 | */ | |
1111 | char auto_backing_file[PATH_MAX]; | |
1112 | char backing_format[16]; /* if non-zero and backing_file exists */ | |
1113 | ||
1114 | QDict *full_open_options; | |
1115 | char exact_filename[PATH_MAX]; | |
1116 | ||
ebc2752b EGE |
1117 | /* I/O Limits */ |
1118 | BlockLimits bl; | |
1119 | ||
1120 | /* | |
1121 | * Flags honored during pread | |
1122 | */ | |
98b3ddc7 | 1123 | BdrvRequestFlags supported_read_flags; |
ebc2752b EGE |
1124 | /* |
1125 | * Flags honored during pwrite (so far: BDRV_REQ_FUA, | |
1126 | * BDRV_REQ_WRITE_UNCHANGED). | |
1127 | * If a driver does not support BDRV_REQ_WRITE_UNCHANGED, those | |
1128 | * writes will be issued as normal writes without the flag set. | |
1129 | * This is important to note for drivers that do not explicitly | |
1130 | * request a WRITE permission for their children and instead take | |
1131 | * the same permissions as their parent did (this is commonly what | |
1132 | * block filters do). Such drivers have to be aware that the | |
1133 | * parent may have taken a WRITE_UNCHANGED permission only and is | |
1134 | * issuing such requests. Drivers either must make sure that | |
1135 | * these requests do not result in plain WRITE accesses (usually | |
1136 | * by supporting BDRV_REQ_WRITE_UNCHANGED, and then forwarding | |
1137 | * every incoming write request as-is, including potentially that | |
1138 | * flag), or they have to explicitly take the WRITE permission for | |
1139 | * their children. | |
1140 | */ | |
98b3ddc7 | 1141 | BdrvRequestFlags supported_write_flags; |
ebc2752b EGE |
1142 | /* |
1143 | * Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA, | |
1144 | * BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED) | |
1145 | */ | |
98b3ddc7 | 1146 | BdrvRequestFlags supported_zero_flags; |
ebc2752b EGE |
1147 | /* |
1148 | * Flags honoured during truncate (so far: BDRV_REQ_ZERO_WRITE). | |
1149 | * | |
1150 | * If BDRV_REQ_ZERO_WRITE is given, the truncate operation must make sure | |
1151 | * that any added space reads as all zeros. If this can't be guaranteed, | |
1152 | * the operation must fail. | |
1153 | */ | |
98b3ddc7 | 1154 | BdrvRequestFlags supported_truncate_flags; |
ebc2752b EGE |
1155 | |
1156 | /* the following member gives a name to every node on the bs graph. */ | |
1157 | char node_name[32]; | |
1158 | /* element of the list of named nodes building the graph */ | |
1159 | QTAILQ_ENTRY(BlockDriverState) node_list; | |
1160 | /* element of the list of all BlockDriverStates (all_bdrv_states) */ | |
1161 | QTAILQ_ENTRY(BlockDriverState) bs_list; | |
1162 | /* element of the list of monitor-owned BDS */ | |
1163 | QTAILQ_ENTRY(BlockDriverState) monitor_list; | |
1164 | int refcnt; | |
1165 | ||
1166 | /* operation blockers. Protected by BQL. */ | |
1167 | QLIST_HEAD(, BdrvOpBlocker) op_blockers[BLOCK_OP_TYPE_MAX]; | |
1168 | ||
1169 | /* | |
1170 | * The node that this node inherited default options from (and a reopen on | |
1171 | * which can affect this node by changing these defaults). This is always a | |
1172 | * parent node of this node. | |
1173 | */ | |
1174 | BlockDriverState *inherits_from; | |
5bb04747 VSO |
1175 | |
1176 | /* | |
1177 | * @backing and @file are some of @children or NULL. All these three fields | |
1178 | * (@file, @backing and @children) are modified only in | |
1179 | * bdrv_child_cb_attach() and bdrv_child_cb_detach(). | |
1180 | * | |
1181 | * See also comment in include/block/block.h, to learn how backing and file | |
1182 | * are connected with BdrvChildRole. | |
1183 | */ | |
680e0cc4 | 1184 | QLIST_HEAD(, BdrvChild GRAPH_RDLOCK_PTR) children; |
004915a9 | 1185 | BdrvChild * GRAPH_RDLOCK_PTR backing; |
1f051dcb | 1186 | BdrvChild * GRAPH_RDLOCK_PTR file; |
5bb04747 | 1187 | |
b59b4660 | 1188 | QLIST_HEAD(, BdrvChild GRAPH_RDLOCK_PTR) parents; |
ebc2752b EGE |
1189 | |
1190 | QDict *options; | |
1191 | QDict *explicit_options; | |
1192 | BlockdevDetectZeroesOptions detect_zeroes; | |
1193 | ||
1194 | /* The error object in use for blocking operations on backing_hd */ | |
1195 | Error *backing_blocker; | |
1196 | ||
ebc2752b EGE |
1197 | /* |
1198 | * If we are reading a disk image, give its size in sectors. | |
1199 | * Generally read-only; it is written to by load_snapshot and | |
1200 | * save_snaphost, but the block layer is quiescent during those. | |
1201 | */ | |
1202 | int64_t total_sectors; | |
1203 | ||
1204 | /* threshold limit for writes, in bytes. "High water mark". */ | |
1205 | uint64_t write_threshold_offset; | |
1206 | ||
1207 | /* | |
1208 | * Writing to the list requires the BQL _and_ the dirty_bitmap_mutex. | |
1209 | * Reading from the list can be done with either the BQL or the | |
1210 | * dirty_bitmap_mutex. Modifying a bitmap only requires | |
1211 | * dirty_bitmap_mutex. | |
1212 | */ | |
1213 | QemuMutex dirty_bitmap_mutex; | |
1214 | QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps; | |
1215 | ||
1216 | /* Offset after the highest byte written to */ | |
1217 | Stat64 wr_highest_offset; | |
1218 | ||
1219 | /* | |
1220 | * If true, copy read backing sectors into image. Can be >1 if more | |
1221 | * than one client has requested copy-on-read. Accessed with atomic | |
1222 | * ops. | |
1223 | */ | |
1224 | int copy_on_read; | |
1225 | ||
1226 | /* | |
1227 | * number of in-flight requests; overall and serialising. | |
1228 | * Accessed with atomic ops. | |
1229 | */ | |
1230 | unsigned int in_flight; | |
1231 | unsigned int serialising_in_flight; | |
1232 | ||
ebc2752b EGE |
1233 | /* do we need to tell the quest if we have a volatile write cache? */ |
1234 | int enable_write_cache; | |
1235 | ||
1236 | /* Accessed with atomic ops. */ | |
1237 | int quiesce_counter; | |
ebc2752b EGE |
1238 | |
1239 | unsigned int write_gen; /* Current data generation */ | |
1240 | ||
1241 | /* Protected by reqs_lock. */ | |
fa9185fc | 1242 | QemuMutex reqs_lock; |
ebc2752b EGE |
1243 | QLIST_HEAD(, BdrvTrackedRequest) tracked_requests; |
1244 | CoQueue flush_queue; /* Serializing flush queue */ | |
1245 | bool active_flush_req; /* Flush request in flight? */ | |
1246 | ||
1247 | /* Only read/written by whoever has set active_flush_req to true. */ | |
1248 | unsigned int flushed_gen; /* Flushed write generation */ | |
1249 | ||
1250 | /* BdrvChild links to this node may never be frozen */ | |
1251 | bool never_freeze; | |
1252 | ||
1253 | /* Lock for block-status cache RCU writers */ | |
1254 | CoMutex bsc_modify_lock; | |
1255 | /* Always non-NULL, but must only be dereferenced under an RCU read guard */ | |
1256 | BdrvBlockStatusCache *block_status_cache; | |
a3c41f06 SL |
1257 | |
1258 | /* array of write pointers' location of each zone in the zoned device. */ | |
1259 | BlockZoneWps *wps; | |
ebc2752b EGE |
1260 | }; |
1261 | ||
1262 | struct BlockBackendRootState { | |
1263 | int open_flags; | |
1264 | BlockdevDetectZeroesOptions detect_zeroes; | |
1265 | }; | |
1266 | ||
/* How the backing chain of a mirror target is set up. */
typedef enum BlockMirrorBackingMode {
    /*
     * Re-use the source's existing backing chain for the target:
     * - sync=full: Set backing BDS to NULL.
     * - sync=top:  Use source's backing BDS.
     * - sync=none: Use source as the backing BDS.
     */
    MIRROR_SOURCE_BACKING_CHAIN,

    /* Open the backing chain of the target completely anew */
    MIRROR_OPEN_BACKING_CHAIN,

    /* Leave the target's backing BDS untouched after job completion */
    MIRROR_LEAVE_BACKING_CHAIN,
} BlockMirrorBackingMode;
1282 | ||
1283 | ||
1284 | /* | |
1285 | * Essential block drivers which must always be statically linked into qemu, and | |
1286 | * which therefore can be accessed without using bdrv_find_format() | |
1287 | */ | |
1288 | extern BlockDriver bdrv_file; | |
1289 | extern BlockDriver bdrv_raw; | |
1290 | extern BlockDriver bdrv_qcow2; | |
1291 | ||
1292 | extern unsigned int bdrv_drain_all_count; | |
1293 | extern QemuOptsList bdrv_create_opts_simple; | |
1294 | ||
1295 | /* | |
1296 | * Common functions that are neither I/O nor Global State. | |
1297 | * | |
04ae220d | 1298 | * See include/block/block-common.h for more information about |
ebc2752b EGE |
1299 | * the Common API. |
1300 | */ | |
1301 | ||
1302 | static inline BlockDriverState *child_bs(BdrvChild *child) | |
1303 | { | |
1304 | return child ? child->bs : NULL; | |
1305 | } | |
1306 | ||
1307 | int bdrv_check_request(int64_t offset, int64_t bytes, Error **errp); | |
69fbfff9 | 1308 | char *create_tmp_file(Error **errp); |
ebc2752b EGE |
1309 | void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, |
1310 | QDict *options); | |
1311 | ||
1312 | ||
1313 | int bdrv_check_qiov_request(int64_t offset, int64_t bytes, | |
1314 | QEMUIOVector *qiov, size_t qiov_offset, | |
1315 | Error **errp); | |
1316 | ||
1317 | #ifdef _WIN32 | |
1318 | int is_windows_drive(const char *filename); | |
1319 | #endif | |
1320 | ||
1321 | #endif /* BLOCK_INT_COMMON_H */ |