]> git.proxmox.com Git - mirror_qemu.git/blame - include/block/block-common.h
block-coroutine-wrapper.py: introduce annotations that take the graph rdlock
[mirror_qemu.git] / include / block / block-common.h
CommitLineData
3b491a90
EGE
1/*
2 * QEMU System Emulator block driver
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24#ifndef BLOCK_COMMON_H
25#define BLOCK_COMMON_H
26
27#include "block/aio.h"
28#include "block/aio-wait.h"
29#include "qemu/iov.h"
30#include "qemu/coroutine.h"
31#include "block/accounting.h"
3b491a90
EGE
32#include "qemu/hbitmap.h"
33#include "qemu/transactions.h"
34
35/*
1bd54201 36 * co_wrapper{*}: Function specifiers used by block-coroutine-wrapper.py
3b491a90 37 *
1bd54201 38 * Function specifiers, which do nothing but mark functions to be
3b491a90
EGE
39 * generated by scripts/block-coroutine-wrapper.py
40 *
1bd54201
EGE
41 * Usage: read docs/devel/block-coroutine-wrapper.rst
42 *
e6d3f7a6 43 * There are 4 kinds of specifiers:
76a2f554
EGE
44 * - co_wrapper functions can be called only from non-coroutine context,
45 * because they always generate a new coroutine.
46 * - co_wrapper_mixed functions can be called from both coroutine and
47 * non-coroutine context.
e6d3f7a6
EGE
48 * - co_wrapper_bdrv_rdlock are co_wrapper functions but automatically take and
49 * release the graph rdlock when creating a new coroutine
50 * - co_wrapper_mixed_bdrv_rdlock are co_wrapper_mixed functions but
51 * automatically take and release the graph rdlock when creating a new
52 * coroutine.
3b491a90 53 */
76a2f554 54#define co_wrapper
1bd54201 55#define co_wrapper_mixed
e6d3f7a6
EGE
56#define co_wrapper_bdrv_rdlock
57#define co_wrapper_mixed_bdrv_rdlock
3b491a90 58
0508d0be
EGE
59#include "block/dirty-bitmap.h"
60#include "block/blockjob.h"
61
3b491a90
EGE
62/* block.c */
63typedef struct BlockDriver BlockDriver;
64typedef struct BdrvChild BdrvChild;
65typedef struct BdrvChildClass BdrvChildClass;
66
67typedef struct BlockDriverInfo {
68 /* in bytes, 0 if irrelevant */
69 int cluster_size;
70 /* offset at which the VM state can be saved (0 if not possible) */
71 int64_t vm_state_offset;
72 bool is_dirty;
73 /*
74 * True if this block driver only supports compressed writes
75 */
76 bool needs_compressed_writes;
77} BlockDriverInfo;
78
79typedef struct BlockFragInfo {
80 uint64_t allocated_clusters;
81 uint64_t total_clusters;
82 uint64_t fragmented_clusters;
83 uint64_t compressed_clusters;
84} BlockFragInfo;
85
86typedef enum {
87 BDRV_REQ_COPY_ON_READ = 0x1,
88 BDRV_REQ_ZERO_WRITE = 0x2,
89
90 /*
91 * The BDRV_REQ_MAY_UNMAP flag is used in write_zeroes requests to indicate
92 * that the block driver should unmap (discard) blocks if it is guaranteed
93 * that the result will read back as zeroes. The flag is only passed to the
94 * driver if the block device is opened with BDRV_O_UNMAP.
95 */
96 BDRV_REQ_MAY_UNMAP = 0x4,
97
e8b65355
SH
98 /*
99 * An optimization hint when all QEMUIOVector elements are within
100 * previously registered bdrv_register_buf() memory ranges.
101 *
102 * Code that replaces the user's QEMUIOVector elements with bounce buffers
103 * must take care to clear this flag.
104 */
105 BDRV_REQ_REGISTERED_BUF = 0x8,
106
3b491a90
EGE
107 BDRV_REQ_FUA = 0x10,
108 BDRV_REQ_WRITE_COMPRESSED = 0x20,
109
110 /*
111 * Signifies that this write request will not change the visible disk
112 * content.
113 */
114 BDRV_REQ_WRITE_UNCHANGED = 0x40,
115
116 /*
117 * Forces request serialisation. Use only with write requests.
118 */
119 BDRV_REQ_SERIALISING = 0x80,
120
121 /*
122 * Execute the request only if the operation can be offloaded or otherwise
123 * be executed efficiently, but return an error instead of using a slow
124 * fallback.
125 */
126 BDRV_REQ_NO_FALLBACK = 0x100,
127
128 /*
129 * BDRV_REQ_PREFETCH makes sense only in the context of copy-on-read
130 * (i.e., together with the BDRV_REQ_COPY_ON_READ flag or when a COR
131 * filter is involved), in which case it signals that the COR operation
132 * need not read the data into memory (qiov) but only ensure they are
133 * copied to the top layer (i.e., that COR operation is done).
134 */
135 BDRV_REQ_PREFETCH = 0x200,
136
137 /*
138 * If we need to wait for other requests, just fail immediately. Used
45e62b46
VSO
139 * only together with BDRV_REQ_SERIALISING. Used only with requests aligned
140 * to request_alignment (corresponding assertions are in block/io.c).
3b491a90
EGE
141 */
142 BDRV_REQ_NO_WAIT = 0x400,
143
144 /* Mask of valid flags (bitwise OR of all the flags above) */
145 BDRV_REQ_MASK = 0x7ff,
146} BdrvRequestFlags;
147
148#define BDRV_O_NO_SHARE 0x0001 /* don't share permissions */
149#define BDRV_O_RDWR 0x0002
150#define BDRV_O_RESIZE 0x0004 /* request permission for resizing the node */
151#define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save
152 writes in a snapshot */
153#define BDRV_O_TEMPORARY 0x0010 /* delete the file after use */
154#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */
155#define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the
156 thread pool */
157#define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */
158#define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */
159#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */
160#define BDRV_O_INACTIVE 0x0800 /* consistency hint for migration handoff */
161#define BDRV_O_CHECK 0x1000 /* open solely for consistency check */
162#define BDRV_O_ALLOW_RDWR 0x2000 /* allow reopen to change from r/o to r/w */
163#define BDRV_O_UNMAP 0x4000 /* execute guest UNMAP/TRIM operations */
164#define BDRV_O_PROTOCOL 0x8000 /* if no block driver is explicitly given:
165 select an appropriate protocol driver,
166 ignoring the format layer */
167#define BDRV_O_NO_IO 0x10000 /* don't initialize for I/O */
168#define BDRV_O_AUTO_RDONLY 0x20000 /* degrade to read-only if opening
169 read-write fails */
170#define BDRV_O_IO_URING 0x40000 /* use io_uring instead of the thread pool */
171
172#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH)
173
174
175/* Option names of options parsed by the block layer */
176
177#define BDRV_OPT_CACHE_WB "cache.writeback"
178#define BDRV_OPT_CACHE_DIRECT "cache.direct"
179#define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush"
180#define BDRV_OPT_READ_ONLY "read-only"
181#define BDRV_OPT_AUTO_READ_ONLY "auto-read-only"
182#define BDRV_OPT_DISCARD "discard"
183#define BDRV_OPT_FORCE_SHARE "force-share"
184
185
186#define BDRV_SECTOR_BITS 9
187#define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS)
188
189#define BDRV_REQUEST_MAX_SECTORS MIN_CONST(SIZE_MAX >> BDRV_SECTOR_BITS, \
190 INT_MAX >> BDRV_SECTOR_BITS)
191#define BDRV_REQUEST_MAX_BYTES (BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS)
192
193/*
194 * We want to allow aligning requests and disk length up to any 32bit alignment
195 * without being afraid of overflow.
196 * To achieve this, and at the same time use a round number as the maximum disk
197 * size, let's define the maximum "length" (a limit for any offset/bytes request
198 * and for disk size) as INT64_MAX aligned down to BDRV_MAX_ALIGNMENT.
199 */
200#define BDRV_MAX_ALIGNMENT (1L << 30)
201#define BDRV_MAX_LENGTH (QEMU_ALIGN_DOWN(INT64_MAX, BDRV_MAX_ALIGNMENT))
202
203/*
204 * Allocation status flags for bdrv_block_status() and friends.
205 *
206 * Public flags:
207 * BDRV_BLOCK_DATA: allocation for data at offset is tied to this layer
208 * BDRV_BLOCK_ZERO: offset reads as zero
209 * BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data
210 * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this
211 * layer rather than any backing, set by block layer
212 * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this
213 * layer, set by block layer
214 *
215 * Internal flags:
216 * BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request
217 * that the block layer recompute the answer from the returned
218 * BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID.
219 * BDRV_BLOCK_RECURSE: request that the block layer will recursively search for
220 * zeroes in file child of current block node inside
221 * returned region. Only valid together with both
222 * BDRV_BLOCK_DATA and BDRV_BLOCK_OFFSET_VALID. Should not
223 * appear with BDRV_BLOCK_ZERO.
224 *
225 * If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the
226 * host offset within the returned BDS that is allocated for the
227 * corresponding raw guest data. However, whether that offset
228 * actually contains data also depends on BDRV_BLOCK_DATA, as follows:
229 *
230 * DATA ZERO OFFSET_VALID
231 * t t t sectors read as zero, returned file is zero at offset
232 * t f t sectors read as valid from file at offset
233 * f t t sectors preallocated, read as zero, returned file not
234 * necessarily zero at offset
235 * f f t sectors preallocated but read from backing_hd,
236 * returned file contains garbage at offset
237 * t t f sectors preallocated, read as zero, unknown offset
238 * t f f sectors read from unknown file or offset
239 * f t f not allocated or unknown offset, read as zero
240 * f f f not allocated or unknown offset, read from backing_hd
241 */
242#define BDRV_BLOCK_DATA 0x01
243#define BDRV_BLOCK_ZERO 0x02
244#define BDRV_BLOCK_OFFSET_VALID 0x04
245#define BDRV_BLOCK_RAW 0x08
246#define BDRV_BLOCK_ALLOCATED 0x10
247#define BDRV_BLOCK_EOF 0x20
248#define BDRV_BLOCK_RECURSE 0x40
249
250typedef QTAILQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
251
252typedef struct BDRVReopenState {
253 BlockDriverState *bs;
254 int flags;
255 BlockdevDetectZeroesOptions detect_zeroes;
256 bool backing_missing;
257 BlockDriverState *old_backing_bs; /* keep pointer for permissions update */
258 BlockDriverState *old_file_bs; /* keep pointer for permissions update */
259 QDict *options;
260 QDict *explicit_options;
261 void *opaque;
262} BDRVReopenState;
263
264/*
265 * Block operation types
266 */
267typedef enum BlockOpType {
268 BLOCK_OP_TYPE_BACKUP_SOURCE,
269 BLOCK_OP_TYPE_BACKUP_TARGET,
270 BLOCK_OP_TYPE_CHANGE,
271 BLOCK_OP_TYPE_COMMIT_SOURCE,
272 BLOCK_OP_TYPE_COMMIT_TARGET,
273 BLOCK_OP_TYPE_DATAPLANE,
274 BLOCK_OP_TYPE_DRIVE_DEL,
275 BLOCK_OP_TYPE_EJECT,
276 BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT,
277 BLOCK_OP_TYPE_INTERNAL_SNAPSHOT,
278 BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE,
279 BLOCK_OP_TYPE_MIRROR_SOURCE,
280 BLOCK_OP_TYPE_MIRROR_TARGET,
281 BLOCK_OP_TYPE_RESIZE,
282 BLOCK_OP_TYPE_STREAM,
283 BLOCK_OP_TYPE_REPLACE,
284 BLOCK_OP_TYPE_MAX, /* last enumerator; equals the number of types above */
285} BlockOpType;
286
287/* Block node permission constants */
288enum {
289 /**
290 * A user that has the "permission" of consistent reads is guaranteed that
291 * their view of the contents of the block device is complete and
292 * self-consistent, representing the contents of a disk at a specific
293 * point.
294 *
295 * For most block devices (including their backing files) this is true, but
296 * the property cannot be maintained in a few situations like for
297 * intermediate nodes of a commit block job.
298 */
299 BLK_PERM_CONSISTENT_READ = 0x01,
300
301 /** This permission is required to change the visible disk contents. */
302 BLK_PERM_WRITE = 0x02,
303
304 /**
305 * This permission (which is weaker than BLK_PERM_WRITE) is both enough and
306 * required for writes to the block node when the caller promises that
307 * the visible disk content doesn't change.
308 *
309 * As the BLK_PERM_WRITE permission is strictly stronger, either is
310 * sufficient to perform an unchanging write.
311 */
312 BLK_PERM_WRITE_UNCHANGED = 0x04,
313
314 /** This permission is required to change the size of a block node. */
315 BLK_PERM_RESIZE = 0x08,
316
317 /**
318 * There used to be a bit BLK_PERM_GRAPH_MOD with value 0x10, which has
319 * since been removed. QEMU 6.1 and earlier may still lock the corresponding
320 * byte in block/file-posix locking, so any new permission must be added
321 * carefully so that it does not interfere with this old, unused bit.
322 */
323
324 BLK_PERM_ALL = 0x0f, /* bitwise OR of the four permission bits above */
325
326 DEFAULT_PERM_PASSTHROUGH = BLK_PERM_CONSISTENT_READ
327 | BLK_PERM_WRITE
328 | BLK_PERM_WRITE_UNCHANGED
329 | BLK_PERM_RESIZE,
330 /* With the values above this is 0: PASSTHROUGH already covers BLK_PERM_ALL. */
331 DEFAULT_PERM_UNCHANGED = BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH,
332};
333
334/*
335 * Flags that parent nodes assign to child nodes to specify what kind of
336 * role(s) they take.
337 *
338 * At least one of DATA, METADATA, FILTERED, or COW must be set for
339 * every child.
71ca4385
VSO
340 *
341 *
342 * = Connection with bs->children, bs->file and bs->backing fields =
343 *
344 * 1. Filters
345 *
346 * Filter drivers have drv->is_filter = true.
347 *
348 * Filter node has exactly one FILTERED|PRIMARY child, and may have other
349 * children which must not have these bits (one example is the
350 * copy-before-write filter, which also has its target DATA child).
351 *
352 * Filter nodes never have COW children.
353 *
354 * For most filters, the filtered child is linked in bs->file, bs->backing is
355 * NULL. For some filters (as an exception), it is the other way around; those
356 * drivers will have drv->filtered_child_is_backing set to true (see that
357 * field’s documentation for what drivers this concerns)
358 *
359 * 2. "raw" driver (block/raw-format.c)
360 *
361 * Formally it's not a filter (drv->is_filter = false)
362 *
363 * bs->backing is always NULL
364 *
365 * Only has one child, linked in bs->file. Its role is either FILTERED|PRIMARY
366 * (like filter) or DATA|PRIMARY depending on options.
367 *
368 * 3. Other drivers
369 *
370 * Don't have any FILTERED children.
371 *
372 * May have at most one COW child. In this case it's linked in bs->backing.
373 * Otherwise bs->backing is NULL. COW child is never PRIMARY.
374 *
375 * May have at most one PRIMARY child. In this case it's linked in bs->file.
376 * Otherwise bs->file is NULL.
377 *
378 * May also have some other children that don't have the PRIMARY or COW bit set.
3b491a90
EGE
379 */
380enum BdrvChildRoleBits {
381 /*
382 * This child stores data.
383 * Any node may have an arbitrary number of such children.
384 */
385 BDRV_CHILD_DATA = (1 << 0),
386
387 /*
388 * This child stores metadata.
389 * Any node may have an arbitrary number of metadata-storing
390 * children.
391 */
392 BDRV_CHILD_METADATA = (1 << 1),
393
394 /*
395 * A child that always presents exactly the same visible data as
396 * the parent, e.g. by virtue of the parent forwarding all reads
397 * and writes.
398 * This flag is mutually exclusive with DATA, METADATA, and COW.
399 * Any node may have at most one filtered child at a time.
400 */
401 BDRV_CHILD_FILTERED = (1 << 2),
402
403 /*
404 * Child from which to read all data that isn't allocated in the
405 * parent (i.e., the backing child); such data is copied to the
406 * parent through COW (and optionally COR).
407 * This flag is mutually exclusive with DATA, METADATA, and
408 * FILTERED.
409 * Any node may have at most one such backing child at a time.
410 */
411 BDRV_CHILD_COW = (1 << 3),
412
413 /*
414 * The primary child. For most drivers, this is the child whose
415 * filename applies best to the parent node.
416 * Any node may have at most one primary child at a time.
417 */
418 BDRV_CHILD_PRIMARY = (1 << 4),
419
420 /* Useful combination of flags: a primary child storing data and metadata */
421 BDRV_CHILD_IMAGE = BDRV_CHILD_DATA
422 | BDRV_CHILD_METADATA
423 | BDRV_CHILD_PRIMARY,
424};
425
426/* Mask of BdrvChildRoleBits values */
427typedef unsigned int BdrvChildRole;
428
429typedef struct BdrvCheckResult { /* NOTE(review): presumably filled in by an image check - confirm */
430 int corruptions;
431 int leaks;
432 int check_errors;
433 int corruptions_fixed;
434 int leaks_fixed;
435 int64_t image_end_offset;
436 BlockFragInfo bfi; /* cluster counters, see BlockFragInfo */
437} BdrvCheckResult;
438
439typedef enum { /* values are distinct bits; presumably combinable with | - TODO confirm */
440 BDRV_FIX_LEAKS = 1,
441 BDRV_FIX_ERRORS = 2,
442} BdrvCheckMode;
443
444typedef struct BlockSizes {
445 uint32_t phys;
446 uint32_t log;
447} BlockSizes;
448
449typedef struct HDGeometry {
450 uint32_t heads;
451 uint32_t sectors;
452 uint32_t cylinders;
453} HDGeometry;
454
455/*
456 * Common functions that are neither I/O nor Global State.
457 *
458 * These functions must never call any function from other categories
459 * (I/O, "I/O or GS", Global State) except this one, but can be invoked by
460 * all of them.
461 */
462
463char *bdrv_perm_names(uint64_t perm);
464uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm);
465
466void bdrv_init_with_whitelist(void);
467bool bdrv_uses_whitelist(void);
468int bdrv_is_whitelisted(BlockDriver *drv, bool read_only);
469
470int bdrv_parse_aio(const char *mode, int *flags);
471int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough);
472int bdrv_parse_discard_flags(const char *mode, int *flags);
473
474int path_has_protocol(const char *path);
475int path_is_absolute(const char *path);
476char *path_combine(const char *base_path, const char *filename);
477
478char *bdrv_get_full_backing_filename_from_filename(const char *backed,
479 const char *backing,
480 Error **errp);
481
482#endif /* BLOCK_COMMON_H */