]> git.proxmox.com Git - mirror_qemu.git/blob - include/block/block-common.h
Merge remote-tracking branch 'remotes/legoater/tags/pull-ppc-20220305' into staging
[mirror_qemu.git] / include / block / block-common.h
1 /*
2 * QEMU System Emulator block driver
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24 #ifndef BLOCK_COMMON_H
25 #define BLOCK_COMMON_H
26
27 #include "block/aio.h"
28 #include "block/aio-wait.h"
29 #include "qemu/iov.h"
30 #include "qemu/coroutine.h"
31 #include "block/accounting.h"
32 #include "block/dirty-bitmap.h"
33 #include "block/blockjob.h"
34 #include "qemu/hbitmap.h"
35 #include "qemu/transactions.h"
36
/*
 * generated_co_wrapper
 *
 * A function specifier that expands to nothing.  Its only job is to mark
 * the functions that are to be generated by
 * scripts/block-coroutine-wrapper.py.
 *
 * Further reading: docs/devel/block-coroutine-wrapper.rst
 */
#define generated_co_wrapper
46
/*
 * block.c
 *
 * Forward declarations only: these typedefs introduce the type names, the
 * struct bodies are not defined in this part of the header.
 */
typedef struct BdrvChildClass BdrvChildClass;
typedef struct BdrvChild BdrvChild;
typedef struct BlockDriver BlockDriver;
51
/* Information record about a block driver / image. */
typedef struct BlockDriverInfo {
    /* Cluster size in bytes; 0 if the notion is irrelevant. */
    int cluster_size;

    /* Offset at which the VM state can be saved; 0 if that is not possible. */
    int64_t vm_state_offset;

    /* NOTE(review): not documented here; meaning inferred from the name only. */
    bool is_dirty;

    /* True if this block driver only supports compressed writes. */
    bool needs_compressed_writes;
} BlockDriverInfo;
63
/*
 * Cluster counters.
 *
 * NOTE(review): the fields carry no documentation in this header; going by
 * their names they count allocated, total, fragmented and compressed
 * clusters respectively -- confirm against the code that fills them in.
 */
typedef struct BlockFragInfo {
    uint64_t allocated_clusters;
    uint64_t total_clusters;
    uint64_t fragmented_clusters;
    uint64_t compressed_clusters;
} BlockFragInfo;
70
/*
 * Flags that can be attached to a block-layer request.
 *
 * Every flag occupies its own bit, so flags can be OR-ed together.  Bit 3
 * (0x8) has no named flag in this list, although BDRV_REQ_MASK covers it.
 */
typedef enum {
    BDRV_REQ_COPY_ON_READ       = 1 << 0,   /* 0x001 */
    BDRV_REQ_ZERO_WRITE         = 1 << 1,   /* 0x002 */

    /*
     * Used in write_zeroes requests: tells the block driver that it should
     * unmap (discard) blocks, provided it is guaranteed that the result
     * reads back as zeroes.  The driver only ever sees this flag when the
     * block device was opened with BDRV_O_UNMAP.
     */
    BDRV_REQ_MAY_UNMAP          = 1 << 2,   /* 0x004 */

    BDRV_REQ_FUA                = 1 << 4,   /* 0x010 */
    BDRV_REQ_WRITE_COMPRESSED   = 1 << 5,   /* 0x020 */

    /* This write request will not change the visible disk content. */
    BDRV_REQ_WRITE_UNCHANGED    = 1 << 6,   /* 0x040 */

    /* Forces request serialisation.  Use only with write requests. */
    BDRV_REQ_SERIALISING        = 1 << 7,   /* 0x080 */

    /*
     * Only execute the request if it can be offloaded or otherwise carried
     * out efficiently; return an error rather than taking a slow fallback.
     */
    BDRV_REQ_NO_FALLBACK        = 1 << 8,   /* 0x100 */

    /*
     * Only meaningful in the context of copy-on-read (that is, combined
     * with BDRV_REQ_COPY_ON_READ, or when a COR filter is involved).  It
     * signals that the COR operation need not read the data into memory
     * (qiov); it merely has to make sure the data are copied to the top
     * layer, i.e. that the COR operation is done.
     */
    BDRV_REQ_PREFETCH           = 1 << 9,   /* 0x200 */

    /*
     * Fail immediately instead of waiting for other requests.  Used only
     * together with BDRV_REQ_SERIALISING.
     */
    BDRV_REQ_NO_WAIT            = 1 << 10,  /* 0x400 */

    /* Mask of valid flags */
    BDRV_REQ_MASK               = 0x7ff,
} BdrvRequestFlags;
122
/*
 * Open flags.  Each flag is a distinct bit; the value 0x0040 is not
 * assigned in this list.
 */

/* don't share permissions */
#define BDRV_O_NO_SHARE     0x0001

#define BDRV_O_RDWR         0x0002

/* request permission for resizing the node */
#define BDRV_O_RESIZE       0x0004

/* open the file read only and save writes in a snapshot */
#define BDRV_O_SNAPSHOT     0x0008

/* delete the file after use */
#define BDRV_O_TEMPORARY    0x0010

/* do not use the host page cache */
#define BDRV_O_NOCACHE      0x0020

/* use native AIO instead of the thread pool */
#define BDRV_O_NATIVE_AIO   0x0080

/* don't open the backing file */
#define BDRV_O_NO_BACKING   0x0100

/* disable flushing on this disk */
#define BDRV_O_NO_FLUSH     0x0200

/* copy read backing sectors into image */
#define BDRV_O_COPY_ON_READ 0x0400

/* consistency hint for migration handoff */
#define BDRV_O_INACTIVE     0x0800

/* open solely for consistency check */
#define BDRV_O_CHECK        0x1000

/* allow reopen to change from r/o to r/w */
#define BDRV_O_ALLOW_RDWR   0x2000

/* execute guest UNMAP/TRIM operations */
#define BDRV_O_UNMAP        0x4000

/*
 * if no block driver is explicitly given: select an appropriate protocol
 * driver, ignoring the format layer
 */
#define BDRV_O_PROTOCOL     0x8000

/* don't initialize for I/O */
#define BDRV_O_NO_IO        0x10000

/* degrade to read-only if opening read-write fails */
#define BDRV_O_AUTO_RDONLY  0x20000

/* use io_uring instead of the thread pool */
#define BDRV_O_IO_URING     0x40000

/* The two flags that describe the cache mode. */
#define BDRV_O_CACHE_MASK   (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH)
148
149
/* Names of the options that the block layer itself parses */

/* cache.* options */
#define BDRV_OPT_CACHE_WB           "cache.writeback"
#define BDRV_OPT_CACHE_DIRECT       "cache.direct"
#define BDRV_OPT_CACHE_NO_FLUSH     "cache.no-flush"

/* remaining options */
#define BDRV_OPT_READ_ONLY          "read-only"
#define BDRV_OPT_AUTO_READ_ONLY     "auto-read-only"
#define BDRV_OPT_DISCARD            "discard"
#define BDRV_OPT_FORCE_SHARE        "force-share"
159
160
/* A block-layer sector is 1 << BDRV_SECTOR_BITS bytes, i.e. 512 bytes. */
#define BDRV_SECTOR_BITS    9
#define BDRV_SECTOR_SIZE    (1ULL << BDRV_SECTOR_BITS)

/*
 * Request size limits, expressed in sectors and in bytes.
 *
 * The sector limit is built from SIZE_MAX and INT_MAX, each shifted down to
 * a sector count.  MIN_CONST is defined elsewhere; presumably it yields the
 * smaller of the two as a constant expression -- confirm.
 */
#define BDRV_REQUEST_MAX_SECTORS \
    MIN_CONST(SIZE_MAX >> BDRV_SECTOR_BITS, INT_MAX >> BDRV_SECTOR_BITS)
#define BDRV_REQUEST_MAX_BYTES \
    (BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS)
167
/*
 * We want to allow aligning requests and the disk length up to any 32-bit
 * alignment without having to be afraid of overflow.
 *
 * To achieve that, and at the same time use a reasonably round number as
 * the maximum disk size, the maximum "length" (a limit for any offset/bytes
 * request and for the disk size) is defined as INT64_MAX aligned down with
 * BDRV_MAX_ALIGNMENT.
 *
 * NOTE(review): QEMU_ALIGN_DOWN is defined elsewhere.  Assuming it rounds
 * its first argument down to a multiple of the second, the resulting limit
 * is a multiple of BDRV_MAX_ALIGNMENT just below INT64_MAX; it is not itself
 * a power of two.
 */
#define BDRV_MAX_ALIGNMENT  (1L << 30)
#define BDRV_MAX_LENGTH     (QEMU_ALIGN_DOWN(INT64_MAX, BDRV_MAX_ALIGNMENT))
177
/*
 * Allocation status flags, as used by bdrv_block_status() and friends.
 *
 * Public flags:
 *   BDRV_BLOCK_DATA          allocation for data at offset is tied to this
 *                            layer
 *   BDRV_BLOCK_ZERO          offset reads as zero
 *   BDRV_BLOCK_OFFSET_VALID  an associated offset exists for accessing raw
 *                            data
 *   BDRV_BLOCK_ALLOCATED     the content of the block is determined by this
 *                            layer rather than any backing; set by the block
 *                            layer
 *   BDRV_BLOCK_EOF           the returned pnum covers through end of file
 *                            for this layer; set by the block layer
 *
 * Internal flags:
 *   BDRV_BLOCK_RAW           for passthrough drivers (such as raw): asks the
 *                            block layer to recompute the answer from the
 *                            returned BDS.  Must be accompanied by
 *                            BDRV_BLOCK_OFFSET_VALID and nothing else.
 *   BDRV_BLOCK_RECURSE       asks the block layer to search recursively for
 *                            zeroes in the file child of the current block
 *                            node, inside the returned region.  Only valid
 *                            together with both BDRV_BLOCK_DATA and
 *                            BDRV_BLOCK_OFFSET_VALID; should not appear with
 *                            BDRV_BLOCK_ZERO.
 *
 * When BDRV_BLOCK_OFFSET_VALID is set, the map parameter is the host offset
 * within the returned BDS that is allocated for the corresponding raw guest
 * data.  Whether that offset actually contains data additionally depends on
 * BDRV_BLOCK_DATA:
 *
 * DATA ZERO OFFSET_VALID
 *  t    t        t       sectors read as zero, returned file is zero at
 *                        offset
 *  t    f        t       sectors read as valid from file at offset
 *  f    t        t       sectors preallocated, read as zero, returned file
 *                        not necessarily zero at offset
 *  f    f        t       sectors preallocated but read from backing_hd,
 *                        returned file contains garbage at offset
 *  t    t        f       sectors preallocated, read as zero, unknown offset
 *  t    f        f       sectors read from unknown file or offset
 *  f    t        f       not allocated or unknown offset, read as zero
 *  f    f        f       not allocated or unknown offset, read from
 *                        backing_hd
 */
#define BDRV_BLOCK_DATA             0x01
#define BDRV_BLOCK_ZERO             0x02
#define BDRV_BLOCK_OFFSET_VALID     0x04
#define BDRV_BLOCK_RAW              0x08
#define BDRV_BLOCK_ALLOCATED        0x10
#define BDRV_BLOCK_EOF              0x20
#define BDRV_BLOCK_RECURSE          0x40
224
225 typedef QTAILQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
226
227 typedef struct BDRVReopenState {
228 BlockDriverState *bs;
229 int flags;
230 BlockdevDetectZeroesOptions detect_zeroes;
231 bool backing_missing;
232 BlockDriverState *old_backing_bs; /* keep pointer for permissions update */
233 BlockDriverState *old_file_bs; /* keep pointer for permissions update */
234 QDict *options;
235 QDict *explicit_options;
236 void *opaque;
237 } BDRVReopenState;
238
/*
 * Block operation types.
 *
 * Enumerators are numbered consecutively starting at zero, which makes
 * BLOCK_OP_TYPE_MAX (always last) equal to the number of operation types.
 */
typedef enum BlockOpType {
    BLOCK_OP_TYPE_BACKUP_SOURCE = 0,
    BLOCK_OP_TYPE_BACKUP_TARGET,
    BLOCK_OP_TYPE_CHANGE,
    BLOCK_OP_TYPE_COMMIT_SOURCE,
    BLOCK_OP_TYPE_COMMIT_TARGET,
    BLOCK_OP_TYPE_DATAPLANE,
    BLOCK_OP_TYPE_DRIVE_DEL,
    BLOCK_OP_TYPE_EJECT,
    BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT,
    BLOCK_OP_TYPE_INTERNAL_SNAPSHOT,
    BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE,
    BLOCK_OP_TYPE_MIRROR_SOURCE,
    BLOCK_OP_TYPE_MIRROR_TARGET,
    BLOCK_OP_TYPE_RESIZE,
    BLOCK_OP_TYPE_STREAM,
    BLOCK_OP_TYPE_REPLACE,

    /* Not an operation type: one past the last valid value. */
    BLOCK_OP_TYPE_MAX,
} BlockOpType;
261
/* Block node permission constants */
enum {
    /**
     * A user holding the consistent-read "permission" is guaranteed that
     * their view of the block device contents is complete and
     * self-consistent, representing the contents of a disk at a specific
     * point.
     *
     * This holds for most block devices (including their backing files),
     * but the property cannot be maintained in a few situations, such as
     * for the intermediate nodes of a commit block job.
     */
    BLK_PERM_CONSISTENT_READ    = 1 << 0,   /* 0x01 */

    /** Required in order to change the visible disk contents. */
    BLK_PERM_WRITE              = 1 << 1,   /* 0x02 */

    /**
     * Weaker than BLK_PERM_WRITE.  It is both enough and required for
     * writes to the block node when the caller promises that the visible
     * disk content doesn't change.
     *
     * Since BLK_PERM_WRITE is strictly stronger, either permission is
     * sufficient to perform an unchanging write.
     */
    BLK_PERM_WRITE_UNCHANGED    = 1 << 2,   /* 0x04 */

    /** Required in order to change the size of a block node. */
    BLK_PERM_RESIZE             = 1 << 3,   /* 0x08 */

    /**
     * There used to be a bit BLK_PERM_GRAPH_MOD with the value 0x10; it has
     * been removed.  QEMU 6.1 and earlier may still lock the corresponding
     * byte in block/file-posix locking, so anyone implementing a new
     * permission must be very careful not to interfere with this old,
     * unused bit.
     */

    BLK_PERM_ALL                = 0x0f,

    DEFAULT_PERM_PASSTHROUGH    = BLK_PERM_CONSISTENT_READ |
                                  BLK_PERM_WRITE |
                                  BLK_PERM_WRITE_UNCHANGED |
                                  BLK_PERM_RESIZE,

    DEFAULT_PERM_UNCHANGED      = BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH,
};
308
/*
 * Role flags that a parent node assigns to each of its child nodes to state
 * what kind of role(s) the child takes.
 *
 * Every child must have at least one of DATA, METADATA, FILTERED, or COW
 * set.
 */
enum BdrvChildRoleBits {
    /*
     * The child stores data.
     * A node may have any number of such children.
     */
    BDRV_CHILD_DATA         = 0x01,

    /*
     * The child stores metadata.
     * A node may have any number of metadata-storing children.
     */
    BDRV_CHILD_METADATA     = 0x02,

    /*
     * The child always presents exactly the same visible data as the
     * parent, e.g. because the parent forwards all reads and writes.
     * Mutually exclusive with DATA, METADATA, and COW.
     * A node may have at most one filtered child at a time.
     */
    BDRV_CHILD_FILTERED     = 0x04,

    /*
     * The backing child: all data that isn't allocated in the parent is
     * read from it, and such data is copied to the parent through COW
     * (and optionally COR).
     * Mutually exclusive with DATA, METADATA, and FILTERED.
     * A node may have at most one such backing child at a time.
     */
    BDRV_CHILD_COW          = 0x08,

    /*
     * The primary child.  For most drivers this is the child whose
     * filename applies best to the parent node.
     * A node may have at most one primary child at a time.
     */
    BDRV_CHILD_PRIMARY      = 0x10,

    /* Useful combination of flags */
    BDRV_CHILD_IMAGE        = BDRV_CHILD_DATA |
                              BDRV_CHILD_METADATA |
                              BDRV_CHILD_PRIMARY,
};

/* Mask of BdrvChildRoleBits values */
typedef unsigned int BdrvChildRole;
364
365 typedef struct BdrvCheckResult {
366 int corruptions;
367 int leaks;
368 int check_errors;
369 int corruptions_fixed;
370 int leaks_fixed;
371 int64_t image_end_offset;
372 BlockFragInfo bfi;
373 } BdrvCheckResult;
374
/*
 * Check mode bits.  The two values are distinct bits and can therefore be
 * combined with bitwise OR.
 */
typedef enum {
    BDRV_FIX_LEAKS  = 1 << 0,   /* 1 */
    BDRV_FIX_ERRORS = 1 << 1,   /* 2 */
} BdrvCheckMode;
379
/*
 * Pair of 32-bit block sizes.
 *
 * NOTE(review): presumably "phys" is the physical and "log" the logical
 * block size -- the header does not say; confirm with the users of this
 * struct.
 */
typedef struct BlockSizes {
    uint32_t phys;
    uint32_t log;
} BlockSizes;
384
/* Disk geometry expressed as three 32-bit counts. */
typedef struct HDGeometry {
    uint32_t heads;
    uint32_t sectors;
    uint32_t cylinders;
} HDGeometry;
390
391 /*
392 * Common functions that are neither I/O nor Global State.
393 *
394 * These functions must never call any function from other categories
395 * (I/O, "I/O or GS", Global State) except this one, but can be invoked by
396 * all of them.
397 */
398
399 char *bdrv_perm_names(uint64_t perm);
400 uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm);
401
402 void bdrv_init_with_whitelist(void);
403 bool bdrv_uses_whitelist(void);
404 int bdrv_is_whitelisted(BlockDriver *drv, bool read_only);
405
406 int bdrv_parse_aio(const char *mode, int *flags);
407 int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough);
408 int bdrv_parse_discard_flags(const char *mode, int *flags);
409
410 int path_has_protocol(const char *path);
411 int path_is_absolute(const char *path);
412 char *path_combine(const char *base_path, const char *filename);
413
414 char *bdrv_get_full_backing_filename_from_filename(const char *backed,
415 const char *backing,
416 Error **errp);
417
418 #endif /* BLOCK_COMMON_H */