]>
Commit | Line | Data |
---|---|---|
34dc7c2f BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
1d3ba0bf | 9 | * or https://opensource.org/licenses/CDDL-1.0. |
34dc7c2f BB |
10 | * See the License for the specific language governing permissions |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | ||
22 | /* | |
428870ff | 23 | * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. |
a38718a6 | 24 | * Copyright 2011 Nexenta Systems, Inc. All rights reserved. |
4f072827 | 25 | * Copyright (c) 2012, 2020 by Delphix. All rights reserved. |
9759c60f | 26 | * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. |
e550644f BB |
27 | * Copyright (c) 2013, Joyent, Inc. All rights reserved. |
28 | * Copyright 2016 Toomas Soome <tsoome@me.com> | |
10b3c7f5 | 29 | * Copyright (c) 2019, Allan Jude |
cac416f1 | 30 | * Copyright (c) 2019, 2023, 2024, Klara Inc. |
10b3c7f5 | 31 | * Copyright (c) 2019-2020, Michael Niewöhner |
a38718a6 | 32 | */ |
34dc7c2f BB |
33 | |
34 | #ifndef _ZIO_H | |
35 | #define _ZIO_H | |
36 | ||
fcff0f35 | 37 | #include <sys/zio_priority.h> |
34dc7c2f BB |
38 | #include <sys/zfs_context.h> |
39 | #include <sys/spa.h> | |
40 | #include <sys/txg.h> | |
41 | #include <sys/avl.h> | |
34dc7c2f BB |
42 | #include <sys/fs/zfs.h> |
43 | #include <sys/zio_impl.h> | |
44 | ||
45 | #ifdef __cplusplus | |
46 | extern "C" { | |
47 | #endif | |
48 | ||
428870ff BB |
49 | /* |
50 | * Embedded checksum | |
51 | */ | |
52 | #define ZEC_MAGIC 0x210da7ab10c7a11ULL | |
34dc7c2f | 53 | |
428870ff BB |
54 | typedef struct zio_eck { |
55 | uint64_t zec_magic; /* for validation, endianness */ | |
56 | zio_cksum_t zec_cksum; /* 256-bit checksum */ | |
57 | } zio_eck_t; | |
34dc7c2f BB |
58 | |
59 | /* | |
60 | * Gang block headers are self-checksumming and contain an array | |
61 | * of block pointers. | |
62 | */ | |
63 | #define SPA_GANGBLOCKSIZE SPA_MINBLOCKSIZE | |
64 | #define SPA_GBH_NBLKPTRS ((SPA_GANGBLOCKSIZE - \ | |
428870ff | 65 | sizeof (zio_eck_t)) / sizeof (blkptr_t)) |
34dc7c2f | 66 | #define SPA_GBH_FILLER ((SPA_GANGBLOCKSIZE - \ |
428870ff | 67 | sizeof (zio_eck_t) - \ |
34dc7c2f BB |
68 | (SPA_GBH_NBLKPTRS * sizeof (blkptr_t))) /\ |
69 | sizeof (uint64_t)) | |
70 | ||
34dc7c2f BB |
71 | typedef struct zio_gbh { |
72 | blkptr_t zg_blkptr[SPA_GBH_NBLKPTRS]; | |
73 | uint64_t zg_filler[SPA_GBH_FILLER]; | |
428870ff | 74 | zio_eck_t zg_tail; |
34dc7c2f BB |
75 | } zio_gbh_phys_t; |
76 | ||
77 | enum zio_checksum { | |
78 | ZIO_CHECKSUM_INHERIT = 0, | |
79 | ZIO_CHECKSUM_ON, | |
80 | ZIO_CHECKSUM_OFF, | |
81 | ZIO_CHECKSUM_LABEL, | |
82 | ZIO_CHECKSUM_GANG_HEADER, | |
83 | ZIO_CHECKSUM_ZILOG, | |
84 | ZIO_CHECKSUM_FLETCHER_2, | |
85 | ZIO_CHECKSUM_FLETCHER_4, | |
86 | ZIO_CHECKSUM_SHA256, | |
428870ff | 87 | ZIO_CHECKSUM_ZILOG2, |
3c67d83a TH |
88 | ZIO_CHECKSUM_NOPARITY, |
89 | ZIO_CHECKSUM_SHA512, | |
90 | ZIO_CHECKSUM_SKEIN, | |
91 | ZIO_CHECKSUM_EDONR, | |
985c33b1 | 92 | ZIO_CHECKSUM_BLAKE3, |
34dc7c2f BB |
93 | ZIO_CHECKSUM_FUNCTIONS |
94 | }; | |
95 | ||
9b67f605 MA |
96 | /* |
97 | * The number of "legacy" checksum functions which can be set on individual | |
98 | * objects. | |
99 | */ | |
100 | #define ZIO_CHECKSUM_LEGACY_FUNCTIONS ZIO_CHECKSUM_ZILOG2 | |
101 | ||
9babb374 | 102 | #define ZIO_CHECKSUM_ON_VALUE ZIO_CHECKSUM_FLETCHER_4 |
34dc7c2f BB |
103 | #define ZIO_CHECKSUM_DEFAULT ZIO_CHECKSUM_ON |
104 | ||
428870ff | 105 | #define ZIO_CHECKSUM_MASK 0xffULL |
d7852458 | 106 | #define ZIO_CHECKSUM_VERIFY (1U << 8) |
428870ff BB |
107 | |
108 | #define ZIO_DEDUPCHECKSUM ZIO_CHECKSUM_SHA256 | |
428870ff | 109 | |
b5256303 TC |
110 | /* macros defining encryption lengths */ |
111 | #define ZIO_OBJSET_MAC_LEN 32 | |
112 | #define ZIO_DATA_IV_LEN 12 | |
113 | #define ZIO_DATA_SALT_LEN 8 | |
114 | #define ZIO_DATA_MAC_LEN 16 | |
115 | ||
9b67f605 MA |
116 | /* |
117 | * The number of "legacy" compression functions which can be set on individual | |
118 | * objects. | |
119 | */ | |
120 | #define ZIO_COMPRESS_LEGACY_FUNCTIONS ZIO_COMPRESS_LZ4 | |
121 | ||
99197f03 JG |
122 | /* |
123 | * The meaning of "compress = on" selected by the compression features enabled | |
124 | * on a given pool. | |
125 | */ | |
126 | #define ZIO_COMPRESS_LEGACY_ON_VALUE ZIO_COMPRESS_LZJB | |
127 | #define ZIO_COMPRESS_LZ4_ON_VALUE ZIO_COMPRESS_LZ4 | |
128 | ||
56fa4aa9 | 129 | #define ZIO_COMPRESS_DEFAULT ZIO_COMPRESS_ON |
34dc7c2f | 130 | |
428870ff BB |
131 | #define BOOTFS_COMPRESS_VALID(compress) \ |
132 | ((compress) == ZIO_COMPRESS_LZJB || \ | |
9759c60f | 133 | (compress) == ZIO_COMPRESS_LZ4 || \ |
8aab1218 TS |
134 | (compress) == ZIO_COMPRESS_GZIP_1 || \ |
135 | (compress) == ZIO_COMPRESS_GZIP_2 || \ | |
136 | (compress) == ZIO_COMPRESS_GZIP_3 || \ | |
137 | (compress) == ZIO_COMPRESS_GZIP_4 || \ | |
138 | (compress) == ZIO_COMPRESS_GZIP_5 || \ | |
139 | (compress) == ZIO_COMPRESS_GZIP_6 || \ | |
140 | (compress) == ZIO_COMPRESS_GZIP_7 || \ | |
141 | (compress) == ZIO_COMPRESS_GZIP_8 || \ | |
142 | (compress) == ZIO_COMPRESS_GZIP_9 || \ | |
143 | (compress) == ZIO_COMPRESS_ZLE || \ | |
10b3c7f5 | 144 | (compress) == ZIO_COMPRESS_ZSTD || \ |
99197f03 | 145 | (compress) == ZIO_COMPRESS_ON || \ |
428870ff BB |
146 | (compress) == ZIO_COMPRESS_OFF) |
147 | ||
10b3c7f5 MN |
148 | /* Split or join a value's algorithm (masked) and level (shifted) parts. */ |
149 | #define ZIO_COMPRESS_ALGO(x) ((x) & SPA_COMPRESSMASK) | |
150 | #define ZIO_COMPRESS_LEVEL(x) (((x) & ~SPA_COMPRESSMASK) >> SPA_COMPRESSBITS) | |
151 | #define ZIO_COMPRESS_RAW(type, level) ((type) | ((level) << SPA_COMPRESSBITS)) | |
152 | ||
153 | #define ZIO_COMPLEVEL_ZSTD(level) \ | |
154 | ZIO_COMPRESS_RAW(ZIO_COMPRESS_ZSTD, level) | |
155 | ||
34dc7c2f BB |
156 | #define ZIO_FAILURE_MODE_WAIT 0 |
157 | #define ZIO_FAILURE_MODE_CONTINUE 1 | |
158 | #define ZIO_FAILURE_MODE_PANIC 2 | |
159 | ||
cec3a0a1 OF |
160 | typedef enum zio_suspend_reason { |
161 | ZIO_SUSPEND_NONE = 0, | |
162 | ZIO_SUSPEND_IOERR, | |
163 | ZIO_SUSPEND_MMP, | |
164 | } zio_suspend_reason_t; | |
165 | ||
4938d01d RY |
166 | /* |
167 | * This was originally an enum type. However, those are 32-bit and there is no | |
168 | * way to make a 64-bit enum type. Since we ran out of bits for flags, we were | |
169 | * forced to upgrade it to a uint64_t. | |
170 | */ | |
171 | typedef uint64_t zio_flag_t; | |
428870ff BB |
172 | /* |
173 | * Flags inherited by gang, ddt, and vdev children, | |
174 | * and that must be equal for two zios to aggregate | |
175 | */ | |
4938d01d RY |
176 | #define ZIO_FLAG_DONT_AGGREGATE (1ULL << 0) |
177 | #define ZIO_FLAG_IO_REPAIR (1ULL << 1) | |
178 | #define ZIO_FLAG_SELF_HEAL (1ULL << 2) | |
179 | #define ZIO_FLAG_RESILVER (1ULL << 3) | |
180 | #define ZIO_FLAG_SCRUB (1ULL << 4) | |
181 | #define ZIO_FLAG_SCAN_THREAD (1ULL << 5) | |
182 | #define ZIO_FLAG_PHYSICAL (1ULL << 6) | |
428870ff BB |
183 | |
184 | #define ZIO_FLAG_AGG_INHERIT (ZIO_FLAG_CANFAIL - 1) | |
185 | ||
186 | /* | |
187 | * Flags inherited by ddt, gang, and vdev children. | |
188 | */ | |
4938d01d RY |
189 | #define ZIO_FLAG_CANFAIL (1ULL << 7) /* must be first for INHERIT */ |
190 | #define ZIO_FLAG_SPECULATIVE (1ULL << 8) | |
191 | #define ZIO_FLAG_CONFIG_WRITER (1ULL << 9) | |
192 | #define ZIO_FLAG_DONT_RETRY (1ULL << 10) | |
4938d01d RY |
193 | #define ZIO_FLAG_NODATA (1ULL << 12) |
194 | #define ZIO_FLAG_INDUCE_DAMAGE (1ULL << 13) | |
195 | #define ZIO_FLAG_IO_ALLOCATING (1ULL << 14) | |
428870ff BB |
196 | |
197 | #define ZIO_FLAG_DDT_INHERIT (ZIO_FLAG_IO_RETRY - 1) | |
198 | #define ZIO_FLAG_GANG_INHERIT (ZIO_FLAG_IO_RETRY - 1) | |
199 | ||
200 | /* | |
201 | * Flags inherited by vdev children. | |
202 | */ | |
4938d01d RY |
203 | #define ZIO_FLAG_IO_RETRY (1ULL << 15) /* must be first for INHERIT */ |
204 | #define ZIO_FLAG_PROBE (1ULL << 16) | |
205 | #define ZIO_FLAG_TRYHARD (1ULL << 17) | |
206 | #define ZIO_FLAG_OPTIONAL (1ULL << 18) | |
428870ff BB |
207 | |
208 | #define ZIO_FLAG_VDEV_INHERIT (ZIO_FLAG_DONT_QUEUE - 1) | |
209 | ||
210 | /* | |
211 | * Flags not inherited by any children. | |
212 | */ | |
4938d01d RY |
213 | #define ZIO_FLAG_DONT_QUEUE (1ULL << 19) /* must be first for INHERIT */ |
214 | #define ZIO_FLAG_DONT_PROPAGATE (1ULL << 20) | |
215 | #define ZIO_FLAG_IO_BYPASS (1ULL << 21) | |
216 | #define ZIO_FLAG_IO_REWRITE (1ULL << 22) | |
217 | #define ZIO_FLAG_RAW_COMPRESS (1ULL << 23) | |
218 | #define ZIO_FLAG_RAW_ENCRYPT (1ULL << 24) | |
219 | #define ZIO_FLAG_GANG_CHILD (1ULL << 25) | |
220 | #define ZIO_FLAG_DDT_CHILD (1ULL << 26) | |
221 | #define ZIO_FLAG_GODFATHER (1ULL << 27) | |
222 | #define ZIO_FLAG_NOPWRITE (1ULL << 28) | |
223 | #define ZIO_FLAG_REEXECUTED (1ULL << 29) | |
224 | #define ZIO_FLAG_DELEGATED (1ULL << 30) | |
428870ff | 225 | |
3bd4df38 EN |
226 | #define ZIO_ALLOCATOR_NONE (-1) |
227 | #define ZIO_HAS_ALLOCATOR(zio) ((zio)->io_allocator != ZIO_ALLOCATOR_NONE) | |
228 | ||
428870ff | 229 | #define ZIO_FLAG_MUSTSUCCEED 0 |
b5256303 | 230 | #define ZIO_FLAG_RAW (ZIO_FLAG_RAW_COMPRESS | ZIO_FLAG_RAW_ENCRYPT) |
428870ff BB |
231 | |
232 | #define ZIO_DDT_CHILD_FLAGS(zio) \ | |
233 | (((zio)->io_flags & ZIO_FLAG_DDT_INHERIT) | \ | |
234 | ZIO_FLAG_DDT_CHILD | ZIO_FLAG_CANFAIL) | |
235 | ||
b128c09f BB |
236 | #define ZIO_GANG_CHILD_FLAGS(zio) \ |
237 | (((zio)->io_flags & ZIO_FLAG_GANG_INHERIT) | \ | |
238 | ZIO_FLAG_GANG_CHILD | ZIO_FLAG_CANFAIL) | |
239 | ||
428870ff BB |
240 | #define ZIO_VDEV_CHILD_FLAGS(zio) \ |
241 | (((zio)->io_flags & ZIO_FLAG_VDEV_INHERIT) | \ | |
a1d477c2 | 242 | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_CANFAIL) |
428870ff | 243 | |
d7852458 TS |
244 | #define ZIO_CHILD_BIT(x) (1U << (x)) |
245 | #define ZIO_CHILD_BIT_IS_SET(val, x) ((val) & (1U << (x))) | |
ddc751d5 | 246 | |
b128c09f BB |
247 | enum zio_child { |
248 | ZIO_CHILD_VDEV = 0, | |
249 | ZIO_CHILD_GANG, | |
428870ff | 250 | ZIO_CHILD_DDT, |
b128c09f BB |
251 | ZIO_CHILD_LOGICAL, |
252 | ZIO_CHILD_TYPES | |
253 | }; | |
254 | ||
ddc751d5 GW |
255 | #define ZIO_CHILD_VDEV_BIT ZIO_CHILD_BIT(ZIO_CHILD_VDEV) |
256 | #define ZIO_CHILD_GANG_BIT ZIO_CHILD_BIT(ZIO_CHILD_GANG) | |
257 | #define ZIO_CHILD_DDT_BIT ZIO_CHILD_BIT(ZIO_CHILD_DDT) | |
258 | #define ZIO_CHILD_LOGICAL_BIT ZIO_CHILD_BIT(ZIO_CHILD_LOGICAL) | |
259 | #define ZIO_CHILD_ALL_BITS \ | |
62840030 | 260 | (ZIO_CHILD_VDEV_BIT | ZIO_CHILD_GANG_BIT | \ |
ddc751d5 GW |
261 | ZIO_CHILD_DDT_BIT | ZIO_CHILD_LOGICAL_BIT) |
262 | ||
b128c09f BB |
263 | enum zio_wait_type { |
264 | ZIO_WAIT_READY = 0, | |
265 | ZIO_WAIT_DONE, | |
266 | ZIO_WAIT_TYPES | |
267 | }; | |
268 | ||
34dc7c2f BB |
269 | typedef void zio_done_func_t(zio_t *zio); |
270 | ||
3c502d3b | 271 | extern int zio_exclude_metadata; |
3dfb57a3 | 272 | extern int zio_dva_throttle_enabled; |
18168da7 | 273 | extern const char *const zio_type_name[ZIO_TYPES]; |
34dc7c2f BB |
274 | |
275 | /* | |
276 | * A bookmark is a four-tuple <objset, object, level, blkid> that uniquely | |
277 | * identifies any block in the pool. By convention, the meta-objset (MOS) | |
428870ff BB |
278 | * is objset 0, and the meta-dnode is object 0. This covers all blocks |
279 | * except root blocks and ZIL blocks, which are defined as follows: | |
34dc7c2f | 280 | * |
428870ff BB |
281 | * Root blocks (objset_phys_t) are object 0, level -1: <objset, 0, -1, 0>. |
282 | * ZIL blocks are bookmarked <objset, 0, -2, blkid == ZIL sequence number>. | |
283 | * dmu_sync()ed ZIL data blocks are bookmarked <objset, object, -2, blkid>. | |
fcff0f35 | 284 | * dnode visit bookmarks are <objset, object id of dnode, -3, 0>. |
34dc7c2f | 285 | * |
428870ff BB |
286 | * Note: this structure is called a bookmark because its original purpose |
287 | * was to remember where to resume a pool-wide traverse. | |
34dc7c2f | 288 | * |
5dbd68a3 MA |
289 | * Note: this structure is passed between userland and the kernel, and is |
290 | * stored on disk (by virtue of being incorporated into other on-disk | |
291 | * structures, e.g. dsl_scan_phys_t). | |
0409d332 GA |
292 | * |
293 | * If the head_errlog feature is enabled a different on-disk format for error | |
294 | * logs is used. This introduces the use of an error bookmark, a four-tuple | |
295 | * <object, level, blkid, birth> that uniquely identifies any error block | |
296 | * in the pool. The birth transaction group is used to track whether the block | |
297 | * has been overwritten by newer data or added to a snapshot since its marking | |
298 | * as an error. | |
34dc7c2f | 299 | */ |
5dbd68a3 | 300 | struct zbookmark_phys { |
34dc7c2f BB |
301 | uint64_t zb_objset; |
302 | uint64_t zb_object; | |
303 | int64_t zb_level; | |
304 | uint64_t zb_blkid; | |
1421c891 | 305 | }; |
34dc7c2f | 306 | |
431083f7 | 307 | struct zbookmark_err_phys { |
0409d332 GA |
308 | uint64_t zb_object; |
309 | int64_t zb_level; | |
310 | uint64_t zb_blkid; | |
311 | uint64_t zb_birth; | |
431083f7 | 312 | }; |
0409d332 | 313 | |
428870ff BB |
314 | #define SET_BOOKMARK(zb, objset, object, level, blkid) \ |
315 | { \ | |
316 | (zb)->zb_objset = objset; \ | |
317 | (zb)->zb_object = object; \ | |
318 | (zb)->zb_level = level; \ | |
319 | (zb)->zb_blkid = blkid; \ | |
320 | } | |
321 | ||
322 | #define ZB_DESTROYED_OBJSET (-1ULL) | |
323 | ||
324 | #define ZB_ROOT_OBJECT (0ULL) | |
325 | #define ZB_ROOT_LEVEL (-1LL) | |
326 | #define ZB_ROOT_BLKID (0ULL) | |
327 | ||
328 | #define ZB_ZIL_OBJECT (0ULL) | |
329 | #define ZB_ZIL_LEVEL (-2LL) | |
330 | ||
fcff0f35 PD |
331 | #define ZB_DNODE_LEVEL (-3LL) |
332 | #define ZB_DNODE_BLKID (0ULL) | |
333 | ||
9ae529ec CS |
334 | #define ZB_IS_ZERO(zb) \ |
335 | ((zb)->zb_objset == 0 && (zb)->zb_object == 0 && \ | |
336 | (zb)->zb_level == 0 && (zb)->zb_blkid == 0) | |
337 | #define ZB_IS_ROOT(zb) \ | |
338 | ((zb)->zb_object == ZB_ROOT_OBJECT && \ | |
339 | (zb)->zb_level == ZB_ROOT_LEVEL && \ | |
340 | (zb)->zb_blkid == ZB_ROOT_BLKID) | |
341 | ||
b128c09f BB |
342 | typedef struct zio_prop { |
343 | enum zio_checksum zp_checksum; | |
344 | enum zio_compress zp_compress; | |
10b3c7f5 | 345 | uint8_t zp_complevel; |
b128c09f | 346 | uint8_t zp_level; |
428870ff | 347 | uint8_t zp_copies; |
b4a08730 | 348 | dmu_object_type_t zp_type; |
03c6040b GW |
349 | boolean_t zp_dedup; |
350 | boolean_t zp_dedup_verify; | |
351 | boolean_t zp_nopwrite; | |
67a1b037 | 352 | boolean_t zp_brtwrite; |
b5256303 TC |
353 | boolean_t zp_encrypt; |
354 | boolean_t zp_byteorder; | |
355 | uint8_t zp_salt[ZIO_DATA_SALT_LEN]; | |
356 | uint8_t zp_iv[ZIO_DATA_IV_LEN]; | |
357 | uint8_t zp_mac[ZIO_DATA_MAC_LEN]; | |
cc99f275 | 358 | uint32_t zp_zpl_smallblk; |
b128c09f BB |
359 | } zio_prop_t; |
360 | ||
428870ff BB |
361 | typedef struct zio_cksum_report zio_cksum_report_t; |
362 | ||
363 | typedef void zio_cksum_finish_f(zio_cksum_report_t *rep, | |
84c07ada | 364 | const abd_t *good_data); |
428870ff BB |
365 | typedef void zio_cksum_free_f(void *cbdata, size_t size); |
366 | ||
367 | struct zio_bad_cksum; /* defined in zio_checksum.h */ | |
9ae529ec | 368 | struct dnode_phys; |
a6255b7f | 369 | struct abd; |
428870ff BB |
370 | |
371 | struct zio_cksum_report { | |
372 | struct zio_cksum_report *zcr_next; | |
373 | nvlist_t *zcr_ereport; | |
374 | nvlist_t *zcr_detector; | |
375 | void *zcr_cbdata; | |
376 | size_t zcr_cbinfo; /* passed to zcr_free() */ | |
b2255edc | 377 | uint64_t zcr_sector; |
428870ff BB |
378 | uint64_t zcr_align; |
379 | uint64_t zcr_length; | |
380 | zio_cksum_finish_f *zcr_finish; | |
381 | zio_cksum_free_f *zcr_free; | |
382 | ||
383 | /* internal use only */ | |
384 | struct zio_bad_cksum *zcr_ckinfo; /* information from failure */ | |
385 | }; | |
386 | ||
428870ff BB |
387 | typedef struct zio_vsd_ops { |
388 | zio_done_func_t *vsd_free; | |
428870ff BB |
389 | } zio_vsd_ops_t; |
390 | ||
b128c09f BB |
391 | typedef struct zio_gang_node { |
392 | zio_gbh_phys_t *gn_gbh; | |
393 | struct zio_gang_node *gn_child[SPA_GBH_NBLKPTRS]; | |
394 | } zio_gang_node_t; | |
395 | ||
396 | typedef zio_t *zio_gang_issue_func_t(zio_t *zio, blkptr_t *bp, | |
a6255b7f | 397 | zio_gang_node_t *gn, struct abd *data, uint64_t offset); |
b128c09f | 398 | |
a6255b7f | 399 | typedef void zio_transform_func_t(zio_t *zio, struct abd *data, uint64_t size); |
b128c09f BB |
400 | |
401 | typedef struct zio_transform { | |
a6255b7f | 402 | struct abd *zt_orig_abd; |
b128c09f BB |
403 | uint64_t zt_orig_size; |
404 | uint64_t zt_bufsize; | |
405 | zio_transform_func_t *zt_transform; | |
406 | struct zio_transform *zt_next; | |
407 | } zio_transform_t; | |
408 | ||
62840030 | 409 | typedef zio_t *zio_pipe_stage_t(zio_t *zio); |
b128c09f BB |
410 | |
411 | /* | |
412 | * The io_reexecute flags are distinct from io_flags because the child must | |
413 | * be able to propagate them to the parent. The normal io_flags are local | |
414 | * to the zio, not protected by any lock, and not modifiable by children; | |
415 | * the reexecute flags are protected by io_lock, modifiable by children, | |
416 | * and always propagated -- even when ZIO_FLAG_DONT_PROPAGATE is set. | |
417 | */ | |
418 | #define ZIO_REEXECUTE_NOW 0x01 | |
419 | #define ZIO_REEXECUTE_SUSPEND 0x02 | |
420 | ||
1b939560 BB |
421 | /* |
422 | * The io_trim flags are used to specify the type of TRIM to perform. They | |
423 | * only apply to ZIO_TYPE_TRIM zios and are distinct from io_flags. | |
424 | */ | |
425 | enum trim_flag { | |
d7852458 | 426 | ZIO_TRIM_SECURE = 1U << 0, |
1b939560 BB |
427 | }; |
428 | ||
4e21fd06 DB |
429 | typedef struct zio_alloc_list { |
430 | list_t zal_list; | |
431 | uint64_t zal_size; | |
432 | } zio_alloc_list_t; | |
433 | ||
d164b209 BB |
434 | typedef struct zio_link { |
435 | zio_t *zl_parent; | |
436 | zio_t *zl_child; | |
437 | list_node_t zl_parent_node; | |
438 | list_node_t zl_child_node; | |
439 | } zio_link_t; | |
440 | ||
8469b5aa AM |
441 | enum zio_qstate { |
442 | ZIO_QS_NONE = 0, | |
443 | ZIO_QS_QUEUED, | |
444 | ZIO_QS_ACTIVE, | |
445 | }; | |
446 | ||
34dc7c2f BB |
447 | struct zio { |
448 | /* Core information about this I/O */ | |
5dbd68a3 | 449 | zbookmark_phys_t io_bookmark; |
b128c09f BB |
450 | zio_prop_t io_prop; |
451 | zio_type_t io_type; | |
452 | enum zio_child io_child_type; | |
1b939560 | 453 | enum trim_flag io_trim_flags; |
e8b96c60 | 454 | zio_priority_t io_priority; |
b128c09f | 455 | uint8_t io_reexecute; |
d164b209 | 456 | uint8_t io_state[ZIO_WAIT_TYPES]; |
34dc7c2f | 457 | uint64_t io_txg; |
b128c09f | 458 | spa_t *io_spa; |
34dc7c2f | 459 | blkptr_t *io_bp; |
428870ff | 460 | blkptr_t *io_bp_override; |
34dc7c2f | 461 | blkptr_t io_bp_copy; |
d164b209 BB |
462 | list_t io_parent_list; |
463 | list_t io_child_list; | |
34dc7c2f | 464 | zio_t *io_logical; |
b128c09f | 465 | zio_transform_t *io_transform_stack; |
34dc7c2f BB |
466 | |
467 | /* Callback info */ | |
bc77ba73 PD |
468 | zio_done_func_t *io_ready; |
469 | zio_done_func_t *io_children_ready; | |
34dc7c2f BB |
470 | zio_done_func_t *io_done; |
471 | void *io_private; | |
428870ff | 472 | int64_t io_prev_space_delta; /* DMU private */ |
34dc7c2f | 473 | blkptr_t io_bp_orig; |
2aa34383 DK |
474 | /* io_lsize != io_orig_size iff this is a raw write */ |
475 | uint64_t io_lsize; | |
34dc7c2f BB |
476 | |
477 | /* Data represented by this I/O */ | |
a6255b7f DQ |
478 | struct abd *io_abd; |
479 | struct abd *io_orig_abd; | |
34dc7c2f | 480 | uint64_t io_size; |
428870ff | 481 | uint64_t io_orig_size; |
34dc7c2f BB |
482 | |
483 | /* Stuff for the vdev stack */ | |
484 | vdev_t *io_vd; | |
485 | void *io_vsd; | |
428870ff | 486 | const zio_vsd_ops_t *io_vsd_ops; |
cc99f275 | 487 | metaslab_class_t *io_metaslab_class; /* dva throttle class */ |
428870ff | 488 | |
8469b5aa AM |
489 | enum zio_qstate io_queue_state; /* vdev queue state */ |
490 | union { | |
491 | list_node_t l; | |
492 | avl_node_t a; | |
493 | } io_queue_node ____cacheline_aligned; /* allocator and vdev queues */ | |
494 | avl_node_t io_offset_node; /* vdev offset queues */ | |
34dc7c2f | 495 | uint64_t io_offset; |
cb682a17 | 496 | hrtime_t io_timestamp; /* submitted at */ |
3dfb57a3 | 497 | hrtime_t io_queued_timestamp; |
26ef0cc7 | 498 | hrtime_t io_target_timestamp; |
cb682a17 | 499 | hrtime_t io_delta; /* vdev queue service delta */ |
193a37cb TH |
500 | hrtime_t io_delay; /* Device access time (disk or */ |
501 | /* file). */ | |
4e21fd06 | 502 | zio_alloc_list_t io_alloc_list; |
34dc7c2f BB |
503 | |
504 | /* Internal pipeline state */ | |
4938d01d | 505 | zio_flag_t io_flags; |
428870ff BB |
506 | enum zio_stage io_stage; |
507 | enum zio_stage io_pipeline; | |
4938d01d | 508 | zio_flag_t io_orig_flags; |
428870ff BB |
509 | enum zio_stage io_orig_stage; |
510 | enum zio_stage io_orig_pipeline; | |
3dfb57a3 | 511 | enum zio_stage io_pipeline_trace; |
b128c09f BB |
512 | int io_error; |
513 | int io_child_error[ZIO_CHILD_TYPES]; | |
514 | uint64_t io_children[ZIO_CHILD_TYPES][ZIO_WAIT_TYPES]; | |
515 | uint64_t *io_stall; | |
9babb374 | 516 | zio_t *io_gang_leader; |
b128c09f BB |
517 | zio_gang_node_t *io_gang_tree; |
518 | void *io_executor; | |
34dc7c2f | 519 | void *io_waiter; |
d6f67df6 | 520 | void *io_bio; |
34dc7c2f BB |
521 | kmutex_t io_lock; |
522 | kcondvar_t io_cv; | |
492f64e9 | 523 | int io_allocator; |
34dc7c2f BB |
524 | |
525 | /* FMA state */ | |
428870ff | 526 | zio_cksum_report_t *io_cksum_report; |
34dc7c2f | 527 | uint64_t io_ena; |
a38718a6 GA |
528 | |
529 | /* Taskq dispatching state */ | |
530 | taskq_ent_t io_tqent; | |
3bd4df38 EN |
531 | |
532 | /* write issue taskq selection, based upon sync thread */ | |
533 | taskq_t *io_wr_iss_tq; | |
34dc7c2f BB |
534 | }; |
535 | ||
bc67cba7 PZ |
536 | enum blk_verify_flag { |
537 | BLK_VERIFY_ONLY, | |
538 | BLK_VERIFY_LOG, | |
539 | BLK_VERIFY_HALT | |
540 | }; | |
541 | ||
3095ca91 MA |
542 | enum blk_config_flag { |
543 | BLK_CONFIG_HELD, // SCL_VDEV held for writer | |
544 | BLK_CONFIG_NEEDED, // SCL_VDEV should be obtained for reader | |
545 | BLK_CONFIG_SKIP, // skip checks which require SCL_VDEV | |
546 | }; | |
547 | ||
64fc7762 | 548 | extern int zio_bookmark_compare(const void *, const void *); |
3dfb57a3 | 549 | |
d164b209 | 550 | extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd, |
4938d01d | 551 | zio_done_func_t *done, void *priv, zio_flag_t flags); |
34dc7c2f BB |
552 | |
553 | extern zio_t *zio_root(spa_t *spa, | |
4938d01d | 554 | zio_done_func_t *done, void *priv, zio_flag_t flags); |
34dc7c2f | 555 | |
e8cf3a4f AP |
556 | extern void zio_destroy(zio_t *zio); |
557 | ||
a6255b7f | 558 | extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, |
60265072 | 559 | struct abd *data, uint64_t lsize, zio_done_func_t *done, void *priv, |
4938d01d | 560 | zio_priority_t priority, zio_flag_t flags, const zbookmark_phys_t *zb); |
34dc7c2f | 561 | |
b128c09f | 562 | extern zio_t *zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, |
a6255b7f | 563 | struct abd *data, uint64_t size, uint64_t psize, const zio_prop_t *zp, |
bc77ba73 | 564 | zio_done_func_t *ready, zio_done_func_t *children_ready, |
ccec7fbe AM |
565 | zio_done_func_t *done, void *priv, zio_priority_t priority, |
566 | zio_flag_t flags, const zbookmark_phys_t *zb); | |
34dc7c2f | 567 | |
b128c09f | 568 | extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, |
60265072 | 569 | struct abd *data, uint64_t size, zio_done_func_t *done, void *priv, |
4938d01d | 570 | zio_priority_t priority, zio_flag_t flags, zbookmark_phys_t *zb); |
b128c09f | 571 | |
03c6040b | 572 | extern void zio_write_override(zio_t *zio, blkptr_t *bp, int copies, |
67a1b037 | 573 | boolean_t nopwrite, boolean_t brtwrite); |
34dc7c2f | 574 | |
428870ff | 575 | extern void zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp); |
34dc7c2f | 576 | |
428870ff BB |
577 | extern zio_t *zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, |
578 | const blkptr_t *bp, | |
4938d01d | 579 | zio_done_func_t *done, void *priv, zio_flag_t flags); |
34dc7c2f | 580 | |
1b939560 | 581 | extern zio_t *zio_trim(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size, |
60265072 | 582 | zio_done_func_t *done, void *priv, zio_priority_t priority, |
4938d01d | 583 | zio_flag_t flags, enum trim_flag trim_flags); |
1b939560 | 584 | |
34dc7c2f | 585 | extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, |
a6255b7f | 586 | uint64_t size, struct abd *data, int checksum, |
60265072 | 587 | zio_done_func_t *done, void *priv, zio_priority_t priority, |
4938d01d | 588 | zio_flag_t flags, boolean_t labels); |
34dc7c2f BB |
589 | |
590 | extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, | |
a6255b7f | 591 | uint64_t size, struct abd *data, int checksum, |
60265072 | 592 | zio_done_func_t *done, void *priv, zio_priority_t priority, |
4938d01d | 593 | zio_flag_t flags, boolean_t labels); |
34dc7c2f | 594 | |
428870ff | 595 | extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, |
4938d01d | 596 | const blkptr_t *bp, zio_flag_t flags); |
428870ff | 597 | |
b5256303 TC |
598 | extern int zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, |
599 | blkptr_t *new_bp, uint64_t size, boolean_t *slog); | |
34dc7c2f | 600 | extern void zio_flush(zio_t *zio, vdev_t *vd); |
428870ff | 601 | extern void zio_shrink(zio_t *zio, uint64_t size); |
34dc7c2f BB |
602 | |
603 | extern int zio_wait(zio_t *zio); | |
604 | extern void zio_nowait(zio_t *zio); | |
23c13c7e AL |
605 | extern void zio_execute(void *zio); |
606 | extern void zio_interrupt(void *zio); | |
26ef0cc7 TH |
607 | extern void zio_delay_init(zio_t *zio); |
608 | extern void zio_delay_interrupt(zio_t *zio); | |
dd66857d | 609 | extern void zio_deadman(zio_t *zio, const char *tag); |
34dc7c2f | 610 | |
3dfb57a3 DB |
611 | extern zio_t *zio_walk_parents(zio_t *cio, zio_link_t **); |
612 | extern zio_t *zio_walk_children(zio_t *pio, zio_link_t **); | |
d164b209 BB |
613 | extern zio_t *zio_unique_parent(zio_t *cio); |
614 | extern void zio_add_child(zio_t *pio, zio_t *cio); | |
b4a08730 | 615 | extern void zio_add_child_first(zio_t *pio, zio_t *cio); |
d164b209 | 616 | |
34dc7c2f BB |
617 | extern void *zio_buf_alloc(size_t size); |
618 | extern void zio_buf_free(void *buf, size_t size); | |
619 | extern void *zio_data_buf_alloc(size_t size); | |
620 | extern void zio_data_buf_free(void *buf, size_t size); | |
621 | ||
a6255b7f | 622 | extern void zio_push_transform(zio_t *zio, struct abd *abd, uint64_t size, |
d3c2ae1c GW |
623 | uint64_t bufsize, zio_transform_func_t *transform); |
624 | extern void zio_pop_transforms(zio_t *zio); | |
625 | ||
34dc7c2f BB |
626 | extern void zio_resubmit_stage_async(void *); |
627 | ||
34dc7c2f | 628 | extern zio_t *zio_vdev_child_io(zio_t *zio, blkptr_t *bp, vdev_t *vd, |
a6255b7f | 629 | uint64_t offset, struct abd *data, uint64_t size, int type, |
4938d01d | 630 | zio_priority_t priority, zio_flag_t flags, |
60265072 | 631 | zio_done_func_t *done, void *priv); |
34dc7c2f | 632 | |
b128c09f | 633 | extern zio_t *zio_vdev_delegated_io(vdev_t *vd, uint64_t offset, |
9e052db4 | 634 | struct abd *data, uint64_t size, zio_type_t type, zio_priority_t priority, |
4938d01d | 635 | zio_flag_t flags, zio_done_func_t *done, void *priv); |
b128c09f | 636 | |
34dc7c2f BB |
637 | extern void zio_vdev_io_bypass(zio_t *zio); |
638 | extern void zio_vdev_io_reissue(zio_t *zio); | |
639 | extern void zio_vdev_io_redone(zio_t *zio); | |
640 | ||
a8b2e306 TC |
641 | extern void zio_change_priority(zio_t *pio, zio_priority_t priority); |
642 | ||
34dc7c2f | 643 | extern void zio_checksum_verified(zio_t *zio); |
b128c09f | 644 | extern int zio_worst_error(int e1, int e2); |
34dc7c2f | 645 | |
428870ff BB |
646 | extern enum zio_checksum zio_checksum_select(enum zio_checksum child, |
647 | enum zio_checksum parent); | |
648 | extern enum zio_checksum zio_checksum_dedup_select(spa_t *spa, | |
649 | enum zio_checksum child, enum zio_checksum parent); | |
99197f03 JG |
650 | extern enum zio_compress zio_compress_select(spa_t *spa, |
651 | enum zio_compress child, enum zio_compress parent); | |
10b3c7f5 MN |
652 | extern uint8_t zio_complevel_select(spa_t *spa, enum zio_compress compress, |
653 | uint8_t child, uint8_t parent); | |
34dc7c2f | 654 | |
cec3a0a1 | 655 | extern void zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t); |
9babb374 | 656 | extern int zio_resume(spa_t *spa); |
b128c09f | 657 | extern void zio_resume_wait(spa_t *spa); |
34dc7c2f | 658 | |
bc67cba7 | 659 | extern boolean_t zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp, |
3095ca91 | 660 | enum blk_config_flag blk_config, enum blk_verify_flag blk_verify); |
bc67cba7 | 661 | |
34dc7c2f BB |
662 | /* |
663 | * Initial setup and teardown. | |
664 | */ | |
665 | extern void zio_init(void); | |
666 | extern void zio_fini(void); | |
667 | ||
668 | /* | |
669 | * Fault injection | |
670 | */ | |
671 | struct zinject_record; | |
672 | extern uint32_t zio_injection_enabled; | |
673 | extern int zio_inject_fault(char *name, int flags, int *id, | |
674 | struct zinject_record *record); | |
675 | extern int zio_inject_list_next(int *id, char *name, size_t buflen, | |
676 | struct zinject_record *record); | |
677 | extern int zio_clear_fault(int id); | |
dd66857d AZ |
678 | extern void zio_handle_panic_injection(spa_t *spa, const char *tag, |
679 | uint64_t type); | |
be9a5c35 TC |
680 | extern int zio_handle_decrypt_injection(spa_t *spa, const zbookmark_phys_t *zb, |
681 | uint64_t type, int error); | |
34dc7c2f | 682 | extern int zio_handle_fault_injection(zio_t *zio, int error); |
9babb374 | 683 | extern int zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error); |
d977122d DB |
684 | extern int zio_handle_device_injections(vdev_t *vd, zio_t *zio, int err1, |
685 | int err2); | |
b128c09f | 686 | extern int zio_handle_label_injection(zio_t *zio, int error); |
428870ff | 687 | extern void zio_handle_ignored_writes(zio_t *zio); |
26ef0cc7 | 688 | extern hrtime_t zio_handle_io_delay(zio_t *zio); |
428870ff BB |
689 | |
690 | /* | |
691 | * Checksum ereport functions | |
692 | */ | |
4f072827 | 693 | extern int zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, |
a2c2ed1b | 694 | const zbookmark_phys_t *zb, struct zio *zio, uint64_t offset, |
330c6c05 | 695 | uint64_t length, struct zio_bad_cksum *info); |
428870ff | 696 | extern void zfs_ereport_finish_checksum(zio_cksum_report_t *report, |
84c07ada | 697 | const abd_t *good_data, const abd_t *bad_data, boolean_t drop_if_identical); |
428870ff | 698 | |
428870ff BB |
699 | extern void zfs_ereport_free_checksum(zio_cksum_report_t *report); |
700 | ||
701 | /* If we have the good data in hand, this function can be used */ | |
ad796b8a | 702 | extern int zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd, |
a2c2ed1b TC |
703 | const zbookmark_phys_t *zb, struct zio *zio, uint64_t offset, |
704 | uint64_t length, const abd_t *good_data, const abd_t *bad_data, | |
705 | struct zio_bad_cksum *info); | |
428870ff | 706 | |
330c6c05 | 707 | void zio_vsd_default_cksum_report(zio_t *zio, zio_cksum_report_t *zcr); |
5a54a4e0 JL |
708 | extern void zfs_ereport_snapshot_post(const char *subclass, spa_t *spa, |
709 | const char *name); | |
330c6c05 | 710 | |
428870ff BB |
711 | /* Called from spa_sync(), but primarily an injection handler */ |
712 | extern void spa_handle_ignored_writes(spa_t *spa); | |
34dc7c2f | 713 | |
5dbd68a3 | 714 | /* zbookmark_phys functions */ |
fcff0f35 PD |
715 | boolean_t zbookmark_subtree_completed(const struct dnode_phys *dnp, |
716 | const zbookmark_phys_t *subtree_root, const zbookmark_phys_t *last_block); | |
33dba8c7 AM |
717 | boolean_t zbookmark_subtree_tbd(const struct dnode_phys *dnp, |
718 | const zbookmark_phys_t *subtree_root, const zbookmark_phys_t *last_block); | |
fcff0f35 PD |
719 | int zbookmark_compare(uint16_t dbss1, uint8_t ibs1, uint16_t dbss2, |
720 | uint8_t ibs2, const zbookmark_phys_t *zb1, const zbookmark_phys_t *zb2); | |
9ae529ec | 721 | |
34dc7c2f BB |
722 | #ifdef __cplusplus |
723 | } | |
724 | #endif | |
725 | ||
726 | #endif /* _ZIO_H */ |