]>
Commit | Line | Data |
---|---|---|
34dc7c2f BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | ||
22 | /* | |
23 | * Copyright 2008 Sun Microsystems, Inc. All rights reserved. | |
24 | * Use is subject to license terms. | |
25 | */ | |
26 | ||
27 | #ifndef _ZIO_H | |
28 | #define _ZIO_H | |
29 | ||
34dc7c2f BB |
30 | #include <sys/zfs_context.h> |
31 | #include <sys/spa.h> | |
32 | #include <sys/txg.h> | |
33 | #include <sys/avl.h> | |
34dc7c2f BB |
34 | #include <sys/fs/zfs.h> |
35 | #include <sys/zio_impl.h> | |
36 | ||
37 | #ifdef __cplusplus | |
38 | extern "C" { | |
39 | #endif | |
40 | ||
41 | #define ZBT_MAGIC 0x210da7ab10c7a11ULL /* hex-digit spelling of "zio data bloc tail"; "bloc" is deliberate */ | |
42 | ||
43 | typedef struct zio_block_tail { /* validation magic plus checksum; embedded as zg_tail in zio_gbh_phys_t */ | |
44 | uint64_t zbt_magic; /* for validation, endianness */ | |
45 | zio_cksum_t zbt_cksum; /* 256-bit checksum */ | |
46 | } zio_block_tail_t; | |
47 | ||
48 | /* | |
49 | * Gang block headers are self-checksumming and contain an array | |
50 | * of block pointers. | |
51 | */ | |
52 | #define SPA_GANGBLOCKSIZE SPA_MINBLOCKSIZE /* a gang header is sized as one minimum-size block */ | |
53 | #define SPA_GBH_NBLKPTRS ((SPA_GANGBLOCKSIZE - \ | |
54 | sizeof (zio_block_tail_t)) / sizeof (blkptr_t)) /* whole blkptr_t entries that fit ahead of the tail */ | |
55 | #define SPA_GBH_FILLER ((SPA_GANGBLOCKSIZE - \ | |
56 | sizeof (zio_block_tail_t) - \ | |
57 | (SPA_GBH_NBLKPTRS * sizeof (blkptr_t))) /\ | |
58 | sizeof (uint64_t)) /* space left after blkptrs and tail, counted in uint64_t words */ | |
59 | ||
34dc7c2f BB |
60 | typedef struct zio_gbh { /* gang block header: blkptrs, then filler, then tail; all sized from SPA_GANGBLOCKSIZE */ |
61 | blkptr_t zg_blkptr[SPA_GBH_NBLKPTRS]; /* the array of block pointers */ | |
62 | uint64_t zg_filler[SPA_GBH_FILLER]; /* pads the gap between the last blkptr and the tail */ | |
63 | zio_block_tail_t zg_tail; /* magic and checksum for the header itself */ | |
64 | } zio_gbh_phys_t; | |
65 | ||
66 | enum zio_checksum { /* values are assigned by position, starting at 0 */ | |
67 | ZIO_CHECKSUM_INHERIT = 0, | |
68 | ZIO_CHECKSUM_ON, | |
69 | ZIO_CHECKSUM_OFF, | |
70 | ZIO_CHECKSUM_LABEL, | |
71 | ZIO_CHECKSUM_GANG_HEADER, | |
72 | ZIO_CHECKSUM_ZILOG, | |
73 | ZIO_CHECKSUM_FLETCHER_2, | |
74 | ZIO_CHECKSUM_FLETCHER_4, | |
75 | ZIO_CHECKSUM_SHA256, | |
76 | ZIO_CHECKSUM_FUNCTIONS /* last enumerator: equals the number of values above */ | |
77 | }; | |
78 | ||
79 | #define ZIO_CHECKSUM_ON_VALUE ZIO_CHECKSUM_FLETCHER_2 /* NOTE(review): presumably what ZIO_CHECKSUM_ON resolves to -- confirm in zio_checksum_select() */ | |
80 | #define ZIO_CHECKSUM_DEFAULT ZIO_CHECKSUM_ON /* default setting is ON rather than a specific algorithm */ | |
81 | ||
82 | enum zio_compress { /* values are assigned by position, starting at 0 */ | |
83 | ZIO_COMPRESS_INHERIT = 0, | |
84 | ZIO_COMPRESS_ON, | |
85 | ZIO_COMPRESS_OFF, | |
86 | ZIO_COMPRESS_LZJB, | |
87 | ZIO_COMPRESS_EMPTY, | |
88 | ZIO_COMPRESS_GZIP_1, | |
89 | ZIO_COMPRESS_GZIP_2, | |
90 | ZIO_COMPRESS_GZIP_3, | |
91 | ZIO_COMPRESS_GZIP_4, | |
92 | ZIO_COMPRESS_GZIP_5, | |
93 | ZIO_COMPRESS_GZIP_6, | |
94 | ZIO_COMPRESS_GZIP_7, | |
95 | ZIO_COMPRESS_GZIP_8, | |
96 | ZIO_COMPRESS_GZIP_9, | |
97 | ZIO_COMPRESS_FUNCTIONS /* last enumerator: equals the number of values above */ | |
98 | }; | |
99 | ||
100 | #define ZIO_COMPRESS_ON_VALUE ZIO_COMPRESS_LZJB /* NOTE(review): presumably what ZIO_COMPRESS_ON resolves to -- confirm in zio_compress_select() */ | |
101 | #define ZIO_COMPRESS_DEFAULT ZIO_COMPRESS_OFF /* unlike checksums, the default here is OFF */ | |
102 | ||
103 | #define ZIO_FAILURE_MODE_WAIT 0 /* NOTE(review): the three modes are presumably the pool failure-mode settings -- confirm against callers */ | |
104 | #define ZIO_FAILURE_MODE_CONTINUE 1 | |
105 | #define ZIO_FAILURE_MODE_PANIC 2 | |
106 | ||
107 | #define ZIO_PRIORITY_NOW (zio_priority_table[0]) /* every priority is a lookup in zio_priority_table[], not a literal constant */ | |
108 | #define ZIO_PRIORITY_SYNC_READ (zio_priority_table[1]) | |
109 | #define ZIO_PRIORITY_SYNC_WRITE (zio_priority_table[2]) | |
110 | #define ZIO_PRIORITY_ASYNC_READ (zio_priority_table[3]) | |
111 | #define ZIO_PRIORITY_ASYNC_WRITE (zio_priority_table[4]) | |
112 | #define ZIO_PRIORITY_FREE (zio_priority_table[5]) | |
113 | #define ZIO_PRIORITY_CACHE_FILL (zio_priority_table[6]) | |
114 | #define ZIO_PRIORITY_LOG_WRITE (zio_priority_table[7]) | |
115 | #define ZIO_PRIORITY_RESILVER (zio_priority_table[8]) | |
116 | #define ZIO_PRIORITY_SCRUB (zio_priority_table[9]) | |
117 | #define ZIO_PRIORITY_TABLE_SIZE 10 /* dimension of zio_priority_table[]; must cover indices 0 through 9 used above */ | |
118 | ||
119 | #define ZIO_FLAG_MUSTSUCCEED 0x00000 /* zero: sets no bit, so it cannot be tested with a bitwise AND */ | |
120 | #define ZIO_FLAG_CANFAIL 0x00001 | |
b128c09f BB |
121 | #define ZIO_FLAG_SPECULATIVE 0x00002 |
122 | #define ZIO_FLAG_CONFIG_WRITER 0x00004 | |
123 | #define ZIO_FLAG_DONT_RETRY 0x00008 | |
34dc7c2f BB |
124 | |
125 | #define ZIO_FLAG_DONT_CACHE 0x00010 | |
126 | #define ZIO_FLAG_DONT_QUEUE 0x00020 | |
b128c09f BB |
127 | #define ZIO_FLAG_DONT_AGGREGATE 0x00040 |
128 | #define ZIO_FLAG_DONT_PROPAGATE 0x00080 /* does not stop io_reexecute bits, which are always propagated */ | |
34dc7c2f | 129 | |
b128c09f BB |
130 | #define ZIO_FLAG_IO_BYPASS 0x00100 |
131 | #define ZIO_FLAG_IO_REPAIR 0x00200 | |
132 | #define ZIO_FLAG_IO_RETRY 0x00400 | |
133 | #define ZIO_FLAG_IO_REWRITE 0x00800 | |
34dc7c2f | 134 | |
b128c09f BB |
135 | #define ZIO_FLAG_PROBE 0x01000 |
136 | #define ZIO_FLAG_RESILVER 0x02000 | |
137 | #define ZIO_FLAG_SCRUB 0x04000 | |
138 | #define ZIO_FLAG_SCRUB_THREAD 0x08000 | |
34dc7c2f | 139 | |
b128c09f | 140 | #define ZIO_FLAG_GANG_CHILD 0x10000 /* highest bit defined here; added by ZIO_GANG_CHILD_FLAGS() */ |
34dc7c2f BB |
141 | |
142 | #define ZIO_FLAG_GANG_INHERIT \ | |
143 | (ZIO_FLAG_CANFAIL | \ | |
34dc7c2f | 144 | ZIO_FLAG_SPECULATIVE | \ |
b128c09f BB |
145 | ZIO_FLAG_CONFIG_WRITER | \ |
146 | ZIO_FLAG_DONT_RETRY | \ | |
147 | ZIO_FLAG_DONT_CACHE | \ | |
148 | ZIO_FLAG_DONT_AGGREGATE | \ | |
34dc7c2f BB |
149 | ZIO_FLAG_RESILVER | \ |
150 | ZIO_FLAG_SCRUB | \ | |
b128c09f | 151 | ZIO_FLAG_SCRUB_THREAD) /* mask that ZIO_GANG_CHILD_FLAGS() applies to the parent io_flags; DONT_QUEUE, DONT_PROPAGATE, IO_* and PROBE are left out */ |
34dc7c2f BB |
152 | |
153 | #define ZIO_FLAG_VDEV_INHERIT \ | |
154 | (ZIO_FLAG_GANG_INHERIT | \ | |
b128c09f BB |
155 | ZIO_FLAG_IO_REPAIR | \ |
156 | ZIO_FLAG_IO_RETRY | \ | |
157 | ZIO_FLAG_PROBE) /* superset of ZIO_FLAG_GANG_INHERIT; NOTE(review): presumably the flags a vdev child zio inherits -- confirm */ | |
34dc7c2f BB |
158 | |
159 | #define ZIO_PIPELINE_CONTINUE 0x100 /* NOTE(review): presumably the two return values of a zio_pipe_stage_t -- confirm */ | |
160 | #define ZIO_PIPELINE_STOP 0x101 | |
161 | ||
b128c09f BB |
162 | #define ZIO_GANG_CHILD_FLAGS(zio) \ |
163 | (((zio)->io_flags & ZIO_FLAG_GANG_INHERIT) | \ | |
164 | ZIO_FLAG_GANG_CHILD | ZIO_FLAG_CANFAIL) /* inheritable bits of zio, with GANG_CHILD and CANFAIL always set regardless of the parent */ | |
165 | ||
166 | enum zio_child { /* used as an array index in struct zio */ | |
167 | ZIO_CHILD_VDEV = 0, | |
168 | ZIO_CHILD_GANG, | |
169 | ZIO_CHILD_LOGICAL, | |
170 | ZIO_CHILD_TYPES /* count of the values above; sizes io_child_error[] and the first dimension of io_children[][] */ | |
171 | }; | |
172 | ||
173 | enum zio_wait_type { /* used as an array index in struct zio */ | |
174 | ZIO_WAIT_READY = 0, | |
175 | ZIO_WAIT_DONE, | |
176 | ZIO_WAIT_TYPES /* count of the values above; sizes the second dimension of io_children[][] */ | |
177 | }; | |
178 | ||
34dc7c2f BB |
179 | /* |
180 | * We'll take the unused errnos, 'EBADE' and 'EBADR' (from the Convergent | |
181 | * graveyard) to indicate checksum errors and fragmentation. | |
182 | */ | |
183 | #define ECKSUM EBADE /* checksum error */ | |
184 | #define EFRAGS EBADR /* fragmentation */ | |
185 | ||
186 | typedef struct zio zio_t; /* struct zio itself is defined further down in this header */ | |
187 | typedef void zio_done_func_t(zio_t *zio); /* a function type, not a pointer type; callers declare zio_done_func_t * */ | |
188 | ||
189 | extern uint8_t zio_priority_table[ZIO_PRIORITY_TABLE_SIZE]; /* backing store read by the ZIO_PRIORITY_* macros */ | |
190 | extern char *zio_type_name[ZIO_TYPES]; /* ZIO_TYPES strings; NOTE(review): presumably indexed by zio_type_t -- confirm */ | |
191 | ||
192 | /* | |
193 | * A bookmark is a four-tuple <objset, object, level, blkid> that uniquely | |
194 | * identifies any block in the pool. By convention, the meta-objset (MOS) | |
195 | * is objset 0, the meta-dnode is object 0, the root block (osphys_t) is | |
196 | * level -1 of the meta-dnode, and intent log blocks (which are chained | |
197 | * off the root block) have blkid == sequence number. In summary: | |
198 | * | |
199 | * mos is objset 0 | |
200 | * meta-dnode is object 0 | |
201 | * root block is <objset, 0, -1, 0> | |
202 | * intent log is <objset, 0, -1, ZIL sequence number> | |
203 | * | |
204 | * Note: this structure is called a bookmark because its first purpose was | |
205 | * to remember where to resume a pool-wide traverse. The absolute ordering | |
206 | * for block visitation during traversal is defined in compare_bookmark(). | |
207 | * | |
208 | * Note: this structure is passed between userland and the kernel. | |
209 | * Therefore it must not change size or alignment between 32/64 bit | |
210 | * compilation options. | |
211 | */ | |
212 | typedef struct zbookmark { /* four 64-bit fields only, so size and alignment match in 32-bit and 64-bit builds */ | |
213 | uint64_t zb_objset; /* 0 for the meta-objset (MOS) */ | |
214 | uint64_t zb_object; /* 0 for the meta-dnode */ | |
215 | int64_t zb_level; /* signed: the root block and intent log blocks use level -1 */ | |
216 | uint64_t zb_blkid; /* ZIL sequence number for intent log blocks */ | |
217 | } zbookmark_t; | |
218 | ||
b128c09f BB |
219 | typedef struct zio_prop { /* per-write properties; passed as the zp argument of zio_write() and stored as io_prop in struct zio */ |
220 | enum zio_checksum zp_checksum; | |
221 | enum zio_compress zp_compress; | |
222 | dmu_object_type_t zp_type; | |
223 | uint8_t zp_level; | |
224 | uint8_t zp_ndvas; /* NOTE(review): presumably the number of DVAs to allocate -- confirm */ | |
225 | } zio_prop_t; | |
226 | ||
227 | typedef struct zio_gang_node { /* tree node; struct zio points at the tree through io_gang_tree */ | |
228 | zio_gbh_phys_t *gn_gbh; /* the gang block header for this node */ | |
229 | struct zio_gang_node *gn_child[SPA_GBH_NBLKPTRS]; /* same dimension as zg_blkptr[] in the header */ | |
230 | } zio_gang_node_t; | |
231 | ||
232 | typedef zio_t *zio_gang_issue_func_t(zio_t *zio, blkptr_t *bp, | |
233 | zio_gang_node_t *gn, void *data); /* function type returning zio_t *; NOTE(review): presumably one implementation per I/O type -- confirm */ | |
234 | ||
235 | typedef void zio_transform_func_t(zio_t *zio, void *data, uint64_t size); /* function type stored in zt_transform */ | |
236 | ||
237 | typedef struct zio_transform { /* one entry of the list that struct zio holds in io_transform_stack */ | |
238 | void *zt_orig_data; | |
239 | uint64_t zt_orig_size; | |
240 | uint64_t zt_bufsize; | |
241 | zio_transform_func_t *zt_transform; | |
242 | struct zio_transform *zt_next; /* link to the next entry */ | |
243 | } zio_transform_t; | |
244 | ||
245 | typedef int zio_pipe_stage_t(zio_t *zio); /* function type; NOTE(review): the int is presumably ZIO_PIPELINE_CONTINUE or ZIO_PIPELINE_STOP -- confirm */ | |
246 | ||
247 | /* | |
248 | * The io_reexecute flags are distinct from io_flags because the child must | |
249 | * be able to propagate them to the parent. The normal io_flags are local | |
250 | * to the zio, not protected by any lock, and not modifiable by children; | |
251 | * the reexecute flags are protected by io_lock, modifiable by children, | |
252 | * and always propagated -- even when ZIO_FLAG_DONT_PROPAGATE is set. | |
253 | */ | |
254 | #define ZIO_REEXECUTE_NOW 0x01 /* reexecute bits are separate from the ZIO_FLAG_* bits kept in io_flags */ | |
255 | #define ZIO_REEXECUTE_SUSPEND 0x02 | |
256 | ||
34dc7c2f BB |
257 | struct zio { |
258 | /* Core information about this I/O */ | |
34dc7c2f | 259 | zbookmark_t io_bookmark; |
b128c09f BB |
260 | zio_prop_t io_prop; |
261 | zio_type_t io_type; | |
262 | enum zio_child io_child_type; | |
263 | int io_cmd; | |
264 | uint8_t io_priority; /* same element type as zio_priority_table[] */ | |
265 | uint8_t io_reexecute; /* ZIO_REEXECUTE_* bits; protected by io_lock and modifiable by children */ | |
266 | uint8_t io_async_root; | |
34dc7c2f | 267 | uint64_t io_txg; |
b128c09f | 268 | spa_t *io_spa; |
34dc7c2f BB |
269 | blkptr_t *io_bp; |
270 | blkptr_t io_bp_copy; /* a blkptr_t held by value, unlike the io_bp pointer */ | |
b128c09f | 271 | zio_t *io_parent; |
34dc7c2f BB |
272 | zio_t *io_child; |
273 | zio_t *io_sibling_prev; | |
274 | zio_t *io_sibling_next; | |
34dc7c2f | 275 | zio_t *io_logical; |
b128c09f | 276 | zio_transform_t *io_transform_stack; /* entries are linked through zt_next */ |
34dc7c2f BB |
277 | |
278 | /* Callback info */ | |
279 | zio_done_func_t *io_ready; | |
280 | zio_done_func_t *io_done; | |
281 | void *io_private; | |
282 | blkptr_t io_bp_orig; | |
283 | ||
284 | /* Data represented by this I/O */ | |
285 | void *io_data; | |
286 | uint64_t io_size; | |
287 | ||
288 | /* Stuff for the vdev stack */ | |
289 | vdev_t *io_vd; | |
290 | void *io_vsd; | |
b128c09f | 291 | zio_done_func_t *io_vsd_free; /* shares the callback type of io_ready and io_done */ |
34dc7c2f BB |
292 | uint64_t io_offset; |
293 | uint64_t io_deadline; | |
34dc7c2f BB |
294 | avl_node_t io_offset_node; |
295 | avl_node_t io_deadline_node; | |
296 | avl_tree_t *io_vdev_tree; | |
297 | zio_t *io_delegate_list; | |
298 | zio_t *io_delegate_next; | |
299 | ||
300 | /* Internal pipeline state */ | |
301 | int io_flags; /* ZIO_FLAG_* bits; local to this zio and not protected by any lock */ | |
b128c09f | 302 | zio_stage_t io_stage; |
34dc7c2f | 303 | uint32_t io_pipeline; |
b128c09f BB |
304 | int io_orig_flags; |
305 | zio_stage_t io_orig_stage; | |
34dc7c2f | 306 | uint32_t io_orig_pipeline; |
b128c09f BB |
307 | int io_error; |
308 | int io_child_error[ZIO_CHILD_TYPES]; /* one slot per enum zio_child value */ | |
309 | uint64_t io_children[ZIO_CHILD_TYPES][ZIO_WAIT_TYPES]; /* dimensioned [enum zio_child][enum zio_wait_type] */ | |
310 | uint64_t *io_stall; | |
311 | zio_gang_node_t *io_gang_tree; | |
312 | void *io_executor; | |
34dc7c2f BB |
313 | void *io_waiter; |
314 | kmutex_t io_lock; /* guards io_reexecute */ | |
315 | kcondvar_t io_cv; | |
316 | ||
317 | /* FMA state */ | |
318 | uint64_t io_ena; | |
319 | }; | |
320 | ||
321 | extern zio_t *zio_null(zio_t *pio, spa_t *spa, | |
322 | zio_done_func_t *done, void *private, int flags); /* carries no data, block pointer or priority arguments */ | |
323 | ||
324 | extern zio_t *zio_root(spa_t *spa, | |
325 | zio_done_func_t *done, void *private, int flags); /* same arguments as zio_null() minus the parent pio */ | |
326 | ||
b128c09f | 327 | extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, void *data, |
34dc7c2f | 328 | uint64_t size, zio_done_func_t *done, void *private, |
b128c09f | 329 | int priority, int flags, const zbookmark_t *zb); /* bp and zb are const here: the only creation call with a const bp */ |
34dc7c2f | 330 | |
b128c09f BB |
331 | extern zio_t *zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, |
332 | void *data, uint64_t size, zio_prop_t *zp, | |
333 | zio_done_func_t *ready, zio_done_func_t *done, void *private, | |
334 | int priority, int flags, const zbookmark_t *zb); /* the only creation call that takes a separate ready callback and a zio_prop_t */ | |
34dc7c2f | 335 | |
b128c09f BB |
336 | extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, |
337 | void *data, uint64_t size, zio_done_func_t *done, void *private, | |
338 | int priority, int flags, zbookmark_t *zb); /* NOTE(review): zb is non-const here, unlike zio_read() and zio_write(); must change together with the definition */ | |
339 | ||
340 | extern void zio_skip_write(zio_t *zio); | |
34dc7c2f BB |
341 | |
342 | extern zio_t *zio_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, | |
b128c09f | 343 | zio_done_func_t *done, void *private, int flags); /* takes no data, size or priority arguments */ |
34dc7c2f BB |
344 | |
345 | extern zio_t *zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, | |
b128c09f | 346 | zio_done_func_t *done, void *private, int flags); /* same parameter list as zio_free() */ |
34dc7c2f BB |
347 | |
348 | extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd, | |
349 | zio_done_func_t *done, void *private, int priority, int flags); | |
350 | ||
351 | extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, | |
352 | uint64_t size, void *data, int checksum, | |
353 | zio_done_func_t *done, void *private, int priority, int flags, | |
354 | boolean_t labels); /* addressed by vdev and offset, not by a blkptr_t; checksum is presumably an enum zio_checksum value -- confirm */ | |
355 | ||
356 | extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, | |
357 | uint64_t size, void *data, int checksum, | |
358 | zio_done_func_t *done, void *private, int priority, int flags, | |
359 | boolean_t labels); /* same parameter list as zio_read_phys() */ | |
360 | ||
361 | extern int zio_alloc_blk(spa_t *spa, uint64_t size, blkptr_t *new_bp, | |
362 | blkptr_t *old_bp, uint64_t txg); /* NOTE(review): int result is presumably 0 or an errno value -- confirm */ | |
363 | extern void zio_free_blk(spa_t *spa, blkptr_t *bp, uint64_t txg); | |
364 | extern void zio_flush(zio_t *zio, vdev_t *vd); | |
365 | ||
366 | extern int zio_wait(zio_t *zio); /* NOTE(review): int result is presumably the error of the zio -- confirm */ | |
367 | extern void zio_nowait(zio_t *zio); /* returns nothing to the caller, unlike zio_wait() */ | |
368 | extern void zio_execute(zio_t *zio); | |
369 | extern void zio_interrupt(zio_t *zio); | |
370 | ||
34dc7c2f BB |
371 | extern void *zio_buf_alloc(size_t size); |
372 | extern void zio_buf_free(void *buf, size_t size); /* the caller passes the size again on free */ | |
373 | extern void *zio_data_buf_alloc(size_t size); | |
374 | extern void zio_data_buf_free(void *buf, size_t size); /* the caller passes the size again on free */ | |
375 | ||
376 | extern void zio_resubmit_stage_async(void *); /* NOTE(review): unnamed void * argument, presumably a zio_t * handed through a generic callback -- confirm */ | |
377 | ||
34dc7c2f BB |
378 | extern zio_t *zio_vdev_child_io(zio_t *zio, blkptr_t *bp, vdev_t *vd, |
379 | uint64_t offset, void *data, uint64_t size, int type, int priority, | |
380 | int flags, zio_done_func_t *done, void *private); /* type is a plain int; presumably a zio_type_t value -- confirm */ | |
381 | ||
b128c09f BB |
382 | extern zio_t *zio_vdev_delegated_io(vdev_t *vd, uint64_t offset, |
383 | void *data, uint64_t size, int type, int priority, | |
384 | int flags, zio_done_func_t *done, void *private); /* like zio_vdev_child_io() but with no parent zio and no blkptr_t argument */ | |
385 | ||
34dc7c2f BB |
386 | extern void zio_vdev_io_bypass(zio_t *zio); |
387 | extern void zio_vdev_io_reissue(zio_t *zio); | |
388 | extern void zio_vdev_io_redone(zio_t *zio); | |
389 | ||
390 | extern void zio_checksum_verified(zio_t *zio); | |
b128c09f | 391 | extern int zio_worst_error(int e1, int e2); /* NOTE(review): presumably returns whichever of e1 and e2 is more severe -- confirm */ |
34dc7c2f BB |
392 | |
393 | extern uint8_t zio_checksum_select(uint8_t child, uint8_t parent); /* takes and returns uint8_t rather than enum zio_checksum */ | |
394 | extern uint8_t zio_compress_select(uint8_t child, uint8_t parent); /* takes and returns uint8_t rather than enum zio_compress */ | |
395 | ||
b128c09f BB |
396 | extern void zio_suspend(spa_t *spa, zio_t *zio); |
397 | extern void zio_resume(spa_t *spa); /* takes only the pool, not an individual zio */ | |
398 | extern void zio_resume_wait(spa_t *spa); | |
34dc7c2f BB |
399 | |
400 | /* | |
401 | * Initial setup and teardown. | |
402 | */ | |
403 | extern void zio_init(void); /* setup; takes no arguments and returns nothing */ | |
404 | extern void zio_fini(void); /* teardown counterpart of zio_init() */ | |
405 | ||
406 | /* | |
407 | * Fault injection | |
408 | */ | |
409 | struct zinject_record; /* forward declaration: this header only uses pointers to it */ | |
410 | extern uint32_t zio_injection_enabled; | |
411 | extern int zio_inject_fault(char *name, int flags, int *id, | |
412 | struct zinject_record *record); /* id is passed by pointer; NOTE(review): presumably receives the new handler id -- confirm */ | |
413 | extern int zio_inject_list_next(int *id, char *name, size_t buflen, | |
414 | struct zinject_record *record); /* buflen accompanies the name buffer */ | |
415 | extern int zio_clear_fault(int id); | |
416 | extern int zio_handle_fault_injection(zio_t *zio, int error); | |
417 | extern int zio_handle_device_injection(vdev_t *vd, int error); /* keyed by vdev, unlike the two zio-based handlers */ | |
b128c09f | 418 | extern int zio_handle_label_injection(zio_t *zio, int error); |
34dc7c2f BB |
419 | |
420 | #ifdef __cplusplus | |
421 | } | |
422 | #endif | |
423 | ||
424 | #endif /* _ZIO_H */ |