]>
Commit | Line | Data |
---|---|---|
34dc7c2f BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
1d3ba0bf | 9 | * or https://opensource.org/licenses/CDDL-1.0. |
34dc7c2f BB |
10 | * See the License for the specific language governing permissions |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
428870ff | 22 | * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. |
bc77ba73 | 23 | * Copyright (c) 2012, 2016 by Delphix. All rights reserved. |
3a17a7a9 | 24 | * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. |
10b3c7f5 MN |
25 | * Copyright (c) 2019, Allan Jude |
26 | * Copyright (c) 2019, Klara Inc. | |
34dc7c2f BB |
27 | */ |
28 | ||
29 | #ifndef _SYS_ARC_H | |
30 | #define _SYS_ARC_H | |
31 | ||
34dc7c2f BB |
32 | #include <sys/zfs_context.h> |
33 | ||
34 | #ifdef __cplusplus | |
35 | extern "C" { | |
36 | #endif | |
37 | ||
38 | #include <sys/zio.h> | |
39 | #include <sys/dmu.h> | |
40 | #include <sys/spa.h> | |
27d96d22 | 41 | #include <sys/zfs_refcount.h> |
34dc7c2f | 42 | |
ca0bf58d PS |
43 | /* |
44 | * Used by arc_flush() to inform arc_evict_state() that it should evict | |
45 | * all available buffers from the arc state being passed in. | |
46 | */ | |
8172df64 | 47 | #define ARC_EVICT_ALL UINT64_MAX |
ca0bf58d | 48 | |
e945e8d7 AJ |
49 | /* |
50 | * ZFS gets very unhappy when the maximum ARC size is smaller than the maximum | |
51 | * block size and a larger block is written. To leave some safety margin, we | |
52 | * limit the minimum for zfs_arc_max to the maximum transaction size. | |
53 | */ | |
54 | #define MIN_ARC_MAX DMU_MAX_ACCESS | |
55 | ||
d3c2ae1c GW |
56 | #define HDR_SET_LSIZE(hdr, x) do { \ |
57 | ASSERT(IS_P2ALIGNED(x, 1U << SPA_MINBLOCKSHIFT)); \ | |
58 | (hdr)->b_lsize = ((x) >> SPA_MINBLOCKSHIFT); \ | |
037af3e0 | 59 | } while (0) |
d3c2ae1c GW |
60 | |
61 | #define HDR_SET_PSIZE(hdr, x) do { \ | |
62 | ASSERT(IS_P2ALIGNED((x), 1U << SPA_MINBLOCKSHIFT)); \ | |
63 | (hdr)->b_psize = ((x) >> SPA_MINBLOCKSHIFT); \ | |
037af3e0 | 64 | } while (0) |
d3c2ae1c GW |
65 | |
66 | #define HDR_GET_LSIZE(hdr) ((hdr)->b_lsize << SPA_MINBLOCKSHIFT) | |
67 | #define HDR_GET_PSIZE(hdr) ((hdr)->b_psize << SPA_MINBLOCKSHIFT) | |
68 | ||
34dc7c2f BB |
69 | typedef struct arc_buf_hdr arc_buf_hdr_t; |
70 | typedef struct arc_buf arc_buf_t; | |
ab26409d | 71 | typedef struct arc_prune arc_prune_t; |
b5256303 TC |
72 | |
73 | /* | |
74 | * Because the ARC can store encrypted data, errors (not due to bugs) may arise | |
75 | * while transforming data into its desired format - specifically, when | |
76 | * decrypting, the key may not be present, or the HMAC may not be correct | |
77 | * which signifies deliberate tampering with the on-disk state | |
d4a72f23 TC |
78 | * (assuming that the checksum was correct). If any error occurs, the "buf" |
79 | * parameter will be NULL. | |
b5256303 | 80 | */ |
d4a72f23 | 81 | typedef void arc_read_done_func_t(zio_t *zio, const zbookmark_phys_t *zb, |
60265072 RM |
82 | const blkptr_t *bp, arc_buf_t *buf, void *priv); |
83 | typedef void arc_write_done_func_t(zio_t *zio, arc_buf_t *buf, void *priv); | |
84 | typedef void arc_prune_func_t(int64_t bytes, void *priv); | |
34dc7c2f | 85 | |
69de3421 | 86 | /* Shared module parameters */ |
fdc2d303 | 87 | extern uint_t zfs_arc_average_blocksize; |
c9d62d13 | 88 | extern int l2arc_exclude_special; |
69de3421 | 89 | |
34dc7c2f | 90 | /* generic arc_read_done_func_t's which you can use */ |
b5256303 TC |
91 | arc_read_done_func_t arc_bcopy_func; |
92 | arc_read_done_func_t arc_getbuf_func; | |
34dc7c2f | 93 | |
ab26409d BB |
94 | /* generic arc_prune_func_t wrapper for callbacks */ |
95 | struct arc_prune { | |
96 | arc_prune_func_t *p_pfunc; | |
97 | void *p_private; | |
f6046738 | 98 | uint64_t p_adjust; |
ab26409d | 99 | list_node_t p_node; |
c13060e4 | 100 | zfs_refcount_t p_refcnt; |
ab26409d BB |
101 | }; |
102 | ||
f6046738 BB |
103 | typedef enum arc_strategy { |
104 | ARC_STRATEGY_META_ONLY = 0, /* Evict only meta data buffers */ | |
105 | ARC_STRATEGY_META_BALANCED = 1, /* Evict data buffers if needed */ | |
106 | } arc_strategy_t; | |
107 | ||
2a432414 GW |
108 | typedef enum arc_flags |
109 | { | |
110 | /* | |
111 | * Public flags that can be passed into the ARC by external consumers. | |
112 | */ | |
d3c2ae1c GW |
113 | ARC_FLAG_WAIT = 1 << 0, /* perform sync I/O */ |
114 | ARC_FLAG_NOWAIT = 1 << 1, /* perform async I/O */ | |
115 | ARC_FLAG_PREFETCH = 1 << 2, /* I/O is a prefetch */ | |
116 | ARC_FLAG_CACHED = 1 << 3, /* I/O was in cache */ | |
117 | ARC_FLAG_L2CACHE = 1 << 4, /* cache in L2ARC */ | |
118 | ARC_FLAG_PREDICTIVE_PREFETCH = 1 << 5, /* I/O from zfetch */ | |
d4a72f23 | 119 | ARC_FLAG_PRESCIENT_PREFETCH = 1 << 6, /* long min lifespan */ |
2a432414 GW |
120 | |
121 | /* | |
122 | * Private ARC flags. These flags are private ARC only flags that | |
123 | * will show up in b_flags in the arc_buf_hdr_t. These flags should | |
124 | * only be set by ARC code. | |
125 | */ | |
d4a72f23 TC |
126 | ARC_FLAG_IN_HASH_TABLE = 1 << 7, /* buffer is hashed */ |
127 | ARC_FLAG_IO_IN_PROGRESS = 1 << 8, /* I/O in progress */ | |
128 | ARC_FLAG_IO_ERROR = 1 << 9, /* I/O failed for buf */ | |
129 | ARC_FLAG_INDIRECT = 1 << 10, /* indirect block */ | |
7f60329a | 130 | /* Indicates that block was read with ASYNC priority. */ |
d4a72f23 TC |
131 | ARC_FLAG_PRIO_ASYNC_READ = 1 << 11, |
132 | ARC_FLAG_L2_WRITING = 1 << 12, /* write in progress */ | |
133 | ARC_FLAG_L2_EVICTED = 1 << 13, /* evicted during I/O */ | |
134 | ARC_FLAG_L2_WRITE_HEAD = 1 << 14, /* head of write list */ | |
b5256303 TC |
135 | /* |
136 | * Encrypted or authenticated on disk (may be plaintext in memory). | |
137 | * This header has b_crypt_hdr allocated. Does not include indirect | |
138 | * blocks with checksums of MACs which will also have their X | |
139 | * (encrypted) bit set in the bp. | |
140 | */ | |
d4a72f23 | 141 | ARC_FLAG_PROTECTED = 1 << 15, |
b5256303 | 142 | /* data has not been authenticated yet */ |
d4a72f23 | 143 | ARC_FLAG_NOAUTH = 1 << 16, |
b9541d6b | 144 | /* indicates that the buffer contains metadata (otherwise, data) */ |
d4a72f23 | 145 | ARC_FLAG_BUFC_METADATA = 1 << 17, |
b9541d6b CW |
146 | |
147 | /* Flags specifying whether optional hdr struct fields are defined */ | |
d4a72f23 TC |
148 | ARC_FLAG_HAS_L1HDR = 1 << 18, |
149 | ARC_FLAG_HAS_L2HDR = 1 << 19, | |
d3c2ae1c GW |
150 | |
151 | /* | |
152 | * Indicates the arc_buf_hdr_t's b_pdata matches the on-disk data. | |
153 | * This allows the l2arc to use the blkptr's checksum to verify | |
154 | * the data without having to store the checksum in the hdr. | |
155 | */ | |
d4a72f23 TC |
156 | ARC_FLAG_COMPRESSED_ARC = 1 << 20, |
157 | ARC_FLAG_SHARED_DATA = 1 << 21, | |
d3c2ae1c | 158 | |
1dc32a67 MA |
159 | /* |
160 | * Fail this arc_read() (with ENOENT) if the data is not already present | |
161 | * in cache. | |
162 | */ | |
163 | ARC_FLAG_CACHED_ONLY = 1 << 22, | |
164 | ||
1e4732cb MM |
165 | /* |
166 | * Don't instantiate an arc_buf_t for arc_read_done. | |
167 | */ | |
168 | ARC_FLAG_NO_BUF = 1 << 23, | |
169 | ||
d3c2ae1c GW |
170 | /* |
171 | * The arc buffer's compression mode is stored in the top 7 bits of the | |
172 | * flags field, so these dummy flags are included so that MDB can | |
173 | * interpret the enum properly. | |
174 | */ | |
175 | ARC_FLAG_COMPRESS_0 = 1 << 24, | |
176 | ARC_FLAG_COMPRESS_1 = 1 << 25, | |
177 | ARC_FLAG_COMPRESS_2 = 1 << 26, | |
178 | ARC_FLAG_COMPRESS_3 = 1 << 27, | |
179 | ARC_FLAG_COMPRESS_4 = 1 << 28, | |
180 | ARC_FLAG_COMPRESS_5 = 1 << 29, | |
181 | ARC_FLAG_COMPRESS_6 = 1 << 30 | |
7f60329a | 182 | |
2a432414 GW |
183 | } arc_flags_t; |
184 | ||
2aa34383 DK |
185 | typedef enum arc_buf_flags { |
186 | ARC_BUF_FLAG_SHARED = 1 << 0, | |
b5256303 TC |
187 | ARC_BUF_FLAG_COMPRESSED = 1 << 1, |
188 | /* | |
189 | * indicates whether this arc_buf_t is encrypted, regardless of | |
190 | * state on-disk | |
191 | */ | |
192 | ARC_BUF_FLAG_ENCRYPTED = 1 << 2 | |
2aa34383 DK |
193 | } arc_buf_flags_t; |
194 | ||
34dc7c2f BB |
195 | struct arc_buf { |
196 | arc_buf_hdr_t *b_hdr; | |
197 | arc_buf_t *b_next; | |
428870ff | 198 | kmutex_t b_evict_lock; |
34dc7c2f | 199 | void *b_data; |
524b4217 | 200 | arc_buf_flags_t b_flags; |
34dc7c2f BB |
201 | }; |
202 | ||
203 | typedef enum arc_buf_contents { | |
d3c2ae1c | 204 | ARC_BUFC_INVALID, /* invalid type */ |
34dc7c2f BB |
205 | ARC_BUFC_DATA, /* buffer contains data */ |
206 | ARC_BUFC_METADATA, /* buffer contains metadata */ | |
207 | ARC_BUFC_NUMTYPES | |
208 | } arc_buf_contents_t; | |
34dc7c2f | 209 | |
d164b209 | 210 | /* |
cf7c5a03 | 211 | * The following breakdowns of arc_size exist for kstat only. |
d164b209 BB |
212 | */ |
213 | typedef enum arc_space_type { | |
214 | ARC_SPACE_DATA, | |
cc7f677c | 215 | ARC_SPACE_META, |
d164b209 BB |
216 | ARC_SPACE_HDRS, |
217 | ARC_SPACE_L2HDRS, | |
25458cbe TC |
218 | ARC_SPACE_DBUF, |
219 | ARC_SPACE_DNODE, | |
220 | ARC_SPACE_BONUS, | |
85ec5cba | 221 | ARC_SPACE_ABD_CHUNK_WASTE, |
d164b209 BB |
222 | ARC_SPACE_NUMTYPES |
223 | } arc_space_type_t; | |
224 | ||
e0b0ca98 BB |
225 | typedef enum arc_state_type { |
226 | ARC_STATE_ANON, | |
227 | ARC_STATE_MRU, | |
228 | ARC_STATE_MRU_GHOST, | |
229 | ARC_STATE_MFU, | |
230 | ARC_STATE_MFU_GHOST, | |
231 | ARC_STATE_L2C_ONLY, | |
232 | ARC_STATE_NUMTYPES | |
233 | } arc_state_type_t; | |
234 | ||
235 | typedef struct arc_buf_info { | |
236 | arc_state_type_t abi_state_type; | |
237 | arc_buf_contents_t abi_state_contents; | |
e0b0ca98 | 238 | uint32_t abi_flags; |
d3c2ae1c | 239 | uint32_t abi_bufcnt; |
e0b0ca98 BB |
240 | uint64_t abi_size; |
241 | uint64_t abi_spa; | |
242 | uint64_t abi_access; | |
243 | uint32_t abi_mru_hits; | |
244 | uint32_t abi_mru_ghost_hits; | |
245 | uint32_t abi_mfu_hits; | |
246 | uint32_t abi_mfu_ghost_hits; | |
247 | uint32_t abi_l2arc_hits; | |
248 | uint32_t abi_holds; | |
249 | uint64_t abi_l2arc_dattr; | |
250 | uint64_t abi_l2arc_asize; | |
251 | enum zio_compress abi_l2arc_compress; | |
252 | } arc_buf_info_t; | |
253 | ||
d164b209 BB |
254 | void arc_space_consume(uint64_t space, arc_space_type_t type); |
255 | void arc_space_return(uint64_t space, arc_space_type_t type); | |
2aa34383 | 256 | boolean_t arc_is_metadata(arc_buf_t *buf); |
b5256303 TC |
257 | boolean_t arc_is_encrypted(arc_buf_t *buf); |
258 | boolean_t arc_is_unauthenticated(arc_buf_t *buf); | |
2aa34383 | 259 | enum zio_compress arc_get_compression(arc_buf_t *buf); |
b5256303 TC |
260 | void arc_get_raw_params(arc_buf_t *buf, boolean_t *byteorder, uint8_t *salt, |
261 | uint8_t *iv, uint8_t *mac); | |
a2c2ed1b | 262 | int arc_untransform(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb, |
b5256303 TC |
263 | boolean_t in_place); |
264 | void arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder, | |
265 | dmu_object_type_t ot, const uint8_t *salt, const uint8_t *iv, | |
266 | const uint8_t *mac); | |
a926aab9 | 267 | arc_buf_t *arc_alloc_buf(spa_t *spa, const void *tag, arc_buf_contents_t type, |
2aa34383 | 268 | int32_t size); |
a926aab9 | 269 | arc_buf_t *arc_alloc_compressed_buf(spa_t *spa, const void *tag, |
10b3c7f5 MN |
270 | uint64_t psize, uint64_t lsize, enum zio_compress compression_type, |
271 | uint8_t complevel); | |
a926aab9 | 272 | arc_buf_t *arc_alloc_raw_buf(spa_t *spa, const void *tag, uint64_t dsobj, |
b5256303 TC |
273 | boolean_t byteorder, const uint8_t *salt, const uint8_t *iv, |
274 | const uint8_t *mac, dmu_object_type_t ot, uint64_t psize, uint64_t lsize, | |
10b3c7f5 MN |
275 | enum zio_compress compression_type, uint8_t complevel); |
276 | uint8_t arc_get_complevel(arc_buf_t *buf); | |
2aa34383 DK |
277 | arc_buf_t *arc_loan_buf(spa_t *spa, boolean_t is_metadata, int size); |
278 | arc_buf_t *arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize, | |
10b3c7f5 | 279 | enum zio_compress compression_type, uint8_t complevel); |
b5256303 TC |
280 | arc_buf_t *arc_loan_raw_buf(spa_t *spa, uint64_t dsobj, boolean_t byteorder, |
281 | const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, | |
282 | dmu_object_type_t ot, uint64_t psize, uint64_t lsize, | |
10b3c7f5 | 283 | enum zio_compress compression_type, uint8_t complevel); |
dd66857d AZ |
284 | void arc_return_buf(arc_buf_t *buf, const void *tag); |
285 | void arc_loan_inuse_buf(arc_buf_t *buf, const void *tag); | |
286 | void arc_buf_destroy(arc_buf_t *buf, const void *tag); | |
e0b0ca98 | 287 | void arc_buf_info(arc_buf_t *buf, arc_buf_info_t *abi, int state_index); |
5f6d0b6f | 288 | uint64_t arc_buf_size(arc_buf_t *buf); |
2aa34383 | 289 | uint64_t arc_buf_lsize(arc_buf_t *buf); |
0873bb63 | 290 | void arc_buf_access(arc_buf_t *buf); |
dd66857d | 291 | void arc_release(arc_buf_t *buf, const void *tag); |
34dc7c2f | 292 | int arc_released(arc_buf_t *buf); |
498877ba | 293 | void arc_buf_sigsegv(int sig, siginfo_t *si, void *unused); |
34dc7c2f BB |
294 | void arc_buf_freeze(arc_buf_t *buf); |
295 | void arc_buf_thaw(arc_buf_t *buf); | |
296 | #ifdef ZFS_DEBUG | |
297 | int arc_referenced(arc_buf_t *buf); | |
83719bd6 AZ |
298 | #else |
299 | #define arc_referenced(buf) ((void) sizeof (buf), 0) | |
34dc7c2f BB |
300 | #endif |
301 | ||
294f6806 | 302 | int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, |
60265072 | 303 | arc_read_done_func_t *done, void *priv, zio_priority_t priority, |
b5256303 | 304 | int flags, arc_flags_t *arc_flags, const zbookmark_phys_t *zb); |
428870ff | 305 | zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg, |
d3c2ae1c | 306 | blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, const zio_prop_t *zp, |
b5256303 TC |
307 | arc_write_done_func_t *ready, arc_write_done_func_t *child_ready, |
308 | arc_write_done_func_t *physdone, arc_write_done_func_t *done, | |
60265072 | 309 | void *priv, zio_priority_t priority, int zio_flags, |
bc77ba73 | 310 | const zbookmark_phys_t *zb); |
34dc7c2f | 311 | |
60265072 | 312 | arc_prune_t *arc_add_prune_callback(arc_prune_func_t *func, void *priv); |
ab26409d | 313 | void arc_remove_prune_callback(arc_prune_t *p); |
df4474f9 | 314 | void arc_freed(spa_t *spa, const blkptr_t *bp); |
ab26409d | 315 | |
ca0bf58d | 316 | void arc_flush(spa_t *spa, boolean_t retry); |
34dc7c2f | 317 | void arc_tempreserve_clear(uint64_t reserve); |
dae3e9ea | 318 | int arc_tempreserve_space(spa_t *spa, uint64_t reserve, uint64_t txg); |
34dc7c2f | 319 | |
f09fda50 | 320 | uint64_t arc_all_memory(void); |
9a51738b | 321 | uint64_t arc_default_max(uint64_t min, uint64_t allmem); |
e71cade6 | 322 | uint64_t arc_target_bytes(void); |
60a4c7d2 | 323 | void arc_set_limits(uint64_t); |
34dc7c2f BB |
324 | void arc_init(void); |
325 | void arc_fini(void); | |
326 | ||
327 | /* | |
328 | * Level 2 ARC | |
329 | */ | |
330 | ||
9babb374 | 331 | void l2arc_add_vdev(spa_t *spa, vdev_t *vd); |
34dc7c2f | 332 | void l2arc_remove_vdev(vdev_t *vd); |
b128c09f | 333 | boolean_t l2arc_vdev_present(vdev_t *vd); |
77f6826b GA |
334 | void l2arc_rebuild_vdev(vdev_t *vd, boolean_t reopen); |
335 | boolean_t l2arc_range_check_overlap(uint64_t bottom, uint64_t top, | |
336 | uint64_t check); | |
34dc7c2f BB |
337 | void l2arc_init(void); |
338 | void l2arc_fini(void); | |
b128c09f BB |
339 | void l2arc_start(void); |
340 | void l2arc_stop(void); | |
77f6826b | 341 | void l2arc_spa_rebuild_start(spa_t *spa); |
34dc7c2f | 342 | |
498877ba MA |
343 | #ifndef _KERNEL |
344 | extern boolean_t arc_watch; | |
345 | #endif | |
346 | ||
34dc7c2f BB |
347 | #ifdef __cplusplus |
348 | } | |
349 | #endif | |
350 | ||
351 | #endif /* _SYS_ARC_H */ |