]> git.proxmox.com Git - mirror_zfs.git/blob - include/sys/arc.h
Fix ARC hit rate
[mirror_zfs.git] / include / sys / arc.h
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
24 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
25 */
26
27 #ifndef _SYS_ARC_H
28 #define _SYS_ARC_H
29
30 #include <sys/zfs_context.h>
31
32 #ifdef __cplusplus
33 extern "C" {
34 #endif
35
36 #include <sys/zio.h>
37 #include <sys/dmu.h>
38 #include <sys/spa.h>
39 #include <sys/refcount.h>
40
/*
 * Sentinel used by arc_flush() to tell arc_evict_state() that it should
 * evict every available buffer from the arc state being passed in.
 *
 * The replacement list is parenthesized so the macro always expands to a
 * single operand, whatever operators surround it at the point of use.
 */
#define	ARC_EVICT_ALL	(-1ULL)
46
/*
 * Store x in (hdr)->b_lsize, scaled down by SPA_MINBLOCKSHIFT bits.
 * x is first asserted to satisfy IS_P2ALIGNED against
 * (1U << SPA_MINBLOCKSHIFT).
 *
 * Every use of the x argument is parenthesized, matching HDR_SET_PSIZE,
 * so an arbitrary expression can be passed safely.
 */
#define	HDR_SET_LSIZE(hdr, x) do { \
	ASSERT(IS_P2ALIGNED((x), 1U << SPA_MINBLOCKSHIFT)); \
	(hdr)->b_lsize = ((x) >> SPA_MINBLOCKSHIFT); \
	_NOTE(CONSTCOND) } while (0)
51
/*
 * Record psize in (hdr)->b_psize, scaled down by SPA_MINBLOCKSHIFT bits.
 * The value is asserted to satisfy IS_P2ALIGNED against
 * (1U << SPA_MINBLOCKSHIFT) before it is stored.
 */
#define	HDR_SET_PSIZE(hdr, psize) do { \
	ASSERT(IS_P2ALIGNED((psize), 1U << SPA_MINBLOCKSHIFT)); \
	(hdr)->b_psize = ((psize) >> SPA_MINBLOCKSHIFT); \
	_NOTE(CONSTCOND) } while (0)
56
/*
 * Read back the b_lsize / b_psize fields of a header, shifted up by
 * SPA_MINBLOCKSHIFT bits.
 */
#define	HDR_GET_LSIZE(h)	((h)->b_lsize << SPA_MINBLOCKSHIFT)
#define	HDR_GET_PSIZE(h)	((h)->b_psize << SPA_MINBLOCKSHIFT)
59
/*
 * Short type names for the ARC structures.  struct arc_buf_hdr is only
 * forward-declared in this header.
 */
typedef struct arc_buf_hdr	arc_buf_hdr_t;
typedef struct arc_buf		arc_buf_t;
typedef struct arc_prune	arc_prune_t;
63
64 /*
65 * Because the ARC can store encrypted data, errors (not due to bugs) may arise
66 * while transforming data into its desired format - specifically, when
67 * decrypting, the key may not be present, or the HMAC may not be correct
68 * which signifies deliberate tampering with the on-disk state
69 * (assuming that the checksum was correct). If any error occurs, the "buf"
70 * parameter will be NULL.
71 */
72 typedef void arc_read_done_func_t(zio_t *zio, const zbookmark_phys_t *zb,
73 const blkptr_t *bp, arc_buf_t *buf, void *private);
74 typedef void arc_write_done_func_t(zio_t *zio, arc_buf_t *buf, void *private);
75 typedef void arc_prune_func_t(int64_t bytes, void *private);
76
/*
 * Module parameters shared with other files; only declared here, the
 * definition lives outside this header.
 */
extern int zfs_arc_average_blocksize;
79
80 /* generic arc_done_func_t's which you can use */
81 arc_read_done_func_t arc_bcopy_func;
82 arc_read_done_func_t arc_getbuf_func;
83
84 /* generic arc_prune_func_t wrapper for callbacks */
85 struct arc_prune {
86 arc_prune_func_t *p_pfunc;
87 void *p_private;
88 uint64_t p_adjust;
89 list_node_t p_node;
90 refcount_t p_refcnt;
91 };
92
/* Selects which kinds of buffers may be evicted. */
typedef enum arc_strategy {
	/* Evict only meta data buffers */
	ARC_STRATEGY_META_ONLY		= 0,
	/* Evict data buffers if needed */
	ARC_STRATEGY_META_BALANCED	= 1,
} arc_strategy_t;
97
/*
 * ARC flag bits.  Bits 0-6 are public, bits 7-21 are private to the ARC
 * and bits 24-30 are placeholders for the compression mode.
 */
typedef enum arc_flags {
	/*
	 * Public flags that can be passed into the ARC by external consumers.
	 */
	ARC_FLAG_WAIT			= 1 << 0,	/* perform sync I/O */
	ARC_FLAG_NOWAIT			= 1 << 1,	/* perform async I/O */
	ARC_FLAG_PREFETCH		= 1 << 2,	/* I/O is a prefetch */
	ARC_FLAG_CACHED			= 1 << 3,	/* I/O was in cache */
	ARC_FLAG_L2CACHE		= 1 << 4,	/* cache in L2ARC */
	ARC_FLAG_PREDICTIVE_PREFETCH	= 1 << 5,	/* I/O from zfetch */
	ARC_FLAG_PRESCIENT_PREFETCH	= 1 << 6,	/* long min lifespan */

	/*
	 * Private ARC flags.  These flags are private ARC only flags that
	 * will show up in b_flags in the arc_buf_hdr_t.  These flags should
	 * only be set by ARC code.
	 */
	ARC_FLAG_IN_HASH_TABLE		= 1 << 7,	/* buffer is hashed */
	ARC_FLAG_IO_IN_PROGRESS		= 1 << 8,	/* I/O in progress */
	ARC_FLAG_IO_ERROR		= 1 << 9,	/* I/O failed for buf */
	ARC_FLAG_INDIRECT		= 1 << 10,	/* indirect block */
	/* Indicates that block was read with ASYNC priority. */
	ARC_FLAG_PRIO_ASYNC_READ	= 1 << 11,
	ARC_FLAG_L2_WRITING		= 1 << 12,	/* write in progress */
	ARC_FLAG_L2_EVICTED		= 1 << 13,	/* evicted during I/O */
	ARC_FLAG_L2_WRITE_HEAD		= 1 << 14,	/* head of write list */
	/*
	 * Encrypted or authenticated on disk (may be plaintext in memory).
	 * This header has b_crypt_hdr allocated.  Does not include indirect
	 * blocks with checksums of MACs which will also have their X
	 * (encrypted) bit set in the bp.
	 */
	ARC_FLAG_PROTECTED		= 1 << 15,
	/* data has not been authenticated yet */
	ARC_FLAG_NOAUTH			= 1 << 16,
	/* indicates that the buffer contains metadata (otherwise, data) */
	ARC_FLAG_BUFC_METADATA		= 1 << 17,

	/* Flags specifying whether optional hdr struct fields are defined */
	ARC_FLAG_HAS_L1HDR		= 1 << 18,
	ARC_FLAG_HAS_L2HDR		= 1 << 19,

	/*
	 * Indicates the arc_buf_hdr_t's b_pdata matches the on-disk data.
	 * This allows the l2arc to use the blkptr's checksum to verify
	 * the data without having to store the checksum in the hdr.
	 */
	ARC_FLAG_COMPRESSED_ARC		= 1 << 20,
	ARC_FLAG_SHARED_DATA		= 1 << 21,

	/*
	 * The arc buffer's compression mode is stored in the top 7 bits of
	 * the flags field (bits 24-30 here), so these dummy flags are
	 * included so that MDB can interpret the enum properly.
	 */
	ARC_FLAG_COMPRESS_0		= 1 << 24,
	ARC_FLAG_COMPRESS_1		= 1 << 25,
	ARC_FLAG_COMPRESS_2		= 1 << 26,
	ARC_FLAG_COMPRESS_3		= 1 << 27,
	ARC_FLAG_COMPRESS_4		= 1 << 28,
	ARC_FLAG_COMPRESS_5		= 1 << 29,
	ARC_FLAG_COMPRESS_6		= 1 << 30
} arc_flags_t;
163
/* Flag bits kept per arc_buf_t (see the b_flags member of struct arc_buf). */
typedef enum arc_buf_flags {
	ARC_BUF_FLAG_SHARED	= 1 << 0,
	ARC_BUF_FLAG_COMPRESSED	= 1 << 1,
	/*
	 * Indicates whether this arc_buf_t is encrypted, regardless of
	 * its state on-disk.
	 */
	ARC_BUF_FLAG_ENCRYPTED	= 1 << 2
} arc_buf_flags_t;
173
174 struct arc_buf {
175 arc_buf_hdr_t *b_hdr;
176 arc_buf_t *b_next;
177 kmutex_t b_evict_lock;
178 void *b_data;
179 arc_buf_flags_t b_flags;
180 };
181
/*
 * Kind of content held by a buffer.  ARC_BUFC_NUMTYPES is the count of
 * enumerators that precede it.
 */
typedef enum arc_buf_contents {
	ARC_BUFC_INVALID = 0,	/* invalid type */
	ARC_BUFC_DATA,		/* buffer contains data */
	ARC_BUFC_METADATA,	/* buffer contains metadata */
	ARC_BUFC_NUMTYPES
} arc_buf_contents_t;
188
/*
 * The following breakdowns of arc_size exist for kstat only.
 * ARC_SPACE_NUMTYPES is the count of enumerators that precede it.
 */
typedef enum arc_space_type {
	ARC_SPACE_DATA = 0,
	ARC_SPACE_META,
	ARC_SPACE_HDRS,
	ARC_SPACE_L2HDRS,
	ARC_SPACE_DBUF,
	ARC_SPACE_DNODE,
	ARC_SPACE_BONUS,
	ARC_SPACE_NUMTYPES
} arc_space_type_t;
202
/*
 * Identifiers for the ARC states.  ARC_STATE_NUMTYPES is the count of
 * enumerators that precede it.
 */
typedef enum arc_state_type {
	ARC_STATE_ANON = 0,
	ARC_STATE_MRU,
	ARC_STATE_MRU_GHOST,
	ARC_STATE_MFU,
	ARC_STATE_MFU_GHOST,
	ARC_STATE_L2C_ONLY,
	ARC_STATE_NUMTYPES
} arc_state_type_t;
212
213 typedef struct arc_buf_info {
214 arc_state_type_t abi_state_type;
215 arc_buf_contents_t abi_state_contents;
216 uint32_t abi_flags;
217 uint32_t abi_bufcnt;
218 uint64_t abi_size;
219 uint64_t abi_spa;
220 uint64_t abi_access;
221 uint32_t abi_mru_hits;
222 uint32_t abi_mru_ghost_hits;
223 uint32_t abi_mfu_hits;
224 uint32_t abi_mfu_ghost_hits;
225 uint32_t abi_l2arc_hits;
226 uint32_t abi_holds;
227 uint64_t abi_l2arc_dattr;
228 uint64_t abi_l2arc_asize;
229 enum zio_compress abi_l2arc_compress;
230 } arc_buf_info_t;
231
232 void arc_space_consume(uint64_t space, arc_space_type_t type);
233 void arc_space_return(uint64_t space, arc_space_type_t type);
234 boolean_t arc_is_metadata(arc_buf_t *buf);
235 boolean_t arc_is_encrypted(arc_buf_t *buf);
236 boolean_t arc_is_unauthenticated(arc_buf_t *buf);
237 enum zio_compress arc_get_compression(arc_buf_t *buf);
238 void arc_get_raw_params(arc_buf_t *buf, boolean_t *byteorder, uint8_t *salt,
239 uint8_t *iv, uint8_t *mac);
240 int arc_untransform(arc_buf_t *buf, spa_t *spa, uint64_t dsobj,
241 boolean_t in_place);
242 void arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder,
243 dmu_object_type_t ot, const uint8_t *salt, const uint8_t *iv,
244 const uint8_t *mac);
245 arc_buf_t *arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type,
246 int32_t size);
247 arc_buf_t *arc_alloc_compressed_buf(spa_t *spa, void *tag,
248 uint64_t psize, uint64_t lsize, enum zio_compress compression_type);
249 arc_buf_t *arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj,
250 boolean_t byteorder, const uint8_t *salt, const uint8_t *iv,
251 const uint8_t *mac, dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
252 enum zio_compress compression_type);
253 arc_buf_t *arc_loan_buf(spa_t *spa, boolean_t is_metadata, int size);
254 arc_buf_t *arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize,
255 enum zio_compress compression_type);
256 arc_buf_t *arc_loan_raw_buf(spa_t *spa, uint64_t dsobj, boolean_t byteorder,
257 const uint8_t *salt, const uint8_t *iv, const uint8_t *mac,
258 dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
259 enum zio_compress compression_type);
260 void arc_return_buf(arc_buf_t *buf, void *tag);
261 void arc_loan_inuse_buf(arc_buf_t *buf, void *tag);
262 void arc_buf_destroy(arc_buf_t *buf, void *tag);
263 void arc_buf_info(arc_buf_t *buf, arc_buf_info_t *abi, int state_index);
264 uint64_t arc_buf_size(arc_buf_t *buf);
265 uint64_t arc_buf_lsize(arc_buf_t *buf);
266 void arc_buf_access(arc_buf_t *buf);
267 void arc_release(arc_buf_t *buf, void *tag);
268 int arc_released(arc_buf_t *buf);
269 void arc_buf_sigsegv(int sig, siginfo_t *si, void *unused);
270 void arc_buf_freeze(arc_buf_t *buf);
271 void arc_buf_thaw(arc_buf_t *buf);
272 #ifdef ZFS_DEBUG
273 int arc_referenced(arc_buf_t *buf);
274 #endif
275
276 int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
277 arc_read_done_func_t *done, void *private, zio_priority_t priority,
278 int flags, arc_flags_t *arc_flags, const zbookmark_phys_t *zb);
279 zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
280 blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, const zio_prop_t *zp,
281 arc_write_done_func_t *ready, arc_write_done_func_t *child_ready,
282 arc_write_done_func_t *physdone, arc_write_done_func_t *done,
283 void *private, zio_priority_t priority, int zio_flags,
284 const zbookmark_phys_t *zb);
285
286 arc_prune_t *arc_add_prune_callback(arc_prune_func_t *func, void *private);
287 void arc_remove_prune_callback(arc_prune_t *p);
288 void arc_freed(spa_t *spa, const blkptr_t *bp);
289
290 void arc_flush(spa_t *spa, boolean_t retry);
291 void arc_tempreserve_clear(uint64_t reserve);
292 int arc_tempreserve_space(uint64_t reserve, uint64_t txg);
293
294 uint64_t arc_target_bytes(void);
295 void arc_init(void);
296 void arc_fini(void);
297
298 /*
299 * Level 2 ARC
300 */
301
302 void l2arc_add_vdev(spa_t *spa, vdev_t *vd);
303 void l2arc_remove_vdev(vdev_t *vd);
304 boolean_t l2arc_vdev_present(vdev_t *vd);
305 void l2arc_init(void);
306 void l2arc_fini(void);
307 void l2arc_start(void);
308 void l2arc_stop(void);
309
310 #ifndef _KERNEL
311 extern boolean_t arc_watch;
312 #endif
313
314 #ifdef __cplusplus
315 }
316 #endif
317
318 #endif /* _SYS_ARC_H */