]> git.proxmox.com Git - mirror_zfs.git/blame - include/sys/arc_impl.h
dracut: 90zfs: respect zfs_force=1 on systemd systems
[mirror_zfs.git] / include / sys / arc_impl.h
CommitLineData
59ec819a
NB
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
77f6826b
GA
23 * Copyright (c) 2013, Delphix. All rights reserved.
24 * Copyright (c) 2013, Saso Kiselkov. All rights reserved.
25 * Copyright (c) 2013, Nexenta Systems, Inc. All rights reserved.
26 * Copyright (c) 2020, George Amanakis. All rights reserved.
59ec819a
NB
27 */
28
29#ifndef _SYS_ARC_IMPL_H
30#define _SYS_ARC_IMPL_H
31
32#include <sys/arc.h>
b5256303 33#include <sys/zio_crypt.h>
c9c9c1e2
MM
34#include <sys/zthr.h>
35#include <sys/aggsum.h>
59ec819a
NB
36
37#ifdef __cplusplus
38extern "C" {
39#endif
40
41/*
42 * Note that buffers can be in one of 6 states:
43 * ARC_anon - anonymous (discussed below)
44 * ARC_mru - recently used, currently cached
cf7c5a03 45 * ARC_mru_ghost - recently used, no longer in cache
59ec819a
NB
46 * ARC_mfu - frequently used, currently cached
47 * ARC_mfu_ghost - frequently used, no longer in cache
48 * ARC_l2c_only - exists in L2ARC but not other states
49 * When there are no active references to the buffer, they are
50 * linked onto a list in one of these arc states. These are
51 * the only buffers that can be evicted or deleted. Within each
52 * state there are multiple lists, one for meta-data and one for
53 * non-meta-data. Meta-data (indirect blocks, blocks of dnodes,
54 * etc.) is tracked separately so that it can be managed more
55 * explicitly: favored over data, limited explicitly.
56 *
57 * Anonymous buffers are buffers that are not associated with
58 * a DVA. These are buffers that hold dirty block copies
59 * before they are written to stable storage. By definition,
60 * they are "ref'd" and are considered part of arc_mru
4e33ba4c 61 * that cannot be freed. Generally, they will acquire a DVA
59ec819a
NB
62 * as they are written and migrate onto the arc_mru list.
63 *
64 * The ARC_l2c_only state is for buffers that are in the second
65 * level ARC but no longer in any of the ARC_m* lists. The second
66 * level ARC itself may also contain buffers that are in any of
67 * the ARC_m* states - meaning that a buffer can exist in two
68 * places. The reason for the ARC_l2c_only state is to keep the
69 * buffer header in the hash table, so that reads that hit the
70 * second level ARC benefit from these fast lookups.
71 */
72
/*
 * Bookkeeping for a single ARC state (see the list of states in the
 * comment above). Evictable buffers and their sizes are tracked in arrays
 * with ARC_BUFC_NUMTYPES elements; arcs_size covers the whole state.
 */
73typedef struct arc_state {
ca0bf58d
PS
74 /*
75 * lists of evictable buffers (ARC_BUFC_NUMTYPES multilists)
76 */
64fc7762 77 multilist_t *arcs_list[ARC_BUFC_NUMTYPES];
ca0bf58d
PS
78 /*
79 * total amount of evictable data in this state
80 * (ARC_BUFC_NUMTYPES counters, parallel to arcs_list)
81 */
c13060e4 81 zfs_refcount_t arcs_esize[ARC_BUFC_NUMTYPES];
ca0bf58d
PS
82 /*
83 * total amount of data in this state; this includes: evictable,
84 * non-evictable, ARC_BUFC_DATA, and ARC_BUFC_METADATA.
85 */
c13060e4 86 zfs_refcount_t arcs_size;
ca0bf58d
PS
87 /*
88 * supports the "dbufs" kstat
89 */
59ec819a
NB
90 arc_state_type_t arcs_state;
91} arc_state_t;
92
59ec819a
NB
/*
 * Callback record that can be chained through acb_next. l1arc_buf_hdr_t
 * holds a pointer to such a record in b_acb.
 * NOTE(review): acb_done has type arc_read_done_func_t, so these records
 * presumably describe read-completion callbacks -- confirm against arc.c.
 */
93typedef struct arc_callback arc_callback_t;
94
95struct arc_callback {
96 void *acb_private; /* opaque pointer; presumably passed to acb_done */
b5256303 97 arc_read_done_func_t *acb_done; /* callback function */
59ec819a 98 arc_buf_t *acb_buf;
b5256303 99 boolean_t acb_encrypted;
2aa34383 100 boolean_t acb_compressed;
b5256303 101 boolean_t acb_noauth;
923d7303 102 boolean_t acb_nobuf;
be9a5c35 103 zbookmark_phys_t acb_zb;
59ec819a 104 zio_t *acb_zio_dummy;
a8b2e306 105 zio_t *acb_zio_head;
59ec819a
NB
106 arc_callback_t *acb_next; /* link to another arc_callback_t */
107};
108
/*
 * Bundle of write callbacks: four arc_write_done_func_t pointers (ready,
 * children_ready, physdone, done) together with a private pointer and the
 * arc_buf_t they refer to.
 * NOTE(review): the point in a write's lifetime at which each callback
 * fires is not visible in this header -- see the users in arc.c.
 */
109typedef struct arc_write_callback arc_write_callback_t;
110
111struct arc_write_callback {
b5256303
TC
112 void *awcb_private; /* opaque pointer kept alongside the callbacks */
113 arc_write_done_func_t *awcb_ready;
114 arc_write_done_func_t *awcb_children_ready;
115 arc_write_done_func_t *awcb_physdone;
116 arc_write_done_func_t *awcb_done;
117 arc_buf_t *awcb_buf;
59ec819a
NB
118};
119
b9541d6b
CW
120/*
121 * ARC buffers are separated into multiple structs as a memory saving measure:
122 * - Common fields struct, always defined, and embedded within it:
123 * - L2-only fields, always allocated but undefined when not in L2ARC
124 * - L1-only fields, only allocated when in L1ARC
125 *
126 * Buffer in L1 Buffer only in L2
127 * +------------------------+ +------------------------+
128 * | arc_buf_hdr_t | | arc_buf_hdr_t |
129 * | | | |
130 * | | | |
131 * | | | |
132 * +------------------------+ +------------------------+
133 * | l2arc_buf_hdr_t | | l2arc_buf_hdr_t |
134 * | (undefined if L1-only) | | |
135 * +------------------------+ +------------------------+
136 * | l1arc_buf_hdr_t |
137 * | |
138 * | |
139 * | |
140 * | |
141 * +------------------------+
142 *
143 * Because it's possible for the L2ARC to become extremely large, we can wind
144 * up eating a lot of memory in L2ARC buffer headers, so the size of a header
145 * is minimized by only allocating the fields necessary for an L1-cached buffer
146 * when a header is actually in the L1 cache. The sub-headers (l1arc_buf_hdr and
147 * l2arc_buf_hdr) are embedded rather than allocated separately to save a couple
148 * words in pointers. arc_hdr_realloc() is used to switch a header between
149 * these two allocation states.
150 */
/*
 * L1-only portion of a buffer header (the "l1arc_buf_hdr_t" box in the
 * layout diagram above). Per that comment it is only allocated while the
 * header is in the L1 cache. Fields are grouped below by the locking rule
 * that the inline comments state for them.
 */
151typedef struct l1arc_buf_hdr {
59ec819a 152 kmutex_t b_freeze_lock;
d3c2ae1c 153 zio_cksum_t *b_freeze_cksum;
59ec819a 154
59ec819a 155 arc_buf_t *b_buf;
d3c2ae1c 156 uint32_t b_bufcnt;
b9541d6b 157 /* for waiting on writes to complete */
59ec819a 158 kcondvar_t b_cv;
d3c2ae1c 159 uint8_t b_byteswap;
59ec819a 160
59ec819a
NB
161
162 /* protected by arc state mutex */
163 arc_state_t *b_state;
ca0bf58d 164 multilist_node_t b_arc_node;
59ec819a
NB
165
166 /* updated atomically */
167 clock_t b_arc_access;
168 uint32_t b_mru_hits;
169 uint32_t b_mru_ghost_hits;
170 uint32_t b_mfu_hits;
171 uint32_t b_mfu_ghost_hits;
172 uint32_t b_l2_hits;
173
174 /* self protecting */
c13060e4 175 zfs_refcount_t b_refcnt;
59ec819a 176
b9541d6b 177 arc_callback_t *b_acb; /* arc_callback_t record (chained via acb_next) */
a6255b7f 178 abd_t *b_pabd; /* data counted by the arcstat_*compressed_size kstats */
b9541d6b 179} l1arc_buf_hdr_t;
59ec819a 180
77f6826b
GA
/*
 * Flag bits for the persistent L2ARC device header; the dh_flags field of
 * l2arc_dev_hdr_phys_t is annotated with this type.
 */
181typedef enum l2arc_dev_hdr_flags_t {
182 L2ARC_DEV_HDR_EVICT_FIRST = (1 << 0) /* mirror of l2ad_first */
183} l2arc_dev_hdr_flags_t;
184
185/*
186 * Pointer used in persistent L2ARC (for pointing to log blocks).
187 */
/*
 * Locates and describes one log block on an L2ARC device: where it is,
 * how large its payload is, its packed properties, and its checksum.
 */
188typedef struct l2arc_log_blkptr {
189 /*
190 * Offset of log block within the device, in bytes
191 */
192 uint64_t lbp_daddr;
193 /*
194 * Aligned payload size (in bytes) of the log block
195 */
196 uint64_t lbp_payload_asize;
197 /*
198 * Offset in bytes of the first buffer in the payload
199 */
200 uint64_t lbp_payload_start;
201 /*
202 * lbp_prop has the following format:
203 * * logical size (in bytes)
657fd33b 204 * * aligned (after compression) size (in bytes)
77f6826b
GA
205 * * compression algorithm (we always LZ4-compress l2arc logs)
206 * * checksum algorithm (used for lbp_cksum)
207 */
/*
 * The L2BLK_GET_* and L2BLK_SET_* macros defined later in this file are
 * the accessors for the sub-fields of lbp_prop.
 */
208 uint64_t lbp_prop;
209 zio_cksum_t lbp_cksum; /* checksum of log */
210} l2arc_log_blkptr_t;
211
212/*
213 * The persistent L2ARC device header.
214 * Byte order of magic determines whether 64-bit bswap of fields is necessary.
215 */
/*
 * On-disk layout: do not reorder, resize, or insert fields. The CTASSERT
 * after the struct pins the total size to SPA_MINBLOCKSIZE, and dh_pad
 * must shrink by one element for every uint64_t field added.
 */
216typedef struct l2arc_dev_hdr_phys {
217 uint64_t dh_magic; /* L2ARC_DEV_HDR_MAGIC */
218 uint64_t dh_version; /* Persistent L2ARC version */
219
220 /*
221 * Global L2ARC device state and metadata.
222 */
223 uint64_t dh_spa_guid;
224 uint64_t dh_vdev_guid;
657fd33b 225 uint64_t dh_log_entries; /* mirror of l2ad_log_entries */
77f6826b
GA
226 uint64_t dh_evict; /* evicted offset in bytes */
227 uint64_t dh_flags; /* l2arc_dev_hdr_flags_t */
228 /*
229 * Used in zdb.c for determining if a log block is valid, in the same
230 * way that l2arc_rebuild() does.
231 */
657fd33b
GA
232 uint64_t dh_start; /* mirror of l2ad_start */
233 uint64_t dh_end; /* mirror of l2ad_end */
77f6826b
GA
234 /*
235 * Start of log block chain. [0] -> newest log, [1] -> one older (used
236 * for initiating prefetch).
237 */
238 l2arc_log_blkptr_t dh_start_lbps[2];
657fd33b
GA
239 /*
240 * Aligned size of all log blocks as accounted by vdev_space_update().
241 */
242 uint64_t dh_lb_asize; /* mirror of l2ad_lb_asize */
243 uint64_t dh_lb_count; /* mirror of l2ad_lb_count */
b7654bd7
GA
244 /*
245 * Mirrors of vdev_trim_action_time and vdev_trim_state, used to
246 * display when the cache device was fully trimmed for the last
247 * time.
248 */
249 uint64_t dh_trim_action_time;
250 uint64_t dh_trim_state;
251 const uint64_t dh_pad[30]; /* pad to 512 bytes */
77f6826b
GA
252 zio_eck_t dh_tail;
253} l2arc_dev_hdr_phys_t;
/* Compile-time check that the header is exactly SPA_MINBLOCKSIZE bytes. */
254CTASSERT_GLOBAL(sizeof (l2arc_dev_hdr_phys_t) == SPA_MINBLOCKSIZE);
255
256/*
257 * A single ARC buffer header entry in a l2arc_log_blk_phys_t.
258 */
/*
 * On-disk layout: one entry of the lb_entries[] array in
 * l2arc_log_blk_phys_t. Five 8-byte fields plus a 16-byte dva_t would make
 * 48 bytes (assuming dva_t is 16 bytes -- confirm); le_pad rounds that up
 * to the 64 bytes stated on its comment.
 */
259typedef struct l2arc_log_ent_phys {
260 dva_t le_dva; /* dva of buffer */
261 uint64_t le_birth; /* birth txg of buffer */
262 /*
263 * le_prop has the following format:
264 * * logical size (in bytes)
265 * * physical (compressed) size (in bytes)
266 * * compression algorithm
267 * * object type (used to restore arc_buf_contents_t)
268 * * protected status (used for encryption)
269 * * prefetch status (used in l2arc_read_done())
270 *
270 * The sub-fields are read and written with the L2BLK_GET_* and
270 * L2BLK_SET_* macros defined later in this file.
270 * NOTE(review): L2BLK_GET_STATE/L2BLK_SET_STATE also exist but no
270 * state sub-field is listed here -- confirm whether le_prop carries it.
270 */
271 uint64_t le_prop;
272 uint64_t le_daddr; /* buf location on l2dev */
10b3c7f5 273 uint64_t le_complevel;
77f6826b
GA
274 /*
275 * We pad the size of each entry to a power of 2 so that the size of
276 * l2arc_log_blk_phys_t is power-of-2 aligned with SPA_MINBLOCKSHIFT,
277 * because of the L2BLK_SET_*SIZE macros.
278 */
10b3c7f5 279 const uint64_t le_pad[2]; /* pad to 64 bytes */
77f6826b
GA
280} l2arc_log_ent_phys_t;
281
/*
 * Number of entries in a log block. With the 128-byte header section and
 * 64-byte entries described in the comments below, 128 + 1022 * 64 = 65536
 * bytes, which matches the "64K total" note on the struct.
 */
282#define L2ARC_LOG_BLK_MAX_ENTRIES (1022)
283
284/*
285 * A log block of up to 1022 ARC buffer log entries, chained into the
286 * persistent L2ARC metadata linked list. Byte order of magic determines
287 * whether 64-bit bswap of fields is necessary.
288 */
289typedef struct l2arc_log_blk_phys {
290 uint64_t lb_magic; /* L2ARC_LOG_BLK_MAGIC */
291 /*
292 * There are 2 chains (headed by dh_start_lbps[2]), and this field
293 * points back to the previous block in this chain. We alternate
294 * which chain we append to, so they are time-wise and offset-wise
295 * interleaved, but that is an optimization rather than for
296 * correctness.
297 */
298 l2arc_log_blkptr_t lb_prev_lbp; /* pointer to prev log block */
299 /*
300 * Pad header section to 128 bytes
301 */
302 uint64_t lb_pad[7];
303 /* Payload */
304 l2arc_log_ent_phys_t lb_entries[L2ARC_LOG_BLK_MAX_ENTRIES];
305} l2arc_log_blk_phys_t; /* 64K total */
306
307/*
308 * The size of l2arc_log_blk_phys_t has to be power-of-2 aligned with
309 * SPA_MINBLOCKSHIFT because of L2BLK_SET_*SIZE macros.
310 */
/*
 * Three compile-time checks on sizeof (l2arc_log_blk_phys_t): it is
 * aligned to (1 << SPA_MINBLOCKSHIFT), it is at least SPA_MINBLOCKSIZE,
 * and it is at most SPA_MAXBLOCKSIZE.
 */
311CTASSERT_GLOBAL(IS_P2ALIGNED(sizeof (l2arc_log_blk_phys_t),
312 1ULL << SPA_MINBLOCKSHIFT));
313CTASSERT_GLOBAL(sizeof (l2arc_log_blk_phys_t) >= SPA_MINBLOCKSIZE);
314CTASSERT_GLOBAL(sizeof (l2arc_log_blk_phys_t) <= SPA_MAXBLOCKSIZE);
315
316/*
317 * These structures hold in-flight abd buffers for log blocks as they're being
318 * written to the L2ARC device.
319 */
/*
 * List element wrapping one abd_t.
 * NOTE(review): presumably linked onto l2wcb_abd_list in
 * l2arc_write_callback_t ("in-flight list of log blocks") -- confirm.
 */
320typedef struct l2arc_lb_abd_buf {
321 abd_t *abd; /* the wrapped abd buffer */
322 list_node_t node; /* list linkage */
323} l2arc_lb_abd_buf_t;
324
325/*
326 * These structures hold pointers to log blocks present on the L2ARC device.
327 */
/*
 * List element wrapping a pointer to one l2arc_log_blkptr_t.
 * NOTE(review): presumably linked onto l2ad_lbptr_list in l2arc_dev_t
 * ("List of pointers to log blocks present in the L2ARC device") --
 * confirm.
 */
328typedef struct l2arc_lb_ptr_buf {
329 l2arc_log_blkptr_t *lb_ptr; /* the wrapped log block pointer */
330 list_node_t node; /* list linkage */
331} l2arc_lb_ptr_buf_t;
332
333/* Macros for setting fields in le_prop and lbp_prop */
/*
 * Summary of the arguments the accessors below pass to BF64_GET/BF64_SET
 * and BF64_GET_SB/BF64_SET_SB. NOTE(review): this reads the second and
 * third arguments as (start bit, width) and the trailing _SB arguments as
 * (shift, bias) -- confirm against the BF64_* definitions.
 *
 *	LSIZE		 0, SPA_LSIZEBITS	(SPA_MINBLOCKSHIFT, 1)
 *	PSIZE		16, SPA_PSIZEBITS	(SPA_MINBLOCKSHIFT, 1)
 *	COMPRESS	32, SPA_COMPRESSBITS
 *	PREFETCH	39, 1
 *	CHECKSUM	40, 8
 *	TYPE		48, 8
 *	PROTECTED	56, 1
 *	STATE		57, 4
 */
334#define L2BLK_GET_LSIZE(field) \
335 BF64_GET_SB((field), 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1)
336#define L2BLK_SET_LSIZE(field, x) \
337 BF64_SET_SB((field), 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)
338#define L2BLK_GET_PSIZE(field) \
339 BF64_GET_SB((field), 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1)
340#define L2BLK_SET_PSIZE(field, x) \
341 BF64_SET_SB((field), 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)
342#define L2BLK_GET_COMPRESS(field) \
343 BF64_GET((field), 32, SPA_COMPRESSBITS)
344#define L2BLK_SET_COMPRESS(field, x) \
345 BF64_SET((field), 32, SPA_COMPRESSBITS, x)
346#define L2BLK_GET_PREFETCH(field) BF64_GET((field), 39, 1)
347#define L2BLK_SET_PREFETCH(field, x) BF64_SET((field), 39, 1, x)
348#define L2BLK_GET_CHECKSUM(field) BF64_GET((field), 40, 8)
349#define L2BLK_SET_CHECKSUM(field, x) BF64_SET((field), 40, 8, x)
350#define L2BLK_GET_TYPE(field) BF64_GET((field), 48, 8)
351#define L2BLK_SET_TYPE(field, x) BF64_SET((field), 48, 8, x)
352#define L2BLK_GET_PROTECTED(field) BF64_GET((field), 56, 1)
353#define L2BLK_SET_PROTECTED(field, x) BF64_SET((field), 56, 1, x)
08532162
GA
354#define L2BLK_GET_STATE(field) BF64_GET((field), 57, 4)
355#define L2BLK_SET_STATE(field, x) BF64_SET((field), 57, 4, x)
77f6826b
GA
356
/*
 * Exchange the values of two pointer lvalues.
 *
 * Both arguments must be modifiable lvalues of object-pointer type; the
 * value of x travels through a void * temporary. Each argument is
 * evaluated twice, so do not pass expressions with side effects.
 *
 * The arguments are parenthesized, and the temporary has a name that is
 * unlikely to collide with a caller's variable. With a temporary called
 * "tmp", PTR_SWAP(tmp, other) would initialize the temporary from itself.
 */
#define	PTR_SWAP(x, y)				\
	do {					\
		void *ptr_swap_tmp_ = (x);	\
		(x) = (y);			\
		(y) = ptr_swap_tmp_;		\
		_NOTE(CONSTCOND)		\
	} while (0)
364
/*
 * Magic values for dh_magic (l2arc_dev_hdr_phys_t) and lb_magic
 * (l2arc_log_blk_phys_t). Read as big-endian bytes, each constant spells
 * the ASCII string in its comment. Per the struct comments, the byte order
 * of the stored magic determines whether 64-bit bswap of fields is needed.
 */
365#define L2ARC_DEV_HDR_MAGIC 0x5a46534341434845LLU /* ASCII: "ZFSCACHE" */
366#define L2ARC_LOG_BLK_MAGIC 0x4c4f47424c4b4844LLU /* ASCII: "LOGBLKHD" */
367
368/*
369 * L2ARC Internals
370 */
/*
 * In-memory state for one L2ARC device. The first group of fields
 * describes the device and its write position; the fields after the
 * "Persistence-related stuff" comment support the persistent L2ARC.
 * Several of them are mirrored into l2arc_dev_hdr_phys_t, as the
 * "mirror of l2ad_*" comments there say.
 */
371typedef struct l2arc_dev {
372 vdev_t *l2ad_vdev; /* vdev */
373 spa_t *l2ad_spa; /* spa */
374 uint64_t l2ad_hand; /* next write location */
375 uint64_t l2ad_start; /* first addr on device */
376 uint64_t l2ad_end; /* last addr on device */
377 boolean_t l2ad_first; /* first sweep through */
378 boolean_t l2ad_writing; /* currently writing */
379 kmutex_t l2ad_mtx; /* lock for buffer list */
380 list_t l2ad_buflist; /* buffer list */
381 list_node_t l2ad_node; /* device list node */
382 zfs_refcount_t l2ad_alloc; /* allocated bytes */
383 /*
384 * Persistence-related stuff
385 */
386 l2arc_dev_hdr_phys_t *l2ad_dev_hdr; /* persistent device header */
387 uint64_t l2ad_dev_hdr_asize; /* aligned hdr size */
/* Embedded by value, so each l2arc_dev_t carries a full log block. */
388 l2arc_log_blk_phys_t l2ad_log_blk; /* currently open log block */
389 int l2ad_log_ent_idx; /* index into cur log blk */
390 /* Number of bytes in current log block's payload */
391 uint64_t l2ad_log_blk_payload_asize;
392 /*
393 * Offset (in bytes) of the first buffer in current log block's
394 * payload.
395 */
396 uint64_t l2ad_log_blk_payload_start;
397 /* Flag indicating whether a rebuild is scheduled or is going on */
398 boolean_t l2ad_rebuild;
399 boolean_t l2ad_rebuild_cancel;
400 boolean_t l2ad_rebuild_began;
401 uint64_t l2ad_log_entries; /* entries per log blk */
402 uint64_t l2ad_evict; /* evicted offset in bytes */
403 /* List of pointers to log blocks present in the L2ARC device */
404 list_t l2ad_lbptr_list;
657fd33b
GA
405 /*
406 * Aligned size of all log blocks as accounted by vdev_space_update().
407 */
408 zfs_refcount_t l2ad_lb_asize;
409 /*
410 * Number of log blocks present on the device.
411 */
412 zfs_refcount_t l2ad_lb_count;
b7654bd7 413 boolean_t l2ad_trim_all; /* TRIM whole device */
77f6826b
GA
414} l2arc_dev_t;
415
b5256303
TC
416/*
417 * Encrypted blocks will need to be stored encrypted on the L2ARC
418 * disk as they appear in the main pool. In order for this to work we
419 * need to pass around the encryption parameters so they can be used
420 * to write data to the L2ARC. This struct is only defined in the
421 * arc_buf_hdr_t if the L1 header is defined and has the ARC_FLAG_ENCRYPTED
422 * flag set.
423 */
/*
 * Encryption parameters carried in a buffer header (see the comment
 * above for when this portion is defined). The array sizes come from
 * the ZIO_DATA_*_LEN constants.
 */
424typedef struct arc_buf_hdr_crypt {
425 abd_t *b_rabd; /* raw encrypted data */
426 dmu_object_type_t b_ot; /* object type */
427 uint32_t b_ebufcnt; /* count of encrypted buffers */
428
429 /* dsobj for looking up encryption key for l2arc encryption */
430 uint64_t b_dsobj;
431
432 /* encryption parameters */
433 uint8_t b_salt[ZIO_DATA_SALT_LEN];
434 uint8_t b_iv[ZIO_DATA_IV_LEN];
435
436 /*
437 * Technically this could be removed since we will always be able to
438 * get the mac from the bp when we need it. However, it is inconvenient
439 * for callers of arc code to have to pass a bp in all the time. This
440 * also allows us to assert that L2ARC data is properly encrypted to
441 * match the data in the main storage pool.
442 */
443 uint8_t b_mac[ZIO_DATA_MAC_LEN];
444} arc_buf_hdr_crypt_t;
445
b9541d6b
CW
/*
 * L2ARC portion of a buffer header (the "l2arc_buf_hdr_t" box in the
 * layout diagram earlier in this file). Per that comment it is always
 * allocated but undefined when the buffer is not in L2ARC.
 */
446typedef struct l2arc_buf_hdr {
447 /* protected by arc_buf_hdr mutex */
448 l2arc_dev_t *b_dev; /* L2ARC device */
449 uint64_t b_daddr; /* disk address, offset byte */
b9541d6b 450 uint32_t b_hits;
08532162 451 arc_state_type_t b_arcs_state;
b9541d6b
CW
452 list_node_t b_l2node; /* list linkage; presumably for l2ad_buflist */
453} l2arc_buf_hdr_t;
454
49ee64e5
NB
/*
 * Context for an L2ARC write: the target device, the head of the list of
 * buffer headers being written, and the list of in-flight log blocks.
 * NOTE(review): l2wcb_abd_list presumably holds l2arc_lb_abd_buf_t
 * elements -- confirm against arc.c.
 */
455typedef struct l2arc_write_callback {
456 l2arc_dev_t *l2wcb_dev; /* device info */
457 arc_buf_hdr_t *l2wcb_head; /* head of write buflist */
77f6826b
GA
458 /* in-flight list of log blocks */
459 list_t l2wcb_abd_list;
49ee64e5
NB
460} l2arc_write_callback_t;
461
b9541d6b
CW
/*
 * The ARC buffer header. The common fields come first, followed by the
 * embedded sub-headers in the order b_l2hdr, b_l1hdr, b_crypt_hdr. Keep
 * that order: it matches the layout diagram earlier in this file, where
 * the trailing L1 portion is absent for buffers that are only in L2ARC.
 */
462struct arc_buf_hdr {
463 /* protected by hash lock */
464 dva_t b_dva;
465 uint64_t b_birth;
b9541d6b 466
d3c2ae1c 467 arc_buf_contents_t b_type;
10b3c7f5
MN
468 uint8_t b_complevel;
469 uint8_t b_reserved1; /* used for 4 byte alignment */
470 uint16_t b_reserved2; /* used for 4 byte alignment */
b9541d6b
CW
471 arc_buf_hdr_t *b_hash_next;
472 arc_flags_t b_flags;
473
d3c2ae1c
GW
474 /*
475 * This field stores the size of the data buffer after
476 * compression, and is set in the arc's zio completion handlers.
477 * It is in units of SPA_MINBLOCKSIZE (e.g. 1 == 512 bytes).
478 *
479 * While the block pointers can store up to 32MB in their psize
480 * field, we can only store up to 32MB minus 512B. This is due
481 * to the bp using a bias of 1, whereas we use a bias of 0 (i.e.
482 * a field of zeros represents 512B in the bp). We can't use a
483 * bias of 1 since we need to reserve a psize of zero, here, to
484 * represent holes and embedded blocks.
485 *
486 * This isn't a problem in practice, since the maximum size of a
487 * buffer is limited to 16MB, so we never need to store 32MB in
488 * this field. Even in the upstream illumos code base, the
489 * maximum size of a buffer is limited to 16MB.
490 */
491 uint16_t b_psize;
492
493 /*
494 * This field stores the size of the data buffer before
495 * compression, and cannot change once set. It is in units
496 * of SPA_MINBLOCKSIZE (e.g. 2 == 1024 bytes)
497 */
498 uint16_t b_lsize; /* immutable */
499 uint64_t b_spa; /* immutable */
b9541d6b
CW
500
501 /* L2ARC fields. Undefined when not in L2ARC. */
502 l2arc_buf_hdr_t b_l2hdr;
503 /* L1ARC fields. Undefined when in l2arc_only state */
504 l1arc_buf_hdr_t b_l1hdr;
b5256303
TC
505 /*
506 * Encryption parameters. Defined only when ARC_FLAG_ENCRYPTED
507 * is set and the L1 header exists.
508 */
509 arc_buf_hdr_crypt_t b_crypt_hdr;
b9541d6b 510};
13a4027a
MM
511
512typedef struct arc_stats {
513 kstat_named_t arcstat_hits;
514 kstat_named_t arcstat_misses;
515 kstat_named_t arcstat_demand_data_hits;
516 kstat_named_t arcstat_demand_data_misses;
517 kstat_named_t arcstat_demand_metadata_hits;
518 kstat_named_t arcstat_demand_metadata_misses;
519 kstat_named_t arcstat_prefetch_data_hits;
520 kstat_named_t arcstat_prefetch_data_misses;
521 kstat_named_t arcstat_prefetch_metadata_hits;
522 kstat_named_t arcstat_prefetch_metadata_misses;
523 kstat_named_t arcstat_mru_hits;
524 kstat_named_t arcstat_mru_ghost_hits;
525 kstat_named_t arcstat_mfu_hits;
526 kstat_named_t arcstat_mfu_ghost_hits;
527 kstat_named_t arcstat_deleted;
528 /*
529 * Number of buffers that could not be evicted because the hash lock
530 * was held by another thread. The lock may not necessarily be held
531 * by something using the same buffer, since hash locks are shared
532 * by multiple buffers.
533 */
534 kstat_named_t arcstat_mutex_miss;
535 /*
536 * Number of buffers skipped when updating the access state due to the
537 * header having already been released after acquiring the hash lock.
538 */
539 kstat_named_t arcstat_access_skip;
540 /*
541 * Number of buffers skipped because they have I/O in progress, are
542 * indirect prefetch buffers that have not lived long enough, or are
543 * not from the spa we're trying to evict from.
544 */
545 kstat_named_t arcstat_evict_skip;
546 /*
547 * Number of times arc_evict_state() was unable to evict enough
548 * buffers to reach its target amount.
549 */
550 kstat_named_t arcstat_evict_not_enough;
551 kstat_named_t arcstat_evict_l2_cached;
552 kstat_named_t arcstat_evict_l2_eligible;
08532162
GA
553 kstat_named_t arcstat_evict_l2_eligible_mfu;
554 kstat_named_t arcstat_evict_l2_eligible_mru;
13a4027a
MM
555 kstat_named_t arcstat_evict_l2_ineligible;
556 kstat_named_t arcstat_evict_l2_skip;
557 kstat_named_t arcstat_hash_elements;
558 kstat_named_t arcstat_hash_elements_max;
559 kstat_named_t arcstat_hash_collisions;
560 kstat_named_t arcstat_hash_chains;
561 kstat_named_t arcstat_hash_chain_max;
562 kstat_named_t arcstat_p;
563 kstat_named_t arcstat_c;
564 kstat_named_t arcstat_c_min;
565 kstat_named_t arcstat_c_max;
566 /* Not updated directly; only synced in arc_kstat_update. */
567 kstat_named_t arcstat_size;
568 /*
569 * Number of compressed bytes stored in the arc_buf_hdr_t's b_pabd.
570 * Note that the compressed bytes may match the uncompressed bytes
571 * if the block is either not compressed or compressed arc is disabled.
572 */
573 kstat_named_t arcstat_compressed_size;
574 /*
575 * Uncompressed size of the data stored in b_pabd. If compressed
576 * arc is disabled then this value will be identical to the stat
577 * above.
578 */
579 kstat_named_t arcstat_uncompressed_size;
580 /*
581 * Number of bytes stored in all the arc_buf_t's. This is classified
582 * as "overhead" since this data is typically short-lived and will
583 * be evicted from the arc when it becomes unreferenced unless the
584 * zfs_keep_uncompressed_metadata or zfs_keep_uncompressed_level
585 * values have been set (see comment in dbuf.c for more information).
586 */
587 kstat_named_t arcstat_overhead_size;
588 /*
589 * Number of bytes consumed by internal ARC structures necessary
590 * for tracking purposes; these structures are not actually
591 * backed by ARC buffers. This includes arc_buf_hdr_t structures
592 * (allocated via arc_buf_hdr_t_full and arc_buf_hdr_t_l2only
593 * caches), and arc_buf_t structures (allocated via arc_buf_t
594 * cache).
595 * Not updated directly; only synced in arc_kstat_update.
596 */
597 kstat_named_t arcstat_hdr_size;
598 /*
599 * Number of bytes consumed by ARC buffers of type equal to
600 * ARC_BUFC_DATA. This is generally consumed by buffers backing
601 * on disk user data (e.g. plain file contents).
602 * Not updated directly; only synced in arc_kstat_update.
603 */
604 kstat_named_t arcstat_data_size;
605 /*
606 * Number of bytes consumed by ARC buffers of type equal to
607 * ARC_BUFC_METADATA. This is generally consumed by buffers
608 * backing on disk data that is used for internal ZFS
609 * structures (e.g. ZAP, dnode, indirect blocks, etc).
610 * Not updated directly; only synced in arc_kstat_update.
611 */
612 kstat_named_t arcstat_metadata_size;
613 /*
614 * Number of bytes consumed by dmu_buf_impl_t objects.
615 * Not updated directly; only synced in arc_kstat_update.
616 */
617 kstat_named_t arcstat_dbuf_size;
618 /*
619 * Number of bytes consumed by dnode_t objects.
620 * Not updated directly; only synced in arc_kstat_update.
621 */
622 kstat_named_t arcstat_dnode_size;
623 /*
624 * Number of bytes consumed by bonus buffers.
625 * Not updated directly; only synced in arc_kstat_update.
626 */
627 kstat_named_t arcstat_bonus_size;
1c2725a1
MM
628#if defined(COMPAT_FREEBSD11)
629 /*
630 * Sum of the previous three counters, provided for compatibility.
631 */
632 kstat_named_t arcstat_other_size;
633#endif
634
13a4027a
MM
635 /*
636 * Total number of bytes consumed by ARC buffers residing in the
637 * arc_anon state. This includes *all* buffers in the arc_anon
638 * state; e.g. data, metadata, evictable, and unevictable buffers
639 * are all included in this value.
640 * Not updated directly; only synced in arc_kstat_update.
641 */
642 kstat_named_t arcstat_anon_size;
643 /*
644 * Number of bytes consumed by ARC buffers that meet the
645 * following criteria: backing buffers of type ARC_BUFC_DATA,
646 * residing in the arc_anon state, and are eligible for eviction
647 * (e.g. have no outstanding holds on the buffer).
648 * Not updated directly; only synced in arc_kstat_update.
649 */
650 kstat_named_t arcstat_anon_evictable_data;
651 /*
652 * Number of bytes consumed by ARC buffers that meet the
653 * following criteria: backing buffers of type ARC_BUFC_METADATA,
654 * residing in the arc_anon state, and are eligible for eviction
655 * (e.g. have no outstanding holds on the buffer).
656 * Not updated directly; only synced in arc_kstat_update.
657 */
658 kstat_named_t arcstat_anon_evictable_metadata;
659 /*
660 * Total number of bytes consumed by ARC buffers residing in the
661 * arc_mru state. This includes *all* buffers in the arc_mru
662 * state; e.g. data, metadata, evictable, and unevictable buffers
663 * are all included in this value.
664 * Not updated directly; only synced in arc_kstat_update.
665 */
666 kstat_named_t arcstat_mru_size;
667 /*
668 * Number of bytes consumed by ARC buffers that meet the
669 * following criteria: backing buffers of type ARC_BUFC_DATA,
670 * residing in the arc_mru state, and are eligible for eviction
671 * (e.g. have no outstanding holds on the buffer).
672 * Not updated directly; only synced in arc_kstat_update.
673 */
674 kstat_named_t arcstat_mru_evictable_data;
675 /*
676 * Number of bytes consumed by ARC buffers that meet the
677 * following criteria: backing buffers of type ARC_BUFC_METADATA,
678 * residing in the arc_mru state, and are eligible for eviction
679 * (e.g. have no outstanding holds on the buffer).
680 * Not updated directly; only synced in arc_kstat_update.
681 */
682 kstat_named_t arcstat_mru_evictable_metadata;
683 /*
684 * Total number of bytes that *would have been* consumed by ARC
685 * buffers in the arc_mru_ghost state. The key thing to note
686 * here, is the fact that this size doesn't actually indicate
687 * RAM consumption. The ghost lists only consist of headers and
688 * don't actually have ARC buffers linked off of these headers.
689 * Thus, *if* the headers had associated ARC buffers, these
690 * buffers *would have* consumed this number of bytes.
691 * Not updated directly; only synced in arc_kstat_update.
692 */
693 kstat_named_t arcstat_mru_ghost_size;
694 /*
695 * Number of bytes that *would have been* consumed by ARC
696 * buffers that are eligible for eviction, of type
697 * ARC_BUFC_DATA, and linked off the arc_mru_ghost state.
698 * Not updated directly; only synced in arc_kstat_update.
699 */
700 kstat_named_t arcstat_mru_ghost_evictable_data;
701 /*
702 * Number of bytes that *would have been* consumed by ARC
703 * buffers that are eligible for eviction, of type
704 * ARC_BUFC_METADATA, and linked off the arc_mru_ghost state.
705 * Not updated directly; only synced in arc_kstat_update.
706 */
707 kstat_named_t arcstat_mru_ghost_evictable_metadata;
708 /*
709 * Total number of bytes consumed by ARC buffers residing in the
710 * arc_mfu state. This includes *all* buffers in the arc_mfu
711 * state; e.g. data, metadata, evictable, and unevictable buffers
712 * are all included in this value.
713 * Not updated directly; only synced in arc_kstat_update.
714 */
715 kstat_named_t arcstat_mfu_size;
716 /*
717 * Number of bytes consumed by ARC buffers that are eligible for
718 * eviction, of type ARC_BUFC_DATA, and reside in the arc_mfu
719 * state.
720 * Not updated directly; only synced in arc_kstat_update.
721 */
722 kstat_named_t arcstat_mfu_evictable_data;
723 /*
724 * Number of bytes consumed by ARC buffers that are eligible for
725 * eviction, of type ARC_BUFC_METADATA, and reside in the
726 * arc_mfu state.
727 * Not updated directly; only synced in arc_kstat_update.
728 */
729 kstat_named_t arcstat_mfu_evictable_metadata;
730 /*
731 * Total number of bytes that *would have been* consumed by ARC
732 * buffers in the arc_mfu_ghost state. See the comment above
733 * arcstat_mru_ghost_size for more details.
734 * Not updated directly; only synced in arc_kstat_update.
735 */
736 kstat_named_t arcstat_mfu_ghost_size;
737 /*
738 * Number of bytes that *would have been* consumed by ARC
739 * buffers that are eligible for eviction, of type
740 * ARC_BUFC_DATA, and linked off the arc_mfu_ghost state.
741 * Not updated directly; only synced in arc_kstat_update.
742 */
743 kstat_named_t arcstat_mfu_ghost_evictable_data;
744 /*
745 * Number of bytes that *would have been* consumed by ARC
746 * buffers that are eligible for eviction, of type
747 * ARC_BUFC_METADATA, and linked off the arc_mfu_ghost state.
748 * Not updated directly; only synced in arc_kstat_update.
749 */
750 kstat_named_t arcstat_mfu_ghost_evictable_metadata;
751 kstat_named_t arcstat_l2_hits;
752 kstat_named_t arcstat_l2_misses;
08532162
GA
753 /*
754 * Allocated size (in bytes) of L2ARC cached buffers by ARC state.
755 */
756 kstat_named_t arcstat_l2_prefetch_asize;
757 kstat_named_t arcstat_l2_mru_asize;
758 kstat_named_t arcstat_l2_mfu_asize;
759 /*
760 * Allocated size (in bytes) of L2ARC cached buffers by buffer content
761 * type.
762 */
763 kstat_named_t arcstat_l2_bufc_data_asize;
764 kstat_named_t arcstat_l2_bufc_metadata_asize;
13a4027a
MM
765 kstat_named_t arcstat_l2_feeds;
766 kstat_named_t arcstat_l2_rw_clash;
767 kstat_named_t arcstat_l2_read_bytes;
768 kstat_named_t arcstat_l2_write_bytes;
769 kstat_named_t arcstat_l2_writes_sent;
770 kstat_named_t arcstat_l2_writes_done;
771 kstat_named_t arcstat_l2_writes_error;
772 kstat_named_t arcstat_l2_writes_lock_retry;
773 kstat_named_t arcstat_l2_evict_lock_retry;
774 kstat_named_t arcstat_l2_evict_reading;
775 kstat_named_t arcstat_l2_evict_l1cached;
776 kstat_named_t arcstat_l2_free_on_write;
777 kstat_named_t arcstat_l2_abort_lowmem;
778 kstat_named_t arcstat_l2_cksum_bad;
779 kstat_named_t arcstat_l2_io_error;
780 kstat_named_t arcstat_l2_lsize;
781 kstat_named_t arcstat_l2_psize;
782 /* Not updated directly; only synced in arc_kstat_update. */
783 kstat_named_t arcstat_l2_hdr_size;
77f6826b
GA
784 /*
785 * Number of L2ARC log blocks written. These are used for restoring the
786 * L2ARC. Updated during writing of L2ARC log blocks.
787 */
788 kstat_named_t arcstat_l2_log_blk_writes;
789 /*
657fd33b 790 * Moving average of the aligned size of the L2ARC log blocks, in
77f6826b
GA
791 * bytes. Updated during L2ARC rebuild and during writing of L2ARC
792 * log blocks.
793 */
657fd33b
GA
794 kstat_named_t arcstat_l2_log_blk_avg_asize;
795 /* Aligned size of L2ARC log blocks on L2ARC devices. */
796 kstat_named_t arcstat_l2_log_blk_asize;
797 /* Number of L2ARC log blocks present on L2ARC devices. */
798 kstat_named_t arcstat_l2_log_blk_count;
77f6826b 799 /*
657fd33b
GA
800 * Moving average of the aligned size of L2ARC restored data, in bytes,
801 * to the aligned size of their metadata in L2ARC, in bytes.
77f6826b
GA
802 * Updated during L2ARC rebuild and during writing of L2ARC log blocks.
803 */
804 kstat_named_t arcstat_l2_data_to_meta_ratio;
805 /*
806 * Number of times the L2ARC rebuild was successful for an L2ARC device.
807 */
808 kstat_named_t arcstat_l2_rebuild_success;
809 /*
810 * Number of times the L2ARC rebuild failed because the device header
811 * was in an unsupported format or corrupted.
812 */
813 kstat_named_t arcstat_l2_rebuild_abort_unsupported;
814 /*
815 * Number of times the L2ARC rebuild failed because of IO errors
816 * while reading a log block.
817 */
818 kstat_named_t arcstat_l2_rebuild_abort_io_errors;
819 /*
820 * Number of times the L2ARC rebuild failed because of IO errors when
821 * reading the device header.
822 */
823 kstat_named_t arcstat_l2_rebuild_abort_dh_errors;
824 /*
825 * Number of L2ARC log blocks which failed to be restored due to
826 * checksum errors.
827 */
828 kstat_named_t arcstat_l2_rebuild_abort_cksum_lb_errors;
829 /*
830 * Number of times the L2ARC rebuild was aborted due to low system
831 * memory.
832 */
833 kstat_named_t arcstat_l2_rebuild_abort_lowmem;
834 /* Logical size of L2ARC restored data, in bytes. */
835 kstat_named_t arcstat_l2_rebuild_size;
657fd33b
GA
836 /* Aligned size of L2ARC restored data, in bytes. */
837 kstat_named_t arcstat_l2_rebuild_asize;
77f6826b
GA
838 /*
839 * Number of L2ARC log entries (buffers) that were successfully
840 * restored in ARC.
841 */
842 kstat_named_t arcstat_l2_rebuild_bufs;
843 /*
844 * Number of L2ARC log entries (buffers) already cached in ARC. These
845 * were not restored again.
846 */
847 kstat_named_t arcstat_l2_rebuild_bufs_precached;
77f6826b
GA
848 /*
849 * Number of L2ARC log blocks that were restored successfully. Each
850 * log block may hold up to L2ARC_LOG_BLK_MAX_ENTRIES buffers.
851 */
852 kstat_named_t arcstat_l2_rebuild_log_blks;
13a4027a
MM
853 kstat_named_t arcstat_memory_throttle_count;
854 kstat_named_t arcstat_memory_direct_count;
855 kstat_named_t arcstat_memory_indirect_count;
856 kstat_named_t arcstat_memory_all_bytes;
857 kstat_named_t arcstat_memory_free_bytes;
858 kstat_named_t arcstat_memory_available_bytes;
859 kstat_named_t arcstat_no_grow;
860 kstat_named_t arcstat_tempreserve;
861 kstat_named_t arcstat_loaned_bytes;
862 kstat_named_t arcstat_prune;
863 /* Not updated directly; only synced in arc_kstat_update. */
864 kstat_named_t arcstat_meta_used;
865 kstat_named_t arcstat_meta_limit;
866 kstat_named_t arcstat_dnode_limit;
867 kstat_named_t arcstat_meta_max;
868 kstat_named_t arcstat_meta_min;
869 kstat_named_t arcstat_async_upgrade_sync;
870 kstat_named_t arcstat_demand_hit_predictive_prefetch;
871 kstat_named_t arcstat_demand_hit_prescient_prefetch;
872 kstat_named_t arcstat_need_free;
873 kstat_named_t arcstat_sys_free;
874 kstat_named_t arcstat_raw_size;
1dc32a67 875 kstat_named_t arcstat_cached_only_in_progress;
85ec5cba 876 kstat_named_t arcstat_abd_chunk_waste_size;
13a4027a
MM
877} arc_stats_t;
878
3442c2a0
MA
/*
 * Waiter record made of a list linkage (aew_node), a condition variable
 * (aew_cv) and a 64-bit count (aew_count).
 * NOTE(review): presumably one record per thread blocked in
 * arc_wait_for_eviction(), with aew_count being the eviction count at
 * which that thread should be woken -- confirm against arc.c.
 */
879typedef struct arc_evict_waiter {
880 list_node_t aew_node;
881 kcondvar_t aew_cv;
882 uint64_t aew_count;
883} arc_evict_waiter_t;
c9c9c1e2
MM
884
/*
 * Expands to the 64-bit value (value.ui64) of the named field of the
 * global arc_stats, usable both for reading and for assignment.
 */
885#define ARCSTAT(stat) (arc_stats.stat.value.ui64)
886
/* Adds val to the named arc_stats field via atomic_add_64(). */
887#define ARCSTAT_INCR(stat, val) \
888 atomic_add_64(&arc_stats.stat.value.ui64, (val))
889
/* Shorthands for ARCSTAT_INCR() with a delta of 1 and -1 respectively. */
890#define ARCSTAT_BUMP(stat) ARCSTAT_INCR(stat, 1)
891#define ARCSTAT_BUMPDOWN(stat) ARCSTAT_INCR(stat, -1)
892
/*
 * Variable-like shorthands: each name expands to an ARCSTAT() access, so
 * the value is stored in, and read from, the matching arc_stats field
 * rather than a separate variable.
 */
893#define arc_no_grow ARCSTAT(arcstat_no_grow) /* do not grow cache size */
894#define arc_p ARCSTAT(arcstat_p) /* target size of MRU */
895#define arc_c ARCSTAT(arcstat_c) /* target size of cache */
896#define arc_c_min ARCSTAT(arcstat_c_min) /* min target cache size */
897#define arc_c_max ARCSTAT(arcstat_c_max) /* max target cache size */
898#define arc_sys_free ARCSTAT(arcstat_sys_free) /* target system free bytes */
c9c9c1e2 899
c9c9c1e2
MM
/*
 * Declarations of ARC global variables. These are extern declarations
 * only; the definitions live outside this header. arc_stats is the object
 * that the ARCSTAT*() macros in this header operate on.
 * NOTE(review): the zfs_arc_* names look like module tunables -- confirm
 * where they are defined and registered.
 */
900extern taskq_t *arc_prune_taskq;
901extern arc_stats_t arc_stats;
902extern hrtime_t arc_growtime;
903extern boolean_t arc_warm;
904extern int arc_grow_retry;
f7a68f99 905extern int arc_no_grow_shift;
c9c9c1e2 906extern int arc_shrink_shift;
c9c9c1e2
MM
907extern kmutex_t arc_prune_mtx;
908extern list_t arc_prune_list;
909extern aggsum_t arc_size;
910extern arc_state_t *arc_mfu;
911extern arc_state_t *arc_mru;
912extern uint_t zfs_arc_pc_percent;
913extern int arc_lotsfree_percent;
0421f257
RM
914extern unsigned long zfs_arc_min;
915extern unsigned long zfs_arc_max;
c9c9c1e2
MM
916
/*
 * Prototypes for ARC routines defined outside this header.
 * arc_reduce_target_size() takes a signed 64-bit amount (to_free);
 * arc_reclaim_needed() and arc_is_overflowing() are boolean predicates
 * taking no arguments; arc_wait_for_eviction() takes an unsigned 64-bit
 * amount.
 * NOTE(review): the amounts are presumably byte counts -- confirm against
 * the definitions.
 */
917extern void arc_reduce_target_size(int64_t to_free);
918extern boolean_t arc_reclaim_needed(void);
919extern void arc_kmem_reap_soon(void);
67c0f0de 920extern boolean_t arc_is_overflowing(void);
3442c2a0 921extern void arc_wait_for_eviction(uint64_t);
c9c9c1e2
MM
922
/*
 * Prototypes for further ARC routines defined outside this header:
 * init/fini and register/unregister pairs, memory queries returning an
 * unsigned (arc_free_memory) or signed (arc_available_memory) 64-bit
 * value, and arc_memory_throttle(), which returns an int.
 * NOTE(review): these look like the platform-specific hooks of the ARC --
 * confirm which source file implements them on each platform.
 */
923extern void arc_lowmem_init(void);
924extern void arc_lowmem_fini(void);
925extern void arc_prune_async(int64_t);
926extern int arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg);
927extern uint64_t arc_free_memory(void);
928extern int64_t arc_available_memory(void);
36a6e233 929extern void arc_tuning_update(boolean_t);
60a4c7d2
PD
930extern void arc_register_hotplug(void);
931extern void arc_unregister_hotplug(void);
e3570464 932
7e3df9db
RM
/*
 * Parameter setters returning int; their argument list is supplied by the
 * ZFS_MODULE_PARAM_ARGS macro, which is defined outside this header.
 * NOTE(review): presumably the set-handlers for long- and int-typed ARC
 * module parameters -- confirm against the definitions.
 */
933extern int param_set_arc_long(ZFS_MODULE_PARAM_ARGS);
934extern int param_set_arc_int(ZFS_MODULE_PARAM_ARGS);
c9c9c1e2 935
77f6826b
GA
936/* used in zdb.c */
/*
 * Returns a boolean_t for the given L2ARC device and log block pointer;
 * the pointer is const, so it is not modified through this argument.
 */
937boolean_t l2arc_log_blkptr_valid(l2arc_dev_t *dev,
938 const l2arc_log_blkptr_t *lbp);
939
b7654bd7
GA
940/* used in vdev_trim.c */
/*
 * l2arc_dev_hdr_update() takes an L2ARC device and returns nothing;
 * l2arc_vdev_get() takes a vdev and returns an l2arc_dev_t pointer.
 * NOTE(review): presumably l2arc_vdev_get() returns NULL when the vdev has
 * no L2ARC device -- confirm against the definition before relying on it.
 */
941void l2arc_dev_hdr_update(l2arc_dev_t *dev);
942l2arc_dev_t *l2arc_vdev_get(vdev_t *vd);
943
59ec819a
NB
944#ifdef __cplusplus
945}
946#endif
947
948#endif /* _SYS_ARC_IMPL_H */