4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2013, Delphix. All rights reserved.
24 * Copyright (c) 2013, Saso Kiselkov. All rights reserved.
25 * Copyright (c) 2013, Nexenta Systems, Inc. All rights reserved.
26 * Copyright (c) 2020, George Amanakis. All rights reserved.
29 #ifndef _SYS_ARC_IMPL_H
30 #define _SYS_ARC_IMPL_H
33 #include <sys/multilist.h>
34 #include <sys/zio_crypt.h>
36 #include <sys/aggsum.h>
37 #include <sys/wmsum.h>
44 * Note that buffers can be in one of 7 states:
45 * ARC_anon - anonymous (discussed below)
46 * ARC_mru - recently used, currently cached
47 * ARC_mru_ghost - recently used, no longer in cache
48 * ARC_mfu - frequently used, currently cached
49 * ARC_mfu_ghost - frequently used, no longer in cache
50 * ARC_uncached - uncacheable prefetch, to be evicted
51 * ARC_l2c_only - exists in L2ARC but not other states
52 * When there are no active references to the buffer, they are
53 * linked onto a list in one of these arc states. These are
54 * the only buffers that can be evicted or deleted. Within each
55 * state there are multiple lists, one for meta-data and one for
56 * non-meta-data. Meta-data (indirect blocks, blocks of dnodes,
57 * etc.) is tracked separately so that it can be managed more
58 * explicitly: favored over data, limited explicitly.
60 * Anonymous buffers are buffers that are not associated with
61 * a DVA. These are buffers that hold dirty block copies
62 * before they are written to stable storage. By definition,
63 * they are "ref'd" and are considered part of arc_mru
64 * that cannot be freed. Generally, they will acquire a DVA
65 * as they are written and migrate onto the arc_mru list.
67 * The ARC_l2c_only state is for buffers that are in the second
68 * level ARC but no longer in any of the ARC_m* lists. The second
69 * level ARC itself may also contain buffers that are in any of
70 * the ARC_m* states - meaning that a buffer can exist in two
71 * places. The reason for the ARC_l2c_only state is to keep the
72 * buffer header in the hash table, so that reads that hit the
73 * second level ARC benefit from these fast lookups.
76 typedef struct arc_state
{
78 * list of evictable buffers
80 multilist_t arcs_list
[ARC_BUFC_NUMTYPES
];
82 * supports the "dbufs" kstat
84 arc_state_type_t arcs_state
;
86 * total amount of data in this state.
88 zfs_refcount_t arcs_size
[ARC_BUFC_NUMTYPES
] ____cacheline_aligned
;
90 * total amount of evictable data in this state
92 zfs_refcount_t arcs_esize
[ARC_BUFC_NUMTYPES
];
94 * amount of hit bytes for this state (counted only for ghost states)
96 wmsum_t arcs_hits
[ARC_BUFC_NUMTYPES
];
99 typedef struct arc_callback arc_callback_t
;
101 struct arc_callback
{
103 arc_read_done_func_t
*acb_done
;
105 boolean_t acb_encrypted
;
106 boolean_t acb_compressed
;
107 boolean_t acb_noauth
;
111 kmutex_t acb_wait_lock
;
112 kcondvar_t acb_wait_cv
;
113 zbookmark_phys_t acb_zb
;
114 zio_t
*acb_zio_dummy
;
116 arc_callback_t
*acb_prev
;
117 arc_callback_t
*acb_next
;
120 typedef struct arc_write_callback arc_write_callback_t
;
122 struct arc_write_callback
{
124 arc_write_done_func_t
*awcb_ready
;
125 arc_write_done_func_t
*awcb_children_ready
;
126 arc_write_done_func_t
*awcb_physdone
;
127 arc_write_done_func_t
*awcb_done
;
132 * ARC buffers are separated into multiple structs as a memory saving measure:
133 * - Common fields struct, always defined, and embedded within it:
134 * - L2-only fields, always allocated but undefined when not in L2ARC
135 * - L1-only fields, only allocated when in L1ARC
137 * Buffer in L1 Buffer only in L2
138 * +------------------------+ +------------------------+
139 * | arc_buf_hdr_t | | arc_buf_hdr_t |
143 * +------------------------+ +------------------------+
144 * | l2arc_buf_hdr_t | | l2arc_buf_hdr_t |
145 * | (undefined if L1-only) | | |
146 * +------------------------+ +------------------------+
147 * | l1arc_buf_hdr_t |
152 * +------------------------+
154 * Because it's possible for the L2ARC to become extremely large, we can wind
155 * up eating a lot of memory in L2ARC buffer headers, so the size of a header
156 * is minimized by only allocating the fields necessary for an L1-cached buffer
157 * when a header is actually in the L1 cache. The sub-headers (l1arc_buf_hdr and
158 * l2arc_buf_hdr) are embedded rather than allocated separately to save a couple
159 * words in pointers. arc_hdr_realloc() is used to switch a header between
160 * these two allocation states.
162 typedef struct l1arc_buf_hdr
{
163 /* for waiting on reads to complete */
167 /* protected by arc state mutex */
168 arc_state_t
*b_state
;
169 multilist_node_t b_arc_node
;
171 /* protected by hash lock */
172 clock_t b_arc_access
;
174 uint32_t b_mru_ghost_hits
;
176 uint32_t b_mfu_ghost_hits
;
180 /* self protecting */
181 zfs_refcount_t b_refcnt
;
183 arc_callback_t
*b_acb
;
187 zio_cksum_t
*b_freeze_cksum
;
188 kmutex_t b_freeze_lock
;
/*
 * Flag bits for the persistent device header; dh_flags is documented as
 * holding values of this type.
 */
typedef enum l2arc_dev_hdr_flags_t {
	/* mirror of l2ad_first */
	L2ARC_DEV_HDR_EVICT_FIRST = (1 << 0)
} l2arc_dev_hdr_flags_t;
197 * Pointer used in persistent L2ARC (for pointing to log blocks).
199 typedef struct l2arc_log_blkptr
{
201 * Offset of log block within the device, in bytes
205 * Aligned payload size (in bytes) of the log block
207 uint64_t lbp_payload_asize
;
209 * Offset in bytes of the first buffer in the payload
211 uint64_t lbp_payload_start
;
213 * lbp_prop has the following format:
214 * * logical size (in bytes)
215 * * aligned (after compression) size (in bytes)
216 * * compression algorithm (we always LZ4-compress l2arc logs)
217 * * checksum algorithm (used for lbp_cksum)
220 zio_cksum_t lbp_cksum
; /* checksum of log */
221 } l2arc_log_blkptr_t
;
224 * The persistent L2ARC device header.
225 * Byte order of magic determines whether 64-bit bswap of fields is necessary.
227 typedef struct l2arc_dev_hdr_phys
{
228 uint64_t dh_magic
; /* L2ARC_DEV_HDR_MAGIC */
229 uint64_t dh_version
; /* Persistent L2ARC version */
232 * Global L2ARC device state and metadata.
234 uint64_t dh_spa_guid
;
235 uint64_t dh_vdev_guid
;
236 uint64_t dh_log_entries
; /* mirror of l2ad_log_entries */
237 uint64_t dh_evict
; /* evicted offset in bytes */
238 uint64_t dh_flags
; /* l2arc_dev_hdr_flags_t */
240 * Used in zdb.c for determining if a log block is valid, in the same
241 * way that l2arc_rebuild() does.
243 uint64_t dh_start
; /* mirror of l2ad_start */
244 uint64_t dh_end
; /* mirror of l2ad_end */
246 * Start of log block chain. [0] -> newest log, [1] -> one older (used
247 * for initiating prefetch).
249 l2arc_log_blkptr_t dh_start_lbps
[2];
251 * Aligned size of all log blocks as accounted by vdev_space_update().
253 uint64_t dh_lb_asize
; /* mirror of l2ad_lb_asize */
254 uint64_t dh_lb_count
; /* mirror of l2ad_lb_count */
256 * Mirrors of vdev_trim_action_time and vdev_trim_state, used to
257 * display when the cache device was fully trimmed for the last
260 uint64_t dh_trim_action_time
;
261 uint64_t dh_trim_state
;
262 const uint64_t dh_pad
[30]; /* pad to 512 bytes */
264 } l2arc_dev_hdr_phys_t
;
265 _Static_assert(sizeof (l2arc_dev_hdr_phys_t
) == SPA_MINBLOCKSIZE
,
266 "l2arc_dev_hdr_phys_t wrong size");
269 * A single ARC buffer header entry in a l2arc_log_blk_phys_t.
271 typedef struct l2arc_log_ent_phys
{
272 dva_t le_dva
; /* dva of buffer */
273 uint64_t le_birth
; /* birth txg of buffer */
275 * le_prop has the following format:
276 * * logical size (in bytes)
277 * * physical (compressed) size (in bytes)
278 * * compression algorithm
279 * * object type (used to restore arc_buf_contents_t)
280 * * protected status (used for encryption)
281 * * prefetch status (used in l2arc_read_done())
284 uint64_t le_daddr
; /* buf location on l2dev */
285 uint64_t le_complevel
;
287 * We pad the size of each entry to a power of 2 so that the size of
288 * l2arc_log_blk_phys_t is power-of-2 aligned with SPA_MINBLOCKSHIFT,
289 * because of the L2ARC_SET_*SIZE macros.
291 const uint64_t le_pad
[2]; /* pad to 64 bytes */
292 } l2arc_log_ent_phys_t
;
/* Capacity of the lb_entries array of one persistent L2ARC log block. */
#define	L2ARC_LOG_BLK_MAX_ENTRIES	(1022)
297 * A log block of up to 1022 ARC buffer log entries, chained into the
298 * persistent L2ARC metadata linked list. Byte order of magic determines
299 * whether 64-bit bswap of fields is necessary.
301 typedef struct l2arc_log_blk_phys
{
302 uint64_t lb_magic
; /* L2ARC_LOG_BLK_MAGIC */
304 * There are 2 chains (headed by dh_start_lbps[2]), and this field
305 * points back to the previous block in this chain. We alternate
306 * which chain we append to, so they are time-wise and offset-wise
307 * interleaved, but that is an optimization rather than for
310 l2arc_log_blkptr_t lb_prev_lbp
; /* pointer to prev log block */
312 * Pad header section to 128 bytes
316 l2arc_log_ent_phys_t lb_entries
[L2ARC_LOG_BLK_MAX_ENTRIES
];
317 } l2arc_log_blk_phys_t
; /* 64K total */
320 * The size of l2arc_log_blk_phys_t has to be power-of-2 aligned with
321 * SPA_MINBLOCKSHIFT because of L2BLK_SET_*SIZE macros.
323 _Static_assert(IS_P2ALIGNED(sizeof (l2arc_log_blk_phys_t
),
324 1ULL << SPA_MINBLOCKSHIFT
), "l2arc_log_blk_phys_t misaligned");
325 _Static_assert(sizeof (l2arc_log_blk_phys_t
) >= SPA_MINBLOCKSIZE
,
326 "l2arc_log_blk_phys_t too small");
327 _Static_assert(sizeof (l2arc_log_blk_phys_t
) <= SPA_MAXBLOCKSIZE
,
328 "l2arc_log_blk_phys_t too big");
/*
 * These structures hold in-flight abd buffers for log blocks as they're being
 * written to the L2ARC device.
 */
typedef struct l2arc_lb_abd_buf {
	/*
	 * NOTE(review): no members are visible for this struct in the text
	 * under review; its declarations must be restored before use.
	 */
} l2arc_lb_abd_buf_t;
340 * These structures hold pointers to log blocks present on the L2ARC device.
342 typedef struct l2arc_lb_ptr_buf
{
343 l2arc_log_blkptr_t
*lb_ptr
;
345 } l2arc_lb_ptr_buf_t
;
/*
 * Macros for getting and setting fields in le_prop and lbp_prop.
 *
 * Bit positions used below (start bit, width):
 *	LSIZE		 0, SPA_LSIZEBITS (scaled by SPA_MINBLOCKSHIFT, bias 1)
 *	PSIZE		16, SPA_PSIZEBITS (scaled by SPA_MINBLOCKSHIFT, bias 1)
 *	COMPRESS	32, SPA_COMPRESSBITS
 *	PREFETCH	39, 1
 *	CHECKSUM	40, 8
 *	TYPE		48, 8 (stored as value + 1)
 *	PROTECTED	56, 1
 *	STATE		57, 4
 */
#define	L2BLK_GET_LSIZE(field)	\
	BF64_GET_SB((field), 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1)
#define	L2BLK_SET_LSIZE(field, x)	\
	BF64_SET_SB((field), 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)
#define	L2BLK_GET_PSIZE(field)	\
	BF64_GET_SB((field), 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1)
#define	L2BLK_SET_PSIZE(field, x)	\
	BF64_SET_SB((field), 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)
#define	L2BLK_GET_COMPRESS(field)	\
	BF64_GET((field), 32, SPA_COMPRESSBITS)
#define	L2BLK_SET_COMPRESS(field, x)	\
	BF64_SET((field), 32, SPA_COMPRESSBITS, x)
#define	L2BLK_GET_PREFETCH(field)	BF64_GET((field), 39, 1)
#define	L2BLK_SET_PREFETCH(field, x)	BF64_SET((field), 39, 1, x)
#define	L2BLK_GET_CHECKSUM(field)	BF64_GET((field), 40, 8)
#define	L2BLK_SET_CHECKSUM(field, x)	BF64_SET((field), 40, 8, x)
/* +/- 1 here are to keep compatibility after ARC_BUFC_INVALID removal. */
#define	L2BLK_GET_TYPE(field)		(BF64_GET((field), 48, 8) - 1)
#define	L2BLK_SET_TYPE(field, x)	BF64_SET((field), 48, 8, (x) + 1)
#define	L2BLK_GET_PROTECTED(field)	BF64_GET((field), 56, 1)
#define	L2BLK_SET_PROTECTED(field, x)	BF64_SET((field), 56, 1, x)
#define	L2BLK_GET_STATE(field)		BF64_GET((field), 57, 4)
#define	L2BLK_SET_STATE(field, x)	BF64_SET((field), 57, 4, x)
/*
 * Exchange two pointer lvalues.  Both arguments are evaluated more than
 * once, so they must be free of side effects.
 *
 * NOTE(review): the macro body was missing and its trailing backslash
 * swallowed the L2ARC_DEV_HDR_MAGIC definition below; the body here is the
 * conventional swap implied by the name -- confirm against the intended
 * definition.
 */
#define	PTR_SWAP(x, y)			\
	do {				\
		void *tmp = (x);	\
		(x) = (y);		\
		(y) = tmp;		\
	} while (0)

/* Magic numbers identifying the on-device header and log blocks. */
#define	L2ARC_DEV_HDR_MAGIC	0x5a46534341434845LLU	/* ASCII: "ZFSCACHE" */
#define	L2ARC_LOG_BLK_MAGIC	0x4c4f47424c4b4844LLU	/* ASCII: "LOGBLKHD" */
385 typedef struct l2arc_dev
{
386 vdev_t
*l2ad_vdev
; /* vdev */
387 spa_t
*l2ad_spa
; /* spa */
388 uint64_t l2ad_hand
; /* next write location */
389 uint64_t l2ad_start
; /* first addr on device */
390 uint64_t l2ad_end
; /* last addr on device */
391 boolean_t l2ad_first
; /* first sweep through */
392 boolean_t l2ad_writing
; /* currently writing */
393 kmutex_t l2ad_mtx
; /* lock for buffer list */
394 list_t l2ad_buflist
; /* buffer list */
395 list_node_t l2ad_node
; /* device list node */
396 zfs_refcount_t l2ad_alloc
; /* allocated bytes */
398 * Persistence-related stuff
400 l2arc_dev_hdr_phys_t
*l2ad_dev_hdr
; /* persistent device header */
401 uint64_t l2ad_dev_hdr_asize
; /* aligned hdr size */
402 l2arc_log_blk_phys_t l2ad_log_blk
; /* currently open log block */
403 int l2ad_log_ent_idx
; /* index into cur log blk */
404 /* Number of bytes in current log block's payload */
405 uint64_t l2ad_log_blk_payload_asize
;
407 * Offset (in bytes) of the first buffer in current log block's
410 uint64_t l2ad_log_blk_payload_start
;
411 /* Flag indicating whether a rebuild is scheduled or is going on */
412 boolean_t l2ad_rebuild
;
413 boolean_t l2ad_rebuild_cancel
;
414 boolean_t l2ad_rebuild_began
;
415 uint64_t l2ad_log_entries
; /* entries per log blk */
416 uint64_t l2ad_evict
; /* evicted offset in bytes */
417 /* List of pointers to log blocks present in the L2ARC device */
418 list_t l2ad_lbptr_list
;
420 * Aligned size of all log blocks as accounted by vdev_space_update().
422 zfs_refcount_t l2ad_lb_asize
;
424 * Number of log blocks present on the device.
426 zfs_refcount_t l2ad_lb_count
;
427 boolean_t l2ad_trim_all
; /* TRIM whole device */
431 * Encrypted blocks will need to be stored encrypted on the L2ARC
432 * disk as they appear in the main pool. In order for this to work we
433 * need to pass around the encryption parameters so they can be used
434 * to write data to the L2ARC. This struct is only defined in the
435 * arc_buf_hdr_t if the L1 header is defined and has the ARC_FLAG_ENCRYPTED
438 typedef struct arc_buf_hdr_crypt
{
439 abd_t
*b_rabd
; /* raw encrypted data */
440 dmu_object_type_t b_ot
; /* object type */
441 uint32_t b_ebufcnt
; /* count of encrypted buffers */
443 /* dsobj for looking up encryption key for l2arc encryption */
446 /* encryption parameters */
447 uint8_t b_salt
[ZIO_DATA_SALT_LEN
];
448 uint8_t b_iv
[ZIO_DATA_IV_LEN
];
451 * Technically this could be removed since we will always be able to
452 * get the mac from the bp when we need it. However, it is inconvenient
453 * for callers of arc code to have to pass a bp in all the time. This
454 * also allows us to assert that L2ARC data is properly encrypted to
455 * match the data in the main storage pool.
457 uint8_t b_mac
[ZIO_DATA_MAC_LEN
];
458 } arc_buf_hdr_crypt_t
;
460 typedef struct l2arc_buf_hdr
{
461 /* protected by arc_buf_hdr mutex */
462 l2arc_dev_t
*b_dev
; /* L2ARC device */
463 uint64_t b_daddr
; /* disk address, offset byte */
465 arc_state_type_t b_arcs_state
;
466 list_node_t b_l2node
;
469 typedef struct l2arc_write_callback
{
470 l2arc_dev_t
*l2wcb_dev
; /* device info */
471 arc_buf_hdr_t
*l2wcb_head
; /* head of write buflist */
472 /* in-flight list of log blocks */
473 list_t l2wcb_abd_list
;
474 } l2arc_write_callback_t
;
477 /* protected by hash lock */
481 arc_buf_contents_t b_type
;
483 uint8_t b_reserved1
; /* used for 4 byte alignment */
484 uint16_t b_reserved2
; /* used for 4 byte alignment */
485 arc_buf_hdr_t
*b_hash_next
;
489 * This field stores the size of the data buffer after
490 * compression, and is set in the arc's zio completion handlers.
491 * It is in units of SPA_MINBLOCKSIZE (e.g. 1 == 512 bytes).
493 * While the block pointers can store up to 32MB in their psize
494 * field, we can only store up to 32MB minus 512B. This is due
495 * to the bp using a bias of 1, whereas we use a bias of 0 (i.e.
496 * a field of zeros represents 512B in the bp). We can't use a
497 * bias of 1 since we need to reserve a psize of zero, here, to
498 * represent holes and embedded blocks.
500 * This isn't a problem in practice, since the maximum size of a
501 * buffer is limited to 16MB, so we never need to store 32MB in
502 * this field. Even in the upstream illumos code base, the
503 * maximum size of a buffer is limited to 16MB.
508 * This field stores the size of the data buffer before
509 * compression, and cannot change once set. It is in units
510 * of SPA_MINBLOCKSIZE (e.g. 2 == 1024 bytes)
512 uint16_t b_lsize
; /* immutable */
513 uint64_t b_spa
; /* immutable */
515 /* L2ARC fields. Undefined when not in L2ARC. */
516 l2arc_buf_hdr_t b_l2hdr
;
517 /* L1ARC fields. Undefined when in l2arc_only state */
518 l1arc_buf_hdr_t b_l1hdr
;
520 * Encryption parameters. Defined only when ARC_FLAG_ENCRYPTED
521 * is set and the L1 header exists.
523 arc_buf_hdr_crypt_t b_crypt_hdr
;
526 typedef struct arc_stats
{
527 /* Number of requests that were satisfied without I/O. */
528 kstat_named_t arcstat_hits
;
529 /* Number of requests for which I/O was already running. */
530 kstat_named_t arcstat_iohits
;
531 /* Number of requests for which I/O has to be issued. */
532 kstat_named_t arcstat_misses
;
533 /* Same three, but specifically for demand data. */
534 kstat_named_t arcstat_demand_data_hits
;
535 kstat_named_t arcstat_demand_data_iohits
;
536 kstat_named_t arcstat_demand_data_misses
;
537 /* Same three, but specifically for demand metadata. */
538 kstat_named_t arcstat_demand_metadata_hits
;
539 kstat_named_t arcstat_demand_metadata_iohits
;
540 kstat_named_t arcstat_demand_metadata_misses
;
541 /* Same three, but specifically for prefetch data. */
542 kstat_named_t arcstat_prefetch_data_hits
;
543 kstat_named_t arcstat_prefetch_data_iohits
;
544 kstat_named_t arcstat_prefetch_data_misses
;
545 /* Same three, but specifically for prefetch metadata. */
546 kstat_named_t arcstat_prefetch_metadata_hits
;
547 kstat_named_t arcstat_prefetch_metadata_iohits
;
548 kstat_named_t arcstat_prefetch_metadata_misses
;
549 kstat_named_t arcstat_mru_hits
;
550 kstat_named_t arcstat_mru_ghost_hits
;
551 kstat_named_t arcstat_mfu_hits
;
552 kstat_named_t arcstat_mfu_ghost_hits
;
553 kstat_named_t arcstat_uncached_hits
;
554 kstat_named_t arcstat_deleted
;
556 * Number of buffers that could not be evicted because the hash lock
557 * was held by another thread. The lock may not necessarily be held
558 * by something using the same buffer, since hash locks are shared
559 * by multiple buffers.
561 kstat_named_t arcstat_mutex_miss
;
563 * Number of buffers skipped when updating the access state due to the
564 * header having already been released after acquiring the hash lock.
566 kstat_named_t arcstat_access_skip
;
568 * Number of buffers skipped because they have I/O in progress, are
569 * indirect prefetch buffers that have not lived long enough, or are
570 * not from the spa we're trying to evict from.
572 kstat_named_t arcstat_evict_skip
;
574 * Number of times arc_evict_state() was unable to evict enough
575 * buffers to reach its target amount.
577 kstat_named_t arcstat_evict_not_enough
;
578 kstat_named_t arcstat_evict_l2_cached
;
579 kstat_named_t arcstat_evict_l2_eligible
;
580 kstat_named_t arcstat_evict_l2_eligible_mfu
;
581 kstat_named_t arcstat_evict_l2_eligible_mru
;
582 kstat_named_t arcstat_evict_l2_ineligible
;
583 kstat_named_t arcstat_evict_l2_skip
;
584 kstat_named_t arcstat_hash_elements
;
585 kstat_named_t arcstat_hash_elements_max
;
586 kstat_named_t arcstat_hash_collisions
;
587 kstat_named_t arcstat_hash_chains
;
588 kstat_named_t arcstat_hash_chain_max
;
589 kstat_named_t arcstat_meta
;
590 kstat_named_t arcstat_pd
;
591 kstat_named_t arcstat_pm
;
592 kstat_named_t arcstat_c
;
593 kstat_named_t arcstat_c_min
;
594 kstat_named_t arcstat_c_max
;
595 kstat_named_t arcstat_size
;
597 * Number of compressed bytes stored in the arc_buf_hdr_t's b_pabd.
598 * Note that the compressed bytes may match the uncompressed bytes
599 * if the block is either not compressed or compressed arc is disabled.
601 kstat_named_t arcstat_compressed_size
;
603 * Uncompressed size of the data stored in b_pabd. If compressed
604 * arc is disabled then this value will be identical to the stat
607 kstat_named_t arcstat_uncompressed_size
;
609 * Number of bytes stored in all the arc_buf_t's. This is classified
610 * as "overhead" since this data is typically short-lived and will
611 * be evicted from the arc when it becomes unreferenced unless the
612 * zfs_keep_uncompressed_metadata or zfs_keep_uncompressed_level
613 * values have been set (see comment in dbuf.c for more information).
615 kstat_named_t arcstat_overhead_size
;
617 * Number of bytes consumed by internal ARC structures necessary
618 * for tracking purposes; these structures are not actually
619 * backed by ARC buffers. This includes arc_buf_hdr_t structures
620 * (allocated via arc_buf_hdr_t_full and arc_buf_hdr_t_l2only
621 * caches), and arc_buf_t structures (allocated via arc_buf_t
624 kstat_named_t arcstat_hdr_size
;
626 * Number of bytes consumed by ARC buffers of type equal to
627 * ARC_BUFC_DATA. This is generally consumed by buffers backing
628 * on disk user data (e.g. plain file contents).
630 kstat_named_t arcstat_data_size
;
632 * Number of bytes consumed by ARC buffers of type equal to
633 * ARC_BUFC_METADATA. This is generally consumed by buffers
634 * backing on disk data that is used for internal ZFS
635 * structures (e.g. ZAP, dnode, indirect blocks, etc).
637 kstat_named_t arcstat_metadata_size
;
639 * Number of bytes consumed by dmu_buf_impl_t objects.
641 kstat_named_t arcstat_dbuf_size
;
643 * Number of bytes consumed by dnode_t objects.
645 kstat_named_t arcstat_dnode_size
;
647 * Number of bytes consumed by bonus buffers.
649 kstat_named_t arcstat_bonus_size
;
650 #if defined(COMPAT_FREEBSD11)
652 * Sum of the previous three counters, provided for compatibility.
654 kstat_named_t arcstat_other_size
;
658 * Total number of bytes consumed by ARC buffers residing in the
659 * arc_anon state. This includes *all* buffers in the arc_anon
660 * state; e.g. data, metadata, evictable, and unevictable buffers
661 * are all included in this value.
663 kstat_named_t arcstat_anon_size
;
664 kstat_named_t arcstat_anon_data
;
665 kstat_named_t arcstat_anon_metadata
;
667 * Number of bytes consumed by ARC buffers that meet the
668 * following criteria: backing buffers of type ARC_BUFC_DATA,
669 * residing in the arc_anon state, and are eligible for eviction
670 * (e.g. have no outstanding holds on the buffer).
672 kstat_named_t arcstat_anon_evictable_data
;
674 * Number of bytes consumed by ARC buffers that meet the
675 * following criteria: backing buffers of type ARC_BUFC_METADATA,
676 * residing in the arc_anon state, and are eligible for eviction
677 * (e.g. have no outstanding holds on the buffer).
679 kstat_named_t arcstat_anon_evictable_metadata
;
681 * Total number of bytes consumed by ARC buffers residing in the
682 * arc_mru state. This includes *all* buffers in the arc_mru
683 * state; e.g. data, metadata, evictable, and unevictable buffers
684 * are all included in this value.
686 kstat_named_t arcstat_mru_size
;
687 kstat_named_t arcstat_mru_data
;
688 kstat_named_t arcstat_mru_metadata
;
690 * Number of bytes consumed by ARC buffers that meet the
691 * following criteria: backing buffers of type ARC_BUFC_DATA,
692 * residing in the arc_mru state, and are eligible for eviction
693 * (e.g. have no outstanding holds on the buffer).
695 kstat_named_t arcstat_mru_evictable_data
;
697 * Number of bytes consumed by ARC buffers that meet the
698 * following criteria: backing buffers of type ARC_BUFC_METADATA,
699 * residing in the arc_mru state, and are eligible for eviction
700 * (e.g. have no outstanding holds on the buffer).
702 kstat_named_t arcstat_mru_evictable_metadata
;
704 * Total number of bytes that *would have been* consumed by ARC
705 * buffers in the arc_mru_ghost state. The key thing to note
706 * here, is the fact that this size doesn't actually indicate
707 * RAM consumption. The ghost lists only consist of headers and
708 * don't actually have ARC buffers linked off of these headers.
709 * Thus, *if* the headers had associated ARC buffers, these
710 * buffers *would have* consumed this number of bytes.
712 kstat_named_t arcstat_mru_ghost_size
;
713 kstat_named_t arcstat_mru_ghost_data
;
714 kstat_named_t arcstat_mru_ghost_metadata
;
716 * Number of bytes that *would have been* consumed by ARC
717 * buffers that are eligible for eviction, of type
718 * ARC_BUFC_DATA, and linked off the arc_mru_ghost state.
720 kstat_named_t arcstat_mru_ghost_evictable_data
;
722 * Number of bytes that *would have been* consumed by ARC
723 * buffers that are eligible for eviction, of type
724 * ARC_BUFC_METADATA, and linked off the arc_mru_ghost state.
726 kstat_named_t arcstat_mru_ghost_evictable_metadata
;
728 * Total number of bytes consumed by ARC buffers residing in the
729 * arc_mfu state. This includes *all* buffers in the arc_mfu
730 * state; e.g. data, metadata, evictable, and unevictable buffers
731 * are all included in this value.
733 kstat_named_t arcstat_mfu_size
;
734 kstat_named_t arcstat_mfu_data
;
735 kstat_named_t arcstat_mfu_metadata
;
737 * Number of bytes consumed by ARC buffers that are eligible for
738 * eviction, of type ARC_BUFC_DATA, and reside in the arc_mfu
741 kstat_named_t arcstat_mfu_evictable_data
;
743 * Number of bytes consumed by ARC buffers that are eligible for
744 * eviction, of type ARC_BUFC_METADATA, and reside in the
747 kstat_named_t arcstat_mfu_evictable_metadata
;
749 * Total number of bytes that *would have been* consumed by ARC
750 * buffers in the arc_mfu_ghost state. See the comment above
751 * arcstat_mru_ghost_size for more details.
753 kstat_named_t arcstat_mfu_ghost_size
;
754 kstat_named_t arcstat_mfu_ghost_data
;
755 kstat_named_t arcstat_mfu_ghost_metadata
;
757 * Number of bytes that *would have been* consumed by ARC
758 * buffers that are eligible for eviction, of type
759 * ARC_BUFC_DATA, and linked off the arc_mfu_ghost state.
761 kstat_named_t arcstat_mfu_ghost_evictable_data
;
763 * Number of bytes that *would have been* consumed by ARC
764 * buffers that are eligible for eviction, of type
765 * ARC_BUFC_METADATA, and linked off the arc_mru_ghost state.
767 kstat_named_t arcstat_mfu_ghost_evictable_metadata
;
769 * Total number of bytes that are going to be evicted from ARC due to
770 * ARC_FLAG_UNCACHED being set.
772 kstat_named_t arcstat_uncached_size
;
773 kstat_named_t arcstat_uncached_data
;
774 kstat_named_t arcstat_uncached_metadata
;
776 * Number of data bytes that are going to be evicted from ARC due to
777 * ARC_FLAG_UNCACHED being set.
779 kstat_named_t arcstat_uncached_evictable_data
;
781 * Number of metadata bytes that that are going to be evicted from ARC
782 * due to ARC_FLAG_UNCACHED being set.
784 kstat_named_t arcstat_uncached_evictable_metadata
;
785 kstat_named_t arcstat_l2_hits
;
786 kstat_named_t arcstat_l2_misses
;
788 * Allocated size (in bytes) of L2ARC cached buffers by ARC state.
790 kstat_named_t arcstat_l2_prefetch_asize
;
791 kstat_named_t arcstat_l2_mru_asize
;
792 kstat_named_t arcstat_l2_mfu_asize
;
794 * Allocated size (in bytes) of L2ARC cached buffers by buffer content
797 kstat_named_t arcstat_l2_bufc_data_asize
;
798 kstat_named_t arcstat_l2_bufc_metadata_asize
;
799 kstat_named_t arcstat_l2_feeds
;
800 kstat_named_t arcstat_l2_rw_clash
;
801 kstat_named_t arcstat_l2_read_bytes
;
802 kstat_named_t arcstat_l2_write_bytes
;
803 kstat_named_t arcstat_l2_writes_sent
;
804 kstat_named_t arcstat_l2_writes_done
;
805 kstat_named_t arcstat_l2_writes_error
;
806 kstat_named_t arcstat_l2_writes_lock_retry
;
807 kstat_named_t arcstat_l2_evict_lock_retry
;
808 kstat_named_t arcstat_l2_evict_reading
;
809 kstat_named_t arcstat_l2_evict_l1cached
;
810 kstat_named_t arcstat_l2_free_on_write
;
811 kstat_named_t arcstat_l2_abort_lowmem
;
812 kstat_named_t arcstat_l2_cksum_bad
;
813 kstat_named_t arcstat_l2_io_error
;
814 kstat_named_t arcstat_l2_lsize
;
815 kstat_named_t arcstat_l2_psize
;
816 kstat_named_t arcstat_l2_hdr_size
;
818 * Number of L2ARC log blocks written. These are used for restoring the
819 * L2ARC. Updated during writing of L2ARC log blocks.
821 kstat_named_t arcstat_l2_log_blk_writes
;
823 * Moving average of the aligned size of the L2ARC log blocks, in
824 * bytes. Updated during L2ARC rebuild and during writing of L2ARC
827 kstat_named_t arcstat_l2_log_blk_avg_asize
;
828 /* Aligned size of L2ARC log blocks on L2ARC devices. */
829 kstat_named_t arcstat_l2_log_blk_asize
;
830 /* Number of L2ARC log blocks present on L2ARC devices. */
831 kstat_named_t arcstat_l2_log_blk_count
;
833 * Moving average of the aligned size of L2ARC restored data, in bytes,
834 * to the aligned size of their metadata in L2ARC, in bytes.
835 * Updated during L2ARC rebuild and during writing of L2ARC log blocks.
837 kstat_named_t arcstat_l2_data_to_meta_ratio
;
839 * Number of times the L2ARC rebuild was successful for an L2ARC device.
841 kstat_named_t arcstat_l2_rebuild_success
;
843 * Number of times the L2ARC rebuild failed because the device header
844 * was in an unsupported format or corrupted.
846 kstat_named_t arcstat_l2_rebuild_abort_unsupported
;
848 * Number of times the L2ARC rebuild failed because of IO errors
849 * while reading a log block.
851 kstat_named_t arcstat_l2_rebuild_abort_io_errors
;
853 * Number of times the L2ARC rebuild failed because of IO errors when
854 * reading the device header.
856 kstat_named_t arcstat_l2_rebuild_abort_dh_errors
;
858 * Number of L2ARC log blocks which failed to be restored due to
861 kstat_named_t arcstat_l2_rebuild_abort_cksum_lb_errors
;
863 * Number of times the L2ARC rebuild was aborted due to low system
866 kstat_named_t arcstat_l2_rebuild_abort_lowmem
;
867 /* Logical size of L2ARC restored data, in bytes. */
868 kstat_named_t arcstat_l2_rebuild_size
;
869 /* Aligned size of L2ARC restored data, in bytes. */
870 kstat_named_t arcstat_l2_rebuild_asize
;
872 * Number of L2ARC log entries (buffers) that were successfully
875 kstat_named_t arcstat_l2_rebuild_bufs
;
877 * Number of L2ARC log entries (buffers) already cached in ARC. These
878 * were not restored again.
880 kstat_named_t arcstat_l2_rebuild_bufs_precached
;
882 * Number of L2ARC log blocks that were restored successfully. Each
883 * log block may hold up to L2ARC_LOG_BLK_MAX_ENTRIES buffers.
885 kstat_named_t arcstat_l2_rebuild_log_blks
;
886 kstat_named_t arcstat_memory_throttle_count
;
887 kstat_named_t arcstat_memory_direct_count
;
888 kstat_named_t arcstat_memory_indirect_count
;
889 kstat_named_t arcstat_memory_all_bytes
;
890 kstat_named_t arcstat_memory_free_bytes
;
891 kstat_named_t arcstat_memory_available_bytes
;
892 kstat_named_t arcstat_no_grow
;
893 kstat_named_t arcstat_tempreserve
;
894 kstat_named_t arcstat_loaned_bytes
;
895 kstat_named_t arcstat_prune
;
896 kstat_named_t arcstat_meta_used
;
897 kstat_named_t arcstat_dnode_limit
;
898 kstat_named_t arcstat_async_upgrade_sync
;
899 /* Number of predictive prefetch requests. */
900 kstat_named_t arcstat_predictive_prefetch
;
901 /* Number of requests for which predictive prefetch has completed. */
902 kstat_named_t arcstat_demand_hit_predictive_prefetch
;
903 /* Number of requests for which predictive prefetch was running. */
904 kstat_named_t arcstat_demand_iohit_predictive_prefetch
;
905 /* Number of prescient prefetch requests. */
906 kstat_named_t arcstat_prescient_prefetch
;
907 /* Number of requests for which prescient prefetch has completed. */
908 kstat_named_t arcstat_demand_hit_prescient_prefetch
;
909 /* Number of requests for which prescient prefetch was running. */
910 kstat_named_t arcstat_demand_iohit_prescient_prefetch
;
911 kstat_named_t arcstat_need_free
;
912 kstat_named_t arcstat_sys_free
;
913 kstat_named_t arcstat_raw_size
;
914 kstat_named_t arcstat_cached_only_in_progress
;
915 kstat_named_t arcstat_abd_chunk_waste_size
;
918 typedef struct arc_sums
{
919 wmsum_t arcstat_hits
;
920 wmsum_t arcstat_iohits
;
921 wmsum_t arcstat_misses
;
922 wmsum_t arcstat_demand_data_hits
;
923 wmsum_t arcstat_demand_data_iohits
;
924 wmsum_t arcstat_demand_data_misses
;
925 wmsum_t arcstat_demand_metadata_hits
;
926 wmsum_t arcstat_demand_metadata_iohits
;
927 wmsum_t arcstat_demand_metadata_misses
;
928 wmsum_t arcstat_prefetch_data_hits
;
929 wmsum_t arcstat_prefetch_data_iohits
;
930 wmsum_t arcstat_prefetch_data_misses
;
931 wmsum_t arcstat_prefetch_metadata_hits
;
932 wmsum_t arcstat_prefetch_metadata_iohits
;
933 wmsum_t arcstat_prefetch_metadata_misses
;
934 wmsum_t arcstat_mru_hits
;
935 wmsum_t arcstat_mru_ghost_hits
;
936 wmsum_t arcstat_mfu_hits
;
937 wmsum_t arcstat_mfu_ghost_hits
;
938 wmsum_t arcstat_uncached_hits
;
939 wmsum_t arcstat_deleted
;
940 wmsum_t arcstat_mutex_miss
;
941 wmsum_t arcstat_access_skip
;
942 wmsum_t arcstat_evict_skip
;
943 wmsum_t arcstat_evict_not_enough
;
944 wmsum_t arcstat_evict_l2_cached
;
945 wmsum_t arcstat_evict_l2_eligible
;
946 wmsum_t arcstat_evict_l2_eligible_mfu
;
947 wmsum_t arcstat_evict_l2_eligible_mru
;
948 wmsum_t arcstat_evict_l2_ineligible
;
949 wmsum_t arcstat_evict_l2_skip
;
950 wmsum_t arcstat_hash_collisions
;
951 wmsum_t arcstat_hash_chains
;
952 aggsum_t arcstat_size
;
953 wmsum_t arcstat_compressed_size
;
954 wmsum_t arcstat_uncompressed_size
;
955 wmsum_t arcstat_overhead_size
;
956 wmsum_t arcstat_hdr_size
;
957 wmsum_t arcstat_data_size
;
958 wmsum_t arcstat_metadata_size
;
959 wmsum_t arcstat_dbuf_size
;
960 wmsum_t arcstat_dnode_size
;
961 wmsum_t arcstat_bonus_size
;
962 wmsum_t arcstat_l2_hits
;
963 wmsum_t arcstat_l2_misses
;
964 wmsum_t arcstat_l2_prefetch_asize
;
965 wmsum_t arcstat_l2_mru_asize
;
966 wmsum_t arcstat_l2_mfu_asize
;
967 wmsum_t arcstat_l2_bufc_data_asize
;
968 wmsum_t arcstat_l2_bufc_metadata_asize
;
969 wmsum_t arcstat_l2_feeds
;
970 wmsum_t arcstat_l2_rw_clash
;
971 wmsum_t arcstat_l2_read_bytes
;
972 wmsum_t arcstat_l2_write_bytes
;
973 wmsum_t arcstat_l2_writes_sent
;
974 wmsum_t arcstat_l2_writes_done
;
975 wmsum_t arcstat_l2_writes_error
;
976 wmsum_t arcstat_l2_writes_lock_retry
;
977 wmsum_t arcstat_l2_evict_lock_retry
;
978 wmsum_t arcstat_l2_evict_reading
;
979 wmsum_t arcstat_l2_evict_l1cached
;
980 wmsum_t arcstat_l2_free_on_write
;
981 wmsum_t arcstat_l2_abort_lowmem
;
982 wmsum_t arcstat_l2_cksum_bad
;
983 wmsum_t arcstat_l2_io_error
;
984 wmsum_t arcstat_l2_lsize
;
985 wmsum_t arcstat_l2_psize
;
986 aggsum_t arcstat_l2_hdr_size
;
987 wmsum_t arcstat_l2_log_blk_writes
;
988 wmsum_t arcstat_l2_log_blk_asize
;
989 wmsum_t arcstat_l2_log_blk_count
;
990 wmsum_t arcstat_l2_rebuild_success
;
991 wmsum_t arcstat_l2_rebuild_abort_unsupported
;
992 wmsum_t arcstat_l2_rebuild_abort_io_errors
;
993 wmsum_t arcstat_l2_rebuild_abort_dh_errors
;
994 wmsum_t arcstat_l2_rebuild_abort_cksum_lb_errors
;
995 wmsum_t arcstat_l2_rebuild_abort_lowmem
;
996 wmsum_t arcstat_l2_rebuild_size
;
997 wmsum_t arcstat_l2_rebuild_asize
;
998 wmsum_t arcstat_l2_rebuild_bufs
;
999 wmsum_t arcstat_l2_rebuild_bufs_precached
;
1000 wmsum_t arcstat_l2_rebuild_log_blks
;
1001 wmsum_t arcstat_memory_throttle_count
;
1002 wmsum_t arcstat_memory_direct_count
;
1003 wmsum_t arcstat_memory_indirect_count
;
1004 wmsum_t arcstat_prune
;
1005 wmsum_t arcstat_meta_used
;
1006 wmsum_t arcstat_async_upgrade_sync
;
1007 wmsum_t arcstat_predictive_prefetch
;
1008 wmsum_t arcstat_demand_hit_predictive_prefetch
;
1009 wmsum_t arcstat_demand_iohit_predictive_prefetch
;
1010 wmsum_t arcstat_prescient_prefetch
;
1011 wmsum_t arcstat_demand_hit_prescient_prefetch
;
1012 wmsum_t arcstat_demand_iohit_prescient_prefetch
;
1013 wmsum_t arcstat_raw_size
;
1014 wmsum_t arcstat_cached_only_in_progress
;
1015 wmsum_t arcstat_abd_chunk_waste_size
;
1018 typedef struct arc_evict_waiter
{
1019 list_node_t aew_node
;
1022 } arc_evict_waiter_t
;
/*
 * ARCSTAT(stat) names the 64-bit value of kstat field `stat` in arc_stats.
 * It expands to an lvalue, so it can be read or assigned.
 */
#define	ARCSTAT(stat)	(arc_stats.stat.value.ui64)

/*
 * ARCSTAT_INCR(stat, val) adds val to counter `stat` in arc_sums through
 * wmsum_add().  val may be negative (see ARCSTAT_BUMPDOWN).
 */
#define	ARCSTAT_INCR(stat, val) \
	wmsum_add(&arc_sums.stat, (val))

/* Adjust counter `stat` by exactly +1 / -1. */
#define	ARCSTAT_BUMP(stat)	ARCSTAT_INCR(stat, 1)
#define	ARCSTAT_BUMPDOWN(stat)	ARCSTAT_INCR(stat, -1)
/*
 * Short names for ARC tunables/targets that are stored directly in the
 * corresponding arc_stats kstat values (see ARCSTAT()).
 */
#define	arc_no_grow	ARCSTAT(arcstat_no_grow) /* do not grow cache size */
#define	arc_meta	ARCSTAT(arcstat_meta)	/* target frac of metadata */
#define	arc_pd		ARCSTAT(arcstat_pd)	/* target frac of data MRU */
#define	arc_pm		ARCSTAT(arcstat_pm)	/* target frac of meta MRU */
#define	arc_c		ARCSTAT(arcstat_c)	/* target size of cache */
#define	arc_c_min	ARCSTAT(arcstat_c_min)	/* min target cache size */
#define	arc_c_max	ARCSTAT(arcstat_c_max)	/* max target cache size */
#define	arc_sys_free	ARCSTAT(arcstat_sys_free) /* target system free bytes */
/*
 * Pointer-valued aliases for the ARC state objects: each expands to the
 * address of the correspondingly named ARC_* object.
 */
#define	arc_anon	(&ARC_anon)
#define	arc_mru		(&ARC_mru)
#define	arc_mru_ghost	(&ARC_mru_ghost)
#define	arc_mfu		(&ARC_mfu)
#define	arc_mfu_ghost	(&ARC_mfu_ghost)
#define	arc_l2c_only	(&ARC_l2c_only)
#define	arc_uncached	(&ARC_uncached)
1049 extern taskq_t
*arc_prune_taskq
;
1050 extern arc_stats_t arc_stats
;
1051 extern arc_sums_t arc_sums
;
1052 extern hrtime_t arc_growtime
;
1053 extern boolean_t arc_warm
;
1054 extern uint_t arc_grow_retry
;
1055 extern uint_t arc_no_grow_shift
;
1056 extern uint_t arc_shrink_shift
;
1057 extern kmutex_t arc_prune_mtx
;
1058 extern list_t arc_prune_list
;
1059 extern arc_state_t ARC_mfu
;
1060 extern arc_state_t ARC_mru
;
1061 extern uint_t zfs_arc_pc_percent
;
1062 extern uint_t arc_lotsfree_percent
;
1063 extern uint64_t zfs_arc_min
;
1064 extern uint64_t zfs_arc_max
;
1066 extern void arc_reduce_target_size(int64_t to_free
);
1067 extern boolean_t
arc_reclaim_needed(void);
1068 extern void arc_kmem_reap_soon(void);
1069 extern void arc_wait_for_eviction(uint64_t, boolean_t
);
1071 extern void arc_lowmem_init(void);
1072 extern void arc_lowmem_fini(void);
1073 extern void arc_prune_async(uint64_t);
1074 extern int arc_memory_throttle(spa_t
*spa
, uint64_t reserve
, uint64_t txg
);
1075 extern uint64_t arc_free_memory(void);
1076 extern int64_t arc_available_memory(void);
1077 extern void arc_tuning_update(boolean_t
);
1078 extern void arc_register_hotplug(void);
1079 extern void arc_unregister_hotplug(void);
/*
 * Setter callbacks for ARC parameters.  Their parameter list is supplied
 * by the ZFS_MODULE_PARAM_ARGS macro.
 */
extern int param_set_arc_u64(ZFS_MODULE_PARAM_ARGS);
extern int param_set_arc_int(ZFS_MODULE_PARAM_ARGS);
extern int param_set_arc_min(ZFS_MODULE_PARAM_ARGS);
extern int param_set_arc_max(ZFS_MODULE_PARAM_ARGS);
1087 boolean_t
l2arc_log_blkptr_valid(l2arc_dev_t
*dev
,
1088 const l2arc_log_blkptr_t
*lbp
);
1090 /* used in vdev_trim.c */
1091 void l2arc_dev_hdr_update(l2arc_dev_t
*dev
);
1092 l2arc_dev_t
*l2arc_vdev_get(vdev_t
*vd
);
1098 #endif /* _SYS_ARC_IMPL_H */