]> git.proxmox.com Git - mirror_zfs.git/blob - include/sys/arc_impl.h
Trim L2ARC
[mirror_zfs.git] / include / sys / arc_impl.h
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2013, Delphix. All rights reserved.
24 * Copyright (c) 2013, Saso Kiselkov. All rights reserved.
25 * Copyright (c) 2013, Nexenta Systems, Inc. All rights reserved.
26 * Copyright (c) 2020, George Amanakis. All rights reserved.
27 */
28
29 #ifndef _SYS_ARC_IMPL_H
30 #define _SYS_ARC_IMPL_H
31
32 #include <sys/arc.h>
33 #include <sys/zio_crypt.h>
34 #include <sys/zthr.h>
35 #include <sys/aggsum.h>
36
37 #ifdef __cplusplus
38 extern "C" {
39 #endif
40
41 /*
42 * Note that buffers can be in one of 6 states:
43 * ARC_anon - anonymous (discussed below)
44 * ARC_mru - recently used, currently cached
45 * ARC_mru_ghost - recently used, no longer in cache
46 * ARC_mfu - frequently used, currently cached
47 * ARC_mfu_ghost - frequently used, no longer in cache
48 * ARC_l2c_only - exists in L2ARC but not other states
49 * When there are no active references to the buffer, they are
50 * linked onto a list in one of these arc states. These are
51 * the only buffers that can be evicted or deleted. Within each
52 * state there are multiple lists, one for meta-data and one for
53 * non-meta-data. Meta-data (indirect blocks, blocks of dnodes,
54 * etc.) is tracked separately so that it can be managed more
55 * explicitly: favored over data, limited explicitly.
56 *
57 * Anonymous buffers are buffers that are not associated with
58 * a DVA. These are buffers that hold dirty block copies
59 * before they are written to stable storage. By definition,
60 * they are "ref'd" and are considered part of arc_mru
61 * that cannot be freed. Generally, they will acquire a DVA
62 * as they are written and migrate onto the arc_mru list.
63 *
64 * The ARC_l2c_only state is for buffers that are in the second
65 * level ARC but no longer in any of the ARC_m* lists. The second
66 * level ARC itself may also contain buffers that are in any of
67 * the ARC_m* states - meaning that a buffer can exist in two
68 * places. The reason for the ARC_l2c_only state is to keep the
69 * buffer header in the hash table, so that reads that hit the
70 * second level ARC benefit from these fast lookups.
71 */
72
73 typedef struct arc_state {
74 /*
75 * list of evictable buffers
76 */
77 multilist_t *arcs_list[ARC_BUFC_NUMTYPES];
78 /*
79 * total amount of evictable data in this state
80 */
81 zfs_refcount_t arcs_esize[ARC_BUFC_NUMTYPES];
82 /*
83 * total amount of data in this state; this includes: evictable,
84 * non-evictable, ARC_BUFC_DATA, and ARC_BUFC_METADATA.
85 */
86 zfs_refcount_t arcs_size;
87 /*
88 * supports the "dbufs" kstat
89 */
90 arc_state_type_t arcs_state;
91 } arc_state_t;
92
93 typedef struct arc_callback arc_callback_t;
94
95 struct arc_callback {
96 void *acb_private;
97 arc_read_done_func_t *acb_done;
98 arc_buf_t *acb_buf;
99 boolean_t acb_encrypted;
100 boolean_t acb_compressed;
101 boolean_t acb_noauth;
102 zbookmark_phys_t acb_zb;
103 zio_t *acb_zio_dummy;
104 zio_t *acb_zio_head;
105 arc_callback_t *acb_next;
106 };
107
108 typedef struct arc_write_callback arc_write_callback_t;
109
110 struct arc_write_callback {
111 void *awcb_private;
112 arc_write_done_func_t *awcb_ready;
113 arc_write_done_func_t *awcb_children_ready;
114 arc_write_done_func_t *awcb_physdone;
115 arc_write_done_func_t *awcb_done;
116 arc_buf_t *awcb_buf;
117 };
118
119 /*
120 * ARC buffers are separated into multiple structs as a memory saving measure:
121 * - Common fields struct, always defined, and embedded within it:
122 * - L2-only fields, always allocated but undefined when not in L2ARC
123 * - L1-only fields, only allocated when in L1ARC
124 *
125 * Buffer in L1 Buffer only in L2
126 * +------------------------+ +------------------------+
127 * | arc_buf_hdr_t | | arc_buf_hdr_t |
128 * | | | |
129 * | | | |
130 * | | | |
131 * +------------------------+ +------------------------+
132 * | l2arc_buf_hdr_t | | l2arc_buf_hdr_t |
133 * | (undefined if L1-only) | | |
134 * +------------------------+ +------------------------+
135 * | l1arc_buf_hdr_t |
136 * | |
137 * | |
138 * | |
139 * | |
140 * +------------------------+
141 *
142 * Because it's possible for the L2ARC to become extremely large, we can wind
143 * up eating a lot of memory in L2ARC buffer headers, so the size of a header
144 * is minimized by only allocating the fields necessary for an L1-cached buffer
145 * when a header is actually in the L1 cache. The sub-headers (l1arc_buf_hdr and
146 * l2arc_buf_hdr) are embedded rather than allocated separately to save a couple
147 * words in pointers. arc_hdr_realloc() is used to switch a header between
148 * these two allocation states.
149 */
150 typedef struct l1arc_buf_hdr {
151 kmutex_t b_freeze_lock;
152 zio_cksum_t *b_freeze_cksum;
153
154 arc_buf_t *b_buf;
155 uint32_t b_bufcnt;
156 /* for waiting on writes to complete */
157 kcondvar_t b_cv;
158 uint8_t b_byteswap;
159
160
161 /* protected by arc state mutex */
162 arc_state_t *b_state;
163 multilist_node_t b_arc_node;
164
165 /* updated atomically */
166 clock_t b_arc_access;
167 uint32_t b_mru_hits;
168 uint32_t b_mru_ghost_hits;
169 uint32_t b_mfu_hits;
170 uint32_t b_mfu_ghost_hits;
171 uint32_t b_l2_hits;
172
173 /* self protecting */
174 zfs_refcount_t b_refcnt;
175
176 arc_callback_t *b_acb;
177 abd_t *b_pabd;
178 } l1arc_buf_hdr_t;
179
/* Flag bits stored in l2arc_dev_hdr_phys_t's dh_flags. */
typedef enum l2arc_dev_hdr_flags_t {
	/* mirror of l2ad_first */
	L2ARC_DEV_HDR_EVICT_FIRST = (1 << 0)
} l2arc_dev_hdr_flags_t;
183
184 /*
185 * Pointer used in persistent L2ARC (for pointing to log blocks).
186 */
187 typedef struct l2arc_log_blkptr {
188 /*
189 * Offset of log block within the device, in bytes
190 */
191 uint64_t lbp_daddr;
192 /*
193 * Aligned payload size (in bytes) of the log block
194 */
195 uint64_t lbp_payload_asize;
196 /*
197 * Offset in bytes of the first buffer in the payload
198 */
199 uint64_t lbp_payload_start;
200 /*
201 * lbp_prop has the following format:
202 * * logical size (in bytes)
203 * * aligned (after compression) size (in bytes)
204 * * compression algorithm (we always LZ4-compress l2arc logs)
205 * * checksum algorithm (used for lbp_cksum)
206 */
207 uint64_t lbp_prop;
208 zio_cksum_t lbp_cksum; /* checksum of log */
209 } l2arc_log_blkptr_t;
210
211 /*
212 * The persistent L2ARC device header.
213 * Byte order of magic determines whether 64-bit bswap of fields is necessary.
214 */
215 typedef struct l2arc_dev_hdr_phys {
216 uint64_t dh_magic; /* L2ARC_DEV_HDR_MAGIC */
217 uint64_t dh_version; /* Persistent L2ARC version */
218
219 /*
220 * Global L2ARC device state and metadata.
221 */
222 uint64_t dh_spa_guid;
223 uint64_t dh_vdev_guid;
224 uint64_t dh_log_entries; /* mirror of l2ad_log_entries */
225 uint64_t dh_evict; /* evicted offset in bytes */
226 uint64_t dh_flags; /* l2arc_dev_hdr_flags_t */
227 /*
228 * Used in zdb.c for determining if a log block is valid, in the same
229 * way that l2arc_rebuild() does.
230 */
231 uint64_t dh_start; /* mirror of l2ad_start */
232 uint64_t dh_end; /* mirror of l2ad_end */
233 /*
234 * Start of log block chain. [0] -> newest log, [1] -> one older (used
235 * for initiating prefetch).
236 */
237 l2arc_log_blkptr_t dh_start_lbps[2];
238 /*
239 * Aligned size of all log blocks as accounted by vdev_space_update().
240 */
241 uint64_t dh_lb_asize; /* mirror of l2ad_lb_asize */
242 uint64_t dh_lb_count; /* mirror of l2ad_lb_count */
243 /*
244 * Mirrors of vdev_trim_action_time and vdev_trim_state, used to
245 * display when the cache device was fully trimmed for the last
246 * time.
247 */
248 uint64_t dh_trim_action_time;
249 uint64_t dh_trim_state;
250 const uint64_t dh_pad[30]; /* pad to 512 bytes */
251 zio_eck_t dh_tail;
252 } l2arc_dev_hdr_phys_t;
253 CTASSERT_GLOBAL(sizeof (l2arc_dev_hdr_phys_t) == SPA_MINBLOCKSIZE);
254
255 /*
256 * A single ARC buffer header entry in a l2arc_log_blk_phys_t.
257 */
258 typedef struct l2arc_log_ent_phys {
259 dva_t le_dva; /* dva of buffer */
260 uint64_t le_birth; /* birth txg of buffer */
261 /*
262 * le_prop has the following format:
263 * * logical size (in bytes)
264 * * physical (compressed) size (in bytes)
265 * * compression algorithm
266 * * object type (used to restore arc_buf_contents_t)
267 * * protected status (used for encryption)
268 * * prefetch status (used in l2arc_read_done())
269 */
270 uint64_t le_prop;
271 uint64_t le_daddr; /* buf location on l2dev */
272 /*
273 * We pad the size of each entry to a power of 2 so that the size of
274 * l2arc_log_blk_phys_t is power-of-2 aligned with SPA_MINBLOCKSHIFT,
275 * because of the L2ARC_SET_*SIZE macros.
276 */
277 const uint64_t le_pad[3]; /* pad to 64 bytes */
278 } l2arc_log_ent_phys_t;
279
280 #define L2ARC_LOG_BLK_MAX_ENTRIES (1022)
281
282 /*
283 * A log block of up to 1022 ARC buffer log entries, chained into the
284 * persistent L2ARC metadata linked list. Byte order of magic determines
285 * whether 64-bit bswap of fields is necessary.
286 */
287 typedef struct l2arc_log_blk_phys {
288 uint64_t lb_magic; /* L2ARC_LOG_BLK_MAGIC */
289 /*
290 * There are 2 chains (headed by dh_start_lbps[2]), and this field
291 * points back to the previous block in this chain. We alternate
292 * which chain we append to, so they are time-wise and offset-wise
293 * interleaved, but that is an optimization rather than for
294 * correctness.
295 */
296 l2arc_log_blkptr_t lb_prev_lbp; /* pointer to prev log block */
297 /*
298 * Pad header section to 128 bytes
299 */
300 uint64_t lb_pad[7];
301 /* Payload */
302 l2arc_log_ent_phys_t lb_entries[L2ARC_LOG_BLK_MAX_ENTRIES];
303 } l2arc_log_blk_phys_t; /* 64K total */
304
305 /*
306 * The size of l2arc_log_blk_phys_t has to be power-of-2 aligned with
307 * SPA_MINBLOCKSHIFT because of L2BLK_SET_*SIZE macros.
308 */
309 CTASSERT_GLOBAL(IS_P2ALIGNED(sizeof (l2arc_log_blk_phys_t),
310 1ULL << SPA_MINBLOCKSHIFT));
311 CTASSERT_GLOBAL(sizeof (l2arc_log_blk_phys_t) >= SPA_MINBLOCKSIZE);
312 CTASSERT_GLOBAL(sizeof (l2arc_log_blk_phys_t) <= SPA_MAXBLOCKSIZE);
313
314 /*
315 * These structures hold in-flight abd buffers for log blocks as they're being
316 * written to the L2ARC device.
317 */
318 typedef struct l2arc_lb_abd_buf {
319 abd_t *abd;
320 list_node_t node;
321 } l2arc_lb_abd_buf_t;
322
323 /*
324 * These structures hold pointers to log blocks present on the L2ARC device.
325 */
326 typedef struct l2arc_lb_ptr_buf {
327 l2arc_log_blkptr_t *lb_ptr;
328 list_node_t node;
329 } l2arc_lb_ptr_buf_t;
330
/*
 * Accessors for the packed le_prop and lbp_prop words.
 *
 * Bit positions used by the macros below:
 *	LSIZE		SPA_LSIZEBITS bits starting at bit 0
 *	PSIZE		SPA_PSIZEBITS bits starting at bit 16
 *	COMPRESS	SPA_COMPRESSBITS bits starting at bit 32
 *	PREFETCH	bit 39
 *	CHECKSUM	8 bits starting at bit 40
 *	TYPE		8 bits starting at bit 48
 *	PROTECTED	bit 56
 *
 * The two size fields go through BF64_{GET,SET}_SB with a shift of
 * SPA_MINBLOCKSHIFT and a bias of 1.
 */
#define	L2BLK_GET_LSIZE(field)	\
	BF64_GET_SB((field), 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1)
#define	L2BLK_SET_LSIZE(field, x)	\
	BF64_SET_SB((field), 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)

#define	L2BLK_GET_PSIZE(field)	\
	BF64_GET_SB((field), 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1)
#define	L2BLK_SET_PSIZE(field, x)	\
	BF64_SET_SB((field), 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)

#define	L2BLK_GET_COMPRESS(field)	\
	BF64_GET((field), 32, SPA_COMPRESSBITS)
#define	L2BLK_SET_COMPRESS(field, x)	\
	BF64_SET((field), 32, SPA_COMPRESSBITS, x)

#define	L2BLK_GET_PREFETCH(field)	BF64_GET((field), 39, 1)
#define	L2BLK_SET_PREFETCH(field, x)	BF64_SET((field), 39, 1, x)

#define	L2BLK_GET_CHECKSUM(field)	BF64_GET((field), 40, 8)
#define	L2BLK_SET_CHECKSUM(field, x)	BF64_SET((field), 40, 8, x)

#define	L2BLK_GET_TYPE(field)		BF64_GET((field), 48, 8)
#define	L2BLK_SET_TYPE(field, x)	BF64_SET((field), 48, 8, x)

#define	L2BLK_GET_PROTECTED(field)	BF64_GET((field), 56, 1)
#define	L2BLK_SET_PROTECTED(field, x)	BF64_SET((field), 56, 1, x)
352
/*
 * Exchange the values of two pointer lvalues.
 *
 * x, y: modifiable pointer lvalues; each is evaluated twice, so neither
 *	may have side effects.
 *
 * The arguments are parenthesized in the expansion, and the temporary has
 * a macro-specific name so that a caller variable called "tmp" can be
 * passed in without being shadowed by the macro's own local.
 */
#define	PTR_SWAP(x, y)				\
	do {					\
		void *ptr_swap_tmp_ = (x);	\
		(x) = (y);			\
		(y) = ptr_swap_tmp_;		\
		_NOTE(CONSTCOND)		\
	} while (0)
360
/*
 * Magic numbers of the persistent L2ARC device header and log block; the
 * bytes of each value spell an ASCII tag.
 */
#define	L2ARC_DEV_HDR_MAGIC	0x5a46534341434845LLU	/* ASCII: "ZFSCACHE" */
#define	L2ARC_LOG_BLK_MAGIC	0x4c4f47424c4b4844LLU	/* ASCII: "LOGBLKHD" */
363
364 /*
365 * L2ARC Internals
366 */
367 typedef struct l2arc_dev {
368 vdev_t *l2ad_vdev; /* vdev */
369 spa_t *l2ad_spa; /* spa */
370 uint64_t l2ad_hand; /* next write location */
371 uint64_t l2ad_start; /* first addr on device */
372 uint64_t l2ad_end; /* last addr on device */
373 boolean_t l2ad_first; /* first sweep through */
374 boolean_t l2ad_writing; /* currently writing */
375 kmutex_t l2ad_mtx; /* lock for buffer list */
376 list_t l2ad_buflist; /* buffer list */
377 list_node_t l2ad_node; /* device list node */
378 zfs_refcount_t l2ad_alloc; /* allocated bytes */
379 /*
380 * Persistence-related stuff
381 */
382 l2arc_dev_hdr_phys_t *l2ad_dev_hdr; /* persistent device header */
383 uint64_t l2ad_dev_hdr_asize; /* aligned hdr size */
384 l2arc_log_blk_phys_t l2ad_log_blk; /* currently open log block */
385 int l2ad_log_ent_idx; /* index into cur log blk */
386 /* Number of bytes in current log block's payload */
387 uint64_t l2ad_log_blk_payload_asize;
388 /*
389 * Offset (in bytes) of the first buffer in current log block's
390 * payload.
391 */
392 uint64_t l2ad_log_blk_payload_start;
393 /* Flag indicating whether a rebuild is scheduled or is going on */
394 boolean_t l2ad_rebuild;
395 boolean_t l2ad_rebuild_cancel;
396 boolean_t l2ad_rebuild_began;
397 uint64_t l2ad_log_entries; /* entries per log blk */
398 uint64_t l2ad_evict; /* evicted offset in bytes */
399 /* List of pointers to log blocks present in the L2ARC device */
400 list_t l2ad_lbptr_list;
401 /*
402 * Aligned size of all log blocks as accounted by vdev_space_update().
403 */
404 zfs_refcount_t l2ad_lb_asize;
405 /*
406 * Number of log blocks present on the device.
407 */
408 zfs_refcount_t l2ad_lb_count;
409 boolean_t l2ad_trim_all; /* TRIM whole device */
410 } l2arc_dev_t;
411
412 /*
413 * Encrypted blocks will need to be stored encrypted on the L2ARC
414 * disk as they appear in the main pool. In order for this to work we
415 * need to pass around the encryption parameters so they can be used
416 * to write data to the L2ARC. This struct is only defined in the
417 * arc_buf_hdr_t if the L1 header is defined and has the ARC_FLAG_ENCRYPTED
418 * flag set.
419 */
420 typedef struct arc_buf_hdr_crypt {
421 abd_t *b_rabd; /* raw encrypted data */
422 dmu_object_type_t b_ot; /* object type */
423 uint32_t b_ebufcnt; /* count of encrypted buffers */
424
425 /* dsobj for looking up encryption key for l2arc encryption */
426 uint64_t b_dsobj;
427
428 /* encryption parameters */
429 uint8_t b_salt[ZIO_DATA_SALT_LEN];
430 uint8_t b_iv[ZIO_DATA_IV_LEN];
431
432 /*
433 * Technically this could be removed since we will always be able to
434 * get the mac from the bp when we need it. However, it is inconvenient
435 * for callers of arc code to have to pass a bp in all the time. This
436 * also allows us to assert that L2ARC data is properly encrypted to
437 * match the data in the main storage pool.
438 */
439 uint8_t b_mac[ZIO_DATA_MAC_LEN];
440 } arc_buf_hdr_crypt_t;
441
442 typedef struct l2arc_buf_hdr {
443 /* protected by arc_buf_hdr mutex */
444 l2arc_dev_t *b_dev; /* L2ARC device */
445 uint64_t b_daddr; /* disk address, offset byte */
446 uint32_t b_hits;
447 list_node_t b_l2node;
448 } l2arc_buf_hdr_t;
449
450 typedef struct l2arc_write_callback {
451 l2arc_dev_t *l2wcb_dev; /* device info */
452 arc_buf_hdr_t *l2wcb_head; /* head of write buflist */
453 /* in-flight list of log blocks */
454 list_t l2wcb_abd_list;
455 } l2arc_write_callback_t;
456
457 struct arc_buf_hdr {
458 /* protected by hash lock */
459 dva_t b_dva;
460 uint64_t b_birth;
461
462 arc_buf_contents_t b_type;
463 arc_buf_hdr_t *b_hash_next;
464 arc_flags_t b_flags;
465
466 /*
467 * This field stores the size of the data buffer after
468 * compression, and is set in the arc's zio completion handlers.
469 * It is in units of SPA_MINBLOCKSIZE (e.g. 1 == 512 bytes).
470 *
471 * While the block pointers can store up to 32MB in their psize
472 * field, we can only store up to 32MB minus 512B. This is due
473 * to the bp using a bias of 1, whereas we use a bias of 0 (i.e.
474 * a field of zeros represents 512B in the bp). We can't use a
475 * bias of 1 since we need to reserve a psize of zero, here, to
476 * represent holes and embedded blocks.
477 *
478 * This isn't a problem in practice, since the maximum size of a
479 * buffer is limited to 16MB, so we never need to store 32MB in
480 * this field. Even in the upstream illumos code base, the
481 * maximum size of a buffer is limited to 16MB.
482 */
483 uint16_t b_psize;
484
485 /*
486 * This field stores the size of the data buffer before
487 * compression, and cannot change once set. It is in units
488 * of SPA_MINBLOCKSIZE (e.g. 2 == 1024 bytes)
489 */
490 uint16_t b_lsize; /* immutable */
491 uint64_t b_spa; /* immutable */
492
493 /* L2ARC fields. Undefined when not in L2ARC. */
494 l2arc_buf_hdr_t b_l2hdr;
495 /* L1ARC fields. Undefined when in l2arc_only state */
496 l1arc_buf_hdr_t b_l1hdr;
497 /*
498 * Encryption parameters. Defined only when ARC_FLAG_ENCRYPTED
499 * is set and the L1 header exists.
500 */
501 arc_buf_hdr_crypt_t b_crypt_hdr;
502 };
503
504 typedef struct arc_stats {
505 kstat_named_t arcstat_hits;
506 kstat_named_t arcstat_misses;
507 kstat_named_t arcstat_demand_data_hits;
508 kstat_named_t arcstat_demand_data_misses;
509 kstat_named_t arcstat_demand_metadata_hits;
510 kstat_named_t arcstat_demand_metadata_misses;
511 kstat_named_t arcstat_prefetch_data_hits;
512 kstat_named_t arcstat_prefetch_data_misses;
513 kstat_named_t arcstat_prefetch_metadata_hits;
514 kstat_named_t arcstat_prefetch_metadata_misses;
515 kstat_named_t arcstat_mru_hits;
516 kstat_named_t arcstat_mru_ghost_hits;
517 kstat_named_t arcstat_mfu_hits;
518 kstat_named_t arcstat_mfu_ghost_hits;
519 kstat_named_t arcstat_deleted;
520 /*
521 * Number of buffers that could not be evicted because the hash lock
522 * was held by another thread. The lock may not necessarily be held
523 * by something using the same buffer, since hash locks are shared
524 * by multiple buffers.
525 */
526 kstat_named_t arcstat_mutex_miss;
527 /*
528 * Number of buffers skipped when updating the access state due to the
529 * header having already been released after acquiring the hash lock.
530 */
531 kstat_named_t arcstat_access_skip;
532 /*
533 * Number of buffers skipped because they have I/O in progress, are
534 * indirect prefetch buffers that have not lived long enough, or are
535 * not from the spa we're trying to evict from.
536 */
537 kstat_named_t arcstat_evict_skip;
538 /*
539 * Number of times arc_evict_state() was unable to evict enough
540 * buffers to reach its target amount.
541 */
542 kstat_named_t arcstat_evict_not_enough;
543 kstat_named_t arcstat_evict_l2_cached;
544 kstat_named_t arcstat_evict_l2_eligible;
545 kstat_named_t arcstat_evict_l2_ineligible;
546 kstat_named_t arcstat_evict_l2_skip;
547 kstat_named_t arcstat_hash_elements;
548 kstat_named_t arcstat_hash_elements_max;
549 kstat_named_t arcstat_hash_collisions;
550 kstat_named_t arcstat_hash_chains;
551 kstat_named_t arcstat_hash_chain_max;
552 kstat_named_t arcstat_p;
553 kstat_named_t arcstat_c;
554 kstat_named_t arcstat_c_min;
555 kstat_named_t arcstat_c_max;
556 /* Not updated directly; only synced in arc_kstat_update. */
557 kstat_named_t arcstat_size;
558 /*
559 * Number of compressed bytes stored in the arc_buf_hdr_t's b_pabd.
560 * Note that the compressed bytes may match the uncompressed bytes
561 * if the block is either not compressed or compressed arc is disabled.
562 */
563 kstat_named_t arcstat_compressed_size;
564 /*
565 * Uncompressed size of the data stored in b_pabd. If compressed
566 * arc is disabled then this value will be identical to the stat
567 * above.
568 */
569 kstat_named_t arcstat_uncompressed_size;
570 /*
571 * Number of bytes stored in all the arc_buf_t's. This is classified
572 * as "overhead" since this data is typically short-lived and will
573 * be evicted from the arc when it becomes unreferenced unless the
574 * zfs_keep_uncompressed_metadata or zfs_keep_uncompressed_level
575 * values have been set (see comment in dbuf.c for more information).
576 */
577 kstat_named_t arcstat_overhead_size;
578 /*
579 * Number of bytes consumed by internal ARC structures necessary
580 * for tracking purposes; these structures are not actually
581 * backed by ARC buffers. This includes arc_buf_hdr_t structures
582 * (allocated via arc_buf_hdr_t_full and arc_buf_hdr_t_l2only
583 * caches), and arc_buf_t structures (allocated via arc_buf_t
584 * cache).
585 * Not updated directly; only synced in arc_kstat_update.
586 */
587 kstat_named_t arcstat_hdr_size;
588 /*
589 * Number of bytes consumed by ARC buffers of type equal to
590 * ARC_BUFC_DATA. This is generally consumed by buffers backing
591 * on disk user data (e.g. plain file contents).
592 * Not updated directly; only synced in arc_kstat_update.
593 */
594 kstat_named_t arcstat_data_size;
595 /*
596 * Number of bytes consumed by ARC buffers of type equal to
597 * ARC_BUFC_METADATA. This is generally consumed by buffers
598 * backing on disk data that is used for internal ZFS
599 * structures (e.g. ZAP, dnode, indirect blocks, etc).
600 * Not updated directly; only synced in arc_kstat_update.
601 */
602 kstat_named_t arcstat_metadata_size;
603 /*
604 * Number of bytes consumed by dmu_buf_impl_t objects.
605 * Not updated directly; only synced in arc_kstat_update.
606 */
607 kstat_named_t arcstat_dbuf_size;
608 /*
609 * Number of bytes consumed by dnode_t objects.
610 * Not updated directly; only synced in arc_kstat_update.
611 */
612 kstat_named_t arcstat_dnode_size;
613 /*
614 * Number of bytes consumed by bonus buffers.
615 * Not updated directly; only synced in arc_kstat_update.
616 */
617 kstat_named_t arcstat_bonus_size;
618 /*
619 * Total number of bytes consumed by ARC buffers residing in the
620 * arc_anon state. This includes *all* buffers in the arc_anon
621 * state; e.g. data, metadata, evictable, and unevictable buffers
622 * are all included in this value.
623 * Not updated directly; only synced in arc_kstat_update.
624 */
625 kstat_named_t arcstat_anon_size;
626 /*
627 * Number of bytes consumed by ARC buffers that meet the
628 * following criteria: backing buffers of type ARC_BUFC_DATA,
629 * residing in the arc_anon state, and are eligible for eviction
630 * (e.g. have no outstanding holds on the buffer).
631 * Not updated directly; only synced in arc_kstat_update.
632 */
633 kstat_named_t arcstat_anon_evictable_data;
634 /*
635 * Number of bytes consumed by ARC buffers that meet the
636 * following criteria: backing buffers of type ARC_BUFC_METADATA,
637 * residing in the arc_anon state, and are eligible for eviction
638 * (e.g. have no outstanding holds on the buffer).
639 * Not updated directly; only synced in arc_kstat_update.
640 */
641 kstat_named_t arcstat_anon_evictable_metadata;
642 /*
643 * Total number of bytes consumed by ARC buffers residing in the
644 * arc_mru state. This includes *all* buffers in the arc_mru
645 * state; e.g. data, metadata, evictable, and unevictable buffers
646 * are all included in this value.
647 * Not updated directly; only synced in arc_kstat_update.
648 */
649 kstat_named_t arcstat_mru_size;
650 /*
651 * Number of bytes consumed by ARC buffers that meet the
652 * following criteria: backing buffers of type ARC_BUFC_DATA,
653 * residing in the arc_mru state, and are eligible for eviction
654 * (e.g. have no outstanding holds on the buffer).
655 * Not updated directly; only synced in arc_kstat_update.
656 */
657 kstat_named_t arcstat_mru_evictable_data;
658 /*
659 * Number of bytes consumed by ARC buffers that meet the
660 * following criteria: backing buffers of type ARC_BUFC_METADATA,
661 * residing in the arc_mru state, and are eligible for eviction
662 * (e.g. have no outstanding holds on the buffer).
663 * Not updated directly; only synced in arc_kstat_update.
664 */
665 kstat_named_t arcstat_mru_evictable_metadata;
666 /*
667 * Total number of bytes that *would have been* consumed by ARC
668 * buffers in the arc_mru_ghost state. The key thing to note
669 * here, is the fact that this size doesn't actually indicate
670 * RAM consumption. The ghost lists only consist of headers and
671 * don't actually have ARC buffers linked off of these headers.
672 * Thus, *if* the headers had associated ARC buffers, these
673 * buffers *would have* consumed this number of bytes.
674 * Not updated directly; only synced in arc_kstat_update.
675 */
676 kstat_named_t arcstat_mru_ghost_size;
677 /*
678 * Number of bytes that *would have been* consumed by ARC
679 * buffers that are eligible for eviction, of type
680 * ARC_BUFC_DATA, and linked off the arc_mru_ghost state.
681 * Not updated directly; only synced in arc_kstat_update.
682 */
683 kstat_named_t arcstat_mru_ghost_evictable_data;
684 /*
685 * Number of bytes that *would have been* consumed by ARC
686 * buffers that are eligible for eviction, of type
687 * ARC_BUFC_METADATA, and linked off the arc_mru_ghost state.
688 * Not updated directly; only synced in arc_kstat_update.
689 */
690 kstat_named_t arcstat_mru_ghost_evictable_metadata;
691 /*
692 * Total number of bytes consumed by ARC buffers residing in the
693 * arc_mfu state. This includes *all* buffers in the arc_mfu
694 * state; e.g. data, metadata, evictable, and unevictable buffers
695 * are all included in this value.
696 * Not updated directly; only synced in arc_kstat_update.
697 */
698 kstat_named_t arcstat_mfu_size;
699 /*
700 * Number of bytes consumed by ARC buffers that are eligible for
701 * eviction, of type ARC_BUFC_DATA, and reside in the arc_mfu
702 * state.
703 * Not updated directly; only synced in arc_kstat_update.
704 */
705 kstat_named_t arcstat_mfu_evictable_data;
706 /*
707 * Number of bytes consumed by ARC buffers that are eligible for
708 * eviction, of type ARC_BUFC_METADATA, and reside in the
709 * arc_mfu state.
710 * Not updated directly; only synced in arc_kstat_update.
711 */
712 kstat_named_t arcstat_mfu_evictable_metadata;
713 /*
714 * Total number of bytes that *would have been* consumed by ARC
715 * buffers in the arc_mfu_ghost state. See the comment above
716 * arcstat_mru_ghost_size for more details.
717 * Not updated directly; only synced in arc_kstat_update.
718 */
719 kstat_named_t arcstat_mfu_ghost_size;
720 /*
721 * Number of bytes that *would have been* consumed by ARC
722 * buffers that are eligible for eviction, of type
723 * ARC_BUFC_DATA, and linked off the arc_mfu_ghost state.
724 * Not updated directly; only synced in arc_kstat_update.
725 */
726 kstat_named_t arcstat_mfu_ghost_evictable_data;
727 /*
728 * Number of bytes that *would have been* consumed by ARC
729 * buffers that are eligible for eviction, of type
730 * ARC_BUFC_METADATA, and linked off the arc_mru_ghost state.
731 * Not updated directly; only synced in arc_kstat_update.
732 */
733 kstat_named_t arcstat_mfu_ghost_evictable_metadata;
734 kstat_named_t arcstat_l2_hits;
735 kstat_named_t arcstat_l2_misses;
736 kstat_named_t arcstat_l2_feeds;
737 kstat_named_t arcstat_l2_rw_clash;
738 kstat_named_t arcstat_l2_read_bytes;
739 kstat_named_t arcstat_l2_write_bytes;
740 kstat_named_t arcstat_l2_writes_sent;
741 kstat_named_t arcstat_l2_writes_done;
742 kstat_named_t arcstat_l2_writes_error;
743 kstat_named_t arcstat_l2_writes_lock_retry;
744 kstat_named_t arcstat_l2_evict_lock_retry;
745 kstat_named_t arcstat_l2_evict_reading;
746 kstat_named_t arcstat_l2_evict_l1cached;
747 kstat_named_t arcstat_l2_free_on_write;
748 kstat_named_t arcstat_l2_abort_lowmem;
749 kstat_named_t arcstat_l2_cksum_bad;
750 kstat_named_t arcstat_l2_io_error;
751 kstat_named_t arcstat_l2_lsize;
752 kstat_named_t arcstat_l2_psize;
753 /* Not updated directly; only synced in arc_kstat_update. */
754 kstat_named_t arcstat_l2_hdr_size;
755 /*
756 * Number of L2ARC log blocks written. These are used for restoring the
757 * L2ARC. Updated during writing of L2ARC log blocks.
758 */
759 kstat_named_t arcstat_l2_log_blk_writes;
760 /*
761 * Moving average of the aligned size of the L2ARC log blocks, in
762 * bytes. Updated during L2ARC rebuild and during writing of L2ARC
763 * log blocks.
764 */
765 kstat_named_t arcstat_l2_log_blk_avg_asize;
766 /* Aligned size of L2ARC log blocks on L2ARC devices. */
767 kstat_named_t arcstat_l2_log_blk_asize;
768 /* Number of L2ARC log blocks present on L2ARC devices. */
769 kstat_named_t arcstat_l2_log_blk_count;
770 /*
771 * Moving average of the aligned size of L2ARC restored data, in bytes,
772 * to the aligned size of their metadata in L2ARC, in bytes.
773 * Updated during L2ARC rebuild and during writing of L2ARC log blocks.
774 */
775 kstat_named_t arcstat_l2_data_to_meta_ratio;
776 /*
777 * Number of times the L2ARC rebuild was successful for an L2ARC device.
778 */
779 kstat_named_t arcstat_l2_rebuild_success;
780 /*
781 * Number of times the L2ARC rebuild failed because the device header
782 * was in an unsupported format or corrupted.
783 */
784 kstat_named_t arcstat_l2_rebuild_abort_unsupported;
785 /*
786 * Number of times the L2ARC rebuild failed because of IO errors
787 * while reading a log block.
788 */
789 kstat_named_t arcstat_l2_rebuild_abort_io_errors;
790 /*
791 * Number of times the L2ARC rebuild failed because of IO errors when
792 * reading the device header.
793 */
794 kstat_named_t arcstat_l2_rebuild_abort_dh_errors;
795 /*
796 * Number of L2ARC log blocks which failed to be restored due to
797 * checksum errors.
798 */
799 kstat_named_t arcstat_l2_rebuild_abort_cksum_lb_errors;
800 /*
801 * Number of times the L2ARC rebuild was aborted due to low system
802 * memory.
803 */
804 kstat_named_t arcstat_l2_rebuild_abort_lowmem;
805 /* Logical size of L2ARC restored data, in bytes. */
806 kstat_named_t arcstat_l2_rebuild_size;
807 /* Aligned size of L2ARC restored data, in bytes. */
808 kstat_named_t arcstat_l2_rebuild_asize;
809 /*
810 * Number of L2ARC log entries (buffers) that were successfully
811 * restored in ARC.
812 */
813 kstat_named_t arcstat_l2_rebuild_bufs;
814 /*
815 * Number of L2ARC log entries (buffers) already cached in ARC. These
816 * were not restored again.
817 */
818 kstat_named_t arcstat_l2_rebuild_bufs_precached;
819 /*
820 * Number of L2ARC log blocks that were restored successfully. Each
821 * log block may hold up to L2ARC_LOG_BLK_MAX_ENTRIES buffers.
822 */
823 kstat_named_t arcstat_l2_rebuild_log_blks;
824 kstat_named_t arcstat_memory_throttle_count;
825 kstat_named_t arcstat_memory_direct_count;
826 kstat_named_t arcstat_memory_indirect_count;
827 kstat_named_t arcstat_memory_all_bytes;
828 kstat_named_t arcstat_memory_free_bytes;
829 kstat_named_t arcstat_memory_available_bytes;
830 kstat_named_t arcstat_no_grow;
831 kstat_named_t arcstat_tempreserve;
832 kstat_named_t arcstat_loaned_bytes;
833 kstat_named_t arcstat_prune;
834 /* Not updated directly; only synced in arc_kstat_update. */
835 kstat_named_t arcstat_meta_used;
836 kstat_named_t arcstat_meta_limit;
837 kstat_named_t arcstat_dnode_limit;
838 kstat_named_t arcstat_meta_max;
839 kstat_named_t arcstat_meta_min;
840 kstat_named_t arcstat_async_upgrade_sync;
841 kstat_named_t arcstat_demand_hit_predictive_prefetch;
842 kstat_named_t arcstat_demand_hit_prescient_prefetch;
843 kstat_named_t arcstat_need_free;
844 kstat_named_t arcstat_sys_free;
845 kstat_named_t arcstat_raw_size;
846 kstat_named_t arcstat_cached_only_in_progress;
847 } arc_stats_t;
848
/*
 * Free-memory reason codes.  The enumerators take consecutive values
 * starting at zero, in declaration order.
 */
typedef enum free_memory_reason_t {
	FMR_UNKNOWN,
	FMR_NEEDFREE,
	FMR_LOTSFREE,
	FMR_SWAPFS_MINFREE,
	FMR_PAGES_PP_MAXIMUM,
	FMR_HEAP_ARENA,
	FMR_ZIO_ARENA,
} free_memory_reason_t;
858
/* The ui64 value of one member of the global arc_stats. */
#define	ARCSTAT(stat)	(arc_stats.stat.value.ui64)

/* Add val to a stat with atomic_add_64(). */
#define	ARCSTAT_INCR(stat, val)	\
	atomic_add_64(&arc_stats.stat.value.ui64, (val))

/* Step a stat up or down by one. */
#define	ARCSTAT_BUMP(stat)	ARCSTAT_INCR(stat, 1)
#define	ARCSTAT_BUMPDOWN(stat)	ARCSTAT_INCR(stat, -1)

/* Short names for individual stats. */
#define	arc_no_grow	ARCSTAT(arcstat_no_grow) /* do not grow cache size */
#define	arc_p		ARCSTAT(arcstat_p)	/* target size of MRU */
#define	arc_c		ARCSTAT(arcstat_c)	/* target size of cache */
#define	arc_c_min	ARCSTAT(arcstat_c_min)	/* min target cache size */
#define	arc_c_max	ARCSTAT(arcstat_c_max)	/* max target cache size */
#define	arc_sys_free	ARCSTAT(arcstat_sys_free) /* target system free bytes */
#define	arc_need_free	ARCSTAT(arcstat_need_free) /* bytes to be freed */
874
875 extern int arc_zio_arena_free_shift;
876 extern taskq_t *arc_prune_taskq;
877 extern arc_stats_t arc_stats;
878 extern hrtime_t arc_growtime;
879 extern boolean_t arc_warm;
880 extern int arc_grow_retry;
881 extern int arc_shrink_shift;
882 extern zthr_t *arc_adjust_zthr;
883 extern kmutex_t arc_adjust_lock;
884 extern kcondvar_t arc_adjust_waiters_cv;
885 extern boolean_t arc_adjust_needed;
886 extern kmutex_t arc_prune_mtx;
887 extern list_t arc_prune_list;
888 extern aggsum_t arc_size;
889 extern arc_state_t *arc_mfu;
890 extern arc_state_t *arc_mru;
891 extern uint_t zfs_arc_pc_percent;
892 extern int arc_lotsfree_percent;
893
894 extern void arc_reduce_target_size(int64_t to_free);
895 extern boolean_t arc_reclaim_needed(void);
896 extern void arc_kmem_reap_soon(void);
897
898 extern void arc_lowmem_init(void);
899 extern void arc_lowmem_fini(void);
900 extern void arc_prune_async(int64_t);
901 extern int arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg);
902 extern uint64_t arc_free_memory(void);
903 extern int64_t arc_available_memory(void);
904 extern void arc_tuning_update(boolean_t);
905
906 extern int param_set_arc_long(ZFS_MODULE_PARAM_ARGS);
907 extern int param_set_arc_int(ZFS_MODULE_PARAM_ARGS);
908
909 /* used in zdb.c */
910 boolean_t l2arc_log_blkptr_valid(l2arc_dev_t *dev,
911 const l2arc_log_blkptr_t *lbp);
912
913 /* used in vdev_trim.c */
914 void l2arc_dev_hdr_update(l2arc_dev_t *dev);
915 l2arc_dev_t *l2arc_vdev_get(vdev_t *vd);
916
917 #ifdef __cplusplus
918 }
919 #endif
920
921 #endif /* _SYS_ARC_IMPL_H */