4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2013 by Delphix. All rights reserved.
24 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
25 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
28 #ifndef _SYS_ARC_IMPL_H
29 #define _SYS_ARC_IMPL_H
32 #include <sys/zio_crypt.h>
39 * Note that buffers can be in one of 6 states:
40 * ARC_anon - anonymous (discussed below)
41 * ARC_mru - recently used, currently cached
42 * ARC_mru_ghost - recently used, no longer in cache
43 * ARC_mfu - frequently used, currently cached
44 * ARC_mfu_ghost - frequently used, no longer in cache
45 * ARC_l2c_only - exists in L2ARC but not other states
46 * When there are no active references to a buffer, it is
47 * linked onto a list in one of these arc states. These are
48 * the only buffers that can be evicted or deleted. Within each
49 * state there are multiple lists, one for meta-data and one for
50 * non-meta-data. Meta-data (indirect blocks, blocks of dnodes,
51 * etc.) is tracked separately so that it can be managed more
52 * explicitly: favored over data, limited explicitly.
54 * Anonymous buffers are buffers that are not associated with
55 * a DVA. These are buffers that hold dirty block copies
56 * before they are written to stable storage. By definition,
57 * they are "ref'd" and are considered part of arc_mru
58 * that cannot be freed. Generally, they will acquire a DVA
59 * as they are written and migrate onto the arc_mru list.
61 * The ARC_l2c_only state is for buffers that are in the second
62 * level ARC but no longer in any of the ARC_m* lists. The second
63 * level ARC itself may also contain buffers that are in any of
64 * the ARC_m* states - meaning that a buffer can exist in two
65 * places. The reason for the ARC_l2c_only state is to keep the
66 * buffer header in the hash table, so that reads that hit the
67 * second level ARC benefit from these fast lookups.
70 typedef struct arc_state
{
72 * list of evictable buffers
74 multilist_t
*arcs_list
[ARC_BUFC_NUMTYPES
];
76 * total amount of evictable data in this state
78 refcount_t arcs_esize
[ARC_BUFC_NUMTYPES
];
80 * total amount of data in this state; this includes: evictable,
81 * non-evictable, ARC_BUFC_DATA, and ARC_BUFC_METADATA.
85 * supports the "dbufs" kstat
87 arc_state_type_t arcs_state
;
/*
 * Forward typedef for struct arc_callback; the acb_next member below is
 * declared with this type.
 */
90 typedef struct arc_callback arc_callback_t
;
94 arc_read_done_func_t
*acb_done
;
96 boolean_t acb_encrypted
;
97 boolean_t acb_compressed
;
100 zio_t
*acb_zio_dummy
;
101 arc_callback_t
*acb_next
;
/* Forward typedef for struct arc_write_callback, whose definition follows. */
104 typedef struct arc_write_callback arc_write_callback_t
;
106 struct arc_write_callback
{
108 arc_write_done_func_t
*awcb_ready
;
109 arc_write_done_func_t
*awcb_children_ready
;
110 arc_write_done_func_t
*awcb_physdone
;
111 arc_write_done_func_t
*awcb_done
;
116 * ARC buffers are separated into multiple structs as a memory saving measure:
117 * - Common fields struct, always defined, and embedded within it:
118 * - L2-only fields, always allocated but undefined when not in L2ARC
119 * - L1-only fields, only allocated when in L1ARC
121 * Buffer in L1 Buffer only in L2
122 * +------------------------+ +------------------------+
123 * | arc_buf_hdr_t | | arc_buf_hdr_t |
127 * +------------------------+ +------------------------+
128 * | l2arc_buf_hdr_t | | l2arc_buf_hdr_t |
129 * | (undefined if L1-only) | | |
130 * +------------------------+ +------------------------+
131 * | l1arc_buf_hdr_t |
136 * +------------------------+
138 * Because it's possible for the L2ARC to become extremely large, we can wind
139 * up eating a lot of memory in L2ARC buffer headers, so the size of a header
140 * is minimized by only allocating the fields necessary for an L1-cached buffer
141 * when a header is actually in the L1 cache. The sub-headers (l1arc_buf_hdr and
142 * l2arc_buf_hdr) are embedded rather than allocated separately to save a couple
143 * words in pointers. arc_hdr_realloc() is used to switch a header between
144 * these two allocation states.
146 typedef struct l1arc_buf_hdr
{
147 kmutex_t b_freeze_lock
;
148 zio_cksum_t
*b_freeze_cksum
;
152 /* for waiting on writes to complete */
157 /* protected by arc state mutex */
158 arc_state_t
*b_state
;
159 multilist_node_t b_arc_node
;
161 /* updated atomically */
162 clock_t b_arc_access
;
164 uint32_t b_mru_ghost_hits
;
166 uint32_t b_mfu_ghost_hits
;
169 /* self protecting */
172 arc_callback_t
*b_acb
;
177 * Encrypted blocks will need to be stored encrypted on the L2ARC
178 * disk as they appear in the main pool. In order for this to work we
179 * need to pass around the encryption parameters so they can be used
180 * to write data to the L2ARC. This struct is only defined in the
181 * arc_buf_hdr_t if the L1 header is defined and has the ARC_FLAG_ENCRYPTED flag set.
/*
 * Encryption state carried alongside an ARC buffer header. The members
 * visible here are the raw (still encrypted) data, the object type, a
 * count of encrypted buffers, and the salt, IV and MAC byte arrays, each
 * sized by the corresponding ZIO_DATA_*_LEN constant.
 */
184 typedef struct arc_buf_hdr_crypt
{
185 abd_t
*b_rabd
; /* raw encrypted data */
186 dmu_object_type_t b_ot
; /* object type */
187 uint32_t b_ebufcnt
; /* count of encrypted buffers */
189 /* dsobj for looking up encryption key for l2arc encryption */
/*
 * NOTE(review): no member follows the dsobj comment in this view --
 * confirm that the field it describes is present in the full header.
 */
192 /* encryption parameters */
193 uint8_t b_salt
[ZIO_DATA_SALT_LEN
];
194 uint8_t b_iv
[ZIO_DATA_IV_LEN
];
197 * Technically this could be removed since we will always be able to
198 * get the mac from the bp when we need it. However, it is inconvenient
199 * for callers of arc code to have to pass a bp in all the time. This
200 * also allows us to assert that L2ARC data is properly encrypted to
201 * match the data in the main storage pool.
203 uint8_t b_mac
[ZIO_DATA_MAC_LEN
];
204 } arc_buf_hdr_crypt_t
;
206 typedef struct l2arc_dev
{
207 vdev_t
*l2ad_vdev
; /* vdev */
208 spa_t
*l2ad_spa
; /* spa */
209 uint64_t l2ad_hand
; /* next write location */
210 uint64_t l2ad_start
; /* first addr on device */
211 uint64_t l2ad_end
; /* last addr on device */
212 boolean_t l2ad_first
; /* first sweep through */
213 boolean_t l2ad_writing
; /* currently writing */
214 kmutex_t l2ad_mtx
; /* lock for buffer list */
215 list_t l2ad_buflist
; /* buffer list */
216 list_node_t l2ad_node
; /* device list node */
217 refcount_t l2ad_alloc
; /* allocated bytes */
220 typedef struct l2arc_buf_hdr
{
221 /* protected by arc_buf_hdr mutex */
222 l2arc_dev_t
*b_dev
; /* L2ARC device */
223 uint64_t b_daddr
; /* disk address, offset byte */
226 list_node_t b_l2node
;
/*
 * Callback context for an L2ARC write. Going by the member comments, it
 * pairs the cache device with the head of the list of buffer headers
 * being written.
 * NOTE(review): presumably handed to the L2ARC write completion handler --
 * confirm against the code that allocates it.
 */
229 typedef struct l2arc_write_callback
{
230 l2arc_dev_t
*l2wcb_dev
; /* device info */
231 arc_buf_hdr_t
*l2wcb_head
; /* head of write buflist */
232 } l2arc_write_callback_t
;
235 /* protected by hash lock */
239 arc_buf_contents_t b_type
;
240 arc_buf_hdr_t
*b_hash_next
;
244 * This field stores the size of the data buffer after
245 * compression, and is set in the arc's zio completion handlers.
246 * It is in units of SPA_MINBLOCKSIZE (e.g. 1 == 512 bytes).
248 * While the block pointers can store up to 32MB in their psize
249 * field, we can only store up to 32MB minus 512B. This is due
250 * to the bp using a bias of 1, whereas we use a bias of 0 (i.e.
251 * a field of zeros represents 512B in the bp). We can't use a
252 * bias of 1 since we need to reserve a psize of zero, here, to
253 * represent holes and embedded blocks.
255 * This isn't a problem in practice, since the maximum size of a
256 * buffer is limited to 16MB, so we never need to store 32MB in
257 * this field. Even in the upstream illumos code base, the
258 * maximum size of a buffer is limited to 16MB.
263 * This field stores the size of the data buffer before
264 * compression, and cannot change once set. It is in units
265 * of SPA_MINBLOCKSIZE (e.g. 2 == 1024 bytes)
267 uint16_t b_lsize
; /* immutable */
268 uint64_t b_spa
; /* immutable */
270 /* L2ARC fields. Undefined when not in L2ARC. */
271 l2arc_buf_hdr_t b_l2hdr
;
272 /* L1ARC fields. Undefined when in l2arc_only state */
273 l1arc_buf_hdr_t b_l1hdr
;
275 * Encryption parameters. Defined only when ARC_FLAG_ENCRYPTED
276 * is set and the L1 header exists.
278 arc_buf_hdr_crypt_t b_crypt_hdr
;
284 #endif /* _SYS_ARC_IMPL_H */