]>
Commit | Line | Data |
---|---|---|
59ec819a NB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
22 | * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. | |
23 | * Copyright (c) 2013 by Delphix. All rights reserved. | |
24 | * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. | |
25 | * Copyright 2013 Nexenta Systems, Inc. All rights reserved. | |
26 | */ | |
27 | ||
28 | #ifndef _SYS_ARC_IMPL_H | |
29 | #define _SYS_ARC_IMPL_H | |
30 | ||
31 | #include <sys/arc.h> | |
b5256303 | 32 | #include <sys/zio_crypt.h> |
59ec819a NB |
33 | |
34 | #ifdef __cplusplus | |
35 | extern "C" { | |
36 | #endif | |
37 | ||
38 | /* | |
39 | * Note that buffers can be in one of 6 states: | |
40 | * ARC_anon - anonymous (discussed below) | |
41 | * ARC_mru - recently used, currently cached | |
42 | * ARC_mru_ghost - recently used, no longer in cache | |
43 | * ARC_mfu - frequently used, currently cached | |
44 | * ARC_mfu_ghost - frequently used, no longer in cache | |
45 | * ARC_l2c_only - exists in L2ARC but not other states | |
46 | * When there are no active references to the buffer, they are | |
47 | * linked onto a list in one of these arc states. These are | |
48 | * the only buffers that can be evicted or deleted. Within each | |
49 | * state there are multiple lists, one for meta-data and one for | |
50 | * non-meta-data. Meta-data (indirect blocks, blocks of dnodes, | |
51 | * etc.) is tracked separately so that it can be managed more | |
52 | * explicitly: favored over data, limited explicitly. | |
53 | * | |
54 | * Anonymous buffers are buffers that are not associated with | |
55 | * a DVA. These are buffers that hold dirty block copies | |
56 | * before they are written to stable storage. By definition, | |
57 | * they are "ref'd" and are considered part of arc_mru | |
4e33ba4c | 58 | * that cannot be freed. Generally, they will acquire a DVA |
59ec819a NB |
59 | * as they are written and migrate onto the arc_mru list. |
60 | * | |
61 | * The ARC_l2c_only state is for buffers that are in the second | |
62 | * level ARC but no longer in any of the ARC_m* lists. The second | |
63 | * level ARC itself may also contain buffers that are in any of | |
64 | * the ARC_m* states - meaning that a buffer can exist in two | |
65 | * places. The reason for the ARC_l2c_only state is to keep the | |
66 | * buffer header in the hash table, so that reads that hit the | |
67 | * second level ARC benefit from these fast lookups. | |
68 | */ | |
69 | ||
70 | typedef struct arc_state { /* bookkeeping for one ARC state */ | |
ca0bf58d PS |
71 | /* |
72 | * lists of evictable buffers, one per buffer contents type | |
73 | */ | |
64fc7762 | 74 | multilist_t *arcs_list[ARC_BUFC_NUMTYPES]; |
ca0bf58d PS |
75 | /* |
76 | * total amount of evictable data in this state, per contents type | |
77 | */ | |
d3c2ae1c | 78 | refcount_t arcs_esize[ARC_BUFC_NUMTYPES]; |
ca0bf58d PS |
79 | /* |
80 | * total amount of data in this state; this includes: evictable, | |
81 | * non-evictable, ARC_BUFC_DATA, and ARC_BUFC_METADATA. | |
82 | */ | |
36da08ef | 83 | refcount_t arcs_size; |
ca0bf58d PS |
84 | /* |
85 | * state type tag; supports the "dbufs" kstat | |
86 | */ | |
59ec819a NB |
87 | arc_state_type_t arcs_state; |
88 | } arc_state_t; | |
89 | ||
59ec819a NB |
90 | typedef struct arc_callback arc_callback_t; |
91 | ||
92 | struct arc_callback { /* read callback record; records chain via acb_next */ | |
93 | void *acb_private; /* presumably opaque argument for acb_done - confirm */ | |
b5256303 | 94 | arc_read_done_func_t *acb_done; /* read-done callback */ |
59ec819a | 95 | arc_buf_t *acb_buf; /* NOTE(review): presumably the buf handed to acb_done - confirm */ |
b5256303 | 96 | boolean_t acb_encrypted; /* presumably: caller wants encrypted data - confirm */ |
2aa34383 | 97 | boolean_t acb_compressed; /* presumably: caller wants compressed data - confirm */ |
b5256303 | 98 | boolean_t acb_noauth; /* presumably: skip authentication - confirm */ |
be9a5c35 | 99 | zbookmark_phys_t acb_zb; /* bookmark associated with this request */ |
59ec819a | 100 | zio_t *acb_zio_dummy; |
a8b2e306 | 101 | zio_t *acb_zio_head; |
59ec819a NB |
102 | arc_callback_t *acb_next; /* next callback record in the chain */ |
103 | }; | |
104 | ||
105 | typedef struct arc_write_callback arc_write_callback_t; | |
106 | ||
107 | struct arc_write_callback { /* callbacks and buffer for one ARC write */ | |
b5256303 TC |
108 | void *awcb_private; /* presumably opaque argument for the callbacks - confirm */ |
109 | arc_write_done_func_t *awcb_ready; /* NOTE(review): presumably one callback per write stage - confirm */ | |
110 | arc_write_done_func_t *awcb_children_ready; | |
111 | arc_write_done_func_t *awcb_physdone; | |
112 | arc_write_done_func_t *awcb_done; | |
113 | arc_buf_t *awcb_buf; /* presumably the buffer being written - confirm */ | |
59ec819a NB |
114 | }; |
115 | ||
b9541d6b CW |
116 | /* |
117 | * ARC buffers are separated into multiple structs as a memory saving measure: | |
118 | * - Common fields struct, always defined, and embedded within it: | |
119 | * - L2-only fields, always allocated but undefined when not in L2ARC | |
120 | * - L1-only fields, only allocated when in L1ARC | |
121 | * | |
122 | * Buffer in L1 Buffer only in L2 | |
123 | * +------------------------+ +------------------------+ | |
124 | * | arc_buf_hdr_t | | arc_buf_hdr_t | | |
125 | * | | | | | |
126 | * | | | | | |
127 | * | | | | | |
128 | * +------------------------+ +------------------------+ | |
129 | * | l2arc_buf_hdr_t | | l2arc_buf_hdr_t | | |
130 | * | (undefined if L1-only) | | | | |
131 | * +------------------------+ +------------------------+ | |
132 | * | l1arc_buf_hdr_t | | |
133 | * | | | |
134 | * | | | |
135 | * | | | |
136 | * | | | |
137 | * +------------------------+ | |
138 | * | |
139 | * Because it's possible for the L2ARC to become extremely large, we can wind | |
140 | * up eating a lot of memory in L2ARC buffer headers, so the size of a header | |
141 | * is minimized by only allocating the fields necessary for an L1-cached buffer | |
142 | * when a header is actually in the L1 cache. The sub-headers (l1arc_buf_hdr and | |
143 | * l2arc_buf_hdr) are embedded rather than allocated separately to save a couple | |
144 | * words in pointers. arc_hdr_realloc() is used to switch a header between | |
145 | * these two allocation states. | |
146 | */ | |
147 | typedef struct l1arc_buf_hdr { /* fields only allocated while the header is in L1ARC */ | |
59ec819a | 148 | kmutex_t b_freeze_lock; /* presumably guards b_freeze_cksum - confirm */ |
d3c2ae1c | 149 | zio_cksum_t *b_freeze_cksum; |
59ec819a | 150 | |
59ec819a | 151 | arc_buf_t *b_buf; /* presumably the arc_buf_t(s) attached to this header - confirm */ |
d3c2ae1c | 152 | uint32_t b_bufcnt; /* presumably the count of bufs reachable from b_buf - confirm */ |
b9541d6b | 153 | /* for waiting on writes to complete */ |
59ec819a | 154 | kcondvar_t b_cv; |
d3c2ae1c | 155 | uint8_t b_byteswap; |
59ec819a | 156 | |
59ec819a NB |
157 | |
158 | /* protected by arc state mutex */ | |
159 | arc_state_t *b_state; | |
ca0bf58d | 160 | multilist_node_t b_arc_node; |
59ec819a NB |
161 | |
162 | /* updated atomically */ | |
163 | clock_t b_arc_access; | |
164 | uint32_t b_mru_hits; | |
165 | uint32_t b_mru_ghost_hits; | |
166 | uint32_t b_mfu_hits; | |
167 | uint32_t b_mfu_ghost_hits; | |
168 | uint32_t b_l2_hits; | |
169 | ||
170 | /* self protecting */ | |
171 | refcount_t b_refcnt; | |
172 | ||
b9541d6b | 173 | arc_callback_t *b_acb; /* read callback records, chained via acb_next */ |
a6255b7f | 174 | abd_t *b_pabd; /* NOTE(review): presumably the header's cached data - confirm */ |
b9541d6b | 175 | } l1arc_buf_hdr_t; |
59ec819a | 176 | |
b5256303 TC |
177 | /* |
178 | * Encrypted blocks will need to be stored encrypted on the L2ARC | |
179 | * disk as they appear in the main pool. In order for this to work we | |
180 | * need to pass around the encryption parameters so they can be used | |
181 | * to write data to the L2ARC. This struct is only defined in the | |
182 | * arc_buf_hdr_t if the L1 header is defined and has the ARC_FLAG_ENCRYPTED | |
183 | * flag set. | |
184 | */ | |
185 | typedef struct arc_buf_hdr_crypt { /* valid only with an L1 header and ARC_FLAG_ENCRYPTED */ | |
186 | abd_t *b_rabd; /* raw encrypted data */ | |
187 | dmu_object_type_t b_ot; /* object type */ | |
188 | uint32_t b_ebufcnt; /* count of encrypted buffers */ | |
189 | ||
190 | /* dsobj for looking up encryption key for l2arc encryption */ | |
191 | uint64_t b_dsobj; | |
192 | ||
193 | /* encryption parameters */ | |
194 | uint8_t b_salt[ZIO_DATA_SALT_LEN]; | |
195 | uint8_t b_iv[ZIO_DATA_IV_LEN]; | |
196 | ||
197 | /* | |
198 | * Technically this could be removed since we will always be able to | |
199 | * get the mac from the bp when we need it. However, it is inconvenient | |
200 | * for callers of arc code to have to pass a bp in all the time. This | |
201 | * also allows us to assert that L2ARC data is properly encrypted to | |
202 | * match the data in the main storage pool. | |
203 | */ | |
204 | uint8_t b_mac[ZIO_DATA_MAC_LEN]; | |
205 | } arc_buf_hdr_crypt_t; | |
206 | ||
59ec819a NB |
207 | typedef struct l2arc_dev { /* state for one L2ARC device */ |
208 | vdev_t *l2ad_vdev; /* vdev */ | |
209 | spa_t *l2ad_spa; /* spa */ | |
210 | uint64_t l2ad_hand; /* next write location */ | |
211 | uint64_t l2ad_start; /* first addr on device */ | |
212 | uint64_t l2ad_end; /* last addr on device */ | |
59ec819a NB |
213 | boolean_t l2ad_first; /* first sweep through */ |
214 | boolean_t l2ad_writing; /* currently writing */ | |
b9541d6b CW |
215 | kmutex_t l2ad_mtx; /* lock for buffer list */ |
216 | list_t l2ad_buflist; /* buffer list */ | |
59ec819a | 217 | list_node_t l2ad_node; /* device list node */ |
d962d5da | 218 | refcount_t l2ad_alloc; /* allocated bytes */ |
59ec819a NB |
219 | } l2arc_dev_t; |
220 | ||
b9541d6b CW |
221 | typedef struct l2arc_buf_hdr { /* L2ARC fields embedded in every arc_buf_hdr */ |
222 | /* protected by arc_buf_hdr mutex */ | |
223 | l2arc_dev_t *b_dev; /* L2ARC device */ | |
224 | uint64_t b_daddr; /* disk address, offset byte */ | |
b9541d6b | 225 | uint32_t b_hits; /* presumably a hit counter for this buffer - confirm */ |
b9541d6b CW |
226 | |
227 | list_node_t b_l2node; /* list linkage; presumably for l2ad_buflist - confirm */ | |
228 | } l2arc_buf_hdr_t; | |
229 | ||
49ee64e5 NB |
230 | typedef struct l2arc_write_callback { /* context carried with an L2ARC write */ |
231 | l2arc_dev_t *l2wcb_dev; /* device info */ | |
232 | arc_buf_hdr_t *l2wcb_head; /* head of write buflist */ | |
233 | } l2arc_write_callback_t; | |
234 | ||
b9541d6b CW |
235 | struct arc_buf_hdr { /* common header; L2, L1 and crypt sub-headers are embedded at the end */ | |
236 | /* protected by hash lock */ | |
237 | dva_t b_dva; | |
238 | uint64_t b_birth; | |
b9541d6b | 239 | |
d3c2ae1c | 240 | arc_buf_contents_t b_type; /* presumably ARC_BUFC_DATA or ARC_BUFC_METADATA - confirm */ |
b9541d6b CW |
241 | arc_buf_hdr_t *b_hash_next; /* presumably the next header on the same hash chain - confirm */ |
242 | arc_flags_t b_flags; | |
243 | ||
d3c2ae1c GW |
244 | /* |
245 | * This field stores the size of the data buffer after | |
246 | * compression, and is set in the arc's zio completion handlers. | |
247 | * It is in units of SPA_MINBLOCKSIZE (e.g. 1 == 512 bytes). | |
248 | * | |
249 | * While the block pointers can store up to 32MB in their psize | |
250 | * field, we can only store up to 32MB minus 512B. This is due | |
251 | * to the bp using a bias of 1, whereas we use a bias of 0 (i.e. | |
252 | * a field of zeros represents 512B in the bp). We can't use a | |
253 | * bias of 1 since we need to reserve a psize of zero, here, to | |
254 | * represent holes and embedded blocks. | |
255 | * | |
256 | * This isn't a problem in practice, since the maximum size of a | |
257 | * buffer is limited to 16MB, so we never need to store 32MB in | |
258 | * this field. Even in the upstream illumos code base, the | |
259 | * maximum size of a buffer is limited to 16MB. | |
260 | */ | |
261 | uint16_t b_psize; | |
262 | ||
263 | /* | |
264 | * This field stores the size of the data buffer before | |
265 | * compression, and cannot change once set. It is in units | |
266 | * of SPA_MINBLOCKSIZE (e.g. 2 == 1024 bytes) | |
267 | */ | |
268 | uint16_t b_lsize; /* immutable */ | |
269 | uint64_t b_spa; /* immutable */ | |
b9541d6b CW |
270 | |
271 | /* L2ARC fields. Undefined when not in L2ARC. */ | |
272 | l2arc_buf_hdr_t b_l2hdr; | |
273 | /* L1ARC fields. Undefined when in l2arc_only state */ | |
274 | l1arc_buf_hdr_t b_l1hdr; | |
b5256303 TC |
275 | /* |
276 | * Encryption parameters. Defined only when ARC_FLAG_ENCRYPTED | |
277 | * is set and the L1 header exists. | |
278 | */ | |
279 | arc_buf_hdr_crypt_t b_crypt_hdr; | |
b9541d6b | 280 | }; |
59ec819a NB |
281 | #ifdef __cplusplus |
282 | } | |
283 | #endif | |
284 | ||
285 | #endif /* _SYS_ARC_IMPL_H */ |