]> git.proxmox.com Git - mirror_zfs.git/blame - include/sys/arc_impl.h
ztest: scrub ddt repair
[mirror_zfs.git] / include / sys / arc_impl.h
CommitLineData
59ec819a
NB
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2013 by Delphix. All rights reserved.
24 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
25 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
26 */
27
28#ifndef _SYS_ARC_IMPL_H
29#define _SYS_ARC_IMPL_H
30
31#include <sys/arc.h>
b5256303 32#include <sys/zio_crypt.h>
59ec819a
NB
33
34#ifdef __cplusplus
35extern "C" {
36#endif
37
38/*
39 * Note that buffers can be in one of 6 states:
40 * ARC_anon - anonymous (discussed below)
41 * ARC_mru - recently used, currently cached
42 * ARC_mru_ghost - recently used, no longer in cache
43 * ARC_mfu - frequently used, currently cached
44 * ARC_mfu_ghost - frequently used, no longer in cache
45 * ARC_l2c_only - exists in L2ARC but not other states
46 * When there are no active references to buffers, they
47 * are linked onto a list in one of these arc states. These are
48 * the only buffers that can be evicted or deleted. Within each
49 * state there are multiple lists, one for meta-data and one for
50 * non-meta-data. Meta-data (indirect blocks, blocks of dnodes,
51 * etc.) is tracked separately so that it can be managed more
52 * explicitly: favored over data, limited explicitly.
53 *
54 * Anonymous buffers are buffers that are not associated with
55 * a DVA. These are buffers that hold dirty block copies
56 * before they are written to stable storage. By definition,
57 * they are "ref'd" and are considered part of arc_mru
4e33ba4c 58 * that cannot be freed. Generally, they will acquire a DVA
59ec819a
NB
59 * as they are written and migrate onto the arc_mru list.
60 *
61 * The ARC_l2c_only state is for buffers that are in the second
62 * level ARC but no longer in any of the ARC_m* lists. The second
63 * level ARC itself may also contain buffers that are in any of
64 * the ARC_m* states - meaning that a buffer can exist in two
65 * places. The reason for the ARC_l2c_only state is to keep the
66 * buffer header in the hash table, so that reads that hit the
67 * second level ARC benefit from these fast lookups.
68 */
69
/*
 * Bookkeeping for a single ARC state (one of the states listed in the
 * comment above).  Evictable buffers and their totals are tracked per
 * buffer contents type, hence the ARC_BUFC_NUMTYPES-sized arrays.
 */
70typedef struct arc_state {
ca0bf58d
PS
71 /*
72 * list of evictable buffers, one multilist per buffer contents type
73 */
64fc7762 74 multilist_t *arcs_list[ARC_BUFC_NUMTYPES];
ca0bf58d
PS
75 /*
76 * total amount of evictable data in this state, per buffer contents type
77 */
c13060e4 78 zfs_refcount_t arcs_esize[ARC_BUFC_NUMTYPES];
ca0bf58d
PS
79 /*
80 * total amount of data in this state; this includes: evictable,
81 * non-evictable, ARC_BUFC_DATA, and ARC_BUFC_METADATA.
82 */
c13060e4 83 zfs_refcount_t arcs_size;
ca0bf58d
PS
84 /*
85 * type tag for this state (arc_state_type_t); supports the "dbufs" kstat
86 */
59ec819a
NB
87 arc_state_type_t arcs_state;
88} arc_state_t;
89
59ec819a
NB
/*
 * Callback record for an ARC read.  acb_done has the arc_read_done_func_t
 * type and acb_private is the opaque pointer stored alongside it.  Records
 * are chained through acb_next; l1arc_buf_hdr_t's b_acb field points at one.
 */
90typedef struct arc_callback arc_callback_t;
91
92struct arc_callback {
93 void *acb_private;
b5256303 94 arc_read_done_func_t *acb_done;
59ec819a 95 arc_buf_t *acb_buf;
/*
 * NOTE(review): the three flags below presumably record whether the
 * reader asked for encrypted, compressed, or unauthenticated data --
 * confirm against the read path in arc.c.
 */
b5256303 96 boolean_t acb_encrypted;
2aa34383 97 boolean_t acb_compressed;
b5256303 98 boolean_t acb_noauth;
be9a5c35 99 zbookmark_phys_t acb_zb;
/* NOTE(review): roles of the two zio pointers are not visible here -- confirm */
59ec819a 100 zio_t *acb_zio_dummy;
a8b2e306 101 zio_t *acb_zio_head;
59ec819a
NB
102 arc_callback_t *acb_next; /* next record in the chain */
103};
104
/*
 * Callback record for an ARC write: four arc_write_done_func_t hooks plus
 * the opaque pointer (awcb_private) and the buffer (awcb_buf) stored with
 * them.
 * NOTE(review): the hook names suggest ready / children-ready /
 * physically-done / done stages of the write -- confirm in arc.c.
 */
105typedef struct arc_write_callback arc_write_callback_t;
106
107struct arc_write_callback {
b5256303
TC
108 void *awcb_private;
109 arc_write_done_func_t *awcb_ready;
110 arc_write_done_func_t *awcb_children_ready;
111 arc_write_done_func_t *awcb_physdone;
112 arc_write_done_func_t *awcb_done;
113 arc_buf_t *awcb_buf;
59ec819a
NB
114};
115
b9541d6b
CW
116/*
117 * ARC buffers are separated into multiple structs as a memory saving measure:
118 * - Common fields struct, always defined, and embedded within it:
119 * - L2-only fields, always allocated but undefined when not in L2ARC
120 * - L1-only fields, only allocated when in L1ARC
121 *
122 *           Buffer in L1                     Buffer only in L2
123 *    +------------------------+          +------------------------+
124 *    | arc_buf_hdr_t          |          | arc_buf_hdr_t          |
125 *    |                        |          |                        |
126 *    |                        |          |                        |
127 *    |                        |          |                        |
128 *    +------------------------+          +------------------------+
129 *    | l2arc_buf_hdr_t        |          | l2arc_buf_hdr_t        |
130 *    | (undefined if L1-only) |          |                        |
131 *    +------------------------+          +------------------------+
132 *    | l1arc_buf_hdr_t        |
133 *    |                        |
134 *    |                        |
135 *    |                        |
136 *    |                        |
137 *    +------------------------+
138 *
139 * Because it's possible for the L2ARC to become extremely large, we can wind
140 * up eating a lot of memory in L2ARC buffer headers, so the size of a header
141 * is minimized by only allocating the fields necessary for an L1-cached buffer
142 * when a header is actually in the L1 cache. The sub-headers (l1arc_buf_hdr and
143 * l2arc_buf_hdr) are embedded rather than allocated separately to save a couple
144 * words in pointers. arc_hdr_realloc() is used to switch a header between
145 * these two allocation states.
146 */
/*
 * L1-only portion of an ARC buffer header (see the layout comment above).
 * Embedded in struct arc_buf_hdr as b_l1hdr.
 */
147typedef struct l1arc_buf_hdr {
/* NOTE(review): b_freeze_lock presumably guards b_freeze_cksum -- confirm */
59ec819a 148 kmutex_t b_freeze_lock;
d3c2ae1c 149 zio_cksum_t *b_freeze_cksum;
59ec819a 150
/* NOTE(review): b_bufcnt presumably counts the arc_buf_t's reachable from b_buf -- confirm */
59ec819a 151 arc_buf_t *b_buf;
d3c2ae1c 152 uint32_t b_bufcnt;
b9541d6b 153 /* for waiting on writes to complete */
59ec819a 154 kcondvar_t b_cv;
d3c2ae1c 155 uint8_t b_byteswap;
59ec819a 156
59ec819a
NB
157
158 /* protected by arc state mutex */
159 arc_state_t *b_state;
ca0bf58d 160 multilist_node_t b_arc_node;
59ec819a
NB
161
162 /* updated atomically */
163 clock_t b_arc_access;
164 uint32_t b_mru_hits;
165 uint32_t b_mru_ghost_hits;
166 uint32_t b_mfu_hits;
167 uint32_t b_mfu_ghost_hits;
168 uint32_t b_l2_hits;
169
170 /* self protecting */
c13060e4 171 zfs_refcount_t b_refcnt;
59ec819a 172
/* arc_callback_t records, chained through their acb_next field */
b9541d6b 173 arc_callback_t *b_acb;
/* NOTE(review): presumably the physical (on-disk form) data for this header -- confirm */
a6255b7f 174 abd_t *b_pabd;
b9541d6b 175} l1arc_buf_hdr_t;
59ec819a 176
b5256303
TC
177/*
178 * Encrypted blocks will need to be stored encrypted on the L2ARC
179 * disk as they appear in the main pool. In order for this to work we
180 * need to pass around the encryption parameters so they can be used
181 * to write data to the L2ARC. This struct is only defined in the
182 * arc_buf_hdr_t if the L1 header is defined and has the ARC_FLAG_ENCRYPTED
183 * flag set.
 *
 * It is embedded in struct arc_buf_hdr as the b_crypt_hdr member.
184 */
185typedef struct arc_buf_hdr_crypt {
186 abd_t *b_rabd; /* raw encrypted data */
187 dmu_object_type_t b_ot; /* object type */
188 uint32_t b_ebufcnt; /* count of encrypted buffers */
189
190 /* dsobj for looking up encryption key for l2arc encryption */
191 uint64_t b_dsobj;
192
193 /* encryption parameters: salt and IV, sized by the ZIO_DATA_* constants */
194 uint8_t b_salt[ZIO_DATA_SALT_LEN];
195 uint8_t b_iv[ZIO_DATA_IV_LEN];
196
197 /*
198 * Technically this could be removed since we will always be able to
199 * get the mac from the bp when we need it. However, it is inconvenient
200 * for callers of arc code to have to pass a bp in all the time. This
201 * also allows us to assert that L2ARC data is properly encrypted to
202 * match the data in the main storage pool.
203 */
204 uint8_t b_mac[ZIO_DATA_MAC_LEN];
205} arc_buf_hdr_crypt_t;
206
59ec819a
NB
/*
 * State for one L2ARC device.  Devices are linked together through
 * l2ad_node, and each device carries its own buffer list (l2ad_buflist)
 * whose lock is l2ad_mtx.
 * NOTE(review): l2ad_hand presumably moves between l2ad_start and l2ad_end
 * as writes are issued -- confirm against the L2ARC write path.
 */
207typedef struct l2arc_dev {
208 vdev_t *l2ad_vdev; /* vdev */
209 spa_t *l2ad_spa; /* spa */
210 uint64_t l2ad_hand; /* next write location */
211 uint64_t l2ad_start; /* first addr on device */
212 uint64_t l2ad_end; /* last addr on device */
59ec819a
NB
213 boolean_t l2ad_first; /* first sweep through */
214 boolean_t l2ad_writing; /* currently writing */
b9541d6b
CW
215 kmutex_t l2ad_mtx; /* lock for buffer list */
216 list_t l2ad_buflist; /* buffer list */
59ec819a 217 list_node_t l2ad_node; /* device list node */
c13060e4 218 zfs_refcount_t l2ad_alloc; /* allocated bytes */
59ec819a
NB
219} l2arc_dev_t;
220
b9541d6b
CW
/*
 * L2ARC portion of an ARC buffer header.  Embedded in struct arc_buf_hdr
 * as b_l2hdr; its contents are undefined when the buffer is not in L2ARC.
 */
221typedef struct l2arc_buf_hdr {
222 /* protected by arc_buf_hdr mutex */
223 l2arc_dev_t *b_dev; /* L2ARC device */
224 uint64_t b_daddr; /* disk address, offset byte */
/* NOTE(review): presumably a count of hits served from L2ARC -- confirm */
b9541d6b 225 uint32_t b_hits;
b9541d6b
CW
226
/* NOTE(review): presumably the linkage for the device's l2ad_buflist -- confirm */
227 list_node_t b_l2node;
228} l2arc_buf_hdr_t;
229
49ee64e5
NB
/*
 * Groups the target L2ARC device with the head of the write buflist.
 * NOTE(review): presumably passed to the L2ARC write completion path --
 * confirm in arc.c.
 */
230typedef struct l2arc_write_callback {
231 l2arc_dev_t *l2wcb_dev; /* device info */
232 arc_buf_hdr_t *l2wcb_head; /* head of write buflist */
233} l2arc_write_callback_t;
234
b9541d6b
CW
/*
 * ARC buffer header.  The common fields come first, followed by the
 * embedded L2ARC (b_l2hdr), L1ARC (b_l1hdr), and encryption (b_crypt_hdr)
 * sub-headers.  The comments on each sub-header member state when its
 * contents are defined.
 */
235struct arc_buf_hdr {
236 /* protected by hash lock */
237 dva_t b_dva;
238 uint64_t b_birth;
b9541d6b 239
d3c2ae1c 240 arc_buf_contents_t b_type;
b9541d6b
CW
/* NOTE(review): presumably the next header in the same hash chain -- confirm */
241 arc_buf_hdr_t *b_hash_next;
242 arc_flags_t b_flags;
243
d3c2ae1c
GW
244 /*
245 * This field stores the size of the data buffer after
246 * compression, and is set in the arc's zio completion handlers.
247 * It is in units of SPA_MINBLOCKSIZE (e.g. 1 == 512 bytes).
248 *
249 * While the block pointers can store up to 32MB in their psize
250 * field, we can only store up to 32MB minus 512B. This is due
251 * to the bp using a bias of 1, whereas we use a bias of 0 (i.e.
252 * a field of zeros represents 512B in the bp). We can't use a
253 * bias of 1 since we need to reserve a psize of zero, here, to
254 * represent holes and embedded blocks.
255 *
256 * This isn't a problem in practice, since the maximum size of a
257 * buffer is limited to 16MB, so we never need to store 32MB in
258 * this field. Even in the upstream illumos code base, the
259 * maximum size of a buffer is limited to 16MB.
260 */
261 uint16_t b_psize;
262
263 /*
264 * This field stores the size of the data buffer before
265 * compression, and cannot change once set. It is in units
266 * of SPA_MINBLOCKSIZE (e.g. 2 == 1024 bytes)
267 */
268 uint16_t b_lsize; /* immutable */
269 uint64_t b_spa; /* immutable */
b9541d6b
CW
270
271 /* L2ARC fields. Undefined when not in L2ARC. */
272 l2arc_buf_hdr_t b_l2hdr;
273 /* L1ARC fields. Undefined when in l2arc_only state */
274 l1arc_buf_hdr_t b_l1hdr;
b5256303
TC
275 /*
276 * Encryption parameters. Defined only when ARC_FLAG_ENCRYPTED
277 * is set and the L1 header exists.
278 */
279 arc_buf_hdr_crypt_t b_crypt_hdr;
b9541d6b 280};
59ec819a
NB
281#ifdef __cplusplus
282}
283#endif
284
285#endif /* _SYS_ARC_IMPL_H */