]> git.proxmox.com Git - mirror_zfs.git/blame - include/sys/arc_impl.h
OpenZFS 6513 - partially filled holes lose birth time
[mirror_zfs.git] / include / sys / arc_impl.h
CommitLineData
59ec819a
NB
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2013 by Delphix. All rights reserved.
24 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
25 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
26 */
27
28#ifndef _SYS_ARC_IMPL_H
29#define _SYS_ARC_IMPL_H
30
31#include <sys/arc.h>
32
33#ifdef __cplusplus
34extern "C" {
35#endif
36
37/*
38 * Note that buffers can be in one of 6 states:
39 * ARC_anon - anonymous (discussed below)
40 * ARC_mru - recently used, currently cached
41 * ARC_mru_ghost - recently used, no longer in cache
42 * ARC_mfu - frequently used, currently cached
43 * ARC_mfu_ghost - frequently used, no longer in cache
44 * ARC_l2c_only - exists in L2ARC but not other states
45 * When there are no active references to the buffer, they are
46 * linked onto a list in one of these arc states. These are
47 * the only buffers that can be evicted or deleted. Within each
48 * state there are multiple lists, one for meta-data and one for
49 * non-meta-data. Meta-data (indirect blocks, blocks of dnodes,
50 * etc.) is tracked separately so that it can be managed more
51 * explicitly: favored over data, limited explicitly.
52 *
53 * Anonymous buffers are buffers that are not associated with
54 * a DVA. These are buffers that hold dirty block copies
55 * before they are written to stable storage. By definition,
56 * they are "ref'd" and are considered part of arc_mru
57 * that cannot be freed. Generally, they will acquire a DVA
58 * as they are written and migrate onto the arc_mru list.
59 *
60 * The ARC_l2c_only state is for buffers that are in the second
61 * level ARC but no longer in any of the ARC_m* lists. The second
62 * level ARC itself may also contain buffers that are in any of
63 * the ARC_m* states - meaning that a buffer can exist in two
64 * places. The reason for the ARC_l2c_only state is to keep the
65 * buffer header in the hash table, so that reads that hit the
66 * second level ARC benefit from these fast lookups.
67 */
68
/*
 * Bookkeeping for a single ARC state (see the list of states in the
 * comment above). arcs_list and arcs_lsize each have ARC_BUFC_NUMTYPES
 * entries, i.e. one slot per buffer type, so evictable buffers and their
 * byte totals are tracked separately per type. arcs_size is a single
 * counter for the whole state.
 */
69typedef struct arc_state {
ca0bf58d
PS
70 /*
71 * list of evictable buffers
72 */
73 multilist_t arcs_list[ARC_BUFC_NUMTYPES];
74 /*
75 * total amount of evictable data in this state
76 */
77 uint64_t arcs_lsize[ARC_BUFC_NUMTYPES];
78 /*
79 * total amount of data in this state; this includes: evictable,
80 * non-evictable, ARC_BUFC_DATA, and ARC_BUFC_METADATA.
81 */
36da08ef 82 refcount_t arcs_size;
ca0bf58d
PS
83 /*
84 * supports the "dbufs" kstat
85 */
59ec819a
NB
/* NOTE(review): presumably the tag identifying which state this is - confirm */
86 arc_state_type_t arcs_state;
87} arc_state_t;
88
59ec819a
NB
/*
 * One callback record. acb_next points at another arc_callback_t, so
 * records form a singly linked chain; l1arc_buf_hdr_t.b_acb holds a
 * pointer to such a record.
 *
 * NOTE(review): presumably acb_done is invoked with acb_private and
 * acb_buf when the associated I/O finishes, and acb_zio_dummy is a
 * placeholder zio for additional waiters - confirm against arc.c.
 */
89typedef struct arc_callback arc_callback_t;
90
91struct arc_callback {
92 void *acb_private;
93 arc_done_func_t *acb_done;
94 arc_buf_t *acb_buf;
95 zio_t *acb_zio_dummy;
96 arc_callback_t *acb_next;
97};
98
/*
 * Callback bundle for an ARC write: four arc_done_func_t hooks, one
 * opaque pointer (awcb_private) and the associated arc_buf_t (awcb_buf).
 *
 * NOTE(review): the hook names suggest they correspond to the ready,
 * children-ready, physdone and done stages of the write, each called
 * with awcb_private - confirm against the code that fills this in.
 */
99typedef struct arc_write_callback arc_write_callback_t;
100
101struct arc_write_callback {
102 void *awcb_private;
103 arc_done_func_t *awcb_ready;
bc77ba73 104 arc_done_func_t *awcb_children_ready;
59ec819a
NB
105 arc_done_func_t *awcb_physdone;
106 arc_done_func_t *awcb_done;
107 arc_buf_t *awcb_buf;
108};
109
b9541d6b
CW
110/*
111 * ARC buffers are separated into multiple structs as a memory saving measure:
112 * - Common fields struct, always defined, and embedded within it:
113 * - L2-only fields, always allocated but undefined when not in L2ARC
114 * - L1-only fields, only allocated when in L1ARC
115 *
116 * Buffer in L1 Buffer only in L2
117 * +------------------------+ +------------------------+
118 * | arc_buf_hdr_t | | arc_buf_hdr_t |
119 * | | | |
120 * | | | |
121 * | | | |
122 * +------------------------+ +------------------------+
123 * | l2arc_buf_hdr_t | | l2arc_buf_hdr_t |
124 * | (undefined if L1-only) | | |
125 * +------------------------+ +------------------------+
126 * | l1arc_buf_hdr_t |
127 * | |
128 * | |
129 * | |
130 * | |
131 * +------------------------+
132 *
133 * Because it's possible for the L2ARC to become extremely large, we can wind
134 * up eating a lot of memory in L2ARC buffer headers, so the size of a header
135 * is minimized by only allocating the fields necessary for an L1-cached buffer
136 * when a header is actually in the L1 cache. The sub-headers (l1arc_buf_hdr and
137 * l2arc_buf_hdr) are embedded rather than allocated separately to save a couple
138 * words in pointers. arc_hdr_realloc() is used to switch a header between
139 * these two allocation states.
140 */
/*
 * L1-only part of a buffer header. Per the layout comment above, these
 * fields are only allocated while the header is in the L1 cache. The
 * existing section comments group the members by how they are protected:
 * arc state mutex, atomic updates, or self-protecting.
 */
141typedef struct l1arc_buf_hdr {
59ec819a 142 kmutex_t b_freeze_lock;
59ec819a 143
/* NOTE(review): presumably b_datacnt counts the arc_buf_t's reachable from b_buf - confirm */
59ec819a 144 arc_buf_t *b_buf;
59ec819a 145 uint32_t b_datacnt;
b9541d6b 146 /* for waiting on writes to complete */
59ec819a
NB
147 kcondvar_t b_cv;
148
59ec819a
NB
149
150 /* protected by arc state mutex */
/* b_state: the arc_state_t this header currently points at */
151 arc_state_t *b_state;
ca0bf58d 152 multilist_node_t b_arc_node;
59ec819a
NB
153
154 /* updated atomically */
155 clock_t b_arc_access;
156 uint32_t b_mru_hits;
157 uint32_t b_mru_ghost_hits;
158 uint32_t b_mfu_hits;
159 uint32_t b_mfu_ghost_hits;
160 uint32_t b_l2_hits;
161
162 /* self protecting */
163 refcount_t b_refcnt;
164
b9541d6b
CW
/* pointer to an arc_callback_t record; records chain through acb_next */
165 arc_callback_t *b_acb;
166 /* temporary buffer holder for in-flight compressed data */
167 void *b_tmp_cdata;
168} l1arc_buf_hdr_t;
59ec819a
NB
169
/*
 * Per-device L2ARC state: the backing vdev and spa, the next write
 * location (l2ad_hand) within the device's address range
 * [l2ad_start, l2ad_end], the first-sweep and currently-writing flags,
 * and the buffer list together with the mutex that guards it.
 * l2ad_node is this device's node in a device list.
 */
170typedef struct l2arc_dev {
171 vdev_t *l2ad_vdev; /* vdev */
172 spa_t *l2ad_spa; /* spa */
173 uint64_t l2ad_hand; /* next write location */
174 uint64_t l2ad_start; /* first addr on device */
175 uint64_t l2ad_end; /* last addr on device */
59ec819a
NB
176 boolean_t l2ad_first; /* first sweep through */
177 boolean_t l2ad_writing; /* currently writing */
b9541d6b
CW
178 kmutex_t l2ad_mtx; /* lock for buffer list */
179 list_t l2ad_buflist; /* buffer list */
59ec819a 180 list_node_t l2ad_node; /* device list node */
d962d5da 181 refcount_t l2ad_alloc; /* allocated bytes */
59ec819a
NB
182} l2arc_dev_t;
183
b9541d6b
CW
/*
 * L2ARC part of a buffer header: which device holds the buffer, its byte
 * offset on that device, and size/compression information. It is embedded
 * in struct arc_buf_hdr as b_l2hdr and is undefined when the buffer is
 * not in L2ARC.
 */
184typedef struct l2arc_buf_hdr {
185 /* protected by arc_buf_hdr mutex */
186 l2arc_dev_t *b_dev; /* L2ARC device */
187 uint64_t b_daddr; /* disk address, offset byte */
/*
 * NOTE(review): the "real alloc'd buffer size" comment below sits above
 * b_hits but reads as a description of b_asize - confirm and consider
 * moving it next to b_asize.
 */
188 /* real alloc'd buffer size depending on b_compress applied */
189 uint32_t b_hits;
190 int32_t b_asize;
4e0f33ff 191 uint8_t b_compress;
b9541d6b
CW
192
/* NOTE(review): presumably the node linking this header into l2arc_dev_t.l2ad_buflist - confirm */
193 list_node_t b_l2node;
194} l2arc_buf_hdr_t;
195
49ee64e5
NB
/*
 * Context for an L2ARC write: the device being written to and the
 * header at the head of the write buffer list.
 */
196typedef struct l2arc_write_callback {
197 l2arc_dev_t *l2wcb_dev; /* device info */
198 arc_buf_hdr_t *l2wcb_head; /* head of write buflist */
199} l2arc_write_callback_t;
200
b9541d6b
CW
/*
 * Common buffer header. The fields up to b_spa are always defined; the
 * two embedded sub-headers follow, L2 first and L1 last.
 *
 * NOTE(review): b_l1hdr is the final member, which matches the layout
 * comment earlier in this file (L1-only fields are only allocated when
 * the header is in L1ARC). Presumably it must stay last so an L2-only
 * allocation can omit it - confirm before adding or reordering members.
 */
201struct arc_buf_hdr {
202 /* protected by hash lock */
203 dva_t b_dva;
204 uint64_t b_birth;
205 /*
206 * Even though this checksum is only set/verified when a buffer is in
207 * the L1 cache, it needs to be in the set of common fields because it
208 * must be preserved from the time before a buffer is written out to
209 * L2ARC until after it is read back in.
210 */
211 zio_cksum_t *b_freeze_cksum;
212
/* b_hash_next: pointer to another header; NOTE(review): presumably the hash-chain link - confirm */
213 arc_buf_hdr_t *b_hash_next;
214 arc_flags_t b_flags;
215
216 /* immutable */
217 int32_t b_size;
218 uint64_t b_spa;
219
220 /* L2ARC fields. Undefined when not in L2ARC. */
221 l2arc_buf_hdr_t b_l2hdr;
222 /* L1ARC fields. Undefined when in l2arc_only state */
223 l1arc_buf_hdr_t b_l1hdr;
224};
59ec819a
NB
225#ifdef __cplusplus
226}
227#endif
228
229#endif /* _SYS_ARC_IMPL_H */