]> git.proxmox.com Git - mirror_zfs.git/blob - module/zfs/multilist.c
Limit the amount of dnode metadata in the ARC
[mirror_zfs.git] / module / zfs / multilist.c
1 /*
2 * CDDL HEADER START
3 *
4 * This file and its contents are supplied under the terms of the
5 * Common Development and Distribution License ("CDDL"), version 1.0.
6 * You may only use this file in accordance with the terms of version
7 * 1.0 of the CDDL.
8 *
9 * A full copy of the text of the CDDL should have accompanied this
10 * source. A copy of the CDDL is also available via the Internet at
11 * http://www.illumos.org/license/CDDL.
12 *
13 * CDDL HEADER END
14 */
15 /*
16 * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
17 */
18
19 #include <sys/zfs_context.h>
20 #include <sys/multilist.h>
21 #include <sys/trace_multilist.h>
22
23 /* needed for spa_get_random() */
24 #include <sys/spa.h>
25
26 /*
27 * Given the object contained on the list, return a pointer to the
28 * object's multilist_node_t structure it contains.
29 */
30 #ifdef DEBUG
31 static multilist_node_t *
32 multilist_d2l(multilist_t *ml, void *obj)
33 {
34 return ((multilist_node_t *)((char *)obj + ml->ml_offset));
35 }
36 #endif
37
38 /*
39 * Initialize a new mutlilist using the parameters specified.
40 *
41 * - 'size' denotes the size of the structure containing the
42 * multilist_node_t.
43 * - 'offset' denotes the byte offset of the mutlilist_node_t within
44 * the structure that contains it.
45 * - 'num' specifies the number of internal sublists to create.
46 * - 'index_func' is used to determine which sublist to insert into
47 * when the multilist_insert() function is called; as well as which
48 * sublist to remove from when multilist_remove() is called. The
49 * requirements this function must meet, are the following:
50 *
51 * - It must always return the same value when called on the same
52 * object (to ensure the object is removed from the list it was
53 * inserted into).
54 *
55 * - It must return a value in the range [0, number of sublists).
56 * The multilist_get_num_sublists() function may be used to
57 * determine the number of sublists in the multilist.
58 *
59 * Also, in order to reduce internal contention between the sublists
60 * during insertion and removal, this function should choose evenly
61 * between all available sublists when inserting. This isn't a hard
62 * requirement, but a general rule of thumb in order to garner the
63 * best multi-threaded performance out of the data structure.
64 */
65 void
66 multilist_create(multilist_t *ml, size_t size, size_t offset, unsigned int num,
67 multilist_sublist_index_func_t *index_func)
68 {
69 int i;
70
71 ASSERT3P(ml, !=, NULL);
72 ASSERT3U(size, >, 0);
73 ASSERT3U(size, >=, offset + sizeof (multilist_node_t));
74 ASSERT3U(num, >, 0);
75 ASSERT3P(index_func, !=, NULL);
76
77 ml->ml_offset = offset;
78 ml->ml_num_sublists = num;
79 ml->ml_index_func = index_func;
80
81 ml->ml_sublists = kmem_zalloc(sizeof (multilist_sublist_t) *
82 ml->ml_num_sublists, KM_SLEEP);
83
84 ASSERT3P(ml->ml_sublists, !=, NULL);
85
86 for (i = 0; i < ml->ml_num_sublists; i++) {
87 multilist_sublist_t *mls = &ml->ml_sublists[i];
88 mutex_init(&mls->mls_lock, NULL, MUTEX_NOLOCKDEP, NULL);
89 list_create(&mls->mls_list, size, offset);
90 }
91 }
92
93 /*
94 * Destroy the given multilist object, and free up any memory it holds.
95 */
96 void
97 multilist_destroy(multilist_t *ml)
98 {
99 int i;
100
101 ASSERT(multilist_is_empty(ml));
102
103 for (i = 0; i < ml->ml_num_sublists; i++) {
104 multilist_sublist_t *mls = &ml->ml_sublists[i];
105
106 ASSERT(list_is_empty(&mls->mls_list));
107
108 list_destroy(&mls->mls_list);
109 mutex_destroy(&mls->mls_lock);
110 }
111
112 ASSERT3P(ml->ml_sublists, !=, NULL);
113 kmem_free(ml->ml_sublists,
114 sizeof (multilist_sublist_t) * ml->ml_num_sublists);
115
116 ml->ml_num_sublists = 0;
117 ml->ml_offset = 0;
118 }
119
120 /*
121 * Insert the given object into the multilist.
122 *
123 * This function will insert the object specified into the sublist
124 * determined using the function given at multilist creation time.
125 *
126 * The sublist locks are automatically acquired if not already held, to
127 * ensure consistency when inserting and removing from multiple threads.
128 */
129 void
130 multilist_insert(multilist_t *ml, void *obj)
131 {
132 unsigned int sublist_idx = ml->ml_index_func(ml, obj);
133 multilist_sublist_t *mls;
134 boolean_t need_lock;
135
136 DTRACE_PROBE3(multilist__insert, multilist_t *, ml,
137 unsigned int, sublist_idx, void *, obj);
138
139 ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
140
141 mls = &ml->ml_sublists[sublist_idx];
142
143 /*
144 * Note: Callers may already hold the sublist lock by calling
145 * multilist_sublist_lock(). Here we rely on MUTEX_HELD()
146 * returning TRUE if and only if the current thread holds the
147 * lock. While it's a little ugly to make the lock recursive in
148 * this way, it works and allows the calling code to be much
149 * simpler -- otherwise it would have to pass around a flag
150 * indicating that it already has the lock.
151 */
152 need_lock = !MUTEX_HELD(&mls->mls_lock);
153
154 if (need_lock)
155 mutex_enter(&mls->mls_lock);
156
157 ASSERT(!multilist_link_active(multilist_d2l(ml, obj)));
158
159 multilist_sublist_insert_head(mls, obj);
160
161 if (need_lock)
162 mutex_exit(&mls->mls_lock);
163 }
164
165 /*
166 * Remove the given object from the multilist.
167 *
168 * This function will remove the object specified from the sublist
169 * determined using the function given at multilist creation time.
170 *
171 * The necessary sublist locks are automatically acquired, to ensure
172 * consistency when inserting and removing from multiple threads.
173 */
174 void
175 multilist_remove(multilist_t *ml, void *obj)
176 {
177 unsigned int sublist_idx = ml->ml_index_func(ml, obj);
178 multilist_sublist_t *mls;
179 boolean_t need_lock;
180
181 DTRACE_PROBE3(multilist__remove, multilist_t *, ml,
182 unsigned int, sublist_idx, void *, obj);
183
184 ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
185
186 mls = &ml->ml_sublists[sublist_idx];
187 /* See comment in multilist_insert(). */
188 need_lock = !MUTEX_HELD(&mls->mls_lock);
189
190 if (need_lock)
191 mutex_enter(&mls->mls_lock);
192
193 ASSERT(multilist_link_active(multilist_d2l(ml, obj)));
194
195 multilist_sublist_remove(mls, obj);
196
197 if (need_lock)
198 mutex_exit(&mls->mls_lock);
199 }
200
201 /*
202 * Check to see if this multilist object is empty.
203 *
204 * This will return TRUE if it finds all of the sublists of this
205 * multilist to be empty, and FALSE otherwise. Each sublist lock will be
206 * automatically acquired as necessary.
207 *
208 * If concurrent insertions and removals are occurring, the semantics
209 * of this function become a little fuzzy. Instead of locking all
210 * sublists for the entire call time of the function, each sublist is
211 * only locked as it is individually checked for emptiness. Thus, it's
212 * possible for this function to return TRUE with non-empty sublists at
213 * the time the function returns. This would be due to another thread
214 * inserting into a given sublist, after that specific sublist was check
215 * and deemed empty, but before all sublists have been checked.
216 */
217 int
218 multilist_is_empty(multilist_t *ml)
219 {
220 int i;
221
222 for (i = 0; i < ml->ml_num_sublists; i++) {
223 multilist_sublist_t *mls = &ml->ml_sublists[i];
224 /* See comment in multilist_insert(). */
225 boolean_t need_lock = !MUTEX_HELD(&mls->mls_lock);
226
227 if (need_lock)
228 mutex_enter(&mls->mls_lock);
229
230 if (!list_is_empty(&mls->mls_list)) {
231 if (need_lock)
232 mutex_exit(&mls->mls_lock);
233
234 return (FALSE);
235 }
236
237 if (need_lock)
238 mutex_exit(&mls->mls_lock);
239 }
240
241 return (TRUE);
242 }
243
244 /* Return the number of sublists composing this multilist */
245 unsigned int
246 multilist_get_num_sublists(multilist_t *ml)
247 {
248 return (ml->ml_num_sublists);
249 }
250
251 /* Return a randomly selected, valid sublist index for this multilist */
252 unsigned int
253 multilist_get_random_index(multilist_t *ml)
254 {
255 return (spa_get_random(ml->ml_num_sublists));
256 }
257
258 /* Lock and return the sublist specified at the given index */
259 multilist_sublist_t *
260 multilist_sublist_lock(multilist_t *ml, unsigned int sublist_idx)
261 {
262 multilist_sublist_t *mls;
263
264 ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
265 mls = &ml->ml_sublists[sublist_idx];
266 mutex_enter(&mls->mls_lock);
267
268 return (mls);
269 }
270
271 void
272 multilist_sublist_unlock(multilist_sublist_t *mls)
273 {
274 mutex_exit(&mls->mls_lock);
275 }
276
277 /*
278 * We're allowing any object to be inserted into this specific sublist,
279 * but this can lead to trouble if multilist_remove() is called to
280 * remove this object. Specifically, if calling ml_index_func on this
281 * object returns an index for sublist different than what is passed as
282 * a parameter here, any call to multilist_remove() with this newly
283 * inserted object is undefined! (the call to multilist_remove() will
284 * remove the object from a list that it isn't contained in)
285 */
286 void
287 multilist_sublist_insert_head(multilist_sublist_t *mls, void *obj)
288 {
289 ASSERT(MUTEX_HELD(&mls->mls_lock));
290 list_insert_head(&mls->mls_list, obj);
291 }
292
293 /* please see comment above multilist_sublist_insert_head */
294 void
295 multilist_sublist_insert_tail(multilist_sublist_t *mls, void *obj)
296 {
297 ASSERT(MUTEX_HELD(&mls->mls_lock));
298 list_insert_tail(&mls->mls_list, obj);
299 }
300
301 /*
302 * Move the object one element forward in the list.
303 *
304 * This function will move the given object forward in the list (towards
305 * the head) by one object. So, in essence, it will swap its position in
306 * the list with its "prev" pointer. If the given object is already at the
307 * head of the list, it cannot be moved forward any more than it already
308 * is, so no action is taken.
309 *
310 * NOTE: This function **must not** remove any object from the list other
311 * than the object given as the parameter. This is relied upon in
312 * arc_evict_state_impl().
313 */
314 void
315 multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj)
316 {
317 void *prev = list_prev(&mls->mls_list, obj);
318
319 ASSERT(MUTEX_HELD(&mls->mls_lock));
320 ASSERT(!list_is_empty(&mls->mls_list));
321
322 /* 'obj' must be at the head of the list, nothing to do */
323 if (prev == NULL)
324 return;
325
326 list_remove(&mls->mls_list, obj);
327 list_insert_before(&mls->mls_list, prev, obj);
328 }
329
330 void
331 multilist_sublist_remove(multilist_sublist_t *mls, void *obj)
332 {
333 ASSERT(MUTEX_HELD(&mls->mls_lock));
334 list_remove(&mls->mls_list, obj);
335 }
336
337 void *
338 multilist_sublist_head(multilist_sublist_t *mls)
339 {
340 ASSERT(MUTEX_HELD(&mls->mls_lock));
341 return (list_head(&mls->mls_list));
342 }
343
344 void *
345 multilist_sublist_tail(multilist_sublist_t *mls)
346 {
347 ASSERT(MUTEX_HELD(&mls->mls_lock));
348 return (list_tail(&mls->mls_list));
349 }
350
351 void *
352 multilist_sublist_next(multilist_sublist_t *mls, void *obj)
353 {
354 ASSERT(MUTEX_HELD(&mls->mls_lock));
355 return (list_next(&mls->mls_list, obj));
356 }
357
358 void *
359 multilist_sublist_prev(multilist_sublist_t *mls, void *obj)
360 {
361 ASSERT(MUTEX_HELD(&mls->mls_lock));
362 return (list_prev(&mls->mls_list, obj));
363 }
364
365 void
366 multilist_link_init(multilist_node_t *link)
367 {
368 list_link_init(link);
369 }
370
371 int
372 multilist_link_active(multilist_node_t *link)
373 {
374 return (list_link_active(link));
375 }