]> git.proxmox.com Git - mirror_zfs-debian.git/blame - module/zfs/multilist.c
Merge tag 'upstream/0.7.5'
[mirror_zfs-debian.git] / module / zfs / multilist.c
CommitLineData
e10b0808
AX
1/*
2 * CDDL HEADER START
3 *
4 * This file and its contents are supplied under the terms of the
5 * Common Development and Distribution License ("CDDL"), version 1.0.
6 * You may only use this file in accordance with the terms of version
7 * 1.0 of the CDDL.
8 *
9 * A full copy of the text of the CDDL should have accompanied this
10 * source. A copy of the CDDL is also available via the Internet at
11 * http://www.illumos.org/license/CDDL.
12 *
13 * CDDL HEADER END
14 */
15/*
cae5b340 16 * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
e10b0808
AX
17 */
18
19#include <sys/zfs_context.h>
20#include <sys/multilist.h>
21#include <sys/trace_multilist.h>
22
23/* needed for spa_get_random() */
24#include <sys/spa.h>
25
cae5b340
AX
/*
 * Tunable override for the number of sublists in each multilist_t.
 * multilist_create() uses this value only when it is greater than zero;
 * otherwise the count is derived from the number of CPUs in the system.
 */
int zfs_multilist_num_sublists = 0;
31
e10b0808
AX
/*
 * Convert a pointer to an object stored on the multilist into a pointer
 * to the multilist_node_t embedded in that object, using the byte offset
 * recorded in the multilist. Only compiled for DEBUG builds; within this
 * file it is referenced from ASSERT() checks.
 */
#ifdef DEBUG
static multilist_node_t *
multilist_d2l(multilist_t *ml, void *obj)
{
	char *base = (char *)obj;

	return ((multilist_node_t *)(base + ml->ml_offset));
}
#endif
43
44/*
45 * Initialize a new mutlilist using the parameters specified.
46 *
47 * - 'size' denotes the size of the structure containing the
48 * multilist_node_t.
49 * - 'offset' denotes the byte offset of the mutlilist_node_t within
50 * the structure that contains it.
51 * - 'num' specifies the number of internal sublists to create.
52 * - 'index_func' is used to determine which sublist to insert into
53 * when the multilist_insert() function is called; as well as which
54 * sublist to remove from when multilist_remove() is called. The
55 * requirements this function must meet, are the following:
56 *
57 * - It must always return the same value when called on the same
58 * object (to ensure the object is removed from the list it was
59 * inserted into).
60 *
61 * - It must return a value in the range [0, number of sublists).
62 * The multilist_get_num_sublists() function may be used to
63 * determine the number of sublists in the multilist.
64 *
65 * Also, in order to reduce internal contention between the sublists
66 * during insertion and removal, this function should choose evenly
67 * between all available sublists when inserting. This isn't a hard
68 * requirement, but a general rule of thumb in order to garner the
69 * best multi-threaded performance out of the data structure.
70 */
cae5b340
AX
71static multilist_t *
72multilist_create_impl(size_t size, size_t offset,
73 unsigned int num, multilist_sublist_index_func_t *index_func)
e10b0808 74{
e10b0808
AX
75 ASSERT3U(size, >, 0);
76 ASSERT3U(size, >=, offset + sizeof (multilist_node_t));
77 ASSERT3U(num, >, 0);
78 ASSERT3P(index_func, !=, NULL);
79
cae5b340 80 multilist_t *ml = kmem_alloc(sizeof (*ml), KM_SLEEP);
e10b0808
AX
81 ml->ml_offset = offset;
82 ml->ml_num_sublists = num;
83 ml->ml_index_func = index_func;
84
85 ml->ml_sublists = kmem_zalloc(sizeof (multilist_sublist_t) *
86 ml->ml_num_sublists, KM_SLEEP);
87
88 ASSERT3P(ml->ml_sublists, !=, NULL);
89
cae5b340 90 for (int i = 0; i < ml->ml_num_sublists; i++) {
e10b0808 91 multilist_sublist_t *mls = &ml->ml_sublists[i];
cae5b340 92 mutex_init(&mls->mls_lock, NULL, MUTEX_NOLOCKDEP, NULL);
e10b0808
AX
93 list_create(&mls->mls_list, size, offset);
94 }
cae5b340
AX
95 return (ml);
96}
97
98/*
99 * Allocate a new multilist, using the default number of sublists
100 * (the number of CPUs, or at least 4, or the tunable
101 * zfs_multilist_num_sublists).
102 */
103multilist_t *
104multilist_create(size_t size, size_t offset,
105 multilist_sublist_index_func_t *index_func)
106{
107 int num_sublists;
108
109 if (zfs_multilist_num_sublists > 0) {
110 num_sublists = zfs_multilist_num_sublists;
111 } else {
112 num_sublists = MAX(boot_ncpus, 4);
113 }
114
115 return (multilist_create_impl(size, offset, num_sublists, index_func));
e10b0808
AX
116}
117
118/*
119 * Destroy the given multilist object, and free up any memory it holds.
120 */
121void
122multilist_destroy(multilist_t *ml)
123{
124 int i;
125
126 ASSERT(multilist_is_empty(ml));
127
128 for (i = 0; i < ml->ml_num_sublists; i++) {
129 multilist_sublist_t *mls = &ml->ml_sublists[i];
130
131 ASSERT(list_is_empty(&mls->mls_list));
132
133 list_destroy(&mls->mls_list);
134 mutex_destroy(&mls->mls_lock);
135 }
136
137 ASSERT3P(ml->ml_sublists, !=, NULL);
138 kmem_free(ml->ml_sublists,
139 sizeof (multilist_sublist_t) * ml->ml_num_sublists);
140
141 ml->ml_num_sublists = 0;
142 ml->ml_offset = 0;
cae5b340 143 kmem_free(ml, sizeof (multilist_t));
e10b0808
AX
144}
145
146/*
147 * Insert the given object into the multilist.
148 *
149 * This function will insert the object specified into the sublist
150 * determined using the function given at multilist creation time.
151 *
152 * The sublist locks are automatically acquired if not already held, to
153 * ensure consistency when inserting and removing from multiple threads.
154 */
155void
156multilist_insert(multilist_t *ml, void *obj)
157{
158 unsigned int sublist_idx = ml->ml_index_func(ml, obj);
159 multilist_sublist_t *mls;
160 boolean_t need_lock;
161
162 DTRACE_PROBE3(multilist__insert, multilist_t *, ml,
163 unsigned int, sublist_idx, void *, obj);
164
165 ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
166
167 mls = &ml->ml_sublists[sublist_idx];
168
169 /*
170 * Note: Callers may already hold the sublist lock by calling
171 * multilist_sublist_lock(). Here we rely on MUTEX_HELD()
172 * returning TRUE if and only if the current thread holds the
173 * lock. While it's a little ugly to make the lock recursive in
174 * this way, it works and allows the calling code to be much
175 * simpler -- otherwise it would have to pass around a flag
176 * indicating that it already has the lock.
177 */
178 need_lock = !MUTEX_HELD(&mls->mls_lock);
179
180 if (need_lock)
181 mutex_enter(&mls->mls_lock);
182
183 ASSERT(!multilist_link_active(multilist_d2l(ml, obj)));
184
185 multilist_sublist_insert_head(mls, obj);
186
187 if (need_lock)
188 mutex_exit(&mls->mls_lock);
189}
190
191/*
192 * Remove the given object from the multilist.
193 *
194 * This function will remove the object specified from the sublist
195 * determined using the function given at multilist creation time.
196 *
197 * The necessary sublist locks are automatically acquired, to ensure
198 * consistency when inserting and removing from multiple threads.
199 */
200void
201multilist_remove(multilist_t *ml, void *obj)
202{
203 unsigned int sublist_idx = ml->ml_index_func(ml, obj);
204 multilist_sublist_t *mls;
205 boolean_t need_lock;
206
207 DTRACE_PROBE3(multilist__remove, multilist_t *, ml,
208 unsigned int, sublist_idx, void *, obj);
209
210 ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
211
212 mls = &ml->ml_sublists[sublist_idx];
213 /* See comment in multilist_insert(). */
214 need_lock = !MUTEX_HELD(&mls->mls_lock);
215
216 if (need_lock)
217 mutex_enter(&mls->mls_lock);
218
219 ASSERT(multilist_link_active(multilist_d2l(ml, obj)));
220
221 multilist_sublist_remove(mls, obj);
222
223 if (need_lock)
224 mutex_exit(&mls->mls_lock);
225}
226
227/*
228 * Check to see if this multilist object is empty.
229 *
230 * This will return TRUE if it finds all of the sublists of this
231 * multilist to be empty, and FALSE otherwise. Each sublist lock will be
232 * automatically acquired as necessary.
233 *
234 * If concurrent insertions and removals are occurring, the semantics
235 * of this function become a little fuzzy. Instead of locking all
236 * sublists for the entire call time of the function, each sublist is
237 * only locked as it is individually checked for emptiness. Thus, it's
238 * possible for this function to return TRUE with non-empty sublists at
239 * the time the function returns. This would be due to another thread
240 * inserting into a given sublist, after that specific sublist was check
241 * and deemed empty, but before all sublists have been checked.
242 */
243int
244multilist_is_empty(multilist_t *ml)
245{
246 int i;
247
248 for (i = 0; i < ml->ml_num_sublists; i++) {
249 multilist_sublist_t *mls = &ml->ml_sublists[i];
250 /* See comment in multilist_insert(). */
251 boolean_t need_lock = !MUTEX_HELD(&mls->mls_lock);
252
253 if (need_lock)
254 mutex_enter(&mls->mls_lock);
255
256 if (!list_is_empty(&mls->mls_list)) {
257 if (need_lock)
258 mutex_exit(&mls->mls_lock);
259
260 return (FALSE);
261 }
262
263 if (need_lock)
264 mutex_exit(&mls->mls_lock);
265 }
266
267 return (TRUE);
268}
269
270/* Return the number of sublists composing this multilist */
271unsigned int
272multilist_get_num_sublists(multilist_t *ml)
273{
274 return (ml->ml_num_sublists);
275}
276
277/* Return a randomly selected, valid sublist index for this multilist */
278unsigned int
279multilist_get_random_index(multilist_t *ml)
280{
281 return (spa_get_random(ml->ml_num_sublists));
282}
283
284/* Lock and return the sublist specified at the given index */
285multilist_sublist_t *
286multilist_sublist_lock(multilist_t *ml, unsigned int sublist_idx)
287{
288 multilist_sublist_t *mls;
289
290 ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
291 mls = &ml->ml_sublists[sublist_idx];
292 mutex_enter(&mls->mls_lock);
293
294 return (mls);
295}
296
cae5b340
AX
297/* Lock and return the sublist that would be used to store the specified obj */
298multilist_sublist_t *
299multilist_sublist_lock_obj(multilist_t *ml, void *obj)
300{
301 return (multilist_sublist_lock(ml, ml->ml_index_func(ml, obj)));
302}
303
e10b0808
AX
304void
305multilist_sublist_unlock(multilist_sublist_t *mls)
306{
307 mutex_exit(&mls->mls_lock);
308}
309
310/*
311 * We're allowing any object to be inserted into this specific sublist,
312 * but this can lead to trouble if multilist_remove() is called to
313 * remove this object. Specifically, if calling ml_index_func on this
314 * object returns an index for sublist different than what is passed as
315 * a parameter here, any call to multilist_remove() with this newly
316 * inserted object is undefined! (the call to multilist_remove() will
317 * remove the object from a list that it isn't contained in)
318 */
319void
320multilist_sublist_insert_head(multilist_sublist_t *mls, void *obj)
321{
322 ASSERT(MUTEX_HELD(&mls->mls_lock));
323 list_insert_head(&mls->mls_list, obj);
324}
325
326/* please see comment above multilist_sublist_insert_head */
327void
328multilist_sublist_insert_tail(multilist_sublist_t *mls, void *obj)
329{
330 ASSERT(MUTEX_HELD(&mls->mls_lock));
331 list_insert_tail(&mls->mls_list, obj);
332}
333
334/*
335 * Move the object one element forward in the list.
336 *
337 * This function will move the given object forward in the list (towards
338 * the head) by one object. So, in essence, it will swap its position in
339 * the list with its "prev" pointer. If the given object is already at the
340 * head of the list, it cannot be moved forward any more than it already
341 * is, so no action is taken.
342 *
343 * NOTE: This function **must not** remove any object from the list other
344 * than the object given as the parameter. This is relied upon in
345 * arc_evict_state_impl().
346 */
347void
348multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj)
349{
350 void *prev = list_prev(&mls->mls_list, obj);
351
352 ASSERT(MUTEX_HELD(&mls->mls_lock));
353 ASSERT(!list_is_empty(&mls->mls_list));
354
355 /* 'obj' must be at the head of the list, nothing to do */
356 if (prev == NULL)
357 return;
358
359 list_remove(&mls->mls_list, obj);
360 list_insert_before(&mls->mls_list, prev, obj);
361}
362
363void
364multilist_sublist_remove(multilist_sublist_t *mls, void *obj)
365{
366 ASSERT(MUTEX_HELD(&mls->mls_lock));
367 list_remove(&mls->mls_list, obj);
368}
369
370void *
371multilist_sublist_head(multilist_sublist_t *mls)
372{
373 ASSERT(MUTEX_HELD(&mls->mls_lock));
374 return (list_head(&mls->mls_list));
375}
376
377void *
378multilist_sublist_tail(multilist_sublist_t *mls)
379{
380 ASSERT(MUTEX_HELD(&mls->mls_lock));
381 return (list_tail(&mls->mls_list));
382}
383
384void *
385multilist_sublist_next(multilist_sublist_t *mls, void *obj)
386{
387 ASSERT(MUTEX_HELD(&mls->mls_lock));
388 return (list_next(&mls->mls_list, obj));
389}
390
391void *
392multilist_sublist_prev(multilist_sublist_t *mls, void *obj)
393{
394 ASSERT(MUTEX_HELD(&mls->mls_lock));
395 return (list_prev(&mls->mls_list, obj));
396}
397
398void
399multilist_link_init(multilist_node_t *link)
400{
401 list_link_init(link);
402}
403
404int
405multilist_link_active(multilist_node_t *link)
406{
407 return (list_link_active(link));
408}
cae5b340
AX
409
#if defined(_KERNEL) && defined(HAVE_SPL)

/*
 * Register zfs_multilist_num_sublists as an int module parameter with
 * mode 0644, together with its description string.
 */

/* BEGIN CSTYLED */
module_param(zfs_multilist_num_sublists, int, 0644);
MODULE_PARM_DESC(zfs_multilist_num_sublists,
	"Number of sublists used in each multilist");
/* END CSTYLED */

#endif /* _KERNEL && HAVE_SPL */