]>
Commit | Line | Data |
---|---|---|
ca0bf58d PS |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * This file and its contents are supplied under the terms of the | |
5 | * Common Development and Distribution License ("CDDL"), version 1.0. | |
6 | * You may only use this file in accordance with the terms of version | |
7 | * 1.0 of the CDDL. | |
8 | * | |
9 | * A full copy of the text of the CDDL should have accompanied this | |
10 | * source. A copy of the CDDL is also available via the Internet at | |
11 | * http://www.illumos.org/license/CDDL. | |
12 | * | |
13 | * CDDL HEADER END | |
14 | */ | |
15 | /* | |
c30e58c4 | 16 | * Copyright (c) 2013, 2017 by Delphix. All rights reserved. |
ca0bf58d PS |
17 | */ |
18 | ||
19 | #include <sys/zfs_context.h> | |
20 | #include <sys/multilist.h> | |
e5d1c27e | 21 | #include <sys/trace_zfs.h> |
ca0bf58d | 22 | |
c30e58c4 MA |
23 | /* |
24 | * This overrides the number of sublists in each multilist_t, which defaults | |
25 | * to the number of CPUs in the system (see multilist_create()). | |
26 | */ | |
27 | int zfs_multilist_num_sublists = 0; | |
28 | ||
ca0bf58d PS |
29 | /* |
30 | * Given the object contained on the list, return a pointer to the | |
31 | * object's multilist_node_t structure it contains. | |
32 | */ | |
6d8da841 | 33 | #ifdef ZFS_DEBUG |
ca0bf58d PS |
34 | static multilist_node_t * |
35 | multilist_d2l(multilist_t *ml, void *obj) | |
36 | { | |
37 | return ((multilist_node_t *)((char *)obj + ml->ml_offset)); | |
38 | } | |
2c1988e9 AZ |
39 | #else |
40 | #define multilist_d2l(ml, obj) ((void) sizeof (ml), (void) sizeof (obj), NULL) | |
ca0bf58d PS |
41 | #endif |
42 | ||
43 | /* | |
44 | * Initialize a new mutlilist using the parameters specified. | |
45 | * | |
46 | * - 'size' denotes the size of the structure containing the | |
47 | * multilist_node_t. | |
48 | * - 'offset' denotes the byte offset of the mutlilist_node_t within | |
49 | * the structure that contains it. | |
50 | * - 'num' specifies the number of internal sublists to create. | |
51 | * - 'index_func' is used to determine which sublist to insert into | |
52 | * when the multilist_insert() function is called; as well as which | |
53 | * sublist to remove from when multilist_remove() is called. The | |
54 | * requirements this function must meet, are the following: | |
55 | * | |
56 | * - It must always return the same value when called on the same | |
57 | * object (to ensure the object is removed from the list it was | |
58 | * inserted into). | |
59 | * | |
60 | * - It must return a value in the range [0, number of sublists). | |
61 | * The multilist_get_num_sublists() function may be used to | |
62 | * determine the number of sublists in the multilist. | |
63 | * | |
64 | * Also, in order to reduce internal contention between the sublists | |
65 | * during insertion and removal, this function should choose evenly | |
66 | * between all available sublists when inserting. This isn't a hard | |
67 | * requirement, but a general rule of thumb in order to garner the | |
68 | * best multi-threaded performance out of the data structure. | |
69 | */ | |
ffdf019c AM |
70 | static void |
71 | multilist_create_impl(multilist_t *ml, size_t size, size_t offset, | |
c30e58c4 | 72 | unsigned int num, multilist_sublist_index_func_t *index_func) |
ca0bf58d | 73 | { |
ca0bf58d PS |
74 | ASSERT3U(size, >, 0); |
75 | ASSERT3U(size, >=, offset + sizeof (multilist_node_t)); | |
76 | ASSERT3U(num, >, 0); | |
77 | ASSERT3P(index_func, !=, NULL); | |
78 | ||
79 | ml->ml_offset = offset; | |
80 | ml->ml_num_sublists = num; | |
81 | ml->ml_index_func = index_func; | |
82 | ||
83 | ml->ml_sublists = kmem_zalloc(sizeof (multilist_sublist_t) * | |
84 | ml->ml_num_sublists, KM_SLEEP); | |
85 | ||
86 | ASSERT3P(ml->ml_sublists, !=, NULL); | |
87 | ||
64fc7762 | 88 | for (int i = 0; i < ml->ml_num_sublists; i++) { |
ca0bf58d | 89 | multilist_sublist_t *mls = &ml->ml_sublists[i]; |
448d7aaa | 90 | mutex_init(&mls->mls_lock, NULL, MUTEX_NOLOCKDEP, NULL); |
ca0bf58d PS |
91 | list_create(&mls->mls_list, size, offset); |
92 | } | |
93 | } | |
94 | ||
c30e58c4 | 95 | /* |
60a4c7d2 PD |
96 | * Allocate a new multilist, using the default number of sublists (the number |
97 | * of CPUs, or at least 4, or the tunable zfs_multilist_num_sublists). Note | |
98 | * that the multilists do not expand if more CPUs are hot-added. In that case, | |
99 | * we will have less fanout than boot_ncpus, but we don't want to always | |
100 | * reserve the RAM necessary to create the extra slots for additional CPUs up | |
101 | * front, and dynamically adding them is a complex task. | |
c30e58c4 | 102 | */ |
ffdf019c AM |
103 | void |
104 | multilist_create(multilist_t *ml, size_t size, size_t offset, | |
c30e58c4 MA |
105 | multilist_sublist_index_func_t *index_func) |
106 | { | |
107 | int num_sublists; | |
108 | ||
109 | if (zfs_multilist_num_sublists > 0) { | |
110 | num_sublists = zfs_multilist_num_sublists; | |
111 | } else { | |
112 | num_sublists = MAX(boot_ncpus, 4); | |
113 | } | |
114 | ||
ffdf019c | 115 | multilist_create_impl(ml, size, offset, num_sublists, index_func); |
c30e58c4 MA |
116 | } |
117 | ||
ca0bf58d PS |
118 | /* |
119 | * Destroy the given multilist object, and free up any memory it holds. | |
120 | */ | |
121 | void | |
122 | multilist_destroy(multilist_t *ml) | |
123 | { | |
ca0bf58d PS |
124 | ASSERT(multilist_is_empty(ml)); |
125 | ||
1c27024e | 126 | for (int i = 0; i < ml->ml_num_sublists; i++) { |
ca0bf58d PS |
127 | multilist_sublist_t *mls = &ml->ml_sublists[i]; |
128 | ||
129 | ASSERT(list_is_empty(&mls->mls_list)); | |
130 | ||
131 | list_destroy(&mls->mls_list); | |
132 | mutex_destroy(&mls->mls_lock); | |
133 | } | |
134 | ||
135 | ASSERT3P(ml->ml_sublists, !=, NULL); | |
136 | kmem_free(ml->ml_sublists, | |
137 | sizeof (multilist_sublist_t) * ml->ml_num_sublists); | |
138 | ||
139 | ml->ml_num_sublists = 0; | |
140 | ml->ml_offset = 0; | |
ffdf019c | 141 | ml->ml_sublists = NULL; |
ca0bf58d PS |
142 | } |
143 | ||
144 | /* | |
145 | * Insert the given object into the multilist. | |
146 | * | |
147 | * This function will insert the object specified into the sublist | |
148 | * determined using the function given at multilist creation time. | |
149 | * | |
150 | * The sublist locks are automatically acquired if not already held, to | |
151 | * ensure consistency when inserting and removing from multiple threads. | |
152 | */ | |
153 | void | |
154 | multilist_insert(multilist_t *ml, void *obj) | |
155 | { | |
156 | unsigned int sublist_idx = ml->ml_index_func(ml, obj); | |
157 | multilist_sublist_t *mls; | |
158 | boolean_t need_lock; | |
159 | ||
160 | DTRACE_PROBE3(multilist__insert, multilist_t *, ml, | |
161 | unsigned int, sublist_idx, void *, obj); | |
162 | ||
163 | ASSERT3U(sublist_idx, <, ml->ml_num_sublists); | |
164 | ||
165 | mls = &ml->ml_sublists[sublist_idx]; | |
166 | ||
167 | /* | |
168 | * Note: Callers may already hold the sublist lock by calling | |
169 | * multilist_sublist_lock(). Here we rely on MUTEX_HELD() | |
170 | * returning TRUE if and only if the current thread holds the | |
171 | * lock. While it's a little ugly to make the lock recursive in | |
172 | * this way, it works and allows the calling code to be much | |
173 | * simpler -- otherwise it would have to pass around a flag | |
174 | * indicating that it already has the lock. | |
175 | */ | |
176 | need_lock = !MUTEX_HELD(&mls->mls_lock); | |
177 | ||
178 | if (need_lock) | |
179 | mutex_enter(&mls->mls_lock); | |
180 | ||
181 | ASSERT(!multilist_link_active(multilist_d2l(ml, obj))); | |
182 | ||
183 | multilist_sublist_insert_head(mls, obj); | |
184 | ||
185 | if (need_lock) | |
186 | mutex_exit(&mls->mls_lock); | |
187 | } | |
188 | ||
189 | /* | |
190 | * Remove the given object from the multilist. | |
191 | * | |
192 | * This function will remove the object specified from the sublist | |
193 | * determined using the function given at multilist creation time. | |
194 | * | |
195 | * The necessary sublist locks are automatically acquired, to ensure | |
196 | * consistency when inserting and removing from multiple threads. | |
197 | */ | |
198 | void | |
199 | multilist_remove(multilist_t *ml, void *obj) | |
200 | { | |
201 | unsigned int sublist_idx = ml->ml_index_func(ml, obj); | |
202 | multilist_sublist_t *mls; | |
203 | boolean_t need_lock; | |
204 | ||
205 | DTRACE_PROBE3(multilist__remove, multilist_t *, ml, | |
206 | unsigned int, sublist_idx, void *, obj); | |
207 | ||
208 | ASSERT3U(sublist_idx, <, ml->ml_num_sublists); | |
209 | ||
210 | mls = &ml->ml_sublists[sublist_idx]; | |
211 | /* See comment in multilist_insert(). */ | |
212 | need_lock = !MUTEX_HELD(&mls->mls_lock); | |
213 | ||
214 | if (need_lock) | |
215 | mutex_enter(&mls->mls_lock); | |
216 | ||
217 | ASSERT(multilist_link_active(multilist_d2l(ml, obj))); | |
218 | ||
219 | multilist_sublist_remove(mls, obj); | |
220 | ||
221 | if (need_lock) | |
222 | mutex_exit(&mls->mls_lock); | |
223 | } | |
224 | ||
225 | /* | |
226 | * Check to see if this multilist object is empty. | |
227 | * | |
228 | * This will return TRUE if it finds all of the sublists of this | |
229 | * multilist to be empty, and FALSE otherwise. Each sublist lock will be | |
230 | * automatically acquired as necessary. | |
231 | * | |
232 | * If concurrent insertions and removals are occurring, the semantics | |
233 | * of this function become a little fuzzy. Instead of locking all | |
234 | * sublists for the entire call time of the function, each sublist is | |
235 | * only locked as it is individually checked for emptiness. Thus, it's | |
236 | * possible for this function to return TRUE with non-empty sublists at | |
237 | * the time the function returns. This would be due to another thread | |
238 | * inserting into a given sublist, after that specific sublist was check | |
239 | * and deemed empty, but before all sublists have been checked. | |
240 | */ | |
241 | int | |
242 | multilist_is_empty(multilist_t *ml) | |
243 | { | |
1c27024e | 244 | for (int i = 0; i < ml->ml_num_sublists; i++) { |
ca0bf58d PS |
245 | multilist_sublist_t *mls = &ml->ml_sublists[i]; |
246 | /* See comment in multilist_insert(). */ | |
247 | boolean_t need_lock = !MUTEX_HELD(&mls->mls_lock); | |
248 | ||
249 | if (need_lock) | |
250 | mutex_enter(&mls->mls_lock); | |
251 | ||
252 | if (!list_is_empty(&mls->mls_list)) { | |
253 | if (need_lock) | |
254 | mutex_exit(&mls->mls_lock); | |
255 | ||
256 | return (FALSE); | |
257 | } | |
258 | ||
259 | if (need_lock) | |
260 | mutex_exit(&mls->mls_lock); | |
261 | } | |
262 | ||
263 | return (TRUE); | |
264 | } | |
265 | ||
266 | /* Return the number of sublists composing this multilist */ | |
267 | unsigned int | |
268 | multilist_get_num_sublists(multilist_t *ml) | |
269 | { | |
270 | return (ml->ml_num_sublists); | |
271 | } | |
272 | ||
273 | /* Return a randomly selected, valid sublist index for this multilist */ | |
274 | unsigned int | |
275 | multilist_get_random_index(multilist_t *ml) | |
276 | { | |
29274c9f | 277 | return (random_in_range(ml->ml_num_sublists)); |
ca0bf58d PS |
278 | } |
279 | ||
280 | /* Lock and return the sublist specified at the given index */ | |
281 | multilist_sublist_t * | |
282 | multilist_sublist_lock(multilist_t *ml, unsigned int sublist_idx) | |
283 | { | |
284 | multilist_sublist_t *mls; | |
285 | ||
286 | ASSERT3U(sublist_idx, <, ml->ml_num_sublists); | |
287 | mls = &ml->ml_sublists[sublist_idx]; | |
288 | mutex_enter(&mls->mls_lock); | |
289 | ||
290 | return (mls); | |
291 | } | |
292 | ||
64fc7762 MA |
293 | /* Lock and return the sublist that would be used to store the specified obj */ |
294 | multilist_sublist_t * | |
295 | multilist_sublist_lock_obj(multilist_t *ml, void *obj) | |
296 | { | |
297 | return (multilist_sublist_lock(ml, ml->ml_index_func(ml, obj))); | |
298 | } | |
299 | ||
ca0bf58d PS |
300 | void |
301 | multilist_sublist_unlock(multilist_sublist_t *mls) | |
302 | { | |
303 | mutex_exit(&mls->mls_lock); | |
304 | } | |
305 | ||
306 | /* | |
307 | * We're allowing any object to be inserted into this specific sublist, | |
308 | * but this can lead to trouble if multilist_remove() is called to | |
309 | * remove this object. Specifically, if calling ml_index_func on this | |
310 | * object returns an index for sublist different than what is passed as | |
311 | * a parameter here, any call to multilist_remove() with this newly | |
312 | * inserted object is undefined! (the call to multilist_remove() will | |
313 | * remove the object from a list that it isn't contained in) | |
314 | */ | |
315 | void | |
316 | multilist_sublist_insert_head(multilist_sublist_t *mls, void *obj) | |
317 | { | |
318 | ASSERT(MUTEX_HELD(&mls->mls_lock)); | |
319 | list_insert_head(&mls->mls_list, obj); | |
320 | } | |
321 | ||
322 | /* please see comment above multilist_sublist_insert_head */ | |
323 | void | |
324 | multilist_sublist_insert_tail(multilist_sublist_t *mls, void *obj) | |
325 | { | |
326 | ASSERT(MUTEX_HELD(&mls->mls_lock)); | |
327 | list_insert_tail(&mls->mls_list, obj); | |
328 | } | |
329 | ||
330 | /* | |
331 | * Move the object one element forward in the list. | |
332 | * | |
333 | * This function will move the given object forward in the list (towards | |
334 | * the head) by one object. So, in essence, it will swap its position in | |
335 | * the list with its "prev" pointer. If the given object is already at the | |
336 | * head of the list, it cannot be moved forward any more than it already | |
337 | * is, so no action is taken. | |
338 | * | |
339 | * NOTE: This function **must not** remove any object from the list other | |
340 | * than the object given as the parameter. This is relied upon in | |
341 | * arc_evict_state_impl(). | |
342 | */ | |
343 | void | |
344 | multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj) | |
345 | { | |
346 | void *prev = list_prev(&mls->mls_list, obj); | |
347 | ||
348 | ASSERT(MUTEX_HELD(&mls->mls_lock)); | |
349 | ASSERT(!list_is_empty(&mls->mls_list)); | |
350 | ||
351 | /* 'obj' must be at the head of the list, nothing to do */ | |
352 | if (prev == NULL) | |
353 | return; | |
354 | ||
355 | list_remove(&mls->mls_list, obj); | |
356 | list_insert_before(&mls->mls_list, prev, obj); | |
357 | } | |
358 | ||
359 | void | |
360 | multilist_sublist_remove(multilist_sublist_t *mls, void *obj) | |
361 | { | |
362 | ASSERT(MUTEX_HELD(&mls->mls_lock)); | |
363 | list_remove(&mls->mls_list, obj); | |
364 | } | |
365 | ||
fc754677 AM |
366 | int |
367 | multilist_sublist_is_empty(multilist_sublist_t *mls) | |
368 | { | |
369 | ASSERT(MUTEX_HELD(&mls->mls_lock)); | |
370 | return (list_is_empty(&mls->mls_list)); | |
371 | } | |
372 | ||
373 | int | |
374 | multilist_sublist_is_empty_idx(multilist_t *ml, unsigned int sublist_idx) | |
375 | { | |
376 | multilist_sublist_t *mls; | |
377 | int empty; | |
378 | ||
379 | ASSERT3U(sublist_idx, <, ml->ml_num_sublists); | |
380 | mls = &ml->ml_sublists[sublist_idx]; | |
381 | ASSERT(!MUTEX_HELD(&mls->mls_lock)); | |
382 | mutex_enter(&mls->mls_lock); | |
383 | empty = list_is_empty(&mls->mls_list); | |
384 | mutex_exit(&mls->mls_lock); | |
385 | return (empty); | |
386 | } | |
387 | ||
ca0bf58d PS |
388 | void * |
389 | multilist_sublist_head(multilist_sublist_t *mls) | |
390 | { | |
391 | ASSERT(MUTEX_HELD(&mls->mls_lock)); | |
392 | return (list_head(&mls->mls_list)); | |
393 | } | |
394 | ||
395 | void * | |
396 | multilist_sublist_tail(multilist_sublist_t *mls) | |
397 | { | |
398 | ASSERT(MUTEX_HELD(&mls->mls_lock)); | |
399 | return (list_tail(&mls->mls_list)); | |
400 | } | |
401 | ||
402 | void * | |
403 | multilist_sublist_next(multilist_sublist_t *mls, void *obj) | |
404 | { | |
405 | ASSERT(MUTEX_HELD(&mls->mls_lock)); | |
406 | return (list_next(&mls->mls_list, obj)); | |
407 | } | |
408 | ||
409 | void * | |
410 | multilist_sublist_prev(multilist_sublist_t *mls, void *obj) | |
411 | { | |
412 | ASSERT(MUTEX_HELD(&mls->mls_lock)); | |
413 | return (list_prev(&mls->mls_list, obj)); | |
414 | } | |
415 | ||
416 | void | |
417 | multilist_link_init(multilist_node_t *link) | |
418 | { | |
419 | list_link_init(link); | |
420 | } | |
421 | ||
422 | int | |
423 | multilist_link_active(multilist_node_t *link) | |
424 | { | |
425 | return (list_link_active(link)); | |
426 | } | |
c30e58c4 | 427 | |
03fdcb9a | 428 | ZFS_MODULE_PARAM(zfs, zfs_, multilist_num_sublists, INT, ZMOD_RW, |
c30e58c4 | 429 | "Number of sublists used in each multilist"); |