4 * This file and its contents are supplied under the terms of the
5 * Common Development and Distribution License ("CDDL"), version 1.0.
6 * You may only use this file in accordance with the terms of version
9 * A full copy of the text of the CDDL should have accompanied this
10 * source. A copy of the CDDL is also available via the Internet at
11 * http://www.illumos.org/license/CDDL.
16 * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
19 #include <sys/zfs_context.h>
20 #include <sys/multilist.h>
21 #include <sys/trace_multilist.h>
23 /* needed for spa_get_random() */
27 * Given the object contained on the list, return a pointer to the
28 * object's multilist_node_t structure it contains.
31 static multilist_node_t
*
32 multilist_d2l(multilist_t
*ml
, void *obj
)
34 return ((multilist_node_t
*)((char *)obj
+ ml
->ml_offset
));
39 * Initialize a new mutlilist using the parameters specified.
41 * - 'size' denotes the size of the structure containing the
43 * - 'offset' denotes the byte offset of the mutlilist_node_t within
44 * the structure that contains it.
45 * - 'num' specifies the number of internal sublists to create.
46 * - 'index_func' is used to determine which sublist to insert into
47 * when the multilist_insert() function is called; as well as which
48 * sublist to remove from when multilist_remove() is called. The
49 * requirements this function must meet, are the following:
51 * - It must always return the same value when called on the same
52 * object (to ensure the object is removed from the list it was
55 * - It must return a value in the range [0, number of sublists).
56 * The multilist_get_num_sublists() function may be used to
57 * determine the number of sublists in the multilist.
59 * Also, in order to reduce internal contention between the sublists
60 * during insertion and removal, this function should choose evenly
61 * between all available sublists when inserting. This isn't a hard
62 * requirement, but a general rule of thumb in order to garner the
63 * best multi-threaded performance out of the data structure.
66 multilist_create(multilist_t
*ml
, size_t size
, size_t offset
, unsigned int num
,
67 multilist_sublist_index_func_t
*index_func
)
71 ASSERT3P(ml
, !=, NULL
);
73 ASSERT3U(size
, >=, offset
+ sizeof (multilist_node_t
));
75 ASSERT3P(index_func
, !=, NULL
);
77 ml
->ml_offset
= offset
;
78 ml
->ml_num_sublists
= num
;
79 ml
->ml_index_func
= index_func
;
81 ml
->ml_sublists
= kmem_zalloc(sizeof (multilist_sublist_t
) *
82 ml
->ml_num_sublists
, KM_SLEEP
);
84 ASSERT3P(ml
->ml_sublists
, !=, NULL
);
86 for (i
= 0; i
< ml
->ml_num_sublists
; i
++) {
87 multilist_sublist_t
*mls
= &ml
->ml_sublists
[i
];
88 mutex_init(&mls
->mls_lock
, NULL
, MUTEX_NOLOCKDEP
, NULL
);
89 list_create(&mls
->mls_list
, size
, offset
);
94 * Destroy the given multilist object, and free up any memory it holds.
97 multilist_destroy(multilist_t
*ml
)
101 ASSERT(multilist_is_empty(ml
));
103 for (i
= 0; i
< ml
->ml_num_sublists
; i
++) {
104 multilist_sublist_t
*mls
= &ml
->ml_sublists
[i
];
106 ASSERT(list_is_empty(&mls
->mls_list
));
108 list_destroy(&mls
->mls_list
);
109 mutex_destroy(&mls
->mls_lock
);
112 ASSERT3P(ml
->ml_sublists
, !=, NULL
);
113 kmem_free(ml
->ml_sublists
,
114 sizeof (multilist_sublist_t
) * ml
->ml_num_sublists
);
116 ml
->ml_num_sublists
= 0;
121 * Insert the given object into the multilist.
123 * This function will insert the object specified into the sublist
124 * determined using the function given at multilist creation time.
126 * The sublist locks are automatically acquired if not already held, to
127 * ensure consistency when inserting and removing from multiple threads.
130 multilist_insert(multilist_t
*ml
, void *obj
)
132 unsigned int sublist_idx
= ml
->ml_index_func(ml
, obj
);
133 multilist_sublist_t
*mls
;
136 DTRACE_PROBE3(multilist__insert
, multilist_t
*, ml
,
137 unsigned int, sublist_idx
, void *, obj
);
139 ASSERT3U(sublist_idx
, <, ml
->ml_num_sublists
);
141 mls
= &ml
->ml_sublists
[sublist_idx
];
144 * Note: Callers may already hold the sublist lock by calling
145 * multilist_sublist_lock(). Here we rely on MUTEX_HELD()
146 * returning TRUE if and only if the current thread holds the
147 * lock. While it's a little ugly to make the lock recursive in
148 * this way, it works and allows the calling code to be much
149 * simpler -- otherwise it would have to pass around a flag
150 * indicating that it already has the lock.
152 need_lock
= !MUTEX_HELD(&mls
->mls_lock
);
155 mutex_enter(&mls
->mls_lock
);
157 ASSERT(!multilist_link_active(multilist_d2l(ml
, obj
)));
159 multilist_sublist_insert_head(mls
, obj
);
162 mutex_exit(&mls
->mls_lock
);
166 * Remove the given object from the multilist.
168 * This function will remove the object specified from the sublist
169 * determined using the function given at multilist creation time.
171 * The necessary sublist locks are automatically acquired, to ensure
172 * consistency when inserting and removing from multiple threads.
175 multilist_remove(multilist_t
*ml
, void *obj
)
177 unsigned int sublist_idx
= ml
->ml_index_func(ml
, obj
);
178 multilist_sublist_t
*mls
;
181 DTRACE_PROBE3(multilist__remove
, multilist_t
*, ml
,
182 unsigned int, sublist_idx
, void *, obj
);
184 ASSERT3U(sublist_idx
, <, ml
->ml_num_sublists
);
186 mls
= &ml
->ml_sublists
[sublist_idx
];
187 /* See comment in multilist_insert(). */
188 need_lock
= !MUTEX_HELD(&mls
->mls_lock
);
191 mutex_enter(&mls
->mls_lock
);
193 ASSERT(multilist_link_active(multilist_d2l(ml
, obj
)));
195 multilist_sublist_remove(mls
, obj
);
198 mutex_exit(&mls
->mls_lock
);
202 * Check to see if this multilist object is empty.
204 * This will return TRUE if it finds all of the sublists of this
205 * multilist to be empty, and FALSE otherwise. Each sublist lock will be
206 * automatically acquired as necessary.
208 * If concurrent insertions and removals are occurring, the semantics
209 * of this function become a little fuzzy. Instead of locking all
210 * sublists for the entire call time of the function, each sublist is
211 * only locked as it is individually checked for emptiness. Thus, it's
212 * possible for this function to return TRUE with non-empty sublists at
213 * the time the function returns. This would be due to another thread
214 * inserting into a given sublist, after that specific sublist was check
215 * and deemed empty, but before all sublists have been checked.
218 multilist_is_empty(multilist_t
*ml
)
222 for (i
= 0; i
< ml
->ml_num_sublists
; i
++) {
223 multilist_sublist_t
*mls
= &ml
->ml_sublists
[i
];
224 /* See comment in multilist_insert(). */
225 boolean_t need_lock
= !MUTEX_HELD(&mls
->mls_lock
);
228 mutex_enter(&mls
->mls_lock
);
230 if (!list_is_empty(&mls
->mls_list
)) {
232 mutex_exit(&mls
->mls_lock
);
238 mutex_exit(&mls
->mls_lock
);
244 /* Return the number of sublists composing this multilist */
246 multilist_get_num_sublists(multilist_t
*ml
)
248 return (ml
->ml_num_sublists
);
251 /* Return a randomly selected, valid sublist index for this multilist */
253 multilist_get_random_index(multilist_t
*ml
)
255 return (spa_get_random(ml
->ml_num_sublists
));
258 /* Lock and return the sublist specified at the given index */
259 multilist_sublist_t
*
260 multilist_sublist_lock(multilist_t
*ml
, unsigned int sublist_idx
)
262 multilist_sublist_t
*mls
;
264 ASSERT3U(sublist_idx
, <, ml
->ml_num_sublists
);
265 mls
= &ml
->ml_sublists
[sublist_idx
];
266 mutex_enter(&mls
->mls_lock
);
272 multilist_sublist_unlock(multilist_sublist_t
*mls
)
274 mutex_exit(&mls
->mls_lock
);
278 * We're allowing any object to be inserted into this specific sublist,
279 * but this can lead to trouble if multilist_remove() is called to
280 * remove this object. Specifically, if calling ml_index_func on this
281 * object returns an index for sublist different than what is passed as
282 * a parameter here, any call to multilist_remove() with this newly
283 * inserted object is undefined! (the call to multilist_remove() will
284 * remove the object from a list that it isn't contained in)
287 multilist_sublist_insert_head(multilist_sublist_t
*mls
, void *obj
)
289 ASSERT(MUTEX_HELD(&mls
->mls_lock
));
290 list_insert_head(&mls
->mls_list
, obj
);
293 /* please see comment above multilist_sublist_insert_head */
295 multilist_sublist_insert_tail(multilist_sublist_t
*mls
, void *obj
)
297 ASSERT(MUTEX_HELD(&mls
->mls_lock
));
298 list_insert_tail(&mls
->mls_list
, obj
);
302 * Move the object one element forward in the list.
304 * This function will move the given object forward in the list (towards
305 * the head) by one object. So, in essence, it will swap its position in
306 * the list with its "prev" pointer. If the given object is already at the
307 * head of the list, it cannot be moved forward any more than it already
308 * is, so no action is taken.
310 * NOTE: This function **must not** remove any object from the list other
311 * than the object given as the parameter. This is relied upon in
312 * arc_evict_state_impl().
315 multilist_sublist_move_forward(multilist_sublist_t
*mls
, void *obj
)
317 void *prev
= list_prev(&mls
->mls_list
, obj
);
319 ASSERT(MUTEX_HELD(&mls
->mls_lock
));
320 ASSERT(!list_is_empty(&mls
->mls_list
));
322 /* 'obj' must be at the head of the list, nothing to do */
326 list_remove(&mls
->mls_list
, obj
);
327 list_insert_before(&mls
->mls_list
, prev
, obj
);
331 multilist_sublist_remove(multilist_sublist_t
*mls
, void *obj
)
333 ASSERT(MUTEX_HELD(&mls
->mls_lock
));
334 list_remove(&mls
->mls_list
, obj
);
338 multilist_sublist_head(multilist_sublist_t
*mls
)
340 ASSERT(MUTEX_HELD(&mls
->mls_lock
));
341 return (list_head(&mls
->mls_list
));
345 multilist_sublist_tail(multilist_sublist_t
*mls
)
347 ASSERT(MUTEX_HELD(&mls
->mls_lock
));
348 return (list_tail(&mls
->mls_list
));
352 multilist_sublist_next(multilist_sublist_t
*mls
, void *obj
)
354 ASSERT(MUTEX_HELD(&mls
->mls_lock
));
355 return (list_next(&mls
->mls_list
, obj
));
359 multilist_sublist_prev(multilist_sublist_t
*mls
, void *obj
)
361 ASSERT(MUTEX_HELD(&mls
->mls_lock
));
362 return (list_prev(&mls
->mls_list
, obj
));
366 multilist_link_init(multilist_node_t
*link
)
368 list_link_init(link
);
372 multilist_link_active(multilist_node_t
*link
)
374 return (list_link_active(link
));