]>
Commit | Line | Data |
---|---|---|
9d5b5245 SD |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * This file and its contents are supplied under the terms of the | |
5 | * Common Development and Distribution License ("CDDL"), version 1.0. | |
6 | * You may only use this file in accordance with the terms of version | |
7 | * 1.0 of the CDDL. | |
8 | * | |
9 | * A full copy of the text of the CDDL should have accompanied this | |
10 | * source. A copy of the CDDL is also available via the Internet at | |
11 | * http://www.illumos.org/license/CDDL. | |
12 | * | |
13 | * CDDL HEADER END | |
14 | */ | |
15 | ||
16 | /* | |
61c3391a | 17 | * Copyright (c) 2017, 2019 by Delphix. All rights reserved. |
9d5b5245 SD |
18 | */ |
19 | ||
20 | /* | |
21 | * ZTHR Infrastructure | |
22 | * =================== | |
23 | * | |
24 | * ZTHR threads are used for isolated operations that span multiple txgs | |
25 | * within a SPA. They generally exist from SPA creation/loading and until | |
26 | * the SPA is exported/destroyed. The ideal requirements for an operation | |
27 | * to be modeled with a zthr are the following: | |
28 | * | |
29 | * 1] The operation needs to run over multiple txgs. | |
30 | * 2] There must be a single point of reference in memory or on disk that | |
61c3391a | 31 | * indicates whether the operation should run/is running or has |
9d5b5245 SD |
32 | * stopped. |
33 | * | |
34 | * If the operation satisfies the above then the following rules guarantee | |
35 | * a certain level of correctness: | |
36 | * | |
37 | * 1] Any thread EXCEPT the zthr changes the work indicator from stopped | |
38 | * to running but not the opposite. | |
39 | * 2] Only the zthr can change the work indicator from running to stopped | |
40 | * (e.g. when it is done) but not the opposite. | |
41 | * | |
42 | * This way a normal zthr cycle should go like this: | |
43 | * | |
44 | * 1] An external thread changes the work indicator from stopped to | |
45 | * running and wakes up the zthr. | |
46 | * 2] The zthr wakes up, checks the indicator and starts working. | |
47 | * 3] When the zthr is done, it changes the indicator to stopped, allowing | |
48 | * a new cycle to start. | |
49 | * | |
3ec34e55 BL |
50 | * Besides being awakened by other threads, a zthr can be configured |
51 | * during creation to wake up on its own after a specified interval | |
52 | * [see zthr_create_timer()]. | |
53 | * | |
61c3391a SD |
54 | * Note: ZTHR threads are NOT a replacement for generic threads! Please |
55 | * ensure that they fit your use-case well before using them. | |
56 | * | |
9d5b5245 SD |
57 | * == ZTHR creation |
58 | * | |
59 | * Every zthr needs three inputs to start running: | |
60 | * | |
61 | * 1] A user-defined checker function (checkfunc) that decides whether | |
62 | * the zthr should start working or go to sleep. The function should | |
63 | * return TRUE when the zthr needs to work or FALSE to let it sleep, | |
64 | * and should adhere to the following signature: | |
65 | * boolean_t checkfunc_name(void *args, zthr_t *t); | |
66 | * | |
67 | * 2] A user-defined ZTHR function (func) which the zthr executes when | |
68 | * it is not sleeping. The function should adhere to the following | |
69 | * signature type: | |
61c3391a | 70 | * void func_name(void *args, zthr_t *t); |
9d5b5245 SD |
71 | * |
72 | * 3] A void args pointer that will be passed to checkfunc and func | |
73 | * implicitly by the infrastructure. | |
74 | * | |
75 | * The reason why the above API needs two different functions, | |
76 | * instead of one that both checks and does the work, has to do with | |
61c3391a SD |
77 | * the zthr's internal state lock (zthr_state_lock) and the allowed |
78 | * cancellation windows. We want to hold the zthr_state_lock while | |
79 | * running checkfunc but not while running func. This way the zthr | |
80 | * can be cancelled while doing work and not while checking for work. | |
9d5b5245 SD |
81 | * |
82 | * To start a zthr: | |
83 | * zthr_t *zthr_pointer = zthr_create(checkfunc, func, args); | |
3ec34e55 BL |
84 | * or |
85 | * zthr_t *zthr_pointer = zthr_create_timer(checkfunc, func, | |
86 | * args, max_sleep); | |
9d5b5245 SD |
87 | * |
88 | * After that you should be able to wakeup, cancel, and resume the | |
61c3391a | 89 | * zthr from another thread using the zthr_pointer. |
9d5b5245 SD |
90 | * |
91 | * NOTE: ZTHR threads could potentially wake up spuriously and the | |
92 | * user should take this into account when writing a checkfunc. | |
93 | * [see ZTHR state transitions] | |
94 | * | |
2747f599 SH |
95 | * == ZTHR wakeup |
96 | * | |
97 | * ZTHR wakeup should be used when new work is added for the zthr. The | |
98 | * sleeping zthr will wake up, see that it has more work to complete | |
99 | * and proceed. This can be invoked from open or syncing context. | |
100 | * | |
101 | * To wake up a zthr: | |
102 | * zthr_wakeup(zthr_pointer); | |
103 | * | |
104 | * == ZTHR cancellation and resumption | |
9d5b5245 SD |
105 | * |
106 | * ZTHR threads must be cancelled when their SPA is being exported | |
107 | * or when they need to be paused so they don't interfere with other | |
108 | * operations. | |
109 | * | |
110 | * To cancel a zthr: | |
111 | * zthr_cancel(zthr_pointer); | |
112 | * | |
113 | * To resume it: | |
114 | * zthr_resume(zthr_pointer); | |
115 | * | |
2747f599 SH |
116 | * ZTHR cancel and resume should be invoked in open context during the |
117 | * lifecycle of the pool as it is imported, exported or destroyed. | |
118 | * | |
9d5b5245 | 119 | * A zthr will implicitly check if it has received a cancellation |
61c3391a SD |
120 | * signal every time func returns and every time it wakes up [see |
121 | * ZTHR state transitions below]. | |
9d5b5245 SD |
122 | * |
123 | * At times, waiting for the zthr's func to finish its job may take | |
124 | * a long time, which can unduly delay operations that need to | |
125 | * cancel the SPA's zthrs (e.g. spa_export). For this scenario | |
126 | * the user can explicitly make their ZTHR function aware of incoming | |
127 | * cancellation signals using zthr_iscancelled(). A common pattern for | |
128 | * that looks like this: | |
129 | * | |
130 | * void | |
131 | * func_name(void *args, zthr_t *t) | |
132 | * { | |
133 | * ... <unpack args> ... | |
134 | * while (!work_done && !zthr_iscancelled(t)) { | |
135 | * ... <do more work> ... | |
136 | * } | |
9d5b5245 SD |
137 | * } |
138 | * | |
9d5b5245 SD |
139 | * == ZTHR cleanup |
140 | * | |
141 | * Cancelling a zthr doesn't clean up its metadata (internal locks, | |
142 | * function pointers to func and checkfunc, etc.). This is because | |
143 | * we want to keep them around in case we want to resume the execution | |
144 | * of the zthr later. Similarly for zthrs that exit themselves. | |
145 | * | |
146 | * To completely cleanup a zthr, cancel it first to ensure that it | |
147 | * is not running and then use zthr_destroy(). | |
148 | * | |
149 | * == ZTHR state transitions | |
150 | * | |
151 | * zthr creation | |
152 | * + | |
153 | * | | |
154 | * | woke up | |
155 | * | +--------------+ sleep | |
156 | * | | ^ | |
157 | * | | | | |
158 | * | | | FALSE | |
159 | * | | | | |
160 | * v v FALSE + | |
161 | * cancelled? +---------> checkfunc? | |
162 | * + ^ + | |
163 | * | | | | |
164 | * | | | TRUE | |
165 | * | | | | |
166 | * | | func returned v | |
167 | * | +---------------+ func | |
168 | * | | |
169 | * | TRUE | |
170 | * | | |
171 | * v | |
172 | * zthr stopped running | |
173 | * | |
61c3391a SD |
174 | * == Implementation of ZTHR requests |
175 | * | |
2747f599 SH |
176 | * ZTHR cancel and resume are requests on a zthr to change its |
177 | * internal state. These requests are serialized using the | |
178 | * zthr_request_lock, while changes in its internal state are | |
179 | * protected by the zthr_state_lock. A request will first acquire | |
180 | * the zthr_request_lock and then immediately acquire the | |
181 | * zthr_state_lock. We do this so that incoming requests are | |
182 | * serialized using the request lock, while still allowing us | |
183 | * to use the state lock for thread communication via zthr_cv. | |
184 | * | |
185 | * ZTHR wakeup broadcasts to zthr_cv, causing sleeping threads | |
186 | * to wake up. It acquires the zthr_state_lock but not the | |
187 | * zthr_request_lock, so that a wakeup on a zthr in the middle | |
188 | * of being cancelled will not block. | |
9d5b5245 SD |
189 | */ |
190 | ||
191 | #include <sys/zfs_context.h> | |
192 | #include <sys/zthr.h> | |
193 | ||
61c3391a SD |
194 | struct zthr { |
195 | /* kernel thread doing the work; NULL while the zthr is not running */ | |
196 | kthread_t *zthr_thread; | |
197 | ||
198 | /* lock protecting internal data & invariants */ | |
199 | kmutex_t zthr_state_lock; | |
200 | ||
201 | /* mutex that serializes external cancel/resume requests */ | |
202 | kmutex_t zthr_request_lock; | |
203 | ||
204 | /* condvar used to wake the zthr and to announce that it has exited */ | |
205 | kcondvar_t zthr_cv; | |
206 | ||
207 | /* set to B_TRUE by zthr_cancel(); reset by the zthr as it exits */ | |
208 | boolean_t zthr_cancel; | |
209 | ||
210 | /* | |
211 | * maximum amount of time that the zthr spends sleeping; | |
212 | * if this is 0, the thread doesn't wake up until it gets | |
213 | * signaled. | |
214 | */ | |
215 | hrtime_t zthr_wait_time; | |
216 | ||
217 | /* consumer-provided callbacks & the argument passed to both of them */ | |
218 | zthr_checkfunc_t *zthr_checkfunc; | |
219 | zthr_func_t *zthr_func; | |
220 | void *zthr_arg; | |
221 | }; | |
9d5b5245 SD |
222 | |
223 | static void | |
224 | zthr_procedure(void *arg) | |
225 | { | |
226 | zthr_t *t = arg; | |
9d5b5245 | 227 | |
61c3391a SD |
228 | mutex_enter(&t->zthr_state_lock); |
229 | ASSERT3P(t->zthr_thread, ==, curthread); | |
230 | ||
9d5b5245 SD |
231 | while (!t->zthr_cancel) { |
232 | if (t->zthr_checkfunc(t->zthr_arg, t)) { | |
61c3391a SD |
233 | mutex_exit(&t->zthr_state_lock); |
234 | t->zthr_func(t->zthr_arg, t); | |
235 | mutex_enter(&t->zthr_state_lock); | |
9d5b5245 | 236 | } else { |
f4e35b16 BB |
237 | /* |
238 | * cv_wait_sig() is used instead of cv_wait() in | |
239 | * order to prevent this thread from incorrectly | |
240 | * contributing to the system load average when idle. | |
241 | */ | |
3ec34e55 | 242 | if (t->zthr_wait_time == 0) { |
61c3391a | 243 | cv_wait_sig(&t->zthr_cv, &t->zthr_state_lock); |
3ec34e55 BL |
244 | } else { |
245 | (void) cv_timedwait_sig_hires(&t->zthr_cv, | |
61c3391a | 246 | &t->zthr_state_lock, t->zthr_wait_time, |
3ec34e55 BL |
247 | MSEC2NSEC(1), 0); |
248 | } | |
9d5b5245 SD |
249 | } |
250 | } | |
9d5b5245 | 251 | |
61c3391a SD |
252 | /* |
253 | * Clear out the kernel thread metadata, reset the cancel flag and | |
254 | * notify any waiting zthr_cancel() caller that we've stopped running. | |
255 | */ | |
256 | t->zthr_thread = NULL; | |
257 | t->zthr_cancel = B_FALSE; | |
258 | cv_broadcast(&t->zthr_cv); | |
259 | ||
260 | mutex_exit(&t->zthr_state_lock); | |
261 | thread_exit(); | |
9d5b5245 SD |
262 | } |
263 | ||
264 | zthr_t * | |
265 | zthr_create(zthr_checkfunc_t *checkfunc, zthr_func_t *func, void *arg) | |
3ec34e55 BL |
266 | { |
267 | return (zthr_create_timer(checkfunc, func, arg, (hrtime_t)0)); /* max_sleep of 0: sleep until explicitly woken */ | |
268 | } | |
269 | ||
270 | /* | |
271 | * Create a zthr with the specified maximum sleep time. If the time | |
272 | * spent sleeping exceeds max_sleep, a wakeup (run checkfunc and start | |
273 | * working if required) is triggered; a max_sleep of 0 disables the timer. | |
274 | */ | |
275 | zthr_t * | |
276 | zthr_create_timer(zthr_checkfunc_t *checkfunc, zthr_func_t *func, | |
277 | void *arg, hrtime_t max_sleep) | |
9d5b5245 SD |
278 | { |
279 | zthr_t *t = kmem_zalloc(sizeof (*t), KM_SLEEP); | |
61c3391a SD |
280 | mutex_init(&t->zthr_state_lock, NULL, MUTEX_DEFAULT, NULL); |
281 | mutex_init(&t->zthr_request_lock, NULL, MUTEX_DEFAULT, NULL); | |
9d5b5245 SD |
282 | cv_init(&t->zthr_cv, NULL, CV_DEFAULT, NULL); |
283 | ||
61c3391a | 284 | mutex_enter(&t->zthr_state_lock); |
9d5b5245 SD |
285 | t->zthr_checkfunc = checkfunc; |
286 | t->zthr_func = func; | |
287 | t->zthr_arg = arg; | |
3ec34e55 | 288 | t->zthr_wait_time = max_sleep; |
9d5b5245 SD |
289 | |
290 | t->zthr_thread = thread_create(NULL, 0, zthr_procedure, t, | |
291 | 0, &p0, TS_RUN, minclsyspri); | |
61c3391a | 292 | mutex_exit(&t->zthr_state_lock); |
9d5b5245 SD |
293 | |
294 | return (t); | |
295 | } | |
296 | ||
297 | void | |
298 | zthr_destroy(zthr_t *t) | |
299 | { | |
61c3391a SD |
300 | ASSERT(!MUTEX_HELD(&t->zthr_state_lock)); |
301 | ASSERT(!MUTEX_HELD(&t->zthr_request_lock)); | |
9d5b5245 | 302 | VERIFY3P(t->zthr_thread, ==, NULL); /* the zthr must already have stopped running */ |
61c3391a SD |
303 | mutex_destroy(&t->zthr_request_lock); |
304 | mutex_destroy(&t->zthr_state_lock); | |
9d5b5245 SD |
305 | cv_destroy(&t->zthr_cv); |
306 | kmem_free(t, sizeof (*t)); | |
307 | } | |
308 | ||
309 | /* |
2747f599 SH |
310 | * Wake up the zthr if it is sleeping. If the thread has been cancelled |
311 | * or is in the process of being cancelled, this is a no-op. | |
9d5b5245 SD |
312 | */ |
313 | void | |
314 | zthr_wakeup(zthr_t *t) | |
315 | { | |
61c3391a SD |
316 | mutex_enter(&t->zthr_state_lock); |
317 | ||
318 | /* | |
2747f599 | 319 | * There are 5 states in which we can find the zthr when issuing |
61c3391a SD |
320 | * this broadcast: |
321 | * | |
322 | * [1] The common case of the thread being asleep, at which | |
323 | * point the broadcast will wake it up. | |
324 | * [2] The thread has been cancelled. Waking up a cancelled | |
325 | * thread is a no-op. Any work that is still left to be | |
326 | * done should be handled the next time the thread is | |
327 | * resumed. | |
328 | * [3] The thread is doing work and is already up, so this | |
329 | * is basically a no-op. | |
330 | * [4] The thread was just created/resumed, in which case the | |
331 | * behavior is similar to [3]. | |
2747f599 SH |
332 | * [5] The thread is in the middle of being cancelled, which |
333 | * will be a no-op. | |
61c3391a | 334 | */ |
9d5b5245 | 335 | cv_broadcast(&t->zthr_cv); |
61c3391a SD |
336 | |
337 | mutex_exit(&t->zthr_state_lock); | |
9d5b5245 SD |
338 | } |
339 | ||
340 | /* |
61c3391a SD |
341 | * Sends a cancel request to the zthr and blocks until the zthr is |
342 | * cancelled. If the zthr is not running (e.g. has been cancelled | |
2747f599 SH |
343 | * already), this is a no-op. Note that this function should not be |
344 | * called from syncing context as it could deadlock with the zthr_func. | |
9d5b5245 | 345 | */ |
61c3391a | 346 | void |
9d5b5245 SD |
347 | zthr_cancel(zthr_t *t) |
348 | { | |
61c3391a SD |
349 | mutex_enter(&t->zthr_request_lock); |
350 | mutex_enter(&t->zthr_state_lock); | |
9d5b5245 | 351 | |
61c3391a SD |
352 | /* |
353 | * Since we are holding the zthr_state_lock at this point | |
354 | * we can find the zthr in one of the following 4 states: | |
355 | * | |
356 | * [1] The thread has already been cancelled, therefore | |
357 | * there is nothing for us to do. | |
358 | * [2] The thread is sleeping, so we set the flag first, then | |
359 | * broadcast the CV to wake it up and wait for it to | |
360 | * exit. | |
361 | * [3] The thread is doing work, in which case we just set | |
362 | * the flag and wait for it to finish. | |
363 | * [4] The thread was just created/resumed, in which case | |
364 | * the behavior is similar to [3]. | |
365 | * | |
366 | * Since requests are serialized, by the time that we get | |
367 | * control back we expect that the zthr is cancelled and | |
368 | * not running anymore. | |
369 | */ | |
370 | if (t->zthr_thread != NULL) { | |
371 | t->zthr_cancel = B_TRUE; | |
9d5b5245 | 372 | |
61c3391a SD |
373 | /* broadcast in case the zthr is sleeping */ |
374 | cv_broadcast(&t->zthr_cv); | |
9d5b5245 | 375 | |
61c3391a SD |
376 | while (t->zthr_thread != NULL) |
377 | cv_wait(&t->zthr_cv, &t->zthr_state_lock); | |
9d5b5245 | 378 | |
61c3391a SD |
379 | ASSERT(!t->zthr_cancel); |
380 | } | |
381 | ||
382 | mutex_exit(&t->zthr_state_lock); | |
383 | mutex_exit(&t->zthr_request_lock); | |
9d5b5245 SD |
384 | } |
385 | ||
61c3391a | 386 | /* |
2747f599 SH |
387 | * Sends a resume request to the supplied zthr, spawning a new thread for |
388 | * it. If the zthr is already running this is a no-op. This function | |
389 | * should not be called from syncing context as it could deadlock with the zthr_func. | |
61c3391a | 390 | */ |
9d5b5245 SD |
391 | void |
392 | zthr_resume(zthr_t *t) | |
393 | { | |
61c3391a SD |
394 | mutex_enter(&t->zthr_request_lock); |
395 | mutex_enter(&t->zthr_state_lock); | |
9d5b5245 SD |
396 | |
397 | ASSERT3P(t->zthr_checkfunc, !=, NULL); | |
398 | ASSERT3P(t->zthr_func, !=, NULL); | |
399 | ASSERT(!t->zthr_cancel); | |
400 | ||
61c3391a SD |
401 | /* |
402 | * There are 4 states that we find the zthr in at this point | |
403 | * given the locks that we hold: | |
404 | * | |
405 | * [1] The zthr was cancelled, so we spawn a new thread for | |
406 | * the zthr (common case). | |
407 | * [2] The zthr is running at which point this is a no-op. | |
408 | * [3] The zthr is sleeping at which point this is a no-op. | |
409 | * [4] The zthr was just spawned at which point this is a | |
410 | * no-op. | |
411 | */ | |
412 | if (t->zthr_thread == NULL) { | |
413 | t->zthr_thread = thread_create(NULL, 0, zthr_procedure, t, | |
414 | 0, &p0, TS_RUN, minclsyspri); | |
415 | } | |
9d5b5245 | 416 | |
61c3391a SD |
417 | mutex_exit(&t->zthr_state_lock); |
418 | mutex_exit(&t->zthr_request_lock); | |
9d5b5245 SD |
419 | } |
420 | ||
421 | /* | |
422 | * This function is intended to be used by the zthr itself | |
61c3391a SD |
423 | * (specifically the zthr_func callback provided) to check |
424 | * if another thread has signaled it to stop running before | |
425 | * doing some expensive operation. | |
9d5b5245 SD |
426 | * |
427 | * Returns B_TRUE if we are in the middle of trying to cancel | |
428 | * this thread. | |
429 | * | |
430 | * Returns B_FALSE otherwise. | |
431 | */ | |
432 | boolean_t | |
433 | zthr_iscancelled(zthr_t *t) | |
434 | { | |
9d5b5245 SD |
435 | ASSERT3P(t->zthr_thread, ==, curthread); |
436 | ||
61c3391a SD |
437 | /* |
438 | * zthr_cancel() and zthr_resume() grab zthr_request_lock | |
439 | * first and then zthr_state_lock. This function only grabs | |
440 | * the zthr_state_lock. That is because this function should | |
441 | * only be called from the zthr_func to check if someone has | |
442 | * issued a zthr_cancel() on the thread. If there is a zthr_cancel() | |
443 | * happening concurrently, attempting to grab the request lock | |
444 | * here would result in a deadlock. | |
445 | * | |
446 | * By grabbing only the zthr_state_lock this function is allowed | |
447 | * to run concurrently with a zthr_cancel() request. | |
448 | */ | |
449 | mutex_enter(&t->zthr_state_lock); | |
450 | boolean_t cancelled = t->zthr_cancel; | |
451 | mutex_exit(&t->zthr_state_lock); | |
9d5b5245 SD |
452 | return (cancelled); |
453 | } |