]>
Commit | Line | Data |
---|---|---|
9d5b5245 SD |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * This file and its contents are supplied under the terms of the | |
5 | * Common Development and Distribution License ("CDDL"), version 1.0. | |
6 | * You may only use this file in accordance with the terms of version | |
7 | * 1.0 of the CDDL. | |
8 | * | |
9 | * A full copy of the text of the CDDL should have accompanied this | |
10 | * source. A copy of the CDDL is also available via the Internet at | |
11 | * http://www.illumos.org/license/CDDL. | |
12 | * | |
13 | * CDDL HEADER END | |
14 | */ | |
15 | ||
16 | /* | |
61c3391a | 17 | * Copyright (c) 2017, 2019 by Delphix. All rights reserved. |
9d5b5245 SD |
18 | */ |
19 | ||
20 | /* | |
21 | * ZTHR Infrastructure | |
22 | * =================== | |
23 | * | |
24 | * ZTHR threads are used for isolated operations that span multiple txgs | |
25 | * within a SPA. They generally exist from SPA creation/loading and until | |
26 | * the SPA is exported/destroyed. The ideal requirements for an operation | |
27 | * to be modeled with a zthr are the following: | |
28 | * | |
29 | * 1] The operation needs to run over multiple txgs. | |
30 | * 2] There must be a single point of reference in memory or on disk that | |
61c3391a | 31 | * indicates whether the operation should run/is running or has |
9d5b5245 SD |
32 | * stopped. |
33 | * | |
34 | * If the operation satisfies the above then the following rules guarantee | |
35 | * a certain level of correctness: | |
36 | * | |
37 | * 1] Any thread EXCEPT the zthr changes the work indicator from stopped | |
38 | * to running but not the opposite. | |
39 | * 2] Only the zthr can change the work indicator from running to stopped | |
40 | * (e.g. when it is done) but not the opposite. | |
41 | * | |
42 | * This way a normal zthr cycle should go like this: | |
43 | * | |
44 | * 1] An external thread changes the work indicator from stopped to | |
45 | * running and wakes up the zthr. | |
46 | * 2] The zthr wakes up, checks the indicator and starts working. | |
47 | * 3] When the zthr is done, it changes the indicator to stopped, allowing | |
48 | * a new cycle to start. | |
49 | * | |
3ec34e55 BL |
50 | * Besides being awakened by other threads, a zthr can be configured |
51 | * during creation to wake up on its own after a specified interval | |
52 | * [see zthr_create_timer()]. | |
53 | * | |
61c3391a SD |
54 | * Note: ZTHR threads are NOT a replacement for generic threads! Please |
55 | * ensure that they fit your use-case well before using them. | |
56 | * | |
9d5b5245 SD |
57 | * == ZTHR creation |
58 | * | |
59 | * Every zthr needs three inputs to start running: | |
60 | * | |
61 | * 1] A user-defined checker function (checkfunc) that decides whether | |
62 | * the zthr should start working or go to sleep. The function should | |
63 | * return TRUE when the zthr needs to work or FALSE to let it sleep, | |
64 | * and should adhere to the following signature: | |
65 | * boolean_t checkfunc_name(void *args, zthr_t *t); | |
66 | * | |
67 | * 2] A user-defined ZTHR function (func) which the zthr executes when | |
68 | * it is not sleeping. The function should adhere to the following | |
69 | * signature type: | |
61c3391a | 70 | * void func_name(void *args, zthr_t *t); |
9d5b5245 SD |
71 | * |
72 | * 3] A void args pointer that will be passed to checkfunc and func | |
73 | * implicitly by the infrastructure. | |
74 | * | |
75 | * The reason why the above API needs two different functions, | |
76 | * instead of one that both checks and does the work, has to do with | |
61c3391a SD |
77 | * the zthr's internal state lock (zthr_state_lock) and the allowed |
78 | * cancellation windows. We want to hold the zthr_state_lock while | |
79 | * running checkfunc but not while running func. This way the zthr | |
80 | * can be cancelled while doing work and not while checking for work. | |
9d5b5245 SD |
81 | * |
82 | * To start a zthr: | |
83 | * zthr_t *zthr_pointer = zthr_create(checkfunc, func, args); | |
3ec34e55 BL |
84 | * or |
85 | * zthr_t *zthr_pointer = zthr_create_timer(checkfunc, func, | |
86 | * args, max_sleep); | |
9d5b5245 SD |
87 | * |
88 | * After that you should be able to wake up, cancel, and resume the | |
61c3391a | 89 | * zthr from another thread using the zthr_pointer. |
9d5b5245 SD |
90 | * |
91 | * NOTE: ZTHR threads could potentially wake up spuriously and the | |
92 | * user should take this into account when writing a checkfunc. | |
93 | * [see ZTHR state transitions] | |
94 | * | |
2747f599 SH |
95 | * == ZTHR wakeup |
96 | * | |
97 | * ZTHR wakeup should be used when new work is added for the zthr. The | |
98 | * sleeping zthr will wake up, see that it has more work to complete | |
99 | * and proceed. This can be invoked from open or syncing context. | |
100 | * | |
101 | * To wake up a zthr: | |
102 | * zthr_wakeup(zthr_t *t) | |
103 | * | |
104 | * == ZTHR cancellation and resumption | |
9d5b5245 SD |
105 | * |
106 | * ZTHR threads must be cancelled when their SPA is being exported | |
107 | * or when they need to be paused so they don't interfere with other | |
108 | * operations. | |
109 | * | |
110 | * To cancel a zthr: | |
111 | * zthr_cancel(zthr_pointer); | |
112 | * | |
113 | * To resume it: | |
114 | * zthr_resume(zthr_pointer); | |
115 | * | |
2747f599 SH |
116 | * ZTHR cancel and resume should be invoked in open context during the |
117 | * lifecycle of the pool as it is imported, exported or destroyed. | |
118 | * | |
9d5b5245 | 119 | * A zthr will implicitly check if it has received a cancellation |
61c3391a SD |
120 | * signal every time func returns and every time it wakes up [see |
121 | * ZTHR state transitions below]. | |
9d5b5245 SD |
122 | * |
123 | * At times, waiting for the zthr's func to finish its job may take | |
124 | * a long time. This delay can be a problem for operations that | |
125 | * need to cancel the SPA's zthrs (e.g. spa_export). For this scenario | |
126 | * the user can explicitly make their ZTHR function aware of incoming | |
127 | * cancellation signals using zthr_iscancelled(). A common pattern for | |
128 | * that looks like this: | |
129 | * | |
130 | * void | |
131 | * func_name(void *args, zthr_t *t) | |
132 | * { | |
133 | * ... <unpack args> ... | |
134 | * while (!work_done && !zthr_iscancelled(t)) { | |
135 | * ... <do more work> ... | |
136 | * } | |
9d5b5245 SD |
137 | * } |
138 | * | |
9d5b5245 SD |
139 | * == ZTHR cleanup |
140 | * | |
141 | * Cancelling a zthr doesn't clean up its metadata (internal locks, | |
142 | * function pointers to func and checkfunc, etc.). This is because | |
143 | * we want to keep them around in case we want to resume the execution | |
144 | * of the zthr later. Similarly for zthrs that exit themselves. | |
145 | * | |
146 | * To completely cleanup a zthr, cancel it first to ensure that it | |
147 | * is not running and then use zthr_destroy(). | |
148 | * | |
149 | * == ZTHR state transitions | |
150 | * | |
151 | * zthr creation | |
152 | * + | |
153 | * | | |
154 | * | woke up | |
155 | * | +--------------+ sleep | |
156 | * | | ^ | |
157 | * | | | | |
158 | * | | | FALSE | |
159 | * | | | | |
160 | * v v FALSE + | |
161 | * cancelled? +---------> checkfunc? | |
162 | * + ^ + | |
163 | * | | | | |
164 | * | | | TRUE | |
165 | * | | | | |
166 | * | | func returned v | |
167 | * | +---------------+ func | |
168 | * | | |
169 | * | TRUE | |
170 | * | | |
171 | * v | |
172 | * zthr stopped running | |
173 | * | |
61c3391a SD |
174 | * == Implementation of ZTHR requests |
175 | * | |
2747f599 SH |
176 | * ZTHR cancel and resume are requests on a zthr to change its |
177 | * internal state. These requests are serialized using the | |
178 | * zthr_request_lock, while changes in its internal state are | |
179 | * protected by the zthr_state_lock. A request will first acquire | |
180 | * the zthr_request_lock and then immediately acquire the | |
181 | * zthr_state_lock. We do this so that incoming requests are | |
182 | * serialized using the request lock, while still allowing us | |
183 | * to use the state lock for thread communication via zthr_cv. | |
184 | * | |
185 | * ZTHR wakeup broadcasts to zthr_cv, causing sleeping threads | |
186 | * to wake up. It acquires the zthr_state_lock but not the | |
187 | * zthr_request_lock, so that a wakeup on a zthr in the middle | |
188 | * of being cancelled will not block. | |
9d5b5245 SD |
189 | */ |
190 | ||
191 | #include <sys/zfs_context.h> | |
192 | #include <sys/zthr.h> | |
193 | ||
61c3391a SD |
194 | struct zthr { | |
195 | /* thread doing the work; NULL while the zthr is not running */ | |
196 | kthread_t *zthr_thread; | |
197 | ||
198 | /* lock protecting internal data & invariants; paired with zthr_cv */ | |
199 | kmutex_t zthr_state_lock; | |
200 | ||
201 | /* mutex that serializes external requests (cancel and resume) */ | |
202 | kmutex_t zthr_request_lock; | |
203 | ||
204 | /* wakes the sleeping zthr and reports its exit to zthr_cancel() */ | |
205 | kcondvar_t zthr_cv; | |
206 | ||
207 | /* B_TRUE while a cancel is pending; reset by the zthr when it exits */ | |
208 | boolean_t zthr_cancel; | |
209 | ||
210 | /* | |
211 | * maximum amount of time that the zthr spends sleeping; | |
212 | * if this is 0, the thread doesn't wake up until it gets | |
213 | * signaled. | |
214 | */ | |
215 | hrtime_t zthr_wait_time; | |
216 | ||
217 | /* consumer-provided callbacks & the argument passed to both */ | |
218 | zthr_checkfunc_t *zthr_checkfunc; | |
219 | zthr_func_t *zthr_func; | |
220 | void *zthr_arg; | |
221 | }; | |
9d5b5245 SD |
222 | |
223 | static void | |
224 | zthr_procedure(void *arg) | |
225 | { | |
226 | zthr_t *t = arg; | |
9d5b5245 | 227 | |
61c3391a SD |
228 | mutex_enter(&t->zthr_state_lock); |
229 | ASSERT3P(t->zthr_thread, ==, curthread); | |
230 | ||
9d5b5245 SD |
231 | while (!t->zthr_cancel) { |
232 | if (t->zthr_checkfunc(t->zthr_arg, t)) { | |
61c3391a SD |
233 | mutex_exit(&t->zthr_state_lock); /* func runs without the state lock */ |
234 | t->zthr_func(t->zthr_arg, t); | |
235 | mutex_enter(&t->zthr_state_lock); | |
9d5b5245 SD |
236 | } else { |
237 | /* no work: sleep on zthr_cv, bounded by zthr_wait_time if non-zero */ | |
3ec34e55 | 238 | if (t->zthr_wait_time == 0) { |
61c3391a | 239 | cv_wait_sig(&t->zthr_cv, &t->zthr_state_lock); |
3ec34e55 BL |
240 | } else { |
241 | (void) cv_timedwait_sig_hires(&t->zthr_cv, | |
61c3391a | 242 | &t->zthr_state_lock, t->zthr_wait_time, |
3ec34e55 BL |
243 | MSEC2NSEC(1), 0); |
244 | } | |
9d5b5245 SD |
245 | } |
246 | } | |
9d5b5245 | 247 | |
61c3391a SD |
248 | /* |
249 | * Clear out the kernel thread metadata, reset the cancel flag and | |
250 | * notify the zthr_cancel() thread that we've stopped running. | |
251 | */ | |
252 | t->zthr_thread = NULL; | |
253 | t->zthr_cancel = B_FALSE; | |
254 | cv_broadcast(&t->zthr_cv); | |
255 | ||
256 | mutex_exit(&t->zthr_state_lock); | |
257 | thread_exit(); | |
9d5b5245 SD |
258 | } |
259 | ||
260 | zthr_t * | |
261 | zthr_create(zthr_checkfunc_t *checkfunc, zthr_func_t *func, void *arg) | |
3ec34e55 BL |
262 | { |
263 | return (zthr_create_timer(checkfunc, func, arg, (hrtime_t)0)); /* 0: no timed wakeup */ | |
264 | } | |
265 | ||
266 | /* | |
267 | * Create a zthr with the specified maximum sleep time. If the time | |
268 | * spent sleeping exceeds max_sleep, a wakeup (do the check and | |
269 | * start working if required) will be triggered. | |
270 | */ | |
271 | zthr_t * | |
272 | zthr_create_timer(zthr_checkfunc_t *checkfunc, zthr_func_t *func, | |
273 | void *arg, hrtime_t max_sleep) | |
9d5b5245 SD |
274 | { |
275 | zthr_t *t = kmem_zalloc(sizeof (*t), KM_SLEEP); | |
61c3391a SD |
276 | mutex_init(&t->zthr_state_lock, NULL, MUTEX_DEFAULT, NULL); |
277 | mutex_init(&t->zthr_request_lock, NULL, MUTEX_DEFAULT, NULL); | |
9d5b5245 SD |
278 | cv_init(&t->zthr_cv, NULL, CV_DEFAULT, NULL); |
279 | ||
61c3391a | 280 | mutex_enter(&t->zthr_state_lock); /* new thread blocks on this lock until zthr_thread is set */ |
9d5b5245 SD |
281 | t->zthr_checkfunc = checkfunc; |
282 | t->zthr_func = func; | |
283 | t->zthr_arg = arg; | |
3ec34e55 | 284 | t->zthr_wait_time = max_sleep; |
9d5b5245 SD |
285 | |
286 | t->zthr_thread = thread_create(NULL, 0, zthr_procedure, t, | |
287 | 0, &p0, TS_RUN, minclsyspri); | |
61c3391a | 288 | mutex_exit(&t->zthr_state_lock); |
9d5b5245 SD |
289 | |
290 | return (t); | |
291 | } | |
292 | ||
293 | void | |
294 | zthr_destroy(zthr_t *t) | |
295 | { | |
61c3391a SD |
296 | ASSERT(!MUTEX_HELD(&t->zthr_state_lock)); |
297 | ASSERT(!MUTEX_HELD(&t->zthr_request_lock)); | |
9d5b5245 | 298 | VERIFY3P(t->zthr_thread, ==, NULL); /* the zthr must not be running; cancel it first */ |
61c3391a SD |
299 | mutex_destroy(&t->zthr_request_lock); |
300 | mutex_destroy(&t->zthr_state_lock); | |
9d5b5245 SD |
301 | cv_destroy(&t->zthr_cv); |
302 | kmem_free(t, sizeof (*t)); | |
303 | } | |
304 | ||
305 | /* | |
2747f599 SH |
306 | * Wake up the zthr if it is sleeping. If the thread has been cancelled |
307 | * or is in the process of being cancelled, this is a no-op. | |
9d5b5245 SD |
308 | */ |
309 | void | |
310 | zthr_wakeup(zthr_t *t) | |
311 | { | |
61c3391a SD |
312 | mutex_enter(&t->zthr_state_lock); |
313 | ||
314 | /* | |
2747f599 | 315 | * There are 5 states in which we can find the zthr when issuing |
61c3391a SD |
316 | * this broadcast: |
317 | * | |
318 | * [1] The common case of the thread being asleep, at which | |
319 | * point the broadcast will wake it up. | |
320 | * [2] The thread has been cancelled. Waking up a cancelled | |
321 | * thread is a no-op. Any work that is still left to be | |
322 | * done should be handled the next time the thread is | |
323 | * resumed. | |
324 | * [3] The thread is doing work and is already up, so this | |
325 | * is basically a no-op. | |
326 | * [4] The thread was just created/resumed, in which case the | |
327 | * behavior is similar to [3]. | |
2747f599 SH |
328 | * [5] The thread is in the middle of being cancelled, in |
329 | * which case this is a no-op. | |
61c3391a | 330 | */ |
9d5b5245 | 331 | cv_broadcast(&t->zthr_cv); |
61c3391a SD |
332 | |
333 | mutex_exit(&t->zthr_state_lock); | |
9d5b5245 SD |
334 | } |
335 | ||
336 | /* | |
61c3391a SD |
337 | * Sends a cancel request to the zthr and blocks until the zthr is |
338 | * cancelled. If the zthr is not running (e.g. has been cancelled | |
2747f599 SH |
339 | * already), this is a no-op. Note that this function should not be |
340 | * called from syncing context as it could deadlock with the zthr_func. | |
9d5b5245 | 341 | */ |
61c3391a | 342 | void |
9d5b5245 SD |
343 | zthr_cancel(zthr_t *t) |
344 | { | |
61c3391a SD |
345 | mutex_enter(&t->zthr_request_lock); |
346 | mutex_enter(&t->zthr_state_lock); | |
9d5b5245 | 347 | |
61c3391a SD |
348 | /* |
349 | * Since we are holding the zthr_state_lock at this point | |
350 | * we can find the zthr in one of the following 4 states: | |
351 | * | |
352 | * [1] The thread has already been cancelled, therefore | |
353 | * there is nothing for us to do. | |
354 | * [2] The thread is sleeping, so we set the flag, then | |
355 | * broadcast the CV to wake it up, and then wait for | |
356 | * it to exit. | |
357 | * [3] The thread is doing work, in which case we just set | |
358 | * the flag and wait for it to finish. | |
359 | * [4] The thread was just created/resumed, in which case | |
360 | * the behavior is similar to [3]. | |
361 | * | |
362 | * Since requests are serialized, by the time that we get | |
363 | * control back we expect that the zthr is cancelled and | |
364 | * not running anymore. | |
365 | */ | |
366 | if (t->zthr_thread != NULL) { | |
367 | t->zthr_cancel = B_TRUE; | |
9d5b5245 | 368 | |
61c3391a SD |
369 | /* broadcast in case the zthr is sleeping */ |
370 | cv_broadcast(&t->zthr_cv); | |
9d5b5245 | 371 | |
61c3391a SD |
372 | while (t->zthr_thread != NULL) |
373 | cv_wait(&t->zthr_cv, &t->zthr_state_lock); | |
9d5b5245 | 374 | |
61c3391a SD |
375 | ASSERT(!t->zthr_cancel); |
376 | } | |
377 | ||
378 | mutex_exit(&t->zthr_state_lock); | |
379 | mutex_exit(&t->zthr_request_lock); | |
9d5b5245 SD |
380 | } |
381 | ||
61c3391a | 382 | /* |
2747f599 SH |
383 | * Sends a resume request to the supplied zthr. If the zthr is already |
384 | * running this is a no-op. Note that this function should not be | |
385 | * called from syncing context as it could deadlock with the zthr_func. | |
61c3391a | 386 | */ |
9d5b5245 SD |
387 | void |
388 | zthr_resume(zthr_t *t) | |
389 | { | |
61c3391a SD |
390 | mutex_enter(&t->zthr_request_lock); |
391 | mutex_enter(&t->zthr_state_lock); | |
9d5b5245 SD |
392 | |
393 | ASSERT3P(t->zthr_checkfunc, !=, NULL); /* check the callback, not the field's address */ | |
394 | ASSERT3P(t->zthr_func, !=, NULL); | |
395 | ASSERT(!t->zthr_cancel); | |
396 | ||
61c3391a SD |
397 | /* |
398 | * There are 4 states that we find the zthr in at this point | |
399 | * given the locks that we hold: | |
400 | * | |
401 | * [1] The zthr was cancelled, so we spawn a new thread for | |
402 | * the zthr (common case). | |
403 | * [2] The zthr is running at which point this is a no-op. | |
404 | * [3] The zthr is sleeping at which point this is a no-op. | |
405 | * [4] The zthr was just spawned at which point this is a | |
406 | * no-op. | |
407 | */ | |
408 | if (t->zthr_thread == NULL) { | |
409 | t->zthr_thread = thread_create(NULL, 0, zthr_procedure, t, | |
410 | 0, &p0, TS_RUN, minclsyspri); | |
411 | } | |
9d5b5245 | 412 | |
61c3391a SD |
413 | mutex_exit(&t->zthr_state_lock); |
414 | mutex_exit(&t->zthr_request_lock); | |
9d5b5245 SD |
415 | } |
416 | ||
417 | /* | |
418 | * This function is intended to be used by the zthr itself | |
61c3391a SD |
419 | * (specifically the zthr_func callback provided) to check |
420 | * if another thread has signaled it to stop running before | |
421 | * doing some expensive operation. | |
9d5b5245 SD |
422 | * |
423 | * Returns TRUE if a zthr_cancel() request is pending on this | |
424 | * zthr (i.e. its zthr_cancel flag is set). | |
425 | * | |
426 | * Returns FALSE otherwise. | |
427 | */ | |
428 | boolean_t | |
429 | zthr_iscancelled(zthr_t *t) | |
430 | { | |
9d5b5245 SD |
431 | ASSERT3P(t->zthr_thread, ==, curthread); |
432 | ||
61c3391a SD |
433 | /* |
434 | * zthr_cancel() and zthr_resume() grab the zthr_request_lock | |
435 | * first and then zthr_state_lock. This function only grabs | |
436 | * the zthr_state_lock. That is because this function should | |
437 | * only be called from the zthr_func to check if someone has | |
438 | * issued a zthr_cancel() on the thread. If there is a zthr_cancel() | |
439 | * happening concurrently, attempting to grab the request lock | |
440 | * here would result in a deadlock. | |
441 | * | |
442 | * By grabbing only the zthr_state_lock this function is allowed | |
443 | * to run concurrently with a zthr_cancel() request. | |
444 | */ | |
445 | mutex_enter(&t->zthr_state_lock); | |
446 | boolean_t cancelled = t->zthr_cancel; | |
447 | mutex_exit(&t->zthr_state_lock); | |
9d5b5245 SD |
448 | return (cancelled); |
449 | } |