]> git.proxmox.com Git - mirror_spl-debian.git/blame - module/spl/spl-taskq.c
splat linux:shrinker: Fix fail-safe
[mirror_spl-debian.git] / module / spl / spl-taskq.c
CommitLineData
716154c5
BB
1/*****************************************************************************\
2 * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
3 * Copyright (C) 2007 The Regents of the University of California.
4 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
5 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
715f6251 6 * UCRL-CODE-235197
7 *
716154c5
BB
8 * This file is part of the SPL, Solaris Porting Layer.
9 * For details, see <http://github.com/behlendorf/spl/>.
715f6251 10 *
716154c5
BB
11 * The SPL is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the
13 * Free Software Foundation; either version 2 of the License, or (at your
14 * option) any later version.
15 *
16 * The SPL is distributed in the hope that it will be useful, but WITHOUT
715f6251 17 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 * for more details.
20 *
21 * You should have received a copy of the GNU General Public License along
716154c5
BB
22 * with the SPL. If not, see <http://www.gnu.org/licenses/>.
23 *****************************************************************************
24 * Solaris Porting Layer (SPL) Task Queue Implementation.
25\*****************************************************************************/
715f6251 26
f4b37741 27#include <sys/taskq.h>
3d061e9d 28#include <sys/kmem.h>
55abb092 29#include <spl-debug.h>
f1ca4da6 30
b17edc10
BB
/*
 * Drop any SS_DEBUG_SUBSYS definition inherited from the headers above and
 * redefine it as SS_TASKQ for the remainder of this file.
 * NOTE(review): presumably consumed by the SENTRY/SEXIT/SRETURN/SGOTO
 * tracing macros from spl-debug.h -- confirm against that header.
 */
#ifdef SS_DEBUG_SUBSYS
#undef SS_DEBUG_SUBSYS
#endif

#define SS_DEBUG_SUBSYS SS_TASKQ
937879f1 36
e9cb2b4f
BB
/*
 * Global system-wide task queue available for all consumers.  It is
 * created by spl_taskq_init() and torn down by spl_taskq_fini().
 */
taskq_t *system_taskq;
EXPORT_SYMBOL(system_taskq);
40
9b51f218
BB
41static int
42task_km_flags(uint_t flags)
43{
44 if (flags & TQ_NOSLEEP)
45 return KM_NOSLEEP;
46
47 if (flags & TQ_PUSHPAGE)
48 return KM_PUSHPAGE;
49
50 return KM_SLEEP;
51}
52
82387586
BB
53/*
54 * NOTE: Must be called with tq->tq_lock held, returns a list_t which
bcd68186 55 * is not attached to the free, work, or pending taskq lists.
f1ca4da6 56 */
046a70c9 57static taskq_ent_t *
bcd68186 58task_alloc(taskq_t *tq, uint_t flags)
59{
046a70c9 60 taskq_ent_t *t;
bcd68186 61 int count = 0;
b17edc10 62 SENTRY;
bcd68186 63
64 ASSERT(tq);
3d061e9d 65 ASSERT(spin_is_locked(&tq->tq_lock));
bcd68186 66retry:
046a70c9 67 /* Acquire taskq_ent_t's from free list if available */
bcd68186 68 if (!list_empty(&tq->tq_free_list) && !(flags & TQ_NEW)) {
046a70c9 69 t = list_entry(tq->tq_free_list.next, taskq_ent_t, tqent_list);
44217f7a
PS
70
71 ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
72
046a70c9 73 list_del_init(&t->tqent_list);
b17edc10 74 SRETURN(t);
bcd68186 75 }
76
7257ec41 77 /* Free list is empty and memory allocations are prohibited */
bcd68186 78 if (flags & TQ_NOALLOC)
b17edc10 79 SRETURN(NULL);
bcd68186 80
046a70c9 81 /* Hit maximum taskq_ent_t pool size */
bcd68186 82 if (tq->tq_nalloc >= tq->tq_maxalloc) {
83 if (flags & TQ_NOSLEEP)
b17edc10 84 SRETURN(NULL);
bcd68186 85
26f7245c
RC
86 /*
87 * Sleep periodically polling the free list for an available
046a70c9
PS
88 * taskq_ent_t. Dispatching with TQ_SLEEP should always succeed
89 * but we cannot block forever waiting for an taskq_entq_t to
26f7245c
RC
90 * show up in the free list, otherwise a deadlock can happen.
91 *
92 * Therefore, we need to allocate a new task even if the number
93 * of allocated tasks is above tq->tq_maxalloc, but we still
94 * end up delaying the task allocation by one second, thereby
95 * throttling the task dispatch rate.
96 */
97 spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
98 schedule_timeout(HZ / 100);
99 spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
100 if (count < 100)
101 SGOTO(retry, count++);
bcd68186 102 }
103
26f7245c 104 spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
9b51f218 105 t = kmem_alloc(sizeof(taskq_ent_t), task_km_flags(flags));
749045bb 106 spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
bcd68186 107
26f7245c 108 if (t) {
44217f7a 109 taskq_init_ent(t);
26f7245c
RC
110 tq->tq_nalloc++;
111 }
bcd68186 112
b17edc10 113 SRETURN(t);
bcd68186 114}
115
82387586 116/*
046a70c9 117 * NOTE: Must be called with tq->tq_lock held, expects the taskq_ent_t
bcd68186 118 * to already be removed from the free, work, or pending taskq lists.
119 */
120static void
046a70c9 121task_free(taskq_t *tq, taskq_ent_t *t)
bcd68186 122{
b17edc10 123 SENTRY;
bcd68186 124
125 ASSERT(tq);
126 ASSERT(t);
127 ASSERT(spin_is_locked(&tq->tq_lock));
046a70c9 128 ASSERT(list_empty(&t->tqent_list));
bcd68186 129
046a70c9 130 kmem_free(t, sizeof(taskq_ent_t));
bcd68186 131 tq->tq_nalloc--;
f1ca4da6 132
b17edc10 133 SEXIT;
bcd68186 134}
135
82387586
BB
136/*
137 * NOTE: Must be called with tq->tq_lock held, either destroys the
046a70c9 138 * taskq_ent_t if too many exist or moves it to the free list for later use.
bcd68186 139 */
f1ca4da6 140static void
046a70c9 141task_done(taskq_t *tq, taskq_ent_t *t)
f1ca4da6 142{
b17edc10 143 SENTRY;
bcd68186 144 ASSERT(tq);
145 ASSERT(t);
146 ASSERT(spin_is_locked(&tq->tq_lock));
147
046a70c9 148 list_del_init(&t->tqent_list);
f1ca4da6 149
bcd68186 150 if (tq->tq_nalloc <= tq->tq_minalloc) {
046a70c9
PS
151 t->tqent_id = 0;
152 t->tqent_func = NULL;
153 t->tqent_arg = NULL;
44217f7a 154 t->tqent_flags = 0;
8f2503e0 155
046a70c9 156 list_add_tail(&t->tqent_list, &tq->tq_free_list);
bcd68186 157 } else {
158 task_free(tq, t);
159 }
f1ca4da6 160
b17edc10 161 SEXIT;
f1ca4da6 162}
163
82387586
BB
164/*
165 * As tasks are submitted to the task queue they are assigned a
f0d8bb26
NB
166 * monotonically increasing taskqid and added to the tail of the pending
167 * list. As worker threads become available the tasks are removed from
168 * the head of the pending or priority list, giving preference to the
05b8f50c
PS
169 * priority list. The tasks are then removed from their respective
170 * list, and the taskq_thread servicing the task is added to the active
171 * list, preserving the order using the serviced task's taskqid.
172 * Finally, as tasks complete the taskq_thread servicing the task is
173 * removed from the active list. This means that the pending task and
174 * active taskq_thread lists are always kept sorted by taskqid. Thus the
175 * lowest outstanding incomplete taskqid can be determined simply by
176 * checking the min taskqid for each head item on the pending, priority,
177 * and active taskq_thread list. This value is stored in
178 * tq->tq_lowest_id and only updated to the new lowest id when the
179 * previous lowest id completes. All taskqids lower than
180 * tq->tq_lowest_id must have completed. It is also possible larger
181 * taskqid's have completed because they may be processed in parallel by
182 * several worker threads. However, this is not a problem because the
183 * behavior of taskq_wait_id() is to block until all previously
184 * submitted taskqid's have completed.
82387586
BB
185 *
186 * XXX: Taskqid_t wrapping is not handled. However, taskqid_t's are
187 * 64-bit values so even if a taskq is processing 2^24 (16,777,216)
188 * taskqid_ts per second it will still take 2^40 seconds, 34,865 years,
189 * before the wrap occurs. I can live with that for now.
bcd68186 190 */
191static int
192taskq_wait_check(taskq_t *tq, taskqid_t id)
193{
7257ec41
BB
194 int rc;
195
196 spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
197 rc = (id < tq->tq_lowest_id);
198 spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
199
b17edc10 200 SRETURN(rc);
bcd68186 201}
202
/*
 * Block the caller until taskq_wait_check() reports that 'id' is below
 * the taskq's lowest outstanding taskqid.  The caller sleeps on
 * tq->tq_wait_waitq between checks.
 */
void
__taskq_wait_id(taskq_t *tq, taskqid_t id)
{
	SENTRY;
	ASSERT(tq);

	wait_event(tq->tq_wait_waitq, taskq_wait_check(tq, id));

	SEXIT;
}
EXPORT_SYMBOL(__taskq_wait_id);
214
215void
216__taskq_wait(taskq_t *tq)
217{
218 taskqid_t id;
b17edc10 219 SENTRY;
bcd68186 220 ASSERT(tq);
221
7257ec41 222 /* Wait for the largest outstanding taskqid */
749045bb 223 spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
7257ec41 224 id = tq->tq_next_id - 1;
749045bb 225 spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
bcd68186 226
227 __taskq_wait_id(tq, id);
228
b17edc10 229 SEXIT;
bcd68186 230
231}
232EXPORT_SYMBOL(__taskq_wait);
233
234int
235__taskq_member(taskq_t *tq, void *t)
236{
2c02b71b
PS
237 struct list_head *l;
238 taskq_thread_t *tqt;
b17edc10 239 SENTRY;
bcd68186 240
241 ASSERT(tq);
242 ASSERT(t);
243
2c02b71b
PS
244 list_for_each(l, &tq->tq_thread_list) {
245 tqt = list_entry(l, taskq_thread_t, tqt_thread_list);
246 if (tqt->tqt_thread == (struct task_struct *)t)
247 SRETURN(1);
248 }
bcd68186 249
b17edc10 250 SRETURN(0);
bcd68186 251}
252EXPORT_SYMBOL(__taskq_member);
253
254taskqid_t
255__taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
256{
046a70c9 257 taskq_ent_t *t;
bcd68186 258 taskqid_t rc = 0;
b17edc10 259 SENTRY;
f1ca4da6 260
937879f1 261 ASSERT(tq);
262 ASSERT(func);
d05ec4b4 263
749045bb 264 spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
f1ca4da6 265
bcd68186 266 /* Taskq being destroyed and all tasks drained */
267 if (!(tq->tq_flags & TQ_ACTIVE))
b17edc10 268 SGOTO(out, rc = 0);
f1ca4da6 269
bcd68186 270 /* Do not queue the task unless there is idle thread for it */
271 ASSERT(tq->tq_nactive <= tq->tq_nthreads);
272 if ((flags & TQ_NOQUEUE) && (tq->tq_nactive == tq->tq_nthreads))
b17edc10 273 SGOTO(out, rc = 0);
bcd68186 274
275 if ((t = task_alloc(tq, flags)) == NULL)
b17edc10 276 SGOTO(out, rc = 0);
f1ca4da6 277
046a70c9 278 spin_lock(&t->tqent_lock);
f0d8bb26
NB
279
280 /* Queue to the priority list instead of the pending list */
281 if (flags & TQ_FRONT)
046a70c9 282 list_add_tail(&t->tqent_list, &tq->tq_prio_list);
f0d8bb26 283 else
046a70c9 284 list_add_tail(&t->tqent_list, &tq->tq_pend_list);
f0d8bb26 285
046a70c9 286 t->tqent_id = rc = tq->tq_next_id;
bcd68186 287 tq->tq_next_id++;
046a70c9
PS
288 t->tqent_func = func;
289 t->tqent_arg = arg;
44217f7a
PS
290
291 ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
292
046a70c9 293 spin_unlock(&t->tqent_lock);
0bb43ca2
NB
294
295 wake_up(&tq->tq_work_waitq);
bcd68186 296out:
749045bb 297 spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
b17edc10 298 SRETURN(rc);
f1ca4da6 299}
f1b59d26 300EXPORT_SYMBOL(__taskq_dispatch);
44217f7a
PS
301
302void
303__taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
304 taskq_ent_t *t)
305{
306 SENTRY;
307
308 ASSERT(tq);
309 ASSERT(func);
310 ASSERT(!(tq->tq_flags & TASKQ_DYNAMIC));
311
312 spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
313
314 /* Taskq being destroyed and all tasks drained */
315 if (!(tq->tq_flags & TQ_ACTIVE)) {
316 t->tqent_id = 0;
317 goto out;
318 }
319
320 spin_lock(&t->tqent_lock);
321
322 /*
323 * Mark it as a prealloc'd task. This is important
324 * to ensure that we don't free it later.
325 */
326 t->tqent_flags |= TQENT_FLAG_PREALLOC;
327
328 /* Queue to the priority list instead of the pending list */
329 if (flags & TQ_FRONT)
330 list_add_tail(&t->tqent_list, &tq->tq_prio_list);
331 else
332 list_add_tail(&t->tqent_list, &tq->tq_pend_list);
333
334 t->tqent_id = tq->tq_next_id;
335 tq->tq_next_id++;
336 t->tqent_func = func;
337 t->tqent_arg = arg;
338
339 spin_unlock(&t->tqent_lock);
340
341 wake_up(&tq->tq_work_waitq);
342out:
0bb43ca2 343 spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
44217f7a
PS
344 SEXIT;
345}
346EXPORT_SYMBOL(__taskq_dispatch_ent);
347
348int
349__taskq_empty_ent(taskq_ent_t *t)
350{
351 return list_empty(&t->tqent_list);
352}
353EXPORT_SYMBOL(__taskq_empty_ent);
354
355void
356__taskq_init_ent(taskq_ent_t *t)
357{
358 spin_lock_init(&t->tqent_lock);
359 INIT_LIST_HEAD(&t->tqent_list);
360 t->tqent_id = 0;
361 t->tqent_func = NULL;
362 t->tqent_arg = NULL;
363 t->tqent_flags = 0;
364}
365EXPORT_SYMBOL(__taskq_init_ent);
366
82387586
BB
367/*
368 * Returns the lowest incomplete taskqid_t. The taskqid_t may
f0d8bb26
NB
369 * be queued on the pending list, on the priority list, or on
370 * the work list currently being handled, but it is not 100%
371 * complete yet.
82387586 372 */
bcd68186 373static taskqid_t
374taskq_lowest_id(taskq_t *tq)
375{
7257ec41 376 taskqid_t lowest_id = tq->tq_next_id;
046a70c9 377 taskq_ent_t *t;
2c02b71b 378 taskq_thread_t *tqt;
b17edc10 379 SENTRY;
bcd68186 380
381 ASSERT(tq);
382 ASSERT(spin_is_locked(&tq->tq_lock));
383
82387586 384 if (!list_empty(&tq->tq_pend_list)) {
046a70c9
PS
385 t = list_entry(tq->tq_pend_list.next, taskq_ent_t, tqent_list);
386 lowest_id = MIN(lowest_id, t->tqent_id);
82387586 387 }
bcd68186 388
f0d8bb26 389 if (!list_empty(&tq->tq_prio_list)) {
046a70c9
PS
390 t = list_entry(tq->tq_prio_list.next, taskq_ent_t, tqent_list);
391 lowest_id = MIN(lowest_id, t->tqent_id);
f0d8bb26
NB
392 }
393
2c02b71b
PS
394 if (!list_empty(&tq->tq_active_list)) {
395 tqt = list_entry(tq->tq_active_list.next, taskq_thread_t,
396 tqt_active_list);
e7e5f78e
PS
397 ASSERT(tqt->tqt_id != 0);
398 lowest_id = MIN(lowest_id, tqt->tqt_id);
82387586 399 }
bcd68186 400
b17edc10 401 SRETURN(lowest_id);
bcd68186 402}
403
f0d8bb26
NB
404/*
405 * Insert a task into a list keeping the list sorted by increasing
406 * taskqid.
407 */
408static void
2c02b71b 409taskq_insert_in_order(taskq_t *tq, taskq_thread_t *tqt)
f0d8bb26 410{
2c02b71b 411 taskq_thread_t *w;
f0d8bb26
NB
412 struct list_head *l;
413
b17edc10 414 SENTRY;
f0d8bb26 415 ASSERT(tq);
2c02b71b 416 ASSERT(tqt);
f0d8bb26
NB
417 ASSERT(spin_is_locked(&tq->tq_lock));
418
2c02b71b
PS
419 list_for_each_prev(l, &tq->tq_active_list) {
420 w = list_entry(l, taskq_thread_t, tqt_active_list);
e7e5f78e 421 if (w->tqt_id < tqt->tqt_id) {
2c02b71b 422 list_add(&tqt->tqt_active_list, l);
f0d8bb26
NB
423 break;
424 }
425 }
2c02b71b
PS
426 if (l == &tq->tq_active_list)
427 list_add(&tqt->tqt_active_list, &tq->tq_active_list);
f0d8bb26 428
b17edc10 429 SEXIT;
f0d8bb26
NB
430}
431
bcd68186 432static int
433taskq_thread(void *args)
434{
435 DECLARE_WAITQUEUE(wait, current);
436 sigset_t blocked;
2c02b71b
PS
437 taskq_thread_t *tqt = args;
438 taskq_t *tq;
046a70c9 439 taskq_ent_t *t;
f0d8bb26 440 struct list_head *pend_list;
b17edc10 441 SENTRY;
bcd68186 442
2c02b71b
PS
443 ASSERT(tqt);
444 tq = tqt->tqt_tq;
bcd68186 445 current->flags |= PF_NOFREEZE;
446
447 sigfillset(&blocked);
448 sigprocmask(SIG_BLOCK, &blocked, NULL);
449 flush_signals(current);
450
749045bb 451 spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
bcd68186 452 tq->tq_nthreads++;
453 wake_up(&tq->tq_wait_waitq);
454 set_current_state(TASK_INTERRUPTIBLE);
455
456 while (!kthread_should_stop()) {
457
f0d8bb26
NB
458 if (list_empty(&tq->tq_pend_list) &&
459 list_empty(&tq->tq_prio_list)) {
3c6ed541 460 add_wait_queue_exclusive(&tq->tq_work_waitq, &wait);
749045bb 461 spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
bcd68186 462 schedule();
749045bb 463 spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
3c6ed541 464 remove_wait_queue(&tq->tq_work_waitq, &wait);
bcd68186 465 } else {
466 __set_current_state(TASK_RUNNING);
467 }
468
f0d8bb26
NB
469
470 if (!list_empty(&tq->tq_prio_list))
471 pend_list = &tq->tq_prio_list;
472 else if (!list_empty(&tq->tq_pend_list))
473 pend_list = &tq->tq_pend_list;
474 else
475 pend_list = NULL;
476
477 if (pend_list) {
046a70c9
PS
478 t = list_entry(pend_list->next, taskq_ent_t, tqent_list);
479 list_del_init(&t->tqent_list);
8f2503e0 480
44217f7a
PS
481 /* In order to support recursively dispatching a
482 * preallocated taskq_ent_t, tqent_id must be
483 * stored prior to executing tqent_func. */
e7e5f78e 484 tqt->tqt_id = t->tqent_id;
8f2503e0
PS
485
486 /* We must store a copy of the flags prior to
487 * servicing the task (servicing a prealloc'd task
488 * returns the ownership of the tqent back to
489 * the caller of taskq_dispatch). Thus,
490 * tqent_flags _may_ change within the call. */
491 tqt->tqt_flags = t->tqent_flags;
492
2c02b71b 493 taskq_insert_in_order(tq, tqt);
bcd68186 494 tq->tq_nactive++;
749045bb 495 spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
bcd68186 496
497 /* Perform the requested task */
046a70c9 498 t->tqent_func(t->tqent_arg);
bcd68186 499
749045bb 500 spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
bcd68186 501 tq->tq_nactive--;
2c02b71b 502 list_del_init(&tqt->tqt_active_list);
8f2503e0
PS
503
504 /* For prealloc'd tasks, we don't free anything. */
505 if ((tq->tq_flags & TASKQ_DYNAMIC) ||
506 !(tqt->tqt_flags & TQENT_FLAG_PREALLOC))
507 task_done(tq, t);
bcd68186 508
7257ec41
BB
509 /* When the current lowest outstanding taskqid is
510 * done calculate the new lowest outstanding id */
e7e5f78e 511 if (tq->tq_lowest_id == tqt->tqt_id) {
bcd68186 512 tq->tq_lowest_id = taskq_lowest_id(tq);
e7e5f78e 513 ASSERT3S(tq->tq_lowest_id, >, tqt->tqt_id);
bcd68186 514 }
515
e7e5f78e 516 tqt->tqt_id = 0;
8f2503e0 517 tqt->tqt_flags = 0;
bcd68186 518 wake_up_all(&tq->tq_wait_waitq);
519 }
520
521 set_current_state(TASK_INTERRUPTIBLE);
522
523 }
524
525 __set_current_state(TASK_RUNNING);
526 tq->tq_nthreads--;
2c02b71b
PS
527 list_del_init(&tqt->tqt_thread_list);
528 kmem_free(tqt, sizeof(taskq_thread_t));
529
749045bb 530 spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
bcd68186 531
b17edc10 532 SRETURN(0);
bcd68186 533}
534
f1ca4da6 535taskq_t *
536__taskq_create(const char *name, int nthreads, pri_t pri,
537 int minalloc, int maxalloc, uint_t flags)
538{
bcd68186 539 taskq_t *tq;
2c02b71b 540 taskq_thread_t *tqt;
bcd68186 541 int rc = 0, i, j = 0;
b17edc10 542 SENTRY;
bcd68186 543
544 ASSERT(name != NULL);
545 ASSERT(pri <= maxclsyspri);
546 ASSERT(minalloc >= 0);
547 ASSERT(maxalloc <= INT_MAX);
548 ASSERT(!(flags & (TASKQ_CPR_SAFE | TASKQ_DYNAMIC))); /* Unsupported */
549
915404bd
BB
550 /* Scale the number of threads using nthreads as a percentage */
551 if (flags & TASKQ_THREADS_CPU_PCT) {
552 ASSERT(nthreads <= 100);
553 ASSERT(nthreads >= 0);
554 nthreads = MIN(nthreads, 100);
555 nthreads = MAX(nthreads, 0);
556 nthreads = MAX((num_online_cpus() * nthreads) / 100, 1);
557 }
558
9b51f218 559 tq = kmem_alloc(sizeof(*tq), KM_PUSHPAGE);
bcd68186 560 if (tq == NULL)
b17edc10 561 SRETURN(NULL);
bcd68186 562
bcd68186 563 spin_lock_init(&tq->tq_lock);
749045bb 564 spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
2c02b71b
PS
565 INIT_LIST_HEAD(&tq->tq_thread_list);
566 INIT_LIST_HEAD(&tq->tq_active_list);
bcd68186 567 tq->tq_name = name;
568 tq->tq_nactive = 0;
569 tq->tq_nthreads = 0;
570 tq->tq_pri = pri;
571 tq->tq_minalloc = minalloc;
572 tq->tq_maxalloc = maxalloc;
573 tq->tq_nalloc = 0;
574 tq->tq_flags = (flags | TQ_ACTIVE);
575 tq->tq_next_id = 1;
576 tq->tq_lowest_id = 1;
577 INIT_LIST_HEAD(&tq->tq_free_list);
bcd68186 578 INIT_LIST_HEAD(&tq->tq_pend_list);
f0d8bb26 579 INIT_LIST_HEAD(&tq->tq_prio_list);
bcd68186 580 init_waitqueue_head(&tq->tq_work_waitq);
581 init_waitqueue_head(&tq->tq_wait_waitq);
582
583 if (flags & TASKQ_PREPOPULATE)
584 for (i = 0; i < minalloc; i++)
9b51f218 585 task_done(tq, task_alloc(tq, TQ_PUSHPAGE | TQ_NEW));
6e605b6e 586
749045bb 587 spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
6e605b6e 588
2c02b71b 589 for (i = 0; i < nthreads; i++) {
9b51f218 590 tqt = kmem_alloc(sizeof(*tqt), KM_PUSHPAGE);
2c02b71b
PS
591 INIT_LIST_HEAD(&tqt->tqt_thread_list);
592 INIT_LIST_HEAD(&tqt->tqt_active_list);
593 tqt->tqt_tq = tq;
e7e5f78e 594 tqt->tqt_id = 0;
2c02b71b
PS
595
596 tqt->tqt_thread = kthread_create(taskq_thread, tqt,
597 "%s/%d", name, i);
598 if (tqt->tqt_thread) {
599 list_add(&tqt->tqt_thread_list, &tq->tq_thread_list);
600 kthread_bind(tqt->tqt_thread, i % num_online_cpus());
601 set_user_nice(tqt->tqt_thread, PRIO_TO_NICE(pri));
602 wake_up_process(tqt->tqt_thread);
bcd68186 603 j++;
2c02b71b
PS
604 } else {
605 kmem_free(tqt, sizeof(taskq_thread_t));
606 rc = 1;
607 }
608 }
bcd68186 609
610 /* Wait for all threads to be started before potential destroy */
611 wait_event(tq->tq_wait_waitq, tq->tq_nthreads == j);
612
613 if (rc) {
614 __taskq_destroy(tq);
615 tq = NULL;
616 }
617
b17edc10 618 SRETURN(tq);
f1ca4da6 619}
f1b59d26 620EXPORT_SYMBOL(__taskq_create);
b123971f 621
622void
623__taskq_destroy(taskq_t *tq)
624{
2c02b71b
PS
625 struct task_struct *thread;
626 taskq_thread_t *tqt;
046a70c9 627 taskq_ent_t *t;
b17edc10 628 SENTRY;
b123971f 629
bcd68186 630 ASSERT(tq);
749045bb 631 spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
bcd68186 632 tq->tq_flags &= ~TQ_ACTIVE;
749045bb 633 spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
bcd68186 634
635 /* TQ_ACTIVE cleared prevents new tasks being added to pending */
636 __taskq_wait(tq);
637
749045bb 638 spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
bcd68186 639
2c02b71b
PS
640 /*
641 * Signal each thread to exit and block until it does. Each thread
642 * is responsible for removing itself from the list and freeing its
643 * taskq_thread_t. This allows for idle threads to opt to remove
644 * themselves from the taskq. They can be recreated as needed.
645 */
646 while (!list_empty(&tq->tq_thread_list)) {
647 tqt = list_entry(tq->tq_thread_list.next,
648 taskq_thread_t, tqt_thread_list);
649 thread = tqt->tqt_thread;
650 spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
651
652 kthread_stop(thread);
653
654 spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
655 }
656
bcd68186 657 while (!list_empty(&tq->tq_free_list)) {
046a70c9 658 t = list_entry(tq->tq_free_list.next, taskq_ent_t, tqent_list);
44217f7a
PS
659
660 ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
661
046a70c9 662 list_del_init(&t->tqent_list);
bcd68186 663 task_free(tq, t);
664 }
665
666 ASSERT(tq->tq_nthreads == 0);
667 ASSERT(tq->tq_nalloc == 0);
2c02b71b
PS
668 ASSERT(list_empty(&tq->tq_thread_list));
669 ASSERT(list_empty(&tq->tq_active_list));
bcd68186 670 ASSERT(list_empty(&tq->tq_free_list));
bcd68186 671 ASSERT(list_empty(&tq->tq_pend_list));
f0d8bb26 672 ASSERT(list_empty(&tq->tq_prio_list));
bcd68186 673
749045bb 674 spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
2c02b71b 675
bcd68186 676 kmem_free(tq, sizeof(taskq_t));
677
b17edc10 678 SEXIT;
b123971f 679}
bcd68186 680EXPORT_SYMBOL(__taskq_destroy);
e9cb2b4f
BB
681
682int
683spl_taskq_init(void)
684{
b17edc10 685 SENTRY;
e9cb2b4f 686
f220894e
BB
687 /* Solaris creates a dynamic taskq of up to 64 threads, however in
688 * a Linux environment 1 thread per-core is usually about right */
689 system_taskq = taskq_create("spl_system_taskq", num_online_cpus(),
690 minclsyspri, 4, 512, TASKQ_PREPOPULATE);
e9cb2b4f 691 if (system_taskq == NULL)
b17edc10 692 SRETURN(1);
e9cb2b4f 693
b17edc10 694 SRETURN(0);
e9cb2b4f
BB
695}
696
/*
 * Destroy the global system_taskq created by spl_taskq_init().
 */
void
spl_taskq_fini(void)
{
	SENTRY;
	taskq_destroy(system_taskq);
	SEXIT;
}