]> git.proxmox.com Git - mirror_spl-debian.git/blame - module/spl/spl-taskq.c
taskq style, convert spaces to soft tabs
[mirror_spl-debian.git] / module / spl / spl-taskq.c
CommitLineData
716154c5
BB
1/*****************************************************************************\
2 * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
3 * Copyright (C) 2007 The Regents of the University of California.
4 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
5 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
715f6251 6 * UCRL-CODE-235197
7 *
716154c5
BB
8 * This file is part of the SPL, Solaris Porting Layer.
9 * For details, see <http://github.com/behlendorf/spl/>.
715f6251 10 *
716154c5
BB
11 * The SPL is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the
13 * Free Software Foundation; either version 2 of the License, or (at your
14 * option) any later version.
15 *
16 * The SPL is distributed in the hope that it will be useful, but WITHOUT
715f6251 17 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 * for more details.
20 *
21 * You should have received a copy of the GNU General Public License along
716154c5
BB
22 * with the SPL. If not, see <http://www.gnu.org/licenses/>.
23 *****************************************************************************
24 * Solaris Porting Layer (SPL) Task Queue Implementation.
25\*****************************************************************************/
715f6251 26
f4b37741 27#include <sys/taskq.h>
3d061e9d 28#include <sys/kmem.h>
55abb092 29#include <spl-debug.h>
f1ca4da6 30
b17edc10
BB
31#ifdef SS_DEBUG_SUBSYS
32#undef SS_DEBUG_SUBSYS
937879f1 33#endif
34
b17edc10 35#define SS_DEBUG_SUBSYS SS_TASKQ
937879f1 36
e9cb2b4f
BB
37/* Global system-wide dynamic task queue available for all consumers */
38taskq_t *system_taskq;
39EXPORT_SYMBOL(system_taskq);
40
9b51f218
BB
41static int
42task_km_flags(uint_t flags)
43{
44 if (flags & TQ_NOSLEEP)
45 return KM_NOSLEEP;
46
47 if (flags & TQ_PUSHPAGE)
48 return KM_PUSHPAGE;
49
50 return KM_SLEEP;
51}
52
82387586
BB
53/*
54 * NOTE: Must be called with tq->tq_lock held, returns a list_t which
bcd68186 55 * is not attached to the free, work, or pending taskq lists.
f1ca4da6 56 */
046a70c9 57static taskq_ent_t *
bcd68186 58task_alloc(taskq_t *tq, uint_t flags)
59{
472a34ca
BB
60 taskq_ent_t *t;
61 int count = 0;
62 SENTRY;
bcd68186 63
472a34ca
BB
64 ASSERT(tq);
65 ASSERT(spin_is_locked(&tq->tq_lock));
bcd68186 66retry:
472a34ca
BB
67 /* Acquire taskq_ent_t's from free list if available */
68 if (!list_empty(&tq->tq_free_list) && !(flags & TQ_NEW)) {
69 t = list_entry(tq->tq_free_list.next, taskq_ent_t, tqent_list);
70
71 ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
72
73 list_del_init(&t->tqent_list);
74 SRETURN(t);
75 }
76
77 /* Free list is empty and memory allocations are prohibited */
78 if (flags & TQ_NOALLOC)
79 SRETURN(NULL);
80
81 /* Hit maximum taskq_ent_t pool size */
82 if (tq->tq_nalloc >= tq->tq_maxalloc) {
83 if (flags & TQ_NOSLEEP)
84 SRETURN(NULL);
85
86 /*
87 * Sleep periodically polling the free list for an available
88 * taskq_ent_t. Dispatching with TQ_SLEEP should always succeed
89 * but we cannot block forever waiting for an taskq_ent_t to
90 * show up in the free list, otherwise a deadlock can happen.
91 *
92 * Therefore, we need to allocate a new task even if the number
93 * of allocated tasks is above tq->tq_maxalloc, but we still
94 * end up delaying the task allocation by one second, thereby
95 * throttling the task dispatch rate.
96 */
97 spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
98 schedule_timeout(HZ / 100);
99 spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
100 if (count < 100)
101 SGOTO(retry, count++);
102 }
103
104 spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
105 t = kmem_alloc(sizeof(taskq_ent_t), task_km_flags(flags));
106 spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
107
108 if (t) {
109 taskq_init_ent(t);
110 tq->tq_nalloc++;
111 }
112
113 SRETURN(t);
bcd68186 114}
115
82387586 116/*
046a70c9 117 * NOTE: Must be called with tq->tq_lock held, expects the taskq_ent_t
bcd68186 118 * to already be removed from the free, work, or pending taskq lists.
119 */
120static void
046a70c9 121task_free(taskq_t *tq, taskq_ent_t *t)
bcd68186 122{
472a34ca 123 SENTRY;
bcd68186 124
472a34ca
BB
125 ASSERT(tq);
126 ASSERT(t);
bcd68186 127 ASSERT(spin_is_locked(&tq->tq_lock));
046a70c9 128 ASSERT(list_empty(&t->tqent_list));
bcd68186 129
472a34ca
BB
130 kmem_free(t, sizeof(taskq_ent_t));
131 tq->tq_nalloc--;
f1ca4da6 132
b17edc10 133 SEXIT;
bcd68186 134}
135
82387586
BB
136/*
137 * NOTE: Must be called with tq->tq_lock held, either destroys the
046a70c9 138 * taskq_ent_t if too many exist or moves it to the free list for later use.
bcd68186 139 */
f1ca4da6 140static void
046a70c9 141task_done(taskq_t *tq, taskq_ent_t *t)
f1ca4da6 142{
b17edc10 143 SENTRY;
bcd68186 144 ASSERT(tq);
145 ASSERT(t);
146 ASSERT(spin_is_locked(&tq->tq_lock));
147
046a70c9 148 list_del_init(&t->tqent_list);
f1ca4da6 149
472a34ca 150 if (tq->tq_nalloc <= tq->tq_minalloc) {
046a70c9
PS
151 t->tqent_id = 0;
152 t->tqent_func = NULL;
153 t->tqent_arg = NULL;
44217f7a 154 t->tqent_flags = 0;
8f2503e0 155
472a34ca 156 list_add_tail(&t->tqent_list, &tq->tq_free_list);
bcd68186 157 } else {
158 task_free(tq, t);
159 }
f1ca4da6 160
472a34ca 161 SEXIT;
f1ca4da6 162}
163
82387586
BB
164/*
165 * As tasks are submitted to the task queue they are assigned a
f0d8bb26
NB
166 * monotonically increasing taskqid and added to the tail of the pending
167 * list. As worker threads become available the tasks are removed from
168 * the head of the pending or priority list, giving preference to the
05b8f50c
PS
169 * priority list. The tasks are then removed from their respective
170 * list, and the taskq_thread servicing the task is added to the active
171 * list, preserving the order using the serviced task's taskqid.
172 * Finally, as tasks complete the taskq_thread servicing the task is
173 * removed from the active list. This means that the pending task and
174 * active taskq_thread lists are always kept sorted by taskqid. Thus the
175 * lowest outstanding incomplete taskqid can be determined simply by
176 * checking the min taskqid for each head item on the pending, priority,
177 * and active taskq_thread list. This value is stored in
178 * tq->tq_lowest_id and only updated to the new lowest id when the
179 * previous lowest id completes. All taskqids lower than
180 * tq->tq_lowest_id must have completed. It is also possible larger
181 * taskqid's have completed because they may be processed in parallel by
182 * several worker threads. However, this is not a problem because the
183 * behavior of taskq_wait_id() is to block until all previously
184 * submitted taskqid's have completed.
82387586
BB
185 *
186 * XXX: Taskqid_t wrapping is not handled. However, taskqid_t's are
187 * 64-bit values so even if a taskq is processing 2^24 (16,777,216)
188 * taskqid_ts per second it will still take 2^40 seconds, 34,865 years,
189 * before the wrap occurs. I can live with that for now.
bcd68186 190 */
191static int
192taskq_wait_check(taskq_t *tq, taskqid_t id)
193{
7257ec41
BB
194 int rc;
195
196 spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
197 rc = (id < tq->tq_lowest_id);
198 spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
199
b17edc10 200 SRETURN(rc);
bcd68186 201}
202
bcd68186 203void
204__taskq_wait_id(taskq_t *tq, taskqid_t id)
f1ca4da6 205{
b17edc10 206 SENTRY;
bcd68186 207 ASSERT(tq);
208
209 wait_event(tq->tq_wait_waitq, taskq_wait_check(tq, id));
210
b17edc10 211 SEXIT;
bcd68186 212}
213EXPORT_SYMBOL(__taskq_wait_id);
214
215void
216__taskq_wait(taskq_t *tq)
217{
218 taskqid_t id;
b17edc10 219 SENTRY;
bcd68186 220 ASSERT(tq);
221
7257ec41 222 /* Wait for the largest outstanding taskqid */
749045bb 223 spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
7257ec41 224 id = tq->tq_next_id - 1;
749045bb 225 spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
bcd68186 226
227 __taskq_wait_id(tq, id);
228
b17edc10 229 SEXIT;
bcd68186 230
231}
232EXPORT_SYMBOL(__taskq_wait);
233
234int
235__taskq_member(taskq_t *tq, void *t)
236{
2c02b71b
PS
237 struct list_head *l;
238 taskq_thread_t *tqt;
472a34ca 239 SENTRY;
bcd68186 240
241 ASSERT(tq);
472a34ca 242 ASSERT(t);
bcd68186 243
2c02b71b
PS
244 list_for_each(l, &tq->tq_thread_list) {
245 tqt = list_entry(l, taskq_thread_t, tqt_thread_list);
246 if (tqt->tqt_thread == (struct task_struct *)t)
247 SRETURN(1);
248 }
bcd68186 249
472a34ca 250 SRETURN(0);
bcd68186 251}
252EXPORT_SYMBOL(__taskq_member);
253
254taskqid_t
255__taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
256{
472a34ca 257 taskq_ent_t *t;
bcd68186 258 taskqid_t rc = 0;
472a34ca 259 SENTRY;
f1ca4da6 260
472a34ca
BB
261 ASSERT(tq);
262 ASSERT(func);
d05ec4b4 263
472a34ca 264 spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
f1ca4da6 265
bcd68186 266 /* Taskq being destroyed and all tasks drained */
267 if (!(tq->tq_flags & TQ_ACTIVE))
b17edc10 268 SGOTO(out, rc = 0);
f1ca4da6 269
bcd68186 270 /* Do not queue the task unless there is idle thread for it */
271 ASSERT(tq->tq_nactive <= tq->tq_nthreads);
272 if ((flags & TQ_NOQUEUE) && (tq->tq_nactive == tq->tq_nthreads))
b17edc10 273 SGOTO(out, rc = 0);
bcd68186 274
472a34ca 275 if ((t = task_alloc(tq, flags)) == NULL)
b17edc10 276 SGOTO(out, rc = 0);
f1ca4da6 277
046a70c9 278 spin_lock(&t->tqent_lock);
f0d8bb26
NB
279
280 /* Queue to the priority list instead of the pending list */
281 if (flags & TQ_FRONT)
046a70c9 282 list_add_tail(&t->tqent_list, &tq->tq_prio_list);
f0d8bb26 283 else
046a70c9 284 list_add_tail(&t->tqent_list, &tq->tq_pend_list);
f0d8bb26 285
046a70c9 286 t->tqent_id = rc = tq->tq_next_id;
bcd68186 287 tq->tq_next_id++;
472a34ca
BB
288 t->tqent_func = func;
289 t->tqent_arg = arg;
44217f7a
PS
290
291 ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
292
046a70c9 293 spin_unlock(&t->tqent_lock);
0bb43ca2
NB
294
295 wake_up(&tq->tq_work_waitq);
bcd68186 296out:
749045bb 297 spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
b17edc10 298 SRETURN(rc);
f1ca4da6 299}
f1b59d26 300EXPORT_SYMBOL(__taskq_dispatch);
44217f7a
PS
301
302void
303__taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
304 taskq_ent_t *t)
305{
306 SENTRY;
307
308 ASSERT(tq);
309 ASSERT(func);
310 ASSERT(!(tq->tq_flags & TASKQ_DYNAMIC));
311
312 spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
313
314 /* Taskq being destroyed and all tasks drained */
315 if (!(tq->tq_flags & TQ_ACTIVE)) {
316 t->tqent_id = 0;
317 goto out;
318 }
319
320 spin_lock(&t->tqent_lock);
321
322 /*
323 * Mark it as a prealloc'd task. This is important
324 * to ensure that we don't free it later.
325 */
326 t->tqent_flags |= TQENT_FLAG_PREALLOC;
327
328 /* Queue to the priority list instead of the pending list */
329 if (flags & TQ_FRONT)
330 list_add_tail(&t->tqent_list, &tq->tq_prio_list);
331 else
332 list_add_tail(&t->tqent_list, &tq->tq_pend_list);
333
334 t->tqent_id = tq->tq_next_id;
335 tq->tq_next_id++;
336 t->tqent_func = func;
337 t->tqent_arg = arg;
338
339 spin_unlock(&t->tqent_lock);
340
341 wake_up(&tq->tq_work_waitq);
342out:
0bb43ca2 343 spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
44217f7a
PS
344 SEXIT;
345}
346EXPORT_SYMBOL(__taskq_dispatch_ent);
347
348int
349__taskq_empty_ent(taskq_ent_t *t)
350{
351 return list_empty(&t->tqent_list);
352}
353EXPORT_SYMBOL(__taskq_empty_ent);
354
355void
356__taskq_init_ent(taskq_ent_t *t)
357{
358 spin_lock_init(&t->tqent_lock);
359 INIT_LIST_HEAD(&t->tqent_list);
360 t->tqent_id = 0;
361 t->tqent_func = NULL;
362 t->tqent_arg = NULL;
363 t->tqent_flags = 0;
364}
365EXPORT_SYMBOL(__taskq_init_ent);
366
82387586
BB
367/*
368 * Returns the lowest incomplete taskqid_t. The taskqid_t may
f0d8bb26
NB
369 * be queued on the pending list, on the priority list, or on
370 * the work list currently being handled, but it is not 100%
371 * complete yet.
82387586 372 */
bcd68186 373static taskqid_t
374taskq_lowest_id(taskq_t *tq)
375{
7257ec41 376 taskqid_t lowest_id = tq->tq_next_id;
472a34ca 377 taskq_ent_t *t;
2c02b71b 378 taskq_thread_t *tqt;
b17edc10 379 SENTRY;
bcd68186 380
381 ASSERT(tq);
382 ASSERT(spin_is_locked(&tq->tq_lock));
383
82387586 384 if (!list_empty(&tq->tq_pend_list)) {
046a70c9
PS
385 t = list_entry(tq->tq_pend_list.next, taskq_ent_t, tqent_list);
386 lowest_id = MIN(lowest_id, t->tqent_id);
82387586 387 }
bcd68186 388
f0d8bb26 389 if (!list_empty(&tq->tq_prio_list)) {
046a70c9
PS
390 t = list_entry(tq->tq_prio_list.next, taskq_ent_t, tqent_list);
391 lowest_id = MIN(lowest_id, t->tqent_id);
f0d8bb26
NB
392 }
393
2c02b71b
PS
394 if (!list_empty(&tq->tq_active_list)) {
395 tqt = list_entry(tq->tq_active_list.next, taskq_thread_t,
472a34ca 396 tqt_active_list);
e7e5f78e
PS
397 ASSERT(tqt->tqt_id != 0);
398 lowest_id = MIN(lowest_id, tqt->tqt_id);
82387586 399 }
bcd68186 400
b17edc10 401 SRETURN(lowest_id);
bcd68186 402}
403
f0d8bb26 404/*
472a34ca 405 * Insert a task into a list keeping the list sorted by increasing taskqid.
f0d8bb26
NB
406 */
407static void
2c02b71b 408taskq_insert_in_order(taskq_t *tq, taskq_thread_t *tqt)
f0d8bb26 409{
2c02b71b 410 taskq_thread_t *w;
f0d8bb26
NB
411 struct list_head *l;
412
b17edc10 413 SENTRY;
f0d8bb26 414 ASSERT(tq);
2c02b71b 415 ASSERT(tqt);
f0d8bb26
NB
416 ASSERT(spin_is_locked(&tq->tq_lock));
417
2c02b71b
PS
418 list_for_each_prev(l, &tq->tq_active_list) {
419 w = list_entry(l, taskq_thread_t, tqt_active_list);
e7e5f78e 420 if (w->tqt_id < tqt->tqt_id) {
2c02b71b 421 list_add(&tqt->tqt_active_list, l);
f0d8bb26
NB
422 break;
423 }
424 }
2c02b71b
PS
425 if (l == &tq->tq_active_list)
426 list_add(&tqt->tqt_active_list, &tq->tq_active_list);
f0d8bb26 427
b17edc10 428 SEXIT;
f0d8bb26
NB
429}
430
bcd68186 431static int
432taskq_thread(void *args)
433{
472a34ca
BB
434 DECLARE_WAITQUEUE(wait, current);
435 sigset_t blocked;
2c02b71b 436 taskq_thread_t *tqt = args;
472a34ca
BB
437 taskq_t *tq;
438 taskq_ent_t *t;
f0d8bb26 439 struct list_head *pend_list;
b17edc10 440 SENTRY;
bcd68186 441
472a34ca 442 ASSERT(tqt);
2c02b71b 443 tq = tqt->tqt_tq;
472a34ca 444 current->flags |= PF_NOFREEZE;
bcd68186 445
472a34ca
BB
446 sigfillset(&blocked);
447 sigprocmask(SIG_BLOCK, &blocked, NULL);
448 flush_signals(current);
bcd68186 449
472a34ca
BB
450 spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
451 tq->tq_nthreads++;
452 wake_up(&tq->tq_wait_waitq);
453 set_current_state(TASK_INTERRUPTIBLE);
bcd68186 454
472a34ca 455 while (!kthread_should_stop()) {
bcd68186 456
f0d8bb26
NB
457 if (list_empty(&tq->tq_pend_list) &&
458 list_empty(&tq->tq_prio_list)) {
3c6ed541 459 add_wait_queue_exclusive(&tq->tq_work_waitq, &wait);
749045bb 460 spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
bcd68186 461 schedule();
749045bb 462 spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
3c6ed541 463 remove_wait_queue(&tq->tq_work_waitq, &wait);
bcd68186 464 } else {
465 __set_current_state(TASK_RUNNING);
466 }
467
f0d8bb26
NB
468
469 if (!list_empty(&tq->tq_prio_list))
470 pend_list = &tq->tq_prio_list;
471 else if (!list_empty(&tq->tq_pend_list))
472 pend_list = &tq->tq_pend_list;
473 else
474 pend_list = NULL;
475
476 if (pend_list) {
472a34ca
BB
477 t = list_entry(pend_list->next,taskq_ent_t,tqent_list);
478 list_del_init(&t->tqent_list);
8f2503e0 479
44217f7a
PS
480 /* In order to support recursively dispatching a
481 * preallocated taskq_ent_t, tqent_id must be
482 * stored prior to executing tqent_func. */
e7e5f78e 483 tqt->tqt_id = t->tqent_id;
8f2503e0
PS
484
485 /* We must store a copy of the flags prior to
486 * servicing the task (servicing a prealloc'd task
487 * returns the ownership of the tqent back to
488 * the caller of taskq_dispatch). Thus,
489 * tqent_flags _may_ change within the call. */
490 tqt->tqt_flags = t->tqent_flags;
491
2c02b71b 492 taskq_insert_in_order(tq, tqt);
472a34ca 493 tq->tq_nactive++;
749045bb 494 spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
bcd68186 495
496 /* Perform the requested task */
472a34ca 497 t->tqent_func(t->tqent_arg);
bcd68186 498
749045bb 499 spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
472a34ca 500 tq->tq_nactive--;
2c02b71b 501 list_del_init(&tqt->tqt_active_list);
8f2503e0
PS
502
503 /* For prealloc'd tasks, we don't free anything. */
504 if ((tq->tq_flags & TASKQ_DYNAMIC) ||
505 !(tqt->tqt_flags & TQENT_FLAG_PREALLOC))
506 task_done(tq, t);
bcd68186 507
7257ec41
BB
508 /* When the current lowest outstanding taskqid is
509 * done calculate the new lowest outstanding id */
e7e5f78e 510 if (tq->tq_lowest_id == tqt->tqt_id) {
bcd68186 511 tq->tq_lowest_id = taskq_lowest_id(tq);
e7e5f78e 512 ASSERT3S(tq->tq_lowest_id, >, tqt->tqt_id);
bcd68186 513 }
514
e7e5f78e 515 tqt->tqt_id = 0;
8f2503e0 516 tqt->tqt_flags = 0;
472a34ca 517 wake_up_all(&tq->tq_wait_waitq);
bcd68186 518 }
519
520 set_current_state(TASK_INTERRUPTIBLE);
521
472a34ca 522 }
bcd68186 523
524 __set_current_state(TASK_RUNNING);
472a34ca 525 tq->tq_nthreads--;
2c02b71b
PS
526 list_del_init(&tqt->tqt_thread_list);
527 kmem_free(tqt, sizeof(taskq_thread_t));
528
472a34ca 529 spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
bcd68186 530
b17edc10 531 SRETURN(0);
bcd68186 532}
533
f1ca4da6 534taskq_t *
535__taskq_create(const char *name, int nthreads, pri_t pri,
472a34ca 536 int minalloc, int maxalloc, uint_t flags)
f1ca4da6 537{
472a34ca 538 taskq_t *tq;
2c02b71b 539 taskq_thread_t *tqt;
472a34ca
BB
540 int rc = 0, i, j = 0;
541 SENTRY;
bcd68186 542
472a34ca
BB
543 ASSERT(name != NULL);
544 ASSERT(pri <= maxclsyspri);
545 ASSERT(minalloc >= 0);
546 ASSERT(maxalloc <= INT_MAX);
547 ASSERT(!(flags & (TASKQ_CPR_SAFE | TASKQ_DYNAMIC))); /* Unsupported */
bcd68186 548
915404bd
BB
549 /* Scale the number of threads using nthreads as a percentage */
550 if (flags & TASKQ_THREADS_CPU_PCT) {
551 ASSERT(nthreads <= 100);
552 ASSERT(nthreads >= 0);
553 nthreads = MIN(nthreads, 100);
554 nthreads = MAX(nthreads, 0);
555 nthreads = MAX((num_online_cpus() * nthreads) / 100, 1);
556 }
557
472a34ca
BB
558 tq = kmem_alloc(sizeof(*tq), KM_PUSHPAGE);
559 if (tq == NULL)
560 SRETURN(NULL);
bcd68186 561
472a34ca
BB
562 spin_lock_init(&tq->tq_lock);
563 spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
564 INIT_LIST_HEAD(&tq->tq_thread_list);
565 INIT_LIST_HEAD(&tq->tq_active_list);
566 tq->tq_name = name;
567 tq->tq_nactive = 0;
bcd68186 568 tq->tq_nthreads = 0;
472a34ca
BB
569 tq->tq_pri = pri;
570 tq->tq_minalloc = minalloc;
571 tq->tq_maxalloc = maxalloc;
bcd68186 572 tq->tq_nalloc = 0;
472a34ca 573 tq->tq_flags = (flags | TQ_ACTIVE);
bcd68186 574 tq->tq_next_id = 1;
575 tq->tq_lowest_id = 1;
472a34ca
BB
576 INIT_LIST_HEAD(&tq->tq_free_list);
577 INIT_LIST_HEAD(&tq->tq_pend_list);
578 INIT_LIST_HEAD(&tq->tq_prio_list);
579 init_waitqueue_head(&tq->tq_work_waitq);
580 init_waitqueue_head(&tq->tq_wait_waitq);
bcd68186 581
472a34ca
BB
582 if (flags & TASKQ_PREPOPULATE)
583 for (i = 0; i < minalloc; i++)
584 task_done(tq, task_alloc(tq, TQ_PUSHPAGE | TQ_NEW));
6e605b6e 585
472a34ca 586 spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
6e605b6e 587
2c02b71b 588 for (i = 0; i < nthreads; i++) {
9b51f218 589 tqt = kmem_alloc(sizeof(*tqt), KM_PUSHPAGE);
2c02b71b
PS
590 INIT_LIST_HEAD(&tqt->tqt_thread_list);
591 INIT_LIST_HEAD(&tqt->tqt_active_list);
592 tqt->tqt_tq = tq;
e7e5f78e 593 tqt->tqt_id = 0;
2c02b71b
PS
594
595 tqt->tqt_thread = kthread_create(taskq_thread, tqt,
472a34ca 596 "%s/%d", name, i);
2c02b71b
PS
597 if (tqt->tqt_thread) {
598 list_add(&tqt->tqt_thread_list, &tq->tq_thread_list);
599 kthread_bind(tqt->tqt_thread, i % num_online_cpus());
600 set_user_nice(tqt->tqt_thread, PRIO_TO_NICE(pri));
601 wake_up_process(tqt->tqt_thread);
bcd68186 602 j++;
2c02b71b
PS
603 } else {
604 kmem_free(tqt, sizeof(taskq_thread_t));
605 rc = 1;
606 }
607 }
bcd68186 608
472a34ca 609 /* Wait for all threads to be started before potential destroy */
bcd68186 610 wait_event(tq->tq_wait_waitq, tq->tq_nthreads == j);
611
472a34ca
BB
612 if (rc) {
613 __taskq_destroy(tq);
614 tq = NULL;
615 }
bcd68186 616
472a34ca 617 SRETURN(tq);
f1ca4da6 618}
f1b59d26 619EXPORT_SYMBOL(__taskq_create);
b123971f 620
621void
622__taskq_destroy(taskq_t *tq)
623{
2c02b71b
PS
624 struct task_struct *thread;
625 taskq_thread_t *tqt;
046a70c9 626 taskq_ent_t *t;
b17edc10 627 SENTRY;
b123971f 628
bcd68186 629 ASSERT(tq);
749045bb 630 spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
472a34ca 631 tq->tq_flags &= ~TQ_ACTIVE;
749045bb 632 spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
bcd68186 633
634 /* TQ_ACTIVE cleared prevents new tasks being added to pending */
472a34ca 635 __taskq_wait(tq);
bcd68186 636
472a34ca 637 spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
bcd68186 638
2c02b71b
PS
639 /*
640 * Signal each thread to exit and block until it does. Each thread
641 * is responsible for removing itself from the list and freeing its
642 * taskq_thread_t. This allows for idle threads to opt to remove
643 * themselves from the taskq. They can be recreated as needed.
644 */
645 while (!list_empty(&tq->tq_thread_list)) {
646 tqt = list_entry(tq->tq_thread_list.next,
647 taskq_thread_t, tqt_thread_list);
648 thread = tqt->tqt_thread;
649 spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
650
651 kthread_stop(thread);
652
472a34ca 653 spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
2c02b71b
PS
654 }
655
472a34ca 656 while (!list_empty(&tq->tq_free_list)) {
046a70c9 657 t = list_entry(tq->tq_free_list.next, taskq_ent_t, tqent_list);
44217f7a
PS
658
659 ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
660
472a34ca
BB
661 list_del_init(&t->tqent_list);
662 task_free(tq, t);
663 }
bcd68186 664
472a34ca
BB
665 ASSERT(tq->tq_nthreads == 0);
666 ASSERT(tq->tq_nalloc == 0);
667 ASSERT(list_empty(&tq->tq_thread_list));
668 ASSERT(list_empty(&tq->tq_active_list));
669 ASSERT(list_empty(&tq->tq_free_list));
670 ASSERT(list_empty(&tq->tq_pend_list));
671 ASSERT(list_empty(&tq->tq_prio_list));
bcd68186 672
472a34ca 673 spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
2c02b71b 674
472a34ca 675 kmem_free(tq, sizeof(taskq_t));
bcd68186 676
b17edc10 677 SEXIT;
b123971f 678}
bcd68186 679EXPORT_SYMBOL(__taskq_destroy);
e9cb2b4f
BB
680
681int
682spl_taskq_init(void)
683{
472a34ca 684 SENTRY;
e9cb2b4f 685
f220894e
BB
686 /* Solaris creates a dynamic taskq of up to 64 threads, however in
687 * a Linux environment 1 thread per-core is usually about right */
472a34ca 688 system_taskq = taskq_create("spl_system_taskq", num_online_cpus(),
f220894e 689 minclsyspri, 4, 512, TASKQ_PREPOPULATE);
e9cb2b4f 690 if (system_taskq == NULL)
b17edc10 691 SRETURN(1);
e9cb2b4f 692
472a34ca 693 SRETURN(0);
e9cb2b4f
BB
694}
695
696void
697spl_taskq_fini(void)
698{
472a34ca 699 SENTRY;
e9cb2b4f 700 taskq_destroy(system_taskq);
472a34ca 701 SEXIT;
e9cb2b4f 702}