]> git.proxmox.com Git - mirror_spl.git/blobdiff - module/spl/spl-taskq.c
Remove TQ_SLEEP -> KM_SLEEP mapping
[mirror_spl.git] / module / spl / spl-taskq.c
index 50d32e021ae98e983b754bb56e4168bbe573ba76..7ea20461b1aae21e81bd67cc3905422d8a494aa5 100644 (file)
 taskq_t *system_taskq;
 EXPORT_SYMBOL(system_taskq);
 
-typedef struct taskq_ent {
-        spinlock_t              tqent_lock;
-        struct list_head        tqent_list;
-        taskqid_t               tqent_id;
-        task_func_t             *tqent_func;
-        void                    *tqent_arg;
-} taskq_ent_t;
+static int
+task_km_flags(uint_t flags)
+{
+       if (flags & TQ_NOSLEEP)
+               return KM_NOSLEEP;
+
+       if (flags & TQ_PUSHPAGE)
+               return KM_PUSHPAGE;
+
+       return KM_SLEEP;
+}
 
 /*
  * NOTE: Must be called with tq->tq_lock held, returns a list_t which
@@ -58,13 +62,14 @@ task_alloc(taskq_t *tq, uint_t flags)
         SENTRY;
 
         ASSERT(tq);
-        ASSERT(flags & (TQ_SLEEP | TQ_NOSLEEP));               /* One set */
-        ASSERT(!((flags & TQ_SLEEP) && (flags & TQ_NOSLEEP))); /* Not both */
         ASSERT(spin_is_locked(&tq->tq_lock));
 retry:
         /* Acquire taskq_ent_t's from free list if available */
         if (!list_empty(&tq->tq_free_list) && !(flags & TQ_NEW)) {
                 t = list_entry(tq->tq_free_list.next, taskq_ent_t, tqent_list);
+
+                ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
+
                 list_del_init(&t->tqent_list);
                 SRETURN(t);
         }
@@ -97,15 +102,11 @@ retry:
         }
 
         spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
-        t = kmem_alloc(sizeof(taskq_ent_t), flags & (TQ_SLEEP | TQ_NOSLEEP));
+        t = kmem_alloc(sizeof(taskq_ent_t), task_km_flags(flags));
         spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
 
         if (t) {
-                spin_lock_init(&t->tqent_lock);
-                INIT_LIST_HEAD(&t->tqent_list);
-                t->tqent_id = 0;
-                t->tqent_func = NULL;
-                t->tqent_arg = NULL;
+                taskq_init_ent(t);
                 tq->tq_nalloc++;
         }
 
@@ -150,6 +151,8 @@ task_done(taskq_t *tq, taskq_ent_t *t)
                t->tqent_id = 0;
                t->tqent_func = NULL;
                t->tqent_arg = NULL;
+               t->tqent_flags = 0;
+
                 list_add_tail(&t->tqent_list, &tq->tq_free_list);
        } else {
                task_free(tq, t);
@@ -163,19 +166,22 @@ task_done(taskq_t *tq, taskq_ent_t *t)
  * monotonically increasing taskqid and added to the tail of the pending
  * list.  As worker threads become available the tasks are removed from
  * the head of the pending or priority list, giving preference to the
- * priority list.  The tasks are then added to the work list, preserving
- * the ordering by taskqid.  Finally, as tasks complete they are removed
- * from the work list.  This means that the pending and work lists are
- * always kept sorted by taskqid.  Thus the lowest outstanding
- * incomplete taskqid can be determined simply by checking the min
- * taskqid for each head item on the pending, priority, and work list.
- * This value is stored in tq->tq_lowest_id and only updated to the new
- * lowest id when the previous lowest id completes.  All taskqids lower
- * than tq->tq_lowest_id must have completed.  It is also possible
- * larger taskqid's have completed because they may be processed in
- * parallel by several worker threads.  However, this is not a problem
- * because the behavior of taskq_wait_id() is to block until all
- * previously submitted taskqid's have completed.
+ * priority list.  The tasks are then removed from their respective
+ * list, and the taskq_thread servicing the task is added to the active
+ * list, preserving the order using the serviced task's taskqid.
+ * Finally, as tasks complete the taskq_thread servicing the task is
+ * removed from the active list.  This means that the pending task and
+ * active taskq_thread lists are always kept sorted by taskqid. Thus the
+ * lowest outstanding incomplete taskqid can be determined simply by
+ * checking the min taskqid for each head item on the pending, priority,
+ * and active taskq_thread list. This value is stored in
+ * tq->tq_lowest_id and only updated to the new lowest id when the
+ * previous lowest id completes.  All taskqids lower than
+ * tq->tq_lowest_id must have completed.  It is also possible larger
+ * taskqid's have completed because they may be processed in parallel by
+ * several worker threads.  However, this is not a problem because the
+ * behavior of taskq_wait_id() is to block until all previously
+ * submitted taskqid's have completed.
  *
  * XXX: Taskqid_t wrapping is not handled.  However, taskqid_t's are
  * 64-bit values so even if a taskq is processing 2^24 (16,777,216)
@@ -228,15 +234,18 @@ EXPORT_SYMBOL(__taskq_wait);
 int
 __taskq_member(taskq_t *tq, void *t)
 {
-        int i;
+       struct list_head *l;
+       taskq_thread_t *tqt;
         SENTRY;
 
        ASSERT(tq);
         ASSERT(t);
 
-        for (i = 0; i < tq->tq_nthreads; i++)
-                if (tq->tq_threads[i] == (struct task_struct *)t)
-                        SRETURN(1);
+       list_for_each(l, &tq->tq_thread_list) {
+               tqt = list_entry(l, taskq_thread_t, tqt_thread_list);
+               if (tqt->tqt_thread == (struct task_struct *)t)
+                       SRETURN(1);
+       }
 
         SRETURN(0);
 }
@@ -252,14 +261,6 @@ __taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
         ASSERT(tq);
         ASSERT(func);
 
-       /* Solaris assumes TQ_SLEEP if not passed explicitly */
-       if (!(flags & (TQ_SLEEP | TQ_NOSLEEP)))
-               flags |= TQ_SLEEP;
-
-       if (unlikely(in_atomic() && (flags & TQ_SLEEP)))
-               PANIC("May schedule while atomic: %s/0x%08x/%d\n",
-                   current->comm, preempt_count(), current->pid);
-
         spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
 
        /* Taskq being destroyed and all tasks drained */
@@ -286,6 +287,9 @@ __taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
        tq->tq_next_id++;
         t->tqent_func = func;
         t->tqent_arg = arg;
+
+       ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
+
        spin_unlock(&t->tqent_lock);
 
        wake_up(&tq->tq_work_waitq);
@@ -294,6 +298,72 @@ out:
        SRETURN(rc);
 }
 EXPORT_SYMBOL(__taskq_dispatch);
+
+void
+__taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
+   taskq_ent_t *t)
+{
+       SENTRY;
+
+       ASSERT(tq);
+       ASSERT(func);
+       ASSERT(!(tq->tq_flags & TASKQ_DYNAMIC));
+
+       spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+
+       /* Taskq being destroyed and all tasks drained */
+       if (!(tq->tq_flags & TQ_ACTIVE)) {
+               t->tqent_id = 0;
+               goto out;
+       }
+
+       spin_lock(&t->tqent_lock);
+
+       /*
+        * Mark it as a prealloc'd task.  This is important
+        * to ensure that we don't free it later.
+        */
+       t->tqent_flags |= TQENT_FLAG_PREALLOC;
+
+       /* Queue to the priority list instead of the pending list */
+       if (flags & TQ_FRONT)
+               list_add_tail(&t->tqent_list, &tq->tq_prio_list);
+       else
+               list_add_tail(&t->tqent_list, &tq->tq_pend_list);
+
+       t->tqent_id = tq->tq_next_id;
+       tq->tq_next_id++;
+       t->tqent_func = func;
+       t->tqent_arg = arg;
+
+       spin_unlock(&t->tqent_lock);
+
+       wake_up(&tq->tq_work_waitq);
+out:
+       spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+       SEXIT;
+}
+EXPORT_SYMBOL(__taskq_dispatch_ent);
+
+int
+__taskq_empty_ent(taskq_ent_t *t)
+{
+       return list_empty(&t->tqent_list);
+}
+EXPORT_SYMBOL(__taskq_empty_ent);
+
+void
+__taskq_init_ent(taskq_ent_t *t)
+{
+       spin_lock_init(&t->tqent_lock);
+       INIT_LIST_HEAD(&t->tqent_list);
+       t->tqent_id = 0;
+       t->tqent_func = NULL;
+       t->tqent_arg = NULL;
+       t->tqent_flags = 0;
+}
+EXPORT_SYMBOL(__taskq_init_ent);
+
 /*
  * Returns the lowest incomplete taskqid_t.  The taskqid_t may
  * be queued on the pending list, on the priority list,  or on
@@ -305,6 +375,7 @@ taskq_lowest_id(taskq_t *tq)
 {
        taskqid_t lowest_id = tq->tq_next_id;
         taskq_ent_t *t;
+       taskq_thread_t *tqt;
        SENTRY;
 
        ASSERT(tq);
@@ -320,9 +391,11 @@ taskq_lowest_id(taskq_t *tq)
                lowest_id = MIN(lowest_id, t->tqent_id);
        }
 
-       if (!list_empty(&tq->tq_work_list)) {
-               t = list_entry(tq->tq_work_list.next, taskq_ent_t, tqent_list);
-               lowest_id = MIN(lowest_id, t->tqent_id);
+       if (!list_empty(&tq->tq_active_list)) {
+               tqt = list_entry(tq->tq_active_list.next, taskq_thread_t,
+                                tqt_active_list);
+               ASSERT(tqt->tqt_id != 0);
+               lowest_id = MIN(lowest_id, tqt->tqt_id);
        }
 
        SRETURN(lowest_id);
@@ -333,25 +406,25 @@ taskq_lowest_id(taskq_t *tq)
  * taskqid.
  */
 static void
-taskq_insert_in_order(taskq_t *tq, taskq_ent_t *t)
+taskq_insert_in_order(taskq_t *tq, taskq_thread_t *tqt)
 {
-       taskq_ent_t *w;
+       taskq_thread_t *w;
        struct list_head *l;
 
        SENTRY;
        ASSERT(tq);
-       ASSERT(t);
+       ASSERT(tqt);
        ASSERT(spin_is_locked(&tq->tq_lock));
 
-       list_for_each_prev(l, &tq->tq_work_list) {
-               w = list_entry(l, taskq_ent_t, tqent_list);
-               if (w->tqent_id < t->tqent_id) {
-                       list_add(&t->tqent_list, l);
+       list_for_each_prev(l, &tq->tq_active_list) {
+               w = list_entry(l, taskq_thread_t, tqt_active_list);
+               if (w->tqt_id < tqt->tqt_id) {
+                       list_add(&tqt->tqt_active_list, l);
                        break;
                }
        }
-       if (l == &tq->tq_work_list)
-               list_add(&t->tqent_list, &tq->tq_work_list);
+       if (l == &tq->tq_active_list)
+               list_add(&tqt->tqt_active_list, &tq->tq_active_list);
 
        SEXIT;
 }
@@ -361,19 +434,16 @@ taskq_thread(void *args)
 {
         DECLARE_WAITQUEUE(wait, current);
         sigset_t blocked;
-       taskqid_t id;
-        taskq_t *tq = args;
+       taskq_thread_t *tqt = args;
+        taskq_t *tq;
         taskq_ent_t *t;
        struct list_head *pend_list;
        SENTRY;
 
-        ASSERT(tq);
+        ASSERT(tqt);
+       tq = tqt->tqt_tq;
         current->flags |= PF_NOFREEZE;
 
-       /* Disable the direct memory reclaim path */
-       if (tq->tq_flags & TASKQ_NORECLAIM)
-               current->flags |= PF_MEMALLOC;
-
         sigfillset(&blocked);
         sigprocmask(SIG_BLOCK, &blocked, NULL);
         flush_signals(current);
@@ -385,17 +455,17 @@ taskq_thread(void *args)
 
         while (!kthread_should_stop()) {
 
-               add_wait_queue(&tq->tq_work_waitq, &wait);
                if (list_empty(&tq->tq_pend_list) &&
                    list_empty(&tq->tq_prio_list)) {
+                       add_wait_queue_exclusive(&tq->tq_work_waitq, &wait);
                        spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
                        schedule();
                        spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+                       remove_wait_queue(&tq->tq_work_waitq, &wait);
                } else {
                        __set_current_state(TASK_RUNNING);
                }
 
-               remove_wait_queue(&tq->tq_work_waitq, &wait);
 
                if (!list_empty(&tq->tq_prio_list))
                        pend_list = &tq->tq_prio_list;
@@ -407,7 +477,20 @@ taskq_thread(void *args)
                if (pend_list) {
                         t = list_entry(pend_list->next, taskq_ent_t, tqent_list);
                         list_del_init(&t->tqent_list);
-                       taskq_insert_in_order(tq, t);
+
+                       /* In order to support recursively dispatching a
+                        * preallocated taskq_ent_t, tqent_id must be
+                        * stored prior to executing tqent_func. */
+                       tqt->tqt_id = t->tqent_id;
+
+                       /* We must store a copy of the flags prior to
+                        * servicing the task (servicing a prealloc'd task
+                        * returns the ownership of the tqent back to
+                        * the caller of taskq_dispatch). Thus,
+                        * tqent_flags _may_ change within the call. */
+                       tqt->tqt_flags = t->tqent_flags;
+
+                       taskq_insert_in_order(tq, tqt);
                         tq->tq_nactive++;
                        spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
 
@@ -416,16 +499,22 @@ taskq_thread(void *args)
 
                        spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
                         tq->tq_nactive--;
-                       id = t->tqent_id;
-                        task_done(tq, t);
+                       list_del_init(&tqt->tqt_active_list);
+
+                       /* For prealloc'd tasks, we don't free anything. */
+                       if ((tq->tq_flags & TASKQ_DYNAMIC) ||
+                           !(tqt->tqt_flags & TQENT_FLAG_PREALLOC))
+                               task_done(tq, t);
 
                        /* When the current lowest outstanding taskqid is
                         * done calculate the new lowest outstanding id */
-                       if (tq->tq_lowest_id == id) {
+                       if (tq->tq_lowest_id == tqt->tqt_id) {
                                tq->tq_lowest_id = taskq_lowest_id(tq);
-                               ASSERT(tq->tq_lowest_id > id);
+                               ASSERT3S(tq->tq_lowest_id, >, tqt->tqt_id);
                        }
 
+                       tqt->tqt_id = 0;
+                       tqt->tqt_flags = 0;
                         wake_up_all(&tq->tq_wait_waitq);
                }
 
@@ -435,6 +524,9 @@ taskq_thread(void *args)
 
        __set_current_state(TASK_RUNNING);
         tq->tq_nthreads--;
+       list_del_init(&tqt->tqt_thread_list);
+       kmem_free(tqt, sizeof(taskq_thread_t));
+
         spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
 
        SRETURN(0);
@@ -445,7 +537,7 @@ __taskq_create(const char *name, int nthreads, pri_t pri,
                int minalloc, int maxalloc, uint_t flags)
 {
         taskq_t *tq;
-        struct task_struct *t;
+       taskq_thread_t *tqt;
         int rc = 0, i, j = 0;
         SENTRY;
 
@@ -464,18 +556,14 @@ __taskq_create(const char *name, int nthreads, pri_t pri,
                nthreads = MAX((num_online_cpus() * nthreads) / 100, 1);
        }
 
-        tq = kmem_alloc(sizeof(*tq), KM_SLEEP);
+        tq = kmem_alloc(sizeof(*tq), KM_PUSHPAGE);
         if (tq == NULL)
                 SRETURN(NULL);
 
-        tq->tq_threads = kmem_alloc(nthreads * sizeof(t), KM_SLEEP);
-        if (tq->tq_threads == NULL) {
-                kmem_free(tq, sizeof(*tq));
-                SRETURN(NULL);
-        }
-
         spin_lock_init(&tq->tq_lock);
         spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+        INIT_LIST_HEAD(&tq->tq_thread_list);
+        INIT_LIST_HEAD(&tq->tq_active_list);
         tq->tq_name      = name;
         tq->tq_nactive   = 0;
        tq->tq_nthreads  = 0;
@@ -487,7 +575,6 @@ __taskq_create(const char *name, int nthreads, pri_t pri,
        tq->tq_next_id   = 1;
        tq->tq_lowest_id = 1;
         INIT_LIST_HEAD(&tq->tq_free_list);
-        INIT_LIST_HEAD(&tq->tq_work_list);
         INIT_LIST_HEAD(&tq->tq_pend_list);
         INIT_LIST_HEAD(&tq->tq_prio_list);
         init_waitqueue_head(&tq->tq_work_waitq);
@@ -495,23 +582,30 @@ __taskq_create(const char *name, int nthreads, pri_t pri,
 
         if (flags & TASKQ_PREPOPULATE)
                 for (i = 0; i < minalloc; i++)
-                        task_done(tq, task_alloc(tq, TQ_SLEEP | TQ_NEW));
+                        task_done(tq, task_alloc(tq, TQ_PUSHPAGE | TQ_NEW));
 
         spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
 
-        for (i = 0; i < nthreads; i++) {
-                t = kthread_create(taskq_thread, tq, "%s/%d", name, i);
-                if (t) {
-                        tq->tq_threads[i] = t;
-                        kthread_bind(t, i % num_online_cpus());
-                        set_user_nice(t, PRIO_TO_NICE(pri));
-                        wake_up_process(t);
+       for (i = 0; i < nthreads; i++) {
+               tqt = kmem_alloc(sizeof(*tqt), KM_PUSHPAGE);
+               INIT_LIST_HEAD(&tqt->tqt_thread_list);
+               INIT_LIST_HEAD(&tqt->tqt_active_list);
+               tqt->tqt_tq = tq;
+               tqt->tqt_id = 0;
+
+               tqt->tqt_thread = kthread_create(taskq_thread, tqt,
+                                                "%s/%d", name, i);
+               if (tqt->tqt_thread) {
+                       list_add(&tqt->tqt_thread_list, &tq->tq_thread_list);
+                       kthread_bind(tqt->tqt_thread, i % num_online_cpus());
+                       set_user_nice(tqt->tqt_thread, PRIO_TO_NICE(pri));
+                       wake_up_process(tqt->tqt_thread);
                        j++;
-                } else {
-                        tq->tq_threads[i] = NULL;
-                        rc = 1;
-                }
-        }
+               } else {
+                       kmem_free(tqt, sizeof(taskq_thread_t));
+                       rc = 1;
+               }
+       }
 
         /* Wait for all threads to be started before potential destroy */
        wait_event(tq->tq_wait_waitq, tq->tq_nthreads == j);
@@ -528,8 +622,9 @@ EXPORT_SYMBOL(__taskq_create);
 void
 __taskq_destroy(taskq_t *tq)
 {
+       struct task_struct *thread;
+       taskq_thread_t *tqt;
        taskq_ent_t *t;
-       int i, nthreads;
        SENTRY;
 
        ASSERT(tq);
@@ -540,28 +635,44 @@ __taskq_destroy(taskq_t *tq)
        /* TQ_ACTIVE cleared prevents new tasks being added to pending */
         __taskq_wait(tq);
 
-       nthreads = tq->tq_nthreads;
-       for (i = 0; i < nthreads; i++)
-               if (tq->tq_threads[i])
-                       kthread_stop(tq->tq_threads[i]);
-
         spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
 
+       /*
+        * Signal each thread to exit and block until it does.  Each thread
+        * is responsible for removing itself from the list and freeing its
+        * taskq_thread_t.  This allows for idle threads to opt to remove
+        * themselves from the taskq.  They can be recreated as needed.
+        */
+       while (!list_empty(&tq->tq_thread_list)) {
+               tqt = list_entry(tq->tq_thread_list.next,
+                                taskq_thread_t, tqt_thread_list);
+               thread = tqt->tqt_thread;
+               spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+
+               kthread_stop(thread);
+
+               spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+       }
+
         while (!list_empty(&tq->tq_free_list)) {
                t = list_entry(tq->tq_free_list.next, taskq_ent_t, tqent_list);
+
+               ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
+
                list_del_init(&t->tqent_list);
                 task_free(tq, t);
         }
 
         ASSERT(tq->tq_nthreads == 0);
         ASSERT(tq->tq_nalloc == 0);
+        ASSERT(list_empty(&tq->tq_thread_list));
+        ASSERT(list_empty(&tq->tq_active_list));
         ASSERT(list_empty(&tq->tq_free_list));
-        ASSERT(list_empty(&tq->tq_work_list));
         ASSERT(list_empty(&tq->tq_pend_list));
         ASSERT(list_empty(&tq->tq_prio_list));
 
         spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
-        kmem_free(tq->tq_threads, nthreads * sizeof(taskq_ent_t *));
+
         kmem_free(tq, sizeof(taskq_t));
 
        SEXIT;