1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * linux/net/sunrpc/sched.c
4 *
5 * Scheduling for synchronous and asynchronous RPC requests.
6 *
7 * Copyright (C) 1996 Olaf Kirch, <okir@monad.swb.de>
8 *
9 * TCP NFS related read + write fixes
10 * (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie>
11 */
12
13 #include <linux/module.h>
14
15 #include <linux/sched.h>
16 #include <linux/interrupt.h>
17 #include <linux/slab.h>
18 #include <linux/mempool.h>
19 #include <linux/smp.h>
20 #include <linux/spinlock.h>
21 #include <linux/mutex.h>
22 #include <linux/freezer.h>
23 #include <linux/sched/mm.h>
24
25 #include <linux/sunrpc/clnt.h>
26 #include <linux/sunrpc/metrics.h>
27
28 #include "sunrpc.h"
29
30 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
31 #define RPCDBG_FACILITY RPCDBG_SCHED
32 #endif
33
34 #define CREATE_TRACE_POINTS
35 #include <trace/events/sunrpc.h>
36
37 /*
38 * RPC slabs and memory pools
39 */
40 #define RPC_BUFFER_MAXSIZE (2048)
41 #define RPC_BUFFER_POOLSIZE (8)
42 #define RPC_TASK_POOLSIZE (8)
43 static struct kmem_cache *rpc_task_slabp __read_mostly;
44 static struct kmem_cache *rpc_buffer_slabp __read_mostly;
45 static mempool_t *rpc_task_mempool __read_mostly;
46 static mempool_t *rpc_buffer_mempool __read_mostly;
47
48 static void rpc_async_schedule(struct work_struct *);
49 static void rpc_release_task(struct rpc_task *task);
50 static void __rpc_queue_timer_fn(struct work_struct *);
51
52 /*
53 * RPC tasks sit here while waiting for conditions to improve.
54 */
55 static struct rpc_wait_queue delay_queue;
56
57 /*
58 * rpciod-related stuff
59 */
60 struct workqueue_struct *rpciod_workqueue __read_mostly;
61 struct workqueue_struct *xprtiod_workqueue __read_mostly;
62 EXPORT_SYMBOL_GPL(xprtiod_workqueue);
63
64 unsigned long
65 rpc_task_timeout(const struct rpc_task *task)
66 {
67 unsigned long timeout = READ_ONCE(task->tk_timeout);
68
69 if (timeout != 0) {
70 unsigned long now = jiffies;
71 if (time_before(now, timeout))
72 return timeout - now;
73 }
74 return 0;
75 }
76 EXPORT_SYMBOL_GPL(rpc_task_timeout);
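
/*
 * Illustrative note (not part of the original file): rpc_task_timeout()
 * reports how many jiffies remain before the task's timer fires, or 0 if
 * no timer is armed or it has already expired.  A hypothetical caller
 * could turn that into something human readable:
 *
 *	unsigned long left = rpc_task_timeout(task);
 *
 *	if (left)
 *		pr_debug("task %5u times out in %u ms\n",
 *			 task->tk_pid, jiffies_to_msecs(left));
 *	else
 *		pr_debug("task %5u has no pending timeout\n", task->tk_pid);
 */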
77
78 /*
79 * Disable the timer for a given RPC task. Must be called while holding
80 * queue->lock in order to avoid races with the queue's timer callback,
81 * __rpc_queue_timer_fn().
82 */
83 static void
84 __rpc_disable_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
85 {
86 if (list_empty(&task->u.tk_wait.timer_list))
87 return;
88 dprintk("RPC: %5u disabling timer\n", task->tk_pid);
89 task->tk_timeout = 0;
90 list_del(&task->u.tk_wait.timer_list);
91 if (list_empty(&queue->timer_list.list))
92 cancel_delayed_work(&queue->timer_list.dwork);
93 }
94
95 static void
96 rpc_set_queue_timer(struct rpc_wait_queue *queue, unsigned long expires)
97 {
98 unsigned long now = jiffies;
99 queue->timer_list.expires = expires;
100 if (time_before_eq(expires, now))
101 expires = 0;
102 else
103 expires -= now;
104 mod_delayed_work(rpciod_workqueue, &queue->timer_list.dwork, expires);
105 }
106
107 /*
108 * Set up a timer for the current task.
109 */
110 static void
111 __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task,
112 unsigned long timeout)
113 {
114 dprintk("RPC: %5u setting alarm for %u ms\n",
115 task->tk_pid, jiffies_to_msecs(timeout - jiffies));
116
117 task->tk_timeout = timeout;
118 if (list_empty(&queue->timer_list.list) || time_before(timeout, queue->timer_list.expires))
119 rpc_set_queue_timer(queue, timeout);
120 list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list);
121 }
122
123 static void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
124 {
125 if (queue->priority != priority) {
126 queue->priority = priority;
127 queue->nr = 1U << priority;
128 }
129 }
130
131 static void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
132 {
133 rpc_set_waitqueue_priority(queue, queue->maxpriority);
134 }
135
136 /*
137 * Add a request to a queue list
138 */
139 static void
140 __rpc_list_enqueue_task(struct list_head *q, struct rpc_task *task)
141 {
142 struct rpc_task *t;
143
144 list_for_each_entry(t, q, u.tk_wait.list) {
145 if (t->tk_owner == task->tk_owner) {
146 list_add_tail(&task->u.tk_wait.links,
147 &t->u.tk_wait.links);
148 /* Cache the queue head in task->u.tk_wait.list */
149 task->u.tk_wait.list.next = q;
150 task->u.tk_wait.list.prev = NULL;
151 return;
152 }
153 }
154 INIT_LIST_HEAD(&task->u.tk_wait.links);
155 list_add_tail(&task->u.tk_wait.list, q);
156 }
157
158 /*
159 * Remove request from a queue list
160 */
161 static void
162 __rpc_list_dequeue_task(struct rpc_task *task)
163 {
164 struct list_head *q;
165 struct rpc_task *t;
166
167 if (task->u.tk_wait.list.prev == NULL) {
168 list_del(&task->u.tk_wait.links);
169 return;
170 }
171 if (!list_empty(&task->u.tk_wait.links)) {
172 t = list_first_entry(&task->u.tk_wait.links,
173 struct rpc_task,
174 u.tk_wait.links);
175 /* Assume __rpc_list_enqueue_task() cached the queue head */
176 q = t->u.tk_wait.list.next;
177 list_add_tail(&t->u.tk_wait.list, q);
178 list_del(&task->u.tk_wait.links);
179 }
180 list_del(&task->u.tk_wait.list);
181 }
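
/*
 * Editorial note (worked example, not in the original source): the two
 * helpers above keep only one task per owner on the main queue list and
 * chain further same-owner tasks off it via u.tk_wait.links.  A task that
 * is parked on the links list has u.tk_wait.list.next caching the queue
 * head and u.tk_wait.list.prev set to NULL, which is how
 * __rpc_list_dequeue_task() tells the two cases apart:
 *
 *	queue->tasks[p] --> T1 (owner A) --> T3 (owner B) --> ...
 *	                     |
 *	                     links: T2 (owner A, list.prev == NULL)
 *
 * Dequeuing T1 promotes T2 back onto the main list using the cached queue
 * head; dequeuing T2 only unlinks it from T1's links chain.
 */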
182
183 /*
184 * Add new request to a priority queue.
185 */
186 static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
187 struct rpc_task *task,
188 unsigned char queue_priority)
189 {
190 if (unlikely(queue_priority > queue->maxpriority))
191 queue_priority = queue->maxpriority;
192 __rpc_list_enqueue_task(&queue->tasks[queue_priority], task);
193 }
194
195 /*
196 * Add new request to wait queue.
197 *
198 * Swapper tasks always get inserted at the head of the queue.
199 * This should avoid many nasty memory deadlocks and hopefully
200 * improve overall performance.
201 * Everyone else gets appended to the queue to ensure proper FIFO behavior.
202 */
203 static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
204 struct rpc_task *task,
205 unsigned char queue_priority)
206 {
207 INIT_LIST_HEAD(&task->u.tk_wait.timer_list);
208 if (RPC_IS_PRIORITY(queue))
209 __rpc_add_wait_queue_priority(queue, task, queue_priority);
210 else if (RPC_IS_SWAPPER(task))
211 list_add(&task->u.tk_wait.list, &queue->tasks[0]);
212 else
213 list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]);
214 task->tk_waitqueue = queue;
215 queue->qlen++;
216 /* barrier matches the read in rpc_wake_up_task_queue_locked() */
217 smp_wmb();
218 rpc_set_queued(task);
219
220 dprintk("RPC: %5u added to queue %p \"%s\"\n",
221 task->tk_pid, queue, rpc_qname(queue));
222 }
223
224 /*
225 * Remove request from a priority queue.
226 */
227 static void __rpc_remove_wait_queue_priority(struct rpc_task *task)
228 {
229 __rpc_list_dequeue_task(task);
230 }
231
232 /*
233 * Remove request from queue.
234 * Note: must be called with spin lock held.
235 */
236 static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
237 {
238 __rpc_disable_timer(queue, task);
239 if (RPC_IS_PRIORITY(queue))
240 __rpc_remove_wait_queue_priority(task);
241 else
242 list_del(&task->u.tk_wait.list);
243 queue->qlen--;
244 dprintk("RPC: %5u removed from queue %p \"%s\"\n",
245 task->tk_pid, queue, rpc_qname(queue));
246 }
247
248 static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, unsigned char nr_queues)
249 {
250 int i;
251
252 spin_lock_init(&queue->lock);
253 for (i = 0; i < ARRAY_SIZE(queue->tasks); i++)
254 INIT_LIST_HEAD(&queue->tasks[i]);
255 queue->maxpriority = nr_queues - 1;
256 rpc_reset_waitqueue_priority(queue);
257 queue->qlen = 0;
258 queue->timer_list.expires = 0;
259 INIT_DELAYED_WORK(&queue->timer_list.dwork, __rpc_queue_timer_fn);
260 INIT_LIST_HEAD(&queue->timer_list.list);
261 rpc_assign_waitqueue_name(queue, qname);
262 }
263
264 void rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname)
265 {
266 __rpc_init_priority_wait_queue(queue, qname, RPC_NR_PRIORITY);
267 }
268 EXPORT_SYMBOL_GPL(rpc_init_priority_wait_queue);
269
270 void rpc_init_wait_queue(struct rpc_wait_queue *queue, const char *qname)
271 {
272 __rpc_init_priority_wait_queue(queue, qname, 1);
273 }
274 EXPORT_SYMBOL_GPL(rpc_init_wait_queue);
275
276 void rpc_destroy_wait_queue(struct rpc_wait_queue *queue)
277 {
278 cancel_delayed_work_sync(&queue->timer_list.dwork);
279 }
280 EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue);
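
/*
 * Minimal usage sketch (added for illustration; the queue name and the
 * surrounding init/teardown functions are hypothetical): a subsystem
 * embeds an rpc_wait_queue, initialises it once, and pairs that with
 * rpc_destroy_wait_queue() so the queue's delayed timer work is cancelled
 * before the structure goes away.
 */
#if 0	/* example only, not compiled */
static struct rpc_wait_queue example_queue;

static void example_init(void)
{
	rpc_init_wait_queue(&example_queue, "example_waitq");
}

static void example_exit(void)
{
	rpc_wake_up_status(&example_queue, -ESHUTDOWN);
	rpc_destroy_wait_queue(&example_queue);
}
#endif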
281
282 static int rpc_wait_bit_killable(struct wait_bit_key *key, int mode)
283 {
284 freezable_schedule_unsafe();
285 if (signal_pending_state(mode, current))
286 return -ERESTARTSYS;
287 return 0;
288 }
289
290 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS)
291 static void rpc_task_set_debuginfo(struct rpc_task *task)
292 {
293 static atomic_t rpc_pid;
294
295 task->tk_pid = atomic_inc_return(&rpc_pid);
296 }
297 #else
298 static inline void rpc_task_set_debuginfo(struct rpc_task *task)
299 {
300 }
301 #endif
302
303 static void rpc_set_active(struct rpc_task *task)
304 {
305 rpc_task_set_debuginfo(task);
306 set_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
307 trace_rpc_task_begin(task, NULL);
308 }
309
310 /*
311 * Mark an RPC call as having completed by clearing the 'active' bit
312 * and then waking up all tasks that were sleeping.
313 */
314 static int rpc_complete_task(struct rpc_task *task)
315 {
316 void *m = &task->tk_runstate;
317 wait_queue_head_t *wq = bit_waitqueue(m, RPC_TASK_ACTIVE);
318 struct wait_bit_key k = __WAIT_BIT_KEY_INITIALIZER(m, RPC_TASK_ACTIVE);
319 unsigned long flags;
320 int ret;
321
322 trace_rpc_task_complete(task, NULL);
323
324 spin_lock_irqsave(&wq->lock, flags);
325 clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
326 ret = atomic_dec_and_test(&task->tk_count);
327 if (waitqueue_active(wq))
328 __wake_up_locked_key(wq, TASK_NORMAL, &k);
329 spin_unlock_irqrestore(&wq->lock, flags);
330 return ret;
331 }
332
333 /*
334 * Allow callers to wait for completion of an RPC call
335 *
336 * Note the use of out_of_line_wait_on_bit() rather than wait_on_bit()
337 * to enforce taking of the wq->lock and hence avoid races with
338 * rpc_complete_task().
339 */
340 int __rpc_wait_for_completion_task(struct rpc_task *task, wait_bit_action_f *action)
341 {
342 if (action == NULL)
343 action = rpc_wait_bit_killable;
344 return out_of_line_wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE,
345 action, TASK_KILLABLE);
346 }
347 EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task);
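
/*
 * Usage sketch (illustrative only): a caller that still holds a reference
 * to an asynchronous task can block until rpc_complete_task() clears the
 * RPC_TASK_ACTIVE bit.  Passing a NULL action selects the default
 * rpc_wait_bit_killable behaviour shown above.
 */
#if 0	/* example only, not compiled */
static int example_wait_for_task(struct rpc_task *task)
{
	int err = __rpc_wait_for_completion_task(task, NULL);

	if (err == 0)
		err = task->tk_status;
	return err;
}
#endif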
348
349 /*
350 * Make an RPC task runnable.
351 *
352 * Note: If the task is ASYNC, and is being made runnable after sitting on an
353 * rpc_wait_queue, this must be called with the queue spinlock held to protect
354 * the wait queue operation.
355 * Note the ordering of rpc_test_and_set_running() and rpc_clear_queued(),
356 * which is needed to ensure that __rpc_execute() doesn't loop (due to the
357 * lockless RPC_IS_QUEUED() test) before we've had a chance to test
358 * the RPC_TASK_RUNNING flag.
359 */
360 static void rpc_make_runnable(struct workqueue_struct *wq,
361 struct rpc_task *task)
362 {
363 bool need_wakeup = !rpc_test_and_set_running(task);
364
365 rpc_clear_queued(task);
366 if (!need_wakeup)
367 return;
368 if (RPC_IS_ASYNC(task)) {
369 INIT_WORK(&task->u.tk_work, rpc_async_schedule);
370 queue_work(wq, &task->u.tk_work);
371 } else
372 wake_up_bit(&task->tk_runstate, RPC_TASK_QUEUED);
373 }
374
375 /*
376 * Prepare for sleeping on a wait queue.
377 * By always appending tasks to the list we ensure FIFO behavior.
378 * NB: An RPC task will only receive interrupt-driven events as long
379 * as it's on a wait queue.
380 */
381 static void __rpc_do_sleep_on_priority(struct rpc_wait_queue *q,
382 struct rpc_task *task,
383 unsigned char queue_priority)
384 {
385 dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n",
386 task->tk_pid, rpc_qname(q), jiffies);
387
388 trace_rpc_task_sleep(task, q);
389
390 __rpc_add_wait_queue(q, task, queue_priority);
391
392 }
393
394 static void __rpc_sleep_on_priority(struct rpc_wait_queue *q,
395 struct rpc_task *task,
396 unsigned char queue_priority)
397 {
398 if (WARN_ON_ONCE(RPC_IS_QUEUED(task)))
399 return;
400 __rpc_do_sleep_on_priority(q, task, queue_priority);
401 }
402
403 static void __rpc_sleep_on_priority_timeout(struct rpc_wait_queue *q,
404 struct rpc_task *task, unsigned long timeout,
405 unsigned char queue_priority)
406 {
407 if (WARN_ON_ONCE(RPC_IS_QUEUED(task)))
408 return;
409 if (time_is_after_jiffies(timeout)) {
410 __rpc_do_sleep_on_priority(q, task, queue_priority);
411 __rpc_add_timer(q, task, timeout);
412 } else
413 task->tk_status = -ETIMEDOUT;
414 }
415
416 static void rpc_set_tk_callback(struct rpc_task *task, rpc_action action)
417 {
418 if (action && !WARN_ON_ONCE(task->tk_callback != NULL))
419 task->tk_callback = action;
420 }
421
422 static bool rpc_sleep_check_activated(struct rpc_task *task)
423 {
424 /* We shouldn't ever put an inactive task to sleep */
425 if (WARN_ON_ONCE(!RPC_IS_ACTIVATED(task))) {
426 task->tk_status = -EIO;
427 rpc_put_task_async(task);
428 return false;
429 }
430 return true;
431 }
432
433 void rpc_sleep_on_timeout(struct rpc_wait_queue *q, struct rpc_task *task,
434 rpc_action action, unsigned long timeout)
435 {
436 if (!rpc_sleep_check_activated(task))
437 return;
438
439 rpc_set_tk_callback(task, action);
440
441 /*
442 * Protect the queue operations.
443 */
444 spin_lock(&q->lock);
445 __rpc_sleep_on_priority_timeout(q, task, timeout, task->tk_priority);
446 spin_unlock(&q->lock);
447 }
448 EXPORT_SYMBOL_GPL(rpc_sleep_on_timeout);
449
450 void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
451 rpc_action action)
452 {
453 if (!rpc_sleep_check_activated(task))
454 return;
455
456 rpc_set_tk_callback(task, action);
457
458 WARN_ON_ONCE(task->tk_timeout != 0);
459 /*
460 * Protect the queue operations.
461 */
462 spin_lock(&q->lock);
463 __rpc_sleep_on_priority(q, task, task->tk_priority);
464 spin_unlock(&q->lock);
465 }
466 EXPORT_SYMBOL_GPL(rpc_sleep_on);
467
468 void rpc_sleep_on_priority_timeout(struct rpc_wait_queue *q,
469 struct rpc_task *task, unsigned long timeout, int priority)
470 {
471 if (!rpc_sleep_check_activated(task))
472 return;
473
474 priority -= RPC_PRIORITY_LOW;
475 /*
476 * Protect the queue operations.
477 */
478 spin_lock(&q->lock);
479 __rpc_sleep_on_priority_timeout(q, task, timeout, priority);
480 spin_unlock(&q->lock);
481 }
482 EXPORT_SYMBOL_GPL(rpc_sleep_on_priority_timeout);
483
484 void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task,
485 int priority)
486 {
487 if (!rpc_sleep_check_activated(task))
488 return;
489
490 WARN_ON_ONCE(task->tk_timeout != 0);
491 priority -= RPC_PRIORITY_LOW;
492 /*
493 * Protect the queue operations.
494 */
495 spin_lock(&q->lock);
496 __rpc_sleep_on_priority(q, task, priority);
497 spin_unlock(&q->lock);
498 }
499 EXPORT_SYMBOL_GPL(rpc_sleep_on_priority);
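
/*
 * Illustrative sketch of the usual sleep/wake pattern (the resource type
 * and state-machine functions named here are hypothetical): a tk_action
 * routine records the next action, queues the task on a wait queue when
 * it must wait, and returns; whoever later calls
 * rpc_wake_up_queued_task() makes __rpc_execute() run that next action.
 */
#if 0	/* example only, not compiled */
static void example_wait_for_slot(struct rpc_task *task)
{
	struct example_resource *res = task->tk_calldata;

	task->tk_action = example_use_slot;	/* runs once we own a slot */
	if (!example_slot_available(res))
		rpc_sleep_on(&res->waitq, task, NULL);
}
#endif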
500
501 /**
502 * __rpc_do_wake_up_task_on_wq - wake up a single rpc_task
503 * @wq: workqueue on which to run task
504 * @queue: wait queue
505 * @task: task to be woken up
506 *
507 * Caller must hold queue->lock, and have cleared the task queued flag.
508 */
509 static void __rpc_do_wake_up_task_on_wq(struct workqueue_struct *wq,
510 struct rpc_wait_queue *queue,
511 struct rpc_task *task)
512 {
513 dprintk("RPC: %5u __rpc_wake_up_task (now %lu)\n",
514 task->tk_pid, jiffies);
515
516 /* Has the task been executed yet? If not, we cannot wake it up! */
517 if (!RPC_IS_ACTIVATED(task)) {
518 printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task);
519 return;
520 }
521
522 trace_rpc_task_wakeup(task, queue);
523
524 __rpc_remove_wait_queue(queue, task);
525
526 rpc_make_runnable(wq, task);
527
528 dprintk("RPC: __rpc_wake_up_task done\n");
529 }
530
531 /*
532 * Wake up a queued task while the queue lock is being held
533 */
534 static struct rpc_task *
535 rpc_wake_up_task_on_wq_queue_action_locked(struct workqueue_struct *wq,
536 struct rpc_wait_queue *queue, struct rpc_task *task,
537 bool (*action)(struct rpc_task *, void *), void *data)
538 {
539 if (RPC_IS_QUEUED(task)) {
540 smp_rmb();
541 if (task->tk_waitqueue == queue) {
542 if (action == NULL || action(task, data)) {
543 __rpc_do_wake_up_task_on_wq(wq, queue, task);
544 return task;
545 }
546 }
547 }
548 return NULL;
549 }
550
551 /*
552 * Wake up a queued task while the queue lock is being held
553 */
554 static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue,
555 struct rpc_task *task)
556 {
557 rpc_wake_up_task_on_wq_queue_action_locked(rpciod_workqueue, queue,
558 task, NULL, NULL);
559 }
560
561 /*
562 * Wake up a task on a specific queue
563 */
564 void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task)
565 {
566 if (!RPC_IS_QUEUED(task))
567 return;
568 spin_lock(&queue->lock);
569 rpc_wake_up_task_queue_locked(queue, task);
570 spin_unlock(&queue->lock);
571 }
572 EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task);
573
574 static bool rpc_task_action_set_status(struct rpc_task *task, void *status)
575 {
576 task->tk_status = *(int *)status;
577 return true;
578 }
579
580 static void
581 rpc_wake_up_task_queue_set_status_locked(struct rpc_wait_queue *queue,
582 struct rpc_task *task, int status)
583 {
584 rpc_wake_up_task_on_wq_queue_action_locked(rpciod_workqueue, queue,
585 task, rpc_task_action_set_status, &status);
586 }
587
588 /**
589 * rpc_wake_up_queued_task_set_status - wake up a task and set task->tk_status
590 * @queue: pointer to rpc_wait_queue
591 * @task: pointer to rpc_task
592 * @status: integer error value
593 *
594 * If @task is queued on @queue, then it is woken up, and @task->tk_status is
595 * set to the value of @status.
596 */
597 void
598 rpc_wake_up_queued_task_set_status(struct rpc_wait_queue *queue,
599 struct rpc_task *task, int status)
600 {
601 if (!RPC_IS_QUEUED(task))
602 return;
603 spin_lock(&queue->lock);
604 rpc_wake_up_task_queue_set_status_locked(queue, task, status);
605 spin_unlock(&queue->lock);
606 }
607
608 /*
609 * Wake up the next task on a priority queue.
610 */
611 static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *queue)
612 {
613 struct list_head *q;
614 struct rpc_task *task;
615
616 /*
617 * Service a batch of tasks from a single owner.
618 */
619 q = &queue->tasks[queue->priority];
620 if (!list_empty(q) && --queue->nr) {
621 task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
622 goto out;
623 }
624
625 /*
626 * Service the next queue.
627 */
628 do {
629 if (q == &queue->tasks[0])
630 q = &queue->tasks[queue->maxpriority];
631 else
632 q = q - 1;
633 if (!list_empty(q)) {
634 task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
635 goto new_queue;
636 }
637 } while (q != &queue->tasks[queue->priority]);
638
639 rpc_reset_waitqueue_priority(queue);
640 return NULL;
641
642 new_queue:
643 rpc_set_waitqueue_priority(queue, (unsigned int)(q - &queue->tasks[0]));
644 out:
645 return task;
646 }
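
/*
 * Editorial note (not in the original source): rpc_set_waitqueue_priority()
 * primes queue->nr with (1 << priority), so the fast path above can hand
 * out a burst of wake-ups from the currently selected list before the scan
 * rotates to another priority level; a full scan that finds no work
 * resets the queue back to its maximum priority.
 */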
647
648 static struct rpc_task *__rpc_find_next_queued(struct rpc_wait_queue *queue)
649 {
650 if (RPC_IS_PRIORITY(queue))
651 return __rpc_find_next_queued_priority(queue);
652 if (!list_empty(&queue->tasks[0]))
653 return list_first_entry(&queue->tasks[0], struct rpc_task, u.tk_wait.list);
654 return NULL;
655 }
656
657 /*
658 * Wake up the first task on the wait queue.
659 */
660 struct rpc_task *rpc_wake_up_first_on_wq(struct workqueue_struct *wq,
661 struct rpc_wait_queue *queue,
662 bool (*func)(struct rpc_task *, void *), void *data)
663 {
664 struct rpc_task *task = NULL;
665
666 dprintk("RPC: wake_up_first(%p \"%s\")\n",
667 queue, rpc_qname(queue));
668 spin_lock(&queue->lock);
669 task = __rpc_find_next_queued(queue);
670 if (task != NULL)
671 task = rpc_wake_up_task_on_wq_queue_action_locked(wq, queue,
672 task, func, data);
673 spin_unlock(&queue->lock);
674
675 return task;
676 }
677
678 /*
679 * Wake up the first task on the wait queue.
680 */
681 struct rpc_task *rpc_wake_up_first(struct rpc_wait_queue *queue,
682 bool (*func)(struct rpc_task *, void *), void *data)
683 {
684 return rpc_wake_up_first_on_wq(rpciod_workqueue, queue, func, data);
685 }
686 EXPORT_SYMBOL_GPL(rpc_wake_up_first);
687
688 static bool rpc_wake_up_next_func(struct rpc_task *task, void *data)
689 {
690 return true;
691 }
692
693 /*
694 * Wake up the next task on the wait queue.
695 */
696 struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *queue)
697 {
698 return rpc_wake_up_first(queue, rpc_wake_up_next_func, NULL);
699 }
700 EXPORT_SYMBOL_GPL(rpc_wake_up_next);
701
702 /**
703 * rpc_wake_up_locked - wake up all rpc_tasks
704 * @queue: rpc_wait_queue on which the tasks are sleeping
705 *
706 */
707 static void rpc_wake_up_locked(struct rpc_wait_queue *queue)
708 {
709 struct rpc_task *task;
710
711 for (;;) {
712 task = __rpc_find_next_queued(queue);
713 if (task == NULL)
714 break;
715 rpc_wake_up_task_queue_locked(queue, task);
716 }
717 }
718
719 /**
720 * rpc_wake_up - wake up all rpc_tasks
721 * @queue: rpc_wait_queue on which the tasks are sleeping
722 *
723 * Grabs queue->lock
724 */
725 void rpc_wake_up(struct rpc_wait_queue *queue)
726 {
727 spin_lock(&queue->lock);
728 rpc_wake_up_locked(queue);
729 spin_unlock(&queue->lock);
730 }
731 EXPORT_SYMBOL_GPL(rpc_wake_up);
732
733 /**
734 * rpc_wake_up_status_locked - wake up all rpc_tasks and set their status value.
735 * @queue: rpc_wait_queue on which the tasks are sleeping
736 * @status: status value to set
737 */
738 static void rpc_wake_up_status_locked(struct rpc_wait_queue *queue, int status)
739 {
740 struct rpc_task *task;
741
742 for (;;) {
743 task = __rpc_find_next_queued(queue);
744 if (task == NULL)
745 break;
746 rpc_wake_up_task_queue_set_status_locked(queue, task, status);
747 }
748 }
749
750 /**
751 * rpc_wake_up_status - wake up all rpc_tasks and set their status value.
752 * @queue: rpc_wait_queue on which the tasks are sleeping
753 * @status: status value to set
754 *
755 * Grabs queue->lock
756 */
757 void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
758 {
759 spin_lock(&queue->lock);
760 rpc_wake_up_status_locked(queue, status);
761 spin_unlock(&queue->lock);
762 }
763 EXPORT_SYMBOL_GPL(rpc_wake_up_status);
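
/*
 * Usage sketch (illustrative; the transport structure shown is
 * hypothetical): when a transport is torn down, anything still sleeping
 * on its wait queues is typically woken with an error status so the
 * tasks run their exit path instead of waiting forever.
 */
#if 0	/* example only, not compiled */
static void example_transport_shutdown(struct example_xprt *xprt)
{
	rpc_wake_up_status(&xprt->pending, -EIO);
	rpc_wake_up_status(&xprt->backlog, -EIO);
}
#endif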
764
765 static void __rpc_queue_timer_fn(struct work_struct *work)
766 {
767 struct rpc_wait_queue *queue = container_of(work,
768 struct rpc_wait_queue,
769 timer_list.dwork.work);
770 struct rpc_task *task, *n;
771 unsigned long expires, now, timeo;
772
773 spin_lock(&queue->lock);
774 expires = now = jiffies;
775 list_for_each_entry_safe(task, n, &queue->timer_list.list, u.tk_wait.timer_list) {
776 timeo = task->tk_timeout;
777 if (time_after_eq(now, timeo)) {
778 dprintk("RPC: %5u timeout\n", task->tk_pid);
779 task->tk_status = -ETIMEDOUT;
780 rpc_wake_up_task_queue_locked(queue, task);
781 continue;
782 }
783 if (expires == now || time_after(expires, timeo))
784 expires = timeo;
785 }
786 if (!list_empty(&queue->timer_list.list))
787 rpc_set_queue_timer(queue, expires);
788 spin_unlock(&queue->lock);
789 }
790
791 static void __rpc_atrun(struct rpc_task *task)
792 {
793 if (task->tk_status == -ETIMEDOUT)
794 task->tk_status = 0;
795 }
796
797 /*
798 * Run a task at a later time
799 */
800 void rpc_delay(struct rpc_task *task, unsigned long delay)
801 {
802 rpc_sleep_on_timeout(&delay_queue, task, __rpc_atrun, jiffies + delay);
803 }
804 EXPORT_SYMBOL_GPL(rpc_delay);
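
/*
 * Usage sketch (illustrative; the retry action named here is
 * hypothetical): rpc_delay() is the usual way for a state-machine step
 * to back off and retry, e.g. after a temporary server error.
 * __rpc_atrun() clears the -ETIMEDOUT that the queue timer sets, so the
 * task resumes with tk_status == 0.
 */
#if 0	/* example only, not compiled */
static void example_backoff_and_retry(struct rpc_task *task)
{
	task->tk_action = example_retry_request;
	rpc_delay(task, 3 * HZ);	/* try again in roughly 3 seconds */
}
#endif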
805
806 /*
807 * Helper to call task->tk_ops->rpc_call_prepare
808 */
809 void rpc_prepare_task(struct rpc_task *task)
810 {
811 task->tk_ops->rpc_call_prepare(task, task->tk_calldata);
812 }
813
814 static void
815 rpc_init_task_statistics(struct rpc_task *task)
816 {
817 /* Initialize retry counters */
818 task->tk_garb_retry = 2;
819 task->tk_cred_retry = 2;
820 task->tk_rebind_retry = 2;
821
822 /* starting timestamp */
823 task->tk_start = ktime_get();
824 }
825
826 static void
827 rpc_reset_task_statistics(struct rpc_task *task)
828 {
829 task->tk_timeouts = 0;
830 task->tk_flags &= ~(RPC_CALL_MAJORSEEN|RPC_TASK_SENT);
831 rpc_init_task_statistics(task);
832 }
833
834 /*
835 * Helper that calls task->tk_ops->rpc_call_done if it exists
836 */
837 void rpc_exit_task(struct rpc_task *task)
838 {
839 trace_rpc_task_end(task, task->tk_action);
840 task->tk_action = NULL;
841 if (task->tk_ops->rpc_count_stats)
842 task->tk_ops->rpc_count_stats(task, task->tk_calldata);
843 else if (task->tk_client)
844 rpc_count_iostats(task, task->tk_client->cl_metrics);
845 if (task->tk_ops->rpc_call_done != NULL) {
846 task->tk_ops->rpc_call_done(task, task->tk_calldata);
847 if (task->tk_action != NULL) {
848 /* Always release the RPC slot and buffer memory */
849 xprt_release(task);
850 rpc_reset_task_statistics(task);
851 }
852 }
853 }
854
855 void rpc_signal_task(struct rpc_task *task)
856 {
857 struct rpc_wait_queue *queue;
858
859 if (!RPC_IS_ACTIVATED(task))
860 return;
861 set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
862 smp_mb__after_atomic();
863 queue = READ_ONCE(task->tk_waitqueue);
864 if (queue)
865 rpc_wake_up_queued_task_set_status(queue, task, -ERESTARTSYS);
866 }
867
868 void rpc_exit(struct rpc_task *task, int status)
869 {
870 task->tk_status = status;
871 task->tk_action = rpc_exit_task;
872 rpc_wake_up_queued_task(task->tk_waitqueue, task);
873 }
874 EXPORT_SYMBOL_GPL(rpc_exit);
875
876 void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata)
877 {
878 if (ops->rpc_release != NULL)
879 ops->rpc_release(calldata);
880 }
881
882 /*
883 * This is the RPC `scheduler' (or rather, the finite state machine).
884 */
885 static void __rpc_execute(struct rpc_task *task)
886 {
887 struct rpc_wait_queue *queue;
888 int task_is_async = RPC_IS_ASYNC(task);
889 int status = 0;
890
891 dprintk("RPC: %5u __rpc_execute flags=0x%x\n",
892 task->tk_pid, task->tk_flags);
893
894 WARN_ON_ONCE(RPC_IS_QUEUED(task));
895 if (RPC_IS_QUEUED(task))
896 return;
897
898 for (;;) {
899 void (*do_action)(struct rpc_task *);
900
901 /*
902 * Perform the next FSM step or a pending callback.
903 *
904 * tk_action may be NULL if the task has been killed.
905 * In particular, note that rpc_killall_tasks may
906 * do this at any time, so beware when dereferencing.
907 */
908 do_action = task->tk_action;
909 if (task->tk_callback) {
910 do_action = task->tk_callback;
911 task->tk_callback = NULL;
912 }
913 if (!do_action)
914 break;
915 trace_rpc_task_run_action(task, do_action);
916 do_action(task);
917
918 /*
919 * Lockless check for whether task is sleeping or not.
920 */
921 if (!RPC_IS_QUEUED(task))
922 continue;
923
924 /*
925 * Signalled tasks should exit rather than sleep.
926 */
927 if (RPC_SIGNALLED(task)) {
928 task->tk_rpc_status = -ERESTARTSYS;
929 rpc_exit(task, -ERESTARTSYS);
930 }
931
932 /*
933 * The queue->lock protects against races with
934 * rpc_make_runnable().
935 *
936 * Note that once we clear RPC_TASK_RUNNING on an asynchronous
937 * rpc_task, rpc_make_runnable() can assign it to a
938 * different workqueue. We therefore cannot assume that the
939 * rpc_task pointer can still safely be dereferenced.
940 */
941 queue = task->tk_waitqueue;
942 spin_lock(&queue->lock);
943 if (!RPC_IS_QUEUED(task)) {
944 spin_unlock(&queue->lock);
945 continue;
946 }
947 rpc_clear_running(task);
948 spin_unlock(&queue->lock);
949 if (task_is_async)
950 return;
951
952 /* sync task: sleep here */
953 dprintk("RPC: %5u sync task going to sleep\n", task->tk_pid);
954 status = out_of_line_wait_on_bit(&task->tk_runstate,
955 RPC_TASK_QUEUED, rpc_wait_bit_killable,
956 TASK_KILLABLE);
957 if (status < 0) {
958 /*
959 * When a sync task receives a signal, it exits with
960 * -ERESTARTSYS. In order to catch any callbacks that
961 * clean up after sleeping on some queue, we don't
962 * break the loop here, but go around once more.
963 */
964 dprintk("RPC: %5u got signal\n", task->tk_pid);
965 set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
966 task->tk_rpc_status = -ERESTARTSYS;
967 rpc_exit(task, -ERESTARTSYS);
968 }
969 dprintk("RPC: %5u sync task resuming\n", task->tk_pid);
970 }
971
972 dprintk("RPC: %5u return %d, status %d\n", task->tk_pid, status,
973 task->tk_status);
974 /* Release all resources associated with the task */
975 rpc_release_task(task);
976 }
977
978 /*
979 * User-visible entry point to the scheduler.
980 *
981 * This may be called recursively if e.g. an async NFS task updates
982 * the attributes and finds that dirty pages must be flushed.
983 * NOTE: Upon exit of this function the task is guaranteed to be
984 * released. In particular note that rpc_release_task() will have
985 * been called, so your task memory may have been freed.
986 */
987 void rpc_execute(struct rpc_task *task)
988 {
989 bool is_async = RPC_IS_ASYNC(task);
990
991 rpc_set_active(task);
992 rpc_make_runnable(rpciod_workqueue, task);
993 if (!is_async) {
994 unsigned int pflags = memalloc_nofs_save();
995 __rpc_execute(task);
996 memalloc_nofs_restore(pflags);
997 }
998 }
999
1000 static void rpc_async_schedule(struct work_struct *work)
1001 {
1002 unsigned int pflags = memalloc_nofs_save();
1003
1004 __rpc_execute(container_of(work, struct rpc_task, u.tk_work));
1005 memalloc_nofs_restore(pflags);
1006 }
1007
1008 /**
1009 * rpc_malloc - allocate RPC buffer resources
1010 * @task: RPC task
1011 *
1012 * A single memory region is allocated, which is split between the
1013 * RPC call and RPC reply that this task is being used for. When
1014 * this RPC is retired, the memory is released by calling rpc_free.
1015 *
1016 * To prevent rpciod from hanging, this allocator never sleeps,
1017 * returning -ENOMEM and suppressing the warning if the request cannot
1018 * be serviced immediately. The caller can arrange to sleep in a
1019 * way that is safe for rpciod.
1020 *
1021 * Most requests are 'small' (under 2KiB) and can be serviced from a
1022 * mempool, ensuring that NFS reads and writes can always proceed,
1023 * and that there is good locality of reference for these buffers.
1024 */
1025 int rpc_malloc(struct rpc_task *task)
1026 {
1027 struct rpc_rqst *rqst = task->tk_rqstp;
1028 size_t size = rqst->rq_callsize + rqst->rq_rcvsize;
1029 struct rpc_buffer *buf;
1030 gfp_t gfp = GFP_NOFS;
1031
1032 if (RPC_IS_SWAPPER(task))
1033 gfp = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
1034
1035 size += sizeof(struct rpc_buffer);
1036 if (size <= RPC_BUFFER_MAXSIZE)
1037 buf = mempool_alloc(rpc_buffer_mempool, gfp);
1038 else
1039 buf = kmalloc(size, gfp);
1040
1041 if (!buf)
1042 return -ENOMEM;
1043
1044 buf->len = size;
1045 dprintk("RPC: %5u allocated buffer of size %zu at %p\n",
1046 task->tk_pid, size, buf);
1047 rqst->rq_buffer = buf->data;
1048 rqst->rq_rbuffer = (char *)rqst->rq_buffer + rqst->rq_callsize;
1049 return 0;
1050 }
1051 EXPORT_SYMBOL_GPL(rpc_malloc);
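
/*
 * Editorial note (layout sketch, not in the original source): the single
 * allocation made above is carved up as follows, which is why rq_rbuffer
 * simply points rq_callsize bytes past rq_buffer:
 *
 *	struct rpc_buffer
 *	+----------+---------------------+---------------------+
 *	| len, ... |      call data      |      reply data     |
 *	+----------+---------------------+---------------------+
 *	            ^ rq_buffer           ^ rq_rbuffer
 *	            |<-- rq_callsize  --> |<--  rq_rcvsize  --> |
 */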
1052
1053 /**
1054 * rpc_free - free RPC buffer resources allocated via rpc_malloc
1055 * @task: RPC task
1056 *
1057 */
1058 void rpc_free(struct rpc_task *task)
1059 {
1060 void *buffer = task->tk_rqstp->rq_buffer;
1061 size_t size;
1062 struct rpc_buffer *buf;
1063
1064 buf = container_of(buffer, struct rpc_buffer, data);
1065 size = buf->len;
1066
1067 dprintk("RPC: freeing buffer of size %zu at %p\n",
1068 size, buf);
1069
1070 if (size <= RPC_BUFFER_MAXSIZE)
1071 mempool_free(buf, rpc_buffer_mempool);
1072 else
1073 kfree(buf);
1074 }
1075 EXPORT_SYMBOL_GPL(rpc_free);
1076
1077 /*
1078 * Creation and deletion of RPC task structures
1079 */
1080 static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *task_setup_data)
1081 {
1082 memset(task, 0, sizeof(*task));
1083 atomic_set(&task->tk_count, 1);
1084 task->tk_flags = task_setup_data->flags;
1085 task->tk_ops = task_setup_data->callback_ops;
1086 task->tk_calldata = task_setup_data->callback_data;
1087 INIT_LIST_HEAD(&task->tk_task);
1088
1089 task->tk_priority = task_setup_data->priority - RPC_PRIORITY_LOW;
1090 task->tk_owner = current->tgid;
1091
1092 /* Initialize workqueue for async tasks */
1093 task->tk_workqueue = task_setup_data->workqueue;
1094
1095 task->tk_xprt = rpc_task_get_xprt(task_setup_data->rpc_client,
1096 xprt_get(task_setup_data->rpc_xprt));
1097
1098 task->tk_op_cred = get_rpccred(task_setup_data->rpc_op_cred);
1099
1100 if (task->tk_ops->rpc_call_prepare != NULL)
1101 task->tk_action = rpc_prepare_task;
1102
1103 rpc_init_task_statistics(task);
1104
1105 dprintk("RPC: new task initialized, procpid %u\n",
1106 task_pid_nr(current));
1107 }
1108
1109 static struct rpc_task *
1110 rpc_alloc_task(void)
1111 {
1112 return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS);
1113 }
1114
1115 /*
1116 * Create a new task for the specified client.
1117 */
1118 struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data)
1119 {
1120 struct rpc_task *task = setup_data->task;
1121 unsigned short flags = 0;
1122
1123 if (task == NULL) {
1124 task = rpc_alloc_task();
1125 flags = RPC_TASK_DYNAMIC;
1126 }
1127
1128 rpc_init_task(task, setup_data);
1129 task->tk_flags |= flags;
1130 dprintk("RPC: allocated task %p\n", task);
1131 return task;
1132 }
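
/*
 * Usage sketch (illustrative; the callback ops are hypothetical, and real
 * callers normally go through rpc_run_task() in clnt.c, which also binds
 * the client and message to the task): this only shows the lifecycle
 * managed by this file - allocate, execute, inspect tk_status, and drop
 * the reference.
 */
#if 0	/* example only, not compiled */
static int example_run_sync_call(struct rpc_clnt *clnt,
				 const struct rpc_message *msg)
{
	struct rpc_task_setup setup = {
		.rpc_client	= clnt,
		.rpc_message	= msg,
		.callback_ops	= &example_call_ops,
		.flags		= RPC_TASK_SOFT,
	};
	struct rpc_task *task;
	int status;

	task = rpc_new_task(&setup);
	/* keep a reference across execution, as rpc_run_task() does */
	atomic_inc(&task->tk_count);
	rpc_execute(task);		/* synchronous: returns when done */
	status = task->tk_status;
	rpc_put_task(task);
	return status;
}
#endif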
1133
1134 /*
1135 * rpc_free_task - release rpc task and perform cleanups
1136 *
1137 * Note that we free up the rpc_task _after_ rpc_release_calldata()
1138 * in order to work around a workqueue dependency issue.
1139 *
1140 * Tejun Heo states:
1141 * "Workqueue currently considers two work items to be the same if they're
1142 * on the same address and won't execute them concurrently - ie. it
1143 * makes a work item which is queued again while being executed wait
1144 * for the previous execution to complete.
1145 *
1146 * If a work function frees the work item, and then waits for an event
1147 * which should be performed by another work item and *that* work item
1148 * recycles the freed work item, it can create a false dependency loop.
1149 * There really is no reliable way to detect this short of verifying
1150 * every memory free."
1151 *
1152 */
1153 static void rpc_free_task(struct rpc_task *task)
1154 {
1155 unsigned short tk_flags = task->tk_flags;
1156
1157 put_rpccred(task->tk_op_cred);
1158 rpc_release_calldata(task->tk_ops, task->tk_calldata);
1159
1160 if (tk_flags & RPC_TASK_DYNAMIC) {
1161 dprintk("RPC: %5u freeing task\n", task->tk_pid);
1162 mempool_free(task, rpc_task_mempool);
1163 }
1164 }
1165
1166 static void rpc_async_release(struct work_struct *work)
1167 {
1168 unsigned int pflags = memalloc_nofs_save();
1169
1170 rpc_free_task(container_of(work, struct rpc_task, u.tk_work));
1171 memalloc_nofs_restore(pflags);
1172 }
1173
1174 static void rpc_release_resources_task(struct rpc_task *task)
1175 {
1176 xprt_release(task);
1177 if (task->tk_msg.rpc_cred) {
1178 put_cred(task->tk_msg.rpc_cred);
1179 task->tk_msg.rpc_cred = NULL;
1180 }
1181 rpc_task_release_client(task);
1182 }
1183
1184 static void rpc_final_put_task(struct rpc_task *task,
1185 struct workqueue_struct *q)
1186 {
1187 if (q != NULL) {
1188 INIT_WORK(&task->u.tk_work, rpc_async_release);
1189 queue_work(q, &task->u.tk_work);
1190 } else
1191 rpc_free_task(task);
1192 }
1193
1194 static void rpc_do_put_task(struct rpc_task *task, struct workqueue_struct *q)
1195 {
1196 if (atomic_dec_and_test(&task->tk_count)) {
1197 rpc_release_resources_task(task);
1198 rpc_final_put_task(task, q);
1199 }
1200 }
1201
1202 void rpc_put_task(struct rpc_task *task)
1203 {
1204 rpc_do_put_task(task, NULL);
1205 }
1206 EXPORT_SYMBOL_GPL(rpc_put_task);
1207
1208 void rpc_put_task_async(struct rpc_task *task)
1209 {
1210 rpc_do_put_task(task, task->tk_workqueue);
1211 }
1212 EXPORT_SYMBOL_GPL(rpc_put_task_async);
1213
1214 static void rpc_release_task(struct rpc_task *task)
1215 {
1216 dprintk("RPC: %5u release task\n", task->tk_pid);
1217
1218 WARN_ON_ONCE(RPC_IS_QUEUED(task));
1219
1220 rpc_release_resources_task(task);
1221
1222 /*
1223 * Note: at this point we have been removed from rpc_clnt->cl_tasks,
1224 * so it should be safe to use task->tk_count as a test for whether
1225 * or not any other processes still hold references to our rpc_task.
1226 */
1227 if (atomic_read(&task->tk_count) != 1 + !RPC_IS_ASYNC(task)) {
1228 /* Wake up anyone who may be waiting for task completion */
1229 if (!rpc_complete_task(task))
1230 return;
1231 } else {
1232 if (!atomic_dec_and_test(&task->tk_count))
1233 return;
1234 }
1235 rpc_final_put_task(task, task->tk_workqueue);
1236 }
1237
1238 int rpciod_up(void)
1239 {
1240 return try_module_get(THIS_MODULE) ? 0 : -EINVAL;
1241 }
1242
1243 void rpciod_down(void)
1244 {
1245 module_put(THIS_MODULE);
1246 }
1247
1248 /*
1249 * Start up the rpciod workqueue.
1250 */
1251 static int rpciod_start(void)
1252 {
1253 struct workqueue_struct *wq;
1254
1255 /*
1256 * Create the rpciod workqueue.
1257 */
1258 dprintk("RPC: creating workqueue rpciod\n");
1259 wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
1260 if (!wq)
1261 goto out_failed;
1262 rpciod_workqueue = wq;
1263 /* Note: highpri because network receive is latency sensitive */
1264 wq = alloc_workqueue("xprtiod", WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_HIGHPRI, 0);
1265 if (!wq)
1266 goto free_rpciod;
1267 xprtiod_workqueue = wq;
1268 return 1;
1269 free_rpciod:
1270 wq = rpciod_workqueue;
1271 rpciod_workqueue = NULL;
1272 destroy_workqueue(wq);
1273 out_failed:
1274 return 0;
1275 }
1276
1277 static void rpciod_stop(void)
1278 {
1279 struct workqueue_struct *wq = NULL;
1280
1281 if (rpciod_workqueue == NULL)
1282 return;
1283 dprintk("RPC: destroying workqueue rpciod\n");
1284
1285 wq = rpciod_workqueue;
1286 rpciod_workqueue = NULL;
1287 destroy_workqueue(wq);
1288 wq = xprtiod_workqueue;
1289 xprtiod_workqueue = NULL;
1290 destroy_workqueue(wq);
1291 }
1292
1293 void
1294 rpc_destroy_mempool(void)
1295 {
1296 rpciod_stop();
1297 mempool_destroy(rpc_buffer_mempool);
1298 mempool_destroy(rpc_task_mempool);
1299 kmem_cache_destroy(rpc_task_slabp);
1300 kmem_cache_destroy(rpc_buffer_slabp);
1301 rpc_destroy_wait_queue(&delay_queue);
1302 }
1303
1304 int
1305 rpc_init_mempool(void)
1306 {
1307 /*
1308 * The following is not strictly a mempool initialisation,
1309 * but there is no harm in doing it here
1310 */
1311 rpc_init_wait_queue(&delay_queue, "delayq");
1312 if (!rpciod_start())
1313 goto err_nomem;
1314
1315 rpc_task_slabp = kmem_cache_create("rpc_tasks",
1316 sizeof(struct rpc_task),
1317 0, SLAB_HWCACHE_ALIGN,
1318 NULL);
1319 if (!rpc_task_slabp)
1320 goto err_nomem;
1321 rpc_buffer_slabp = kmem_cache_create("rpc_buffers",
1322 RPC_BUFFER_MAXSIZE,
1323 0, SLAB_HWCACHE_ALIGN,
1324 NULL);
1325 if (!rpc_buffer_slabp)
1326 goto err_nomem;
1327 rpc_task_mempool = mempool_create_slab_pool(RPC_TASK_POOLSIZE,
1328 rpc_task_slabp);
1329 if (!rpc_task_mempool)
1330 goto err_nomem;
1331 rpc_buffer_mempool = mempool_create_slab_pool(RPC_BUFFER_POOLSIZE,
1332 rpc_buffer_slabp);
1333 if (!rpc_buffer_mempool)
1334 goto err_nomem;
1335 return 0;
1336 err_nomem:
1337 rpc_destroy_mempool();
1338 return -ENOMEM;
1339 }