]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * linux/net/sunrpc/sched.c | |
3 | * | |
4 | * Scheduling for synchronous and asynchronous RPC requests. | |
5 | * | |
6 | * Copyright (C) 1996 Olaf Kirch, <okir@monad.swb.de> | |
7 | * | |
8 | * TCP NFS related read + write fixes | |
9 | * (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie> | |
10 | */ | |
11 | ||
12 | #include <linux/module.h> | |
13 | ||
14 | #include <linux/sched.h> | |
15 | #include <linux/interrupt.h> | |
16 | #include <linux/slab.h> | |
17 | #include <linux/mempool.h> | |
18 | #include <linux/smp.h> | |
19 | #include <linux/smp_lock.h> | |
20 | #include <linux/spinlock.h> | |
21 | ||
22 | #include <linux/sunrpc/clnt.h> | |
23 | #include <linux/sunrpc/xprt.h> | |
24 | ||
25 | #ifdef RPC_DEBUG | |
26 | #define RPCDBG_FACILITY RPCDBG_SCHED | |
27 | #define RPC_TASK_MAGIC_ID 0xf00baa | |
28 | static int rpc_task_id; | |
29 | #endif | |
30 | ||
31 | /* | |
32 | * RPC slabs and memory pools | |
33 | */ | |
34 | #define RPC_BUFFER_MAXSIZE (2048) | |
35 | #define RPC_BUFFER_POOLSIZE (8) | |
36 | #define RPC_TASK_POOLSIZE (8) | |
ba89966c ED |
37 | static kmem_cache_t *rpc_task_slabp __read_mostly; |
38 | static kmem_cache_t *rpc_buffer_slabp __read_mostly; | |
39 | static mempool_t *rpc_task_mempool __read_mostly; | |
40 | static mempool_t *rpc_buffer_mempool __read_mostly; | |
1da177e4 LT |
41 | |
42 | static void __rpc_default_timer(struct rpc_task *task); | |
43 | static void rpciod_killall(void); | |
44 | static void rpc_free(struct rpc_task *task); | |
45 | ||
46 | static void rpc_async_schedule(void *); | |
47 | ||
48 | /* | |
49 | * RPC tasks that create another task (e.g. for contacting the portmapper) | |
50 | * will wait on this queue for their child's completion | |
51 | */ | |
52 | static RPC_WAITQ(childq, "childq"); | |
53 | ||
54 | /* | |
55 | * RPC tasks sit here while waiting for conditions to improve. | |
56 | */ | |
57 | static RPC_WAITQ(delay_queue, "delayq"); | |
58 | ||
59 | /* | |
60 | * All RPC tasks are linked into this list | |
61 | */ | |
62 | static LIST_HEAD(all_tasks); | |
63 | ||
64 | /* | |
65 | * rpciod-related stuff | |
66 | */ | |
67 | static DECLARE_MUTEX(rpciod_sema); | |
68 | static unsigned int rpciod_users; | |
69 | static struct workqueue_struct *rpciod_workqueue; | |
70 | ||
71 | /* | |
72 | * Spinlock for other critical sections of code. | |
73 | */ | |
74 | static DEFINE_SPINLOCK(rpc_sched_lock); | |
75 | ||
76 | /* | |
77 | * Disable the timer for a given RPC task. Should be called with | |
78 | * queue->lock and bh_disabled in order to avoid races within | |
79 | * rpc_run_timer(). | |
80 | */ | |
81 | static inline void | |
82 | __rpc_disable_timer(struct rpc_task *task) | |
83 | { | |
84 | dprintk("RPC: %4d disabling timer\n", task->tk_pid); | |
85 | task->tk_timeout_fn = NULL; | |
86 | task->tk_timeout = 0; | |
87 | } | |
88 | ||
89 | /* | |
90 | * Run a timeout function. | |
91 | * We use the callback in order to allow __rpc_wake_up_task() | |
92 | * and friends to disable the timer synchronously on SMP systems | |
93 | * without calling del_timer_sync(). The latter could cause a | |
94 | * deadlock if called while we're holding spinlocks... | |
95 | */ | |
96 | static void rpc_run_timer(struct rpc_task *task) | |
97 | { | |
98 | void (*callback)(struct rpc_task *); | |
99 | ||
100 | callback = task->tk_timeout_fn; | |
101 | task->tk_timeout_fn = NULL; | |
102 | if (callback && RPC_IS_QUEUED(task)) { | |
103 | dprintk("RPC: %4d running timer\n", task->tk_pid); | |
104 | callback(task); | |
105 | } | |
106 | smp_mb__before_clear_bit(); | |
107 | clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate); | |
108 | smp_mb__after_clear_bit(); | |
109 | } | |
110 | ||
111 | /* | |
112 | * Set up a timer for the current task. | |
113 | */ | |
114 | static inline void | |
115 | __rpc_add_timer(struct rpc_task *task, rpc_action timer) | |
116 | { | |
117 | if (!task->tk_timeout) | |
118 | return; | |
119 | ||
120 | dprintk("RPC: %4d setting alarm for %lu ms\n", | |
121 | task->tk_pid, task->tk_timeout * 1000 / HZ); | |
122 | ||
123 | if (timer) | |
124 | task->tk_timeout_fn = timer; | |
125 | else | |
126 | task->tk_timeout_fn = __rpc_default_timer; | |
127 | set_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate); | |
128 | mod_timer(&task->tk_timer, jiffies + task->tk_timeout); | |
129 | } | |
130 | ||
131 | /* | |
132 | * Delete any timer for the current task. Because we use del_timer_sync(), | |
133 | * this function should never be called while holding queue->lock. | |
134 | */ | |
135 | static void | |
136 | rpc_delete_timer(struct rpc_task *task) | |
137 | { | |
138 | if (RPC_IS_QUEUED(task)) | |
139 | return; | |
140 | if (test_and_clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate)) { | |
141 | del_singleshot_timer_sync(&task->tk_timer); | |
142 | dprintk("RPC: %4d deleting timer\n", task->tk_pid); | |
143 | } | |
144 | } | |
145 | ||
146 | /* | |
147 | * Add new request to a priority queue. | |
148 | */ | |
149 | static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct rpc_task *task) | |
150 | { | |
151 | struct list_head *q; | |
152 | struct rpc_task *t; | |
153 | ||
154 | INIT_LIST_HEAD(&task->u.tk_wait.links); | |
155 | q = &queue->tasks[task->tk_priority]; | |
156 | if (unlikely(task->tk_priority > queue->maxpriority)) | |
157 | q = &queue->tasks[queue->maxpriority]; | |
158 | list_for_each_entry(t, q, u.tk_wait.list) { | |
159 | if (t->tk_cookie == task->tk_cookie) { | |
160 | list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links); | |
161 | return; | |
162 | } | |
163 | } | |
164 | list_add_tail(&task->u.tk_wait.list, q); | |
165 | } | |
166 | ||
167 | /* | |
168 | * Add new request to wait queue. | |
169 | * | |
170 | * Swapper tasks always get inserted at the head of the queue. | |
171 | * This should avoid many nasty memory deadlocks and hopefully | |
172 | * improve overall performance. | |
173 | * Everyone else gets appended to the queue to ensure proper FIFO behavior. | |
174 | */ | |
175 | static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task) | |
176 | { | |
177 | BUG_ON (RPC_IS_QUEUED(task)); | |
178 | ||
179 | if (RPC_IS_PRIORITY(queue)) | |
180 | __rpc_add_wait_queue_priority(queue, task); | |
181 | else if (RPC_IS_SWAPPER(task)) | |
182 | list_add(&task->u.tk_wait.list, &queue->tasks[0]); | |
183 | else | |
184 | list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]); | |
185 | task->u.tk_wait.rpc_waitq = queue; | |
186 | rpc_set_queued(task); | |
187 | ||
188 | dprintk("RPC: %4d added to queue %p \"%s\"\n", | |
189 | task->tk_pid, queue, rpc_qname(queue)); | |
190 | } | |
191 | ||
192 | /* | |
193 | * Remove request from a priority queue. | |
194 | */ | |
195 | static void __rpc_remove_wait_queue_priority(struct rpc_task *task) | |
196 | { | |
197 | struct rpc_task *t; | |
198 | ||
199 | if (!list_empty(&task->u.tk_wait.links)) { | |
200 | t = list_entry(task->u.tk_wait.links.next, struct rpc_task, u.tk_wait.list); | |
201 | list_move(&t->u.tk_wait.list, &task->u.tk_wait.list); | |
202 | list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links); | |
203 | } | |
204 | list_del(&task->u.tk_wait.list); | |
205 | } | |
206 | ||
207 | /* | |
208 | * Remove request from queue. | |
209 | * Note: must be called with spin lock held. | |
210 | */ | |
211 | static void __rpc_remove_wait_queue(struct rpc_task *task) | |
212 | { | |
213 | struct rpc_wait_queue *queue; | |
214 | queue = task->u.tk_wait.rpc_waitq; | |
215 | ||
216 | if (RPC_IS_PRIORITY(queue)) | |
217 | __rpc_remove_wait_queue_priority(task); | |
218 | else | |
219 | list_del(&task->u.tk_wait.list); | |
220 | dprintk("RPC: %4d removed from queue %p \"%s\"\n", | |
221 | task->tk_pid, queue, rpc_qname(queue)); | |
222 | } | |
223 | ||
224 | static inline void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority) | |
225 | { | |
226 | queue->priority = priority; | |
227 | queue->count = 1 << (priority * 2); | |
228 | } | |
229 | ||
230 | static inline void rpc_set_waitqueue_cookie(struct rpc_wait_queue *queue, unsigned long cookie) | |
231 | { | |
232 | queue->cookie = cookie; | |
233 | queue->nr = RPC_BATCH_COUNT; | |
234 | } | |
235 | ||
236 | static inline void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue) | |
237 | { | |
238 | rpc_set_waitqueue_priority(queue, queue->maxpriority); | |
239 | rpc_set_waitqueue_cookie(queue, 0); | |
240 | } | |
241 | ||
242 | static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, int maxprio) | |
243 | { | |
244 | int i; | |
245 | ||
246 | spin_lock_init(&queue->lock); | |
247 | for (i = 0; i < ARRAY_SIZE(queue->tasks); i++) | |
248 | INIT_LIST_HEAD(&queue->tasks[i]); | |
249 | queue->maxpriority = maxprio; | |
250 | rpc_reset_waitqueue_priority(queue); | |
251 | #ifdef RPC_DEBUG | |
252 | queue->name = qname; | |
253 | #endif | |
254 | } | |
255 | ||
256 | void rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname) | |
257 | { | |
258 | __rpc_init_priority_wait_queue(queue, qname, RPC_PRIORITY_HIGH); | |
259 | } | |
260 | ||
/*
 * Initialise @queue as a plain wait queue (maximum priority 0).
 */
void rpc_init_wait_queue(struct rpc_wait_queue *queue, const char *qname)
{
	__rpc_init_priority_wait_queue(queue, qname, 0);
}
EXPORT_SYMBOL(rpc_init_wait_queue);
266 | ||
267 | /* | |
268 | * Make an RPC task runnable. | |
269 | * | |
270 | * Note: If the task is ASYNC, this must be called with | |
271 | * the spinlock held to protect the wait queue operation. | |
272 | */ | |
273 | static void rpc_make_runnable(struct rpc_task *task) | |
274 | { | |
275 | int do_ret; | |
276 | ||
277 | BUG_ON(task->tk_timeout_fn); | |
278 | do_ret = rpc_test_and_set_running(task); | |
279 | rpc_clear_queued(task); | |
280 | if (do_ret) | |
281 | return; | |
282 | if (RPC_IS_ASYNC(task)) { | |
283 | int status; | |
284 | ||
285 | INIT_WORK(&task->u.tk_work, rpc_async_schedule, (void *)task); | |
286 | status = queue_work(task->tk_workqueue, &task->u.tk_work); | |
287 | if (status < 0) { | |
288 | printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status); | |
289 | task->tk_status = status; | |
290 | return; | |
291 | } | |
292 | } else | |
96651ab3 | 293 | wake_up_bit(&task->tk_runstate, RPC_TASK_QUEUED); |
1da177e4 LT |
294 | } |
295 | ||
296 | /* | |
297 | * Place a newly initialized task on the workqueue. | |
298 | */ | |
299 | static inline void | |
300 | rpc_schedule_run(struct rpc_task *task) | |
301 | { | |
302 | /* Don't run a child twice! */ | |
303 | if (RPC_IS_ACTIVATED(task)) | |
304 | return; | |
305 | task->tk_active = 1; | |
306 | rpc_make_runnable(task); | |
307 | } | |
308 | ||
309 | /* | |
310 | * Prepare for sleeping on a wait queue. | |
311 | * By always appending tasks to the list we ensure FIFO behavior. | |
312 | * NB: An RPC task will only receive interrupt-driven events as long | |
313 | * as it's on a wait queue. | |
314 | */ | |
315 | static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, | |
316 | rpc_action action, rpc_action timer) | |
317 | { | |
318 | dprintk("RPC: %4d sleep_on(queue \"%s\" time %ld)\n", task->tk_pid, | |
319 | rpc_qname(q), jiffies); | |
320 | ||
321 | if (!RPC_IS_ASYNC(task) && !RPC_IS_ACTIVATED(task)) { | |
322 | printk(KERN_ERR "RPC: Inactive synchronous task put to sleep!\n"); | |
323 | return; | |
324 | } | |
325 | ||
326 | /* Mark the task as being activated if so needed */ | |
327 | if (!RPC_IS_ACTIVATED(task)) | |
328 | task->tk_active = 1; | |
329 | ||
330 | __rpc_add_wait_queue(q, task); | |
331 | ||
332 | BUG_ON(task->tk_callback != NULL); | |
333 | task->tk_callback = action; | |
334 | __rpc_add_timer(task, timer); | |
335 | } | |
336 | ||
337 | void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, | |
338 | rpc_action action, rpc_action timer) | |
339 | { | |
340 | /* | |
341 | * Protect the queue operations. | |
342 | */ | |
343 | spin_lock_bh(&q->lock); | |
344 | __rpc_sleep_on(q, task, action, timer); | |
345 | spin_unlock_bh(&q->lock); | |
346 | } | |
347 | ||
348 | /** | |
349 | * __rpc_do_wake_up_task - wake up a single rpc_task | |
350 | * @task: task to be woken up | |
351 | * | |
352 | * Caller must hold queue->lock, and have cleared the task queued flag. | |
353 | */ | |
354 | static void __rpc_do_wake_up_task(struct rpc_task *task) | |
355 | { | |
356 | dprintk("RPC: %4d __rpc_wake_up_task (now %ld)\n", task->tk_pid, jiffies); | |
357 | ||
358 | #ifdef RPC_DEBUG | |
359 | BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID); | |
360 | #endif | |
361 | /* Has the task been executed yet? If not, we cannot wake it up! */ | |
362 | if (!RPC_IS_ACTIVATED(task)) { | |
363 | printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task); | |
364 | return; | |
365 | } | |
366 | ||
367 | __rpc_disable_timer(task); | |
368 | __rpc_remove_wait_queue(task); | |
369 | ||
370 | rpc_make_runnable(task); | |
371 | ||
372 | dprintk("RPC: __rpc_wake_up_task done\n"); | |
373 | } | |
374 | ||
/*
 * Wake up @task if it is queued.  Does not take the queue lock itself.
 * The work is bracketed by rpc_start_wakeup()/rpc_finish_wakeup();
 * nothing happens when rpc_start_wakeup() declines.
 */
static void __rpc_wake_up_task(struct rpc_task *task)
{
	if (!rpc_start_wakeup(task))
		return;

	if (RPC_IS_QUEUED(task))
		__rpc_do_wake_up_task(task);
	rpc_finish_wakeup(task);
}
386 | ||
387 | /* | |
388 | * Default timeout handler if none specified by user | |
389 | */ | |
390 | static void | |
391 | __rpc_default_timer(struct rpc_task *task) | |
392 | { | |
393 | dprintk("RPC: %d timeout (default timer)\n", task->tk_pid); | |
394 | task->tk_status = -ETIMEDOUT; | |
395 | rpc_wake_up_task(task); | |
396 | } | |
397 | ||
398 | /* | |
399 | * Wake up the specified task | |
400 | */ | |
401 | void rpc_wake_up_task(struct rpc_task *task) | |
402 | { | |
403 | if (rpc_start_wakeup(task)) { | |
404 | if (RPC_IS_QUEUED(task)) { | |
405 | struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq; | |
406 | ||
407 | spin_lock_bh(&queue->lock); | |
408 | __rpc_do_wake_up_task(task); | |
409 | spin_unlock_bh(&queue->lock); | |
410 | } | |
411 | rpc_finish_wakeup(task); | |
412 | } | |
413 | } | |
414 | ||
415 | /* | |
416 | * Wake up the next task on a priority queue. | |
417 | */ | |
418 | static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queue) | |
419 | { | |
420 | struct list_head *q; | |
421 | struct rpc_task *task; | |
422 | ||
423 | /* | |
424 | * Service a batch of tasks from a single cookie. | |
425 | */ | |
426 | q = &queue->tasks[queue->priority]; | |
427 | if (!list_empty(q)) { | |
428 | task = list_entry(q->next, struct rpc_task, u.tk_wait.list); | |
429 | if (queue->cookie == task->tk_cookie) { | |
430 | if (--queue->nr) | |
431 | goto out; | |
432 | list_move_tail(&task->u.tk_wait.list, q); | |
433 | } | |
434 | /* | |
435 | * Check if we need to switch queues. | |
436 | */ | |
437 | if (--queue->count) | |
438 | goto new_cookie; | |
439 | } | |
440 | ||
441 | /* | |
442 | * Service the next queue. | |
443 | */ | |
444 | do { | |
445 | if (q == &queue->tasks[0]) | |
446 | q = &queue->tasks[queue->maxpriority]; | |
447 | else | |
448 | q = q - 1; | |
449 | if (!list_empty(q)) { | |
450 | task = list_entry(q->next, struct rpc_task, u.tk_wait.list); | |
451 | goto new_queue; | |
452 | } | |
453 | } while (q != &queue->tasks[queue->priority]); | |
454 | ||
455 | rpc_reset_waitqueue_priority(queue); | |
456 | return NULL; | |
457 | ||
458 | new_queue: | |
459 | rpc_set_waitqueue_priority(queue, (unsigned int)(q - &queue->tasks[0])); | |
460 | new_cookie: | |
461 | rpc_set_waitqueue_cookie(queue, task->tk_cookie); | |
462 | out: | |
463 | __rpc_wake_up_task(task); | |
464 | return task; | |
465 | } | |
466 | ||
467 | /* | |
468 | * Wake up the next task on the wait queue. | |
469 | */ | |
470 | struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue) | |
471 | { | |
472 | struct rpc_task *task = NULL; | |
473 | ||
474 | dprintk("RPC: wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue)); | |
475 | spin_lock_bh(&queue->lock); | |
476 | if (RPC_IS_PRIORITY(queue)) | |
477 | task = __rpc_wake_up_next_priority(queue); | |
478 | else { | |
479 | task_for_first(task, &queue->tasks[0]) | |
480 | __rpc_wake_up_task(task); | |
481 | } | |
482 | spin_unlock_bh(&queue->lock); | |
483 | ||
484 | return task; | |
485 | } | |
486 | ||
487 | /** | |
488 | * rpc_wake_up - wake up all rpc_tasks | |
489 | * @queue: rpc_wait_queue on which the tasks are sleeping | |
490 | * | |
491 | * Grabs queue->lock | |
492 | */ | |
493 | void rpc_wake_up(struct rpc_wait_queue *queue) | |
494 | { | |
495 | struct rpc_task *task; | |
496 | ||
497 | struct list_head *head; | |
498 | spin_lock_bh(&queue->lock); | |
499 | head = &queue->tasks[queue->maxpriority]; | |
500 | for (;;) { | |
501 | while (!list_empty(head)) { | |
502 | task = list_entry(head->next, struct rpc_task, u.tk_wait.list); | |
503 | __rpc_wake_up_task(task); | |
504 | } | |
505 | if (head == &queue->tasks[0]) | |
506 | break; | |
507 | head--; | |
508 | } | |
509 | spin_unlock_bh(&queue->lock); | |
510 | } | |
511 | ||
512 | /** | |
513 | * rpc_wake_up_status - wake up all rpc_tasks and set their status value. | |
514 | * @queue: rpc_wait_queue on which the tasks are sleeping | |
515 | * @status: status value to set | |
516 | * | |
517 | * Grabs queue->lock | |
518 | */ | |
519 | void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) | |
520 | { | |
521 | struct list_head *head; | |
522 | struct rpc_task *task; | |
523 | ||
524 | spin_lock_bh(&queue->lock); | |
525 | head = &queue->tasks[queue->maxpriority]; | |
526 | for (;;) { | |
527 | while (!list_empty(head)) { | |
528 | task = list_entry(head->next, struct rpc_task, u.tk_wait.list); | |
529 | task->tk_status = status; | |
530 | __rpc_wake_up_task(task); | |
531 | } | |
532 | if (head == &queue->tasks[0]) | |
533 | break; | |
534 | head--; | |
535 | } | |
536 | spin_unlock_bh(&queue->lock); | |
537 | } | |
538 | ||
539 | /* | |
540 | * Run a task at a later time | |
541 | */ | |
542 | static void __rpc_atrun(struct rpc_task *); | |
543 | void | |
544 | rpc_delay(struct rpc_task *task, unsigned long delay) | |
545 | { | |
546 | task->tk_timeout = delay; | |
547 | rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun); | |
548 | } | |
549 | ||
550 | static void | |
551 | __rpc_atrun(struct rpc_task *task) | |
552 | { | |
553 | task->tk_status = 0; | |
554 | rpc_wake_up_task(task); | |
555 | } | |
556 | ||
d05fdb0c | 557 | /* |
963d8fe5 | 558 | * Helper that calls task->tk_ops->rpc_call_done if it exists |
d05fdb0c | 559 | */ |
abbcf28f | 560 | void rpc_exit_task(struct rpc_task *task) |
d05fdb0c | 561 | { |
abbcf28f | 562 | task->tk_action = NULL; |
963d8fe5 TM |
563 | if (task->tk_ops->rpc_call_done != NULL) { |
564 | task->tk_ops->rpc_call_done(task, task->tk_calldata); | |
d05fdb0c | 565 | if (task->tk_action != NULL) { |
abbcf28f TM |
566 | WARN_ON(RPC_ASSASSINATED(task)); |
567 | /* Always release the RPC slot and buffer memory */ | |
568 | xprt_release(task); | |
569 | rpc_free(task); | |
d05fdb0c TM |
570 | } |
571 | } | |
d05fdb0c | 572 | } |
abbcf28f | 573 | EXPORT_SYMBOL(rpc_exit_task); |
d05fdb0c | 574 | |
96651ab3 TM |
575 | static int rpc_wait_bit_interruptible(void *word) |
576 | { | |
577 | if (signal_pending(current)) | |
578 | return -ERESTARTSYS; | |
579 | schedule(); | |
580 | return 0; | |
581 | } | |
582 | ||
1da177e4 LT |
583 | /* |
584 | * This is the RPC `scheduler' (or rather, the finite state machine). | |
585 | */ | |
586 | static int __rpc_execute(struct rpc_task *task) | |
587 | { | |
588 | int status = 0; | |
589 | ||
590 | dprintk("RPC: %4d rpc_execute flgs %x\n", | |
591 | task->tk_pid, task->tk_flags); | |
592 | ||
593 | BUG_ON(RPC_IS_QUEUED(task)); | |
594 | ||
d05fdb0c | 595 | for (;;) { |
1da177e4 LT |
596 | /* |
597 | * Garbage collection of pending timers... | |
598 | */ | |
599 | rpc_delete_timer(task); | |
600 | ||
601 | /* | |
602 | * Execute any pending callback. | |
603 | */ | |
604 | if (RPC_DO_CALLBACK(task)) { | |
605 | /* Define a callback save pointer */ | |
606 | void (*save_callback)(struct rpc_task *); | |
607 | ||
608 | /* | |
609 | * If a callback exists, save it, reset it, | |
610 | * call it. | |
611 | * The save is needed to stop from resetting | |
612 | * another callback set within the callback handler | |
613 | * - Dave | |
614 | */ | |
615 | save_callback=task->tk_callback; | |
616 | task->tk_callback=NULL; | |
617 | lock_kernel(); | |
618 | save_callback(task); | |
619 | unlock_kernel(); | |
620 | } | |
621 | ||
622 | /* | |
623 | * Perform the next FSM step. | |
624 | * tk_action may be NULL when the task has been killed | |
625 | * by someone else. | |
626 | */ | |
627 | if (!RPC_IS_QUEUED(task)) { | |
abbcf28f | 628 | if (task->tk_action == NULL) |
1da177e4 | 629 | break; |
abbcf28f TM |
630 | lock_kernel(); |
631 | task->tk_action(task); | |
632 | unlock_kernel(); | |
1da177e4 LT |
633 | } |
634 | ||
635 | /* | |
636 | * Lockless check for whether task is sleeping or not. | |
637 | */ | |
638 | if (!RPC_IS_QUEUED(task)) | |
639 | continue; | |
640 | rpc_clear_running(task); | |
641 | if (RPC_IS_ASYNC(task)) { | |
642 | /* Careful! we may have raced... */ | |
643 | if (RPC_IS_QUEUED(task)) | |
644 | return 0; | |
645 | if (rpc_test_and_set_running(task)) | |
646 | return 0; | |
647 | continue; | |
648 | } | |
649 | ||
650 | /* sync task: sleep here */ | |
651 | dprintk("RPC: %4d sync task going to sleep\n", task->tk_pid); | |
96651ab3 TM |
652 | /* Note: Caller should be using rpc_clnt_sigmask() */ |
653 | status = out_of_line_wait_on_bit(&task->tk_runstate, | |
654 | RPC_TASK_QUEUED, rpc_wait_bit_interruptible, | |
655 | TASK_INTERRUPTIBLE); | |
656 | if (status == -ERESTARTSYS) { | |
1da177e4 LT |
657 | /* |
658 | * When a sync task receives a signal, it exits with | |
659 | * -ERESTARTSYS. In order to catch any callbacks that | |
660 | * clean up after sleeping on some queue, we don't | |
661 | * break the loop here, but go around once more. | |
662 | */ | |
96651ab3 TM |
663 | dprintk("RPC: %4d got signal\n", task->tk_pid); |
664 | task->tk_flags |= RPC_TASK_KILLED; | |
665 | rpc_exit(task, -ERESTARTSYS); | |
666 | rpc_wake_up_task(task); | |
1da177e4 LT |
667 | } |
668 | rpc_set_running(task); | |
669 | dprintk("RPC: %4d sync task resuming\n", task->tk_pid); | |
670 | } | |
671 | ||
1da177e4 LT |
672 | dprintk("RPC: %4d exit() = %d\n", task->tk_pid, task->tk_status); |
673 | status = task->tk_status; | |
674 | ||
675 | /* Release all resources associated with the task */ | |
676 | rpc_release_task(task); | |
677 | return status; | |
678 | } | |
679 | ||
680 | /* | |
681 | * User-visible entry point to the scheduler. | |
682 | * | |
683 | * This may be called recursively if e.g. an async NFS task updates | |
684 | * the attributes and finds that dirty pages must be flushed. | |
685 | * NOTE: Upon exit of this function the task is guaranteed to be | |
686 | * released. In particular note that tk_release() will have | |
687 | * been called, so your task memory may have been freed. | |
688 | */ | |
689 | int | |
690 | rpc_execute(struct rpc_task *task) | |
691 | { | |
692 | BUG_ON(task->tk_active); | |
693 | ||
694 | task->tk_active = 1; | |
695 | rpc_set_running(task); | |
696 | return __rpc_execute(task); | |
697 | } | |
698 | ||
/*
 * Workqueue callback for async tasks: @arg is the rpc_task to run.
 * The result of __rpc_execute() is not used.
 */
static void rpc_async_schedule(void *arg)
{
	struct rpc_task *task = (struct rpc_task *)arg;

	__rpc_execute(task);
}
703 | ||
704 | /* | |
705 | * Allocate memory for RPC purposes. | |
706 | * | |
707 | * We try to ensure that some NFS reads and writes can always proceed | |
708 | * by using a mempool when allocating 'small' buffers. | |
709 | * In order to avoid memory starvation triggering more writebacks of | |
710 | * NFS requests, we use GFP_NOFS rather than GFP_KERNEL. | |
711 | */ | |
712 | void * | |
713 | rpc_malloc(struct rpc_task *task, size_t size) | |
714 | { | |
dd0fc66f | 715 | gfp_t gfp; |
1da177e4 LT |
716 | |
717 | if (task->tk_flags & RPC_TASK_SWAPPER) | |
718 | gfp = GFP_ATOMIC; | |
719 | else | |
720 | gfp = GFP_NOFS; | |
721 | ||
722 | if (size > RPC_BUFFER_MAXSIZE) { | |
723 | task->tk_buffer = kmalloc(size, gfp); | |
724 | if (task->tk_buffer) | |
725 | task->tk_bufsize = size; | |
726 | } else { | |
727 | task->tk_buffer = mempool_alloc(rpc_buffer_mempool, gfp); | |
728 | if (task->tk_buffer) | |
729 | task->tk_bufsize = RPC_BUFFER_MAXSIZE; | |
730 | } | |
731 | return task->tk_buffer; | |
732 | } | |
733 | ||
734 | static void | |
735 | rpc_free(struct rpc_task *task) | |
736 | { | |
737 | if (task->tk_buffer) { | |
738 | if (task->tk_bufsize == RPC_BUFFER_MAXSIZE) | |
739 | mempool_free(task->tk_buffer, rpc_buffer_mempool); | |
740 | else | |
741 | kfree(task->tk_buffer); | |
742 | task->tk_buffer = NULL; | |
743 | task->tk_bufsize = 0; | |
744 | } | |
745 | } | |
746 | ||
747 | /* | |
748 | * Creation and deletion of RPC task structures | |
749 | */ | |
963d8fe5 | 750 | void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata) |
1da177e4 LT |
751 | { |
752 | memset(task, 0, sizeof(*task)); | |
753 | init_timer(&task->tk_timer); | |
754 | task->tk_timer.data = (unsigned long) task; | |
755 | task->tk_timer.function = (void (*)(unsigned long)) rpc_run_timer; | |
756 | task->tk_client = clnt; | |
757 | task->tk_flags = flags; | |
963d8fe5 TM |
758 | task->tk_ops = tk_ops; |
759 | task->tk_calldata = calldata; | |
1da177e4 LT |
760 | |
761 | /* Initialize retry counters */ | |
762 | task->tk_garb_retry = 2; | |
763 | task->tk_cred_retry = 2; | |
764 | ||
765 | task->tk_priority = RPC_PRIORITY_NORMAL; | |
766 | task->tk_cookie = (unsigned long)current; | |
767 | ||
768 | /* Initialize workqueue for async tasks */ | |
769 | task->tk_workqueue = rpciod_workqueue; | |
1da177e4 LT |
770 | |
771 | if (clnt) { | |
772 | atomic_inc(&clnt->cl_users); | |
773 | if (clnt->cl_softrtry) | |
774 | task->tk_flags |= RPC_TASK_SOFT; | |
775 | if (!clnt->cl_intr) | |
776 | task->tk_flags |= RPC_TASK_NOINTR; | |
777 | } | |
778 | ||
779 | #ifdef RPC_DEBUG | |
780 | task->tk_magic = RPC_TASK_MAGIC_ID; | |
781 | task->tk_pid = rpc_task_id++; | |
782 | #endif | |
783 | /* Add to global list of all tasks */ | |
784 | spin_lock(&rpc_sched_lock); | |
785 | list_add_tail(&task->tk_task, &all_tasks); | |
786 | spin_unlock(&rpc_sched_lock); | |
787 | ||
963d8fe5 TM |
788 | BUG_ON(task->tk_ops == NULL); |
789 | ||
1da177e4 LT |
790 | dprintk("RPC: %4d new task procpid %d\n", task->tk_pid, |
791 | current->pid); | |
792 | } | |
793 | ||
794 | static struct rpc_task * | |
795 | rpc_alloc_task(void) | |
796 | { | |
797 | return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS); | |
798 | } | |
799 | ||
963d8fe5 | 800 | static void rpc_free_task(struct rpc_task *task) |
1da177e4 LT |
801 | { |
802 | dprintk("RPC: %4d freeing task\n", task->tk_pid); | |
803 | mempool_free(task, rpc_task_mempool); | |
804 | } | |
805 | ||
806 | /* | |
807 | * Create a new task for the specified client. We have to | |
808 | * clean up after an allocation failure, as the client may | |
809 | * have specified "oneshot". | |
810 | */ | |
963d8fe5 | 811 | struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata) |
1da177e4 LT |
812 | { |
813 | struct rpc_task *task; | |
814 | ||
815 | task = rpc_alloc_task(); | |
816 | if (!task) | |
817 | goto cleanup; | |
818 | ||
963d8fe5 | 819 | rpc_init_task(task, clnt, flags, tk_ops, calldata); |
1da177e4 LT |
820 | |
821 | dprintk("RPC: %4d allocated task\n", task->tk_pid); | |
822 | task->tk_flags |= RPC_TASK_DYNAMIC; | |
823 | out: | |
824 | return task; | |
825 | ||
826 | cleanup: | |
827 | /* Check whether to release the client */ | |
828 | if (clnt) { | |
829 | printk("rpc_new_task: failed, users=%d, oneshot=%d\n", | |
830 | atomic_read(&clnt->cl_users), clnt->cl_oneshot); | |
831 | atomic_inc(&clnt->cl_users); /* pretend we were used ... */ | |
832 | rpc_release_client(clnt); | |
833 | } | |
834 | goto out; | |
835 | } | |
836 | ||
837 | void rpc_release_task(struct rpc_task *task) | |
838 | { | |
963d8fe5 TM |
839 | const struct rpc_call_ops *tk_ops = task->tk_ops; |
840 | void *calldata = task->tk_calldata; | |
1da177e4 LT |
841 | dprintk("RPC: %4d release task\n", task->tk_pid); |
842 | ||
843 | #ifdef RPC_DEBUG | |
844 | BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID); | |
845 | #endif | |
846 | ||
847 | /* Remove from global task list */ | |
848 | spin_lock(&rpc_sched_lock); | |
849 | list_del(&task->tk_task); | |
850 | spin_unlock(&rpc_sched_lock); | |
851 | ||
852 | BUG_ON (RPC_IS_QUEUED(task)); | |
853 | task->tk_active = 0; | |
854 | ||
855 | /* Synchronously delete any running timer */ | |
856 | rpc_delete_timer(task); | |
857 | ||
858 | /* Release resources */ | |
859 | if (task->tk_rqstp) | |
860 | xprt_release(task); | |
861 | if (task->tk_msg.rpc_cred) | |
862 | rpcauth_unbindcred(task); | |
863 | rpc_free(task); | |
864 | if (task->tk_client) { | |
865 | rpc_release_client(task->tk_client); | |
866 | task->tk_client = NULL; | |
867 | } | |
868 | ||
869 | #ifdef RPC_DEBUG | |
870 | task->tk_magic = 0; | |
871 | #endif | |
963d8fe5 TM |
872 | if (task->tk_flags & RPC_TASK_DYNAMIC) |
873 | rpc_free_task(task); | |
874 | if (tk_ops->rpc_release) | |
875 | tk_ops->rpc_release(calldata); | |
1da177e4 LT |
876 | } |
877 | ||
878 | /** | |
879 | * rpc_find_parent - find the parent of a child task. | |
880 | * @child: child task | |
881 | * | |
882 | * Checks that the parent task is still sleeping on the | |
883 | * queue 'childq'. If so returns a pointer to the parent. | |
884 | * Upon failure returns NULL. | |
885 | * | |
886 | * Caller must hold childq.lock | |
887 | */ | |
963d8fe5 | 888 | static inline struct rpc_task *rpc_find_parent(struct rpc_task *child, struct rpc_task *parent) |
1da177e4 | 889 | { |
963d8fe5 | 890 | struct rpc_task *task; |
1da177e4 LT |
891 | struct list_head *le; |
892 | ||
1da177e4 LT |
893 | task_for_each(task, le, &childq.tasks[0]) |
894 | if (task == parent) | |
895 | return parent; | |
896 | ||
897 | return NULL; | |
898 | } | |
899 | ||
963d8fe5 | 900 | static void rpc_child_exit(struct rpc_task *child, void *calldata) |
1da177e4 LT |
901 | { |
902 | struct rpc_task *parent; | |
903 | ||
904 | spin_lock_bh(&childq.lock); | |
963d8fe5 | 905 | if ((parent = rpc_find_parent(child, calldata)) != NULL) { |
1da177e4 LT |
906 | parent->tk_status = child->tk_status; |
907 | __rpc_wake_up_task(parent); | |
908 | } | |
909 | spin_unlock_bh(&childq.lock); | |
910 | } | |
911 | ||
963d8fe5 TM |
912 | static const struct rpc_call_ops rpc_child_ops = { |
913 | .rpc_call_done = rpc_child_exit, | |
914 | }; | |
915 | ||
1da177e4 LT |
916 | /* |
917 | * Note: rpc_new_task releases the client after a failure. | |
918 | */ | |
919 | struct rpc_task * | |
920 | rpc_new_child(struct rpc_clnt *clnt, struct rpc_task *parent) | |
921 | { | |
922 | struct rpc_task *task; | |
923 | ||
963d8fe5 | 924 | task = rpc_new_task(clnt, RPC_TASK_ASYNC | RPC_TASK_CHILD, &rpc_child_ops, parent); |
1da177e4 LT |
925 | if (!task) |
926 | goto fail; | |
1da177e4 LT |
927 | return task; |
928 | ||
929 | fail: | |
930 | parent->tk_status = -ENOMEM; | |
931 | return NULL; | |
932 | } | |
933 | ||
934 | void rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func) | |
935 | { | |
936 | spin_lock_bh(&childq.lock); | |
937 | /* N.B. Is it possible for the child to have already finished? */ | |
938 | __rpc_sleep_on(&childq, task, func, NULL); | |
939 | rpc_schedule_run(child); | |
940 | spin_unlock_bh(&childq.lock); | |
941 | } | |
942 | ||
943 | /* | |
944 | * Kill all tasks for the given client. | |
945 | * XXX: kill their descendants as well? | |
946 | */ | |
947 | void rpc_killall_tasks(struct rpc_clnt *clnt) | |
948 | { | |
949 | struct rpc_task *rovr; | |
950 | struct list_head *le; | |
951 | ||
952 | dprintk("RPC: killing all tasks for client %p\n", clnt); | |
953 | ||
954 | /* | |
955 | * Spin lock all_tasks to prevent changes... | |
956 | */ | |
957 | spin_lock(&rpc_sched_lock); | |
958 | alltask_for_each(rovr, le, &all_tasks) { | |
959 | if (! RPC_IS_ACTIVATED(rovr)) | |
960 | continue; | |
961 | if (!clnt || rovr->tk_client == clnt) { | |
962 | rovr->tk_flags |= RPC_TASK_KILLED; | |
963 | rpc_exit(rovr, -EIO); | |
964 | rpc_wake_up_task(rovr); | |
965 | } | |
966 | } | |
967 | spin_unlock(&rpc_sched_lock); | |
968 | } | |
969 | ||
970 | static DECLARE_MUTEX_LOCKED(rpciod_running); | |
971 | ||
972 | static void rpciod_killall(void) | |
973 | { | |
974 | unsigned long flags; | |
975 | ||
976 | while (!list_empty(&all_tasks)) { | |
977 | clear_thread_flag(TIF_SIGPENDING); | |
978 | rpc_killall_tasks(NULL); | |
979 | flush_workqueue(rpciod_workqueue); | |
980 | if (!list_empty(&all_tasks)) { | |
981 | dprintk("rpciod_killall: waiting for tasks to exit\n"); | |
982 | yield(); | |
983 | } | |
984 | } | |
985 | ||
986 | spin_lock_irqsave(¤t->sighand->siglock, flags); | |
987 | recalc_sigpending(); | |
988 | spin_unlock_irqrestore(¤t->sighand->siglock, flags); | |
989 | } | |
990 | ||
991 | /* | |
992 | * Start up the rpciod process if it's not already running. | |
993 | */ | |
994 | int | |
995 | rpciod_up(void) | |
996 | { | |
997 | struct workqueue_struct *wq; | |
998 | int error = 0; | |
999 | ||
1000 | down(&rpciod_sema); | |
1001 | dprintk("rpciod_up: users %d\n", rpciod_users); | |
1002 | rpciod_users++; | |
1003 | if (rpciod_workqueue) | |
1004 | goto out; | |
1005 | /* | |
1006 | * If there's no pid, we should be the first user. | |
1007 | */ | |
1008 | if (rpciod_users > 1) | |
1009 | printk(KERN_WARNING "rpciod_up: no workqueue, %d users??\n", rpciod_users); | |
1010 | /* | |
1011 | * Create the rpciod thread and wait for it to start. | |
1012 | */ | |
1013 | error = -ENOMEM; | |
1014 | wq = create_workqueue("rpciod"); | |
1015 | if (wq == NULL) { | |
1016 | printk(KERN_WARNING "rpciod_up: create workqueue failed, error=%d\n", error); | |
1017 | rpciod_users--; | |
1018 | goto out; | |
1019 | } | |
1020 | rpciod_workqueue = wq; | |
1021 | error = 0; | |
1022 | out: | |
1023 | up(&rpciod_sema); | |
1024 | return error; | |
1025 | } | |
1026 | ||
1027 | void | |
1028 | rpciod_down(void) | |
1029 | { | |
1030 | down(&rpciod_sema); | |
1031 | dprintk("rpciod_down sema %d\n", rpciod_users); | |
1032 | if (rpciod_users) { | |
1033 | if (--rpciod_users) | |
1034 | goto out; | |
1035 | } else | |
1036 | printk(KERN_WARNING "rpciod_down: no users??\n"); | |
1037 | ||
1038 | if (!rpciod_workqueue) { | |
1039 | dprintk("rpciod_down: Nothing to do!\n"); | |
1040 | goto out; | |
1041 | } | |
1042 | rpciod_killall(); | |
1043 | ||
1044 | destroy_workqueue(rpciod_workqueue); | |
1045 | rpciod_workqueue = NULL; | |
1046 | out: | |
1047 | up(&rpciod_sema); | |
1048 | } | |
1049 | ||
#ifdef RPC_DEBUG
/*
 * Dump one line per task on all_tasks via printk, preceded by a column
 * header. Nothing is printed when the list is empty. The walk runs
 * under rpc_sched_lock.
 */
void rpc_show_tasks(void)
{
	struct list_head *pos;
	struct rpc_task *task;

	spin_lock(&rpc_sched_lock);
	if (!list_empty(&all_tasks)) {
		printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout "
			"-rpcwait -action- ---ops--\n");
		alltask_for_each(task, pos, &all_tasks) {
			/* Name of the wait queue the task sits on, if any. */
			const char *qname = RPC_IS_QUEUED(task) ?
				rpc_qname(task->u.tk_wait.rpc_waitq) : "none";

			printk("%05d %04d %04x %06d %8p %6d %8p %08ld %8s %8p %8p\n",
				task->tk_pid,
				(task->tk_msg.rpc_proc ? task->tk_msg.rpc_proc->p_proc : -1),
				task->tk_flags, task->tk_status,
				task->tk_client,
				(task->tk_client ? task->tk_client->cl_prog : 0),
				task->tk_rqstp, task->tk_timeout,
				qname,
				task->tk_action, task->tk_ops);
		}
	}
	spin_unlock(&rpc_sched_lock);
}
#endif
1082 | ||
1083 | void | |
1084 | rpc_destroy_mempool(void) | |
1085 | { | |
1086 | if (rpc_buffer_mempool) | |
1087 | mempool_destroy(rpc_buffer_mempool); | |
1088 | if (rpc_task_mempool) | |
1089 | mempool_destroy(rpc_task_mempool); | |
1090 | if (rpc_task_slabp && kmem_cache_destroy(rpc_task_slabp)) | |
1091 | printk(KERN_INFO "rpc_task: not all structures were freed\n"); | |
1092 | if (rpc_buffer_slabp && kmem_cache_destroy(rpc_buffer_slabp)) | |
1093 | printk(KERN_INFO "rpc_buffers: not all structures were freed\n"); | |
1094 | } | |
1095 | ||
1096 | int | |
1097 | rpc_init_mempool(void) | |
1098 | { | |
1099 | rpc_task_slabp = kmem_cache_create("rpc_tasks", | |
1100 | sizeof(struct rpc_task), | |
1101 | 0, SLAB_HWCACHE_ALIGN, | |
1102 | NULL, NULL); | |
1103 | if (!rpc_task_slabp) | |
1104 | goto err_nomem; | |
1105 | rpc_buffer_slabp = kmem_cache_create("rpc_buffers", | |
1106 | RPC_BUFFER_MAXSIZE, | |
1107 | 0, SLAB_HWCACHE_ALIGN, | |
1108 | NULL, NULL); | |
1109 | if (!rpc_buffer_slabp) | |
1110 | goto err_nomem; | |
1111 | rpc_task_mempool = mempool_create(RPC_TASK_POOLSIZE, | |
1112 | mempool_alloc_slab, | |
1113 | mempool_free_slab, | |
1114 | rpc_task_slabp); | |
1115 | if (!rpc_task_mempool) | |
1116 | goto err_nomem; | |
1117 | rpc_buffer_mempool = mempool_create(RPC_BUFFER_POOLSIZE, | |
1118 | mempool_alloc_slab, | |
1119 | mempool_free_slab, | |
1120 | rpc_buffer_slabp); | |
1121 | if (!rpc_buffer_mempool) | |
1122 | goto err_nomem; | |
1123 | return 0; | |
1124 | err_nomem: | |
1125 | rpc_destroy_mempool(); | |
1126 | return -ENOMEM; | |
1127 | } |