/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 * Copyright (C) 2014 Fujitsu.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/freezer.h>
#include <linux/workqueue.h>
#include "async-thread.h"

#define WORK_QUEUED_BIT 0
#define WORK_DONE_BIT 1
#define WORK_ORDER_DONE_BIT 2
#define WORK_HIGH_PRIO_BIT 3

#define NO_THRESHOLD (-1)
#define DFT_THRESHOLD (32)

/*
 * Container for the kthread task pointer and the list of pending work.
 * One of these is allocated per thread.
 */
struct btrfs_worker_thread {
        /* pool we belong to */
        struct btrfs_workers *workers;

        /* list of struct btrfs_work that are waiting for service */
        struct list_head pending;
        struct list_head prio_pending;

        /* list of worker threads from struct btrfs_workers */
        struct list_head worker_list;

        /* kthread */
        struct task_struct *task;

        /* number of things on the pending list */
        atomic_t num_pending;

        /* reference counter for this struct */
        atomic_t refs;

        unsigned long sequence;

        /* protects the pending list. */
        spinlock_t lock;

        /* set to non-zero when this thread is already awake and kicking */
        int working;

        /* are we currently idle */
        int idle;
};

static int __btrfs_start_workers(struct btrfs_workers *workers);

/*
 * btrfs_start_workers uses kthread_create, which can block waiting for
 * memory for a very long time.  It will actually throttle on page
 * writeback, and so it may not make progress until after our btrfs worker
 * threads process all of the pending work structs in their queue.
 *
 * This means we can't use btrfs_start_workers from inside a btrfs worker
 * thread that is used as part of cleaning dirty memory, which pretty much
 * involves all of the worker threads.
 *
 * Instead we have a helper queue that never has more than one thread,
 * where we schedule thread start operations.  This worker_start struct
 * is used to contain the work and hold a pointer to the queue that needs
 * another worker.
 */
struct worker_start {
        struct btrfs_work work;
        struct btrfs_workers *queue;
};

static void start_new_worker_func(struct btrfs_work *work)
{
        struct worker_start *start;
        start = container_of(work, struct worker_start, work);
        __btrfs_start_workers(start->queue);
        kfree(start);
}

/*
 * helper function to move a thread onto the idle list after it
 * has finished some requests.
 */
static void check_idle_worker(struct btrfs_worker_thread *worker)
{
        if (!worker->idle && atomic_read(&worker->num_pending) <
            worker->workers->idle_thresh / 2) {
                unsigned long flags;
                spin_lock_irqsave(&worker->workers->lock, flags);
                worker->idle = 1;

                /* the list may be empty if the worker is just starting */
                if (!list_empty(&worker->worker_list) &&
                    !worker->workers->stopping) {
                        list_move(&worker->worker_list,
                                  &worker->workers->idle_list);
                }
                spin_unlock_irqrestore(&worker->workers->lock, flags);
        }
}

/*
 * helper function to move a thread off the idle list after new
 * pending work is added.
 */
static void check_busy_worker(struct btrfs_worker_thread *worker)
{
        if (worker->idle && atomic_read(&worker->num_pending) >=
            worker->workers->idle_thresh) {
                unsigned long flags;
                spin_lock_irqsave(&worker->workers->lock, flags);
                worker->idle = 0;

                if (!list_empty(&worker->worker_list) &&
                    !worker->workers->stopping) {
                        list_move_tail(&worker->worker_list,
                                       &worker->workers->worker_list);
                }
                spin_unlock_irqrestore(&worker->workers->lock, flags);
        }
}

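/*
 * Note on the two helpers above: the busy and idle transitions are
 * deliberately asymmetric.  A worker leaves the idle list once it holds
 * idle_thresh or more pending items, but only returns to it after
 * draining below idle_thresh / 2.  With the default idle_thresh of 32
 * set in btrfs_init_workers(), that means busy at 32 items and idle
 * again at 15 or fewer, so workers don't bounce between the two lists
 * on every queue and dequeue.
 */
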
static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
{
        struct btrfs_workers *workers = worker->workers;
        struct worker_start *start;
        unsigned long flags;

        rmb();
        if (!workers->atomic_start_pending)
                return;

        start = kzalloc(sizeof(*start), GFP_NOFS);
        if (!start)
                return;

        start->work.func = start_new_worker_func;
        start->queue = workers;

        spin_lock_irqsave(&workers->lock, flags);
        if (!workers->atomic_start_pending)
                goto out;

        workers->atomic_start_pending = 0;
        if (workers->num_workers + workers->num_workers_starting >=
            workers->max_workers)
                goto out;

        workers->num_workers_starting += 1;
        spin_unlock_irqrestore(&workers->lock, flags);
        btrfs_queue_worker(workers->atomic_worker_start, &start->work);
        return;

out:
        kfree(start);
        spin_unlock_irqrestore(&workers->lock, flags);
}

static noinline void run_ordered_completions(struct btrfs_workers *workers,
                                             struct btrfs_work *work)
{
        if (!workers->ordered)
                return;

        set_bit(WORK_DONE_BIT, &work->flags);

        spin_lock(&workers->order_lock);

        while (1) {
                if (!list_empty(&workers->prio_order_list)) {
                        work = list_entry(workers->prio_order_list.next,
                                          struct btrfs_work, order_list);
                } else if (!list_empty(&workers->order_list)) {
                        work = list_entry(workers->order_list.next,
                                          struct btrfs_work, order_list);
                } else {
                        break;
                }
                if (!test_bit(WORK_DONE_BIT, &work->flags))
                        break;

                /* we are going to call the ordered done function, but
                 * we leave the work item on the list as a barrier so
                 * that later work items that are done don't have their
                 * functions called before this one returns
                 */
                if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
                        break;

                spin_unlock(&workers->order_lock);

                work->ordered_func(work);

                /* now take the lock again and drop our item from the list */
                spin_lock(&workers->order_lock);
                list_del(&work->order_list);
                spin_unlock(&workers->order_lock);

                /*
                 * we don't want to call the ordered free functions
                 * with the lock held though
                 */
                work->ordered_free(work);
                spin_lock(&workers->order_lock);
        }

        spin_unlock(&workers->order_lock);
}

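/*
 * A sketch of the contract run_ordered_completions() enforces; the two
 * helpers below are hypothetical examples, not btrfs symbols.  For an
 * ordered pool, work->func may run concurrently and finish in any order,
 * ordered_func then runs strictly in queueing order, and ordered_free is
 * guaranteed to be the last access to the item.
 */
static void __maybe_unused example_ordered_done(struct btrfs_work *work)
{
        /* called in submission order, one item at a time */
}

static void __maybe_unused example_ordered_free(struct btrfs_work *work)
{
        /* last access: freeing the work struct here is safe */
        kfree(work);
}
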
static void put_worker(struct btrfs_worker_thread *worker)
{
        if (atomic_dec_and_test(&worker->refs))
                kfree(worker);
}

static int try_worker_shutdown(struct btrfs_worker_thread *worker)
{
        int freeit = 0;

        spin_lock_irq(&worker->lock);
        spin_lock(&worker->workers->lock);
        if (worker->workers->num_workers > 1 &&
            worker->idle &&
            !worker->working &&
            !list_empty(&worker->worker_list) &&
            list_empty(&worker->prio_pending) &&
            list_empty(&worker->pending) &&
            atomic_read(&worker->num_pending) == 0) {
                freeit = 1;
                list_del_init(&worker->worker_list);
                worker->workers->num_workers--;
        }
        spin_unlock(&worker->workers->lock);
        spin_unlock_irq(&worker->lock);

        if (freeit)
                put_worker(worker);
        return freeit;
}

static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker,
                                        struct list_head *prio_head,
                                        struct list_head *head)
{
        struct btrfs_work *work = NULL;
        struct list_head *cur = NULL;

        if (!list_empty(prio_head)) {
                cur = prio_head->next;
                goto out;
        }

        smp_mb();
        if (!list_empty(&worker->prio_pending))
                goto refill;

        if (!list_empty(head)) {
                cur = head->next;
                goto out;
        }

refill:
        spin_lock_irq(&worker->lock);
        list_splice_tail_init(&worker->prio_pending, prio_head);
        list_splice_tail_init(&worker->pending, head);

        if (!list_empty(prio_head))
                cur = prio_head->next;
        else if (!list_empty(head))
                cur = head->next;
        spin_unlock_irq(&worker->lock);

        if (!cur)
                goto out_fail;

out:
        work = list_entry(cur, struct btrfs_work, list);

out_fail:
        return work;
}

/*
 * main loop for servicing work items
 */
static int worker_loop(void *arg)
{
        struct btrfs_worker_thread *worker = arg;
        struct list_head head;
        struct list_head prio_head;
        struct btrfs_work *work;

        INIT_LIST_HEAD(&head);
        INIT_LIST_HEAD(&prio_head);

        do {
again:
                while (1) {
                        work = get_next_work(worker, &prio_head, &head);
                        if (!work)
                                break;

                        list_del(&work->list);
                        clear_bit(WORK_QUEUED_BIT, &work->flags);

                        work->worker = worker;

                        work->func(work);

                        atomic_dec(&worker->num_pending);
                        /*
                         * unless this is an ordered work queue,
                         * 'work' was probably freed by func above.
                         */
                        run_ordered_completions(worker->workers, work);

                        check_pending_worker_creates(worker);
                        cond_resched();
                }

                spin_lock_irq(&worker->lock);
                check_idle_worker(worker);

                if (freezing(current)) {
                        worker->working = 0;
                        spin_unlock_irq(&worker->lock);
                        try_to_freeze();
                } else {
                        spin_unlock_irq(&worker->lock);
                        if (!kthread_should_stop()) {
                                cpu_relax();
                                /*
                                 * we've dropped the lock, did someone else
                                 * jump in?
                                 */
                                smp_mb();
                                if (!list_empty(&worker->pending) ||
                                    !list_empty(&worker->prio_pending))
                                        continue;

                                /*
                                 * this short schedule allows more work to
                                 * come in without the queue functions
                                 * needing to go through wake_up_process()
                                 *
                                 * worker->working is still 1, so nobody
                                 * is going to try and wake us up
                                 */
                                schedule_timeout(1);
                                smp_mb();
                                if (!list_empty(&worker->pending) ||
                                    !list_empty(&worker->prio_pending))
                                        continue;

                                if (kthread_should_stop())
                                        break;

                                /* still no more work? sleep for real */
                                spin_lock_irq(&worker->lock);
                                set_current_state(TASK_INTERRUPTIBLE);
                                if (!list_empty(&worker->pending) ||
                                    !list_empty(&worker->prio_pending)) {
                                        spin_unlock_irq(&worker->lock);
                                        set_current_state(TASK_RUNNING);
                                        goto again;
                                }

                                /*
                                 * this makes sure we get a wakeup when someone
                                 * adds something new to the queue
                                 */
                                worker->working = 0;
                                spin_unlock_irq(&worker->lock);

                                if (!kthread_should_stop()) {
                                        schedule_timeout(HZ * 120);
                                        if (!worker->working &&
                                            try_worker_shutdown(worker)) {
                                                return 0;
                                        }
                                }
                        }
                        __set_current_state(TASK_RUNNING);
                }
        } while (!kthread_should_stop());
        return 0;
}

/*
 * this will wait for all the worker threads to shut down
 */
void btrfs_stop_workers(struct btrfs_workers *workers)
{
        struct list_head *cur;
        struct btrfs_worker_thread *worker;
        int can_stop;

        spin_lock_irq(&workers->lock);
        workers->stopping = 1;
        list_splice_init(&workers->idle_list, &workers->worker_list);
        while (!list_empty(&workers->worker_list)) {
                cur = workers->worker_list.next;
                worker = list_entry(cur, struct btrfs_worker_thread,
                                    worker_list);

                atomic_inc(&worker->refs);
                workers->num_workers -= 1;
                if (!list_empty(&worker->worker_list)) {
                        list_del_init(&worker->worker_list);
                        put_worker(worker);
                        can_stop = 1;
                } else
                        can_stop = 0;
                spin_unlock_irq(&workers->lock);
                if (can_stop)
                        kthread_stop(worker->task);
                spin_lock_irq(&workers->lock);
                put_worker(worker);
        }
        spin_unlock_irq(&workers->lock);
}

/*
 * simple init on struct btrfs_workers
 */
void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
                        struct btrfs_workers *async_helper)
{
        workers->num_workers = 0;
        workers->num_workers_starting = 0;
        INIT_LIST_HEAD(&workers->worker_list);
        INIT_LIST_HEAD(&workers->idle_list);
        INIT_LIST_HEAD(&workers->order_list);
        INIT_LIST_HEAD(&workers->prio_order_list);
        spin_lock_init(&workers->lock);
        spin_lock_init(&workers->order_lock);
        workers->max_workers = max;
        workers->idle_thresh = 32;
        workers->name = name;
        workers->ordered = 0;
        workers->atomic_start_pending = 0;
        workers->atomic_worker_start = async_helper;
        workers->stopping = 0;
}

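/*
 * A minimal setup sketch for the pool API above.  The pool name and
 * sizes are hypothetical; real pools are created during mount in
 * disk-io.c.  Passing a helper queue as async_helper defers worker
 * creation to that queue so queue-time paths never block in
 * kthread_create().
 */
static int __maybe_unused example_pool_setup(struct btrfs_workers *pool)
{
        /* at most 4 workers, no async helper: creation is synchronous */
        btrfs_init_workers(pool, "example", 4, NULL);
        pool->idle_thresh = 16; /* optional: tighten the busy/idle bands */

        /* spawn the first worker before any work is queued */
        return btrfs_start_workers(pool);
}
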
/*
 * starts new worker threads.  This does not enforce the max worker
 * count in case you need to temporarily go past it.
 */
static int __btrfs_start_workers(struct btrfs_workers *workers)
{
        struct btrfs_worker_thread *worker;
        int ret = 0;

        worker = kzalloc(sizeof(*worker), GFP_NOFS);
        if (!worker) {
                ret = -ENOMEM;
                goto fail;
        }

        INIT_LIST_HEAD(&worker->pending);
        INIT_LIST_HEAD(&worker->prio_pending);
        INIT_LIST_HEAD(&worker->worker_list);
        spin_lock_init(&worker->lock);

        atomic_set(&worker->num_pending, 0);
        atomic_set(&worker->refs, 1);
        worker->workers = workers;
        worker->task = kthread_create(worker_loop, worker,
                                      "btrfs-%s-%d", workers->name,
                                      workers->num_workers + 1);
        if (IS_ERR(worker->task)) {
                ret = PTR_ERR(worker->task);
                goto fail;
        }

        spin_lock_irq(&workers->lock);
        if (workers->stopping) {
                spin_unlock_irq(&workers->lock);
                ret = -EINVAL;
                goto fail_kthread;
        }
        list_add_tail(&worker->worker_list, &workers->idle_list);
        worker->idle = 1;
        workers->num_workers++;
        workers->num_workers_starting--;
        WARN_ON(workers->num_workers_starting < 0);
        spin_unlock_irq(&workers->lock);

        wake_up_process(worker->task);
        return 0;

fail_kthread:
        kthread_stop(worker->task);
fail:
        kfree(worker);
        spin_lock_irq(&workers->lock);
        workers->num_workers_starting--;
        spin_unlock_irq(&workers->lock);
        return ret;
}

int btrfs_start_workers(struct btrfs_workers *workers)
{
        spin_lock_irq(&workers->lock);
        workers->num_workers_starting++;
        spin_unlock_irq(&workers->lock);
        return __btrfs_start_workers(workers);
}

/*
 * run through the list and find a worker thread that doesn't have a lot
 * to do right now.  This can return null if we aren't yet at the thread
 * count limit and all of the threads are busy.
 */
static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers)
{
        struct btrfs_worker_thread *worker;
        struct list_head *next;
        int enforce_min;

        enforce_min = (workers->num_workers + workers->num_workers_starting) <
                workers->max_workers;

        /*
         * if we find an idle thread, don't move it to the end of the
         * idle list.  This improves the chance that the next submission
         * will reuse the same thread, and maybe catch it while it is still
         * working
         */
        if (!list_empty(&workers->idle_list)) {
                next = workers->idle_list.next;
                worker = list_entry(next, struct btrfs_worker_thread,
                                    worker_list);
                return worker;
        }
        if (enforce_min || list_empty(&workers->worker_list))
                return NULL;

        /*
         * if we pick a busy task, move the task to the end of the list.
         * hopefully this will keep things somewhat evenly balanced.
         * Do the move in batches based on the sequence number.  This groups
         * requests submitted at roughly the same time onto the same worker.
         */
        next = workers->worker_list.next;
        worker = list_entry(next, struct btrfs_worker_thread, worker_list);
        worker->sequence++;

        if (worker->sequence % workers->idle_thresh == 0)
                list_move_tail(next, &workers->worker_list);
        return worker;
}

/*
 * selects a worker thread to take the next job.  This will either find
 * an idle worker, start a new worker up to the max count, or just return
 * one of the existing busy workers.
 */
static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
{
        struct btrfs_worker_thread *worker;
        unsigned long flags;
        struct list_head *fallback;
        int ret;

        spin_lock_irqsave(&workers->lock, flags);
again:
        worker = next_worker(workers);

        if (!worker) {
                if (workers->num_workers + workers->num_workers_starting >=
                    workers->max_workers) {
                        goto fallback;
                } else if (workers->atomic_worker_start) {
                        workers->atomic_start_pending = 1;
                        goto fallback;
                } else {
                        workers->num_workers_starting++;
                        spin_unlock_irqrestore(&workers->lock, flags);
                        /* we're below the limit, start another worker */
                        ret = __btrfs_start_workers(workers);
                        spin_lock_irqsave(&workers->lock, flags);
                        if (ret)
                                goto fallback;
                        goto again;
                }
        }
        goto found;

fallback:
        fallback = NULL;
        /*
         * we have failed to find any workers, just
         * return the first one we can find.
         */
        if (!list_empty(&workers->worker_list))
                fallback = workers->worker_list.next;
        if (!list_empty(&workers->idle_list))
                fallback = workers->idle_list.next;
        BUG_ON(!fallback);
        worker = list_entry(fallback,
                            struct btrfs_worker_thread, worker_list);
found:
        /*
         * this makes sure the worker doesn't exit before it is placed
         * onto a busy/idle list
         */
        atomic_inc(&worker->num_pending);
        spin_unlock_irqrestore(&workers->lock, flags);
        return worker;
}

/*
 * btrfs_requeue_work just puts the work item back on the tail of the list
 * it was taken from.  It is intended for use with long running work functions
 * that make some progress and want to give the cpu up for others.
 */
void btrfs_requeue_work(struct btrfs_work *work)
{
        struct btrfs_worker_thread *worker = work->worker;
        unsigned long flags;
        int wake = 0;

        if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
                return;

        spin_lock_irqsave(&worker->lock, flags);
        if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
                list_add_tail(&work->list, &worker->prio_pending);
        else
                list_add_tail(&work->list, &worker->pending);
        atomic_inc(&worker->num_pending);

        /* by definition we're busy, take ourselves off the idle
         * list
         */
        if (worker->idle) {
                spin_lock(&worker->workers->lock);
                worker->idle = 0;
                list_move_tail(&worker->worker_list,
                               &worker->workers->worker_list);
                spin_unlock(&worker->workers->lock);
        }
        if (!worker->working) {
                wake = 1;
                worker->working = 1;
        }

        if (wake)
                wake_up_process(worker->task);
        spin_unlock_irqrestore(&worker->lock, flags);
}

void btrfs_set_work_high_prio(struct btrfs_work *work)
{
        set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
}

/*
 * places a struct btrfs_work into the pending queue of one of the kthreads
 */
void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
{
        struct btrfs_worker_thread *worker;
        unsigned long flags;
        int wake = 0;

        /* don't requeue something already on a list */
        if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
                return;

        worker = find_worker(workers);
        if (workers->ordered) {
                /*
                 * you're not allowed to do ordered queues from an
                 * interrupt handler
                 */
                spin_lock(&workers->order_lock);
                if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) {
                        list_add_tail(&work->order_list,
                                      &workers->prio_order_list);
                } else {
                        list_add_tail(&work->order_list, &workers->order_list);
                }
                spin_unlock(&workers->order_lock);
        } else {
                INIT_LIST_HEAD(&work->order_list);
        }

        spin_lock_irqsave(&worker->lock, flags);

        if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
                list_add_tail(&work->list, &worker->prio_pending);
        else
                list_add_tail(&work->list, &worker->pending);
        check_busy_worker(worker);

        /*
         * avoid calling into wake_up_process if this thread has already
         * been kicked
         */
        if (!worker->working)
                wake = 1;
        worker->working = 1;

        if (wake)
                wake_up_process(worker->task);
        spin_unlock_irqrestore(&worker->lock, flags);
}

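/*
 * A queueing sketch for the pool API (illustrative helpers, not btrfs
 * symbols).  The btrfs_work must stay allocated until func runs; for
 * ordered pools it must stay valid until ordered_free has been called.
 */
static void __maybe_unused example_work_func(struct btrfs_work *work)
{
        /* main body; in an unordered pool this may free 'work' itself */
}

static void __maybe_unused example_queue(struct btrfs_workers *pool,
                                         struct btrfs_work *work)
{
        work->func = example_work_func;
        work->flags = 0;

        /* optional: service this item ahead of normal-priority work */
        btrfs_set_work_high_prio(work);

        btrfs_queue_worker(pool, work);
}
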
struct __btrfs_workqueue_struct {
        struct workqueue_struct *normal_wq;
        /* List head pointing to ordered work list */
        struct list_head ordered_list;

        /* Spinlock for ordered_list */
        spinlock_t list_lock;

        /* Thresholding related variables */
        atomic_t pending;
        int max_active;
        int current_max;
        int thresh;
        unsigned int count;
        spinlock_t thres_lock;
};

struct btrfs_workqueue_struct {
        struct __btrfs_workqueue_struct *normal;
        struct __btrfs_workqueue_struct *high;
};

static inline struct __btrfs_workqueue_struct
*__btrfs_alloc_workqueue(char *name, int flags, int max_active, int thresh)
{
        struct __btrfs_workqueue_struct *ret = kzalloc(sizeof(*ret), GFP_NOFS);

        if (unlikely(!ret))
                return NULL;

        ret->max_active = max_active;
        atomic_set(&ret->pending, 0);
        if (thresh == 0)
                thresh = DFT_THRESHOLD;
        /* For low threshold, disabling threshold is a better choice */
        if (thresh < DFT_THRESHOLD) {
                ret->current_max = max_active;
                ret->thresh = NO_THRESHOLD;
        } else {
                ret->current_max = 1;
                ret->thresh = thresh;
        }

        if (flags & WQ_HIGHPRI)
                ret->normal_wq = alloc_workqueue("%s-%s-high", flags,
                                                 ret->max_active,
                                                 "btrfs", name);
        else
                ret->normal_wq = alloc_workqueue("%s-%s", flags,
                                                 ret->max_active, "btrfs",
                                                 name);
        if (unlikely(!ret->normal_wq)) {
                kfree(ret);
                return NULL;
        }

        INIT_LIST_HEAD(&ret->ordered_list);
        spin_lock_init(&ret->list_lock);
        spin_lock_init(&ret->thres_lock);
        return ret;
}

static inline void
__btrfs_destroy_workqueue(struct __btrfs_workqueue_struct *wq);

struct btrfs_workqueue_struct *btrfs_alloc_workqueue(char *name,
                                                     int flags,
                                                     int max_active,
                                                     int thresh)
{
        struct btrfs_workqueue_struct *ret = kzalloc(sizeof(*ret), GFP_NOFS);

        if (unlikely(!ret))
                return NULL;

        ret->normal = __btrfs_alloc_workqueue(name, flags & ~WQ_HIGHPRI,
                                              max_active, thresh);
        if (unlikely(!ret->normal)) {
                kfree(ret);
                return NULL;
        }

        if (flags & WQ_HIGHPRI) {
                ret->high = __btrfs_alloc_workqueue(name, flags, max_active,
                                                    thresh);
                if (unlikely(!ret->high)) {
                        __btrfs_destroy_workqueue(ret->normal);
                        kfree(ret);
                        return NULL;
                }
        }
        return ret;
}

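/*
 * An allocation sketch for the workqueue-backed API (the name, flags and
 * values here are illustrative only).  WQ_HIGHPRI requests a second,
 * high-priority queue that btrfs_queue_work() picks when
 * WORK_HIGH_PRIO_BIT is set.  A thresh of 0 selects DFT_THRESHOLD;
 * anything below DFT_THRESHOLD disables the adaptive max_active logic.
 */
static __maybe_unused struct btrfs_workqueue_struct *example_alloc_wq(void)
{
        /* name expands to "btrfs-example" (plus "-high" for the high wq) */
        return btrfs_alloc_workqueue("example", WQ_UNBOUND | WQ_HIGHPRI,
                                     8 /* max_active */, 64 /* thresh */);
}
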
/*
 * Hook for the threshold mechanism, called from btrfs_queue_work.
 * This hook WILL be called in IRQ handler context,
 * so workqueue_set_max_active MUST NOT be called in this hook
 */
static inline void thresh_queue_hook(struct __btrfs_workqueue_struct *wq)
{
        if (wq->thresh == NO_THRESHOLD)
                return;
        atomic_inc(&wq->pending);
}

/*
 * Hook for the threshold mechanism, called before executing the work.
 * This hook is called in kthread context, so workqueue_set_max_active
 * may be called here.
 */
static inline void thresh_exec_hook(struct __btrfs_workqueue_struct *wq)
{
        int new_max_active;
        long pending;
        int need_change = 0;

        if (wq->thresh == NO_THRESHOLD)
                return;

        atomic_dec(&wq->pending);
        spin_lock(&wq->thres_lock);
        /*
         * Use wq->count to limit the calling frequency of
         * workqueue_set_max_active.
         */
        wq->count++;
        wq->count %= (wq->thresh / 4);
        if (!wq->count)
                goto out;
        new_max_active = wq->current_max;

        /*
         * pending may change later, but that's OK since we don't need
         * it to be perfectly accurate to calculate new_max_active.
         */
        pending = atomic_read(&wq->pending);
        if (pending > wq->thresh)
                new_max_active++;
        if (pending < wq->thresh / 2)
                new_max_active--;
        new_max_active = clamp_val(new_max_active, 1, wq->max_active);
        if (new_max_active != wq->current_max) {
                need_change = 1;
                wq->current_max = new_max_active;
        }
out:
        spin_unlock(&wq->thres_lock);

        if (need_change)
                workqueue_set_max_active(wq->normal_wq, wq->current_max);
}

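/*
 * Worked example of the feedback above, assuming thresh == 64 and
 * max_active == 8: wq->count wraps modulo thresh / 4 == 16 to throttle
 * recalculation.  When the recalculation runs, more than 64 pending
 * items grow current_max by one, fewer than 32 shrink it by one, and
 * the result is clamped to [1, 8] before workqueue_set_max_active()
 * is applied outside thres_lock.
 */
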
static void run_ordered_work(struct __btrfs_workqueue_struct *wq)
{
        struct list_head *list = &wq->ordered_list;
        struct btrfs_work_struct *work;
        spinlock_t *lock = &wq->list_lock;
        unsigned long flags;

        while (1) {
                spin_lock_irqsave(lock, flags);
                if (list_empty(list))
                        break;
                work = list_entry(list->next, struct btrfs_work_struct,
                                  ordered_list);
                if (!test_bit(WORK_DONE_BIT, &work->flags))
                        break;

                /*
                 * we are going to call the ordered done function, but
                 * we leave the work item on the list as a barrier so
                 * that later work items that are done don't have their
                 * functions called before this one returns
                 */
                if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
                        break;
                spin_unlock_irqrestore(lock, flags);
                work->ordered_func(work);

                /* now take the lock again and drop our item from the list */
                spin_lock_irqsave(lock, flags);
                list_del(&work->ordered_list);
                spin_unlock_irqrestore(lock, flags);

                /*
                 * we don't want to call the ordered free functions
                 * with the lock held though
                 */
                work->ordered_free(work);
        }
        spin_unlock_irqrestore(lock, flags);
}

static void normal_work_helper(struct work_struct *arg)
{
        struct btrfs_work_struct *work;
        struct __btrfs_workqueue_struct *wq;
        int need_order = 0;

        work = container_of(arg, struct btrfs_work_struct, normal_work);
        /*
         * We should not touch things inside work in the following cases:
         * 1) after work->func() if it has no ordered_free
         *    Since the struct is freed in work->func().
         * 2) after setting WORK_DONE_BIT
         *    The work may be freed in other threads almost instantly.
         * So we save the needed things here.
         */
        if (work->ordered_func)
                need_order = 1;
        wq = work->wq;

        thresh_exec_hook(wq);
        work->func(work);
        if (need_order) {
                set_bit(WORK_DONE_BIT, &work->flags);
                run_ordered_work(wq);
        }
}

void btrfs_init_work(struct btrfs_work_struct *work,
                     void (*func)(struct btrfs_work_struct *),
                     void (*ordered_func)(struct btrfs_work_struct *),
                     void (*ordered_free)(struct btrfs_work_struct *))
{
        work->func = func;
        work->ordered_func = ordered_func;
        work->ordered_free = ordered_free;
        INIT_WORK(&work->normal_work, normal_work_helper);
        INIT_LIST_HEAD(&work->ordered_list);
        work->flags = 0;
}

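/*
 * A submission sketch for the new API; the handlers below are
 * hypothetical examples, not btrfs symbols.  Items with ordered
 * callbacks go through three phases: func runs concurrently,
 * ordered_func in submission order, ordered_free last.
 */
static void __maybe_unused example_new_func(struct btrfs_work_struct *work)
{
        /* concurrent phase, may complete out of order */
}

static void __maybe_unused example_new_done(struct btrfs_work_struct *work)
{
        /* strictly in submission order */
}

static void __maybe_unused example_new_free(struct btrfs_work_struct *work)
{
        kfree(work);    /* final access to the item */
}

static void __maybe_unused example_submit(struct btrfs_workqueue_struct *wq,
                                          struct btrfs_work_struct *work)
{
        btrfs_init_work(work, example_new_func, example_new_done,
                        example_new_free);
        btrfs_queue_work(wq, work);
}
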
static inline void __btrfs_queue_work(struct __btrfs_workqueue_struct *wq,
                                      struct btrfs_work_struct *work)
{
        unsigned long flags;

        work->wq = wq;
        thresh_queue_hook(wq);
        if (work->ordered_func) {
                spin_lock_irqsave(&wq->list_lock, flags);
                list_add_tail(&work->ordered_list, &wq->ordered_list);
                spin_unlock_irqrestore(&wq->list_lock, flags);
        }
        queue_work(wq->normal_wq, &work->normal_work);
}

void btrfs_queue_work(struct btrfs_workqueue_struct *wq,
                      struct btrfs_work_struct *work)
{
        struct __btrfs_workqueue_struct *dest_wq;

        if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags) && wq->high)
                dest_wq = wq->high;
        else
                dest_wq = wq->normal;
        __btrfs_queue_work(dest_wq, work);
}

static inline void
__btrfs_destroy_workqueue(struct __btrfs_workqueue_struct *wq)
{
        destroy_workqueue(wq->normal_wq);
        kfree(wq);
}

void btrfs_destroy_workqueue(struct btrfs_workqueue_struct *wq)
{
        if (!wq)
                return;
        if (wq->high)
                __btrfs_destroy_workqueue(wq->high);
        __btrfs_destroy_workqueue(wq->normal);
}

void btrfs_workqueue_set_max(struct btrfs_workqueue_struct *wq, int max)
{
        wq->normal->max_active = max;
        if (wq->high)
                wq->high->max_active = max;
}

void btrfs_set_work_high_priority(struct btrfs_work_struct *work)
{
        set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
}