]>
Commit | Line | Data |
---|---|---|
d354c7ec PB |
1 | /* |
2 | * QEMU block layer thread pool | |
3 | * | |
4 | * Copyright IBM, Corp. 2008 | |
5 | * Copyright Red Hat, Inc. 2012 | |
6 | * | |
7 | * Authors: | |
8 | * Anthony Liguori <aliguori@us.ibm.com> | |
9 | * Paolo Bonzini <pbonzini@redhat.com> | |
10 | * | |
11 | * This work is licensed under the terms of the GNU GPL, version 2. See | |
12 | * the COPYING file in the top-level directory. | |
13 | * | |
14 | * Contributions after 2012-01-13 are licensed under the terms of the | |
15 | * GNU GPL, version 2 or (at your option) any later version. | |
16 | */ | |
17 | #include "qemu-common.h" | |
1de7afc9 PB |
18 | #include "qemu/queue.h" |
19 | #include "qemu/thread.h" | |
20 | #include "qemu/osdep.h" | |
737e150e | 21 | #include "block/coroutine.h" |
d354c7ec | 22 | #include "trace.h" |
737e150e | 23 | #include "block/block_int.h" |
1de7afc9 | 24 | #include "qemu/event_notifier.h" |
737e150e | 25 | #include "block/thread-pool.h" |
d354c7ec | 26 | |
b811203c | 27 | static void do_spawn_thread(ThreadPool *pool); |
d354c7ec PB |
28 | |
29 | typedef struct ThreadPoolElement ThreadPoolElement; | |
30 | ||
/*
 * Life cycle of a request.  A request starts out THREAD_QUEUED and ends up
 * either THREAD_DONE (a worker ran it) or THREAD_CANCELED (it was pulled
 * off the queue before any worker picked it up).
 */
enum ThreadState {
    THREAD_QUEUED   = 0,    /* waiting on the request list */
    THREAD_ACTIVE   = 1,    /* a worker is running the request function */
    THREAD_DONE     = 2,    /* function returned, result stored in ret */
    THREAD_CANCELED = 3,    /* removed from the queue by thread_pool_cancel */
};
37 | ||
38 | struct ThreadPoolElement { | |
39 | BlockDriverAIOCB common; | |
b811203c | 40 | ThreadPool *pool; |
d354c7ec PB |
41 | ThreadPoolFunc *func; |
42 | void *arg; | |
19d092cf PB |
43 | |
44 | /* Moving state out of THREAD_QUEUED is protected by lock. After | |
45 | * that, only the worker thread can write to it. Reads and writes | |
46 | * of state and ret are ordered with memory barriers. | |
47 | */ | |
d354c7ec PB |
48 | enum ThreadState state; |
49 | int ret; | |
50 | ||
51 | /* Access to this list is protected by lock. */ | |
52 | QTAILQ_ENTRY(ThreadPoolElement) reqs; | |
53 | ||
54 | /* Access to this list is protected by the global mutex. */ | |
55 | QLIST_ENTRY(ThreadPoolElement) all; | |
56 | }; | |
57 | ||
b811203c SH |
58 | struct ThreadPool { |
59 | EventNotifier notifier; | |
f7311ccc | 60 | AioContext *ctx; |
b811203c SH |
61 | QemuMutex lock; |
62 | QemuCond check_cancel; | |
f7311ccc | 63 | QemuCond worker_stopped; |
b811203c SH |
64 | QemuSemaphore sem; |
65 | int max_threads; | |
66 | QEMUBH *new_thread_bh; | |
67 | ||
68 | /* The following variables are only accessed from one AioContext. */ | |
69 | QLIST_HEAD(, ThreadPoolElement) head; | |
70 | ||
71 | /* The following variables are protected by lock. */ | |
72 | QTAILQ_HEAD(, ThreadPoolElement) request_list; | |
73 | int cur_threads; | |
74 | int idle_threads; | |
75 | int new_threads; /* backlog of threads we need to create */ | |
76 | int pending_threads; /* threads created but not running yet */ | |
77 | int pending_cancellations; /* whether we need a cond_broadcast */ | |
f7311ccc | 78 | bool stopping; |
b811203c SH |
79 | }; |
80 | ||
81 | /* Currently there is only one thread pool instance. */ | |
82 | static ThreadPool global_pool; | |
83 | ||
84 | static void *worker_thread(void *opaque) | |
d354c7ec | 85 | { |
b811203c SH |
86 | ThreadPool *pool = opaque; |
87 | ||
88 | qemu_mutex_lock(&pool->lock); | |
89 | pool->pending_threads--; | |
90 | do_spawn_thread(pool); | |
d354c7ec | 91 | |
f7311ccc | 92 | while (!pool->stopping) { |
d354c7ec PB |
93 | ThreadPoolElement *req; |
94 | int ret; | |
95 | ||
96 | do { | |
b811203c SH |
97 | pool->idle_threads++; |
98 | qemu_mutex_unlock(&pool->lock); | |
99 | ret = qemu_sem_timedwait(&pool->sem, 10000); | |
100 | qemu_mutex_lock(&pool->lock); | |
101 | pool->idle_threads--; | |
102 | } while (ret == -1 && !QTAILQ_EMPTY(&pool->request_list)); | |
f7311ccc | 103 | if (ret == -1 || pool->stopping) { |
d354c7ec PB |
104 | break; |
105 | } | |
106 | ||
b811203c SH |
107 | req = QTAILQ_FIRST(&pool->request_list); |
108 | QTAILQ_REMOVE(&pool->request_list, req, reqs); | |
d354c7ec | 109 | req->state = THREAD_ACTIVE; |
b811203c | 110 | qemu_mutex_unlock(&pool->lock); |
d354c7ec PB |
111 | |
112 | ret = req->func(req->arg); | |
113 | ||
d354c7ec | 114 | req->ret = ret; |
19d092cf PB |
115 | /* Write ret before state. */ |
116 | smp_wmb(); | |
117 | req->state = THREAD_DONE; | |
118 | ||
b811203c SH |
119 | qemu_mutex_lock(&pool->lock); |
120 | if (pool->pending_cancellations) { | |
121 | qemu_cond_broadcast(&pool->check_cancel); | |
d354c7ec PB |
122 | } |
123 | ||
b811203c | 124 | event_notifier_set(&pool->notifier); |
d354c7ec PB |
125 | } |
126 | ||
b811203c | 127 | pool->cur_threads--; |
f7311ccc | 128 | qemu_cond_signal(&pool->worker_stopped); |
b811203c | 129 | qemu_mutex_unlock(&pool->lock); |
d354c7ec PB |
130 | return NULL; |
131 | } | |
132 | ||
b811203c | 133 | static void do_spawn_thread(ThreadPool *pool) |
d354c7ec PB |
134 | { |
135 | QemuThread t; | |
136 | ||
137 | /* Runs with lock taken. */ | |
b811203c | 138 | if (!pool->new_threads) { |
d354c7ec PB |
139 | return; |
140 | } | |
141 | ||
b811203c SH |
142 | pool->new_threads--; |
143 | pool->pending_threads++; | |
d354c7ec | 144 | |
b811203c | 145 | qemu_thread_create(&t, worker_thread, pool, QEMU_THREAD_DETACHED); |
d354c7ec PB |
146 | } |
147 | ||
148 | static void spawn_thread_bh_fn(void *opaque) | |
149 | { | |
b811203c SH |
150 | ThreadPool *pool = opaque; |
151 | ||
152 | qemu_mutex_lock(&pool->lock); | |
153 | do_spawn_thread(pool); | |
154 | qemu_mutex_unlock(&pool->lock); | |
d354c7ec PB |
155 | } |
156 | ||
b811203c | 157 | static void spawn_thread(ThreadPool *pool) |
d354c7ec | 158 | { |
b811203c SH |
159 | pool->cur_threads++; |
160 | pool->new_threads++; | |
d354c7ec PB |
161 | /* If there are threads being created, they will spawn new workers, so |
162 | * we don't spend time creating many threads in a loop holding a mutex or | |
163 | * starving the current vcpu. | |
164 | * | |
165 | * If there are no idle threads, ask the main thread to create one, so we | |
166 | * inherit the correct affinity instead of the vcpu affinity. | |
167 | */ | |
b811203c SH |
168 | if (!pool->pending_threads) { |
169 | qemu_bh_schedule(pool->new_thread_bh); | |
d354c7ec PB |
170 | } |
171 | } | |
172 | ||
173 | static void event_notifier_ready(EventNotifier *notifier) | |
174 | { | |
b811203c | 175 | ThreadPool *pool = container_of(notifier, ThreadPool, notifier); |
d354c7ec PB |
176 | ThreadPoolElement *elem, *next; |
177 | ||
178 | event_notifier_test_and_clear(notifier); | |
179 | restart: | |
b811203c | 180 | QLIST_FOREACH_SAFE(elem, &pool->head, all, next) { |
d354c7ec PB |
181 | if (elem->state != THREAD_CANCELED && elem->state != THREAD_DONE) { |
182 | continue; | |
183 | } | |
184 | if (elem->state == THREAD_DONE) { | |
b811203c SH |
185 | trace_thread_pool_complete(pool, elem, elem->common.opaque, |
186 | elem->ret); | |
d354c7ec PB |
187 | } |
188 | if (elem->state == THREAD_DONE && elem->common.cb) { | |
d354c7ec | 189 | QLIST_REMOVE(elem, all); |
19d092cf PB |
190 | /* Read state before ret. */ |
191 | smp_rmb(); | |
192 | elem->common.cb(elem->common.opaque, elem->ret); | |
d354c7ec PB |
193 | qemu_aio_release(elem); |
194 | goto restart; | |
195 | } else { | |
196 | /* remove the request */ | |
197 | QLIST_REMOVE(elem, all); | |
198 | qemu_aio_release(elem); | |
199 | } | |
200 | } | |
201 | } | |
202 | ||
203 | static int thread_pool_active(EventNotifier *notifier) | |
204 | { | |
b811203c SH |
205 | ThreadPool *pool = container_of(notifier, ThreadPool, notifier); |
206 | return !QLIST_EMPTY(&pool->head); | |
d354c7ec PB |
207 | } |
208 | ||
209 | static void thread_pool_cancel(BlockDriverAIOCB *acb) | |
210 | { | |
211 | ThreadPoolElement *elem = (ThreadPoolElement *)acb; | |
b811203c | 212 | ThreadPool *pool = elem->pool; |
d354c7ec PB |
213 | |
214 | trace_thread_pool_cancel(elem, elem->common.opaque); | |
215 | ||
b811203c | 216 | qemu_mutex_lock(&pool->lock); |
d354c7ec PB |
217 | if (elem->state == THREAD_QUEUED && |
218 | /* No thread has yet started working on elem. we can try to "steal" | |
219 | * the item from the worker if we can get a signal from the | |
220 | * semaphore. Because this is non-blocking, we can do it with | |
221 | * the lock taken and ensure that elem will remain THREAD_QUEUED. | |
222 | */ | |
b811203c SH |
223 | qemu_sem_timedwait(&pool->sem, 0) == 0) { |
224 | QTAILQ_REMOVE(&pool->request_list, elem, reqs); | |
d354c7ec | 225 | elem->state = THREAD_CANCELED; |
b811203c | 226 | event_notifier_set(&pool->notifier); |
d354c7ec | 227 | } else { |
b811203c | 228 | pool->pending_cancellations++; |
d354c7ec | 229 | while (elem->state != THREAD_CANCELED && elem->state != THREAD_DONE) { |
b811203c | 230 | qemu_cond_wait(&pool->check_cancel, &pool->lock); |
d354c7ec | 231 | } |
b811203c | 232 | pool->pending_cancellations--; |
d354c7ec | 233 | } |
b811203c | 234 | qemu_mutex_unlock(&pool->lock); |
d354c7ec PB |
235 | } |
236 | ||
d7331bed | 237 | static const AIOCBInfo thread_pool_aiocb_info = { |
d354c7ec PB |
238 | .aiocb_size = sizeof(ThreadPoolElement), |
239 | .cancel = thread_pool_cancel, | |
240 | }; | |
241 | ||
242 | BlockDriverAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg, | |
243 | BlockDriverCompletionFunc *cb, void *opaque) | |
244 | { | |
b811203c | 245 | ThreadPool *pool = &global_pool; |
d354c7ec PB |
246 | ThreadPoolElement *req; |
247 | ||
d7331bed | 248 | req = qemu_aio_get(&thread_pool_aiocb_info, NULL, cb, opaque); |
d354c7ec PB |
249 | req->func = func; |
250 | req->arg = arg; | |
251 | req->state = THREAD_QUEUED; | |
b811203c | 252 | req->pool = pool; |
d354c7ec | 253 | |
b811203c | 254 | QLIST_INSERT_HEAD(&pool->head, req, all); |
d354c7ec | 255 | |
b811203c | 256 | trace_thread_pool_submit(pool, req, arg); |
d354c7ec | 257 | |
b811203c SH |
258 | qemu_mutex_lock(&pool->lock); |
259 | if (pool->idle_threads == 0 && pool->cur_threads < pool->max_threads) { | |
260 | spawn_thread(pool); | |
d354c7ec | 261 | } |
b811203c SH |
262 | QTAILQ_INSERT_TAIL(&pool->request_list, req, reqs); |
263 | qemu_mutex_unlock(&pool->lock); | |
264 | qemu_sem_post(&pool->sem); | |
d354c7ec PB |
265 | return &req->common; |
266 | } | |
267 | ||
268 | typedef struct ThreadPoolCo { | |
269 | Coroutine *co; | |
270 | int ret; | |
271 | } ThreadPoolCo; | |
272 | ||
273 | static void thread_pool_co_cb(void *opaque, int ret) | |
274 | { | |
275 | ThreadPoolCo *co = opaque; | |
276 | ||
277 | co->ret = ret; | |
278 | qemu_coroutine_enter(co->co, NULL); | |
279 | } | |
280 | ||
281 | int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg) | |
282 | { | |
283 | ThreadPoolCo tpc = { .co = qemu_coroutine_self(), .ret = -EINPROGRESS }; | |
284 | assert(qemu_in_coroutine()); | |
285 | thread_pool_submit_aio(func, arg, thread_pool_co_cb, &tpc); | |
286 | qemu_coroutine_yield(); | |
287 | return tpc.ret; | |
288 | } | |
289 | ||
290 | void thread_pool_submit(ThreadPoolFunc *func, void *arg) | |
291 | { | |
292 | thread_pool_submit_aio(func, arg, NULL, NULL); | |
293 | } | |
294 | ||
b811203c SH |
295 | static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx) |
296 | { | |
297 | if (!ctx) { | |
298 | ctx = qemu_get_aio_context(); | |
299 | } | |
300 | ||
301 | memset(pool, 0, sizeof(*pool)); | |
302 | event_notifier_init(&pool->notifier, false); | |
f7311ccc | 303 | pool->ctx = ctx; |
b811203c SH |
304 | qemu_mutex_init(&pool->lock); |
305 | qemu_cond_init(&pool->check_cancel); | |
f7311ccc | 306 | qemu_cond_init(&pool->worker_stopped); |
b811203c SH |
307 | qemu_sem_init(&pool->sem, 0); |
308 | pool->max_threads = 64; | |
309 | pool->new_thread_bh = aio_bh_new(ctx, spawn_thread_bh_fn, pool); | |
310 | ||
311 | QLIST_INIT(&pool->head); | |
312 | QTAILQ_INIT(&pool->request_list); | |
313 | ||
314 | aio_set_event_notifier(ctx, &pool->notifier, event_notifier_ready, | |
315 | thread_pool_active); | |
316 | } | |
317 | ||
f7311ccc SH |
318 | ThreadPool *thread_pool_new(AioContext *ctx) |
319 | { | |
320 | ThreadPool *pool = g_new(ThreadPool, 1); | |
321 | thread_pool_init_one(pool, ctx); | |
322 | return pool; | |
323 | } | |
324 | ||
325 | void thread_pool_free(ThreadPool *pool) | |
326 | { | |
327 | if (!pool) { | |
328 | return; | |
329 | } | |
330 | ||
331 | assert(QLIST_EMPTY(&pool->head)); | |
332 | ||
333 | qemu_mutex_lock(&pool->lock); | |
334 | ||
335 | /* Stop new threads from spawning */ | |
336 | qemu_bh_delete(pool->new_thread_bh); | |
337 | pool->cur_threads -= pool->new_threads; | |
338 | pool->new_threads = 0; | |
339 | ||
340 | /* Wait for worker threads to terminate */ | |
341 | pool->stopping = true; | |
342 | while (pool->cur_threads > 0) { | |
343 | qemu_sem_post(&pool->sem); | |
344 | qemu_cond_wait(&pool->worker_stopped, &pool->lock); | |
345 | } | |
346 | ||
347 | qemu_mutex_unlock(&pool->lock); | |
348 | ||
349 | aio_set_event_notifier(pool->ctx, &pool->notifier, NULL, NULL); | |
350 | qemu_sem_destroy(&pool->sem); | |
351 | qemu_cond_destroy(&pool->check_cancel); | |
352 | qemu_cond_destroy(&pool->worker_stopped); | |
353 | qemu_mutex_destroy(&pool->lock); | |
354 | event_notifier_cleanup(&pool->notifier); | |
355 | g_free(pool); | |
356 | } | |
357 | ||
d354c7ec PB |
358 | static void thread_pool_init(void) |
359 | { | |
b811203c | 360 | thread_pool_init_one(&global_pool, NULL); |
d354c7ec PB |
361 | } |
362 | ||
363 | block_init(thread_pool_init) |