]>
Commit | Line | Data |
---|---|---|
d354c7ec PB |
1 | /* |
2 | * QEMU block layer thread pool | |
3 | * | |
4 | * Copyright IBM, Corp. 2008 | |
5 | * Copyright Red Hat, Inc. 2012 | |
6 | * | |
7 | * Authors: | |
8 | * Anthony Liguori <aliguori@us.ibm.com> | |
9 | * Paolo Bonzini <pbonzini@redhat.com> | |
10 | * | |
11 | * This work is licensed under the terms of the GNU GPL, version 2. See | |
12 | * the COPYING file in the top-level directory. | |
13 | * | |
14 | * Contributions after 2012-01-13 are licensed under the terms of the | |
15 | * GNU GPL, version 2 or (at your option) any later version. | |
16 | */ | |
17 | #include "qemu-common.h" | |
1de7afc9 PB |
18 | #include "qemu/queue.h" |
19 | #include "qemu/thread.h" | |
20 | #include "qemu/osdep.h" | |
737e150e | 21 | #include "block/coroutine.h" |
d354c7ec | 22 | #include "trace.h" |
737e150e | 23 | #include "block/block_int.h" |
1de7afc9 | 24 | #include "qemu/event_notifier.h" |
737e150e | 25 | #include "block/thread-pool.h" |
d354c7ec PB |
26 | |
/* Defined after worker_thread(), which calls it; hence the prototype. */
static void do_spawn_thread(void);

/* Per-request bookkeeping record; the struct body follows below. */
typedef struct ThreadPoolElement ThreadPoolElement;
30 | ||
/* Life cycle of a request submitted to the pool. */
enum ThreadState {
    /* Sitting in request_list; set by thread_pool_submit_aio(). */
    THREAD_QUEUED,
    /* Dequeued by a worker, which is about to run or is running func. */
    THREAD_ACTIVE,
    /* func has returned and its result has been stored in ret. */
    THREAD_DONE,
    /* Removed from request_list by thread_pool_cancel() before any worker
     * picked it up.
     */
    THREAD_CANCELED,
};
37 | ||
38 | struct ThreadPoolElement { | |
39 | BlockDriverAIOCB common; | |
40 | ThreadPoolFunc *func; | |
41 | void *arg; | |
19d092cf PB |
42 | |
43 | /* Moving state out of THREAD_QUEUED is protected by lock. After | |
44 | * that, only the worker thread can write to it. Reads and writes | |
45 | * of state and ret are ordered with memory barriers. | |
46 | */ | |
d354c7ec PB |
47 | enum ThreadState state; |
48 | int ret; | |
49 | ||
50 | /* Access to this list is protected by lock. */ | |
51 | QTAILQ_ENTRY(ThreadPoolElement) reqs; | |
52 | ||
53 | /* Access to this list is protected by the global mutex. */ | |
54 | QLIST_ENTRY(ThreadPoolElement) all; | |
55 | }; | |
56 | ||
57 | static EventNotifier notifier; | |
58 | static QemuMutex lock; | |
59 | static QemuCond check_cancel; | |
60 | static QemuSemaphore sem; | |
61 | static int max_threads = 64; | |
62 | static QEMUBH *new_thread_bh; | |
63 | ||
64 | /* The following variables are protected by the global mutex. */ | |
65 | static QLIST_HEAD(, ThreadPoolElement) head; | |
66 | ||
67 | /* The following variables are protected by lock. */ | |
68 | static QTAILQ_HEAD(, ThreadPoolElement) request_list; | |
69 | static int cur_threads; | |
70 | static int idle_threads; | |
71 | static int new_threads; /* backlog of threads we need to create */ | |
72 | static int pending_threads; /* threads created but not running yet */ | |
73 | static int pending_cancellations; /* whether we need a cond_broadcast */ | |
74 | ||
75 | static void *worker_thread(void *unused) | |
76 | { | |
77 | qemu_mutex_lock(&lock); | |
78 | pending_threads--; | |
79 | do_spawn_thread(); | |
80 | ||
81 | while (1) { | |
82 | ThreadPoolElement *req; | |
83 | int ret; | |
84 | ||
85 | do { | |
86 | idle_threads++; | |
87 | qemu_mutex_unlock(&lock); | |
88 | ret = qemu_sem_timedwait(&sem, 10000); | |
89 | qemu_mutex_lock(&lock); | |
90 | idle_threads--; | |
91 | } while (ret == -1 && !QTAILQ_EMPTY(&request_list)); | |
92 | if (ret == -1) { | |
93 | break; | |
94 | } | |
95 | ||
96 | req = QTAILQ_FIRST(&request_list); | |
97 | QTAILQ_REMOVE(&request_list, req, reqs); | |
98 | req->state = THREAD_ACTIVE; | |
99 | qemu_mutex_unlock(&lock); | |
100 | ||
101 | ret = req->func(req->arg); | |
102 | ||
d354c7ec | 103 | req->ret = ret; |
19d092cf PB |
104 | /* Write ret before state. */ |
105 | smp_wmb(); | |
106 | req->state = THREAD_DONE; | |
107 | ||
108 | qemu_mutex_lock(&lock); | |
d354c7ec PB |
109 | if (pending_cancellations) { |
110 | qemu_cond_broadcast(&check_cancel); | |
111 | } | |
112 | ||
113 | event_notifier_set(¬ifier); | |
114 | } | |
115 | ||
116 | cur_threads--; | |
117 | qemu_mutex_unlock(&lock); | |
118 | return NULL; | |
119 | } | |
120 | ||
121 | static void do_spawn_thread(void) | |
122 | { | |
123 | QemuThread t; | |
124 | ||
125 | /* Runs with lock taken. */ | |
126 | if (!new_threads) { | |
127 | return; | |
128 | } | |
129 | ||
130 | new_threads--; | |
131 | pending_threads++; | |
132 | ||
133 | qemu_thread_create(&t, worker_thread, NULL, QEMU_THREAD_DETACHED); | |
134 | } | |
135 | ||
136 | static void spawn_thread_bh_fn(void *opaque) | |
137 | { | |
138 | qemu_mutex_lock(&lock); | |
139 | do_spawn_thread(); | |
140 | qemu_mutex_unlock(&lock); | |
141 | } | |
142 | ||
143 | static void spawn_thread(void) | |
144 | { | |
145 | cur_threads++; | |
146 | new_threads++; | |
147 | /* If there are threads being created, they will spawn new workers, so | |
148 | * we don't spend time creating many threads in a loop holding a mutex or | |
149 | * starving the current vcpu. | |
150 | * | |
151 | * If there are no idle threads, ask the main thread to create one, so we | |
152 | * inherit the correct affinity instead of the vcpu affinity. | |
153 | */ | |
154 | if (!pending_threads) { | |
155 | qemu_bh_schedule(new_thread_bh); | |
156 | } | |
157 | } | |
158 | ||
159 | static void event_notifier_ready(EventNotifier *notifier) | |
160 | { | |
161 | ThreadPoolElement *elem, *next; | |
162 | ||
163 | event_notifier_test_and_clear(notifier); | |
164 | restart: | |
165 | QLIST_FOREACH_SAFE(elem, &head, all, next) { | |
166 | if (elem->state != THREAD_CANCELED && elem->state != THREAD_DONE) { | |
167 | continue; | |
168 | } | |
169 | if (elem->state == THREAD_DONE) { | |
170 | trace_thread_pool_complete(elem, elem->common.opaque, elem->ret); | |
171 | } | |
172 | if (elem->state == THREAD_DONE && elem->common.cb) { | |
d354c7ec | 173 | QLIST_REMOVE(elem, all); |
19d092cf PB |
174 | /* Read state before ret. */ |
175 | smp_rmb(); | |
176 | elem->common.cb(elem->common.opaque, elem->ret); | |
d354c7ec PB |
177 | qemu_aio_release(elem); |
178 | goto restart; | |
179 | } else { | |
180 | /* remove the request */ | |
181 | QLIST_REMOVE(elem, all); | |
182 | qemu_aio_release(elem); | |
183 | } | |
184 | } | |
185 | } | |
186 | ||
187 | static int thread_pool_active(EventNotifier *notifier) | |
188 | { | |
189 | return !QLIST_EMPTY(&head); | |
190 | } | |
191 | ||
192 | static void thread_pool_cancel(BlockDriverAIOCB *acb) | |
193 | { | |
194 | ThreadPoolElement *elem = (ThreadPoolElement *)acb; | |
195 | ||
196 | trace_thread_pool_cancel(elem, elem->common.opaque); | |
197 | ||
198 | qemu_mutex_lock(&lock); | |
199 | if (elem->state == THREAD_QUEUED && | |
200 | /* No thread has yet started working on elem. we can try to "steal" | |
201 | * the item from the worker if we can get a signal from the | |
202 | * semaphore. Because this is non-blocking, we can do it with | |
203 | * the lock taken and ensure that elem will remain THREAD_QUEUED. | |
204 | */ | |
205 | qemu_sem_timedwait(&sem, 0) == 0) { | |
206 | QTAILQ_REMOVE(&request_list, elem, reqs); | |
207 | elem->state = THREAD_CANCELED; | |
208 | event_notifier_set(¬ifier); | |
209 | } else { | |
210 | pending_cancellations++; | |
211 | while (elem->state != THREAD_CANCELED && elem->state != THREAD_DONE) { | |
212 | qemu_cond_wait(&check_cancel, &lock); | |
213 | } | |
214 | pending_cancellations--; | |
215 | } | |
216 | qemu_mutex_unlock(&lock); | |
217 | } | |
218 | ||
d7331bed | 219 | static const AIOCBInfo thread_pool_aiocb_info = { |
d354c7ec PB |
220 | .aiocb_size = sizeof(ThreadPoolElement), |
221 | .cancel = thread_pool_cancel, | |
222 | }; | |
223 | ||
224 | BlockDriverAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg, | |
225 | BlockDriverCompletionFunc *cb, void *opaque) | |
226 | { | |
227 | ThreadPoolElement *req; | |
228 | ||
d7331bed | 229 | req = qemu_aio_get(&thread_pool_aiocb_info, NULL, cb, opaque); |
d354c7ec PB |
230 | req->func = func; |
231 | req->arg = arg; | |
232 | req->state = THREAD_QUEUED; | |
233 | ||
234 | QLIST_INSERT_HEAD(&head, req, all); | |
235 | ||
236 | trace_thread_pool_submit(req, arg); | |
237 | ||
238 | qemu_mutex_lock(&lock); | |
239 | if (idle_threads == 0 && cur_threads < max_threads) { | |
240 | spawn_thread(); | |
241 | } | |
242 | QTAILQ_INSERT_TAIL(&request_list, req, reqs); | |
243 | qemu_mutex_unlock(&lock); | |
244 | qemu_sem_post(&sem); | |
245 | return &req->common; | |
246 | } | |
247 | ||
248 | typedef struct ThreadPoolCo { | |
249 | Coroutine *co; | |
250 | int ret; | |
251 | } ThreadPoolCo; | |
252 | ||
253 | static void thread_pool_co_cb(void *opaque, int ret) | |
254 | { | |
255 | ThreadPoolCo *co = opaque; | |
256 | ||
257 | co->ret = ret; | |
258 | qemu_coroutine_enter(co->co, NULL); | |
259 | } | |
260 | ||
261 | int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg) | |
262 | { | |
263 | ThreadPoolCo tpc = { .co = qemu_coroutine_self(), .ret = -EINPROGRESS }; | |
264 | assert(qemu_in_coroutine()); | |
265 | thread_pool_submit_aio(func, arg, thread_pool_co_cb, &tpc); | |
266 | qemu_coroutine_yield(); | |
267 | return tpc.ret; | |
268 | } | |
269 | ||
270 | void thread_pool_submit(ThreadPoolFunc *func, void *arg) | |
271 | { | |
272 | thread_pool_submit_aio(func, arg, NULL, NULL); | |
273 | } | |
274 | ||
275 | static void thread_pool_init(void) | |
276 | { | |
277 | QLIST_INIT(&head); | |
278 | event_notifier_init(¬ifier, false); | |
279 | qemu_mutex_init(&lock); | |
280 | qemu_cond_init(&check_cancel); | |
281 | qemu_sem_init(&sem, 0); | |
282 | qemu_aio_set_event_notifier(¬ifier, event_notifier_ready, | |
283 | thread_pool_active); | |
284 | ||
285 | QTAILQ_INIT(&request_list); | |
286 | new_thread_bh = qemu_bh_new(spawn_thread_bh_fn, NULL); | |
287 | } | |
288 | ||
289 | block_init(thread_pool_init) |