]> git.proxmox.com Git - qemu.git/blob - main-loop.c
aio: introduce AioContext, move bottom halves there
[qemu.git] / main-loop.c
1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 #include "qemu-common.h"
26 #include "qemu-timer.h"
27 #include "slirp/slirp.h"
28 #include "main-loop.h"
29 #include "qemu-aio.h"
30
31 #ifndef _WIN32
32
33 #include "compatfd.h"
34
/* Write side of the main-loop wakeup channel; -1 until qemu_event_init()
 * has installed it. */
static int io_thread_fd = -1;

/*
 * Wake up the main loop by writing to the notification descriptor.
 *
 * Does nothing if the descriptor has not been set up yet.  Any write
 * failure other than EINTR (retried) or EAGAIN (ignored) is fatal: a
 * message is printed to stderr and the process exits with status 1.
 */
void qemu_notify_event(void)
{
    /* Always 8 bytes, so the same write works for eventfd. */
    static const uint64_t one = 1;
    ssize_t written;

    if (io_thread_fd == -1) {
        return;
    }

    for (;;) {
        written = write(io_thread_fd, &one, sizeof(one));
        if (written >= 0 || errno != EINTR) {
            break;
        }
    }

    /* EAGAIN is fine, a read must be pending. */
    if (written >= 0 || errno == EAGAIN) {
        return;
    }

    fprintf(stderr, "qemu_notify_event: write() failed: %s\n",
            strerror(errno));
    exit(1);
}
57
/*
 * fd read handler for the wakeup channel: consume whatever has been
 * written so the descriptor stops being readable.
 *
 * @opaque carries the descriptor number, cast to a pointer.
 */
static void qemu_event_read(void *opaque)
{
    const int notify_fd = (intptr_t)opaque;
    char scratch[512];

    /* Drain the notify pipe.  For eventfd, only 8 bytes will be read. */
    for (;;) {
        ssize_t nread = read(notify_fd, scratch, sizeof(scratch));

        if (nread == -1 && errno == EINTR) {
            continue;               /* interrupted, try again */
        }
        if (nread != sizeof(scratch)) {
            break;                  /* short read, EOF or other error */
        }
        /* A completely filled buffer means there may be more to read. */
    }
}
69
70 static int qemu_event_init(void)
71 {
72 int err;
73 int fds[2];
74
75 err = qemu_eventfd(fds);
76 if (err == -1) {
77 return -errno;
78 }
79 err = fcntl_setfl(fds[0], O_NONBLOCK);
80 if (err < 0) {
81 goto fail;
82 }
83 err = fcntl_setfl(fds[1], O_NONBLOCK);
84 if (err < 0) {
85 goto fail;
86 }
87 qemu_set_fd_handler2(fds[0], NULL, qemu_event_read, NULL,
88 (void *)(intptr_t)fds[0]);
89
90 io_thread_fd = fds[1];
91 return 0;
92
93 fail:
94 close(fds[0]);
95 close(fds[1]);
96 return err;
97 }
98
99 /* If we have signalfd, we mask out the signals we want to handle and then
100 * use signalfd to listen for them. We rely on whatever the current signal
101 * handler is to dispatch the signals when we receive them.
102 */
103 static void sigfd_handler(void *opaque)
104 {
105 int fd = (intptr_t)opaque;
106 struct qemu_signalfd_siginfo info;
107 struct sigaction action;
108 ssize_t len;
109
110 while (1) {
111 do {
112 len = read(fd, &info, sizeof(info));
113 } while (len == -1 && errno == EINTR);
114
115 if (len == -1 && errno == EAGAIN) {
116 break;
117 }
118
119 if (len != sizeof(info)) {
120 printf("read from sigfd returned %zd: %m\n", len);
121 return;
122 }
123
124 sigaction(info.ssi_signo, NULL, &action);
125 if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction) {
126 action.sa_sigaction(info.ssi_signo,
127 (siginfo_t *)&info, NULL);
128 } else if (action.sa_handler) {
129 action.sa_handler(info.ssi_signo);
130 }
131 }
132 }
133
134 static int qemu_signal_init(void)
135 {
136 int sigfd;
137 sigset_t set;
138
139 /*
140 * SIG_IPI must be blocked in the main thread and must not be caught
141 * by sigwait() in the signal thread. Otherwise, the cpu thread will
142 * not catch it reliably.
143 */
144 sigemptyset(&set);
145 sigaddset(&set, SIG_IPI);
146 sigaddset(&set, SIGIO);
147 sigaddset(&set, SIGALRM);
148 sigaddset(&set, SIGBUS);
149 pthread_sigmask(SIG_BLOCK, &set, NULL);
150
151 sigdelset(&set, SIG_IPI);
152 sigfd = qemu_signalfd(&set);
153 if (sigfd == -1) {
154 fprintf(stderr, "failed to create signalfd\n");
155 return -errno;
156 }
157
158 fcntl_setfl(sigfd, O_NONBLOCK);
159
160 qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
161 (void *)(intptr_t)sigfd);
162
163 return 0;
164 }
165
166 #else /* _WIN32 */
167
168 static HANDLE qemu_event_handle = NULL;
169
/* Wait-object callback for the wakeup event: intentionally does nothing. */
static void dummy_event_handler(void *opaque)
{
    (void)opaque;
}
173
174 static int qemu_event_init(void)
175 {
176 qemu_event_handle = CreateEvent(NULL, FALSE, FALSE, NULL);
177 if (!qemu_event_handle) {
178 fprintf(stderr, "Failed CreateEvent: %ld\n", GetLastError());
179 return -1;
180 }
181 qemu_add_wait_object(qemu_event_handle, dummy_event_handler, NULL);
182 return 0;
183 }
184
185 void qemu_notify_event(void)
186 {
187 if (!qemu_event_handle) {
188 return;
189 }
190 if (!SetEvent(qemu_event_handle)) {
191 fprintf(stderr, "qemu_notify_event: SetEvent failed: %ld\n",
192 GetLastError());
193 exit(1);
194 }
195 }
196
/* Win32 counterpart of the POSIX signal setup: nothing to do, always
 * reports success (0). */
static int qemu_signal_init(void)
{
    return 0;
}
201 #endif
202
203 static AioContext *qemu_aio_context;
204
205 int qemu_init_main_loop(void)
206 {
207 int ret;
208
209 init_clocks();
210 init_timer_alarm();
211
212 qemu_mutex_lock_iothread();
213 ret = qemu_signal_init();
214 if (ret) {
215 return ret;
216 }
217
218 /* Note eventfd must be drained before signalfd handlers run */
219 ret = qemu_event_init();
220 if (ret) {
221 return ret;
222 }
223
224 qemu_aio_context = aio_context_new();
225 return 0;
226 }
227
228 static fd_set rfds, wfds, xfds;
229 static int nfds;
230 static GPollFD poll_fds[1024 * 2]; /* this is probably overkill */
231 static int n_poll_fds;
232 static int max_priority;
233
234 #ifndef _WIN32
235 static void glib_select_fill(int *max_fd, fd_set *rfds, fd_set *wfds,
236 fd_set *xfds, uint32_t *cur_timeout)
237 {
238 GMainContext *context = g_main_context_default();
239 int i;
240 int timeout = 0;
241
242 g_main_context_prepare(context, &max_priority);
243
244 n_poll_fds = g_main_context_query(context, max_priority, &timeout,
245 poll_fds, ARRAY_SIZE(poll_fds));
246 g_assert(n_poll_fds <= ARRAY_SIZE(poll_fds));
247
248 for (i = 0; i < n_poll_fds; i++) {
249 GPollFD *p = &poll_fds[i];
250
251 if ((p->events & G_IO_IN)) {
252 FD_SET(p->fd, rfds);
253 *max_fd = MAX(*max_fd, p->fd);
254 }
255 if ((p->events & G_IO_OUT)) {
256 FD_SET(p->fd, wfds);
257 *max_fd = MAX(*max_fd, p->fd);
258 }
259 if ((p->events & G_IO_ERR)) {
260 FD_SET(p->fd, xfds);
261 *max_fd = MAX(*max_fd, p->fd);
262 }
263 }
264
265 if (timeout >= 0 && timeout < *cur_timeout) {
266 *cur_timeout = timeout;
267 }
268 }
269
270 static void glib_select_poll(fd_set *rfds, fd_set *wfds, fd_set *xfds,
271 bool err)
272 {
273 GMainContext *context = g_main_context_default();
274
275 if (!err) {
276 int i;
277
278 for (i = 0; i < n_poll_fds; i++) {
279 GPollFD *p = &poll_fds[i];
280
281 if ((p->events & G_IO_IN) && FD_ISSET(p->fd, rfds)) {
282 p->revents |= G_IO_IN;
283 }
284 if ((p->events & G_IO_OUT) && FD_ISSET(p->fd, wfds)) {
285 p->revents |= G_IO_OUT;
286 }
287 if ((p->events & G_IO_ERR) && FD_ISSET(p->fd, xfds)) {
288 p->revents |= G_IO_ERR;
289 }
290 }
291 }
292
293 if (g_main_context_check(context, max_priority, poll_fds, n_poll_fds)) {
294 g_main_context_dispatch(context);
295 }
296 }
297
298 static int os_host_main_loop_wait(uint32_t timeout)
299 {
300 struct timeval tv, *tvarg = NULL;
301 int ret;
302
303 glib_select_fill(&nfds, &rfds, &wfds, &xfds, &timeout);
304
305 if (timeout < UINT32_MAX) {
306 tvarg = &tv;
307 tv.tv_sec = timeout / 1000;
308 tv.tv_usec = (timeout % 1000) * 1000;
309 }
310
311 if (timeout > 0) {
312 qemu_mutex_unlock_iothread();
313 }
314
315 ret = select(nfds + 1, &rfds, &wfds, &xfds, tvarg);
316
317 if (timeout > 0) {
318 qemu_mutex_lock_iothread();
319 }
320
321 glib_select_poll(&rfds, &wfds, &xfds, (ret < 0));
322 return ret;
323 }
324 #else
325 /***********************************************************/
326 /* Polling handling */
327
328 typedef struct PollingEntry {
329 PollingFunc *func;
330 void *opaque;
331 struct PollingEntry *next;
332 } PollingEntry;
333
334 static PollingEntry *first_polling_entry;
335
336 int qemu_add_polling_cb(PollingFunc *func, void *opaque)
337 {
338 PollingEntry **ppe, *pe;
339 pe = g_malloc0(sizeof(PollingEntry));
340 pe->func = func;
341 pe->opaque = opaque;
342 for(ppe = &first_polling_entry; *ppe != NULL; ppe = &(*ppe)->next);
343 *ppe = pe;
344 return 0;
345 }
346
347 void qemu_del_polling_cb(PollingFunc *func, void *opaque)
348 {
349 PollingEntry **ppe, *pe;
350 for(ppe = &first_polling_entry; *ppe != NULL; ppe = &(*ppe)->next) {
351 pe = *ppe;
352 if (pe->func == func && pe->opaque == opaque) {
353 *ppe = pe->next;
354 g_free(pe);
355 break;
356 }
357 }
358 }
359
360 /***********************************************************/
361 /* Wait objects support */
362 typedef struct WaitObjects {
363 int num;
364 int revents[MAXIMUM_WAIT_OBJECTS + 1];
365 HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
366 WaitObjectFunc *func[MAXIMUM_WAIT_OBJECTS + 1];
367 void *opaque[MAXIMUM_WAIT_OBJECTS + 1];
368 } WaitObjects;
369
370 static WaitObjects wait_objects = {0};
371
372 int qemu_add_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque)
373 {
374 WaitObjects *w = &wait_objects;
375 if (w->num >= MAXIMUM_WAIT_OBJECTS) {
376 return -1;
377 }
378 w->events[w->num] = handle;
379 w->func[w->num] = func;
380 w->opaque[w->num] = opaque;
381 w->revents[w->num] = 0;
382 w->num++;
383 return 0;
384 }
385
386 void qemu_del_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque)
387 {
388 int i, found;
389 WaitObjects *w = &wait_objects;
390
391 found = 0;
392 for (i = 0; i < w->num; i++) {
393 if (w->events[i] == handle) {
394 found = 1;
395 }
396 if (found) {
397 w->events[i] = w->events[i + 1];
398 w->func[i] = w->func[i + 1];
399 w->opaque[i] = w->opaque[i + 1];
400 w->revents[i] = w->revents[i + 1];
401 }
402 }
403 if (found) {
404 w->num--;
405 }
406 }
407
408 void qemu_fd_register(int fd)
409 {
410 WSAEventSelect(fd, qemu_event_handle, FD_READ | FD_ACCEPT | FD_CLOSE |
411 FD_CONNECT | FD_WRITE | FD_OOB);
412 }
413
414 static int os_host_main_loop_wait(uint32_t timeout)
415 {
416 GMainContext *context = g_main_context_default();
417 int ret, i;
418 PollingEntry *pe;
419 WaitObjects *w = &wait_objects;
420 gint poll_timeout;
421 static struct timeval tv0;
422
423 /* XXX: need to suppress polling by better using win32 events */
424 ret = 0;
425 for (pe = first_polling_entry; pe != NULL; pe = pe->next) {
426 ret |= pe->func(pe->opaque);
427 }
428 if (ret != 0) {
429 return ret;
430 }
431
432 if (nfds >= 0) {
433 ret = select(nfds + 1, &rfds, &wfds, &xfds, &tv0);
434 if (ret != 0) {
435 timeout = 0;
436 }
437 }
438
439 g_main_context_prepare(context, &max_priority);
440 n_poll_fds = g_main_context_query(context, max_priority, &poll_timeout,
441 poll_fds, ARRAY_SIZE(poll_fds));
442 g_assert(n_poll_fds <= ARRAY_SIZE(poll_fds));
443
444 for (i = 0; i < w->num; i++) {
445 poll_fds[n_poll_fds + i].fd = (DWORD_PTR)w->events[i];
446 poll_fds[n_poll_fds + i].events = G_IO_IN;
447 }
448
449 if (poll_timeout < 0 || timeout < poll_timeout) {
450 poll_timeout = timeout;
451 }
452
453 qemu_mutex_unlock_iothread();
454 ret = g_poll(poll_fds, n_poll_fds + w->num, poll_timeout);
455 qemu_mutex_lock_iothread();
456 if (ret > 0) {
457 for (i = 0; i < w->num; i++) {
458 w->revents[i] = poll_fds[n_poll_fds + i].revents;
459 }
460 for (i = 0; i < w->num; i++) {
461 if (w->revents[i] && w->func[i]) {
462 w->func[i](w->opaque[i]);
463 }
464 }
465 }
466
467 if (g_main_context_check(context, max_priority, poll_fds, n_poll_fds)) {
468 g_main_context_dispatch(context);
469 }
470
471 /* If an edge-triggered socket event occurred, select will return a
472 * positive result on the next iteration. We do not need to do anything
473 * here.
474 */
475
476 return ret;
477 }
478 #endif
479
480 int main_loop_wait(int nonblocking)
481 {
482 int ret;
483 uint32_t timeout = UINT32_MAX;
484
485 if (nonblocking) {
486 timeout = 0;
487 } else {
488 aio_bh_update_timeout(qemu_aio_context, &timeout);
489 }
490
491 /* poll any events */
492 /* XXX: separate device handlers from system ones */
493 nfds = -1;
494 FD_ZERO(&rfds);
495 FD_ZERO(&wfds);
496 FD_ZERO(&xfds);
497
498 #ifdef CONFIG_SLIRP
499 slirp_update_timeout(&timeout);
500 slirp_select_fill(&nfds, &rfds, &wfds, &xfds);
501 #endif
502 qemu_iohandler_fill(&nfds, &rfds, &wfds, &xfds);
503 ret = os_host_main_loop_wait(timeout);
504 qemu_iohandler_poll(&rfds, &wfds, &xfds, ret);
505 #ifdef CONFIG_SLIRP
506 slirp_select_poll(&rfds, &wfds, &xfds, (ret < 0));
507 #endif
508
509 qemu_run_all_timers();
510
511 /* Check bottom-halves last in case any of the earlier events triggered
512 them. */
513 qemu_bh_poll();
514
515 return ret;
516 }
517
518 /* Functions to operate on the main QEMU AioContext. */
519
520 QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque)
521 {
522 return aio_bh_new(qemu_aio_context, cb, opaque);
523 }
524
525 int qemu_bh_poll(void)
526 {
527 return aio_bh_poll(qemu_aio_context);
528 }