/*
 * QEMU aio implementation
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "block/block.h"
#include "qemu/rcu.h"
#include "qemu/rcu_queue.h"
#include "qemu/sockets.h"
#include "qemu/cutils.h"
#include "trace.h"
#ifdef CONFIG_EPOLL_CREATE1
#include <sys/epoll.h>
#endif

struct AioHandler
{
    GPollFD pfd;
    IOHandler *io_read;
    IOHandler *io_write;
    AioPollFn *io_poll;
    IOHandler *io_poll_begin;
    IOHandler *io_poll_end;
    void *opaque;
    bool is_external;
    QLIST_ENTRY(AioHandler) node;
    QLIST_ENTRY(AioHandler) node_ready; /* only used during aio_poll() */
    QLIST_ENTRY(AioHandler) node_deleted;
};

/* Add a handler to a ready list */
static void add_ready_handler(AioHandlerList *ready_list,
                              AioHandler *node,
                              int revents)
{
    QLIST_SAFE_REMOVE(node, node_ready); /* remove from nested parent's list */
    node->pfd.revents = revents;
    QLIST_INSERT_HEAD(ready_list, node, node_ready);
}

#ifdef CONFIG_EPOLL_CREATE1

/* The fd number threshold to switch to epoll */
#define EPOLL_ENABLE_THRESHOLD 64

static void aio_epoll_disable(AioContext *ctx)
{
    ctx->epoll_enabled = false;
    if (!ctx->epoll_available) {
        return;
    }
    ctx->epoll_available = false;
    close(ctx->epollfd);
}

static inline int epoll_events_from_pfd(int pfd_events)
{
    return (pfd_events & G_IO_IN ? EPOLLIN : 0) |
           (pfd_events & G_IO_OUT ? EPOLLOUT : 0) |
           (pfd_events & G_IO_HUP ? EPOLLHUP : 0) |
           (pfd_events & G_IO_ERR ? EPOLLERR : 0);
}

static bool aio_epoll_try_enable(AioContext *ctx)
{
    AioHandler *node;
    struct epoll_event event;

    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        int r;
        if (QLIST_IS_INSERTED(node, node_deleted) || !node->pfd.events) {
            continue;
        }
        event.events = epoll_events_from_pfd(node->pfd.events);
        event.data.ptr = node;
        r = epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD, node->pfd.fd, &event);
        if (r) {
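            /*
             * No need to undo the epoll_ctl() calls made so far: on failure
             * the caller invokes aio_epoll_disable(), which closes the epoll
             * fd altogether.
             */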
            return false;
        }
    }
    ctx->epoll_enabled = true;
    return true;
}

static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new)
{
    struct epoll_event event;
    int r;
    int ctl;

    if (!ctx->epoll_enabled) {
        return;
    }
    if (!node->pfd.events) {
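        /* event is left uninitialized; epoll_ctl() ignores it for EPOLL_CTL_DEL */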
        ctl = EPOLL_CTL_DEL;
    } else {
        event.data.ptr = node;
        event.events = epoll_events_from_pfd(node->pfd.events);
        ctl = is_new ? EPOLL_CTL_ADD : EPOLL_CTL_MOD;
    }

    r = epoll_ctl(ctx->epollfd, ctl, node->pfd.fd, &event);
    if (r) {
        aio_epoll_disable(ctx);
    }
}

static int aio_epoll(AioContext *ctx, AioHandlerList *ready_list,
                     int64_t timeout)
{
    GPollFD pfd = {
        .fd = ctx->epollfd,
        .events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR,
    };
    AioHandler *node;
    int i, ret = 0;
    struct epoll_event events[128];

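    /*
     * epoll_wait() only has millisecond resolution, so honour the nanosecond
     * timeout by waiting on the epoll fd itself with qemu_poll_ns() first;
     * once it becomes readable, epoll_wait() runs with a zero timeout just
     * to fetch the ready events.
     */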
    if (timeout > 0) {
        ret = qemu_poll_ns(&pfd, 1, timeout);
        if (ret > 0) {
            timeout = 0;
        }
    }
    if (timeout <= 0 || ret > 0) {
        ret = epoll_wait(ctx->epollfd, events,
                         ARRAY_SIZE(events),
                         timeout);
        if (ret <= 0) {
            goto out;
        }
        for (i = 0; i < ret; i++) {
            int ev = events[i].events;
            int revents = (ev & EPOLLIN ? G_IO_IN : 0) |
                          (ev & EPOLLOUT ? G_IO_OUT : 0) |
                          (ev & EPOLLHUP ? G_IO_HUP : 0) |
                          (ev & EPOLLERR ? G_IO_ERR : 0);

            node = events[i].data.ptr;
            add_ready_handler(ready_list, node, revents);
        }
    }
out:
    return ret;
}

static bool aio_epoll_enabled(AioContext *ctx)
{
    /* Fall back to ppoll when external clients are disabled. */
    return !aio_external_disabled(ctx) && ctx->epoll_enabled;
}

static bool aio_epoll_check_poll(AioContext *ctx, GPollFD *pfds,
                                 unsigned npfd, int64_t timeout)
{
    if (!ctx->epoll_available) {
        return false;
    }
    if (aio_epoll_enabled(ctx)) {
        return true;
    }
    if (npfd >= EPOLL_ENABLE_THRESHOLD) {
        if (aio_epoll_try_enable(ctx)) {
            return true;
        } else {
            aio_epoll_disable(ctx);
        }
    }
    return false;
}

#else

static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new)
{
}

static int aio_epoll(AioContext *ctx, AioHandlerList *ready_list,
                     int64_t timeout)
{
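    /* unreachable: aio_epoll_check_poll() always returns false in this build */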
    assert(false);
}

static bool aio_epoll_enabled(AioContext *ctx)
{
    return false;
}

static bool aio_epoll_check_poll(AioContext *ctx, GPollFD *pfds,
                                 unsigned npfd, int64_t timeout)
{
    return false;
}

#endif

static AioHandler *find_aio_handler(AioContext *ctx, int fd)
{
    AioHandler *node;

    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (node->pfd.fd == fd) {
            if (!QLIST_IS_INSERTED(node, node_deleted)) {
                return node;
            }
        }
    }

    return NULL;
}

static bool aio_remove_fd_handler(AioContext *ctx, AioHandler *node)
{
    /* If the GSource is in the process of being destroyed then
     * g_source_remove_poll() causes an assertion failure. Skip
     * removal in that case, because glib cleans up its state during
     * destruction anyway.
     */
    if (!g_source_is_destroyed(&ctx->source)) {
        g_source_remove_poll(&ctx->source, &node->pfd);
    }

    /* If a read is in progress, just mark the node as deleted */
    if (qemu_lockcnt_count(&ctx->list_lock)) {
        QLIST_INSERT_HEAD_RCU(&ctx->deleted_aio_handlers, node, node_deleted);
        node->pfd.revents = 0;
        return false;
    }
    /* Otherwise, delete it for real. We can't just mark it as
     * deleted because deleted nodes are only cleaned up while
     * no one is walking the handlers list.
     */
    QLIST_REMOVE(node, node);
    return true;
}

void aio_set_fd_handler(AioContext *ctx,
                        int fd,
                        bool is_external,
                        IOHandler *io_read,
                        IOHandler *io_write,
                        AioPollFn *io_poll,
                        void *opaque)
{
    AioHandler *node;
    AioHandler *new_node = NULL;
    bool is_new = false;
    bool deleted = false;
    int poll_disable_change;

    qemu_lockcnt_lock(&ctx->list_lock);

    node = find_aio_handler(ctx, fd);

    /* Are we deleting the fd handler? */
    if (!io_read && !io_write && !io_poll) {
        if (node == NULL) {
            qemu_lockcnt_unlock(&ctx->list_lock);
            return;
        }
        /* Clean events in order to unregister fd from the ctx epoll. */
        node->pfd.events = 0;

        poll_disable_change = -!node->io_poll;
    } else {
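        /*
         * Track handlers that cannot be polled in userspace: +1 if the new
         * handler has no ->io_poll callback, -1 if the handler being
         * replaced lacked one.
         */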
        poll_disable_change = !io_poll - (node && !node->io_poll);
        if (node == NULL) {
            is_new = true;
        }
        /* Alloc and insert if it's not already there */
        new_node = g_new0(AioHandler, 1);

        /* Update handler with latest information */
        new_node->io_read = io_read;
        new_node->io_write = io_write;
        new_node->io_poll = io_poll;
        new_node->opaque = opaque;
        new_node->is_external = is_external;

        if (is_new) {
            new_node->pfd.fd = fd;
        } else {
            new_node->pfd = node->pfd;
        }
        g_source_add_poll(&ctx->source, &new_node->pfd);

        new_node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
        new_node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);

        QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, new_node, node);
    }
    if (node) {
        deleted = aio_remove_fd_handler(ctx, node);
    }

    /* No need to order poll_disable_cnt writes against other updates;
     * the counter is only used to avoid wasting time and latency on
     * iterated polling when the system call will be ultimately necessary.
     * Changing handlers is a rare event, and a little wasted polling until
     * the aio_notify below is not an issue.
     */
    atomic_set(&ctx->poll_disable_cnt,
               atomic_read(&ctx->poll_disable_cnt) + poll_disable_change);

    if (new_node) {
        aio_epoll_update(ctx, new_node, is_new);
    } else if (node) {
        /* Unregister deleted fd_handler */
        aio_epoll_update(ctx, node, false);
    }
    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);

    if (deleted) {
        g_free(node);
    }
}

void aio_set_fd_poll(AioContext *ctx, int fd,
                     IOHandler *io_poll_begin,
                     IOHandler *io_poll_end)
{
    AioHandler *node = find_aio_handler(ctx, fd);

    if (!node) {
        return;
    }

    node->io_poll_begin = io_poll_begin;
    node->io_poll_end = io_poll_end;
}

void aio_set_event_notifier(AioContext *ctx,
                            EventNotifier *notifier,
                            bool is_external,
                            EventNotifierHandler *io_read,
                            AioPollFn *io_poll)
{
    aio_set_fd_handler(ctx, event_notifier_get_fd(notifier), is_external,
                       (IOHandler *)io_read, NULL, io_poll, notifier);
}

void aio_set_event_notifier_poll(AioContext *ctx,
                                 EventNotifier *notifier,
                                 EventNotifierHandler *io_poll_begin,
                                 EventNotifierHandler *io_poll_end)
{
    aio_set_fd_poll(ctx, event_notifier_get_fd(notifier),
                    (IOHandler *)io_poll_begin,
                    (IOHandler *)io_poll_end);
}

static void poll_set_started(AioContext *ctx, bool started)
{
    AioHandler *node;

    if (started == ctx->poll_started) {
        return;
    }

    ctx->poll_started = started;

    qemu_lockcnt_inc(&ctx->list_lock);
    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        IOHandler *fn;

        if (QLIST_IS_INSERTED(node, node_deleted)) {
            continue;
        }

        if (started) {
            fn = node->io_poll_begin;
        } else {
            fn = node->io_poll_end;
        }

        if (fn) {
            fn(node->opaque);
        }
    }
    qemu_lockcnt_dec(&ctx->list_lock);
}


bool aio_prepare(AioContext *ctx)
{
    /* Poll mode cannot be used with glib's event loop, disable it. */
    poll_set_started(ctx, false);

    return false;
}

bool aio_pending(AioContext *ctx)
{
    AioHandler *node;
    bool result = false;

    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
    qemu_lockcnt_inc(&ctx->list_lock);

    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        int revents;

        revents = node->pfd.revents & node->pfd.events;
        if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read &&
            aio_node_check(ctx, node->is_external)) {
            result = true;
            break;
        }
        if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write &&
            aio_node_check(ctx, node->is_external)) {
            result = true;
            break;
        }
    }
    qemu_lockcnt_dec(&ctx->list_lock);

    return result;
}

static void aio_free_deleted_handlers(AioContext *ctx)
{
    AioHandler *node;

    if (QLIST_EMPTY_RCU(&ctx->deleted_aio_handlers)) {
        return;
    }
    if (!qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
        return; /* we are nested, let the parent do the freeing */
    }

    while ((node = QLIST_FIRST_RCU(&ctx->deleted_aio_handlers))) {
        QLIST_REMOVE(node, node);
        QLIST_REMOVE(node, node_deleted);
        g_free(node);
    }

    qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
}

static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
{
    bool progress = false;
    int revents;

    revents = node->pfd.revents & node->pfd.events;
    node->pfd.revents = 0;

    if (!QLIST_IS_INSERTED(node, node_deleted) &&
        (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
        aio_node_check(ctx, node->is_external) &&
        node->io_read) {
        node->io_read(node->opaque);

        /* aio_notify() does not count as progress */
        if (node->opaque != &ctx->notifier) {
            progress = true;
        }
    }
    if (!QLIST_IS_INSERTED(node, node_deleted) &&
        (revents & (G_IO_OUT | G_IO_ERR)) &&
        aio_node_check(ctx, node->is_external) &&
        node->io_write) {
        node->io_write(node->opaque);
        progress = true;
    }

    return progress;
}

/*
 * If we have a list of ready handlers then this is more efficient than
 * scanning all handlers with aio_dispatch_handlers().
 */
static bool aio_dispatch_ready_handlers(AioContext *ctx,
                                        AioHandlerList *ready_list)
{
    bool progress = false;
    AioHandler *node;

    while ((node = QLIST_FIRST(ready_list))) {
        QLIST_REMOVE(node, node_ready);
        progress = aio_dispatch_handler(ctx, node) || progress;
    }

    return progress;
}

/* Slower than aio_dispatch_ready_handlers() but only used via glib */
static bool aio_dispatch_handlers(AioContext *ctx)
{
    AioHandler *node, *tmp;
    bool progress = false;

    QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
        progress = aio_dispatch_handler(ctx, node) || progress;
    }

    return progress;
}

void aio_dispatch(AioContext *ctx)
{
    qemu_lockcnt_inc(&ctx->list_lock);
    aio_bh_poll(ctx);
    aio_dispatch_handlers(ctx);
    aio_free_deleted_handlers(ctx);
    qemu_lockcnt_dec(&ctx->list_lock);

    timerlistgroup_run_timers(&ctx->tlg);
}

/* These thread-local variables are used only in a small part of aio_poll
 * around the call to the poll() system call. In particular they are not
 * used while aio_poll is performing callbacks, which makes it much easier
 * to think about reentrancy!
 *
 * Stack-allocated arrays would be perfect but they have size limitations;
 * heap allocation is expensive enough that we want to reuse arrays across
 * calls to aio_poll(). And because poll() has to be called without holding
 * any lock, the arrays cannot be stored in AioContext. Thread-local data
 * has none of the disadvantages of these three options.
 */
static __thread GPollFD *pollfds;
static __thread AioHandler **nodes;
static __thread unsigned npfd, nalloc;
static __thread Notifier pollfds_cleanup_notifier;

static void pollfds_cleanup(Notifier *n, void *unused)
{
    g_assert(npfd == 0);
    g_free(pollfds);
    g_free(nodes);
    nalloc = 0;
}

static void add_pollfd(AioHandler *node)
{
    if (npfd == nalloc) {
        if (nalloc == 0) {
            pollfds_cleanup_notifier.notify = pollfds_cleanup;
            qemu_thread_atexit_add(&pollfds_cleanup_notifier);
            nalloc = 8;
        } else {
            g_assert(nalloc <= INT_MAX);
            nalloc *= 2;
        }
        pollfds = g_renew(GPollFD, pollfds, nalloc);
        nodes = g_renew(AioHandler *, nodes, nalloc);
    }
    nodes[npfd] = node;
    pollfds[npfd] = (GPollFD) {
        .fd = node->pfd.fd,
        .events = node->pfd.events,
    };
    npfd++;
}

static bool run_poll_handlers_once(AioContext *ctx, int64_t *timeout)
{
    bool progress = false;
    AioHandler *node;

    /*
     * Optimization: ->io_poll() handlers often contain RCU read critical
     * sections and we therefore see many rcu_read_lock() -> rcu_read_unlock()
     * -> rcu_read_lock() -> ... sequences with expensive memory
     * synchronization primitives. Make the entire polling loop an RCU
     * critical section because nested rcu_read_lock()/rcu_read_unlock() calls
     * are cheap.
     */
    RCU_READ_LOCK_GUARD();

    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        if (!QLIST_IS_INSERTED(node, node_deleted) && node->io_poll &&
            aio_node_check(ctx, node->is_external) &&
            node->io_poll(node->opaque)) {
            /*
             * Polling was successful, exit try_poll_mode immediately
             * to adjust the next polling time.
             */
            *timeout = 0;
            if (node->opaque != &ctx->notifier) {
                progress = true;
            }
        }

        /* Caller handles freeing deleted nodes. Don't do it here. */
    }

    return progress;
}

/* run_poll_handlers:
 * @ctx: the AioContext
 * @max_ns: maximum time to poll for, in nanoseconds
 *
 * Polls for a given time.
 *
 * Note that ctx->notify_me must be non-zero so this function can detect
 * aio_notify().
 *
 * Note that the caller must have incremented ctx->list_lock.
 *
 * Returns: true if progress was made, false otherwise
 */
static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout)
{
    bool progress;
    int64_t start_time, elapsed_time;

    assert(ctx->notify_me);
    assert(qemu_lockcnt_count(&ctx->list_lock) > 0);

    trace_run_poll_handlers_begin(ctx, max_ns, *timeout);

    start_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    do {
        progress = run_poll_handlers_once(ctx, timeout);
        elapsed_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start_time;
        max_ns = qemu_soonest_timeout(*timeout, max_ns);
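        /*
         * A successful poll set *timeout (and therefore max_ns) to 0, so
         * progress implies that the loop terminates after this iteration.
         */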
        assert(!(max_ns && progress));
    } while (elapsed_time < max_ns && !atomic_read(&ctx->poll_disable_cnt));

    /* If time has passed with no successful polling, adjust *timeout to
     * keep the same ending time.
     */
    if (*timeout != -1) {
        *timeout -= MIN(*timeout, elapsed_time);
    }

    trace_run_poll_handlers_end(ctx, progress, *timeout);
    return progress;
}

/* try_poll_mode:
 * @ctx: the AioContext
 * @timeout: timeout for blocking wait, computed by the caller and updated if
 *           polling succeeds.
 *
 * ctx->notify_me must be non-zero so this function can detect aio_notify().
 *
 * Note that the caller must have incremented ctx->list_lock.
 *
 * Returns: true if progress was made, false otherwise
 */
static bool try_poll_mode(AioContext *ctx, int64_t *timeout)
{
    int64_t max_ns = qemu_soonest_timeout(*timeout, ctx->poll_ns);

    if (max_ns && !atomic_read(&ctx->poll_disable_cnt)) {
        poll_set_started(ctx, true);

        if (run_poll_handlers(ctx, max_ns, timeout)) {
            return true;
        }
    }

    poll_set_started(ctx, false);

    /* Even if we don't run busy polling, try polling once in case it can make
     * progress and the caller will be able to avoid ppoll(2)/epoll_wait(2).
     */
    return run_poll_handlers_once(ctx, timeout);
}

bool aio_poll(AioContext *ctx, bool blocking)
{
    AioHandlerList ready_list = QLIST_HEAD_INITIALIZER(ready_list);
    AioHandler *node;
    int i;
    int ret = 0;
    bool progress;
    int64_t timeout;
    int64_t start = 0;

    assert(in_aio_context_home_thread(ctx));

    /* aio_notify can avoid the expensive event_notifier_set if
     * everything (file descriptors, bottom halves, timers) will
     * be re-evaluated before the next blocking poll(). This is
     * already true when aio_poll is called with blocking == false;
     * if blocking == true, it is only true after poll() returns,
     * so disable the optimization now.
     */
    if (blocking) {
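        /*
         * Add 2 rather than 1: the low bit of notify_me is reserved for the
         * glib event loop integration (aio_ctx_prepare() in util/async.c).
         */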
        atomic_add(&ctx->notify_me, 2);
    }

    qemu_lockcnt_inc(&ctx->list_lock);

    if (ctx->poll_max_ns) {
        start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    }

    timeout = blocking ? aio_compute_timeout(ctx) : 0;
    progress = try_poll_mode(ctx, &timeout);
    assert(!(timeout && progress));

    /* If polling is allowed, non-blocking aio_poll does not need the
     * system call---a single round of run_poll_handlers_once suffices.
     */
    if (timeout || atomic_read(&ctx->poll_disable_cnt)) {
        assert(npfd == 0);

        /* fill pollfds */

        if (!aio_epoll_enabled(ctx)) {
            QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
                if (!QLIST_IS_INSERTED(node, node_deleted) && node->pfd.events
                    && aio_node_check(ctx, node->is_external)) {
                    add_pollfd(node);
                }
            }
        }

        /* wait until next event */
        if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) {
            npfd = 0; /* pollfds[] is not being used */
            ret = aio_epoll(ctx, &ready_list, timeout);
        } else {
            ret = qemu_poll_ns(pollfds, npfd, timeout);
        }
    }

    if (blocking) {
        atomic_sub(&ctx->notify_me, 2);
        aio_notify_accept(ctx);
    }

    /* Adjust polling time */
    if (ctx->poll_max_ns) {
        int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;

        if (block_ns <= ctx->poll_ns) {
            /* This is the sweet spot, no adjustment needed */
        } else if (block_ns > ctx->poll_max_ns) {
            /* We'd have to poll for too long, poll less */
            int64_t old = ctx->poll_ns;

            if (ctx->poll_shrink) {
                ctx->poll_ns /= ctx->poll_shrink;
            } else {
                ctx->poll_ns = 0;
            }

            trace_poll_shrink(ctx, old, ctx->poll_ns);
        } else if (ctx->poll_ns < ctx->poll_max_ns &&
                   block_ns < ctx->poll_max_ns) {
            /* There is room to grow, poll longer */
            int64_t old = ctx->poll_ns;
            int64_t grow = ctx->poll_grow;

            if (grow == 0) {
                grow = 2;
            }

            if (ctx->poll_ns) {
                ctx->poll_ns *= grow;
            } else {
                ctx->poll_ns = 4000; /* start polling at 4 microseconds */
            }

            if (ctx->poll_ns > ctx->poll_max_ns) {
                ctx->poll_ns = ctx->poll_max_ns;
            }

            trace_poll_grow(ctx, old, ctx->poll_ns);
        }
    }

    /* if we have any readable fds, dispatch event */
    if (ret > 0) {
        for (i = 0; i < npfd; i++) {
            int revents = pollfds[i].revents;

            if (revents) {
                add_ready_handler(&ready_list, nodes[i], revents);
            }
        }
    }

    npfd = 0;

    progress |= aio_bh_poll(ctx);

    if (ret > 0) {
        progress |= aio_dispatch_ready_handlers(ctx, &ready_list);
    }

    aio_free_deleted_handlers(ctx);

    qemu_lockcnt_dec(&ctx->list_lock);

    progress |= timerlistgroup_run_timers(&ctx->tlg);

    return progress;
}

void aio_context_setup(AioContext *ctx)
{
#ifdef CONFIG_EPOLL_CREATE1
    assert(!ctx->epollfd);
    ctx->epollfd = epoll_create1(EPOLL_CLOEXEC);
    if (ctx->epollfd == -1) {
        fprintf(stderr, "Failed to create epoll instance: %s", strerror(errno));
        ctx->epoll_available = false;
    } else {
        ctx->epoll_available = true;
    }
#endif
}

void aio_context_destroy(AioContext *ctx)
{
#ifdef CONFIG_EPOLL_CREATE1
    aio_epoll_disable(ctx);
#endif
}

void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
                                 int64_t grow, int64_t shrink, Error **errp)
{
    /* No thread synchronization here, it doesn't matter if an incorrect value
     * is used once.
     */
    ctx->poll_max_ns = max_ns;
    ctx->poll_ns = 0;
    ctx->poll_grow = grow;
    ctx->poll_shrink = shrink;

    aio_notify(ctx);
}