]> git.proxmox.com Git - mirror_qemu.git/blob - util/aio-posix.c
aio-posix: move RCU_READ_LOCK() into run_poll_handlers()
[mirror_qemu.git] / util / aio-posix.c
1 /*
2 * QEMU aio implementation
3 *
4 * Copyright IBM, Corp. 2008
5 *
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
11 *
12 * Contributions after 2012-01-13 are licensed under the terms of the
13 * GNU GPL, version 2 or (at your option) any later version.
14 */
15
16 #include "qemu/osdep.h"
17 #include "block/block.h"
18 #include "qemu/rcu.h"
19 #include "qemu/rcu_queue.h"
20 #include "qemu/sockets.h"
21 #include "qemu/cutils.h"
22 #include "trace.h"
23 #ifdef CONFIG_EPOLL_CREATE1
24 #include <sys/epoll.h>
25 #endif
26
27 struct AioHandler
28 {
29 GPollFD pfd;
30 IOHandler *io_read;
31 IOHandler *io_write;
32 AioPollFn *io_poll;
33 IOHandler *io_poll_begin;
34 IOHandler *io_poll_end;
35 void *opaque;
36 bool is_external;
37 QLIST_ENTRY(AioHandler) node;
38 QLIST_ENTRY(AioHandler) node_ready; /* only used during aio_poll() */
39 QLIST_ENTRY(AioHandler) node_deleted;
40 };
41
42 /* Add a handler to a ready list */
43 static void add_ready_handler(AioHandlerList *ready_list,
44 AioHandler *node,
45 int revents)
46 {
47 QLIST_SAFE_REMOVE(node, node_ready); /* remove from nested parent's list */
48 node->pfd.revents = revents;
49 QLIST_INSERT_HEAD(ready_list, node, node_ready);
50 }
51
52 #ifdef CONFIG_EPOLL_CREATE1
53
54 /* The fd number threshold to switch to epoll */
55 #define EPOLL_ENABLE_THRESHOLD 64
56
57 static void aio_epoll_disable(AioContext *ctx)
58 {
59 ctx->epoll_enabled = false;
60 if (!ctx->epoll_available) {
61 return;
62 }
63 ctx->epoll_available = false;
64 close(ctx->epollfd);
65 }
66
67 static inline int epoll_events_from_pfd(int pfd_events)
68 {
69 return (pfd_events & G_IO_IN ? EPOLLIN : 0) |
70 (pfd_events & G_IO_OUT ? EPOLLOUT : 0) |
71 (pfd_events & G_IO_HUP ? EPOLLHUP : 0) |
72 (pfd_events & G_IO_ERR ? EPOLLERR : 0);
73 }
74
75 static bool aio_epoll_try_enable(AioContext *ctx)
76 {
77 AioHandler *node;
78 struct epoll_event event;
79
80 QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
81 int r;
82 if (QLIST_IS_INSERTED(node, node_deleted) || !node->pfd.events) {
83 continue;
84 }
85 event.events = epoll_events_from_pfd(node->pfd.events);
86 event.data.ptr = node;
87 r = epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD, node->pfd.fd, &event);
88 if (r) {
89 return false;
90 }
91 }
92 ctx->epoll_enabled = true;
93 return true;
94 }
95
96 static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new)
97 {
98 struct epoll_event event;
99 int r;
100 int ctl;
101
102 if (!ctx->epoll_enabled) {
103 return;
104 }
105 if (!node->pfd.events) {
106 ctl = EPOLL_CTL_DEL;
107 } else {
108 event.data.ptr = node;
109 event.events = epoll_events_from_pfd(node->pfd.events);
110 ctl = is_new ? EPOLL_CTL_ADD : EPOLL_CTL_MOD;
111 }
112
113 r = epoll_ctl(ctx->epollfd, ctl, node->pfd.fd, &event);
114 if (r) {
115 aio_epoll_disable(ctx);
116 }
117 }
118
119 static int aio_epoll(AioContext *ctx, AioHandlerList *ready_list,
120 int64_t timeout)
121 {
122 GPollFD pfd = {
123 .fd = ctx->epollfd,
124 .events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR,
125 };
126 AioHandler *node;
127 int i, ret = 0;
128 struct epoll_event events[128];
129
130 if (timeout > 0) {
131 ret = qemu_poll_ns(&pfd, 1, timeout);
132 if (ret > 0) {
133 timeout = 0;
134 }
135 }
136 if (timeout <= 0 || ret > 0) {
137 ret = epoll_wait(ctx->epollfd, events,
138 ARRAY_SIZE(events),
139 timeout);
140 if (ret <= 0) {
141 goto out;
142 }
143 for (i = 0; i < ret; i++) {
144 int ev = events[i].events;
145 int revents = (ev & EPOLLIN ? G_IO_IN : 0) |
146 (ev & EPOLLOUT ? G_IO_OUT : 0) |
147 (ev & EPOLLHUP ? G_IO_HUP : 0) |
148 (ev & EPOLLERR ? G_IO_ERR : 0);
149
150 node = events[i].data.ptr;
151 add_ready_handler(ready_list, node, revents);
152 }
153 }
154 out:
155 return ret;
156 }
157
158 static bool aio_epoll_enabled(AioContext *ctx)
159 {
160 /* Fall back to ppoll when external clients are disabled. */
161 return !aio_external_disabled(ctx) && ctx->epoll_enabled;
162 }
163
164 static bool aio_epoll_check_poll(AioContext *ctx, GPollFD *pfds,
165 unsigned npfd, int64_t timeout)
166 {
167 if (!ctx->epoll_available) {
168 return false;
169 }
170 if (aio_epoll_enabled(ctx)) {
171 return true;
172 }
173 if (npfd >= EPOLL_ENABLE_THRESHOLD) {
174 if (aio_epoll_try_enable(ctx)) {
175 return true;
176 } else {
177 aio_epoll_disable(ctx);
178 }
179 }
180 return false;
181 }
182
183 #else
184
185 static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new)
186 {
187 }
188
189 static int aio_epoll(AioContext *ctx, AioHandlerList *ready_list,
190 int64_t timeout)
191 {
192 assert(false);
193 }
194
195 static bool aio_epoll_enabled(AioContext *ctx)
196 {
197 return false;
198 }
199
200 static bool aio_epoll_check_poll(AioContext *ctx, GPollFD *pfds,
201 unsigned npfd, int64_t timeout)
202 {
203 return false;
204 }
205
206 #endif
207
208 static AioHandler *find_aio_handler(AioContext *ctx, int fd)
209 {
210 AioHandler *node;
211
212 QLIST_FOREACH(node, &ctx->aio_handlers, node) {
213 if (node->pfd.fd == fd) {
214 if (!QLIST_IS_INSERTED(node, node_deleted)) {
215 return node;
216 }
217 }
218 }
219
220 return NULL;
221 }
222
223 static bool aio_remove_fd_handler(AioContext *ctx, AioHandler *node)
224 {
225 /* If the GSource is in the process of being destroyed then
226 * g_source_remove_poll() causes an assertion failure. Skip
227 * removal in that case, because glib cleans up its state during
228 * destruction anyway.
229 */
230 if (!g_source_is_destroyed(&ctx->source)) {
231 g_source_remove_poll(&ctx->source, &node->pfd);
232 }
233
234 /* If a read is in progress, just mark the node as deleted */
235 if (qemu_lockcnt_count(&ctx->list_lock)) {
236 QLIST_INSERT_HEAD_RCU(&ctx->deleted_aio_handlers, node, node_deleted);
237 node->pfd.revents = 0;
238 return false;
239 }
240 /* Otherwise, delete it for real. We can't just mark it as
241 * deleted because deleted nodes are only cleaned up while
242 * no one is walking the handlers list.
243 */
244 QLIST_REMOVE(node, node);
245 return true;
246 }
247
/*
 * aio_set_fd_handler:
 * @ctx: the AioContext
 * @fd: file descriptor to (un)register
 * @is_external: stored in the new handler
 * @io_read: read callback, or NULL
 * @io_write: write callback, or NULL
 * @io_poll: polling callback, or NULL
 * @opaque: argument stored for the callbacks
 *
 * Register, replace or remove the handler for @fd.
 *
 * If @io_read, @io_write and @io_poll are all NULL the existing handler (if
 * any) is removed.  Otherwise a new AioHandler is always allocated and
 * inserted; an existing handler for @fd is removed after its pfd has been
 * copied into the new node.  The new node comes from g_new0() and its
 * io_poll_begin/io_poll_end fields are not assigned here.
 *
 * ctx->poll_disable_cnt is adjusted by the change in the number of handlers
 * that have no io_poll callback, the epoll set is updated, and the context
 * is notified.  Everything except the final aio_notify()/g_free() runs with
 * ctx->list_lock locked.
 */
248 void aio_set_fd_handler(AioContext *ctx,
249 int fd,
250 bool is_external,
251 IOHandler *io_read,
252 IOHandler *io_write,
253 AioPollFn *io_poll,
254 void *opaque)
255 {
256 AioHandler *node;
257 AioHandler *new_node = NULL;
258 bool is_new = false;
259 bool deleted = false;
260 int poll_disable_change;
261
262 qemu_lockcnt_lock(&ctx->list_lock);
263
264 node = find_aio_handler(ctx, fd);
265
266 /* Are we deleting the fd handler? */
267 if (!io_read && !io_write && !io_poll) {
268 if (node == NULL) {
/* Nothing registered for this fd: nothing to delete, no notification */
269 qemu_lockcnt_unlock(&ctx->list_lock);
270 return;
271 }
272 /* Clean events in order to unregister fd from the ctx epoll. */
273 node->pfd.events = 0;
274
/* -1 if the handler being removed had no io_poll, 0 otherwise */
275 poll_disable_change = -!node->io_poll;
276 } else {
/* +1 if the new handler has no io_poll, -1 if the replaced one had none */
277 poll_disable_change = !io_poll - (node && !node->io_poll);
278 if (node == NULL) {
279 is_new = true;
280 }
281 /* Always allocate a fresh node; an existing one is replaced below */
282 new_node = g_new0(AioHandler, 1);
283
284 /* Update handler with latest information */
285 new_node->io_read = io_read;
286 new_node->io_write = io_write;
287 new_node->io_poll = io_poll;
288 new_node->opaque = opaque;
289 new_node->is_external = is_external;
290
291 if (is_new) {
292 new_node->pfd.fd = fd;
293 } else {
294 new_node->pfd = node->pfd;
295 }
296 g_source_add_poll(&ctx->source, &new_node->pfd);
297
/* The event mask is recomputed from scratch from the callbacks given */
298 new_node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
299 new_node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
300
301 QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, new_node, node);
302 }
/* Retire the old node; 'deleted' says whether we may free it below */
303 if (node) {
304 deleted = aio_remove_fd_handler(ctx, node);
305 }
306
307 /* No need to order poll_disable_cnt writes against other updates;
308 * the counter is only used to avoid wasting time and latency on
309 * iterated polling when the system call will be ultimately necessary.
310 * Changing handlers is a rare event, and a little wasted polling until
311 * the aio_notify below is not an issue.
312 */
313 atomic_set(&ctx->poll_disable_cnt,
314 atomic_read(&ctx->poll_disable_cnt) + poll_disable_change);
315
316 if (new_node) {
317 aio_epoll_update(ctx, new_node, is_new);
318 } else if (node) {
319 /* Unregister deleted fd_handler */
320 aio_epoll_update(ctx, node, false);
321 }
322 qemu_lockcnt_unlock(&ctx->list_lock);
323 aio_notify(ctx);
324
/* Only free the old node if it was really unlinked, not merely marked */
325 if (deleted) {
326 g_free(node);
327 }
328 }
329
330 void aio_set_fd_poll(AioContext *ctx, int fd,
331 IOHandler *io_poll_begin,
332 IOHandler *io_poll_end)
333 {
334 AioHandler *node = find_aio_handler(ctx, fd);
335
336 if (!node) {
337 return;
338 }
339
340 node->io_poll_begin = io_poll_begin;
341 node->io_poll_end = io_poll_end;
342 }
343
344 void aio_set_event_notifier(AioContext *ctx,
345 EventNotifier *notifier,
346 bool is_external,
347 EventNotifierHandler *io_read,
348 AioPollFn *io_poll)
349 {
350 aio_set_fd_handler(ctx, event_notifier_get_fd(notifier), is_external,
351 (IOHandler *)io_read, NULL, io_poll, notifier);
352 }
353
354 void aio_set_event_notifier_poll(AioContext *ctx,
355 EventNotifier *notifier,
356 EventNotifierHandler *io_poll_begin,
357 EventNotifierHandler *io_poll_end)
358 {
359 aio_set_fd_poll(ctx, event_notifier_get_fd(notifier),
360 (IOHandler *)io_poll_begin,
361 (IOHandler *)io_poll_end);
362 }
363
364 static bool poll_set_started(AioContext *ctx, bool started)
365 {
366 AioHandler *node;
367 bool progress = false;
368
369 if (started == ctx->poll_started) {
370 return false;
371 }
372
373 ctx->poll_started = started;
374
375 qemu_lockcnt_inc(&ctx->list_lock);
376 QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
377 IOHandler *fn;
378
379 if (QLIST_IS_INSERTED(node, node_deleted)) {
380 continue;
381 }
382
383 if (started) {
384 fn = node->io_poll_begin;
385 } else {
386 fn = node->io_poll_end;
387 }
388
389 if (fn) {
390 fn(node->opaque);
391 }
392
393 /* Poll one last time in case ->io_poll_end() raced with the event */
394 if (!started) {
395 progress = node->io_poll(node->opaque) || progress;
396 }
397 }
398 qemu_lockcnt_dec(&ctx->list_lock);
399
400 return progress;
401 }
402
403
404 bool aio_prepare(AioContext *ctx)
405 {
406 /* Poll mode cannot be used with glib's event loop, disable it. */
407 poll_set_started(ctx, false);
408
409 return false;
410 }
411
412 bool aio_pending(AioContext *ctx)
413 {
414 AioHandler *node;
415 bool result = false;
416
417 /*
418 * We have to walk very carefully in case aio_set_fd_handler is
419 * called while we're walking.
420 */
421 qemu_lockcnt_inc(&ctx->list_lock);
422
423 QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
424 int revents;
425
426 revents = node->pfd.revents & node->pfd.events;
427 if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read &&
428 aio_node_check(ctx, node->is_external)) {
429 result = true;
430 break;
431 }
432 if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write &&
433 aio_node_check(ctx, node->is_external)) {
434 result = true;
435 break;
436 }
437 }
438 qemu_lockcnt_dec(&ctx->list_lock);
439
440 return result;
441 }
442
443 static void aio_free_deleted_handlers(AioContext *ctx)
444 {
445 AioHandler *node;
446
447 if (QLIST_EMPTY_RCU(&ctx->deleted_aio_handlers)) {
448 return;
449 }
450 if (!qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
451 return; /* we are nested, let the parent do the freeing */
452 }
453
454 while ((node = QLIST_FIRST_RCU(&ctx->deleted_aio_handlers))) {
455 QLIST_REMOVE(node, node);
456 QLIST_REMOVE(node, node_deleted);
457 g_free(node);
458 }
459
460 qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
461 }
462
463 static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
464 {
465 bool progress = false;
466 int revents;
467
468 revents = node->pfd.revents & node->pfd.events;
469 node->pfd.revents = 0;
470
471 if (!QLIST_IS_INSERTED(node, node_deleted) &&
472 (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
473 aio_node_check(ctx, node->is_external) &&
474 node->io_read) {
475 node->io_read(node->opaque);
476
477 /* aio_notify() does not count as progress */
478 if (node->opaque != &ctx->notifier) {
479 progress = true;
480 }
481 }
482 if (!QLIST_IS_INSERTED(node, node_deleted) &&
483 (revents & (G_IO_OUT | G_IO_ERR)) &&
484 aio_node_check(ctx, node->is_external) &&
485 node->io_write) {
486 node->io_write(node->opaque);
487 progress = true;
488 }
489
490 return progress;
491 }
492
493 /*
494 * If we have a list of ready handlers then this is more efficient than
495 * scanning all handlers with aio_dispatch_handlers().
496 */
497 static bool aio_dispatch_ready_handlers(AioContext *ctx,
498 AioHandlerList *ready_list)
499 {
500 bool progress = false;
501 AioHandler *node;
502
503 while ((node = QLIST_FIRST(ready_list))) {
504 QLIST_REMOVE(node, node_ready);
505 progress = aio_dispatch_handler(ctx, node) || progress;
506 }
507
508 return progress;
509 }
510
511 /* Slower than aio_dispatch_ready_handlers() but only used via glib */
512 static bool aio_dispatch_handlers(AioContext *ctx)
513 {
514 AioHandler *node, *tmp;
515 bool progress = false;
516
517 QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
518 progress = aio_dispatch_handler(ctx, node) || progress;
519 }
520
521 return progress;
522 }
523
524 void aio_dispatch(AioContext *ctx)
525 {
526 qemu_lockcnt_inc(&ctx->list_lock);
527 aio_bh_poll(ctx);
528 aio_dispatch_handlers(ctx);
529 aio_free_deleted_handlers(ctx);
530 qemu_lockcnt_dec(&ctx->list_lock);
531
532 timerlistgroup_run_timers(&ctx->tlg);
533 }
534
535 /* These thread-local variables are used only in a small part of aio_poll
536 * around the call to the poll() system call. In particular they are not
537 * used while aio_poll is performing callbacks, which makes it much easier
538 * to think about reentrancy!
539 *
540 * Stack-allocated arrays would be perfect but they have size limitations;
541 * heap allocation is expensive enough that we want to reuse arrays across
542 * calls to aio_poll(). And because poll() has to be called without holding
543 * any lock, the arrays cannot be stored in AioContext. Thread-local data
544 * has none of the disadvantages of these three options.
545 */
546 static __thread GPollFD *pollfds;
547 static __thread AioHandler **nodes;
548 static __thread unsigned npfd, nalloc;
549 static __thread Notifier pollfds_cleanup_notifier;
550
551 static void pollfds_cleanup(Notifier *n, void *unused)
552 {
553 g_assert(npfd == 0);
554 g_free(pollfds);
555 g_free(nodes);
556 nalloc = 0;
557 }
558
559 static void add_pollfd(AioHandler *node)
560 {
561 if (npfd == nalloc) {
562 if (nalloc == 0) {
563 pollfds_cleanup_notifier.notify = pollfds_cleanup;
564 qemu_thread_atexit_add(&pollfds_cleanup_notifier);
565 nalloc = 8;
566 } else {
567 g_assert(nalloc <= INT_MAX);
568 nalloc *= 2;
569 }
570 pollfds = g_renew(GPollFD, pollfds, nalloc);
571 nodes = g_renew(AioHandler *, nodes, nalloc);
572 }
573 nodes[npfd] = node;
574 pollfds[npfd] = (GPollFD) {
575 .fd = node->pfd.fd,
576 .events = node->pfd.events,
577 };
578 npfd++;
579 }
580
581 static bool run_poll_handlers_once(AioContext *ctx, int64_t *timeout)
582 {
583 bool progress = false;
584 AioHandler *node;
585
586 QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
587 if (!QLIST_IS_INSERTED(node, node_deleted) && node->io_poll &&
588 aio_node_check(ctx, node->is_external) &&
589 node->io_poll(node->opaque)) {
590 /*
591 * Polling was successful, exit try_poll_mode immediately
592 * to adjust the next polling time.
593 */
594 *timeout = 0;
595 if (node->opaque != &ctx->notifier) {
596 progress = true;
597 }
598 }
599
600 /* Caller handles freeing deleted nodes. Don't do it here. */
601 }
602
603 return progress;
604 }
605
606 /* run_poll_handlers:
607 * @ctx: the AioContext
608 * @max_ns: maximum time to poll for, in nanoseconds
609 *
610 * Polls for a given time.
611 *
612 * Note that ctx->notify_me must be non-zero so this function can detect
613 * aio_notify().
614 *
615 * Note that the caller must have incremented ctx->list_lock.
616 *
617 * Returns: true if progress was made, false otherwise
618 */
619 static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout)
620 {
621 bool progress;
622 int64_t start_time, elapsed_time;
623
624 assert(ctx->notify_me);
625 assert(qemu_lockcnt_count(&ctx->list_lock) > 0);
626
627 trace_run_poll_handlers_begin(ctx, max_ns, *timeout);
628
629 /*
630 * Optimization: ->io_poll() handlers often contain RCU read critical
631 * sections and we therefore see many rcu_read_lock() -> rcu_read_unlock()
632 * -> rcu_read_lock() -> ... sequences with expensive memory
633 * synchronization primitives. Make the entire polling loop an RCU
634 * critical section because nested rcu_read_lock()/rcu_read_unlock() calls
635 * are cheap.
636 */
637 RCU_READ_LOCK_GUARD();
638
639 start_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
640 do {
641 progress = run_poll_handlers_once(ctx, timeout);
642 elapsed_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start_time;
643 max_ns = qemu_soonest_timeout(*timeout, max_ns);
644 assert(!(max_ns && progress));
645 } while (elapsed_time < max_ns && !atomic_read(&ctx->poll_disable_cnt));
646
647 /* If time has passed with no successful polling, adjust *timeout to
648 * keep the same ending time.
649 */
650 if (*timeout != -1) {
651 *timeout -= MIN(*timeout, elapsed_time);
652 }
653
654 trace_run_poll_handlers_end(ctx, progress, *timeout);
655 return progress;
656 }
657
658 /* try_poll_mode:
659 * @ctx: the AioContext
660 * @timeout: timeout for blocking wait, computed by the caller and updated if
661 * polling succeeds.
662 *
663 * ctx->notify_me must be non-zero so this function can detect aio_notify().
664 *
665 * Note that the caller must have incremented ctx->list_lock.
666 *
667 * Returns: true if progress was made, false otherwise
668 */
669 static bool try_poll_mode(AioContext *ctx, int64_t *timeout)
670 {
671 int64_t max_ns = qemu_soonest_timeout(*timeout, ctx->poll_ns);
672
673 if (max_ns && !atomic_read(&ctx->poll_disable_cnt)) {
674 poll_set_started(ctx, true);
675
676 if (run_poll_handlers(ctx, max_ns, timeout)) {
677 return true;
678 }
679 }
680
681 if (poll_set_started(ctx, false)) {
682 *timeout = 0;
683 return true;
684 }
685
686 return false;
687 }
688
/*
 * aio_poll:
 * @ctx: the AioContext
 * @blocking: if true, wait up to the timeout from aio_compute_timeout();
 *            if false, use a zero timeout
 *
 * Run one iteration of the context's event loop: try polling mode, fall
 * back to the poll/epoll system call when needed, then run bottom halves,
 * dispatch the handlers that became ready, free deleted handlers and run
 * timers.  When ctx->poll_max_ns is non-zero, ctx->poll_ns is also adapted
 * to how long this iteration took up to the end of the wait.
 *
 * Must be called from the context's home thread (asserted).
 *
 * Returns: true if polling, bottom halves, fd handlers or timers made
 * progress, false otherwise.
 */
689 bool aio_poll(AioContext *ctx, bool blocking)
690 {
691 AioHandlerList ready_list = QLIST_HEAD_INITIALIZER(ready_list);
692 AioHandler *node;
693 int i;
694 int ret = 0;
695 bool progress;
696 int64_t timeout;
697 int64_t start = 0;
698
699 assert(in_aio_context_home_thread(ctx));
700
701 /* aio_notify can avoid the expensive event_notifier_set if
702 * everything (file descriptors, bottom halves, timers) will
703 * be re-evaluated before the next blocking poll(). This is
704 * already true when aio_poll is called with blocking == false;
705 * if blocking == true, it is only true after poll() returns,
706 * so disable the optimization now.
707 */
708 if (blocking) {
709 atomic_add(&ctx->notify_me, 2);
710 }
711
712 qemu_lockcnt_inc(&ctx->list_lock);
713
/* The start time is only needed for the adaptive polling logic below */
714 if (ctx->poll_max_ns) {
715 start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
716 }
717
718 timeout = blocking ? aio_compute_timeout(ctx) : 0;
719 progress = try_poll_mode(ctx, &timeout);
720 assert(!(timeout && progress));
721
722 /* If polling is allowed, non-blocking aio_poll does not need the
723 * system call---a single round of run_poll_handlers_once suffices.
724 */
725 if (timeout || atomic_read(&ctx->poll_disable_cnt)) {
726 assert(npfd == 0);
727
728 /* fill pollfds */
729
/* With epoll in use the fds are already registered; skip the array */
730 if (!aio_epoll_enabled(ctx)) {
731 QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
732 if (!QLIST_IS_INSERTED(node, node_deleted) && node->pfd.events
733 && aio_node_check(ctx, node->is_external)) {
734 add_pollfd(node);
735 }
736 }
737 }
738
739 /* wait until next event */
740 if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) {
741 npfd = 0; /* pollfds[] is not being used */
742 ret = aio_epoll(ctx, &ready_list, timeout);
743 } else {
744 ret = qemu_poll_ns(pollfds, npfd, timeout);
745 }
746 }
747
/* Undo the notify_me increment made before the wait */
748 if (blocking) {
749 atomic_sub(&ctx->notify_me, 2);
750 aio_notify_accept(ctx);
751 }
752
753 /* Adjust polling time */
754 if (ctx->poll_max_ns) {
755 int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
756
757 if (block_ns <= ctx->poll_ns) {
758 /* This is the sweet spot, no adjustment needed */
759 } else if (block_ns > ctx->poll_max_ns) {
760 /* We'd have to poll for too long, poll less */
761 int64_t old = ctx->poll_ns;
762
/* A zero poll_shrink means: drop straight back to no polling */
763 if (ctx->poll_shrink) {
764 ctx->poll_ns /= ctx->poll_shrink;
765 } else {
766 ctx->poll_ns = 0;
767 }
768
769 trace_poll_shrink(ctx, old, ctx->poll_ns);
770 } else if (ctx->poll_ns < ctx->poll_max_ns &&
771 block_ns < ctx->poll_max_ns) {
772 /* There is room to grow, poll longer */
773 int64_t old = ctx->poll_ns;
774 int64_t grow = ctx->poll_grow;
775
/* A zero poll_grow selects the default growth factor of 2 */
776 if (grow == 0) {
777 grow = 2;
778 }
779
780 if (ctx->poll_ns) {
781 ctx->poll_ns *= grow;
782 } else {
783 ctx->poll_ns = 4000; /* start polling at 4 microseconds */
784 }
785
786 if (ctx->poll_ns > ctx->poll_max_ns) {
787 ctx->poll_ns = ctx->poll_max_ns;
788 }
789
790 trace_poll_grow(ctx, old, ctx->poll_ns);
791 }
792 }
793
794 /* if we have any readable fds, dispatch event */
/* npfd is 0 on the epoll path, where aio_epoll() filled ready_list itself */
795 if (ret > 0) {
796 for (i = 0; i < npfd; i++) {
797 int revents = pollfds[i].revents;
798
799 if (revents) {
800 add_ready_handler(&ready_list, nodes[i], revents);
801 }
802 }
803 }
804
/* Release the thread-local arrays before any callback can re-enter */
805 npfd = 0;
806
807 progress |= aio_bh_poll(ctx);
808
809 if (ret > 0) {
810 progress |= aio_dispatch_ready_handlers(ctx, &ready_list);
811 }
812
813 aio_free_deleted_handlers(ctx);
814
815 qemu_lockcnt_dec(&ctx->list_lock);
816
817 progress |= timerlistgroup_run_timers(&ctx->tlg);
818
819 return progress;
820 }
821
822 void aio_context_setup(AioContext *ctx)
823 {
824 #ifdef CONFIG_EPOLL_CREATE1
825 assert(!ctx->epollfd);
826 ctx->epollfd = epoll_create1(EPOLL_CLOEXEC);
827 if (ctx->epollfd == -1) {
828 fprintf(stderr, "Failed to create epoll instance: %s", strerror(errno));
829 ctx->epoll_available = false;
830 } else {
831 ctx->epoll_available = true;
832 }
833 #endif
834 }
835
836 void aio_context_destroy(AioContext *ctx)
837 {
838 #ifdef CONFIG_EPOLL_CREATE1
839 aio_epoll_disable(ctx);
840 #endif
841 }
842
843 void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
844 int64_t grow, int64_t shrink, Error **errp)
845 {
846 /* No thread synchronization here, it doesn't matter if an incorrect value
847 * is used once.
848 */
849 ctx->poll_max_ns = max_ns;
850 ctx->poll_ns = 0;
851 ctx->poll_grow = grow;
852 ctx->poll_shrink = shrink;
853
854 aio_notify(ctx);
855 }