]> git.proxmox.com Git - mirror_qemu.git/blame - util/aio-posix.c
Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
[mirror_qemu.git] / util / aio-posix.c
CommitLineData
a76bab49
AL
1/*
2 * QEMU aio implementation
3 *
4 * Copyright IBM, Corp. 2008
5 *
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
11 *
6b620ca3
PB
12 * Contributions after 2012-01-13 are licensed under the terms of the
13 * GNU GPL, version 2 or (at your option) any later version.
a76bab49
AL
14 */
15
d38ea87a 16#include "qemu/osdep.h"
737e150e 17#include "block/block.h"
9ce44e2c 18#include "qemu/main-loop.h"
f25c0b54 19#include "qemu/rcu.h"
2bbf11d7 20#include "qemu/rcu_queue.h"
1de7afc9 21#include "qemu/sockets.h"
4a1cba38 22#include "qemu/cutils.h"
c2b38b27 23#include "trace.h"
1f050a46 24#include "aio-posix.h"
a76bab49 25
d37d0e36
SH
/*
 * A handler that has not been active for this long is taken off the
 * userspace polling list (see remove_idle_poll_handlers()).
 */
#define POLL_IDLE_INTERVAL_NS (7 * NANOSECONDS_PER_SECOND)
28
aa38e19f
SH
29bool aio_poll_disabled(AioContext *ctx)
30{
d73415a3 31 return qatomic_read(&ctx->poll_disable_cnt);
aa38e19f
SH
32}
33
1f050a46
SH
34void aio_add_ready_handler(AioHandlerList *ready_list,
35 AioHandler *node,
36 int revents)
7391d34c
SH
37{
38 QLIST_SAFE_REMOVE(node, node_ready); /* remove from nested parent's list */
39 node->pfd.revents = revents;
40 QLIST_INSERT_HEAD(ready_list, node, node_ready);
41}
42
a915f4bc 43static AioHandler *find_aio_handler(AioContext *ctx, int fd)
a76bab49
AL
44{
45 AioHandler *node;
46
a915f4bc 47 QLIST_FOREACH(node, &ctx->aio_handlers, node) {
4749079c
SH
48 if (node->pfd.fd == fd) {
49 if (!QLIST_IS_INSERTED(node, node_deleted)) {
79d5ca56 50 return node;
4749079c
SH
51 }
52 }
a76bab49
AL
53 }
54
55 return NULL;
56}
57
fef16601
RN
58static bool aio_remove_fd_handler(AioContext *ctx, AioHandler *node)
59{
60 /* If the GSource is in the process of being destroyed then
61 * g_source_remove_poll() causes an assertion failure. Skip
62 * removal in that case, because glib cleans up its state during
63 * destruction anyway.
64 */
65 if (!g_source_is_destroyed(&ctx->source)) {
66 g_source_remove_poll(&ctx->source, &node->pfd);
67 }
68
73fd282e
SH
69 node->pfd.revents = 0;
70
71 /* If the fd monitor has already marked it deleted, leave it alone */
72 if (QLIST_IS_INSERTED(node, node_deleted)) {
73 return false;
74 }
75
fef16601
RN
76 /* If a read is in progress, just mark the node as deleted */
77 if (qemu_lockcnt_count(&ctx->list_lock)) {
4749079c 78 QLIST_INSERT_HEAD_RCU(&ctx->deleted_aio_handlers, node, node_deleted);
fef16601
RN
79 return false;
80 }
81 /* Otherwise, delete it for real. We can't just mark it as
82 * deleted because deleted nodes are only cleaned up while
83 * no one is walking the handlers list.
84 */
d37d0e36 85 QLIST_SAFE_REMOVE(node, node_poll);
fef16601
RN
86 QLIST_REMOVE(node, node);
87 return true;
88}
89
a915f4bc
PB
90void aio_set_fd_handler(AioContext *ctx,
91 int fd,
dca21ef2 92 bool is_external,
a915f4bc
PB
93 IOHandler *io_read,
94 IOHandler *io_write,
f6a51c84 95 AioPollFn *io_poll,
a915f4bc 96 void *opaque)
a76bab49
AL
97{
98 AioHandler *node;
fef16601 99 AioHandler *new_node = NULL;
fbe3fc5c 100 bool is_new = false;
0ed39f3d 101 bool deleted = false;
d7be5dd1 102 int poll_disable_change;
a76bab49 103
2bbf11d7
PB
104 qemu_lockcnt_lock(&ctx->list_lock);
105
a915f4bc 106 node = find_aio_handler(ctx, fd);
a76bab49
AL
107
108 /* Are we deleting the fd handler? */
4a1cba38 109 if (!io_read && !io_write && !io_poll) {
36173ec5 110 if (node == NULL) {
2bbf11d7 111 qemu_lockcnt_unlock(&ctx->list_lock);
36173ec5
PB
112 return;
113 }
8821b34a
RN
114 /* Clean events in order to unregister fd from the ctx epoll. */
115 node->pfd.events = 0;
116
d7be5dd1 117 poll_disable_change = -!node->io_poll;
a76bab49 118 } else {
d7be5dd1 119 poll_disable_change = !io_poll - (node && !node->io_poll);
a76bab49 120 if (node == NULL) {
fbe3fc5c 121 is_new = true;
a76bab49 122 }
fef16601
RN
123 /* Alloc and insert if it's not already there */
124 new_node = g_new0(AioHandler, 1);
4a1cba38 125
a76bab49 126 /* Update handler with latest information */
fef16601
RN
127 new_node->io_read = io_read;
128 new_node->io_write = io_write;
129 new_node->io_poll = io_poll;
130 new_node->opaque = opaque;
131 new_node->is_external = is_external;
132
133 if (is_new) {
134 new_node->pfd.fd = fd;
135 } else {
136 new_node->pfd = node->pfd;
137 }
138 g_source_add_poll(&ctx->source, &new_node->pfd);
139
140 new_node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
141 new_node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
142
143 QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, new_node, node);
144 }
7ed2b24c 145
d7be5dd1
PB
146 /* No need to order poll_disable_cnt writes against other updates;
147 * the counter is only used to avoid wasting time and latency on
148 * iterated polling when the system call will be ultimately necessary.
149 * Changing handlers is a rare event, and a little wasted polling until
150 * the aio_notify below is not an issue.
151 */
d73415a3
SH
152 qatomic_set(&ctx->poll_disable_cnt,
153 qatomic_read(&ctx->poll_disable_cnt) + poll_disable_change);
d7be5dd1 154
b321051c 155 ctx->fdmon_ops->update(ctx, node, new_node);
73fd282e
SH
156 if (node) {
157 deleted = aio_remove_fd_handler(ctx, node);
158 }
2bbf11d7 159 qemu_lockcnt_unlock(&ctx->list_lock);
7ed2b24c 160 aio_notify(ctx);
4a1cba38 161
0ed39f3d
FZ
162 if (deleted) {
163 g_free(node);
164 }
9958c351
PB
165}
166
684e508c
SH
167void aio_set_fd_poll(AioContext *ctx, int fd,
168 IOHandler *io_poll_begin,
169 IOHandler *io_poll_end)
170{
171 AioHandler *node = find_aio_handler(ctx, fd);
172
173 if (!node) {
174 return;
175 }
176
177 node->io_poll_begin = io_poll_begin;
178 node->io_poll_end = io_poll_end;
179}
180
a915f4bc
PB
181void aio_set_event_notifier(AioContext *ctx,
182 EventNotifier *notifier,
dca21ef2 183 bool is_external,
f6a51c84
SH
184 EventNotifierHandler *io_read,
185 AioPollFn *io_poll)
a76bab49 186{
f6a51c84
SH
187 aio_set_fd_handler(ctx, event_notifier_get_fd(notifier), is_external,
188 (IOHandler *)io_read, NULL, io_poll, notifier);
a76bab49
AL
189}
190
684e508c
SH
191void aio_set_event_notifier_poll(AioContext *ctx,
192 EventNotifier *notifier,
193 EventNotifierHandler *io_poll_begin,
194 EventNotifierHandler *io_poll_end)
195{
196 aio_set_fd_poll(ctx, event_notifier_get_fd(notifier),
197 (IOHandler *)io_poll_begin,
198 (IOHandler *)io_poll_end);
199}
200
e4346192 201static bool poll_set_started(AioContext *ctx, bool started)
684e508c
SH
202{
203 AioHandler *node;
e4346192 204 bool progress = false;
684e508c
SH
205
206 if (started == ctx->poll_started) {
e4346192 207 return false;
684e508c
SH
208 }
209
210 ctx->poll_started = started;
211
2bbf11d7 212 qemu_lockcnt_inc(&ctx->list_lock);
d37d0e36 213 QLIST_FOREACH(node, &ctx->poll_aio_handlers, node_poll) {
684e508c
SH
214 IOHandler *fn;
215
4749079c 216 if (QLIST_IS_INSERTED(node, node_deleted)) {
684e508c
SH
217 continue;
218 }
219
220 if (started) {
221 fn = node->io_poll_begin;
222 } else {
223 fn = node->io_poll_end;
224 }
225
226 if (fn) {
227 fn(node->opaque);
228 }
e4346192
SH
229
230 /* Poll one last time in case ->io_poll_end() raced with the event */
231 if (!started) {
232 progress = node->io_poll(node->opaque) || progress;
233 }
684e508c 234 }
2bbf11d7 235 qemu_lockcnt_dec(&ctx->list_lock);
e4346192
SH
236
237 return progress;
684e508c
SH
238}
239
240
a3462c65
PB
241bool aio_prepare(AioContext *ctx)
242{
684e508c
SH
243 /* Poll mode cannot be used with glib's event loop, disable it. */
244 poll_set_started(ctx, false);
245
a3462c65
PB
246 return false;
247}
248
cd9ba1eb
PB
249bool aio_pending(AioContext *ctx)
250{
251 AioHandler *node;
2bbf11d7 252 bool result = false;
cd9ba1eb 253
2bbf11d7
PB
254 /*
255 * We have to walk very carefully in case aio_set_fd_handler is
256 * called while we're walking.
257 */
258 qemu_lockcnt_inc(&ctx->list_lock);
259
260 QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
cd9ba1eb
PB
261 int revents;
262
cd9ba1eb 263 revents = node->pfd.revents & node->pfd.events;
37989ced
FZ
264 if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read &&
265 aio_node_check(ctx, node->is_external)) {
2bbf11d7
PB
266 result = true;
267 break;
cd9ba1eb 268 }
37989ced
FZ
269 if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write &&
270 aio_node_check(ctx, node->is_external)) {
2bbf11d7
PB
271 result = true;
272 break;
cd9ba1eb
PB
273 }
274 }
2bbf11d7 275 qemu_lockcnt_dec(&ctx->list_lock);
cd9ba1eb 276
2bbf11d7 277 return result;
cd9ba1eb
PB
278}
279
4749079c
SH
280static void aio_free_deleted_handlers(AioContext *ctx)
281{
282 AioHandler *node;
283
284 if (QLIST_EMPTY_RCU(&ctx->deleted_aio_handlers)) {
285 return;
286 }
287 if (!qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
288 return; /* we are nested, let the parent do the freeing */
289 }
290
291 while ((node = QLIST_FIRST_RCU(&ctx->deleted_aio_handlers))) {
292 QLIST_REMOVE(node, node);
293 QLIST_REMOVE(node, node_deleted);
d37d0e36 294 QLIST_SAFE_REMOVE(node, node_poll);
4749079c
SH
295 g_free(node);
296 }
297
298 qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
299}
300
7391d34c 301static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
a76bab49 302{
d0c8d2c0 303 bool progress = false;
7391d34c 304 int revents;
7c0628b2 305
7391d34c
SH
306 revents = node->pfd.revents & node->pfd.events;
307 node->pfd.revents = 0;
cd9ba1eb 308
d37d0e36
SH
309 /*
310 * Start polling AioHandlers when they become ready because activity is
311 * likely to continue. Note that starvation is theoretically possible when
312 * fdmon_supports_polling(), but only until the fd fires for the first
313 * time.
314 */
315 if (!QLIST_IS_INSERTED(node, node_deleted) &&
316 !QLIST_IS_INSERTED(node, node_poll) &&
317 node->io_poll) {
318 trace_poll_add(ctx, node, node->pfd.fd, revents);
319 if (ctx->poll_started && node->io_poll_begin) {
320 node->io_poll_begin(node->opaque);
321 }
322 QLIST_INSERT_HEAD(&ctx->poll_aio_handlers, node, node_poll);
323 }
324
7391d34c
SH
325 if (!QLIST_IS_INSERTED(node, node_deleted) &&
326 (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
327 aio_node_check(ctx, node->is_external) &&
328 node->io_read) {
329 node->io_read(node->opaque);
cd9ba1eb 330
7391d34c
SH
331 /* aio_notify() does not count as progress */
332 if (node->opaque != &ctx->notifier) {
cd9ba1eb
PB
333 progress = true;
334 }
cd9ba1eb 335 }
7391d34c
SH
336 if (!QLIST_IS_INSERTED(node, node_deleted) &&
337 (revents & (G_IO_OUT | G_IO_ERR)) &&
338 aio_node_check(ctx, node->is_external) &&
339 node->io_write) {
340 node->io_write(node->opaque);
341 progress = true;
342 }
343
344 return progress;
345}
346
347/*
348 * If we have a list of ready handlers then this is more efficient than
349 * scanning all handlers with aio_dispatch_handlers().
350 */
351static bool aio_dispatch_ready_handlers(AioContext *ctx,
352 AioHandlerList *ready_list)
353{
354 bool progress = false;
355 AioHandler *node;
356
357 while ((node = QLIST_FIRST(ready_list))) {
c39cbedb 358 QLIST_REMOVE(node, node_ready);
7391d34c
SH
359 progress = aio_dispatch_handler(ctx, node) || progress;
360 }
361
362 return progress;
363}
364
365/* Slower than aio_dispatch_ready_handlers() but only used via glib */
366static bool aio_dispatch_handlers(AioContext *ctx)
367{
368 AioHandler *node, *tmp;
369 bool progress = false;
370
371 QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
372 progress = aio_dispatch_handler(ctx, node) || progress;
373 }
438e1f47 374
56d2c3c6
PB
375 return progress;
376}
377
a153bf52 378void aio_dispatch(AioContext *ctx)
56d2c3c6 379{
a153bf52 380 qemu_lockcnt_inc(&ctx->list_lock);
bd451435 381 aio_bh_poll(ctx);
a153bf52 382 aio_dispatch_handlers(ctx);
4749079c 383 aio_free_deleted_handlers(ctx);
a153bf52 384 qemu_lockcnt_dec(&ctx->list_lock);
438e1f47 385
a153bf52 386 timerlistgroup_run_timers(&ctx->tlg);
d0c8d2c0
SH
387}
388
d37d0e36
SH
389static bool run_poll_handlers_once(AioContext *ctx,
390 int64_t now,
391 int64_t *timeout)
684e508c
SH
392{
393 bool progress = false;
394 AioHandler *node;
d37d0e36 395 AioHandler *tmp;
684e508c 396
d37d0e36
SH
397 QLIST_FOREACH_SAFE(node, &ctx->poll_aio_handlers, node_poll, tmp) {
398 if (aio_node_check(ctx, node->is_external) &&
cfeb35d6 399 node->io_poll(node->opaque)) {
d37d0e36
SH
400 node->poll_idle_timeout = now + POLL_IDLE_INTERVAL_NS;
401
993ed89f
PB
402 /*
403 * Polling was successful, exit try_poll_mode immediately
404 * to adjust the next polling time.
405 */
e30cffa0 406 *timeout = 0;
cfeb35d6
PB
407 if (node->opaque != &ctx->notifier) {
408 progress = true;
409 }
684e508c
SH
410 }
411
412 /* Caller handles freeing deleted nodes. Don't do it here. */
413 }
414
415 return progress;
416}
417
d37d0e36
SH
418static bool fdmon_supports_polling(AioContext *ctx)
419{
420 return ctx->fdmon_ops->need_wait != aio_poll_disabled;
421}
422
423static bool remove_idle_poll_handlers(AioContext *ctx, int64_t now)
424{
425 AioHandler *node;
426 AioHandler *tmp;
427 bool progress = false;
428
429 /*
430 * File descriptor monitoring implementations without userspace polling
431 * support suffer from starvation when a subset of handlers is polled
432 * because fds will not be processed in a timely fashion. Don't remove
433 * idle poll handlers.
434 */
435 if (!fdmon_supports_polling(ctx)) {
436 return false;
437 }
438
439 QLIST_FOREACH_SAFE(node, &ctx->poll_aio_handlers, node_poll, tmp) {
440 if (node->poll_idle_timeout == 0LL) {
441 node->poll_idle_timeout = now + POLL_IDLE_INTERVAL_NS;
442 } else if (now >= node->poll_idle_timeout) {
443 trace_poll_remove(ctx, node, node->pfd.fd);
444 node->poll_idle_timeout = 0LL;
445 QLIST_SAFE_REMOVE(node, node_poll);
446 if (ctx->poll_started && node->io_poll_end) {
447 node->io_poll_end(node->opaque);
448
449 /*
450 * Final poll in case ->io_poll_end() races with an event.
451 * Nevermind about re-adding the handler in the rare case where
452 * this causes progress.
453 */
454 progress = node->io_poll(node->opaque) || progress;
455 }
456 }
457 }
458
459 return progress;
460}
461
4a1cba38
SH
462/* run_poll_handlers:
463 * @ctx: the AioContext
464 * @max_ns: maximum time to poll for, in nanoseconds
465 *
466 * Polls for a given time.
467 *
2bbf11d7 468 * Note that the caller must have incremented ctx->list_lock.
4a1cba38
SH
469 *
470 * Returns: true if progress was made, false otherwise
471 */
e30cffa0 472static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout)
4a1cba38 473{
684e508c 474 bool progress;
e30cffa0 475 int64_t start_time, elapsed_time;
4a1cba38 476
2bbf11d7 477 assert(qemu_lockcnt_count(&ctx->list_lock) > 0);
4a1cba38 478
e30cffa0 479 trace_run_poll_handlers_begin(ctx, max_ns, *timeout);
4a1cba38 480
3aa221b3
SH
481 /*
482 * Optimization: ->io_poll() handlers often contain RCU read critical
483 * sections and we therefore see many rcu_read_lock() -> rcu_read_unlock()
484 * -> rcu_read_lock() -> ... sequences with expensive memory
485 * synchronization primitives. Make the entire polling loop an RCU
486 * critical section because nested rcu_read_lock()/rcu_read_unlock() calls
487 * are cheap.
488 */
489 RCU_READ_LOCK_GUARD();
490
e30cffa0 491 start_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
4a1cba38 492 do {
d37d0e36 493 progress = run_poll_handlers_once(ctx, start_time, timeout);
e30cffa0 494 elapsed_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start_time;
993ed89f
PB
495 max_ns = qemu_soonest_timeout(*timeout, max_ns);
496 assert(!(max_ns && progress));
aa38e19f 497 } while (elapsed_time < max_ns && !ctx->fdmon_ops->need_wait(ctx));
4a1cba38 498
d37d0e36
SH
499 if (remove_idle_poll_handlers(ctx, start_time + elapsed_time)) {
500 *timeout = 0;
501 progress = true;
502 }
503
e30cffa0
PB
504 /* If time has passed with no successful polling, adjust *timeout to
505 * keep the same ending time.
506 */
507 if (*timeout != -1) {
508 *timeout -= MIN(*timeout, elapsed_time);
509 }
4a1cba38 510
e30cffa0 511 trace_run_poll_handlers_end(ctx, progress, *timeout);
4a1cba38
SH
512 return progress;
513}
514
515/* try_poll_mode:
516 * @ctx: the AioContext
e30cffa0
PB
517 * @timeout: timeout for blocking wait, computed by the caller and updated if
518 * polling succeeds.
4a1cba38 519 *
2bbf11d7 520 * Note that the caller must have incremented ctx->list_lock.
4a1cba38
SH
521 *
522 * Returns: true if progress was made, false otherwise
523 */
e30cffa0 524static bool try_poll_mode(AioContext *ctx, int64_t *timeout)
4a1cba38 525{
d37d0e36
SH
526 int64_t max_ns;
527
528 if (QLIST_EMPTY_RCU(&ctx->poll_aio_handlers)) {
529 return false;
530 }
4a1cba38 531
d37d0e36 532 max_ns = qemu_soonest_timeout(*timeout, ctx->poll_ns);
aa38e19f 533 if (max_ns && !ctx->fdmon_ops->need_wait(ctx)) {
e30cffa0 534 poll_set_started(ctx, true);
684e508c 535
e30cffa0
PB
536 if (run_poll_handlers(ctx, max_ns, timeout)) {
537 return true;
4a1cba38
SH
538 }
539 }
540
e4346192
SH
541 if (poll_set_started(ctx, false)) {
542 *timeout = 0;
543 return true;
544 }
684e508c 545
e4346192 546 return false;
4a1cba38
SH
547}
548
d0c8d2c0
SH
549bool aio_poll(AioContext *ctx, bool blocking)
550{
7391d34c 551 AioHandlerList ready_list = QLIST_HEAD_INITIALIZER(ready_list);
4a1cba38 552 int ret = 0;
164a101f 553 bool progress;
44277bf9 554 bool use_notify_me;
e98ab097 555 int64_t timeout;
82a41186 556 int64_t start = 0;
d0c8d2c0 557
5710a3e0
PB
558 /*
559 * There cannot be two concurrent aio_poll calls for the same AioContext (or
560 * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
561 * We rely on this below to avoid slow locked accesses to ctx->notify_me.
9ce44e2c
KW
562 *
563 * aio_poll() may only be called in the AioContext's thread. iohandler_ctx
564 * is special in that it runs in the main thread, but that thread's context
565 * is qemu_aio_context.
5710a3e0 566 */
9ce44e2c
KW
567 assert(in_aio_context_home_thread(ctx == iohandler_get_aio_context() ?
568 qemu_get_aio_context() : ctx));
0dc165c1 569
44277bf9
SH
570 qemu_lockcnt_inc(&ctx->list_lock);
571
572 if (ctx->poll_max_ns) {
573 start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
574 }
575
576 timeout = blocking ? aio_compute_timeout(ctx) : 0;
577 progress = try_poll_mode(ctx, &timeout);
578 assert(!(timeout && progress));
579
580 /*
581 * aio_notify can avoid the expensive event_notifier_set if
0ceb849b 582 * everything (file descriptors, bottom halves, timers) will
e4c7e2d1
PB
583 * be re-evaluated before the next blocking poll(). This is
584 * already true when aio_poll is called with blocking == false;
eabc9779
PB
585 * if blocking == true, it is only true after poll() returns,
586 * so disable the optimization now.
0ceb849b 587 */
44277bf9
SH
588 use_notify_me = timeout != 0;
589 if (use_notify_me) {
d73415a3 590 qatomic_set(&ctx->notify_me, qatomic_read(&ctx->notify_me) + 2);
5710a3e0 591 /*
44277bf9 592 * Write ctx->notify_me before reading ctx->notified. Pairs with
5710a3e0
PB
593 * smp_mb in aio_notify().
594 */
595 smp_mb();
a76bab49 596
44277bf9 597 /* Don't block if aio_notify() was called */
d73415a3 598 if (qatomic_read(&ctx->notified)) {
44277bf9
SH
599 timeout = 0;
600 }
82a41186
SH
601 }
602
e30cffa0
PB
603 /* If polling is allowed, non-blocking aio_poll does not need the
604 * system call---a single round of run_poll_handlers_once suffices.
605 */
aa38e19f 606 if (timeout || ctx->fdmon_ops->need_wait(ctx)) {
1f050a46 607 ret = ctx->fdmon_ops->wait(ctx, &ready_list, timeout);
fbe3fc5c 608 }
4a1cba38 609
44277bf9 610 if (use_notify_me) {
5710a3e0 611 /* Finish the poll before clearing the flag. */
d73415a3
SH
612 qatomic_store_release(&ctx->notify_me,
613 qatomic_read(&ctx->notify_me) - 2);
eabc9779 614 }
9eb0bfca 615
44277bf9
SH
616 aio_notify_accept(ctx);
617
82a41186
SH
618 /* Adjust polling time */
619 if (ctx->poll_max_ns) {
620 int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
621
622 if (block_ns <= ctx->poll_ns) {
623 /* This is the sweet spot, no adjustment needed */
624 } else if (block_ns > ctx->poll_max_ns) {
625 /* We'd have to poll for too long, poll less */
626 int64_t old = ctx->poll_ns;
627
628 if (ctx->poll_shrink) {
629 ctx->poll_ns /= ctx->poll_shrink;
630 } else {
631 ctx->poll_ns = 0;
632 }
633
634 trace_poll_shrink(ctx, old, ctx->poll_ns);
635 } else if (ctx->poll_ns < ctx->poll_max_ns &&
636 block_ns < ctx->poll_max_ns) {
637 /* There is room to grow, poll longer */
638 int64_t old = ctx->poll_ns;
639 int64_t grow = ctx->poll_grow;
640
641 if (grow == 0) {
642 grow = 2;
643 }
644
645 if (ctx->poll_ns) {
646 ctx->poll_ns *= grow;
647 } else {
648 ctx->poll_ns = 4000; /* start polling at 4 microseconds */
649 }
650
651 if (ctx->poll_ns > ctx->poll_max_ns) {
652 ctx->poll_ns = ctx->poll_max_ns;
653 }
654
655 trace_poll_grow(ctx, old, ctx->poll_ns);
656 }
657 }
658
a153bf52
PB
659 progress |= aio_bh_poll(ctx);
660
661 if (ret > 0) {
7391d34c 662 progress |= aio_dispatch_ready_handlers(ctx, &ready_list);
9eb0bfca 663 }
bcdc1857 664
4749079c
SH
665 aio_free_deleted_handlers(ctx);
666
bd451435
PB
667 qemu_lockcnt_dec(&ctx->list_lock);
668
a153bf52
PB
669 progress |= timerlistgroup_run_timers(&ctx->tlg);
670
164a101f 671 return progress;
a76bab49 672}
37fcee5d 673
7e003465 674void aio_context_setup(AioContext *ctx)
37fcee5d 675{
1f050a46
SH
676 ctx->fdmon_ops = &fdmon_poll_ops;
677 ctx->epollfd = -1;
678
73fd282e
SH
679 /* Use the fastest fd monitoring implementation if available */
680 if (fdmon_io_uring_setup(ctx)) {
681 return;
682 }
683
1f050a46 684 fdmon_epoll_setup(ctx);
37fcee5d 685}
4a1cba38 686
cd0a6d2b
JW
687void aio_context_destroy(AioContext *ctx)
688{
73fd282e 689 fdmon_io_uring_destroy(ctx);
1f050a46 690 fdmon_epoll_disable(ctx);
de137e44 691 aio_free_deleted_handlers(ctx);
cd0a6d2b
JW
692}
693
ba607ca8
SH
694void aio_context_use_g_source(AioContext *ctx)
695{
696 /*
697 * Disable io_uring when the glib main loop is used because it doesn't
698 * support mixed glib/aio_poll() usage. It relies on aio_poll() being
699 * called regularly so that changes to the monitored file descriptors are
700 * submitted, otherwise a list of pending fd handlers builds up.
701 */
702 fdmon_io_uring_destroy(ctx);
703 aio_free_deleted_handlers(ctx);
704}
705
82a41186
SH
706void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
707 int64_t grow, int64_t shrink, Error **errp)
4a1cba38 708{
82a41186
SH
709 /* No thread synchronization here, it doesn't matter if an incorrect value
710 * is used once.
4a1cba38
SH
711 */
712 ctx->poll_max_ns = max_ns;
82a41186
SH
713 ctx->poll_ns = 0;
714 ctx->poll_grow = grow;
715 ctx->poll_shrink = shrink;
4a1cba38
SH
716
717 aio_notify(ctx);
718}