1 /* SPDX-License-Identifier: LGPL-2.1+ */
12 #include <sys/epoll.h>
/* Invoke the lxc_log_define() macro (defined outside this excerpt) for "mainloop". */
20 lxc_log_define(mainloop
, lxc
);
/*
 * Bits stored in a handler's flags word by the io_uring mainloop in this
 * file: CANCEL_SUCCESS is set next to the "Removed ... handler" trace and
 * CANCEL_RECEIVED next to the "Canceled ... handler" trace. Once both bits
 * are set the handler is handed to delete_handler().
 */
22 #define CANCEL_RECEIVED (1 << 0)
23 #define CANCEL_SUCCESS (1 << 1)
/*
 * Bookkeeping record for one handler registered with the mainloop.
 *
 * NOTE(review): code in this file also uses handler->fd, handler->data and
 * handler->flags; their declarations are not visible in this excerpt.
 */
25 struct mainloop_handler
{
/* List node that links this handler into descr->handlers. */
26 struct lxc_list
*list
;
/* Called by the mainloop with the fd, the event value, the user data and the descriptor. */
29 lxc_mainloop_callback_t callback
;
/* Called from delete_handler() with the fd and the user data. */
30 lxc_mainloop_cleanup_t cleanup
;
/* Name printed in this file's log messages. */
31 const char *handler_name
;
/* Forward declaration: disarm_handler() calls this ahead of its definition. */
37 static int __io_uring_disarm(struct lxc_async_descr
*descr
,
38 struct mainloop_handler
*handler
);
/*
 * Stop event delivery for @handler on @descr.
 *
 * The io_uring branch removes the request with __io_uring_disarm(); the epoll
 * call drops handler->fd from descr->epfd with EPOLL_CTL_DEL. A failure is
 * logged through syswarn_ret(-1, ...); success is traced.
 *
 * The retained comment text says a oneshot io_uring handler needs no disarm
 * because it was already deactivated when its event fired.
 *
 * NOTE(review): several lines of this function are missing from this excerpt
 * (the declaration of ret, how @oneshot gates the io_uring call, the
 * else/error conditions and the success return) - confirm against the full
 * file before relying on this description.
 */
40 static int disarm_handler(struct lxc_async_descr
*descr
,
41 struct mainloop_handler
*handler
, bool oneshot
)
45 if (descr
->type
== LXC_MAINLOOP_IO_URING
) {
47 * For a oneshot handler we don't have to do anything. If we
48 * end up here we know that an event for this handler has been
49 * generated before and since this is a oneshot handler it
50 * means that it has been deactivated. So the only thing we
51 * need to do is to call the registered cleanup handler and
52 * remove the handler from the list.
55 ret
= __io_uring_disarm(descr
, handler
);
57 ret
= epoll_ctl(descr
->epfd
, EPOLL_CTL_DEL
, handler
->fd
, NULL
);
60 return syswarn_ret(-1, "Failed to disarm %d for \"%s\" handler",
61 handler
->fd
, handler
->handler_name
);
63 TRACE("Disarmed %d for \"%s\" handler", handler
->fd
, handler
->handler_name
);
/*
 * Tear down @handler.
 *
 * If a cleanup callback is set it is invoked with handler->fd and
 * handler->data; a warning is logged for a failed cleanup. The deletion is
 * traced and handler->list is taken over with move_ptr().
 *
 * NOTE(review): the condition guarding the warning and whatever happens to
 * the list node afterwards (unlinking, freeing) are not visible in this
 * excerpt - confirm against the full file.
 */
67 static void delete_handler(struct mainloop_handler
*handler
)
69 struct lxc_list
*list
;
71 if (handler
->cleanup
) {
74 ret
= handler
->cleanup(handler
->fd
, handler
->data
);
76 SYSWARN("Failed to cleanup %d for \"%s\" handler", handler
->fd
, handler
->handler_name
);
79 TRACE("Deleted %d for \"%s\" handler", handler
->fd
, handler
->handler_name
);
80 list
= move_ptr(handler
->list
);
/*
 * Disarm @handler on @descr and, only when disarming reported success (0),
 * delete it. @oneshot is forwarded unchanged to disarm_handler().
 */
static inline void cleanup_handler(struct lxc_async_descr *descr,
				   struct mainloop_handler *handler, bool oneshot)
{
	int disarmed;

	disarmed = disarm_handler(descr, handler, oneshot);
	if (disarmed != 0)
		return;

	delete_handler(handler);
}
/*
 * Stub variant of the io_uring mainloop: it only returns ret_errno(ENOSYS).
 *
 * NOTE(review): presumably this is the variant compiled when liburing is not
 * available; the preprocessor guard and the rest of the parameter list are
 * not visible in this excerpt.
 */
94 static inline int __lxc_mainloop_io_uring(struct lxc_async_descr
*descr
,
97 return ret_errno(ENOSYS
);
/*
 * Stub variant of __io_uring_arm(): no request is queued, the call simply
 * reports ENOSYS through ret_errno().
 *
 * NOTE(review): presumably the variant built without liburing - the
 * preprocessor guard is not visible in this excerpt.
 */
static int __io_uring_arm(struct lxc_async_descr *descr,
			  struct mainloop_handler *handler, bool oneshot)
{
	return ret_errno(ENOSYS);
}
/*
 * Stub variant of __io_uring_disarm(): nothing is removed, the call simply
 * reports ENOSYS through ret_errno().
 *
 * NOTE(review): presumably the variant built without liburing - the
 * preprocessor guard is not visible in this excerpt.
 */
static int __io_uring_disarm(struct lxc_async_descr *descr,
			     struct mainloop_handler *handler)
{
	return ret_errno(ENOSYS);
}
/*
 * Stub variant of __io_uring_open(): @descr is left untouched and ENOSYS is
 * reported through ret_errno().
 *
 * NOTE(review): presumably the variant built without liburing - the
 * preprocessor guard is not visible in this excerpt.
 */
static inline int __io_uring_open(struct lxc_async_descr *descr)
{
	return ret_errno(ENOSYS);
}
/*
 * Set up @descr as an io_uring-backed mainloop.
 *
 * Visible steps: *descr is reset with a compound literal, shared anonymous
 * memory sized for a struct io_uring is mapped into descr->ring, a queue of
 * 512 entries is initialised with IORING_SETUP_SQPOLL, and
 * io_uring_ring_dontfork() is called on the ring before descr->type is set to
 * LXC_MAINLOOP_IO_URING. The tail unmaps the ring and returns
 * ret_errno(ENOSYS).
 *
 * NOTE(review): the conditions around the error logging, the success return
 * and the label leading to the munmap()/ENOSYS tail are missing from this
 * excerpt; presumably that tail is the failure path - confirm against the
 * full file.
 */
119 static inline int __io_uring_open(struct lxc_async_descr
*descr
)
122 *descr
= (struct lxc_async_descr
){
126 descr
->ring
= mmap(NULL
, sizeof(struct io_uring
), PROT_READ
| PROT_WRITE
,
127 MAP_SHARED
| MAP_POPULATE
| MAP_ANONYMOUS
, -1, 0);
128 if (descr
->ring
== MAP_FAILED
)
129 return syserror("Failed to mmap io_uring memory");
131 ret
= io_uring_queue_init(512, descr
->ring
, IORING_SETUP_SQPOLL
);
133 SYSERROR("Failed to initialize io_uring instance");
137 ret
= io_uring_ring_dontfork(descr
->ring
);
139 SYSERROR("Failed to prevent inheritance of io_uring mmaped region");
143 descr
->type
= LXC_MAINLOOP_IO_URING
;
144 TRACE("Created io-uring instance");
148 ret
= munmap(descr
->ring
, sizeof(struct io_uring
));
150 SYSWARN("Failed to unmap io_uring mmaped memory");
152 return ret_errno(ENOSYS
);
/*
 * Queue a poll request for handler->fd on descr->ring.
 *
 * Visible steps: a submission queue entry is fetched (failure is reported
 * with ENOENT), a poll-add for EPOLLIN is prepared, IORING_POLL_ADD_MULTI is
 * OR-ed into sqe->len to request multishot, the entry is tagged with @handler
 * via io_uring_sqe_set_data() and submitted. When the handler is not oneshot
 * and the submit returns -EINVAL, the multishot bit is cleared and the
 * request is submitted again. Failure to add is logged via syserror_ret();
 * success is traced.
 *
 * NOTE(review): lines are missing from this excerpt, including the condition
 * under which the multishot flag is raised, the checks on ret and the final
 * return - confirm against the full file.
 */
155 static int __io_uring_arm(struct lxc_async_descr
*descr
,
156 struct mainloop_handler
*handler
, bool oneshot
)
159 struct io_uring_sqe
*sqe
;
161 sqe
= io_uring_get_sqe(descr
->ring
);
163 return syserror_set(ENOENT
, "Failed to get submission queue entry");
165 io_uring_prep_poll_add(sqe
, handler
->fd
, EPOLLIN
);
168 * Raise IORING_POLL_ADD_MULTI to set up a multishot poll. The same sqe
169 * will now produce multiple cqes. A cqe produced from a multishot sqe
170 * will raise IORING_CQE_F_MORE in cqe->flags.
171 * Some devices can't be used with IORING_POLL_ADD_MULTI. This can only
172 * be detected at completion time. The IORING_CQE_F_MORE flag will not
173 * raised in cqe->flags. This includes terminal devices. So
174 * unfortunately we can't use multishot for them although we really
175 * would like to. But instead we will need to resubmit them. The
176 * io_uring based mainloop will deal cases whwere multishot doesn't
177 * work and resubmit the request. The handler just needs to inform the
178 * mainloop that it wants to keep the handler.
181 sqe
->len
|= IORING_POLL_ADD_MULTI
;
183 io_uring_sqe_set_data(sqe
, handler
);
184 ret
= io_uring_submit(descr
->ring
);
186 if (!oneshot
&& ret
== -EINVAL
) {
187 /* The kernel might not yet support multishot. */
188 sqe
->len
&= ~IORING_POLL_ADD_MULTI
;
189 ret
= io_uring_submit(descr
->ring
);
193 return syserror_ret(ret
, "Failed to add \"%s\" handler", handler
->handler_name
);
195 TRACE("Added \"%s\" handler", handler
->handler_name
);
/*
 * Submit a poll-remove request for @handler on descr->ring.
 *
 * Visible steps: a submission queue entry is fetched (failure is reported
 * with ENOENT), io_uring_prep_poll_remove() is called with @handler as the
 * target, the entry is also tagged with @handler, and it is submitted. A
 * failed submit is logged via syserror_ret(); success is traced.
 *
 * NOTE(review): the declaration of ret, the checks guarding the two error
 * returns and the final return are missing from this excerpt - confirm
 * against the full file.
 */
199 static int __io_uring_disarm(struct lxc_async_descr
*descr
,
200 struct mainloop_handler
*handler
)
203 struct io_uring_sqe
*sqe
;
205 sqe
= io_uring_get_sqe(descr
->ring
);
207 return syserror_set(ENOENT
,
208 "Failed to get submission queue entry");
210 io_uring_prep_poll_remove(sqe
, handler
);
211 io_uring_sqe_set_data(sqe
, handler
);
212 ret
= io_uring_submit(descr
->ring
);
214 return syserror_ret(ret
, "Failed to remove \"%s\" handler",
215 handler
->handler_name
);
217 TRACE("Removed handler \"%s\"", handler
->handler_name
);
/*
 * Split a millisecond timeout into the seconds / nanoseconds pair of a
 * struct __kernel_timespec.
 *
 * @ts:         destination; both tv_sec and tv_nsec are overwritten.
 * @timeout_ms: timeout in milliseconds.
 */
static void msec_to_ts(struct __kernel_timespec *ts, unsigned int timeout_ms)
{
	const unsigned int whole_secs = timeout_ms / 1000;
	const unsigned int leftover_ms = timeout_ms - whole_secs * 1000;

	ts->tv_nsec = leftover_ms * 1000000;
	ts->tv_sec = whole_secs;
}
/*
 * io_uring backend of the mainloop.
 *
 * Visible steps: @timeout_ms is converted with msec_to_ts(); a completion is
 * awaited with io_uring_wait_cqe_timeout() or io_uring_wait_cqe(); a
 * completion without IORING_CQE_F_MORE is treated as oneshot; the handler is
 * recovered from the cqe user data and the cqe is marked as seen.
 *
 * Result handling that can be seen here: the CANCEL_SUCCESS and
 * CANCEL_RECEIVED bits are accumulated in handler->flags and the handler is
 * deleted once exactly both are set. Otherwise the handler callback runs and
 * its return value selects between re-arming a oneshot request
 * (LXC_MAINLOOP_CONTINUE), disarming and deleting (LXC_MAINLOOP_DISARM),
 * returning 0 (LXC_MAINLOOP_CLOSE) and returning an error
 * (LXC_MAINLOOP_ERROR). The function also returns 0 when descr->handlers is
 * empty.
 *
 * NOTE(review): the loop construct, the switch heads, the case labels for the
 * cqe result and several conditions are missing from this excerpt, so the
 * exact mapping of cqe results to branches cannot be confirmed from here.
 */
227 static int __lxc_mainloop_io_uring(struct lxc_async_descr
*descr
, int timeout_ms
)
229 struct __kernel_timespec ts
;
232 msec_to_ts(&ts
, timeout_ms
);
237 bool oneshot
= false;
238 struct io_uring_cqe
*cqe
= NULL
;
239 struct mainloop_handler
*handler
= NULL
;
242 ret
= io_uring_wait_cqe_timeout(descr
->ring
, &cqe
, &ts
);
244 ret
= io_uring_wait_cqe(descr
->ring
, &cqe
);
252 return syserror_ret(ret
, "Failed to wait for completion");
255 ret
= LXC_MAINLOOP_CONTINUE
;
256 oneshot
= !(cqe
->flags
& IORING_CQE_F_MORE
);
258 handler
= io_uring_cqe_get_data(cqe
);
259 io_uring_cqe_seen(descr
->ring
, cqe
);
264 TRACE("Removed \"%s\" handler", handler
->handler_name
);
265 handler
->flags
|= CANCEL_SUCCESS
;
266 if (has_exact_flags(handler
->flags
, (CANCEL_SUCCESS
| CANCEL_RECEIVED
)))
267 delete_handler(handler
);
270 TRACE("Repeat sqe remove request for \"%s\" handler", handler
->handler_name
);
273 TRACE("Canceled \"%s\" handler", handler
->handler_name
);
274 handler
->flags
|= CANCEL_RECEIVED
;
275 if (has_exact_flags(handler
->flags
, (CANCEL_SUCCESS
| CANCEL_RECEIVED
)))
276 delete_handler(handler
);
279 TRACE("No sqe for \"%s\" handler", handler
->handler_name
);
282 WARN("Received unexpected return value %d in cqe for \"%s\" handler",
283 res
, handler
->handler_name
);
287 ret
= handler
->callback(handler
->fd
, res
, handler
->data
, descr
);
289 case LXC_MAINLOOP_CONTINUE
:
290 /* We're operating in oneshot mode so we need to rearm. */
291 if (oneshot
&& __io_uring_arm(descr
, handler
, true))
294 case LXC_MAINLOOP_DISARM
:
295 disarm_handler(descr
, handler
, oneshot
);
297 delete_handler(handler
);
299 case LXC_MAINLOOP_CLOSE
:
300 return log_trace(0, "Closing from \"%s\"", handler
->handler_name
);
301 case LXC_MAINLOOP_ERROR
:
302 return syserror_ret(-1, "Closing with error from \"%s\"", handler
->handler_name
);
304 WARN("Received unexpected return value %d from \"%s\" handler",
305 ret
, handler
->handler_name
);
310 if (lxc_list_empty(&descr
->handlers
))
311 return error_ret(0, "Closing because there are no more handlers");
/*
 * epoll backend of the mainloop.
 *
 * Visible steps: epoll_wait() collects up to MAX_EVENTS events from
 * descr->epfd using @timeout_ms; for each event the handler stored in
 * events[i].data.ptr is called with its fd, the event mask, its user data
 * and @descr. A LXC_MAINLOOP_DISARM result leads to cleanup_handler() with
 * oneshot=false. The emptiness of descr->handlers is also checked.
 *
 * NOTE(review): the enclosing loop, the switch head, the bodies of the
 * CONTINUE / CLOSE / ERROR cases and all return statements are missing from
 * this excerpt - confirm against the full file.
 */
316 static int __lxc_mainloop_epoll(struct lxc_async_descr
*descr
, int timeout_ms
)
320 struct epoll_event events
[MAX_EVENTS
];
322 nfds
= epoll_wait(descr
->epfd
, events
, MAX_EVENTS
, timeout_ms
);
330 for (int i
= 0; i
< nfds
; i
++) {
332 struct mainloop_handler
*handler
= events
[i
].data
.ptr
;
334 /* If the handler returns a positive value, exit the
337 ret
= handler
->callback(handler
->fd
, events
[i
].events
,
338 handler
->data
, descr
);
340 case LXC_MAINLOOP_DISARM
:
341 cleanup_handler(descr
, handler
, false);
343 case LXC_MAINLOOP_CONTINUE
:
345 case LXC_MAINLOOP_CLOSE
:
347 case LXC_MAINLOOP_ERROR
:
355 if (lxc_list_empty(&descr
->handlers
))
360 int lxc_mainloop(struct lxc_async_descr
*descr
, int timeout_ms
)
362 if (descr
->type
== LXC_MAINLOOP_IO_URING
)
363 return __lxc_mainloop_io_uring(descr
, timeout_ms
);
365 return __lxc_mainloop_epoll(descr
, timeout_ms
);
/*
 * Common implementation behind the public add-handler entry points.
 *
 * Visible steps: EBADF and EINVAL are returned for rejected arguments
 * (a missing callback, cleanup, events or handler_name yields EINVAL); a
 * zeroed handler is allocated (ENOMEM on failure) and filled in; the handler
 * is armed through __io_uring_arm() or registered with EPOLL_CTL_ADD; a list
 * node is allocated (ENOMEM on failure), stored in handler->list, given the
 * handler as its element and appended to descr->handlers. Ownership of both
 * allocations is released from the __do_free cleanup via move_ptr().
 *
 * NOTE(review): part of the parameter list and several statements are
 * missing from this excerpt (e.g. the fd check behind EBADF and the check of
 * ret after arming) - confirm against the full file.
 * NOTE(review): as far as visible, the handler is armed before the list node
 * is allocated; if lxc_list_new() fails the handler looks like it is freed by
 * __do_free while still registered with the backend - verify.
 * NOTE(review): the lxc_list_add_elem() statement ends in a stray second ';'.
 */
368 static int __lxc_mainloop_add_handler_events(struct lxc_async_descr
*descr
,
370 lxc_mainloop_callback_t callback
,
371 lxc_mainloop_cleanup_t cleanup
,
372 void *data
, bool oneshot
,
373 const char *handler_name
)
375 __do_free
struct mainloop_handler
*handler
= NULL
;
376 __do_free
struct lxc_list
*list
= NULL
;
378 struct epoll_event ev
;
381 return ret_errno(EBADF
);
383 if (!callback
|| !cleanup
|| !events
|| !handler_name
)
384 return ret_errno(EINVAL
);
386 handler
= zalloc(sizeof(*handler
));
388 return ret_errno(ENOMEM
);
390 handler
->callback
= callback
;
391 handler
->cleanup
= cleanup
;
393 handler
->data
= data
;
394 handler
->handler_name
= handler_name
;
396 if (descr
->type
== LXC_MAINLOOP_IO_URING
) {
397 ret
= __io_uring_arm(descr
, handler
, oneshot
);
400 ev
.data
.ptr
= handler
;
401 ret
= epoll_ctl(descr
->epfd
, EPOLL_CTL_ADD
, fd
, &ev
);
406 list
= lxc_list_new();
408 return ret_errno(ENOMEM
);
410 handler
->list
= list
;
411 lxc_list_add_elem(list
, move_ptr(handler
));;
412 lxc_list_add_tail(&descr
->handlers
, move_ptr(list
));
/*
 * Register a handler for @fd with a caller-supplied event mask.
 *
 * Forwards to __lxc_mainloop_add_handler_events() with oneshot=false and
 * returns its result.
 *
 * NOTE(review): the declaration of the events parameter and one line of the
 * forwarded argument list are not visible in this excerpt.
 */
416 int lxc_mainloop_add_handler_events(struct lxc_async_descr
*descr
, int fd
,
418 lxc_mainloop_callback_t callback
,
419 lxc_mainloop_cleanup_t cleanup
,
420 void *data
, const char *handler_name
)
422 return __lxc_mainloop_add_handler_events(descr
, fd
, events
,
424 data
, false, handler_name
);
/*
 * Register a handler for @fd that listens for EPOLLIN.
 *
 * Forwards to __lxc_mainloop_add_handler_events() with events=EPOLLIN and
 * oneshot=false and returns its result.
 *
 * NOTE(review): one line of the forwarded argument list is not visible in
 * this excerpt.
 */
427 int lxc_mainloop_add_handler(struct lxc_async_descr
*descr
, int fd
,
428 lxc_mainloop_callback_t callback
,
429 lxc_mainloop_cleanup_t cleanup
,
430 void *data
, const char *handler_name
)
432 return __lxc_mainloop_add_handler_events(descr
, fd
, EPOLLIN
,
434 data
, false, handler_name
);
/*
 * Register a oneshot handler for @fd that listens for EPOLLIN.
 *
 * Forwards to __lxc_mainloop_add_handler_events() with events=EPOLLIN and
 * oneshot=true and returns its result.
 *
 * NOTE(review): one line of the forwarded argument list is not visible in
 * this excerpt.
 */
437 int lxc_mainloop_add_oneshot_handler(struct lxc_async_descr
*descr
, int fd
,
438 lxc_mainloop_callback_t callback
,
439 lxc_mainloop_cleanup_t cleanup
,
440 void *data
, const char *handler_name
)
442 return __lxc_mainloop_add_handler_events(descr
, fd
, EPOLLIN
,
444 data
, true, handler_name
);
/*
 * Remove the handler registered for @fd from @descr.
 *
 * Visible steps: descr->handlers is walked and each handler's fd is compared
 * with @fd; the handler is disarmed through __io_uring_disarm() or
 * EPOLL_CTL_DEL, with a failure logged via syserror(). For the epoll type
 * the list node is unlinked and its element freed right away; the retained
 * comment text says that for io_uring the deletion happens at completion
 * time. The trailing return reports EINVAL.
 *
 * NOTE(review): the statement taken when the fd does not match, the check on
 * ret, the success return and any freeing of the list node itself are
 * missing from this excerpt; presumably EINVAL is the "no handler for this
 * fd" result - confirm against the full file.
 */
447 int lxc_mainloop_del_handler(struct lxc_async_descr
*descr
, int fd
)
450 struct lxc_list
*iterator
= NULL
;
452 lxc_list_for_each(iterator
, &descr
->handlers
) {
453 struct mainloop_handler
*handler
= iterator
->elem
;
455 if (handler
->fd
!= fd
)
458 if (descr
->type
== LXC_MAINLOOP_IO_URING
)
459 ret
= __io_uring_disarm(descr
, handler
);
461 ret
= epoll_ctl(descr
->epfd
, EPOLL_CTL_DEL
, fd
, NULL
);
463 return syserror("Failed to disarm \"%s\"", handler
->handler_name
);
466 * For io_uring the deletion happens at completion time. Either
467 * we get ENOENT if the request was oneshot and it had already
468 * triggered or we get ECANCELED for the original sqe and 0 for
469 * the cancellation request.
471 if (descr
->type
== LXC_MAINLOOP_EPOLL
) {
472 lxc_list_del(iterator
);
473 free(iterator
->elem
);
480 return ret_errno(EINVAL
);
/*
 * Set up @descr as an epoll-backed mainloop.
 *
 * Visible steps: *descr is reset with a compound literal, an epoll instance
 * is created with EPOLL_CLOEXEC and stored in descr->epfd (failure is logged
 * via syserror()), descr->type is set to LXC_MAINLOOP_EPOLL and the creation
 * is traced.
 *
 * NOTE(review): the compound literal's fields, the check on descr->epfd and
 * the success return are missing from this excerpt.
 */
483 static inline int __epoll_open(struct lxc_async_descr
*descr
)
485 *descr
= (struct lxc_async_descr
){
489 descr
->epfd
= epoll_create1(EPOLL_CLOEXEC
);
491 return syserror("Failed to create epoll instance");
493 descr
->type
= LXC_MAINLOOP_EPOLL
;
494 TRACE("Created epoll instance");
/*
 * Initialise @descr for use with the mainloop.
 *
 * Visible steps: __io_uring_open() and __epoll_open() are both called on
 * @descr, a failure is logged via syserror(), and descr->handlers is
 * initialised as an empty list.
 *
 * NOTE(review): the conditions between the two open calls are missing from
 * this excerpt; presumably epoll is the fallback when io_uring setup fails -
 * confirm against the full file.
 */
498 int lxc_mainloop_open(struct lxc_async_descr
*descr
)
502 ret
= __io_uring_open(descr
);
504 ret
= __epoll_open(descr
);
506 return syserror("Failed to create mainloop instance");
508 lxc_list_init(&descr
->handlers
);
/*
 * Release everything held by @descr.
 *
 * Visible steps: descr->handlers is walked with the next pointer saved
 * before each node is unlinked and its element freed; for the io_uring type
 * the queue is torn down with io_uring_queue_exit() and the ring memory is
 * unmapped; descr->epfd is closed with close_prot_errno_disarm().
 *
 * NOTE(review): lines are missing from this excerpt (freeing of the list
 * node itself, advancing the iterator, the branch structure around the
 * "Unsupported io_uring mainloop" error and the epoll close) and the
 * function may continue past the end of this view - confirm against the
 * full file.
 */
512 void lxc_mainloop_close(struct lxc_async_descr
*descr
)
514 struct lxc_list
*iterator
, *next
;
516 iterator
= descr
->handlers
.next
;
517 while (iterator
!= &descr
->handlers
) {
518 next
= iterator
->next
;
520 lxc_list_del(iterator
);
521 free(iterator
->elem
);
526 if (descr
->type
== LXC_MAINLOOP_IO_URING
) {
528 io_uring_queue_exit(descr
->ring
);
529 munmap(descr
->ring
, sizeof(struct io_uring
));
531 ERROR("Unsupported io_uring mainloop");
534 close_prot_errno_disarm(descr
->epfd
);