1 /* SPDX-License-Identifier: LGPL-2.1+ */
12 #include <sys/epoll.h>
lxc_log_define(mainloop, lxc);

/*
 * Bookkeeping bits kept in mainloop_handler->flags while an io_uring
 * poll request is being cancelled.
 */
#define CANCEL_RAISED	(1 << 0)
#define CANCEL_RECEIVED	(1 << 1)
#define CANCEL_SUCCESS	(1 << 2)
26 struct mainloop_handler
{
27 struct lxc_list
*list
;
30 lxc_mainloop_callback_t callback
;
31 lxc_mainloop_cleanup_t cleanup
;
32 const char *handler_name
;
/*
 * Forward declaration: delete_handler() below needs to disarm io_uring
 * handlers before the definition appears.
 */
static int __io_uring_disarm(struct lxc_async_descr *descr,
			     struct mainloop_handler *handler);
41 static void delete_handler(struct lxc_async_descr
*descr
,
42 struct mainloop_handler
*handler
, bool oneshot
)
45 struct lxc_list
*list
;
47 if (descr
->type
== LXC_MAINLOOP_IO_URING
) {
49 * For a oneshot handler we don't have to do anything. If we
50 * end up here we know that an event for this handler has been
51 * generated before and since this is a oneshot handler it
52 * means that it has been deactivated. So the only thing we
53 * need to do is to call the registered cleanup handler and
54 * remove the handlerfrom the list.
57 ret
= __io_uring_disarm(descr
, handler
);
59 ret
= epoll_ctl(descr
->epfd
, EPOLL_CTL_DEL
, handler
->fd
, NULL
);
62 SYSWARN("Failed to delete \"%d\" for \"%s\"", handler
->fd
, handler
->handler_name
);
64 if (handler
->cleanup
) {
65 ret
= handler
->cleanup(handler
->fd
, handler
->data
);
67 SYSWARN("Failed to call cleanup \"%s\" handler", handler
->handler_name
);
70 list
= move_ptr(handler
->list
);
/*
 * ENOSYS fallback stubs used when liburing support is not compiled in;
 * lxc_mainloop_open() then falls back to the epoll backend.
 *
 * NOTE(review): the preprocessor guards (#if !HAVE_LIBURING / #else /
 * #endif) separating these stubs from the real implementations below
 * were lost in this paste — restore them when merging.
 */
static inline int __lxc_mainloop_io_uring(struct lxc_async_descr *descr,
					  int timeout_ms)
{
	return ret_errno(ENOSYS);
}

static int __io_uring_arm(struct lxc_async_descr *descr,
			  struct mainloop_handler *handler, bool oneshot)
{
	return ret_errno(ENOSYS);
}

static int __io_uring_disarm(struct lxc_async_descr *descr,
			     struct mainloop_handler *handler)
{
	return ret_errno(ENOSYS);
}

static inline int __io_uring_open(struct lxc_async_descr *descr)
{
	return ret_errno(ENOSYS);
}
102 static inline int __io_uring_open(struct lxc_async_descr
*descr
)
105 *descr
= (struct lxc_async_descr
){
109 descr
->ring
= mmap(NULL
, sizeof(struct io_uring
), PROT_READ
| PROT_WRITE
,
110 MAP_SHARED
| MAP_POPULATE
| MAP_ANONYMOUS
, -1, 0);
111 if (descr
->ring
== MAP_FAILED
)
112 return syserror("Failed to mmap io_uring memory");
114 ret
= io_uring_queue_init(512, descr
->ring
, IORING_SETUP_SQPOLL
);
116 SYSERROR("Failed to initialize io_uring instance");
120 ret
= io_uring_ring_dontfork(descr
->ring
);
122 SYSERROR("Failed to prevent inheritance of io_uring mmaped region");
126 descr
->type
= LXC_MAINLOOP_IO_URING
;
127 TRACE("Created io-uring instance");
131 ret
= munmap(descr
->ring
, sizeof(struct io_uring
));
133 SYSWARN("Failed to unmap io_uring mmaped memory");
135 return ret_errno(ENOSYS
);
138 static int __io_uring_arm(struct lxc_async_descr
*descr
,
139 struct mainloop_handler
*handler
, bool oneshot
)
142 struct io_uring_sqe
*sqe
;
144 sqe
= io_uring_get_sqe(descr
->ring
);
146 return syserror_set(ENOENT
, "Failed to get submission queue entry");
148 io_uring_prep_poll_add(sqe
, handler
->fd
, EPOLLIN
);
151 * Raise IORING_POLL_ADD_MULTI to set up a multishot poll. The same sqe
152 * will now produce multiple cqes. A cqe produced from a multishot sqe
153 * will raise IORING_CQE_F_MORE in cqe->flags.
154 * Some devices can't be used with IORING_POLL_ADD_MULTI. This can only
155 * be detected at completion time. The IORING_CQE_F_MORE flag will not
156 * raised in cqe->flags. This includes terminal devices. So
157 * unfortunately we can't use multishot for them although we really
158 * would like to. But instead we will need to resubmit them. The
159 * io_uring based mainloop will deal cases whwere multishot doesn't
160 * work and resubmit the request. The handler just needs to inform the
161 * mainloop that it wants to keep the handler.
164 sqe
->len
|= IORING_POLL_ADD_MULTI
;
166 io_uring_sqe_set_data(sqe
, handler
);
167 ret
= io_uring_submit(descr
->ring
);
169 if (!oneshot
&& ret
== -EINVAL
) {
170 /* The kernel might not yet support multishot. */
171 sqe
->len
&= ~IORING_POLL_ADD_MULTI
;
172 ret
= io_uring_submit(descr
->ring
);
176 return syserror_ret(ret
, "Failed to add \"%s\" handler", handler
->handler_name
);
178 TRACE("Added \"%s\" handler", handler
->handler_name
);
182 static int __io_uring_disarm(struct lxc_async_descr
*descr
,
183 struct mainloop_handler
*handler
)
186 struct io_uring_sqe
*sqe
;
188 sqe
= io_uring_get_sqe(descr
->ring
);
190 return syserror_set(ENOENT
,
191 "Failed to get submission queue entry");
193 io_uring_prep_poll_remove(sqe
, handler
);
194 handler
->flags
|= CANCEL_RAISED
;
195 io_uring_sqe_set_data(sqe
, handler
);
196 ret
= io_uring_submit(descr
->ring
);
198 handler
->flags
&= ~CANCEL_RAISED
;
199 return syserror_ret(ret
, "Failed to remove \"%s\" handler",
200 handler
->handler_name
);
203 TRACE("Removed handler \"%s\"", handler
->handler_name
);
/*
 * Convert a millisecond timeout into the seconds/nanoseconds pair
 * expected by io_uring_wait_cqe_timeout().
 */
static void msec_to_ts(struct __kernel_timespec *ts, unsigned int timeout_ms)
{
	unsigned int secs = timeout_ms / 1000;

	ts->tv_sec = secs;
	ts->tv_nsec = (timeout_ms - secs * 1000) * 1000000;
}
213 static int __lxc_mainloop_io_uring(struct lxc_async_descr
*descr
, int timeout_ms
)
215 struct __kernel_timespec ts
;
218 msec_to_ts(&ts
, timeout_ms
);
223 bool oneshot
= false;
224 struct io_uring_cqe
*cqe
= NULL
;
225 struct mainloop_handler
*handler
= NULL
;
228 ret
= io_uring_wait_cqe_timeout(descr
->ring
, &cqe
, &ts
);
230 ret
= io_uring_wait_cqe(descr
->ring
, &cqe
);
238 return syserror_ret(ret
, "Failed to wait for completion");
241 ret
= LXC_MAINLOOP_CONTINUE
;
242 oneshot
= !(cqe
->flags
& IORING_CQE_F_MORE
);
244 handler
= io_uring_cqe_get_data(cqe
);
245 io_uring_cqe_seen(descr
->ring
, cqe
);
249 handler
->flags
|= CANCEL_RECEIVED
;
250 TRACE("Canceled \"%s\" handler", handler
->handler_name
);
253 handler
->flags
= CANCEL_SUCCESS
| CANCEL_RECEIVED
;
254 TRACE("No sqe for \"%s\" handler", handler
->handler_name
);
257 TRACE("Repeat sqe remove request for \"%s\" handler", handler
->handler_name
);
260 handler
->flags
|= CANCEL_SUCCESS
;
261 TRACE("Removed \"%s\" handler", handler
->handler_name
);
265 * We need to always remove the handler for a
266 * successful oneshot request.
269 handler
->flags
= CANCEL_SUCCESS
| CANCEL_RECEIVED
;
272 ret
= handler
->callback(handler
->fd
, mask
, handler
->data
, descr
);
274 case LXC_MAINLOOP_CONTINUE
:
275 /* We're operating in oneshot mode so we need to rearm. */
276 if (oneshot
&& __io_uring_arm(descr
, handler
, true))
279 case LXC_MAINLOOP_DISARM
:
280 if (has_exact_flags(handler
->flags
, (CANCEL_SUCCESS
| CANCEL_RECEIVED
)))
281 delete_handler(descr
, handler
, oneshot
);
283 case LXC_MAINLOOP_CLOSE
:
284 return log_trace(0, "Closing from \"%s\"", handler
->handler_name
);
285 case LXC_MAINLOOP_ERROR
:
286 return syserror_ret(-1, "Closing with error from \"%s\"", handler
->handler_name
);
290 if (lxc_list_empty(&descr
->handlers
))
291 return error_ret(0, "Closing because there are no more handlers");
296 static int __lxc_mainloop_epoll(struct lxc_async_descr
*descr
, int timeout_ms
)
299 struct mainloop_handler
*handler
;
300 struct epoll_event events
[MAX_EVENTS
];
303 nfds
= epoll_wait(descr
->epfd
, events
, MAX_EVENTS
, timeout_ms
);
311 for (i
= 0; i
< nfds
; i
++) {
312 handler
= events
[i
].data
.ptr
;
314 /* If the handler returns a positive value, exit the
317 ret
= handler
->callback(handler
->fd
, events
[i
].events
,
318 handler
->data
, descr
);
320 case LXC_MAINLOOP_DISARM
:
321 delete_handler(descr
, handler
, false);
323 case LXC_MAINLOOP_CONTINUE
:
325 case LXC_MAINLOOP_CLOSE
:
327 case LXC_MAINLOOP_ERROR
:
335 if (lxc_list_empty(&descr
->handlers
))
340 int lxc_mainloop(struct lxc_async_descr
*descr
, int timeout_ms
)
342 if (descr
->type
== LXC_MAINLOOP_IO_URING
)
343 return __lxc_mainloop_io_uring(descr
, timeout_ms
);
345 return __lxc_mainloop_epoll(descr
, timeout_ms
);
348 static int __lxc_mainloop_add_handler_events(struct lxc_async_descr
*descr
,
350 lxc_mainloop_callback_t callback
,
351 lxc_mainloop_cleanup_t cleanup
,
352 void *data
, bool oneshot
,
353 const char *handler_name
)
355 __do_free
struct mainloop_handler
*handler
= NULL
;
356 __do_free
struct lxc_list
*list
= NULL
;
358 struct epoll_event ev
;
361 return ret_errno(EBADF
);
363 if (!callback
|| !cleanup
|| !events
|| !handler_name
)
364 return ret_errno(EINVAL
);
366 handler
= zalloc(sizeof(*handler
));
368 return ret_errno(ENOMEM
);
370 handler
->callback
= callback
;
371 handler
->cleanup
= cleanup
;
373 handler
->data
= data
;
374 handler
->handler_name
= handler_name
;
376 if (descr
->type
== LXC_MAINLOOP_IO_URING
) {
377 ret
= __io_uring_arm(descr
, handler
, oneshot
);
380 ev
.data
.ptr
= handler
;
381 ret
= epoll_ctl(descr
->epfd
, EPOLL_CTL_ADD
, fd
, &ev
);
386 list
= lxc_list_new();
388 return ret_errno(ENOMEM
);
390 handler
->list
= list
;
391 lxc_list_add_elem(list
, move_ptr(handler
));;
392 lxc_list_add_tail(&descr
->handlers
, move_ptr(list
));
396 int lxc_mainloop_add_handler_events(struct lxc_async_descr
*descr
, int fd
,
398 lxc_mainloop_callback_t callback
,
399 lxc_mainloop_cleanup_t cleanup
,
400 void *data
, const char *handler_name
)
402 return __lxc_mainloop_add_handler_events(descr
, fd
, events
,
404 data
, false, handler_name
);
407 int lxc_mainloop_add_handler(struct lxc_async_descr
*descr
, int fd
,
408 lxc_mainloop_callback_t callback
,
409 lxc_mainloop_cleanup_t cleanup
,
410 void *data
, const char *handler_name
)
412 return __lxc_mainloop_add_handler_events(descr
, fd
, EPOLLIN
,
414 data
, false, handler_name
);
417 int lxc_mainloop_add_oneshot_handler(struct lxc_async_descr
*descr
, int fd
,
418 lxc_mainloop_callback_t callback
,
419 lxc_mainloop_cleanup_t cleanup
,
420 void *data
, const char *handler_name
)
422 return __lxc_mainloop_add_handler_events(descr
, fd
, EPOLLIN
,
424 data
, true, handler_name
);
427 int lxc_mainloop_del_handler(struct lxc_async_descr
*descr
, int fd
)
430 struct lxc_list
*iterator
= NULL
;
432 lxc_list_for_each(iterator
, &descr
->handlers
) {
433 struct mainloop_handler
*handler
= iterator
->elem
;
435 if (handler
->fd
!= fd
)
438 if (descr
->type
== LXC_MAINLOOP_IO_URING
)
439 ret
= __io_uring_disarm(descr
, handler
);
441 ret
= epoll_ctl(descr
->epfd
, EPOLL_CTL_DEL
, fd
, NULL
);
443 return syserror("Failed to disarm \"%s\"", handler
->handler_name
);
446 * For io_uring the deletion happens at completion time. Either
447 * we get ENOENT if the request was oneshot and it had already
448 * triggered or we get ECANCELED for the original sqe and 0 for
449 * the cancellation request.
451 if (descr
->type
== LXC_MAINLOOP_EPOLL
) {
452 lxc_list_del(iterator
);
453 free(iterator
->elem
);
460 return ret_errno(EINVAL
);
463 static inline int __epoll_open(struct lxc_async_descr
*descr
)
465 *descr
= (struct lxc_async_descr
){
469 descr
->epfd
= epoll_create1(EPOLL_CLOEXEC
);
471 return syserror("Failed to create epoll instance");
473 descr
->type
= LXC_MAINLOOP_EPOLL
;
474 TRACE("Created epoll instance");
478 int lxc_mainloop_open(struct lxc_async_descr
*descr
)
482 ret
= __io_uring_open(descr
);
484 ret
= __epoll_open(descr
);
486 return syserror("Failed to create mainloop instance");
488 lxc_list_init(&descr
->handlers
);
492 void lxc_mainloop_close(struct lxc_async_descr
*descr
)
494 struct lxc_list
*iterator
, *next
;
496 iterator
= descr
->handlers
.next
;
497 while (iterator
!= &descr
->handlers
) {
498 next
= iterator
->next
;
500 lxc_list_del(iterator
);
501 free(iterator
->elem
);
506 if (descr
->type
== LXC_MAINLOOP_IO_URING
) {
508 io_uring_queue_exit(descr
->ring
);
509 munmap(descr
->ring
, sizeof(struct io_uring
));
511 ERROR("Unsupported io_uring mainloop");
514 close_prot_errno_disarm(descr
->epfd
);