]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/mainloop.c
Merge pull request #3899 from denisfa/master
[mirror_lxc.git] / src / lxc / mainloop.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #ifndef _GNU_SOURCE
4 #define _GNU_SOURCE 1
5 #endif
6 #include <errno.h>
7 #include <fcntl.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <poll.h>
12 #include <sys/epoll.h>
13 #include <unistd.h>
14
15 #include "config.h"
16 #include "log.h"
17 #include "macro.h"
18 #include "mainloop.h"
19
/*
 * NOTE(review): presumably registers the "mainloop" log category that the
 * TRACE/WARN/SYSWARN/SYSERROR calls in this file log under - confirm against
 * log.h.
 */
20 lxc_log_define(mainloop, lxc);
21
/*
 * Bits kept in mainloop_handler->flags by the io_uring mainloop while a
 * handler is being removed:
 *   CANCEL_RECEIVED - a completion with res == -ECANCELED was seen for it.
 *   CANCEL_SUCCESS  - a completion with res == 0 was seen for it.
 * The handler is deleted once both bits are set.
 */
22 #define CANCEL_RECEIVED (1 << 0)
23 #define CANCEL_SUCCESS (1 << 1)
24
/* One registered file descriptor together with the callbacks attached to it. */
25 struct mainloop_handler {
26 struct lxc_list *list; /* list node whose elem is this handler */
27 int fd; /* file descriptor being watched */
28 void *data; /* opaque pointer handed to callback and cleanup */
29 lxc_mainloop_callback_t callback; /* called as callback(fd, events, data, descr) */
30 lxc_mainloop_cleanup_t cleanup; /* called as cleanup(fd, data) when the handler is deleted */
31 const char *name; /* used in log messages; stored by pointer, not copied */
32 unsigned int flags; /* CANCEL_* bits, set by the io_uring mainloop */
33 };
34
/* Size of the event array passed to epoll_wait() per iteration of the epoll mainloop. */
35 #define MAX_EVENTS 10
36
/*
 * Forward declaration: disarm_handler() below calls this before either of
 * its definitions (stub or liburing-backed) appears in the file.
 */
37 static int __io_uring_disarm(struct lxc_async_descr *descr,
38 struct mainloop_handler *handler);
39
40 static int disarm_handler(struct lxc_async_descr *descr,
41 struct mainloop_handler *handler, bool oneshot)
42 {
43 int ret = 0;
44
45 if (descr->type == LXC_MAINLOOP_IO_URING) {
46 /*
47 * For a oneshot handler we don't have to do anything. If we
48 * end up here we know that an event for this handler has been
49 * generated before and since this is a oneshot handler it
50 * means that it has been deactivated. So the only thing we
51 * need to do is to call the registered cleanup handler and
52 * remove the handler from the list.
53 */
54 if (!oneshot)
55 ret = __io_uring_disarm(descr, handler);
56 } else {
57 ret = epoll_ctl(descr->epfd, EPOLL_CTL_DEL, handler->fd, NULL);
58 }
59 if (ret < 0)
60 return syswarn_ret(-1, "Failed to disarm %d for \"%s\" handler",
61 handler->fd, handler->name);
62
63 TRACE("Disarmed %d for \"%s\" handler", handler->fd, handler->name);
64 return 0;
65 }
66
67 static void delete_handler(struct mainloop_handler *handler)
68 {
69 struct lxc_list *list;
70
71 if (handler->cleanup) {
72 int ret;
73
74 ret = handler->cleanup(handler->fd, handler->data);
75 if (ret < 0)
76 SYSWARN("Failed to cleanup %d for \"%s\" handler", handler->fd, handler->name);
77 }
78
79 TRACE("Deleted %d for \"%s\" handler", handler->fd, handler->name);
80 list = move_ptr(handler->list);
81 lxc_list_del(list);
82 free(list->elem);
83 free(list);
84 }
85
/*
 * Disarm @handler and, only if disarming succeeded, delete it. When
 * disarming fails the handler is left allocated and on its list.
 */
static inline void cleanup_handler(struct lxc_async_descr *descr,
				   struct mainloop_handler *handler, bool oneshot)
{
	if (disarm_handler(descr, handler, oneshot) != 0)
		return;

	delete_handler(handler);
}
92
93 #ifndef HAVE_LIBURING
/*
 * Stub for builds without liburing: there is no io_uring mainloop to run, so
 * every call fails with ENOSYS.
 */
static inline int __lxc_mainloop_io_uring(struct lxc_async_descr *descr,
					  int timeout_ms)
{
	return ret_errno(ENOSYS);
}
99
/*
 * Stub for builds without liburing: arming a handler on a ring is not
 * possible, so every call fails with ENOSYS.
 */
static int __io_uring_arm(struct lxc_async_descr *descr,
			  struct mainloop_handler *handler, bool oneshot)
{
	return ret_errno(ENOSYS);
}
105
/*
 * Stub for builds without liburing: there is no ring to cancel a request on,
 * so every call fails with ENOSYS.
 */
static int __io_uring_disarm(struct lxc_async_descr *descr,
			     struct mainloop_handler *handler)
{
	return ret_errno(ENOSYS);
}
111
/*
 * Stub for builds without liburing: fails with ENOSYS. lxc_mainloop_open()
 * checks for -ENOSYS from this function and then falls back to epoll.
 */
static inline int __io_uring_open(struct lxc_async_descr *descr)
{
	return ret_errno(ENOSYS);
}
116
117 #else
118
119 static inline int __io_uring_open(struct lxc_async_descr *descr)
120 {
121 int ret;
122 *descr = (struct lxc_async_descr){
123 .epfd = -EBADF,
124 };
125
126 descr->ring = mmap(NULL, sizeof(struct io_uring), PROT_READ | PROT_WRITE,
127 MAP_SHARED | MAP_POPULATE | MAP_ANONYMOUS, -1, 0);
128 if (descr->ring == MAP_FAILED)
129 return syserror("Failed to mmap io_uring memory");
130
131 ret = io_uring_queue_init(512, descr->ring, 0);
132 if (ret) {
133 SYSERROR("Failed to initialize io_uring instance");
134 goto on_error;
135 }
136
137 ret = io_uring_ring_dontfork(descr->ring);
138 if (ret) {
139 SYSERROR("Failed to prevent inheritance of io_uring mmaped region");
140 goto on_error;
141 }
142
143 descr->type = LXC_MAINLOOP_IO_URING;
144 TRACE("Created io-uring instance");
145 return 0;
146
147 on_error:
148 ret = munmap(descr->ring, sizeof(struct io_uring));
149 if (ret < 0)
150 SYSWARN("Failed to unmap io_uring mmaped memory");
151
152 return ret_errno(ENOSYS);
153 }
154
155 static int __io_uring_arm(struct lxc_async_descr *descr,
156 struct mainloop_handler *handler, bool oneshot)
157 {
158 int ret;
159 struct io_uring_sqe *sqe;
160
161 sqe = io_uring_get_sqe(descr->ring);
162 if (!sqe)
163 return syserror_set(ENOENT, "Failed to get submission queue entry");
164
165 io_uring_prep_poll_add(sqe, handler->fd, EPOLLIN);
166
167 /*
168 * Raise IORING_POLL_ADD_MULTI to set up a multishot poll. The same sqe
169 * will now produce multiple cqes. A cqe produced from a multishot sqe
170 * will raise IORING_CQE_F_MORE in cqe->flags.
171 * Some devices can't be used with IORING_POLL_ADD_MULTI. This can only
172 * be detected at completion time. The IORING_CQE_F_MORE flag will not
173 * raised in cqe->flags. This includes terminal devices. So
174 * unfortunately we can't use multishot for them although we really
175 * would like to. But instead we will need to resubmit them. The
176 * io_uring based mainloop will deal cases whwere multishot doesn't
177 * work and resubmit the request. The handler just needs to inform the
178 * mainloop that it wants to keep the handler.
179 */
180 if (!oneshot)
181 sqe->len |= IORING_POLL_ADD_MULTI;
182
183 io_uring_sqe_set_data(sqe, handler);
184 ret = io_uring_submit(descr->ring);
185 if (ret < 0) {
186 if (!oneshot && ret == -EINVAL) {
187 /* The kernel might not yet support multishot. */
188 sqe->len &= ~IORING_POLL_ADD_MULTI;
189 ret = io_uring_submit(descr->ring);
190 }
191 }
192 if (ret < 0)
193 return syserror_ret(ret, "Failed to add \"%s\" handler", handler->name);
194
195 TRACE("Added \"%s\" handler", handler->name);
196 return 0;
197 }
198
199 static int __io_uring_disarm(struct lxc_async_descr *descr,
200 struct mainloop_handler *handler)
201 {
202 int ret;
203 struct io_uring_sqe *sqe;
204
205 sqe = io_uring_get_sqe(descr->ring);
206 if (!sqe)
207 return syserror_set(ENOENT,
208 "Failed to get submission queue entry");
209
210 io_uring_prep_poll_remove(sqe, handler);
211 io_uring_sqe_set_data(sqe, handler);
212 ret = io_uring_submit(descr->ring);
213 if (ret < 0)
214 return syserror_ret(ret, "Failed to remove \"%s\" handler",
215 handler->name);
216
217 TRACE("Removed handler \"%s\"", handler->name);
218 return ret;
219 }
220
/*
 * Convert a millisecond timeout into a kernel timespec: whole seconds go
 * into tv_sec, the remaining milliseconds are scaled to nanoseconds in
 * tv_nsec.
 */
static void msec_to_ts(struct __kernel_timespec *ts, unsigned int timeout_ms)
{
	const unsigned int msec_per_sec = 1000;
	const unsigned int nsec_per_msec = 1000000;
	unsigned int whole_secs = timeout_ms / msec_per_sec;
	unsigned int leftover_ms = timeout_ms % msec_per_sec;

	ts->tv_sec = whole_secs;
	ts->tv_nsec = leftover_ms * nsec_per_msec;
}
226
227 static int __lxc_mainloop_io_uring(struct lxc_async_descr *descr, int timeout_ms)
228 {
229 struct __kernel_timespec ts;
230
231 if (timeout_ms >= 0)
232 msec_to_ts(&ts, timeout_ms);
233
234 for (;;) {
235 int ret;
236 __s32 res = 0;
237 bool oneshot = false;
238 struct io_uring_cqe *cqe = NULL;
239 struct mainloop_handler *handler = NULL;
240
241 if (timeout_ms >= 0)
242 ret = io_uring_wait_cqe_timeout(descr->ring, &cqe, &ts);
243 else
244 ret = io_uring_wait_cqe(descr->ring, &cqe);
245 if (ret < 0) {
246 if (ret == -EINTR)
247 continue;
248
249 if (ret == -ETIME)
250 return 0;
251
252 return syserror_ret(ret, "Failed to wait for completion");
253 }
254
255 ret = LXC_MAINLOOP_CONTINUE;
256 oneshot = !(cqe->flags & IORING_CQE_F_MORE);
257 res = cqe->res;
258 handler = io_uring_cqe_get_data(cqe);
259 io_uring_cqe_seen(descr->ring, cqe);
260
261 if (res <= 0) {
262 switch (res) {
263 case 0:
264 TRACE("Removed \"%s\" handler", handler->name);
265 handler->flags |= CANCEL_SUCCESS;
266 if (has_exact_flags(handler->flags, (CANCEL_SUCCESS | CANCEL_RECEIVED)))
267 delete_handler(handler);
268 break;
269 case -EALREADY:
270 TRACE("Repeat sqe remove request for \"%s\" handler", handler->name);
271 break;
272 case -ECANCELED:
273 TRACE("Canceled \"%s\" handler", handler->name);
274 handler->flags |= CANCEL_RECEIVED;
275 if (has_exact_flags(handler->flags, (CANCEL_SUCCESS | CANCEL_RECEIVED)))
276 delete_handler(handler);
277 break;
278 case -ENOENT:
279 TRACE("No sqe for \"%s\" handler", handler->name);
280 break;
281 default:
282 WARN("Received unexpected return value %d in cqe for \"%s\" handler",
283 res, handler->name);
284 break;
285 }
286 } else {
287 ret = handler->callback(handler->fd, res, handler->data, descr);
288 switch (ret) {
289 case LXC_MAINLOOP_CONTINUE:
290 /* We're operating in oneshot mode so we need to rearm. */
291 if (oneshot && __io_uring_arm(descr, handler, true))
292 return -1;
293 break;
294 case LXC_MAINLOOP_DISARM:
295 /*
296 * If this is a multhishot handler we need to
297 * disarm it here. Actual cleanup happens
298 * later.
299 */
300 disarm_handler(descr, handler, oneshot);
301 /*
302 * If this is a oneshot handler we know it has
303 * just run and we also know the above call was
304 * a nop. So clean it up directly.
305 */
306 if (oneshot)
307 delete_handler(handler);
308 break;
309 case LXC_MAINLOOP_CLOSE:
310 return log_trace(0, "Closing from \"%s\"", handler->name);
311 case LXC_MAINLOOP_ERROR:
312 return syserror_ret(-1, "Closing with error from \"%s\"", handler->name);
313 default:
314 WARN("Received unexpected return value %d from \"%s\" handler",
315 ret, handler->name);
316 break;
317 }
318 }
319
320 if (lxc_list_empty(&descr->handlers))
321 return error_ret(0, "Closing because there are no more handlers");
322 }
323 }
324 #endif
325
326 static int __lxc_mainloop_epoll(struct lxc_async_descr *descr, int timeout_ms)
327 {
328 for (;;) {
329 int nfds;
330 struct epoll_event events[MAX_EVENTS];
331
332 nfds = epoll_wait(descr->epfd, events, MAX_EVENTS, timeout_ms);
333 if (nfds < 0) {
334 if (errno == EINTR)
335 continue;
336
337 return -errno;
338 }
339
340 for (int i = 0; i < nfds; i++) {
341 int ret;
342 struct mainloop_handler *handler = events[i].data.ptr;
343
344 /* If the handler returns a positive value, exit the
345 * mainloop.
346 */
347 ret = handler->callback(handler->fd, events[i].events,
348 handler->data, descr);
349 switch (ret) {
350 case LXC_MAINLOOP_DISARM:
351 cleanup_handler(descr, handler, false);
352 __fallthrough;
353 case LXC_MAINLOOP_CONTINUE:
354 break;
355 case LXC_MAINLOOP_CLOSE:
356 return 0;
357 case LXC_MAINLOOP_ERROR:
358 return -1;
359 }
360 }
361
362 if (nfds == 0)
363 return 0;
364
365 if (lxc_list_empty(&descr->handlers))
366 return 0;
367 }
368 }
369
370 int lxc_mainloop(struct lxc_async_descr *descr, int timeout_ms)
371 {
372 if (descr->type == LXC_MAINLOOP_IO_URING)
373 return __lxc_mainloop_io_uring(descr, timeout_ms);
374
375 return __lxc_mainloop_epoll(descr, timeout_ms);
376 }
377
378 static int __lxc_mainloop_add_handler_events(struct lxc_async_descr *descr,
379 int fd, int events,
380 lxc_mainloop_callback_t callback,
381 lxc_mainloop_cleanup_t cleanup,
382 void *data, bool oneshot,
383 const char *name)
384 {
385 __do_free struct mainloop_handler *handler = NULL;
386 __do_free struct lxc_list *list = NULL;
387 int ret;
388 struct epoll_event ev;
389
390 if (fd < 0)
391 return ret_errno(EBADF);
392
393 if (!callback || !cleanup || !events || !name)
394 return ret_errno(EINVAL);
395
396 handler = zalloc(sizeof(*handler));
397 if (!handler)
398 return ret_errno(ENOMEM);
399
400 handler->callback = callback;
401 handler->cleanup = cleanup;
402 handler->fd = fd;
403 handler->data = data;
404 handler->name = name;
405
406 if (descr->type == LXC_MAINLOOP_IO_URING) {
407 ret = __io_uring_arm(descr, handler, oneshot);
408 } else {
409 ev.events = events;
410 ev.data.ptr = handler;
411 ret = epoll_ctl(descr->epfd, EPOLL_CTL_ADD, fd, &ev);
412 }
413 if (ret < 0)
414 return -errno;
415
416 list = lxc_list_new();
417 if (!list)
418 return ret_errno(ENOMEM);
419
420 handler->list = list;
421 lxc_list_add_elem(list, move_ptr(handler));;
422 lxc_list_add_tail(&descr->handlers, move_ptr(list));
423 return 0;
424 }
425
426 int lxc_mainloop_add_handler_events(struct lxc_async_descr *descr, int fd,
427 int events,
428 lxc_mainloop_callback_t callback,
429 lxc_mainloop_cleanup_t cleanup,
430 void *data, const char *name)
431 {
432 return __lxc_mainloop_add_handler_events(descr, fd, events,
433 callback, cleanup,
434 data, false, name);
435 }
436
437 int lxc_mainloop_add_handler(struct lxc_async_descr *descr, int fd,
438 lxc_mainloop_callback_t callback,
439 lxc_mainloop_cleanup_t cleanup,
440 void *data, const char *name)
441 {
442 return __lxc_mainloop_add_handler_events(descr, fd, EPOLLIN,
443 callback, cleanup,
444 data, false, name);
445 }
446
447 int lxc_mainloop_add_oneshot_handler(struct lxc_async_descr *descr, int fd,
448 lxc_mainloop_callback_t callback,
449 lxc_mainloop_cleanup_t cleanup,
450 void *data, const char *name)
451 {
452 return __lxc_mainloop_add_handler_events(descr, fd, EPOLLIN,
453 callback, cleanup,
454 data, true, name);
455 }
456
457 int lxc_mainloop_del_handler(struct lxc_async_descr *descr, int fd)
458 {
459 int ret;
460 struct lxc_list *iterator = NULL;
461
462 lxc_list_for_each(iterator, &descr->handlers) {
463 struct mainloop_handler *handler = iterator->elem;
464
465 if (handler->fd != fd)
466 continue;
467
468 if (descr->type == LXC_MAINLOOP_IO_URING)
469 ret = __io_uring_disarm(descr, handler);
470 else
471 ret = epoll_ctl(descr->epfd, EPOLL_CTL_DEL, fd, NULL);
472 if (ret < 0)
473 return syserror("Failed to disarm \"%s\"", handler->name);
474
475 /*
476 * For io_uring the deletion happens at completion time. Either
477 * we get ENOENT if the request was oneshot and it had already
478 * triggered or we get ECANCELED for the original sqe and 0 for
479 * the cancellation request.
480 */
481 if (descr->type == LXC_MAINLOOP_EPOLL) {
482 lxc_list_del(iterator);
483 free(iterator->elem);
484 free(iterator);
485 }
486
487 return 0;
488 }
489
490 return ret_errno(EINVAL);
491 }
492
493 static inline int __epoll_open(struct lxc_async_descr *descr)
494 {
495 *descr = (struct lxc_async_descr){
496 .epfd = -EBADF,
497 };
498
499 descr->epfd = epoll_create1(EPOLL_CLOEXEC);
500 if (descr->epfd < 0)
501 return syserror("Failed to create epoll instance");
502
503 descr->type = LXC_MAINLOOP_EPOLL;
504 TRACE("Created epoll instance");
505 return 0;
506 }
507
508 int lxc_mainloop_open(struct lxc_async_descr *descr)
509 {
510 int ret;
511
512 ret = __io_uring_open(descr);
513 if (ret == -ENOSYS)
514 ret = __epoll_open(descr);
515 if (ret < 0)
516 return syserror("Failed to create mainloop instance");
517
518 lxc_list_init(&descr->handlers);
519 return 0;
520 }
521
522 void lxc_mainloop_close(struct lxc_async_descr *descr)
523 {
524 struct lxc_list *iterator, *next;
525
526 iterator = descr->handlers.next;
527 while (iterator != &descr->handlers) {
528 next = iterator->next;
529
530 lxc_list_del(iterator);
531 free(iterator->elem);
532 free(iterator);
533 iterator = next;
534 }
535
536 if (descr->type == LXC_MAINLOOP_IO_URING) {
537 #ifdef HAVE_LIBURING
538 io_uring_queue_exit(descr->ring);
539 munmap(descr->ring, sizeof(struct io_uring));
540 #else
541 ERROR("Unsupported io_uring mainloop");
542 #endif
543 } else {
544 close_prot_errno_disarm(descr->epfd);
545 }
546 }