]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/mainloop.c
Merge pull request #3931 from brauner/2021-08-11.fixes
[mirror_lxc.git] / src / lxc / mainloop.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #ifndef _GNU_SOURCE
4 #define _GNU_SOURCE 1
5 #endif
6 #include <errno.h>
7 #include <fcntl.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <poll.h>
12 #include <sys/epoll.h>
13 #include <unistd.h>
14
15 #include "config.h"
16 #include "log.h"
17 #include "macro.h"
18 #include "mainloop.h"
19
20 lxc_log_define(mainloop, lxc);
21
/*
 * Bits kept in mainloop_handler->flags by the io_uring mainloop while a
 * handler is being removed. The handler is deleted once both bits are set.
 */
/* Set when a completion with res == -ECANCELED arrives for the handler. */
#define CANCEL_RECEIVED (1 << 0)
/* Set when a completion with res == 0 arrives for the handler. */
#define CANCEL_SUCCESS (1 << 1)
24
25 struct mainloop_handler {
26 struct lxc_list *list;
27 int fd;
28 void *data;
29 lxc_mainloop_callback_t callback;
30 lxc_mainloop_cleanup_t cleanup;
31 const char *handler_name;
32 unsigned int flags;
33 };
34
/* Upper bound on the number of events fetched by a single epoll_wait() call. */
#define MAX_EVENTS 10
36
/*
 * Forward declaration: disarm_handler() below calls this before either of
 * its definitions (liburing or stub) has been seen.
 */
static int __io_uring_disarm(struct lxc_async_descr *descr,
			     struct mainloop_handler *handler);
39
40 static int disarm_handler(struct lxc_async_descr *descr,
41 struct mainloop_handler *handler, bool oneshot)
42 {
43 int ret = 0;
44
45 if (descr->type == LXC_MAINLOOP_IO_URING) {
46 /*
47 * For a oneshot handler we don't have to do anything. If we
48 * end up here we know that an event for this handler has been
49 * generated before and since this is a oneshot handler it
50 * means that it has been deactivated. So the only thing we
51 * need to do is to call the registered cleanup handler and
52 * remove the handler from the list.
53 */
54 if (!oneshot)
55 ret = __io_uring_disarm(descr, handler);
56 } else {
57 ret = epoll_ctl(descr->epfd, EPOLL_CTL_DEL, handler->fd, NULL);
58 }
59 if (ret < 0)
60 return syswarn_ret(-1, "Failed to disarm %d for \"%s\" handler",
61 handler->fd, handler->handler_name);
62
63 TRACE("Disarmed %d for \"%s\" handler", handler->fd, handler->handler_name);
64 return 0;
65 }
66
67 static void delete_handler(struct mainloop_handler *handler)
68 {
69 struct lxc_list *list;
70
71 if (handler->cleanup) {
72 int ret;
73
74 ret = handler->cleanup(handler->fd, handler->data);
75 if (ret < 0)
76 SYSWARN("Failed to cleanup %d for \"%s\" handler", handler->fd, handler->handler_name);
77 }
78
79 TRACE("Deleted %d for \"%s\" handler", handler->fd, handler->handler_name);
80 list = move_ptr(handler->list);
81 lxc_list_del(list);
82 free(list->elem);
83 free(list);
84 }
85
/*
 * Disarm @handler and, only if disarming succeeded, delete it. When disarming
 * fails the handler stays on the list untouched.
 */
static inline void cleanup_handler(struct lxc_async_descr *descr,
				   struct mainloop_handler *handler, bool oneshot)
{
	if (disarm_handler(descr, handler, oneshot) != 0)
		return;

	delete_handler(handler);
}
92
93 #ifndef HAVE_LIBURING
/*
 * Stub used when built without liburing: there is no io_uring mainloop to
 * run, so always fail with ENOSYS.
 */
static inline int __lxc_mainloop_io_uring(struct lxc_async_descr *descr,
					  int timeout_ms)
{
	int ret = ret_errno(ENOSYS);

	return ret;
}
99
/* Stub used when built without liburing: arming always fails with ENOSYS. */
static int __io_uring_arm(struct lxc_async_descr *descr,
			  struct mainloop_handler *handler, bool oneshot)
{
	int ret = ret_errno(ENOSYS);

	return ret;
}
105
/* Stub used when built without liburing: disarming always fails with ENOSYS. */
static int __io_uring_disarm(struct lxc_async_descr *descr,
			     struct mainloop_handler *handler)
{
	int ret = ret_errno(ENOSYS);

	return ret;
}
111
/*
 * Stub used when built without liburing. lxc_mainloop_open() compares the
 * result against -ENOSYS to decide whether to fall back to epoll.
 */
static inline int __io_uring_open(struct lxc_async_descr *descr)
{
	int ret = ret_errno(ENOSYS);

	return ret;
}
116
117 #else
118
119 static inline int __io_uring_open(struct lxc_async_descr *descr)
120 {
121 int ret;
122 *descr = (struct lxc_async_descr){
123 .epfd = -EBADF,
124 };
125
126 descr->ring = mmap(NULL, sizeof(struct io_uring), PROT_READ | PROT_WRITE,
127 MAP_SHARED | MAP_POPULATE | MAP_ANONYMOUS, -1, 0);
128 if (descr->ring == MAP_FAILED)
129 return syserror("Failed to mmap io_uring memory");
130
131 ret = io_uring_queue_init(512, descr->ring, IORING_SETUP_SQPOLL);
132 if (ret) {
133 SYSERROR("Failed to initialize io_uring instance");
134 goto on_error;
135 }
136
137 ret = io_uring_ring_dontfork(descr->ring);
138 if (ret) {
139 SYSERROR("Failed to prevent inheritance of io_uring mmaped region");
140 goto on_error;
141 }
142
143 descr->type = LXC_MAINLOOP_IO_URING;
144 TRACE("Created io-uring instance");
145 return 0;
146
147 on_error:
148 ret = munmap(descr->ring, sizeof(struct io_uring));
149 if (ret < 0)
150 SYSWARN("Failed to unmap io_uring mmaped memory");
151
152 return ret_errno(ENOSYS);
153 }
154
155 static int __io_uring_arm(struct lxc_async_descr *descr,
156 struct mainloop_handler *handler, bool oneshot)
157 {
158 int ret;
159 struct io_uring_sqe *sqe;
160
161 sqe = io_uring_get_sqe(descr->ring);
162 if (!sqe)
163 return syserror_set(ENOENT, "Failed to get submission queue entry");
164
165 io_uring_prep_poll_add(sqe, handler->fd, EPOLLIN);
166
167 /*
168 * Raise IORING_POLL_ADD_MULTI to set up a multishot poll. The same sqe
169 * will now produce multiple cqes. A cqe produced from a multishot sqe
170 * will raise IORING_CQE_F_MORE in cqe->flags.
171 * Some devices can't be used with IORING_POLL_ADD_MULTI. This can only
172 * be detected at completion time. The IORING_CQE_F_MORE flag will not
173 * raised in cqe->flags. This includes terminal devices. So
174 * unfortunately we can't use multishot for them although we really
175 * would like to. But instead we will need to resubmit them. The
176 * io_uring based mainloop will deal cases whwere multishot doesn't
177 * work and resubmit the request. The handler just needs to inform the
178 * mainloop that it wants to keep the handler.
179 */
180 if (!oneshot)
181 sqe->len |= IORING_POLL_ADD_MULTI;
182
183 io_uring_sqe_set_data(sqe, handler);
184 ret = io_uring_submit(descr->ring);
185 if (ret < 0) {
186 if (!oneshot && ret == -EINVAL) {
187 /* The kernel might not yet support multishot. */
188 sqe->len &= ~IORING_POLL_ADD_MULTI;
189 ret = io_uring_submit(descr->ring);
190 }
191 }
192 if (ret < 0)
193 return syserror_ret(ret, "Failed to add \"%s\" handler", handler->handler_name);
194
195 TRACE("Added \"%s\" handler", handler->handler_name);
196 return 0;
197 }
198
199 static int __io_uring_disarm(struct lxc_async_descr *descr,
200 struct mainloop_handler *handler)
201 {
202 int ret;
203 struct io_uring_sqe *sqe;
204
205 sqe = io_uring_get_sqe(descr->ring);
206 if (!sqe)
207 return syserror_set(ENOENT,
208 "Failed to get submission queue entry");
209
210 io_uring_prep_poll_remove(sqe, handler);
211 io_uring_sqe_set_data(sqe, handler);
212 ret = io_uring_submit(descr->ring);
213 if (ret < 0)
214 return syserror_ret(ret, "Failed to remove \"%s\" handler",
215 handler->handler_name);
216
217 TRACE("Removed handler \"%s\"", handler->handler_name);
218 return ret;
219 }
220
/* Split a millisecond timeout into whole seconds plus leftover nanoseconds. */
static void msec_to_ts(struct __kernel_timespec *ts, unsigned int timeout_ms)
{
	const unsigned int whole_sec = timeout_ms / 1000;
	const unsigned int rest_msec = timeout_ms % 1000;

	ts->tv_sec = whole_sec;
	ts->tv_nsec = rest_msec * 1000000;
}
226
227 static int __lxc_mainloop_io_uring(struct lxc_async_descr *descr, int timeout_ms)
228 {
229 struct __kernel_timespec ts;
230
231 if (timeout_ms >= 0)
232 msec_to_ts(&ts, timeout_ms);
233
234 for (;;) {
235 int ret;
236 __s32 res = 0;
237 bool oneshot = false;
238 struct io_uring_cqe *cqe = NULL;
239 struct mainloop_handler *handler = NULL;
240
241 if (timeout_ms >= 0)
242 ret = io_uring_wait_cqe_timeout(descr->ring, &cqe, &ts);
243 else
244 ret = io_uring_wait_cqe(descr->ring, &cqe);
245 if (ret < 0) {
246 if (ret == -EINTR)
247 continue;
248
249 if (ret == -ETIME)
250 return 0;
251
252 return syserror_ret(ret, "Failed to wait for completion");
253 }
254
255 ret = LXC_MAINLOOP_CONTINUE;
256 oneshot = !(cqe->flags & IORING_CQE_F_MORE);
257 res = cqe->res;
258 handler = io_uring_cqe_get_data(cqe);
259 io_uring_cqe_seen(descr->ring, cqe);
260
261 if (res <= 0) {
262 switch (res) {
263 case 0:
264 TRACE("Removed \"%s\" handler", handler->handler_name);
265 handler->flags |= CANCEL_SUCCESS;
266 if (has_exact_flags(handler->flags, (CANCEL_SUCCESS | CANCEL_RECEIVED)))
267 delete_handler(handler);
268 break;
269 case -EALREADY:
270 TRACE("Repeat sqe remove request for \"%s\" handler", handler->handler_name);
271 break;
272 case -ECANCELED:
273 TRACE("Canceled \"%s\" handler", handler->handler_name);
274 handler->flags |= CANCEL_RECEIVED;
275 if (has_exact_flags(handler->flags, (CANCEL_SUCCESS | CANCEL_RECEIVED)))
276 delete_handler(handler);
277 break;
278 case -ENOENT:
279 TRACE("No sqe for \"%s\" handler", handler->handler_name);
280 break;
281 default:
282 WARN("Received unexpected return value %d in cqe for \"%s\" handler",
283 res, handler->handler_name);
284 break;
285 }
286 } else {
287 ret = handler->callback(handler->fd, res, handler->data, descr);
288 switch (ret) {
289 case LXC_MAINLOOP_CONTINUE:
290 /* We're operating in oneshot mode so we need to rearm. */
291 if (oneshot && __io_uring_arm(descr, handler, true))
292 return -1;
293 break;
294 case LXC_MAINLOOP_DISARM:
295 disarm_handler(descr, handler, oneshot);
296 if (oneshot)
297 delete_handler(handler);
298 break;
299 case LXC_MAINLOOP_CLOSE:
300 return log_trace(0, "Closing from \"%s\"", handler->handler_name);
301 case LXC_MAINLOOP_ERROR:
302 return syserror_ret(-1, "Closing with error from \"%s\"", handler->handler_name);
303 default:
304 WARN("Received unexpected return value %d from \"%s\" handler",
305 ret, handler->handler_name);
306 break;
307 }
308 }
309
310 if (lxc_list_empty(&descr->handlers))
311 return error_ret(0, "Closing because there are no more handlers");
312 }
313 }
314 #endif
315
316 static int __lxc_mainloop_epoll(struct lxc_async_descr *descr, int timeout_ms)
317 {
318 for (;;) {
319 int nfds;
320 struct epoll_event events[MAX_EVENTS];
321
322 nfds = epoll_wait(descr->epfd, events, MAX_EVENTS, timeout_ms);
323 if (nfds < 0) {
324 if (errno == EINTR)
325 continue;
326
327 return -errno;
328 }
329
330 for (int i = 0; i < nfds; i++) {
331 int ret;
332 struct mainloop_handler *handler = events[i].data.ptr;
333
334 /* If the handler returns a positive value, exit the
335 * mainloop.
336 */
337 ret = handler->callback(handler->fd, events[i].events,
338 handler->data, descr);
339 switch (ret) {
340 case LXC_MAINLOOP_DISARM:
341 cleanup_handler(descr, handler, false);
342 __fallthrough;
343 case LXC_MAINLOOP_CONTINUE:
344 break;
345 case LXC_MAINLOOP_CLOSE:
346 return 0;
347 case LXC_MAINLOOP_ERROR:
348 return -1;
349 }
350 }
351
352 if (nfds == 0)
353 return 0;
354
355 if (lxc_list_empty(&descr->handlers))
356 return 0;
357 }
358 }
359
360 int lxc_mainloop(struct lxc_async_descr *descr, int timeout_ms)
361 {
362 if (descr->type == LXC_MAINLOOP_IO_URING)
363 return __lxc_mainloop_io_uring(descr, timeout_ms);
364
365 return __lxc_mainloop_epoll(descr, timeout_ms);
366 }
367
368 static int __lxc_mainloop_add_handler_events(struct lxc_async_descr *descr,
369 int fd, int events,
370 lxc_mainloop_callback_t callback,
371 lxc_mainloop_cleanup_t cleanup,
372 void *data, bool oneshot,
373 const char *handler_name)
374 {
375 __do_free struct mainloop_handler *handler = NULL;
376 __do_free struct lxc_list *list = NULL;
377 int ret;
378 struct epoll_event ev;
379
380 if (fd < 0)
381 return ret_errno(EBADF);
382
383 if (!callback || !cleanup || !events || !handler_name)
384 return ret_errno(EINVAL);
385
386 handler = zalloc(sizeof(*handler));
387 if (!handler)
388 return ret_errno(ENOMEM);
389
390 handler->callback = callback;
391 handler->cleanup = cleanup;
392 handler->fd = fd;
393 handler->data = data;
394 handler->handler_name = handler_name;
395
396 if (descr->type == LXC_MAINLOOP_IO_URING) {
397 ret = __io_uring_arm(descr, handler, oneshot);
398 } else {
399 ev.events = events;
400 ev.data.ptr = handler;
401 ret = epoll_ctl(descr->epfd, EPOLL_CTL_ADD, fd, &ev);
402 }
403 if (ret < 0)
404 return -errno;
405
406 list = lxc_list_new();
407 if (!list)
408 return ret_errno(ENOMEM);
409
410 handler->list = list;
411 lxc_list_add_elem(list, move_ptr(handler));;
412 lxc_list_add_tail(&descr->handlers, move_ptr(list));
413 return 0;
414 }
415
416 int lxc_mainloop_add_handler_events(struct lxc_async_descr *descr, int fd,
417 int events,
418 lxc_mainloop_callback_t callback,
419 lxc_mainloop_cleanup_t cleanup,
420 void *data, const char *handler_name)
421 {
422 return __lxc_mainloop_add_handler_events(descr, fd, events,
423 callback, cleanup,
424 data, false, handler_name);
425 }
426
427 int lxc_mainloop_add_handler(struct lxc_async_descr *descr, int fd,
428 lxc_mainloop_callback_t callback,
429 lxc_mainloop_cleanup_t cleanup,
430 void *data, const char *handler_name)
431 {
432 return __lxc_mainloop_add_handler_events(descr, fd, EPOLLIN,
433 callback, cleanup,
434 data, false, handler_name);
435 }
436
437 int lxc_mainloop_add_oneshot_handler(struct lxc_async_descr *descr, int fd,
438 lxc_mainloop_callback_t callback,
439 lxc_mainloop_cleanup_t cleanup,
440 void *data, const char *handler_name)
441 {
442 return __lxc_mainloop_add_handler_events(descr, fd, EPOLLIN,
443 callback, cleanup,
444 data, true, handler_name);
445 }
446
447 int lxc_mainloop_del_handler(struct lxc_async_descr *descr, int fd)
448 {
449 int ret;
450 struct lxc_list *iterator = NULL;
451
452 lxc_list_for_each(iterator, &descr->handlers) {
453 struct mainloop_handler *handler = iterator->elem;
454
455 if (handler->fd != fd)
456 continue;
457
458 if (descr->type == LXC_MAINLOOP_IO_URING)
459 ret = __io_uring_disarm(descr, handler);
460 else
461 ret = epoll_ctl(descr->epfd, EPOLL_CTL_DEL, fd, NULL);
462 if (ret < 0)
463 return syserror("Failed to disarm \"%s\"", handler->handler_name);
464
465 /*
466 * For io_uring the deletion happens at completion time. Either
467 * we get ENOENT if the request was oneshot and it had already
468 * triggered or we get ECANCELED for the original sqe and 0 for
469 * the cancellation request.
470 */
471 if (descr->type == LXC_MAINLOOP_EPOLL) {
472 lxc_list_del(iterator);
473 free(iterator->elem);
474 free(iterator);
475 }
476
477 return 0;
478 }
479
480 return ret_errno(EINVAL);
481 }
482
483 static inline int __epoll_open(struct lxc_async_descr *descr)
484 {
485 *descr = (struct lxc_async_descr){
486 .epfd = -EBADF,
487 };
488
489 descr->epfd = epoll_create1(EPOLL_CLOEXEC);
490 if (descr->epfd < 0)
491 return syserror("Failed to create epoll instance");
492
493 descr->type = LXC_MAINLOOP_EPOLL;
494 TRACE("Created epoll instance");
495 return 0;
496 }
497
498 int lxc_mainloop_open(struct lxc_async_descr *descr)
499 {
500 int ret;
501
502 ret = __io_uring_open(descr);
503 if (ret == -ENOSYS)
504 ret = __epoll_open(descr);
505 if (ret < 0)
506 return syserror("Failed to create mainloop instance");
507
508 lxc_list_init(&descr->handlers);
509 return 0;
510 }
511
512 void lxc_mainloop_close(struct lxc_async_descr *descr)
513 {
514 struct lxc_list *iterator, *next;
515
516 iterator = descr->handlers.next;
517 while (iterator != &descr->handlers) {
518 next = iterator->next;
519
520 lxc_list_del(iterator);
521 free(iterator->elem);
522 free(iterator);
523 iterator = next;
524 }
525
526 if (descr->type == LXC_MAINLOOP_IO_URING) {
527 #ifdef HAVE_LIBURING
528 io_uring_queue_exit(descr->ring);
529 munmap(descr->ring, sizeof(struct io_uring));
530 #else
531 ERROR("Unsupported io_uring mainloop");
532 #endif
533 } else {
534 close_prot_errno_disarm(descr->epfd);
535 }
536 }