AC_DEFINE(HAVE_IFADDRS_H, 1, [Have ifaddrs.h]),
AM_CONDITIONAL(HAVE_IFADDRS_H, false))
+AC_ARG_ENABLE([liburing],
+	[AS_HELP_STRING([--enable-liburing], [enable liburing support [default=auto]])],
+	[enable_liburing=$enableval], [enable_liburing=auto])
+
+# In auto mode, probe for liburing and turn the feature on or off accordingly.
+# Both sides of the comparison carry the "x" prefix so the test can match.
+if test "x$enable_liburing" = "xauto"; then
+	AC_CHECK_LIB([uring],[__io_uring_sqring_wait],[enable_liburing=yes],[enable_liburing=no])
+fi
+
+AM_CONDITIONAL([ENABLE_LIBURING], [test "x$enable_liburing" = "xyes"])
+
+AM_COND_IF([ENABLE_LIBURING],
+	[AC_CHECK_HEADER([liburing.h],[],[AC_MSG_ERROR([You must install the liburing development package in order to compile lxc])])
+	# We use __io_uring_sqring_wait as an indicator whether liburing is new enough to support poll.
+	AC_CHECK_LIB([uring],[__io_uring_sqring_wait],[],[AC_MSG_ERROR([The installed liburing development package is too old to compile lxc: __io_uring_sqring_wait is missing])])
+	AC_SUBST([LIBURING_LIBS], [-luring])])
+
# lookup major()/minor()/makedev()
AC_HEADER_MAJOR
$(OPENSSL_LIBS) \
$(SELINUX_LIBS) \
$(SECCOMP_LIBS) \
- $(DLOG_LIBS)
+ $(DLOG_LIBS) \
+ $(LIBURING_LIBS)
bin_SCRIPTS=
@OPENSSL_LIBS@ \
@SECCOMP_LIBS@ \
@SELINUX_LIBS@ \
- @DLOG_LIBS@
+ @DLOG_LIBS@ \
+ @LIBURING_LIBS@
if ENABLE_TOOLS
lxc_attach_SOURCES = tools/lxc_attach.c \
/* automatically cleaned up now */
descr_ptr = &descr;
- ret = lxc_mainloop_add_handler_events(&descr, fd, EPOLLPRI, freezer_cgroup_events_cb, INT_TO_PTR(state_num));
+ ret = lxc_mainloop_add_handler_events(&descr, fd, EPOLLPRI,
+ freezer_cgroup_events_cb,
+ default_cleanup_handler,
+ INT_TO_PTR(state_num),
+ "freezer_cgroup_events_cb");
if (ret < 0)
return log_error_errno(-1, errno, "Failed to add cgroup.events fd handler to mainloop");
}
if (events_fd < 0)
return log_error_errno(-errno, errno, "Failed to open cgroup.events file");
- ret = lxc_mainloop_add_handler_events(&descr, events_fd, EPOLLPRI, freezer_cgroup_events_cb, INT_TO_PTR(state_num));
+ ret = lxc_mainloop_add_handler_events(&descr, events_fd, EPOLLPRI,
+ freezer_cgroup_events_cb,
+ default_cleanup_handler,
+ INT_TO_PTR(state_num),
+ "freezer_cgroup_events_cb");
if (ret < 0)
return log_error_errno(-1, errno, "Failed to add cgroup.events fd handler to mainloop");
}
sigjmp_buf mark;
-static void lxc_monitord_cleanup(void);
-
/*
* Defines the structure to store the monitor information
* @lxcpath : the path being monitored
return 0;
}
-static void lxc_monitord_sockfd_remove(struct lxc_monitor *mon, int fd)
+static int lxc_monitord_sockfd_remove(struct lxc_monitor *mon, int fd)
{
int i;
- if (lxc_mainloop_del_handler(&mon->descr, fd))
- CRIT("File descriptor %d not found in mainloop", fd);
- close(fd);
-
for (i = 0; i < mon->clientfds_cnt; i++)
if (mon->clientfds[i] == fd)
break;
if (i >= mon->clientfds_cnt) {
CRIT("File descriptor %d not found in clients array", fd);
- lxc_monitord_cleanup();
- exit(EXIT_FAILURE);
+ return LXC_MAINLOOP_ERROR;
}
memmove(&mon->clientfds[i], &mon->clientfds[i+1],
(mon->clientfds_cnt - i - 1) * sizeof(mon->clientfds[0]));
mon->clientfds_cnt--;
+ return LXC_MAINLOOP_DISARM;
}
static int lxc_monitord_sock_handler(int fd, uint32_t events, void *data,
char buf[4];
rc = lxc_read_nointr(fd, buf, sizeof(buf));
- if (rc > 0 && !strncmp(buf, "quit", 4))
+ if (rc > 0 && !strncmp(buf, "quit", 4)) {
quit = LXC_MAINLOOP_CLOSE;
+ return LXC_MAINLOOP_CLOSE;
+ }
}
if (events & EPOLLHUP)
- lxc_monitord_sockfd_remove(mon, fd);
+ return lxc_monitord_sockfd_remove(mon, fd);
return quit;
}
}
ret = lxc_mainloop_add_handler(&mon->descr, clientfd,
- lxc_monitord_sock_handler, mon);
+ lxc_monitord_sock_handler,
+ default_cleanup_handler,
+ mon, "lxc_monitord_sock_handler");
if (ret < 0) {
ERROR("Failed to add socket handler");
goto err1;
static void lxc_monitord_delete(struct lxc_monitor *mon)
{
- int i;
-
- lxc_mainloop_del_handler(&mon->descr, mon->listenfd);
lxc_abstract_unix_close(mon->listenfd);
lxc_monitord_sock_delete(mon);
- lxc_mainloop_del_handler(&mon->descr, mon->fifofd);
lxc_monitord_fifo_delete(mon);
close(mon->fifofd);
- for (i = 0; i < mon->clientfds_cnt; i++) {
- lxc_mainloop_del_handler(&mon->descr, mon->clientfds[i]);
+ for (int i = 0; i < mon->clientfds_cnt; i++)
close(mon->clientfds[i]);
- }
mon->clientfds_cnt = 0;
}
int ret;
ret = lxc_mainloop_add_handler(&mon->descr, mon->fifofd,
- lxc_monitord_fifo_handler, mon);
+ lxc_monitord_fifo_handler,
+ default_cleanup_handler,
+ mon, "lxc_monitord_fifo_handler");
if (ret < 0) {
ERROR("Failed to add to mainloop monitor handler for fifo");
return -1;
}
ret = lxc_mainloop_add_handler(&mon->descr, mon->listenfd,
- lxc_monitord_sock_accept, mon);
+ lxc_monitord_sock_accept,
+ default_cleanup_handler,
+ mon, "lxc_monitord_sock_accept");
if (ret < 0) {
ERROR("Failed to add to mainloop monitor handler for listen socket");
return -1;
return 0;
}
-static void lxc_monitord_cleanup(void)
-{
- lxc_monitord_delete(&monitor);
-}
-
static void lxc_monitord_sig_handler(int sig)
{
siglongjmp(mark, 1);
ret = EXIT_SUCCESS;
on_error:
- if (monitord_created)
- lxc_monitord_cleanup();
-
if (mainloop_opened)
lxc_mainloop_close(&monitor.descr);
+ if (monitord_created)
+ lxc_monitord_delete(&monitor);
+
exit(ret);
}
goto out;
}
- ret = lxc_mainloop_add_handler(descr, recv_fd, seccomp_notify_handler,
- handler);
+ ret = lxc_mainloop_add_handler(descr, recv_fd,
+ seccomp_notify_handler,
+ seccomp_notify_cleanup_handler,
+ handler, "seccomp_notify_handler");
if (ret < 0) {
rsp.ret = -errno;
goto out;
}
static void lxc_cmd_fd_cleanup(int fd, struct lxc_handler *handler,
- struct lxc_async_descr *descr, const lxc_cmd_t cmd)
+ const lxc_cmd_t cmd)
{
- lxc_terminal_free(handler->conf, fd);
- lxc_mainloop_del_handler(descr, fd);
-
if (cmd == LXC_CMD_ADD_STATE_CLIENT) {
struct lxc_list *cur, *next;
* was already reached by the time we were ready to add it. So
* fallthrough and clean it up.
*/
- TRACE("Closing state client fd %d for command \"%s\"", fd, lxc_cmd_str(cmd));
+ TRACE("Deleted state client fd %d for command \"%s\"", fd, lxc_cmd_str(cmd));
}
- TRACE("Closing client fd %d for command \"%s\"", fd, lxc_cmd_str(cmd));
+ /*
+ * We're not closing the client fd here. They will instead be notified
+ * from the mainloop when it calls the cleanup handler. This will cause
+ * a slight delay but is semantically cleaner than what we used to do.
+ */
+}
+
+/*
+ * Mainloop cleanup callback for command client connections: calls
+ * lxc_terminal_free() for @fd and then closes it.
+ *
+ * @fd:   client fd that is being removed from the mainloop
+ * @data: the struct lxc_handler that was registered with the fd
+ *
+ * Always returns 0.
+ */
+static int lxc_cmd_cleanup_handler(int fd, void *data)
+{
+	struct lxc_conf *conf = ((struct lxc_handler *)data)->conf;
+
+	lxc_terminal_free(conf, fd);
+	close(fd);
+	TRACE("Closing client fd %d for \"%s\"", fd, __FUNCTION__);
+
+	return 0;
+}
static int lxc_cmd_handler(int fd, uint32_t events, void *data,
__lxc_cmd_rsp_send(fd, &rsp);
}
- goto out_close;
+ goto out;
}
if (ret == 0)
- goto out_close;
+ goto out;
if (ret != sizeof(req)) {
WARN("Failed to receive full command request. Ignoring request for \"%s\"", lxc_cmd_str(req.cmd));
- goto out_close;
+ goto out;
}
if ((req.datalen > LXC_CMD_DATA_MAX) && (req.cmd != LXC_CMD_CONSOLE_LOG)) {
ERROR("Received command data length %d is too large for command \"%s\"", req.datalen, lxc_cmd_str(req.cmd));
- goto out_close;
+ goto out;
}
if (req.datalen > 0) {
ret = lxc_recv_nointr(fd, reqdata, req.datalen, 0);
if (ret != req.datalen) {
WARN("Failed to receive full command request. Ignoring request for \"%s\"", lxc_cmd_str(req.cmd));
- goto out_close;
+ goto out;
}
req.data = reqdata;
ret = lxc_cmd_process(fd, &req, handler, descr);
if (ret < 0) {
DEBUG("Failed to process command %s; cleaning up client fd %d", lxc_cmd_str(req.cmd), fd);
- goto out_close;
- } else if (ret == LXC_CMD_REAP_CLIENT_FD) {
+ goto out;
+ }
+
+ if (ret == LXC_CMD_REAP_CLIENT_FD) {
TRACE("Processed command %s; cleaning up client fd %d", lxc_cmd_str(req.cmd), fd);
- goto out_close;
- } else {
- TRACE("Processed command %s; keeping client fd %d", lxc_cmd_str(req.cmd), fd);
+ goto out;
}
-out:
+ TRACE("Processed command %s; keeping client fd %d", lxc_cmd_str(req.cmd), fd);
return LXC_MAINLOOP_CONTINUE;
-out_close:
- lxc_cmd_fd_cleanup(fd, handler, descr, req.cmd);
- goto out;
+out:
+ lxc_cmd_fd_cleanup(fd, handler, req.cmd);
+ return LXC_MAINLOOP_DISARM;
}
static int lxc_cmd_accept(int fd, uint32_t events, void *data,
if (ret < 0)
return log_error_errno(ret, errno, "Failed to enable necessary credentials on command socket");
- ret = lxc_mainloop_add_handler(descr, connection, lxc_cmd_handler, data);
+ ret = lxc_mainloop_add_oneshot_handler(descr, connection,
+ lxc_cmd_handler,
+ lxc_cmd_cleanup_handler,
+ data, "lxc_cmd_handler");
if (ret)
return log_error(ret, "Failed to add command handler");
{
int ret;
- ret = lxc_mainloop_add_handler(descr, handler->conf->maincmd_fd, lxc_cmd_accept, handler);
+ ret = lxc_mainloop_add_handler(descr, handler->conf->maincmd_fd,
+ lxc_cmd_accept,
+ default_cleanup_handler,
+ handler, "lxc_cmd_accept");
if (ret < 0)
return log_error(ret, "Failed to add handler for command socket fd %d", handler->conf->maincmd_fd);
__hidden extern int lxc_seccomp_load(struct lxc_conf *conf);
__hidden extern int lxc_read_seccomp_config(struct lxc_conf *conf);
__hidden extern void lxc_seccomp_free(struct lxc_seccomp *seccomp);
+__hidden extern int seccomp_notify_cleanup_handler(int fd, void *data);
__hidden extern int seccomp_notify_handler(int fd, uint32_t events, void *data,
struct lxc_async_descr *descr);
__hidden extern void seccomp_conf_init(struct lxc_conf *conf);
static inline int seccomp_notify_handler(int fd, uint32_t events, void *data,
struct lxc_async_descr *descr)
{
- return -ENOSYS;
+ return ret_errno(ENOSYS);
+}
+
+/*
+ * Fallback cleanup handler that only reports ENOSYS (presumably the branch
+ * built without seccomp support; the guarding #if is outside this hunk).
+ *
+ * The parameter list matches the (int fd, void *data) shape of the extern
+ * declaration and of lxc_mainloop_cleanup_t so that the stub can be passed
+ * to the mainloop registration functions.
+ */
+static inline int seccomp_notify_cleanup_handler(int fd, void *data)
+{
+	return ret_errno(ENOSYS);
+}
static inline void seccomp_conf_init(struct lxc_conf *conf)
#define PER_LINUX32 0x0008
#endif
+/* Return true only when every bit that is set in @mask is also set in @flags. */
+static inline bool has_exact_flags(__u32 flags, __u32 mask)
+{
+	const __u32 missing = mask & ~flags;
+
+	return missing == 0;
+}
+
#endif /* __LXC_MACRO_H */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/poll.h>
#include <sys/epoll.h>
#include <unistd.h>
#include "config.h"
+#include "log.h"
+#include "macro.h"
#include "mainloop.h"
+lxc_log_define(mainloop, lxc);
+
+#define CANCEL_RAISED (1 << 0)
+#define CANCEL_RECEIVED (1 << 1)
+#define CANCEL_SUCCESS (1 << 2)
+
struct mainloop_handler {
- lxc_mainloop_callback_t callback;
+ struct lxc_list *list;
int fd;
void *data;
+ lxc_mainloop_callback_t callback;
+ lxc_mainloop_cleanup_t cleanup;
+ const char *handler_name;
+ unsigned int flags;
};
#define MAX_EVENTS 10
-int lxc_mainloop(struct lxc_async_descr *descr, int timeout_ms)
+static int __io_uring_disarm(struct lxc_async_descr *descr,
+ struct mainloop_handler *handler);
+
+/*
+ * Deregister @handler from the backend of @descr, run its cleanup callback
+ * and release the list node together with the handler itself.
+ *
+ * @oneshot is only consulted for io_uring: a oneshot request that already
+ * produced an event is no longer armed, so nothing has to be disarmed and
+ * only the cleanup callback and the removal of the handler from the list
+ * remain. Failures of the deregistration or of the cleanup callback are
+ * logged as warnings and otherwise ignored.
+ */
+static void delete_handler(struct lxc_async_descr *descr,
+			   struct mainloop_handler *handler, bool oneshot)
+{
+	struct lxc_list *node;
+	int rc;
+
+	if (descr->type != LXC_MAINLOOP_IO_URING)
+		rc = epoll_ctl(descr->epfd, EPOLL_CTL_DEL, handler->fd, NULL);
+	else if (oneshot)
+		rc = 0;
+	else
+		rc = __io_uring_disarm(descr, handler);
+	if (rc < 0)
+		SYSWARN("Failed to delete \"%d\" for \"%s\"", handler->fd, handler->handler_name);
+
+	if (handler->cleanup) {
+		rc = handler->cleanup(handler->fd, handler->data);
+		if (rc < 0)
+			SYSWARN("Failed to call cleanup \"%s\" handler", handler->handler_name);
+	}
+
+	/* The list node owns the handler through ->elem; free both. */
+	node = move_ptr(handler->list);
+	lxc_list_del(node);
+	free(node->elem);
+	free(node);
+}
+
+#ifndef HAVE_LIBURING
+/*
+ * Fallbacks for builds where HAVE_LIBURING is not defined. Every io_uring
+ * entry point simply reports ENOSYS through ret_errno(); lxc_mainloop_open()
+ * checks the result of __io_uring_open() against -ENOSYS to select epoll.
+ */
+static inline int __io_uring_open(struct lxc_async_descr *descr)
+{
+	return ret_errno(ENOSYS);
+}
+
+static int __io_uring_arm(struct lxc_async_descr *descr,
+			  struct mainloop_handler *handler, bool oneshot)
+{
+	return ret_errno(ENOSYS);
+}
+
+static int __io_uring_disarm(struct lxc_async_descr *descr,
+			     struct mainloop_handler *handler)
+{
+	return ret_errno(ENOSYS);
+}
+
+static inline int __lxc_mainloop_io_uring(struct lxc_async_descr *descr,
+					  int timeout_ms)
+{
+	return ret_errno(ENOSYS);
+}
+
+#else
+
+/*
+ * Try to set up an io_uring backed mainloop in @descr.
+ *
+ * The struct io_uring itself lives in a shared anonymous mapping which is
+ * excluded from fork inheritance via io_uring_ring_dontfork().
+ *
+ * Returns 0 on success. If the mapping cannot be created the error from
+ * syserror() is returned. If the ring cannot be initialised or protected,
+ * everything set up so far is released again and ENOSYS is reported through
+ * ret_errno(), the value lxc_mainloop_open() checks for to fall back to
+ * epoll.
+ */
+static inline int __io_uring_open(struct lxc_async_descr *descr)
+{
+	int ret;
+
+	*descr = (struct lxc_async_descr){
+		.epfd = -EBADF,
+	};
+
+	descr->ring = mmap(NULL, sizeof(struct io_uring), PROT_READ | PROT_WRITE,
+			   MAP_SHARED | MAP_POPULATE | MAP_ANONYMOUS, -1, 0);
+	if (descr->ring == MAP_FAILED)
+		return syserror("Failed to mmap io_uring memory");
+
+	ret = io_uring_queue_init(512, descr->ring, IORING_SETUP_SQPOLL);
+	if (ret) {
+		/* liburing hands back -errno instead of setting errno. */
+		if (ret < 0)
+			errno = -ret;
+		SYSERROR("Failed to initialize io_uring instance");
+		goto on_unmap;
+	}
+
+	ret = io_uring_ring_dontfork(descr->ring);
+	if (ret) {
+		if (ret < 0)
+			errno = -ret;
+		SYSERROR("Failed to prevent inheritance of io_uring mmaped region");
+		goto on_queue_exit;
+	}
+
+	descr->type = LXC_MAINLOOP_IO_URING;
+	TRACE("Created io-uring instance");
+	return 0;
+
+on_queue_exit:
+	/* The ring was initialised, so it has to be torn down before unmapping. */
+	io_uring_queue_exit(descr->ring);
+
+on_unmap:
+	ret = munmap(descr->ring, sizeof(struct io_uring));
+	if (ret < 0)
+		SYSWARN("Failed to unmap io_uring mmaped memory");
+
+	return ret_errno(ENOSYS);
+}
+
+/*
+ * Queue and submit a poll request (EPOLLIN) for @handler->fd on the ring of
+ * @descr. The handler pointer is attached as user data of the request.
+ *
+ * @oneshot: false requests a multishot poll, true a single-shot poll.
+ *
+ * Returns 0 on success, the result of syserror_set()/syserror_ret() if no
+ * submission queue entry is available or the submission fails.
+ */
+static int __io_uring_arm(struct lxc_async_descr *descr,
+			  struct mainloop_handler *handler, bool oneshot)
+{
+	struct io_uring_sqe *sqe = io_uring_get_sqe(descr->ring);
+	int ret;
+
+	if (!sqe)
+		return syserror_set(ENOENT, "Failed to get submission queue entry");
+
+	io_uring_prep_poll_add(sqe, handler->fd, EPOLLIN);
+
+	/*
+	 * Raise IORING_POLL_ADD_MULTI to set up a multishot poll. The same sqe
+	 * will then produce multiple cqes, and a cqe produced from a multishot
+	 * sqe will have IORING_CQE_F_MORE raised in cqe->flags.
+	 * Some devices, terminal devices among them, can't be used with
+	 * IORING_POLL_ADD_MULTI. This can only be detected at completion time
+	 * because IORING_CQE_F_MORE will not be raised in cqe->flags. So
+	 * unfortunately we can't use multishot for them although we really
+	 * would like to and need to resubmit the request instead. The io_uring
+	 * based mainloop deals with cases where multishot doesn't work by
+	 * resubmitting the request; the handler just needs to inform the
+	 * mainloop that it wants to keep the handler.
+	 */
+	if (!oneshot)
+		sqe->len |= IORING_POLL_ADD_MULTI;
+
+	io_uring_sqe_set_data(sqe, handler);
+
+	ret = io_uring_submit(descr->ring);
+	if (ret == -EINVAL && !oneshot) {
+		/* The kernel might not yet support multishot. */
+		sqe->len &= ~IORING_POLL_ADD_MULTI;
+		ret = io_uring_submit(descr->ring);
+	}
+	if (ret < 0)
+		return syserror_ret(ret, "Failed to add \"%s\" handler", handler->handler_name);
+
+	TRACE("Added \"%s\" handler", handler->handler_name);
+	return 0;
+}
+
+/*
+ * Submit a poll-remove request for @handler on the ring of @descr and mark
+ * the handler with CANCEL_RAISED. The flag is cleared again if the
+ * submission fails.
+ *
+ * Returns the (non-negative) result of io_uring_submit() on success and the
+ * result of syserror_set()/syserror_ret() on failure.
+ */
+static int __io_uring_disarm(struct lxc_async_descr *descr,
+			     struct mainloop_handler *handler)
+{
+	struct io_uring_sqe *sqe = io_uring_get_sqe(descr->ring);
+	int submitted;
+
+	if (!sqe)
+		return syserror_set(ENOENT,
+				    "Failed to get submission queue entry");
+
+	io_uring_prep_poll_remove(sqe, handler);
+	handler->flags |= CANCEL_RAISED;
+	io_uring_sqe_set_data(sqe, handler);
+
+	submitted = io_uring_submit(descr->ring);
+	if (submitted >= 0) {
+		TRACE("Removed handler \"%s\"", handler->handler_name);
+		return submitted;
+	}
+
+	handler->flags &= ~CANCEL_RAISED;
+	return syserror_ret(submitted, "Failed to remove \"%s\" handler",
+			    handler->handler_name);
+}
+
+/* Split @timeout_ms into the seconds and nanoseconds fields of @ts. */
+static void msec_to_ts(struct __kernel_timespec *ts, unsigned int timeout_ms)
+{
+	const unsigned int whole_sec = timeout_ms / 1000;
+	const unsigned int rest_ms = timeout_ms - whole_sec * 1000;
+
+	ts->tv_sec = whole_sec;
+	ts->tv_nsec = rest_ms * 1000000;
+}
+
+/*
+ * io_uring flavour of the mainloop: wait for completion queue entries on
+ * descr->ring and hand each poll event to the callback of the handler that
+ * is stored as user data of the entry.
+ *
+ * @timeout_ms: a value >= 0 is converted once and used as the timeout of
+ *              every wait; a negative value waits without a timeout.
+ *
+ * Returns 0 when a wait times out, when a callback asks to close the loop
+ * or when the handler list is empty; -1 when rearming a handler fails; the
+ * result of syserror_ret() when waiting fails or a callback reports an
+ * error.
+ */
+static int __lxc_mainloop_io_uring(struct lxc_async_descr *descr, int timeout_ms)
+{
+ struct __kernel_timespec ts;
+
+ /* ts is only read further down when timeout_ms >= 0. */
+ if (timeout_ms >= 0)
+ msec_to_ts(&ts, timeout_ms);
+
+ for (;;) {
+ int ret;
+ __s32 mask = 0;
+ bool oneshot = false;
+ struct io_uring_cqe *cqe = NULL;
+ struct mainloop_handler *handler = NULL;
+
+ if (timeout_ms >= 0)
+ ret = io_uring_wait_cqe_timeout(descr->ring, &cqe, &ts);
+ else
+ ret = io_uring_wait_cqe(descr->ring, &cqe);
+ if (ret < 0) {
+ if (ret == -EINTR)
+ continue;
+
+ if (ret == -ETIME)
+ return 0;
+
+ return syserror_ret(ret, "Failed to wait for completion");
+ }
+
+ ret = LXC_MAINLOOP_CONTINUE;
+ /* A completion without IORING_CQE_F_MORE is treated as oneshot. */
+ oneshot = !(cqe->flags & IORING_CQE_F_MORE);
+ mask = cqe->res;
+ handler = io_uring_cqe_get_data(cqe);
+ /* Everything needed was copied out of the cqe above. */
+ io_uring_cqe_seen(descr->ring, cqe);
+
+ /*
+ * The listed negative results and 0 only update the CANCEL_*
+ * bookkeeping and skip the callback; every other result is passed
+ * to the callback as the event mask.
+ */
+ switch (mask) {
+ case -ECANCELED:
+ handler->flags |= CANCEL_RECEIVED;
+ TRACE("Canceled \"%s\" handler", handler->handler_name);
+ goto out;
+ case -ENOENT:
+ handler->flags = CANCEL_SUCCESS | CANCEL_RECEIVED;
+ TRACE("No sqe for \"%s\" handler", handler->handler_name);
+ goto out;
+ case -EALREADY:
+ TRACE("Repeat sqe remove request for \"%s\" handler", handler->handler_name);
+ goto out;
+ case 0:
+ handler->flags |= CANCEL_SUCCESS;
+ TRACE("Removed \"%s\" handler", handler->handler_name);
+ goto out;
+ default:
+ /*
+ * We need to always remove the handler for a
+ * successful oneshot request.
+ */
+ if (oneshot)
+ handler->flags = CANCEL_SUCCESS | CANCEL_RECEIVED;
+ }
+
+ ret = handler->callback(handler->fd, mask, handler->data, descr);
+ switch (ret) {
+ case LXC_MAINLOOP_CONTINUE:
+ /* We're operating in oneshot mode so we need to rearm. */
+ if (oneshot && __io_uring_arm(descr, handler, true))
+ return -1;
+ break;
+ case LXC_MAINLOOP_DISARM:
+ /*
+ * NOTE(review): the handler is only deleted here when both
+ * CANCEL_SUCCESS and CANCEL_RECEIVED are already set. For a
+ * multishot completion the default branch above sets neither,
+ * so the handler appears to stay armed - confirm this is the
+ * intended behaviour.
+ */
+ if (has_exact_flags(handler->flags, (CANCEL_SUCCESS | CANCEL_RECEIVED)))
+ delete_handler(descr, handler, oneshot);
+ break;
+ case LXC_MAINLOOP_CLOSE:
+ return log_trace(0, "Closing from \"%s\"", handler->handler_name);
+ case LXC_MAINLOOP_ERROR:
+ return syserror_ret(-1, "Closing with error from \"%s\"", handler->handler_name);
+ }
+
+ out:
+ /* Stop looping once no handler is registered anymore. */
+ if (lxc_list_empty(&descr->handlers))
+ return error_ret(0, "Closing because there are no more handlers");
+ }
+}
+#endif
+
+static int __lxc_mainloop_epoll(struct lxc_async_descr *descr, int timeout_ms)
{
int i, nfds, ret;
struct mainloop_handler *handler;
*/
ret = handler->callback(handler->fd, events[i].events,
handler->data, descr);
- if (ret == LXC_MAINLOOP_ERROR)
- return -1;
- if (ret == LXC_MAINLOOP_CLOSE)
+ switch (ret) {
+ case LXC_MAINLOOP_DISARM:
+ delete_handler(descr, handler, false);
+ __fallthrough;
+ case LXC_MAINLOOP_CONTINUE:
+ break;
+ case LXC_MAINLOOP_CLOSE:
return 0;
+ case LXC_MAINLOOP_ERROR:
+ return -1;
+ }
}
if (nfds == 0)
}
}
-int lxc_mainloop_add_handler_events(struct lxc_async_descr *descr, int fd,
- int events,
- lxc_mainloop_callback_t callback,
- void *data)
+/*
+ * Run the mainloop of @descr with the backend recorded in descr->type:
+ * io_uring when the type is LXC_MAINLOOP_IO_URING, epoll for anything else.
+ * @timeout_ms and the return value are passed through unchanged.
+ */
+int lxc_mainloop(struct lxc_async_descr *descr, int timeout_ms)
+{
+	switch (descr->type) {
+	case LXC_MAINLOOP_IO_URING:
+		return __lxc_mainloop_io_uring(descr, timeout_ms);
+	default:
+		return __lxc_mainloop_epoll(descr, timeout_ms);
+	}
+}
+
+static int __lxc_mainloop_add_handler_events(struct lxc_async_descr *descr,
+ int fd, int events,
+ lxc_mainloop_callback_t callback,
+ lxc_mainloop_cleanup_t cleanup,
+ void *data, bool oneshot,
+ const char *handler_name)
{
__do_free struct mainloop_handler *handler = NULL;
- __do_free struct lxc_list *item = NULL;
+ __do_free struct lxc_list *list = NULL;
+ int ret;
struct epoll_event ev;
if (fd < 0)
- return -1;
+ return ret_errno(EBADF);
- handler = malloc(sizeof(*handler));
- if (!handler)
- return -1;
+ if (!callback || !cleanup || !events || !handler_name)
+ return ret_errno(EINVAL);
- handler->callback = callback;
- handler->fd = fd;
- handler->data = data;
+ handler = zalloc(sizeof(*handler));
+ if (!handler)
+ return ret_errno(ENOMEM);
- ev.events = events;
- ev.data.ptr = handler;
+ handler->callback = callback;
+ handler->cleanup = cleanup;
+ handler->fd = fd;
+ handler->data = data;
+ handler->handler_name = handler_name;
- if (epoll_ctl(descr->epfd, EPOLL_CTL_ADD, fd, &ev) < 0)
+ if (descr->type == LXC_MAINLOOP_IO_URING) {
+ ret = __io_uring_arm(descr, handler, oneshot);
+ } else {
+ ev.events = events;
+ ev.data.ptr = handler;
+ ret = epoll_ctl(descr->epfd, EPOLL_CTL_ADD, fd, &ev);
+ }
+ if (ret < 0)
return -errno;
- item = malloc(sizeof(*item));
- if (!item)
+ list = lxc_list_new();
+ if (!list)
return ret_errno(ENOMEM);
- item->elem = move_ptr(handler);
- lxc_list_add(&descr->handlers, move_ptr(item));
+ handler->list = list;
+ lxc_list_add_elem(list, move_ptr(handler));;
+ lxc_list_add_tail(&descr->handlers, move_ptr(list));
return 0;
}
+/*
+ * Register @fd with the mainloop for the caller supplied @events mask as a
+ * regular (not oneshot) handler. All arguments are forwarded to
+ * __lxc_mainloop_add_handler_events(), whose result is returned.
+ */
+int lxc_mainloop_add_handler_events(struct lxc_async_descr *descr, int fd,
+				    int events,
+				    lxc_mainloop_callback_t callback,
+				    lxc_mainloop_cleanup_t cleanup,
+				    void *data, const char *handler_name)
+{
+	const bool oneshot = false;
+
+	return __lxc_mainloop_add_handler_events(descr, fd, events, callback,
+						 cleanup, data, oneshot,
+						 handler_name);
+}
+
int lxc_mainloop_add_handler(struct lxc_async_descr *descr, int fd,
- lxc_mainloop_callback_t callback, void *data)
+ lxc_mainloop_callback_t callback,
+ lxc_mainloop_cleanup_t cleanup,
+ void *data, const char *handler_name)
{
- return lxc_mainloop_add_handler_events(descr, fd, EPOLLIN, callback,
- data);
+ return __lxc_mainloop_add_handler_events(descr, fd, EPOLLIN,
+ callback, cleanup,
+ data, false, handler_name);
+}
+
+int lxc_mainloop_add_oneshot_handler(struct lxc_async_descr *descr, int fd,
+ lxc_mainloop_callback_t callback,
+ lxc_mainloop_cleanup_t cleanup,
+ void *data, const char *handler_name)
+{
+ return __lxc_mainloop_add_handler_events(descr, fd, EPOLLIN,
+ callback, cleanup,
+ data, true, handler_name);
}
int lxc_mainloop_del_handler(struct lxc_async_descr *descr, int fd)
{
- struct mainloop_handler *handler;
- struct lxc_list *iterator;
+ int ret;
+ struct lxc_list *iterator = NULL;
lxc_list_for_each(iterator, &descr->handlers) {
- handler = iterator->elem;
+ struct mainloop_handler *handler = iterator->elem;
- if (handler->fd == fd) {
- /* found */
- if (epoll_ctl(descr->epfd, EPOLL_CTL_DEL, fd, NULL))
- return -errno;
+ if (handler->fd != fd)
+ continue;
+ if (descr->type == LXC_MAINLOOP_IO_URING)
+ ret = __io_uring_disarm(descr, handler);
+ else
+ ret = epoll_ctl(descr->epfd, EPOLL_CTL_DEL, fd, NULL);
+ if (ret < 0)
+ return syserror("Failed to disarm \"%s\"", handler->handler_name);
+
+ /*
+ * For io_uring the deletion happens at completion time. Either
+ * we get ENOENT if the request was oneshot and it had already
+ * triggered or we get ECANCELED for the original sqe and 0 for
+ * the cancellation request.
+ */
+ if (descr->type == LXC_MAINLOOP_EPOLL) {
lxc_list_del(iterator);
free(iterator->elem);
free(iterator);
- return 0;
}
+
+ return 0;
}
return ret_errno(EINVAL);
}
-int lxc_mainloop_open(struct lxc_async_descr *descr)
+static inline int __epoll_open(struct lxc_async_descr *descr)
{
+ *descr = (struct lxc_async_descr){
+ .epfd = -EBADF,
+ };
+
descr->epfd = epoll_create1(EPOLL_CLOEXEC);
if (descr->epfd < 0)
- return -errno;
+ return syserror("Failed to create epoll instance");
+
+ descr->type = LXC_MAINLOOP_EPOLL;
+ TRACE("Created epoll instance");
+ return 0;
+}
+
+int lxc_mainloop_open(struct lxc_async_descr *descr)
+{
+ int ret;
+
+ ret = __io_uring_open(descr);
+ if (ret == -ENOSYS)
+ ret = __epoll_open(descr);
+ if (ret < 0)
+ return syserror("Failed to create mainloop instance");
lxc_list_init(&descr->handlers);
return 0;
iterator = next;
}
- close_prot_errno_disarm(descr->epfd);
+ if (descr->type == LXC_MAINLOOP_IO_URING) {
+#ifdef HAVE_LIBURING
+ io_uring_queue_exit(descr->ring);
+ munmap(descr->ring, sizeof(struct io_uring));
+#else
+ ERROR("Unsupported io_uring mainloop");
+#endif
+ } else {
+ close_prot_errno_disarm(descr->epfd);
+ }
}
#include "list.h"
#include "memory_utils.h"
+#ifdef HAVE_LIBURING
+#include <liburing.h>
+#endif
+
#define LXC_MAINLOOP_ERROR -1
#define LXC_MAINLOOP_CONTINUE 0
#define LXC_MAINLOOP_CLOSE 1
+#define LXC_MAINLOOP_DISARM 2
+
+/* Backend that drives a struct lxc_async_descr. */
+typedef enum {
+	/* Set by __epoll_open(). */
+	LXC_MAINLOOP_EPOLL	= 1,
+	/* Set by __io_uring_open(). */
+	LXC_MAINLOOP_IO_URING	= 2,
+} async_descr_t;
struct lxc_async_descr {
- int epfd;
+ async_descr_t type;
+ union {
+ int epfd;
+#ifdef HAVE_LIBURING
+ struct io_uring *ring;
+#endif
+ };
struct lxc_list handlers;
};
+/*
+ * Cleanup callback for handlers that have nothing to release: both
+ * arguments are ignored and 0 is returned.
+ */
+static inline int default_cleanup_handler(int fd, void *data)
+{
+	(void)fd;
+	(void)data;
+
+	return 0;
+}
+
typedef int (*lxc_mainloop_callback_t)(int fd, uint32_t event, void *data,
struct lxc_async_descr *descr);
+typedef int (*lxc_mainloop_cleanup_t)(int fd, void *data);
+
__hidden extern int lxc_mainloop(struct lxc_async_descr *descr, int timeout_ms);
__hidden extern int lxc_mainloop_add_handler_events(struct lxc_async_descr *descr, int fd, int events,
- lxc_mainloop_callback_t callback, void *data);
+ lxc_mainloop_callback_t callback,
+ lxc_mainloop_cleanup_t cleanup,
+ void *data, const char *handler_name);
__hidden extern int lxc_mainloop_add_handler(struct lxc_async_descr *descr, int fd,
- lxc_mainloop_callback_t callback, void *data);
+ lxc_mainloop_callback_t callback,
+ lxc_mainloop_cleanup_t cleanup,
+ void *data, const char *handler_name);
+__hidden extern int lxc_mainloop_add_oneshot_handler(struct lxc_async_descr *descr, int fd,
+ lxc_mainloop_callback_t callback,
+ lxc_mainloop_cleanup_t cleanup,
+ void *data, const char *handler_name);
__hidden extern int lxc_mainloop_del_handler(struct lxc_async_descr *descr, int fd);
}
#endif
+/*
+ * Mainloop cleanup callback for seccomp notify fds.
+ *
+ * @fd:   notify fd that is being removed from the mainloop
+ * @data: the struct lxc_handler that was registered with the fd
+ *
+ * The fd is closed here because the notify handler no longer closes it
+ * itself when it asks the mainloop to disarm it. Always returns 0.
+ */
+int seccomp_notify_cleanup_handler(int fd, void *data)
+{
+	struct lxc_handler *hdlr = data;
+	struct lxc_conf *conf = hdlr->conf;
+
+	/* TODO: Make sure that we don't need to free any memory in here. */
+	if (fd == conf->seccomp.notifier.notify_fd)
+		fd = move_fd(conf->seccomp.notifier.notify_fd);
+
+	/*
+	 * If this isn't the main notify_fd it means that someone registered a
+	 * seccomp notify handler through the command socket (e.g. for attach)
+	 * and so we won't touch the container's config.
+	 */
+	close(fd);
+	return 0;
+}
+
int seccomp_notify_handler(int fd, uint32_t events, void *data,
struct lxc_async_descr *descr)
{
char *cookie = conf->seccomp.notifier.cookie;
__u64 req_id;
- if (events & EPOLLHUP) {
- lxc_mainloop_del_handler(descr, fd);
- close(fd);
- return log_trace(0, "Removing seccomp notifier fd %d", fd);
- }
+ if (events & EPOLLHUP)
+ return log_trace(LXC_MAINLOOP_DISARM, "Removing seccomp notifier fd %d", fd);
memset(req, 0, conf->seccomp.notifier.sizes.seccomp_notif);
ret = seccomp_notify_receive(fd, req);
return -1;
}
- ret = lxc_mainloop_add_handler(descr,
- seccomp->notifier.notify_fd,
- seccomp_notify_handler, handler);
+ ret = lxc_mainloop_add_handler(descr, seccomp->notifier.notify_fd,
+ seccomp_notify_handler,
+ seccomp_notify_cleanup_handler,
+ handler,
+ "seccomp_notify_handler");
if (ret < 0) {
ERROR("Failed to add seccomp notify handler for %d to mainloop",
notify_fd);
if (ret == 0 && info.si_pid == hdlr->pid)
hdlr->init_died = true;
+ TRACE("Received signal ssi_signo(%d) for ssi_pid(%d), si_signo(%d), si_pid(%d)",
+ siginfo.ssi_signo, siginfo.ssi_pid, info.si_signo, info.si_pid);
+
/* Try to figure out a reasonable exit status to report. */
if (hdlr->init_died) {
switch (info.si_code) {
int lxc_poll(const char *name, struct lxc_handler *handler)
{
int ret;
- bool has_console = true;
+ struct lxc_terminal *console = &handler->conf->console;
struct lxc_async_descr descr, descr_console;
- if (handler->conf->console.path &&
- strequal(handler->conf->console.path, "none"))
- has_console = false;
+ if (!wants_console(console))
+ console = NULL;
ret = lxc_mainloop_open(&descr);
if (ret < 0) {
goto out_sigfd;
}
- if (has_console) {
+ if (console) {
ret = lxc_mainloop_open(&descr_console);
if (ret < 0) {
ERROR("Failed to create console mainloop");
}
}
- ret = lxc_mainloop_add_handler(&descr, handler->sigfd, signal_handler, handler);
+ ret = lxc_mainloop_add_handler(&descr, handler->sigfd,
+ signal_handler,
+ default_cleanup_handler,
+ handler, "signal_handler");
if (ret < 0) {
ERROR("Failed to add signal handler for %d to mainloop", handler->sigfd);
goto out_mainloop_console;
goto out_mainloop_console;
}
- if (has_console) {
- struct lxc_terminal *console = &handler->conf->console;
-
+ if (console) {
ret = lxc_terminal_mainloop_add(&descr, console);
if (ret < 0) {
ERROR("Failed to add console handlers to mainloop");
goto out_mainloop_console;
}
-
- ret = lxc_terminal_mainloop_add(&descr_console, console);
- if (ret < 0) {
- ERROR("Failed to add console handlers to console mainloop");
- goto out_mainloop_console;
- }
-
- handler->conf->console.descr = &descr;
}
ret = lxc_cmd_mainloop_add(name, &descr, handler);
if (ret < 0 || !handler->init_died)
goto out_mainloop_console;
- if (has_console)
- ret = lxc_mainloop(&descr_console, 0);
+ if (console) {
+ ret = lxc_terminal_mainloop_add(&descr_console, console);
+ if (ret == 0)
+ ret = lxc_mainloop(&descr_console, 0);
+ }
out_mainloop_console:
- if (has_console) {
+ if (console) {
lxc_mainloop_close(&descr_console);
TRACE("Closed console mainloop");
}
#include <sys/signalfd.h>
#endif
-#ifdef HAVE_STRUCT_OPEN_HOW
-#include <linux/openat2.h>
-#endif
-
#if HAVE_SYS_PERSONALITY_H
#include <sys/personality.h>
#endif
#ifndef HAVE_OPENAT2
static inline int openat2(int dfd, const char *filename, struct lxc_open_how *how, size_t size)
{
- /* When struct open_how is updated we should update lxc as well. */
-#ifdef HAVE_STRUCT_OPEN_HOW
- BUILD_BUG_ON(sizeof(struct lxc_open_how) != sizeof(struct open_how));
-#endif
- return syscall(__NR_openat2, dfd, filename, (struct open_how *)how, size);
+ return syscall(__NR_openat2, dfd, filename, how, size);
}
#endif /* HAVE_OPENAT2 */
return bytes_read;
}
-int lxc_terminal_io_cb(int fd, uint32_t events, void *data,
- struct lxc_async_descr *descr)
+static int lxc_terminal_ptx_io(struct lxc_terminal *terminal)
{
- struct lxc_terminal *terminal = data;
char buf[LXC_TERMINAL_BUFFER_SIZE];
int r, w, w_log, w_rbuf;
- w = r = lxc_read_nointr(fd, buf, sizeof(buf));
- if (r <= 0) {
- INFO("Terminal client on fd %d has exited", fd);
- lxc_mainloop_del_handler(descr, fd);
-
- if (fd == terminal->ptx) {
- terminal->ptx = -EBADF;
- } else if (fd == terminal->peer) {
- lxc_terminal_signal_fini(terminal);
- terminal->peer = -EBADF;
- } else {
- ERROR("Handler received unexpected file descriptor");
- }
- close(fd);
+ w = r = lxc_read_nointr(terminal->ptx, buf, sizeof(buf));
+ if (r <= 0)
+ return -1;
- return LXC_MAINLOOP_CLOSE;
- }
+ w_rbuf = w_log = 0;
+ /* write to peer first */
+ if (terminal->peer >= 0)
+ w = lxc_write_nointr(terminal->peer, buf, r);
- if (fd == terminal->peer)
- w = lxc_write_nointr(terminal->ptx, buf, r);
+ /* write to terminal ringbuffer */
+ if (terminal->buffer_size > 0)
+ w_rbuf = lxc_ringbuf_write(&terminal->ringbuf, buf, r);
- w_rbuf = w_log = 0;
- if (fd == terminal->ptx) {
- /* write to peer first */
- if (terminal->peer >= 0)
- w = lxc_write_nointr(terminal->peer, buf, r);
-
- /* write to terminal ringbuffer */
- if (terminal->buffer_size > 0)
- w_rbuf = lxc_ringbuf_write(&terminal->ringbuf, buf, r);
-
- /* write to terminal log */
- if (terminal->log_fd >= 0)
- w_log = lxc_terminal_write_log_file(terminal, buf, r);
- }
+ /* write to terminal log */
+ if (terminal->log_fd >= 0)
+ w_log = lxc_terminal_write_log_file(terminal, buf, r);
if (w != r)
WARN("Short write on terminal r:%d != w:%d", r, w);
if (w_log < 0)
TRACE("Failed to write %d bytes to terminal log", r);
+ return 0;
+}
+
+/*
+ * Read once from terminal->peer and write what was read to terminal->ptx.
+ *
+ * Returns -1 when the read yields no data or fails, 0 otherwise. A write
+ * that transfers fewer bytes than were read is only logged as a warning.
+ */
+static int lxc_terminal_peer_io(struct lxc_terminal *terminal)
+{
+	char buf[LXC_TERMINAL_BUFFER_SIZE];
+	int nread, nwritten;
+
+	nread = lxc_read_nointr(terminal->peer, buf, sizeof(buf));
+	if (nread <= 0)
+		return -1;
+
+	nwritten = lxc_write_nointr(terminal->ptx, buf, nread);
+	if (nwritten != nread)
+		WARN("Short write on terminal r:%d != w:%d", nread, nwritten);
+
+	return 0;
+}
+
+/*
+ * Mainloop callback for the terminal ptx fd. @data is the struct
+ * lxc_terminal; @fd, @events and @descr are not used.
+ *
+ * Returns LXC_MAINLOOP_CONTINUE while lxc_terminal_ptx_io() succeeds and
+ * LXC_MAINLOOP_CLOSE once it fails.
+ */
+static int lxc_terminal_ptx_io_handler(int fd, uint32_t events, void *data,
+				       struct lxc_async_descr *descr)
+{
+	struct lxc_terminal *terminal = data;
+
+	if (lxc_terminal_ptx_io(terminal) < 0)
+		return log_info(LXC_MAINLOOP_CLOSE,
+				"Terminal client on fd %d has exited",
+				terminal->ptx);
+
+	return LXC_MAINLOOP_CONTINUE;
+}
+
+/*
+ * Mainloop callback for the terminal peer fd. @data is the struct
+ * lxc_terminal; @fd, @events and @descr are not used.
+ *
+ * Returns LXC_MAINLOOP_CONTINUE while lxc_terminal_peer_io() succeeds and
+ * LXC_MAINLOOP_CLOSE once it fails.
+ */
+static int lxc_terminal_peer_io_handler(int fd, uint32_t events, void *data,
+					struct lxc_async_descr *descr)
+{
+	struct lxc_terminal *terminal = data;
+
+	if (lxc_terminal_peer_io(terminal) < 0)
+		return log_info(LXC_MAINLOOP_CLOSE,
+				"Terminal client on fd %d has exited",
+				terminal->peer);
+
return LXC_MAINLOOP_CONTINUE;
}
if (terminal->peer >= 0) {
ret = lxc_mainloop_add_handler(terminal->descr, terminal->peer,
- lxc_terminal_io_cb, terminal);
+ lxc_terminal_peer_io_handler,
+ default_cleanup_handler,
+ terminal, "lxc_terminal_peer_io_handler");
if (ret < 0) {
WARN("Failed to add terminal peer handler to mainloop");
return -1;
if (!terminal->tty_state || terminal->tty_state->sigfd < 0)
return 0;
- ret = lxc_mainloop_add_handler(terminal->descr, terminal->tty_state->sigfd,
- lxc_terminal_signalfd_cb, terminal->tty_state);
+ ret = lxc_mainloop_add_handler(terminal->descr,
+ terminal->tty_state->sigfd,
+ lxc_terminal_signalfd_cb,
+ default_cleanup_handler,
+ terminal->tty_state,
+ "lxc_terminal_signalfd_cb");
if (ret < 0) {
WARN("Failed to add signal handler to mainloop");
return -1;
}
ret = lxc_mainloop_add_handler(descr, terminal->ptx,
- lxc_terminal_io_cb, terminal);
+ lxc_terminal_ptx_io_handler,
+ default_cleanup_handler,
+ terminal, "lxc_terminal_ptx_io_handler");
if (ret < 0) {
- ERROR("Failed to add handler for terminal ptx fd %d to "
- "mainloop", terminal->ptx);
+ ERROR("Failed to add handler for terminal ptx fd %d to mainloop", terminal->ptx);
return -1;
}
if (ts->sigfd != -1) {
ret = lxc_mainloop_add_handler(&descr, ts->sigfd,
- lxc_terminal_signalfd_cb, ts);
+ lxc_terminal_signalfd_cb,
+ default_cleanup_handler,
+ ts, "lxc_terminal_signalfd_cb");
if (ret < 0) {
ERROR("Failed to add signal handler to mainloop");
goto close_mainloop;
}
ret = lxc_mainloop_add_handler(&descr, ts->stdinfd,
- lxc_terminal_stdin_cb, ts);
+ lxc_terminal_stdin_cb,
+ default_cleanup_handler,
+ ts, "lxc_terminal_stdin_cb");
if (ret < 0) {
ERROR("Failed to add stdin handler");
goto close_mainloop;
}
ret = lxc_mainloop_add_handler(&descr, ts->ptxfd,
- lxc_terminal_ptx_cb, ts);
+ lxc_terminal_ptx_cb,
+ default_cleanup_handler,
+ ts, "lxc_terminal_ptx_cb");
if (ret < 0) {
ERROR("Failed to add ptx handler");
goto close_mainloop;
__hidden extern int lxc_terminal_write_ringbuffer(struct lxc_terminal *terminal);
__hidden extern int lxc_terminal_create_log_file(struct lxc_terminal *terminal);
-__hidden extern int lxc_terminal_io_cb(int fd, uint32_t events, void *data,
- struct lxc_async_descr *descr);
__hidden extern int lxc_make_controlling_terminal(int fd);
__hidden extern int lxc_terminal_prepare_login(int fd);
goto out;
}
- ret = lxc_mainloop_add_handler(&descr, 0, stdin_handler, &in_char);
+ ret = lxc_mainloop_add_handler(&descr, 0,
+ stdin_handler,
+ default_cleanup_handler,
+ &in_char, "stdin_handler");
if (ret) {
fprintf(stderr, "Failed to add stdin handler\n");
ret = EXIT_FAILURE;
@OPENSSL_LIBS@ \
@SECCOMP_LIBS@ \
@SELINUX_LIBS@ \
- @DLOG_LIBS@
+ @DLOG_LIBS@ \
+ @LIBURING_LIBS@
LSM_SOURCES = ../lxc/lsm/lsm.c \
../lxc/lsm/lsm.h \