git.proxmox.com Git - lxc.git/commitdiff
update to current master
author Wolfgang Bumiller <w.bumiller@proxmox.com>
Wed, 23 Oct 2019 08:58:14 +0000 (10:58 +0200)
committer Wolfgang Bumiller <w.bumiller@proxmox.com>
Wed, 23 Oct 2019 09:03:01 +0000 (11:03 +0200)
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
25 files changed:
debian/patches/extra/0001-conf-use-SYSERROR-on-lxc_write_to_file-errors.patch [deleted file]
debian/patches/extra/0002-Revert-conf-remove-extra-MS_BIND-with-sysfs-mixed.patch [deleted file]
debian/patches/extra/0003-CVE-2019-5736-runC-rexec-callers-as-memfd.patch [deleted file]
debian/patches/extra/0004-apparmor-generate-ro-bind-remount-rule-list.patch [deleted file]
debian/patches/extra/0005-attach-don-t-close-stdout-of-getent.patch [deleted file]
debian/patches/pve/0002-PVE-Down-run-lxcnetaddbr-when-instantiating-veths.patch
debian/patches/pve/0003-PVE-Config-deny-rw-mounting-of-sys-and-proc.patch
debian/patches/pve/0004-PVE-Up-possibility-to-run-lxc-monitord-as-a-regular-.patch [new file with mode: 0644]
debian/patches/pve/0004-PVE-Up-separate-the-limiting-from-the-namespaced-cgr.patch [deleted file]
debian/patches/pve/0005-PVE-Config-Disable-lxc.monitor-cgroup.patch [new file with mode: 0644]
debian/patches/pve/0005-PVE-Up-start-initutils-make-cgroupns-separation-leve.patch [deleted file]
debian/patches/pve/0006-PVE-Config-namespace-separation.patch [deleted file]
debian/patches/pve/0006-PVE-Up-separate-the-limiting-from-the-namespaced-cgr.patch [new file with mode: 0644]
debian/patches/pve/0007-PVE-Up-possibility-to-run-lxc-monitord-as-a-regular-.patch [deleted file]
debian/patches/pve/0007-PVE-Up-start-initutils-make-cgroupns-separation-leve.patch [new file with mode: 0644]
debian/patches/pve/0008-PVE-Config-Disable-lxc.monitor-cgroup.patch [deleted file]
debian/patches/pve/0008-PVE-Config-namespace-separation.patch [new file with mode: 0644]
debian/patches/pve/0009-PVE-Config-attach-always-use-getent.patch [new file with mode: 0644]
debian/patches/pve/0009-init-add-ExecReload-to-lxc.service-to-only-reload-pr.patch [deleted file]
debian/patches/pve/0010-PVE-Config-attach-always-use-getent.patch [deleted file]
debian/patches/pve/0010-init-add-ExecReload-to-lxc.service-to-only-reload-pr.patch [new file with mode: 0644]
debian/patches/pve/0011-apparmor-generate-ro-bind-remount-rule-list.patch [new file with mode: 0644]
debian/patches/pve/0012-apparmor-Prevent-writes-to-proc-acpi.patch [new file with mode: 0644]
debian/patches/series
lxc

diff --git a/debian/patches/extra/0001-conf-use-SYSERROR-on-lxc_write_to_file-errors.patch b/debian/patches/extra/0001-conf-use-SYSERROR-on-lxc_write_to_file-errors.patch
deleted file mode 100644 (file)
index 2f81fb7..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Wolfgang Bumiller <w.bumiller@proxmox.com>
-Date: Fri, 4 Jan 2019 12:05:49 +0100
-Subject: [PATCH] conf: use SYSERROR on lxc_write_to_file errors
-
-Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
----
- src/lxc/conf.c | 8 ++++----
- 1 file changed, 4 insertions(+), 4 deletions(-)
-
-diff --git a/src/lxc/conf.c b/src/lxc/conf.c
-index 3d0e4a192..0d86d66e5 100644
---- a/src/lxc/conf.c
-+++ b/src/lxc/conf.c
-@@ -2681,8 +2681,8 @@ int setup_sysctl_parameters(struct lxc_list *sysctls)
-               ret = lxc_write_to_file(filename, elem->value,
-                                       strlen(elem->value), false, 0666);
-               if (ret < 0) {
--                      ERROR("Failed to setup sysctl parameters %s to %s",
--                            elem->key, elem->value);
-+                      SYSERROR("Failed to setup sysctl parameters %s to %s",
-+                               elem->key, elem->value);
-                       return -1;
-               }
-       }
-@@ -2716,8 +2716,8 @@ int setup_proc_filesystem(struct lxc_list *procs, pid_t pid)
-               ret = lxc_write_to_file(filename, elem->value,
-                                       strlen(elem->value), false, 0666);
-               if (ret < 0) {
--                      ERROR("Failed to setup proc filesystem %s to %s",
--                            elem->filename, elem->value);
-+                      SYSERROR("Failed to setup proc filesystem %s to %s",
-+                               elem->filename, elem->value);
-                       return -1;
-               }
-       }
--- 
-2.20.1
-
diff --git a/debian/patches/extra/0002-Revert-conf-remove-extra-MS_BIND-with-sysfs-mixed.patch b/debian/patches/extra/0002-Revert-conf-remove-extra-MS_BIND-with-sysfs-mixed.patch
deleted file mode 100644 (file)
index 013a851..0000000
+++ /dev/null
@@ -1,31 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Wolfgang Bumiller <w.bumiller@proxmox.com>
-Date: Thu, 17 Jan 2019 09:16:16 +0100
-Subject: [PATCH] Revert "conf: remove extra MS_BIND with sysfs:mixed"
-
-This reverts commit 51a922baf724689ff3a0df938ca8975601c9c815.
-
-The above commit confuses the mountall unit of privileged
-Ubuntu 14.04 containers at startup so that they cannot
-finish booting.
-
-Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
----
- src/lxc/conf.c | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/src/lxc/conf.c b/src/lxc/conf.c
-index 0d86d66e5..7263d0e1a 100644
---- a/src/lxc/conf.c
-+++ b/src/lxc/conf.c
-@@ -690,6 +690,7 @@ static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_ha
-               { LXC_AUTO_SYS_MASK,  LXC_AUTO_SYS_RW,     "sysfs",                                          "%r/sys",                     "sysfs", 0,                                               NULL },
-               { LXC_AUTO_SYS_MASK,  LXC_AUTO_SYS_RO,     "sysfs",                                          "%r/sys",                     "sysfs", MS_RDONLY,                                       NULL },
-               { LXC_AUTO_SYS_MASK,  LXC_AUTO_SYS_MIXED,  "sysfs",                                          "%r/sys",                     "sysfs", MS_NODEV|MS_NOEXEC|MS_NOSUID,                    NULL },
-+              { LXC_AUTO_SYS_MASK,  LXC_AUTO_SYS_MIXED,  "%r/sys",                                         "%r/sys",                     NULL,    MS_BIND,                                         NULL },
-               { LXC_AUTO_SYS_MASK,  LXC_AUTO_SYS_MIXED,  NULL,                                             "%r/sys",                     NULL,    MS_REMOUNT|MS_BIND|MS_RDONLY,                    NULL },
-               { LXC_AUTO_SYS_MASK,  LXC_AUTO_SYS_MIXED,  "sysfs",                                          "%r/sys/devices/virtual/net", "sysfs", 0,                                               NULL },
-               { LXC_AUTO_SYS_MASK,  LXC_AUTO_SYS_MIXED,  "%r/sys/devices/virtual/net/devices/virtual/net", "%r/sys/devices/virtual/net", NULL,    MS_BIND,                                         NULL },
--- 
-2.20.1
-
diff --git a/debian/patches/extra/0003-CVE-2019-5736-runC-rexec-callers-as-memfd.patch b/debian/patches/extra/0003-CVE-2019-5736-runC-rexec-callers-as-memfd.patch
deleted file mode 100644 (file)
index 24c34e2..0000000
+++ /dev/null
@@ -1,403 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Christian Brauner <christian.brauner@ubuntu.com>
-Date: Sat, 26 Jan 2019 01:19:29 +0100
-Subject: [PATCH] CVE-2019-5736 (runC): rexec callers as memfd
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Adam Iwaniuk and Borys Popławski discovered that an attacker can compromise the
-runC host binary from inside a privileged runC container. As a result, this
-could be exploited to gain root access on the host. runC is used as the default
-runtime for containers with Docker, containerd, Podman, and CRI-O.
-
-The attack can be made when attaching to a running container or when starting a
-container running a specially crafted image.  For example, when runC attaches
-to a container the attacker can trick it into executing itself. This could be
-done by replacing the target binary inside the container with a custom binary
-pointing back at the runC binary itself. As an example, if the target binary
-was /bin/bash, this could be replaced with an executable script specifying the
-interpreter path #!/proc/self/exe (/proc/self/exe is a symbolic link created
-by the kernel for every process which points to the binary that was executed
-for that process). As such when /bin/bash is executed inside the container,
-instead the target of /proc/self/exe will be executed - which will point to the
-runc binary on the host. The attacker can then proceed to write to the target
-of /proc/self/exe to try and overwrite the runC binary on the host. However in
-general, this will not succeed as the kernel will not permit it to be
-overwritten whilst runC is executing. To overcome this, the attacker can
-instead open a file descriptor to /proc/self/exe using the O_PATH flag and then
-proceed to reopen the binary as O_WRONLY through /proc/self/fd/<nr> and try to
-write to it in a busy loop from a separate process. Ultimately it will succeed
-when the runC binary exits. After this the runC binary is compromised and can
-be used to attack other containers or the host itself.
-
-This attack is only possible with privileged containers since it requires root
-privilege on the host to overwrite the runC binary. Unprivileged containers
-with a non-identity ID mapping do not have the permission to write to the host
-binary and therefore are unaffected by this attack.
-
-LXC is also impacted in a similar manner by this vulnerability, however as the
-LXC project considers privileged containers to be unsafe no CVE has been
-assigned for this issue for LXC. Quoting from the
-https://linuxcontainers.org/lxc/security/ project's Security information page:
-
-"As privileged containers are considered unsafe, we typically will not consider
-new container escape exploits to be security issues worthy of a CVE and quick
-fix. We will however try to mitigate those issues so that accidental damage to
-the host is prevented."
-
-To prevent this attack, LXC has been patched to create a temporary copy of the
-calling binary itself when it starts or attaches to containers. To do this LXC
-creates an anonymous, in-memory file using the memfd_create() system call and
-copies itself into the temporary in-memory file, which is then sealed to
-prevent further modifications. LXC then executes this sealed, in-memory file
-instead of the original on-disk binary. Any compromising write operations from
-a privileged container to the host LXC binary will then write to the temporary
-in-memory binary and not to the host binary on-disk, preserving the integrity
-of the host LXC binary. Also as the temporary, in-memory LXC binary is sealed,
-writes to this will also fail.
-
-Note: memfd_create() was added to the Linux kernel in the 3.17 release.
-
-Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
-Co-Developed-by: Aleksa Sarai <asarai@suse.de>
-Acked-by: Serge Hallyn <serge@hallyn.com>
-Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
-(cherry picked from commit 6400238d08cdf1ca20d49bafb85f4e224348bf9d)
----
- configure.ac               |  12 +++
- src/lxc/Makefile.am        |   4 +
- src/lxc/file_utils.c       |  41 ++++++++-
- src/lxc/file_utils.h       |   1 +
- src/lxc/rexec.c            | 181 +++++++++++++++++++++++++++++++++++++
- src/lxc/syscall_wrappers.h |  14 +++
- 6 files changed, 252 insertions(+), 1 deletion(-)
- create mode 100644 src/lxc/rexec.c
-
-diff --git a/configure.ac b/configure.ac
-index 9f3b8fb3c..3177f7da3 100644
---- a/configure.ac
-+++ b/configure.ac
-@@ -727,6 +727,17 @@ AM_COND_IF([ENABLE_DLOG],
-               ])
-       ])
-+AC_ARG_ENABLE([memfd-rexec],
-+      [AC_HELP_STRING([--enable-memfd-rexec], [enforce liblxc as a memfd to protect against certain symlink attacks [default=yes]])],
-+      [], [enable_memfd_rexec=yes])
-+AM_CONDITIONAL([ENFORCE_MEMFD_REXEC], [test "x$enable_memfd_rexec" = "xyes"])
-+if test "x$enable_memfd_rexec" = "xyes"; then
-+      AC_DEFINE([ENFORCE_MEMFD_REXEC], 1, [Rexec liblxc as memfd])
-+      AC_MSG_RESULT([yes])
-+else
-+      AC_MSG_RESULT([no])
-+fi
-+
- # Files requiring some variable expansion
- AC_CONFIG_FILES([
-       Makefile
-@@ -956,6 +967,7 @@ Security features:
-  - Linux capabilities: $enable_capabilities
-  - seccomp: $enable_seccomp
-  - SELinux: $enable_selinux
-+ - memfd rexec: $enable_memfd_rexec
- PAM:
-  - PAM module: $enable_pam
-diff --git a/src/lxc/Makefile.am b/src/lxc/Makefile.am
-index 95b0a2f72..865d341fe 100644
---- a/src/lxc/Makefile.am
-+++ b/src/lxc/Makefile.am
-@@ -175,6 +175,10 @@ if !HAVE_STRLCAT
- liblxc_la_SOURCES += ../include/strlcat.c ../include/strlcat.h
- endif
-+if ENFORCE_MEMFD_REXEC
-+liblxc_la_SOURCES += rexec.c
-+endif
-+
- AM_CFLAGS = -DLXCROOTFSMOUNT=\"$(LXCROOTFSMOUNT)\" \
-           -DLXCPATH=\"$(LXCPATH)\" \
-           -DLXC_GLOBAL_CONF=\"$(LXC_GLOBAL_CONF)\" \
-diff --git a/src/lxc/file_utils.c b/src/lxc/file_utils.c
-index f89aa638d..930fd738a 100644
---- a/src/lxc/file_utils.c
-+++ b/src/lxc/file_utils.c
-@@ -31,7 +31,7 @@
- #include "config.h"
- #include "file_utils.h"
- #include "macro.h"
--#include "string.h"
-+#include "string_utils.h"
- int lxc_write_to_file(const char *filename, const void *buf, size_t count,
-                     bool add_newline, mode_t mode)
-@@ -327,3 +327,42 @@ again:
-       return ret;
- }
-+
-+char *file_to_buf(char *path, size_t *length)
-+{
-+      int fd;
-+      char buf[PATH_MAX];
-+      char *copy = NULL;
-+
-+      if (!length)
-+              return NULL;
-+
-+      fd = open(path, O_RDONLY | O_CLOEXEC);
-+      if (fd < 0)
-+              return NULL;
-+
-+      *length = 0;
-+      for (;;) {
-+              int n;
-+              char *old = copy;
-+
-+              n = lxc_read_nointr(fd, buf, sizeof(buf));
-+              if (n < 0)
-+                      goto on_error;
-+              if (!n)
-+                      break;
-+
-+              copy = must_realloc(old, (*length + n) * sizeof(*old));
-+              memcpy(copy + *length, buf, n);
-+              *length += n;
-+      }
-+
-+      close(fd);
-+      return copy;
-+
-+on_error:
-+      close(fd);
-+      free(copy);
-+
-+      return NULL;
-+}
-diff --git a/src/lxc/file_utils.h b/src/lxc/file_utils.h
-index 6361557a0..518a61af3 100644
---- a/src/lxc/file_utils.h
-+++ b/src/lxc/file_utils.h
-@@ -55,5 +55,6 @@ extern bool is_fs_type(const struct statfs *fs, fs_type_magic magic_val);
- extern FILE *fopen_cloexec(const char *path, const char *mode);
- extern ssize_t lxc_sendfile_nointr(int out_fd, int in_fd, off_t *offset,
-                                  size_t count);
-+extern char *file_to_buf(char *path, size_t *length);
- #endif /* __LXC_FILE_UTILS_H */
-diff --git a/src/lxc/rexec.c b/src/lxc/rexec.c
-new file mode 100644
-index 000000000..396bd617f
---- /dev/null
-+++ b/src/lxc/rexec.c
-@@ -0,0 +1,181 @@
-+/* liblxcapi
-+ *
-+ * Copyright © 2019 Christian Brauner <christian.brauner@ubuntu.com>.
-+ * Copyright © 2019 Canonical Ltd.
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License version 2, as
-+ * published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License along
-+ * with this program; if not, write to the Free Software Foundation, Inc.,
-+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-+ */
-+
-+#ifndef _GNU_SOURCE
-+#define _GNU_SOURCE 1
-+#endif
-+#include <errno.h>
-+#include <stdio.h>
-+#include <stdlib.h>
-+#include <string.h>
-+
-+#include "config.h"
-+#include "file_utils.h"
-+#include "raw_syscalls.h"
-+#include "string_utils.h"
-+#include "syscall_wrappers.h"
-+
-+#define LXC_MEMFD_REXEC_SEALS \
-+      (F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE)
-+
-+static int push_vargs(char *data, int data_length, char ***output)
-+{
-+      int num = 0;
-+      char *cur = data;
-+
-+      if (!data || *output)
-+              return -1;
-+
-+      *output = must_realloc(NULL, sizeof(**output));
-+
-+      while (cur < data + data_length) {
-+              num++;
-+              *output = must_realloc(*output, (num + 1) * sizeof(**output));
-+
-+              (*output)[num - 1] = cur;
-+              cur += strlen(cur) + 1;
-+      }
-+      (*output)[num] = NULL;
-+      return num;
-+}
-+
-+static int parse_exec_params(char ***argv, char ***envp)
-+{
-+      int ret;
-+      char *cmdline = NULL, *env = NULL;
-+      size_t cmdline_size, env_size;
-+
-+      cmdline = file_to_buf("/proc/self/cmdline", &cmdline_size);
-+      if (!cmdline)
-+              goto on_error;
-+
-+      env = file_to_buf("/proc/self/environ", &env_size);
-+      if (!env)
-+              goto on_error;
-+
-+      ret = push_vargs(cmdline, cmdline_size, argv);
-+      if (ret <= 0)
-+              goto on_error;
-+
-+      ret = push_vargs(env, env_size, envp);
-+      if (ret <= 0)
-+              goto on_error;
-+
-+      return 0;
-+
-+on_error:
-+      free(env);
-+      free(cmdline);
-+
-+      return -1;
-+}
-+
-+static int is_memfd(void)
-+{
-+      int fd, saved_errno, seals;
-+
-+      fd = open("/proc/self/exe", O_RDONLY | O_CLOEXEC);
-+      if (fd < 0)
-+              return -ENOTRECOVERABLE;
-+
-+      seals = fcntl(fd, F_GET_SEALS);
-+      saved_errno = errno;
-+      close(fd);
-+      errno = saved_errno;
-+      if (seals < 0)
-+              return -EINVAL;
-+
-+      return seals == LXC_MEMFD_REXEC_SEALS;
-+}
-+
-+static void lxc_rexec_as_memfd(char **argv, char **envp, const char *memfd_name)
-+{
-+      int saved_errno;
-+      ssize_t bytes_sent;
-+      int fd = -1, memfd = -1;
-+
-+      memfd = memfd_create(memfd_name, MFD_ALLOW_SEALING | MFD_CLOEXEC);
-+      if (memfd < 0)
-+              return;
-+
-+      fd = open("/proc/self/exe", O_RDONLY | O_CLOEXEC);
-+      if (fd < 0)
-+              goto on_error;
-+
-+      /* sendfile() handles up to 2GB. */
-+      bytes_sent = lxc_sendfile_nointr(memfd, fd, NULL, LXC_SENDFILE_MAX);
-+      saved_errno = errno;
-+      close(fd);
-+      errno = saved_errno;
-+      if (bytes_sent < 0)
-+              goto on_error;
-+
-+      if (fcntl(memfd, F_ADD_SEALS, LXC_MEMFD_REXEC_SEALS))
-+              goto on_error;
-+
-+      fexecve(memfd, argv, envp);
-+
-+on_error:
-+      saved_errno = errno;
-+      close(memfd);
-+      errno = saved_errno;
-+}
-+
-+static int lxc_rexec(const char *memfd_name)
-+{
-+      int ret;
-+      char **argv = NULL, **envp = NULL;
-+
-+      ret = is_memfd();
-+      if (ret < 0 && ret == -ENOTRECOVERABLE) {
-+              fprintf(stderr,
-+                      "%s - Failed to determine whether this is a memfd\n",
-+                      strerror(errno));
-+              return -1;
-+      } else if (ret > 0) {
-+              return 0;
-+      }
-+
-+      ret = parse_exec_params(&argv, &envp);
-+      if (ret < 0) {
-+              fprintf(stderr,
-+                      "%s - Failed to parse command line parameters\n",
-+                      strerror(errno));
-+              return -1;
-+      }
-+
-+      lxc_rexec_as_memfd(argv, envp, memfd_name);
-+      fprintf(stderr, "%s - Failed to rexec as memfd\n", strerror(errno));
-+      return -1;
-+}
-+
-+/**
-+ * This function will copy any binary that calls liblxc into a memory file and
-+ * will use the memfd to rexecute the binary. This is done to prevent attacks
-+ * through the /proc/self/exe symlink to corrupt the host binary when host and
-+ * container are in the same user namespace or have set up an identity id
-+ * mapping: CVE-2019-5736.
-+ */
-+__attribute__((constructor)) static void liblxc_rexec(void)
-+{
-+      if (lxc_rexec("liblxc")) {
-+              fprintf(stderr, "Failed to re-execute liblxc via memory file descriptor\n");
-+              _exit(EXIT_FAILURE);
-+      }
-+}
-diff --git a/src/lxc/syscall_wrappers.h b/src/lxc/syscall_wrappers.h
-index 42d94db28..dca4d1571 100644
---- a/src/lxc/syscall_wrappers.h
-+++ b/src/lxc/syscall_wrappers.h
-@@ -58,6 +58,20 @@ static inline long __keyctl(int cmd, unsigned long arg2, unsigned long arg3,
- #define keyctl __keyctl
- #endif
-+#ifndef F_LINUX_SPECIFIC_BASE
-+#define F_LINUX_SPECIFIC_BASE 1024
-+#endif
-+#ifndef F_ADD_SEALS
-+#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
-+#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
-+#endif
-+#ifndef F_SEAL_SEAL
-+#define F_SEAL_SEAL 0x0001
-+#define F_SEAL_SHRINK 0x0002
-+#define F_SEAL_GROW 0x0004
-+#define F_SEAL_WRITE 0x0008
-+#endif
-+
- #ifndef HAVE_MEMFD_CREATE
- static inline int memfd_create(const char *name, unsigned int flags) {
-       #ifndef __NR_memfd_create
--- 
-2.20.1
-
diff --git a/debian/patches/extra/0004-apparmor-generate-ro-bind-remount-rule-list.patch b/debian/patches/extra/0004-apparmor-generate-ro-bind-remount-rule-list.patch
deleted file mode 100644 (file)
index ecc1ca3..0000000
+++ /dev/null
@@ -1,169 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Wolfgang Bumiller <w.bumiller@proxmox.com>
-Date: Fri, 2 Aug 2019 12:57:42 +0200
-Subject: [PATCH] apparmor: generate ro,bind,remount rule list
-
-and update to changes based on lxd
-
-Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
----
- src/lxc/lsm/apparmor.c | 114 ++++++++++++++++++++++++++++++++++++-----
- 1 file changed, 100 insertions(+), 14 deletions(-)
-
-diff --git a/src/lxc/lsm/apparmor.c b/src/lxc/lsm/apparmor.c
-index e32b12531..47f825866 100644
---- a/src/lxc/lsm/apparmor.c
-+++ b/src/lxc/lsm/apparmor.c
-@@ -149,6 +149,16 @@ static const char AA_PROFILE_BASE[] =
- "#  mount options=(rw,make-unbindable) -> **,\n"
- "#  mount options=(rw,make-runbindable) -> **,\n"
- "\n"
-+"# Allow limited modification of mount propagation\n"
-+"  mount options=(rw,make-slave) -> /,\n"
-+"  mount options=(rw,make-rslave) -> /,\n"
-+"  mount options=(rw,make-shared) -> /,\n"
-+"  mount options=(rw,make-rshared) -> /,\n"
-+"  mount options=(rw,make-private) -> /,\n"
-+"  mount options=(rw,make-rprivate) -> /,\n"
-+"  mount options=(rw,make-unbindable) -> /,\n"
-+"  mount options=(rw,make-runbindable) -> /,\n"
-+"\n"
- "  # allow bind-mounts of anything except /proc, /sys and /dev\n"
- "  mount options=(rw,bind) /[^spd]*{,/**},\n"
- "  mount options=(rw,bind) /d[^e]*{,/**},\n"
-@@ -167,15 +177,18 @@ static const char AA_PROFILE_BASE[] =
- "  mount options=(rw,bind) /sy[^s]*{,/**},\n"
- "  mount options=(rw,bind) /sys?*{,/**},\n"
- "\n"
--"  # allow various ro-bind-*re*-mounts\n"
--"  mount options=(ro,remount,bind),\n"
--"  mount options=(ro,remount,bind,nosuid),\n"
--"  mount options=(ro,remount,bind,noexec),\n"
--"  mount options=(ro,remount,bind,nodev),\n"
--"  mount options=(ro,remount,bind,nosuid,noexec),\n"
--"  mount options=(ro,remount,bind,noexec,nodev),\n"
--"  mount options=(ro,remount,bind,nodev,nosuid),\n"
--"  mount options=(ro,remount,bind,nosuid,noexec,nodev),\n"
-+"  # Allow rbind-mounts of anything except /, /dev, /proc and /sys\n"
-+"  mount options=(rw,rbind) /[^spd]*{,/**},\n"
-+"  mount options=(rw,rbind) /d[^e]*{,/**},\n"
-+"  mount options=(rw,rbind) /de[^v]*{,/**},\n"
-+"  mount options=(rw,rbind) /dev?*{,/**},\n"
-+"  mount options=(rw,rbind) /p[^r]*{,/**},\n"
-+"  mount options=(rw,rbind) /pr[^o]*{,/**},\n"
-+"  mount options=(rw,rbind) /pro[^c]*{,/**},\n"
-+"  mount options=(rw,rbind) /proc?*{,/**},\n"
-+"  mount options=(rw,rbind) /s[^y]*{,/**},\n"
-+"  mount options=(rw,rbind) /sy[^s]*{,/**},\n"
-+"  mount options=(rw,rbind) /sys?*{,/**},\n"
- "\n"
- "  # allow moving mounts except for /proc, /sys and /dev\n"
- "  mount options=(rw,move) /[^spd]*{,/**},\n"
-@@ -341,12 +354,13 @@ static const char AA_PROFILE_NESTING_BASE[] =
- "\n"
- "  mount fstype=proc -> /usr/lib/*/lxc/**,\n"
- "  mount fstype=sysfs -> /usr/lib/*/lxc/**,\n"
--"  mount options=(rw,bind),\n"
--"  mount options=(rw,rbind),\n"
--"  mount options=(rw,make-rshared),\n"
- "\n"
--   /* FIXME: What's the state here on apparmor's side? */
--"  # there doesn't seem to be a way to ask for:\n"
-+"  # Allow nested LXD\n"
-+"  mount none -> /var/lib/lxd/shmounts/,\n"
-+"  mount /var/lib/lxd/shmounts/ -> /var/lib/lxd/shmounts/,\n"
-+"  mount options=bind /var/lib/lxd/shmounts/** -> /var/lib/lxd/**,\n"
-+"\n"
-+"  # FIXME: There doesn't seem to be a way to ask for:\n"
- "  # mount options=(ro,nosuid,nodev,noexec,remount,bind),\n"
- "  # as we always get mount to $cdir/proc/sys with those flags denied\n"
- "  # So allow all mounts until that is straightened out:\n"
-@@ -648,6 +662,76 @@ static bool is_privileged(struct lxc_conf *conf)
-       return lxc_list_empty(&conf->id_map);
- }
-+static const char* AA_ALL_DEST_PATH_LIST[] = {
-+      " -> /[^spd]*{,/**},\n",
-+      " -> /d[^e]*{,/**},\n",
-+      " -> /de[^v]*{,/**},\n",
-+      " -> /dev/.[^l]*{,/**},\n",
-+      " -> /dev/.l[^x]*{,/**},\n",
-+      " -> /dev/.lx[^c]*{,/**},\n",
-+      " -> /dev/.lxc?*{,/**},\n",
-+      " -> /dev/[^.]*{,/**},\n",
-+      " -> /dev?*{,/**},\n",
-+      " -> /p[^r]*{,/**},\n",
-+      " -> /pr[^o]*{,/**},\n",
-+      " -> /pro[^c]*{,/**},\n",
-+      " -> /proc?*{,/**},\n",
-+      " -> /s[^y]*{,/**},\n",
-+      " -> /sy[^s]*{,/**},\n",
-+      " -> /sys?*{,/**},\n",
-+      NULL,
-+};
-+
-+static void append_remount_rule(char **profile, size_t *size, const char *rule)
-+{
-+      size_t rule_len = strlen(rule);
-+
-+      for (const char **dest = AA_ALL_DEST_PATH_LIST; *dest; ++dest) {
-+              must_append_sized(profile, size, rule, rule_len);
-+              must_append_sized(profile, size, *dest, strlen(*dest));
-+      }
-+}
-+
-+static void append_all_remount_rules(char **profile, size_t *size)
-+{
-+      must_append_sized(profile, size,
-+                        "# allow various ro-bind-*re*mounts\n",
-+                        sizeof("# allow various ro-bind-*re*mounts\n")-1);
-+
-+      static struct mntopt_t {
-+              const char *opt;
-+              size_t len;
-+      } mnt_opt_list[] = {
-+              { ",nodev", sizeof(",nodev")-1 },
-+              { ",nosuid", sizeof(",nosuid")-1 },
-+              { ",noexec", sizeof(",noexec")-1 },
-+      };
-+
-+      const size_t opt_count = sizeof(mnt_opt_list) / sizeof(mnt_opt_list[0]);
-+
-+      char buf[128] = "mount options=(ro,remount,bind";
-+      const size_t start = strlen(buf);
-+      for (size_t i = 0; i != 1 << opt_count; ++i) {
-+              size_t at = start;
-+              unsigned opt_bit = 1;
-+
-+              for (size_t o = 0; o != opt_count; ++o, opt_bit <<= 1) {
-+                      if (i & opt_bit) {
-+                              struct mntopt_t *opt = &mnt_opt_list[o];
-+                              memcpy(&buf[at], opt->opt, opt->len);
-+                              at += opt->len;
-+                      }
-+              }
-+
-+              memcpy(&buf[at], ")", sizeof(")"));
-+              append_remount_rule(profile, size, buf);
-+              memcpy(&buf[at], ",noatime)", sizeof(",noatime)"));
-+              append_remount_rule(profile, size, buf);
-+              memcpy(&buf[at], ",strictatime)", sizeof(",strictatime)"));
-+              append_remount_rule(profile, size, buf);
-+      }
-+}
-+
- static char *get_apparmor_profile_content(struct lxc_conf *conf, const char *lxcpath)
- {
-       char *profile, *profile_name_full;
-@@ -665,6 +749,8 @@ static char *get_apparmor_profile_content(struct lxc_conf *conf, const char *lxc
-       must_append_sized(&profile, &size, AA_PROFILE_BASE,
-                         STRARRAYLEN(AA_PROFILE_BASE));
-+      append_all_remount_rules(&profile, &size);
-+
-       if (aa_supports_unix)
-               must_append_sized(&profile, &size, AA_PROFILE_UNIX_SOCKETS,
-                                 STRARRAYLEN(AA_PROFILE_UNIX_SOCKETS));
--- 
-2.20.1
-
diff --git a/debian/patches/extra/0005-attach-don-t-close-stdout-of-getent.patch b/debian/patches/extra/0005-attach-don-t-close-stdout-of-getent.patch
deleted file mode 100644 (file)
index 02568fa..0000000
+++ /dev/null
@@ -1,26 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Wolfgang Bumiller <w.bumiller@proxmox.com>
-Date: Tue, 13 Aug 2019 14:17:30 +0200
-Subject: [PATCH] attach: don't close stdout of getent
-
-Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
----
- src/lxc/attach.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/lxc/attach.c b/src/lxc/attach.c
-index 8b34a412e..6075688c2 100644
---- a/src/lxc/attach.c
-+++ b/src/lxc/attach.c
-@@ -488,7 +488,7 @@ static char *lxc_attach_getpwshell(uid_t uid)
-                       close(STDERR_FILENO);
-               } else {
-                       (void)dup3(fd, STDIN_FILENO, O_CLOEXEC);
--                      (void)dup3(fd, STDOUT_FILENO, O_CLOEXEC);
-+                      (void)dup3(fd, STDERR_FILENO, O_CLOEXEC);
-                       close(fd);
-               }
--- 
-2.20.1
-
index 070c5e4068bfc764634671c23ba68857ed7173fa..bc4099d5d2c01dd17fa401c23b7bec614d47f99d 100644 (file)
@@ -11,10 +11,10 @@ Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
  1 file changed, 5 insertions(+)
 
 diff --git a/src/lxc/network.c b/src/lxc/network.c
-index d0f14e632..9337ad4d9 100644
+index 65727f6b5..cd8d0bb14 100644
 --- a/src/lxc/network.c
 +++ b/src/lxc/network.c
-@@ -195,6 +195,11 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd
+@@ -503,6 +503,11 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd
                                netdev->upscript, "up", argv);
                if (err < 0)
                        goto out_delete;
index 74835c3ed433e7bc61e08ccb4da21e3d79472f66..9040b430ecc736870f410107447922e83cc720fa 100644 (file)
@@ -38,10 +38,10 @@ index 077476559..fbd70fdf5 100644
    # FIXME: This currently doesn't work due to the apparmor parser treating those as allowing all mounts.
  #  mount options=(rw,make-slave) -> **,
 diff --git a/config/apparmor/abstractions/container-base.in b/config/apparmor/abstractions/container-base.in
-index 1a3ead89a..39abf348c 100644
+index 2606fb64c..3e61c62ea 100644
 --- a/config/apparmor/abstractions/container-base.in
 +++ b/config/apparmor/abstractions/container-base.in
-@@ -82,7 +82,6 @@
+@@ -83,7 +83,6 @@
    deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/,
    mount fstype=proc -> /proc/,
    mount fstype=sysfs -> /sys/,
@@ -49,7 +49,7 @@ index 1a3ead89a..39abf348c 100644
    deny /sys/firmware/efi/efivars/** rwklx,
    deny /sys/kernel/security/** rwklx,
    mount options=(ro, nosuid, nodev, noexec, remount, strictatime) -> /sys/fs/cgroup/,
-@@ -90,6 +89,11 @@
+@@ -91,6 +90,11 @@
    # deny reads from debugfs
    deny /sys/kernel/debug/{,**} rwklx,
  
diff --git a/debian/patches/pve/0004-PVE-Up-possibility-to-run-lxc-monitord-as-a-regular-.patch b/debian/patches/pve/0004-PVE-Up-possibility-to-run-lxc-monitord-as-a-regular-.patch
new file mode 100644 (file)
index 0000000..7f3e921
--- /dev/null
@@ -0,0 +1,207 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Wolfgang Bumiller <w.bumiller@proxmox.com>
+Date: Mon, 20 Nov 2017 10:49:41 +0100
+Subject: [PATCH] PVE: [Up] possibility to run lxc-monitord as a regular daemon
+
+lxc-monitord instances are spawned on demand and, if this
+happens from a service, the daemon is considered part of
+it by systemd, as it is running in the same cgroups. This
+can be avoided by leaving it running permanently.
+
+Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
+---
+ .gitignore                                  |  1 +
+ config/init/systemd/Makefile.am             | 10 ++--
+ config/init/systemd/lxc-monitord.service.in | 12 +++++
+ configure.ac                                |  1 +
+ lxc.spec.in                                 |  1 +
+ src/lxc/cmd/lxc_monitord.c                  | 60 +++++++++++++++------
+ 6 files changed, 64 insertions(+), 21 deletions(-)
+ create mode 100644 config/init/systemd/lxc-monitord.service.in
+
+diff --git a/.gitignore b/.gitignore
+index b2d4657c4..36d0b7013 100644
+--- a/.gitignore
++++ b/.gitignore
+@@ -119,6 +119,7 @@ config/bash/lxc
+ config/init/common/lxc-containers
+ config/init/common/lxc-net
+ config/init/systemd/lxc-autostart-helper
++config/init/systemd/lxc-monitord.service
+ config/init/systemd/lxc-net.service
+ config/init/systemd/lxc.service
+ config/init/systemd/lxc@.service
+diff --git a/config/init/systemd/Makefile.am b/config/init/systemd/Makefile.am
+index c448850d1..4a4fde5e7 100644
+--- a/config/init/systemd/Makefile.am
++++ b/config/init/systemd/Makefile.am
+@@ -2,19 +2,21 @@ EXTRA_DIST = \
+       lxc-apparmor-load \
+       lxc.service.in \
+       lxc@.service.in \
+-      lxc-net.service.in
++      lxc-net.service.in \
++      lxc-monitord.service.in
+ if INIT_SCRIPT_SYSTEMD
+-BUILT_SOURCES = lxc.service lxc@.service lxc-net.service
++BUILT_SOURCES = lxc.service lxc@.service lxc-net.service lxc-monitord.service
+-install-systemd: lxc.service lxc@.service lxc-net.service lxc-apparmor-load
++install-systemd: lxc.service lxc@.service lxc-net.service lxc-monitord.service lxc-apparmor-load
+       $(MKDIR_P) $(DESTDIR)$(SYSTEMD_UNIT_DIR)
+-      $(INSTALL_DATA) lxc.service lxc@.service lxc-net.service $(DESTDIR)$(SYSTEMD_UNIT_DIR)/
++      $(INSTALL_DATA) lxc.service lxc@.service lxc-net.service lxc-monitord.service $(DESTDIR)$(SYSTEMD_UNIT_DIR)/
+ uninstall-systemd:
+       rm -f $(DESTDIR)$(SYSTEMD_UNIT_DIR)/lxc.service
+       rm -f $(DESTDIR)$(SYSTEMD_UNIT_DIR)/lxc@.service
+       rm -f $(DESTDIR)$(SYSTEMD_UNIT_DIR)/lxc-net.service
++      rm -f $(DESTDIR)$(SYSTEMD_UNIT_DIR)/lxc-monitord.service
+       rmdir $(DESTDIR)$(SYSTEMD_UNIT_DIR) || :
+ pkglibexec_SCRIPTS = lxc-apparmor-load
+diff --git a/config/init/systemd/lxc-monitord.service.in b/config/init/systemd/lxc-monitord.service.in
+new file mode 100644
+index 000000000..406351688
+--- /dev/null
++++ b/config/init/systemd/lxc-monitord.service.in
+@@ -0,0 +1,12 @@
++[Unit]
++Description=LXC Container Monitoring Daemon
++After=syslog.service network.target
++
++[Service]
++Type=simple
++ExecStart=@LIBEXECDIR@/lxc/lxc-monitord --daemon
++StandardOutput=syslog
++StandardError=syslog
++
++[Install]
++WantedBy=multi-user.target
+diff --git a/configure.ac b/configure.ac
+index 645a2166d..6260f483f 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -796,6 +796,7 @@ AC_CONFIG_FILES([
+       config/init/systemd/lxc.service
+       config/init/systemd/lxc@.service
+       config/init/systemd/lxc-net.service
++      config/init/systemd/lxc-monitord.service
+       config/init/sysvinit/Makefile
+       config/init/sysvinit/lxc-containers
+       config/init/sysvinit/lxc-net
+diff --git a/lxc.spec.in b/lxc.spec.in
+index ec6321c33..ea6789fb6 100644
+--- a/lxc.spec.in
++++ b/lxc.spec.in
+@@ -251,6 +251,7 @@ fi
+ %{_unitdir}/lxc-net.service
+ %{_unitdir}/lxc.service
+ %{_unitdir}/lxc@.service
++%{_unitdir}/lxc-monitord.service
+ %else
+ %{_sysconfdir}/rc.d/init.d/lxc
+ %{_sysconfdir}/rc.d/init.d/lxc-net
+diff --git a/src/lxc/cmd/lxc_monitord.c b/src/lxc/cmd/lxc_monitord.c
+index 3b931b361..d3cc35978 100644
+--- a/src/lxc/cmd/lxc_monitord.c
++++ b/src/lxc/cmd/lxc_monitord.c
+@@ -359,17 +359,44 @@ static void lxc_monitord_sig_handler(int sig)
+ int main(int argc, char *argv[])
+ {
+-      int ret, pipefd;
++      int ret, pipefd = -1;
+       char logpath[PATH_MAX];
+       sigset_t mask;
+-      char *lxcpath = argv[1];
++      const char *lxcpath = NULL;
+       bool mainloop_opened = false;
+       bool monitord_created = false;
++      bool persistent = false;
+       struct lxc_log log;
+-      if (argc != 3) {
++      if (argc > 1 && !strcmp(argv[1], "--daemon")) {
++              persistent = true;
++              --argc;
++              ++argv;
++      }
++
++      if (argc > 1) {
++              lxcpath = argv[1];
++              --argc;
++              ++argv;
++      } else {
++              lxcpath = lxc_global_config_value("lxc.lxcpath");
++              if (!lxcpath) {
++                      ERROR("Out of memory getting lxcpath");
++                      exit(EXIT_FAILURE);
++              }
++      }
++
++      if (argc > 1) {
++              if (lxc_safe_int(argv[1], &pipefd) < 0)
++                      exit(EXIT_FAILURE);
++              --argc;
++              ++argv;
++      }
++
++      if (argc != 1 || (persistent != (pipefd == -1))) {
+               fprintf(stderr,
+-                      "Usage: lxc-monitord lxcpath sync-pipe-fd\n\n"
++                      "Usage: lxc-monitord lxcpath sync-pipe-fd\n"
++                      "       lxc-monitord --daemon lxcpath\n\n"
+                       "NOTE: lxc-monitord is intended for use by lxc internally\n"
+                       "      and does not need to be run by hand\n\n");
+               exit(EXIT_FAILURE);
+@@ -392,9 +419,6 @@ int main(int argc, char *argv[])
+               INFO("Failed to open log file %s, log will be lost", lxcpath);
+       lxc_log_options_no_override();
+-      if (lxc_safe_int(argv[2], &pipefd) < 0)
+-              exit(EXIT_FAILURE);
+-
+       if (sigfillset(&mask) ||
+           sigdelset(&mask, SIGILL)  ||
+           sigdelset(&mask, SIGSEGV) ||
+@@ -427,15 +451,17 @@ int main(int argc, char *argv[])
+               goto on_error;
+       monitord_created = true;
+-      /* sync with parent, we're ignoring the return from write
+-       * because regardless if it works or not, the following
+-       * close will sync us with the parent process. the
+-       * if-empty-statement construct is to quiet the
+-       * warn-unused-result warning.
+-       */
+-      if (lxc_write_nointr(pipefd, "S", 1))
+-              ;
+-      close(pipefd);
++      if (pipefd != -1) {
++              /* sync with parent, we're ignoring the return from write
++               * because regardless if it works or not, the following
++               * close will sync us with the parent process. the
++               * if-empty-statement construct is to quiet the
++               * warn-unused-result warning.
++               */
++              if (lxc_write_nointr(pipefd, "S", 1))
++                      ;
++              close(pipefd);
++      }
+       if (lxc_monitord_mainloop_add(&monitor)) {
+               ERROR("Failed to add mainloop handlers");
+@@ -446,7 +472,7 @@ int main(int argc, char *argv[])
+              lxc_raw_getpid(), monitor.lxcpath);
+       for (;;) {
+-              ret = lxc_mainloop(&monitor.descr, 1000 * 30);
++              ret = lxc_mainloop(&monitor.descr, persistent ? -1 : 1000 * 30);
+               if (ret) {
+                       ERROR("mainloop returned an error");
+                       break;
+-- 
+2.20.1
+
diff --git a/debian/patches/pve/0004-PVE-Up-separate-the-limiting-from-the-namespaced-cgr.patch b/debian/patches/pve/0004-PVE-Up-separate-the-limiting-from-the-namespaced-cgr.patch
deleted file mode 100644 (file)
index e5670cf..0000000
+++ /dev/null
@@ -1,565 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Wolfgang Bumiller <w.bumiller@proxmox.com>
-Date: Wed, 28 Mar 2018 13:37:28 +0200
-Subject: [PATCH] PVE: [Up] separate the limiting from the namespaced cgroup
- root
-
-When cgroup namespaces are enabled a privileged container
-with mixed cgroups has full write access to its own root
-cgroup effectively allowing it to overwrite values written
-from the outside or configured via lxc.cgroup.*.
-
-This patch causes an additional 'cgns/' directory to be
-created in all cgroups if cgroup namespaces and cgfsng are
-being used in order to combat this.
-
-Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
----
- src/lxc/cgroups/cgfsng.c | 94 +++++++++++++++++++++++++++++++++-------
- src/lxc/cgroups/cgroup.h | 18 ++++++--
- src/lxc/commands.c       | 87 ++++++++++++++++++++++++++++---------
- src/lxc/commands.h       |  2 +
- src/lxc/criu.c           |  4 +-
- src/lxc/start.c          | 28 +++++++++---
- 6 files changed, 183 insertions(+), 50 deletions(-)
-
-diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
-index ab99b47c5..ac8f469bb 100644
---- a/src/lxc/cgroups/cgfsng.c
-+++ b/src/lxc/cgroups/cgfsng.c
-@@ -818,6 +818,7 @@ static struct hierarchy *add_hierarchy(struct hierarchy ***h, char **clist, char
-       new->mountpoint = mountpoint;
-       new->container_base_path = container_base_path;
-       new->container_full_path = NULL;
-+      new->container_inner_path = NULL;
-       new->monitor_full_path = NULL;
-       new->version = type;
-@@ -1059,6 +1060,9 @@ static int cgroup_rmdir(struct hierarchy **hierarchies,
-               free(h->container_full_path);
-               h->container_full_path = NULL;
-+
-+              free(h->container_inner_path);
-+              h->container_inner_path = NULL;
-       }
-       return 0;
-@@ -1070,6 +1074,7 @@ struct generic_userns_exec_data {
-       struct lxc_conf *conf;
-       uid_t origuid; /* target uid in parent namespace */
-       char *path;
-+      bool inner;
- };
- static int cgroup_rmdir_wrapper(void *data)
-@@ -1112,6 +1117,7 @@ __cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops,
-       wrap.container_cgroup = ops->container_cgroup;
-       wrap.hierarchies = ops->hierarchies;
-       wrap.conf = handler->conf;
-+      wrap.inner = false;
-       if (handler->conf && !lxc_list_empty(&handler->conf->id_map))
-               ret = userns_exec_1(handler->conf, cgroup_rmdir_wrapper, &wrap,
-@@ -1323,17 +1329,26 @@ static bool monitor_create_path_for_hierarchy(struct hierarchy *h, char *cgname)
-       return cg_unified_create_cgroup(h, cgname);
- }
--static bool container_create_path_for_hierarchy(struct hierarchy *h, char *cgname)
-+static bool container_create_path_for_hierarchy(struct hierarchy *h, char *cgname, bool inner)
- {
-       int ret;
-+      char *path;
--      if (!cg_legacy_handle_cpuset_hierarchy(h, cgname)) {
-+      if (!inner && !cg_legacy_handle_cpuset_hierarchy(h, cgname)) {
-               ERROR("Failed to handle legacy cpuset controller");
-               return false;
-       }
--      h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
--      ret = mkdir_eexist_on_last(h->container_full_path, 0755);
-+      if (inner) {
-+              path = must_make_path(h->container_full_path, CGROUP_NAMESPACE_SUBDIR, NULL);
-+              h->container_inner_path = path;
-+              ret = mkdir(path, 0755);
-+      } else {
-+              path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
-+              h->container_full_path = path;
-+              ret = mkdir_eexist_on_last(path, 0755);
-+      }
-+
-       if (ret < 0) {
-               ERROR("Failed to create cgroup \"%s\"", h->container_full_path);
-               return false;
-@@ -1425,11 +1440,29 @@ on_error:
-       return bret;
- }
-+static inline bool cgfsng_create_inner(struct cgroup_ops *ops)
-+{
-+      size_t i;
-+      bool ret = true;
-+      char *cgname = must_make_path(ops->container_cgroup, CGROUP_NAMESPACE_SUBDIR, NULL);
-+      for (i = 0; ops->hierarchies[i]; i++) {
-+              if (!container_create_path_for_hierarchy(ops->hierarchies[i], cgname, true)) {
-+                      SYSERROR("Failed to create %s namespace subdirectory: %s",
-+                               ops->hierarchies[i]->container_full_path, strerror(errno));
-+                      ret = false;
-+                      break;
-+              }
-+      }
-+      free(cgname);
-+      return ret;
-+}
-+
- /* Try to create the same cgroup in all hierarchies. Start with cgroup_pattern;
-  * next cgroup_pattern-1, -2, ..., -999.
-  */
- __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
--                                                      struct lxc_handler *handler)
-+                                                      struct lxc_handler *handler,
-+                                                      bool inner)
- {
-       int i;
-       size_t len;
-@@ -1438,10 +1471,17 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
-       struct lxc_conf *conf = handler->conf;
-       if (ops->container_cgroup) {
-+              if (inner)
-+                      return cgfsng_create_inner(ops);
-               WARN("cgfsng_create called a second time: %s", ops->container_cgroup);
-               return false;
-       }
-+      if (inner) {
-+              ERROR("cgfsng_create called twice for inner cgroup");
-+              return false;
-+      }
-+
-       if (!conf)
-               return false;
-@@ -1482,7 +1522,7 @@ again:
-       }
-       for (i = 0; ops->hierarchies[i]; i++) {
--              if (!container_create_path_for_hierarchy(ops->hierarchies[i], container_cgroup)) {
-+              if (!container_create_path_for_hierarchy(ops->hierarchies[i], container_cgroup, false)) {
-                       ERROR("Failed to create cgroup \"%s\"", ops->hierarchies[i]->container_full_path);
-                       free(ops->hierarchies[i]->container_full_path);
-                       ops->hierarchies[i]->container_full_path = NULL;
-@@ -1505,7 +1545,8 @@ out_free:
- }
- __cgfsng_ops static bool __do_cgroup_enter(struct cgroup_ops *ops, pid_t pid,
--                                           bool monitor)
-+                                           bool monitor,
-+                                           bool inner)
- {
-       int len;
-       char pidstr[INTTYPE_TO_STRLEN(pid_t)];
-@@ -1521,6 +1562,9 @@ __cgfsng_ops static bool __do_cgroup_enter(struct cgroup_ops *ops, pid_t pid,
-               if (monitor)
-                       path = must_make_path(ops->hierarchies[i]->monitor_full_path,
-                                             "cgroup.procs", NULL);
-+              else if (inner)
-+                      path = must_make_path(ops->hierarchies[i]->container_inner_path,
-+                                            "cgroup.procs", NULL);
-               else
-                       path = must_make_path(ops->hierarchies[i]->container_full_path,
-                                             "cgroup.procs", NULL);
-@@ -1538,12 +1582,12 @@ __cgfsng_ops static bool __do_cgroup_enter(struct cgroup_ops *ops, pid_t pid,
- __cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops, pid_t pid)
- {
--      return __do_cgroup_enter(ops, pid, true);
-+      return __do_cgroup_enter(ops, pid, true, false);
- }
--static bool cgfsng_payload_enter(struct cgroup_ops *ops, pid_t pid)
-+static bool cgfsng_payload_enter(struct cgroup_ops *ops, pid_t pid, bool inner)
- {
--      return __do_cgroup_enter(ops, pid, false);
-+      return __do_cgroup_enter(ops, pid, false, inner);
- }
- static int chowmod(char *path, uid_t chown_uid, gid_t chown_gid,
-@@ -1609,9 +1653,15 @@ static int chown_cgroup_wrapper(void *data)
-               char *fullpath;
-               char *path = arg->hierarchies[i]->container_full_path;
-+              if (arg->inner)
-+                      path = must_make_path(path, CGROUP_NAMESPACE_SUBDIR, NULL);
-+
-               ret = chowmod(path, destuid, nsgid, 0775);
--              if (ret < 0)
-+              if (ret < 0) {
-+                      if (arg->inner)
-+                              free(path);
-                       return -1;
-+              }
-               /* Failures to chown() these are inconvenient but not
-                * detrimental We leave these owned by the container launcher,
-@@ -1630,8 +1680,11 @@ static int chown_cgroup_wrapper(void *data)
-               (void)chowmod(fullpath, destuid, nsgid, 0664);
-               free(fullpath);
--              if (arg->hierarchies[i]->version != CGROUP2_SUPER_MAGIC)
-+              if (arg->hierarchies[i]->version != CGROUP2_SUPER_MAGIC) {
-+                      if (arg->inner)
-+                              free(path);
-                       continue;
-+              }
-               fullpath = must_make_path(path, "cgroup.subtree_control", NULL);
-               (void)chowmod(fullpath, destuid, nsgid, 0664);
-@@ -1640,13 +1693,17 @@ static int chown_cgroup_wrapper(void *data)
-               fullpath = must_make_path(path, "cgroup.threads", NULL);
-               (void)chowmod(fullpath, destuid, nsgid, 0664);
-               free(fullpath);
-+
-+              if (arg->inner)
-+                      free(path);
-       }
-       return 0;
- }
- __cgfsng_ops static bool cgfsng_chown(struct cgroup_ops *ops,
--                                      struct lxc_conf *conf)
-+                                      struct lxc_conf *conf,
-+                                      bool inner)
- {
-       struct generic_userns_exec_data wrap;
-@@ -1657,6 +1714,7 @@ __cgfsng_ops static bool cgfsng_chown(struct cgroup_ops *ops,
-       wrap.path = NULL;
-       wrap.hierarchies = ops->hierarchies;
-       wrap.conf = conf;
-+      wrap.inner = inner;
-       if (userns_exec_1(conf, chown_cgroup_wrapper, &wrap,
-                         "chown_cgroup_wrapper") < 0) {
-@@ -2038,7 +2096,8 @@ __cgfsng_ops static bool cgfsng_unfreeze(struct cgroup_ops *ops)
- }
- __cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops,
--                                                  const char *controller)
-+                                                  const char *controller,
-+                                                  bool inner)
- {
-       struct hierarchy *h;
-@@ -2049,6 +2108,9 @@ __cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops,
-               return NULL;
-       }
-+      if (inner)
-+              return h->container_inner_path ? h->container_inner_path + strlen(h->mountpoint) : NULL;
-+
-       return h->container_full_path ? h->container_full_path + strlen(h->mountpoint) : NULL;
- }
-@@ -2080,7 +2142,7 @@ static int __cg_unified_attach(const struct hierarchy *h, const char *name,
-       int fret = -1, idx = 0;
-       char *base_path = NULL, *container_cgroup = NULL, *full_path = NULL;
--      container_cgroup = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
-+      container_cgroup = lxc_cmd_get_attach_cgroup_path(name, lxcpath, controller);
-       /* not running */
-       if (!container_cgroup)
-               return 0;
-@@ -2161,7 +2223,7 @@ __cgfsng_ops static bool cgfsng_attach(struct cgroup_ops *ops, const char *name,
-                       continue;
-               }
--              path = lxc_cmd_get_cgroup_path(name, lxcpath, h->controllers[0]);
-+              path = lxc_cmd_get_attach_cgroup_path(name, lxcpath, h->controllers[0]);
-               /* not running */
-               if (!path)
-                       continue;
-diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h
-index d4dcd506b..59445b5a5 100644
---- a/src/lxc/cgroups/cgroup.h
-+++ b/src/lxc/cgroups/cgroup.h
-@@ -32,6 +32,12 @@
- #define MONITOR_CGROUP "lxc.monitor"
- #define PIVOT_CGROUP "lxc.pivot"
-+/* When lxc.cgroup.protect_limits is in effect the container's cgroup namespace
-+ * will be moved into an additional subdirectory "cgns/" inside the cgroup in
-+ * order to prevent it from accessing the outer limiting cgroup.
-+ */
-+#define CGROUP_NAMESPACE_SUBDIR "cgns"
-+
- struct lxc_handler;
- struct lxc_conf;
- struct lxc_list;
-@@ -72,6 +78,9 @@ typedef enum {
-  * @monitor_full_path
-  * - The full path to the monitor's cgroup.
-  *
-+ * @container_inner_path
-+ * - The full path to the container's inner cgroup when protect_limits is used.
-+ *
-  * @version
-  * - legacy hierarchy
-  *   If the hierarchy is a legacy hierarchy this will be set to
-@@ -85,6 +94,7 @@ struct hierarchy {
-       char *mountpoint;
-       char *container_base_path;
-       char *container_full_path;
-+      char *container_inner_path;
-       char *monitor_full_path;
-       int version;
- };
-@@ -139,9 +149,9 @@ struct cgroup_ops {
-       void (*monitor_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler);
-       bool (*monitor_create)(struct cgroup_ops *ops, struct lxc_handler *handler);
-       bool (*monitor_enter)(struct cgroup_ops *ops, pid_t pid);
--      bool (*payload_create)(struct cgroup_ops *ops, struct lxc_handler *handler);
--      bool (*payload_enter)(struct cgroup_ops *ops, pid_t pid);
--      const char *(*get_cgroup)(struct cgroup_ops *ops, const char *controller);
-+      bool (*payload_create)(struct cgroup_ops *ops, struct lxc_handler *handler, bool inner);
-+      bool (*payload_enter)(struct cgroup_ops *ops, pid_t pid, bool inner);
-+      const char *(*get_cgroup)(struct cgroup_ops *ops, const char *controller, bool inner);
-       bool (*escape)(const struct cgroup_ops *ops, struct lxc_conf *conf);
-       int (*num_hierarchies)(struct cgroup_ops *ops);
-       bool (*get_hierarchies)(struct cgroup_ops *ops, int n, char ***out);
-@@ -152,7 +162,7 @@ struct cgroup_ops {
-       bool (*unfreeze)(struct cgroup_ops *ops);
-       bool (*setup_limits)(struct cgroup_ops *ops, struct lxc_conf *conf,
-                            bool with_devices);
--      bool (*chown)(struct cgroup_ops *ops, struct lxc_conf *conf);
-+      bool (*chown)(struct cgroup_ops *ops, struct lxc_conf *conf, bool inner);
-       bool (*attach)(struct cgroup_ops *ops, const char *name,
-                      const char *lxcpath, pid_t pid);
-       bool (*mount)(struct cgroup_ops *ops, struct lxc_handler *handler,
-diff --git a/src/lxc/commands.c b/src/lxc/commands.c
-index 133384d72..b41a76000 100644
---- a/src/lxc/commands.c
-+++ b/src/lxc/commands.c
-@@ -427,20 +427,8 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req,
-       return lxc_cmd_rsp_send(fd, &rsp);
- }
--/*
-- * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a
-- * particular subsystem. This is the cgroup path relative to the root
-- * of the cgroup filesystem.
-- *
-- * @name      : name of container to connect to
-- * @lxcpath   : the lxcpath in which the container is running
-- * @subsystem : the subsystem being asked about
-- *
-- * Returns the path on success, NULL on failure. The caller must free() the
-- * returned path.
-- */
--char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
--                            const char *subsystem)
-+char *do_lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
-+                            const char *subsystem, bool inner)
- {
-       int ret, stopped;
-       struct lxc_cmd_rr cmd = {
-@@ -453,8 +441,18 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
-       cmd.req.data = subsystem;
-       cmd.req.datalen = 0;
--      if (subsystem)
--              cmd.req.datalen = strlen(subsystem) + 1;
-+      if (subsystem) {
-+              size_t subsyslen = strlen(subsystem);
-+              if (inner) {
-+                      char *data = alloca(subsyslen+2);
-+                      memcpy(data, subsystem, subsyslen+1);
-+                      data[subsyslen+1] = 1;
-+                      cmd.req.datalen = subsyslen+2,
-+                      cmd.req.data = data;
-+              } else {
-+                      cmd.req.datalen = subsyslen+1;
-+              }
-+      }
-       ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL);
-       if (ret < 0)
-@@ -469,6 +467,42 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
-       return cmd.rsp.data;
- }
-+/*
-+ * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a
-+ * particular subsystem. This is the cgroup path relative to the root
-+ * of the cgroup filesystem.
-+ *
-+ * @name      : name of container to connect to
-+ * @lxcpath   : the lxcpath in which the container is running
-+ * @subsystem : the subsystem being asked about
-+ *
-+ * Returns the path on success, NULL on failure. The caller must free() the
-+ * returned path.
-+ */
-+char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
-+      const char *subsystem)
-+{
-+      return do_lxc_cmd_get_cgroup_path(name, lxcpath, subsystem, false);
-+}
-+
-+/*
-+ * lxc_cmd_get_attach_cgroup_path: Calculate a container's inner cgroup path
-+ * for a particular subsystem. This is the cgroup path relative to the root
-+ * of the cgroup filesystem.
-+ *
-+ * @name      : name of container to connect to
-+ * @lxcpath   : the lxcpath in which the container is running
-+ * @subsystem : the subsystem being asked about
-+ *
-+ * Returns the path on success, NULL on failure. The caller must free() the
-+ * returned path.
-+ */
-+char *lxc_cmd_get_attach_cgroup_path(const char *name, const char *lxcpath,
-+      const char *subsystem)
-+{
-+      return do_lxc_cmd_get_cgroup_path(name, lxcpath, subsystem, true);
-+}
-+
- static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
-                                      struct lxc_handler *handler)
- {
-@@ -476,10 +510,21 @@ static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
-       struct lxc_cmd_rsp rsp;
-       struct cgroup_ops *cgroup_ops = handler->cgroup_ops;
--      if (req->datalen > 0)
--              path = cgroup_ops->get_cgroup(cgroup_ops, req->data);
--      else
--              path = cgroup_ops->get_cgroup(cgroup_ops, NULL);
-+      if (req->datalen > 0) {
-+              const char *subsystem;
-+              size_t subsyslen;
-+              bool inner = false;
-+              subsystem = req->data;
-+              subsyslen = strlen(subsystem);
-+              if (req->datalen == subsyslen+2)
-+                      inner = (subsystem[subsyslen+1] == 1);
-+
-+              path = cgroup_ops->get_cgroup(cgroup_ops, req->data, inner);
-+      } else {
-+              // FIXME: cgroup separation for cgroup v2 cannot be handled
-+              // like we used to do v1 here... need to figure this out...
-+              path = cgroup_ops->get_cgroup(cgroup_ops, NULL, false);
-+      }
-       if (!path)
-               return -1;
-@@ -651,7 +696,7 @@ static int lxc_cmd_stop_callback(int fd, struct lxc_cmd_req *req,
-                * lxc_unfreeze() would do another cmd (GET_CGROUP) which would
-                * deadlock us.
-                */
--              if (!cgroup_ops->get_cgroup(cgroup_ops, "freezer"))
-+              if (!cgroup_ops->get_cgroup(cgroup_ops, "freezer", false))
-                       return 0;
-               if (cgroup_ops->unfreeze(cgroup_ops))
-diff --git a/src/lxc/commands.h b/src/lxc/commands.h
-index 2c024b65d..7c4c00b1e 100644
---- a/src/lxc/commands.h
-+++ b/src/lxc/commands.h
-@@ -88,6 +88,8 @@ extern int lxc_cmd_console(const char *name, int *ttynum, int *fd,
-  */
- extern char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
-                       const char *subsystem);
-+extern char *lxc_cmd_get_attach_cgroup_path(const char *name,
-+                      const char *lxcpath, const char *subsystem);
- extern int lxc_cmd_get_clone_flags(const char *name, const char *lxcpath);
- extern char *lxc_cmd_get_config_item(const char *name, const char *item, const char *lxcpath);
- extern char *lxc_cmd_get_name(const char *hashed_sock);
-diff --git a/src/lxc/criu.c b/src/lxc/criu.c
-index 3d857b541..ec9bcb7e4 100644
---- a/src/lxc/criu.c
-+++ b/src/lxc/criu.c
-@@ -332,7 +332,7 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf,
-               } else {
-                       const char *p;
--                      p = cgroup_ops->get_cgroup(cgroup_ops, controllers[0]);
-+                      p = cgroup_ops->get_cgroup(cgroup_ops, controllers[0], false);
-                       if (!p) {
-                               ERROR("failed to get cgroup path for %s", controllers[0]);
-                               goto err;
-@@ -976,7 +976,7 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_
-               goto out_fini_handler;
-       handler->cgroup_ops = cgroup_ops;
--      if (!cgroup_ops->payload_create(cgroup_ops, handler)) {
-+      if (!cgroup_ops->payload_create(cgroup_ops, handler, false)) {
-               ERROR("failed creating groups");
-               goto out_fini_handler;
-       }
-diff --git a/src/lxc/start.c b/src/lxc/start.c
-index dae3bcfe5..f3b29d6cd 100644
---- a/src/lxc/start.c
-+++ b/src/lxc/start.c
-@@ -1649,7 +1649,7 @@ static int lxc_spawn(struct lxc_handler *handler)
-               }
-       }
--      if (!cgroup_ops->payload_create(cgroup_ops, handler)) {
-+      if (!cgroup_ops->payload_create(cgroup_ops, handler, false)) {
-               ERROR("Failed creating cgroups");
-               goto out_delete_net;
-       }
-@@ -1743,10 +1743,10 @@ static int lxc_spawn(struct lxc_handler *handler)
-               goto out_delete_net;
-       }
--      if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid))
-+      if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid, false))
-               goto out_delete_net;
--      if (!cgroup_ops->chown(cgroup_ops, handler->conf))
-+      if (!cgroup_ops->chown(cgroup_ops, handler->conf, false))
-               goto out_delete_net;
-       /* Now we're ready to preserve the network namespace */
-@@ -1813,16 +1813,30 @@ static int lxc_spawn(struct lxc_handler *handler)
-               }
-       }
--      ret = lxc_sync_barrier_child(handler, LXC_SYNC_CGROUP_UNSHARE);
--      if (ret < 0)
--              goto out_delete_net;
--
-       if (!cgroup_ops->setup_limits(cgroup_ops, handler->conf, true)) {
-               ERROR("Failed to setup legacy device cgroup controller limits");
-               goto out_delete_net;
-       }
-       TRACE("Set up legacy device cgroup controller limits");
-+      if (cgns_supported()) {
-+              if (!cgroup_ops->payload_create(cgroup_ops, handler, true)) {
-+                      ERROR("failed to create inner cgroup separation layer");
-+                      goto out_delete_net;
-+              }
-+              if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid, true)) {
-+                      ERROR("failed to enter inner cgroup separation layer");
-+                      goto out_delete_net;
-+              }
-+              if (!cgroup_ops->chown(cgroup_ops, handler->conf, true)) {
-+                      ERROR("failed chown inner cgroup separation layer");
-+                      goto out_delete_net;
-+              }
-+      }
-+
-+      if (lxc_sync_barrier_child(handler, LXC_SYNC_CGROUP_UNSHARE))
-+              goto out_delete_net;
-+
-       if (handler->ns_clone_flags & CLONE_NEWCGROUP) {
-               /* Now we're ready to preserve the cgroup namespace */
-               ret = lxc_try_preserve_ns(handler->pid, "cgroup");
--- 
-2.20.1
-
diff --git a/debian/patches/pve/0005-PVE-Config-Disable-lxc.monitor-cgroup.patch b/debian/patches/pve/0005-PVE-Config-Disable-lxc.monitor-cgroup.patch
new file mode 100644 (file)
index 0000000..5b4902d
--- /dev/null
@@ -0,0 +1,50 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Wolfgang Bumiller <w.bumiller@proxmox.com>
+Date: Wed, 2 Jan 2019 14:37:58 +0100
+Subject: [PATCH] PVE: [Config] Disable lxc.monitor cgroup
+
+When not using relative cgroups, the lxc.monitor cgroup makes
+lxc unusable within systemd service files, as the service
+cgroup becomes empty.
+
+Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
+---
+ src/lxc/start.c | 22 +++++++++++-----------
+ 1 file changed, 11 insertions(+), 11 deletions(-)
+
+diff --git a/src/lxc/start.c b/src/lxc/start.c
+index a9a07bc83..0169cf8e3 100644
+--- a/src/lxc/start.c
++++ b/src/lxc/start.c
+@@ -2026,17 +2026,17 @@ int __lxc_start(const char *name, struct lxc_handler *handler,
+               goto out_fini_nonet;
+       }
+-      if (!cgroup_ops->monitor_create(cgroup_ops, handler)) {
+-              ERROR("Failed to create monitor cgroup");
+-              ret = -1;
+-              goto out_fini_nonet;
+-      }
+-
+-      if (!cgroup_ops->monitor_enter(cgroup_ops, handler->monitor_pid)) {
+-              ERROR("Failed to enter monitor cgroup");
+-              ret = -1;
+-              goto out_fini_nonet;
+-      }
++      //if (!cgroup_ops->monitor_create(cgroup_ops, handler)) {
++      //      ERROR("Failed to create monitor cgroup");
++      //      ret = -1;
++      //      goto out_fini_nonet;
++      //}
++
++      //if (!cgroup_ops->monitor_enter(cgroup_ops, handler->monitor_pid)) {
++      //      ERROR("Failed to enter monitor cgroup");
++      //      ret = -1;
++      //      goto out_fini_nonet;
++      //}
+       if (geteuid() == 0 && !lxc_list_empty(&conf->id_map)) {
+               /* If the backing store is a device, mount it here and now. */
+-- 
+2.20.1
+
diff --git a/debian/patches/pve/0005-PVE-Up-start-initutils-make-cgroupns-separation-leve.patch b/debian/patches/pve/0005-PVE-Up-start-initutils-make-cgroupns-separation-leve.patch
deleted file mode 100644 (file)
index 1bae3be..0000000
+++ /dev/null
@@ -1,97 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Wolfgang Bumiller <w.bumiller@proxmox.com>
-Date: Wed, 28 Mar 2018 13:41:46 +0200
-Subject: [PATCH] PVE: [Up] start/initutils: make cgroupns separation level
- configurable
-
-Adds a new global config variable `lxc.cgroup.separate`
-which controls whether a separation directory for cgroup
-namespaces should be used.
-Can be empty, "privileged", "unprivileged" or "both".
-
-Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
----
- src/lxc/initutils.c | 17 +++++++++--------
- src/lxc/initutils.h |  1 +
- src/lxc/start.c     | 25 ++++++++++++++-----------
- 3 files changed, 24 insertions(+), 19 deletions(-)
-
-diff --git a/src/lxc/initutils.c b/src/lxc/initutils.c
-index 11c808662..8b5e2542a 100644
---- a/src/lxc/initutils.c
-+++ b/src/lxc/initutils.c
-@@ -63,14 +63,15 @@ static char *copy_global_config_value(char *p)
- const char *lxc_global_config_value(const char *option_name)
- {
-       static const char * const options[][2] = {
--              { "lxc.bdev.lvm.vg",        DEFAULT_VG      },
--              { "lxc.bdev.lvm.thin_pool", DEFAULT_THIN_POOL },
--              { "lxc.bdev.zfs.root",      DEFAULT_ZFSROOT },
--              { "lxc.bdev.rbd.rbdpool",   DEFAULT_RBDPOOL },
--              { "lxc.lxcpath",            NULL            },
--              { "lxc.default_config",     NULL            },
--              { "lxc.cgroup.pattern",     NULL            },
--              { "lxc.cgroup.use",         NULL            },
-+              { "lxc.bdev.lvm.vg",           DEFAULT_VG      },
-+              { "lxc.bdev.lvm.thin_pool",    DEFAULT_THIN_POOL },
-+              { "lxc.bdev.zfs.root",         DEFAULT_ZFSROOT },
-+              { "lxc.bdev.rbd.rbdpool",      DEFAULT_RBDPOOL },
-+              { "lxc.lxcpath",               NULL            },
-+              { "lxc.default_config",        NULL            },
-+              { "lxc.cgroup.pattern",        NULL            },
-+              { "lxc.cgroup.use",            NULL            },
-+              { "lxc.cgroup.protect_limits", DEFAULT_CGPROTECT },
-               { NULL, NULL },
-       };
-diff --git a/src/lxc/initutils.h b/src/lxc/initutils.h
-index 6bf23a706..b542e6015 100644
---- a/src/lxc/initutils.h
-+++ b/src/lxc/initutils.h
-@@ -42,6 +42,7 @@
- #define DEFAULT_THIN_POOL "lxc"
- #define DEFAULT_ZFSROOT "lxc"
- #define DEFAULT_RBDPOOL "lxc"
-+#define DEFAULT_CGPROTECT "privileged"
- #ifndef PR_SET_MM
- #define PR_SET_MM 35
-diff --git a/src/lxc/start.c b/src/lxc/start.c
-index f3b29d6cd..1cf792aa2 100644
---- a/src/lxc/start.c
-+++ b/src/lxc/start.c
-@@ -1820,17 +1820,20 @@ static int lxc_spawn(struct lxc_handler *handler)
-       TRACE("Set up legacy device cgroup controller limits");
-       if (cgns_supported()) {
--              if (!cgroup_ops->payload_create(cgroup_ops, handler, true)) {
--                      ERROR("failed to create inner cgroup separation layer");
--                      goto out_delete_net;
--              }
--              if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid, true)) {
--                      ERROR("failed to enter inner cgroup separation layer");
--                      goto out_delete_net;
--              }
--              if (!cgroup_ops->chown(cgroup_ops, handler->conf, true)) {
--                      ERROR("failed chown inner cgroup separation layer");
--                      goto out_delete_net;
-+              const char *tmp = lxc_global_config_value("lxc.cgroup.protect_limits");
-+              if (!strcmp(tmp, "both") || !strcmp(tmp, wants_to_map_ids ? "unprivileged" : "privileged")) {
-+                      if (!cgroup_ops->payload_create(cgroup_ops, handler, true)) {
-+                              ERROR("failed to create inner cgroup separation layer");
-+                              goto out_delete_net;
-+                      }
-+                      if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid, true)) {
-+                              ERROR("failed to enter inner cgroup separation layer");
-+                              goto out_delete_net;
-+                      }
-+                      if (!cgroup_ops->chown(cgroup_ops, handler->conf, true)) {
-+                              ERROR("failed chown inner cgroup separation layer");
-+                              goto out_delete_net;
-+                      }
-               }
-       }
--- 
-2.20.1
-
diff --git a/debian/patches/pve/0006-PVE-Config-namespace-separation.patch b/debian/patches/pve/0006-PVE-Config-namespace-separation.patch
deleted file mode 100644 (file)
index 396be8d..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Wolfgang Bumiller <w.bumiller@proxmox.com>
-Date: Fri, 23 Dec 2016 15:57:24 +0100
-Subject: [PATCH] PVE: [Config] namespace separation
-
-* rename cgroup namespace directory to ns
-* set lxc.cgroup.protect_limits default to 'both'
-
-Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
----
- src/lxc/cgroups/cgroup.h | 2 +-
- src/lxc/initutils.h      | 2 +-
- 2 files changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h
-index 59445b5a5..7d6135c19 100644
---- a/src/lxc/cgroups/cgroup.h
-+++ b/src/lxc/cgroups/cgroup.h
-@@ -36,7 +36,7 @@
-  * will be moved into an additional subdirectory "cgns/" inside the cgroup in
-  * order to prevent it from accessing the outer limiting cgroup.
-  */
--#define CGROUP_NAMESPACE_SUBDIR "cgns"
-+#define CGROUP_NAMESPACE_SUBDIR "ns"
- struct lxc_handler;
- struct lxc_conf;
-diff --git a/src/lxc/initutils.h b/src/lxc/initutils.h
-index b542e6015..78d3f2b10 100644
---- a/src/lxc/initutils.h
-+++ b/src/lxc/initutils.h
-@@ -42,7 +42,7 @@
- #define DEFAULT_THIN_POOL "lxc"
- #define DEFAULT_ZFSROOT "lxc"
- #define DEFAULT_RBDPOOL "lxc"
--#define DEFAULT_CGPROTECT "privileged"
-+#define DEFAULT_CGPROTECT "both"
- #ifndef PR_SET_MM
- #define PR_SET_MM 35
--- 
-2.20.1
-
diff --git a/debian/patches/pve/0006-PVE-Up-separate-the-limiting-from-the-namespaced-cgr.patch b/debian/patches/pve/0006-PVE-Up-separate-the-limiting-from-the-namespaced-cgr.patch
new file mode 100644 (file)
index 0000000..8891b81
--- /dev/null
@@ -0,0 +1,534 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Wolfgang Bumiller <w.bumiller@proxmox.com>
+Date: Wed, 28 Mar 2018 13:37:28 +0200
+Subject: [PATCH] PVE: [Up] separate the limiting from the namespaced cgroup
+ root
+
+When cgroup namespaces are enabled a privileged container
+with mixed cgroups has full write access to its own root
+cgroup effectively allowing it to overwrite values written
+from the outside or configured via lxc.cgroup.*.
+
+This patch causes an additional 'cgns/' directory to be
+created in all cgroups if cgroup namespaces and cgfsng are
+being used in order to combat this.
+
+Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
+---
+ src/lxc/cgroups/cgfsng.c | 80 +++++++++++++++++++++++++++++-------
+ src/lxc/cgroups/cgroup.h | 18 +++++++--
+ src/lxc/commands.c       | 87 ++++++++++++++++++++++++++++++----------
+ src/lxc/commands.h       |  2 +
+ src/lxc/criu.c           |  4 +-
+ src/lxc/start.c          | 28 +++++++++----
+ 6 files changed, 171 insertions(+), 48 deletions(-)
+
+diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
+index 1e6a45cff..c09b4ea71 100644
+--- a/src/lxc/cgroups/cgfsng.c
++++ b/src/lxc/cgroups/cgfsng.c
+@@ -808,6 +808,7 @@ static struct hierarchy *add_hierarchy(struct hierarchy ***h, char **clist, char
+       new->mountpoint = mountpoint;
+       new->container_base_path = container_base_path;
+       new->container_full_path = NULL;
++      new->container_inner_path = NULL;
+       new->monitor_full_path = NULL;
+       new->version = type;
+       new->cgroup2_chown = NULL;
+@@ -1048,6 +1049,9 @@ static int cgroup_rmdir(struct hierarchy **hierarchies,
+               free(h->container_full_path);
+               h->container_full_path = NULL;
++
++              free(h->container_inner_path);
++              h->container_inner_path = NULL;
+       }
+       return 0;
+@@ -1059,6 +1063,7 @@ struct generic_userns_exec_data {
+       struct lxc_conf *conf;
+       uid_t origuid; /* target uid in parent namespace */
+       char *path;
++      bool inner;
+ };
+ static int cgroup_rmdir_wrapper(void *data)
+@@ -1104,6 +1109,7 @@ __cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops,
+       wrap.container_cgroup = ops->container_cgroup;
+       wrap.hierarchies = ops->hierarchies;
+       wrap.conf = handler->conf;
++      wrap.inner = false;
+       if (handler->conf && !lxc_list_empty(&handler->conf->id_map))
+               ret = userns_exec_1(handler->conf, cgroup_rmdir_wrapper, &wrap,
+@@ -1306,17 +1312,26 @@ static bool monitor_create_path_for_hierarchy(struct hierarchy *h, char *cgname)
+       return cg_unified_create_cgroup(h, cgname);
+ }
+-static bool container_create_path_for_hierarchy(struct hierarchy *h, char *cgname)
++static bool container_create_path_for_hierarchy(struct hierarchy *h, char *cgname, bool inner)
+ {
+       int ret;
++      char *path;
+-      if (!cg_legacy_handle_cpuset_hierarchy(h, cgname)) {
++      if (!inner && !cg_legacy_handle_cpuset_hierarchy(h, cgname)) {
+               ERROR("Failed to handle legacy cpuset controller");
+               return false;
+       }
+-      h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
+-      ret = mkdir_eexist_on_last(h->container_full_path, 0755);
++      if (inner) {
++              path = must_make_path(h->container_full_path, CGROUP_NAMESPACE_SUBDIR, NULL);
++              h->container_inner_path = path;
++              ret = mkdir(path, 0755);
++      } else {
++              path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
++              h->container_full_path = path;
++              ret = mkdir_eexist_on_last(path, 0755);
++      }
++
+       if (ret < 0) {
+               ERROR("Failed to create cgroup \"%s\"", h->container_full_path);
+               return false;
+@@ -1408,11 +1423,29 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops,
+       return true;
+ }
++static inline bool cgfsng_create_inner(struct cgroup_ops *ops)
++{
++      size_t i;
++      bool ret = true;
++      char *cgname = must_make_path(ops->container_cgroup, CGROUP_NAMESPACE_SUBDIR, NULL);
++      for (i = 0; ops->hierarchies[i]; i++) {
++              if (!container_create_path_for_hierarchy(ops->hierarchies[i], cgname, true)) {
++                      SYSERROR("Failed to create %s namespace subdirectory: %s",
++                               ops->hierarchies[i]->container_full_path, strerror(errno));
++                      ret = false;
++                      break;
++              }
++      }
++      free(cgname);
++      return ret;
++}
++
+ /* Try to create the same cgroup in all hierarchies. Start with cgroup_pattern;
+  * next cgroup_pattern-1, -2, ..., -999.
+  */
+ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
+-                                                      struct lxc_handler *handler)
++                                                      struct lxc_handler *handler,
++                                                      bool inner)
+ {
+       __do_free char *container_cgroup = NULL, *tmp = NULL;
+       int i;
+@@ -1422,7 +1455,12 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
+       struct lxc_conf *conf = handler->conf;
+       if (ops->container_cgroup)
++              return inner ? cgfsng_create_inner(ops) : false;
++
++      if (inner) {
++              ERROR("cgfsng_create called twice for inner cgroup");
+               return false;
++      }
+       if (!conf)
+               return false;
+@@ -1453,7 +1491,7 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
+               for (i = 0; ops->hierarchies[i]; i++) {
+                       if (!container_create_path_for_hierarchy(ops->hierarchies[i],
+-                                                               container_cgroup)) {
++                                                               container_cgroup, false)) {
+                               ERROR("Failed to create cgroup \"%s\"",
+                                     ops->hierarchies[i]->container_full_path);
+                               for (int j = 0; j < i; j++)
+@@ -1475,7 +1513,8 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
+ }
+ __cgfsng_ops static bool __do_cgroup_enter(struct cgroup_ops *ops, pid_t pid,
+-                                           bool monitor)
++                                           bool monitor,
++                                           bool inner)
+ {
+       int len;
+       char pidstr[INTTYPE_TO_STRLEN(pid_t)];
+@@ -1494,6 +1533,9 @@ __cgfsng_ops static bool __do_cgroup_enter(struct cgroup_ops *ops, pid_t pid,
+               if (monitor)
+                       path = must_make_path(ops->hierarchies[i]->monitor_full_path,
+                                             "cgroup.procs", NULL);
++              else if (inner)
++                      path = must_make_path(ops->hierarchies[i]->container_inner_path,
++                                            "cgroup.procs", NULL);
+               else
+                       path = must_make_path(ops->hierarchies[i]->container_full_path,
+                                             "cgroup.procs", NULL);
+@@ -1509,12 +1551,12 @@ __cgfsng_ops static bool __do_cgroup_enter(struct cgroup_ops *ops, pid_t pid,
+ __cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops, pid_t pid)
+ {
+-      return __do_cgroup_enter(ops, pid, true);
++      return __do_cgroup_enter(ops, pid, true, false);
+ }
+-static bool cgfsng_payload_enter(struct cgroup_ops *ops, pid_t pid)
++static bool cgfsng_payload_enter(struct cgroup_ops *ops, pid_t pid, bool inner)
+ {
+-      return __do_cgroup_enter(ops, pid, false);
++      return __do_cgroup_enter(ops, pid, false, inner);
+ }
+ static int chowmod(char *path, uid_t chown_uid, gid_t chown_gid,
+@@ -1578,8 +1620,12 @@ static int chown_cgroup_wrapper(void *data)
+       for (int i = 0; arg->hierarchies[i]; i++) {
+               __do_free char *fullpath = NULL;
++              __do_free char *inner_guard = NULL;
+               char *path = arg->hierarchies[i]->container_full_path;
++              if (arg->inner)
++                      path = inner_guard = must_make_path(path, CGROUP_NAMESPACE_SUBDIR, NULL);
++
+               ret = chowmod(path, destuid, nsgid, 0775);
+               if (ret < 0)
+                       return -1;
+@@ -1612,7 +1658,8 @@ static int chown_cgroup_wrapper(void *data)
+ }
+ __cgfsng_ops static bool cgfsng_chown(struct cgroup_ops *ops,
+-                                      struct lxc_conf *conf)
++                                      struct lxc_conf *conf,
++                                      bool inner)
+ {
+       struct generic_userns_exec_data wrap;
+@@ -1626,6 +1673,7 @@ __cgfsng_ops static bool cgfsng_chown(struct cgroup_ops *ops,
+       wrap.path = NULL;
+       wrap.hierarchies = ops->hierarchies;
+       wrap.conf = conf;
++      wrap.inner = inner;
+       if (userns_exec_1(conf, chown_cgroup_wrapper, &wrap,
+                         "chown_cgroup_wrapper") < 0) {
+@@ -2100,7 +2148,8 @@ __cgfsng_ops static bool cgfsng_unfreeze(struct cgroup_ops *ops)
+ }
+ __cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops,
+-                                                  const char *controller)
++                                                  const char *controller,
++                                                  bool inner)
+ {
+       struct hierarchy *h;
+@@ -2111,6 +2160,9 @@ __cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops,
+               return NULL;
+       }
++      if (inner)
++              return h->container_inner_path ? h->container_inner_path + strlen(h->mountpoint) : NULL;
++
+       return h->container_full_path ? h->container_full_path + strlen(h->mountpoint) : NULL;
+ }
+@@ -2143,7 +2195,7 @@ static int __cg_unified_attach(const struct hierarchy *h, const char *name,
+       size_t len;
+       int fret = -1, idx = 0;
+-      container_cgroup = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
++      container_cgroup = lxc_cmd_get_attach_cgroup_path(name, lxcpath, controller);
+       /* not running */
+       if (!container_cgroup)
+               return 0;
+@@ -2220,7 +2272,7 @@ __cgfsng_ops static bool cgfsng_attach(struct cgroup_ops *ops, const char *name,
+                       continue;
+               }
+-              path = lxc_cmd_get_cgroup_path(name, lxcpath, h->controllers[0]);
++              path = lxc_cmd_get_attach_cgroup_path(name, lxcpath, h->controllers[0]);
+               /* not running */
+               if (!path)
+                       continue;
+diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h
+index f3f0f6726..35d207feb 100644
+--- a/src/lxc/cgroups/cgroup.h
++++ b/src/lxc/cgroups/cgroup.h
+@@ -32,6 +32,12 @@
+ #define MONITOR_CGROUP "lxc.monitor"
+ #define PIVOT_CGROUP "lxc.pivot"
++/* When lxc.cgroup.protect_limits is in effect the container's cgroup namespace
++ * will be moved into an additional subdirectory "cgns/" inside the cgroup in
++ * order to prevent it from accessing the outer limiting cgroup.
++ */
++#define CGROUP_NAMESPACE_SUBDIR "cgns"
++
+ struct lxc_handler;
+ struct lxc_conf;
+ struct lxc_list;
+@@ -72,6 +78,9 @@ typedef enum {
+  * @monitor_full_path
+  * - The full path to the monitor's cgroup.
+  *
++ * @container_inner_path
++ * - The full path to the container's inner cgroup when protect_limits is used.
++ *
+  * @version
+  * - legacy hierarchy
+  *   If the hierarchy is a legacy hierarchy this will be set to
+@@ -90,6 +99,7 @@ struct hierarchy {
+       char *mountpoint;
+       char *container_base_path;
+       char *container_full_path;
++      char *container_inner_path;
+       char *monitor_full_path;
+       int version;
+ };
+@@ -144,9 +154,9 @@ struct cgroup_ops {
+       void (*monitor_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler);
+       bool (*monitor_create)(struct cgroup_ops *ops, struct lxc_handler *handler);
+       bool (*monitor_enter)(struct cgroup_ops *ops, pid_t pid);
+-      bool (*payload_create)(struct cgroup_ops *ops, struct lxc_handler *handler);
+-      bool (*payload_enter)(struct cgroup_ops *ops, pid_t pid);
+-      const char *(*get_cgroup)(struct cgroup_ops *ops, const char *controller);
++      bool (*payload_create)(struct cgroup_ops *ops, struct lxc_handler *handler, bool inner);
++      bool (*payload_enter)(struct cgroup_ops *ops, pid_t pid, bool inner);
++      const char *(*get_cgroup)(struct cgroup_ops *ops, const char *controller, bool inner);
+       bool (*escape)(const struct cgroup_ops *ops, struct lxc_conf *conf);
+       int (*num_hierarchies)(struct cgroup_ops *ops);
+       bool (*get_hierarchies)(struct cgroup_ops *ops, int n, char ***out);
+@@ -158,7 +168,7 @@ struct cgroup_ops {
+       bool (*unfreeze)(struct cgroup_ops *ops);
+       bool (*setup_limits)(struct cgroup_ops *ops, struct lxc_conf *conf,
+                            bool with_devices);
+-      bool (*chown)(struct cgroup_ops *ops, struct lxc_conf *conf);
++      bool (*chown)(struct cgroup_ops *ops, struct lxc_conf *conf, bool inner);
+       bool (*attach)(struct cgroup_ops *ops, const char *name,
+                      const char *lxcpath, pid_t pid);
+       bool (*mount)(struct cgroup_ops *ops, struct lxc_handler *handler,
+diff --git a/src/lxc/commands.c b/src/lxc/commands.c
+index 90e3c5863..93406bb7e 100644
+--- a/src/lxc/commands.c
++++ b/src/lxc/commands.c
+@@ -425,20 +425,8 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req,
+       return lxc_cmd_rsp_send(fd, &rsp);
+ }
+-/*
+- * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a
+- * particular subsystem. This is the cgroup path relative to the root
+- * of the cgroup filesystem.
+- *
+- * @name      : name of container to connect to
+- * @lxcpath   : the lxcpath in which the container is running
+- * @subsystem : the subsystem being asked about
+- *
+- * Returns the path on success, NULL on failure. The caller must free() the
+- * returned path.
+- */
+-char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
+-                            const char *subsystem)
++char *do_lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
++                            const char *subsystem, bool inner)
+ {
+       int ret, stopped;
+       struct lxc_cmd_rr cmd = {
+@@ -451,8 +439,18 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
+       cmd.req.data = subsystem;
+       cmd.req.datalen = 0;
+-      if (subsystem)
+-              cmd.req.datalen = strlen(subsystem) + 1;
++      if (subsystem) {
++              size_t subsyslen = strlen(subsystem);
++              if (inner) {
++                      char *data = alloca(subsyslen+2);
++                      memcpy(data, subsystem, subsyslen+1);
++                      data[subsyslen+1] = 1;
++                      cmd.req.datalen = subsyslen+2,
++                      cmd.req.data = data;
++              } else {
++                      cmd.req.datalen = subsyslen+1;
++              }
++      }
+       ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL);
+       if (ret < 0)
+@@ -467,6 +465,42 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
+       return cmd.rsp.data;
+ }
++/*
++ * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a
++ * particular subsystem. This is the cgroup path relative to the root
++ * of the cgroup filesystem.
++ *
++ * @name      : name of container to connect to
++ * @lxcpath   : the lxcpath in which the container is running
++ * @subsystem : the subsystem being asked about
++ *
++ * Returns the path on success, NULL on failure. The caller must free() the
++ * returned path.
++ */
++char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
++      const char *subsystem)
++{
++      return do_lxc_cmd_get_cgroup_path(name, lxcpath, subsystem, false);
++}
++
++/*
++ * lxc_cmd_get_attach_cgroup_path: Calculate a container's inner cgroup path
++ * for a particular subsystem. This is the cgroup path relative to the root
++ * of the cgroup filesystem.
++ *
++ * @name      : name of container to connect to
++ * @lxcpath   : the lxcpath in which the container is running
++ * @subsystem : the subsystem being asked about
++ *
++ * Returns the path on success, NULL on failure. The caller must free() the
++ * returned path.
++ */
++char *lxc_cmd_get_attach_cgroup_path(const char *name, const char *lxcpath,
++      const char *subsystem)
++{
++      return do_lxc_cmd_get_cgroup_path(name, lxcpath, subsystem, true);
++}
++
+ static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
+                                      struct lxc_handler *handler,
+                                      struct lxc_epoll_descr *descr)
+@@ -475,10 +509,21 @@ static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
+       struct lxc_cmd_rsp rsp;
+       struct cgroup_ops *cgroup_ops = handler->cgroup_ops;
+-      if (req->datalen > 0)
+-              path = cgroup_ops->get_cgroup(cgroup_ops, req->data);
+-      else
+-              path = cgroup_ops->get_cgroup(cgroup_ops, NULL);
++      if (req->datalen > 0) {
++              const char *subsystem;
++              size_t subsyslen;
++              bool inner = false;
++              subsystem = req->data;
++              subsyslen = strlen(subsystem);
++              if (req->datalen == subsyslen+2)
++                      inner = (subsystem[subsyslen+1] == 1);
++
++              path = cgroup_ops->get_cgroup(cgroup_ops, req->data, inner);
++      } else {
++              // FIXME: cgroup separation for cgroup v2 cannot be handled
++              // the way we handle v1 here; this still needs to be figured out.
++              path = cgroup_ops->get_cgroup(cgroup_ops, NULL, false);
++      }
+       if (!path)
+               return -1;
+@@ -653,7 +698,7 @@ static int lxc_cmd_stop_callback(int fd, struct lxc_cmd_req *req,
+                * lxc_unfreeze() would do another cmd (GET_CGROUP) which would
+                * deadlock us.
+                */
+-              if (!cgroup_ops->get_cgroup(cgroup_ops, "freezer"))
++              if (!cgroup_ops->get_cgroup(cgroup_ops, "freezer", false))
+                       return 0;
+               if (cgroup_ops->unfreeze(cgroup_ops))
+diff --git a/src/lxc/commands.h b/src/lxc/commands.h
+index d7d0c6096..042892a42 100644
+--- a/src/lxc/commands.h
++++ b/src/lxc/commands.h
+@@ -89,6 +89,8 @@ extern int lxc_cmd_console(const char *name, int *ttynum, int *fd,
+  */
+ extern char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
+                       const char *subsystem);
++extern char *lxc_cmd_get_attach_cgroup_path(const char *name,
++                      const char *lxcpath, const char *subsystem);
+ extern int lxc_cmd_get_clone_flags(const char *name, const char *lxcpath);
+ extern char *lxc_cmd_get_config_item(const char *name, const char *item, const char *lxcpath);
+ extern char *lxc_cmd_get_name(const char *hashed_sock);
+diff --git a/src/lxc/criu.c b/src/lxc/criu.c
+index 86f6f1836..15a703c4f 100644
+--- a/src/lxc/criu.c
++++ b/src/lxc/criu.c
+@@ -332,7 +332,7 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf,
+               } else {
+                       const char *p;
+-                      p = cgroup_ops->get_cgroup(cgroup_ops, controllers[0]);
++                      p = cgroup_ops->get_cgroup(cgroup_ops, controllers[0], false);
+                       if (!p) {
+                               ERROR("failed to get cgroup path for %s", controllers[0]);
+                               goto err;
+@@ -975,7 +975,7 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_
+               goto out_fini_handler;
+       handler->cgroup_ops = cgroup_ops;
+-      if (!cgroup_ops->payload_create(cgroup_ops, handler)) {
++      if (!cgroup_ops->payload_create(cgroup_ops, handler, false)) {
+               ERROR("failed creating groups");
+               goto out_fini_handler;
+       }
+diff --git a/src/lxc/start.c b/src/lxc/start.c
+index 0169cf8e3..db0625af5 100644
+--- a/src/lxc/start.c
++++ b/src/lxc/start.c
+@@ -1726,7 +1726,7 @@ static int lxc_spawn(struct lxc_handler *handler)
+               }
+       }
+-      if (!cgroup_ops->payload_create(cgroup_ops, handler)) {
++      if (!cgroup_ops->payload_create(cgroup_ops, handler, false)) {
+               ERROR("Failed creating cgroups");
+               goto out_delete_net;
+       }
+@@ -1841,10 +1841,10 @@ static int lxc_spawn(struct lxc_handler *handler)
+               goto out_delete_net;
+       }
+-      if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid))
++      if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid, false))
+               goto out_delete_net;
+-      if (!cgroup_ops->chown(cgroup_ops, handler->conf))
++      if (!cgroup_ops->chown(cgroup_ops, handler->conf, false))
+               goto out_delete_net;
+       /* If not done yet, we're now ready to preserve the network namespace */
+@@ -1902,16 +1902,30 @@ static int lxc_spawn(struct lxc_handler *handler)
+               }
+       }
+-      ret = lxc_sync_barrier_child(handler, LXC_SYNC_CGROUP_UNSHARE);
+-      if (ret < 0)
+-              goto out_delete_net;
+-
+       if (!cgroup_ops->setup_limits(cgroup_ops, handler->conf, true)) {
+               ERROR("Failed to setup legacy device cgroup controller limits");
+               goto out_delete_net;
+       }
+       TRACE("Set up legacy device cgroup controller limits");
++      if (cgns_supported()) {
++              if (!cgroup_ops->payload_create(cgroup_ops, handler, true)) {
++                      ERROR("failed to create inner cgroup separation layer");
++                      goto out_delete_net;
++              }
++              if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid, true)) {
++                      ERROR("failed to enter inner cgroup separation layer");
++                      goto out_delete_net;
++              }
++              if (!cgroup_ops->chown(cgroup_ops, handler->conf, true)) {
++                      ERROR("failed chown inner cgroup separation layer");
++                      goto out_delete_net;
++              }
++      }
++
++      if (lxc_sync_barrier_child(handler, LXC_SYNC_CGROUP_UNSHARE))
++              goto out_delete_net;
++
+       if (handler->ns_clone_flags & CLONE_NEWCGROUP) {
+               /* Now we're ready to preserve the cgroup namespace */
+               ret = lxc_try_preserve_ns(handler->pid, "cgroup");
+-- 
+2.20.1
+
diff --git a/debian/patches/pve/0007-PVE-Up-possibility-to-run-lxc-monitord-as-a-regular-.patch b/debian/patches/pve/0007-PVE-Up-possibility-to-run-lxc-monitord-as-a-regular-.patch
deleted file mode 100644 (file)
index 5c70fe1..0000000
+++ /dev/null
@@ -1,207 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Wolfgang Bumiller <w.bumiller@proxmox.com>
-Date: Mon, 20 Nov 2017 10:49:41 +0100
-Subject: [PATCH] PVE: [Up] possibility to run lxc-monitord as a regular daemon
-
-lxc-monitord instances are spawned on demand and, if this
-happens from a service, the daemon is considered part of
-it by systemd, as it is running in the same cgroups. This
-can be avoided by leaving it running permanently.
-
-Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
----
- .gitignore                                  |  1 +
- config/init/systemd/Makefile.am             | 10 ++--
- config/init/systemd/lxc-monitord.service.in | 12 +++++
- configure.ac                                |  1 +
- lxc.spec.in                                 |  1 +
- src/lxc/cmd/lxc_monitord.c                  | 60 +++++++++++++++------
- 6 files changed, 64 insertions(+), 21 deletions(-)
- create mode 100644 config/init/systemd/lxc-monitord.service.in
-
-diff --git a/.gitignore b/.gitignore
-index 45377714c..69e6e7ffe 100644
---- a/.gitignore
-+++ b/.gitignore
-@@ -116,6 +116,7 @@ config/bash/lxc
- config/init/common/lxc-containers
- config/init/common/lxc-net
- config/init/systemd/lxc-autostart-helper
-+config/init/systemd/lxc-monitord.service
- config/init/systemd/lxc-net.service
- config/init/systemd/lxc.service
- config/init/systemd/lxc@.service
-diff --git a/config/init/systemd/Makefile.am b/config/init/systemd/Makefile.am
-index c448850d1..4a4fde5e7 100644
---- a/config/init/systemd/Makefile.am
-+++ b/config/init/systemd/Makefile.am
-@@ -2,19 +2,21 @@ EXTRA_DIST = \
-       lxc-apparmor-load \
-       lxc.service.in \
-       lxc@.service.in \
--      lxc-net.service.in
-+      lxc-net.service.in \
-+      lxc-monitord.service.in
- if INIT_SCRIPT_SYSTEMD
--BUILT_SOURCES = lxc.service lxc@.service lxc-net.service
-+BUILT_SOURCES = lxc.service lxc@.service lxc-net.service lxc-monitord.service
--install-systemd: lxc.service lxc@.service lxc-net.service lxc-apparmor-load
-+install-systemd: lxc.service lxc@.service lxc-net.service lxc-monitord.service lxc-apparmor-load
-       $(MKDIR_P) $(DESTDIR)$(SYSTEMD_UNIT_DIR)
--      $(INSTALL_DATA) lxc.service lxc@.service lxc-net.service $(DESTDIR)$(SYSTEMD_UNIT_DIR)/
-+      $(INSTALL_DATA) lxc.service lxc@.service lxc-net.service lxc-monitord.service $(DESTDIR)$(SYSTEMD_UNIT_DIR)/
- uninstall-systemd:
-       rm -f $(DESTDIR)$(SYSTEMD_UNIT_DIR)/lxc.service
-       rm -f $(DESTDIR)$(SYSTEMD_UNIT_DIR)/lxc@.service
-       rm -f $(DESTDIR)$(SYSTEMD_UNIT_DIR)/lxc-net.service
-+      rm -f $(DESTDIR)$(SYSTEMD_UNIT_DIR)/lxc-monitord.service
-       rmdir $(DESTDIR)$(SYSTEMD_UNIT_DIR) || :
- pkglibexec_SCRIPTS = lxc-apparmor-load
-diff --git a/config/init/systemd/lxc-monitord.service.in b/config/init/systemd/lxc-monitord.service.in
-new file mode 100644
-index 000000000..406351688
---- /dev/null
-+++ b/config/init/systemd/lxc-monitord.service.in
-@@ -0,0 +1,12 @@
-+[Unit]
-+Description=LXC Container Monitoring Daemon
-+After=syslog.service network.target
-+
-+[Service]
-+Type=simple
-+ExecStart=@LIBEXECDIR@/lxc/lxc-monitord --daemon
-+StandardOutput=syslog
-+StandardError=syslog
-+
-+[Install]
-+WantedBy=multi-user.target
-diff --git a/configure.ac b/configure.ac
-index 9a6ba83c2..9f3b8fb3c 100644
---- a/configure.ac
-+++ b/configure.ac
-@@ -747,6 +747,7 @@ AC_CONFIG_FILES([
-       config/init/systemd/lxc.service
-       config/init/systemd/lxc@.service
-       config/init/systemd/lxc-net.service
-+      config/init/systemd/lxc-monitord.service
-       config/init/sysvinit/Makefile
-       config/init/sysvinit/lxc-containers
-       config/init/sysvinit/lxc-net
-diff --git a/lxc.spec.in b/lxc.spec.in
-index 7fcd811ff..59597f469 100644
---- a/lxc.spec.in
-+++ b/lxc.spec.in
-@@ -247,6 +247,7 @@ fi
- %{_unitdir}/lxc-net.service
- %{_unitdir}/lxc.service
- %{_unitdir}/lxc@.service
-+%{_unitdir}/lxc-monitord.service
- %else
- %{_sysconfdir}/rc.d/init.d/lxc
- %{_sysconfdir}/rc.d/init.d/lxc-net
-diff --git a/src/lxc/cmd/lxc_monitord.c b/src/lxc/cmd/lxc_monitord.c
-index 3b931b361..d3cc35978 100644
---- a/src/lxc/cmd/lxc_monitord.c
-+++ b/src/lxc/cmd/lxc_monitord.c
-@@ -359,17 +359,44 @@ static void lxc_monitord_sig_handler(int sig)
- int main(int argc, char *argv[])
- {
--      int ret, pipefd;
-+      int ret, pipefd = -1;
-       char logpath[PATH_MAX];
-       sigset_t mask;
--      char *lxcpath = argv[1];
-+      const char *lxcpath = NULL;
-       bool mainloop_opened = false;
-       bool monitord_created = false;
-+      bool persistent = false;
-       struct lxc_log log;
--      if (argc != 3) {
-+      if (argc > 1 && !strcmp(argv[1], "--daemon")) {
-+              persistent = true;
-+              --argc;
-+              ++argv;
-+      }
-+
-+      if (argc > 1) {
-+              lxcpath = argv[1];
-+              --argc;
-+              ++argv;
-+      } else {
-+              lxcpath = lxc_global_config_value("lxc.lxcpath");
-+              if (!lxcpath) {
-+                      ERROR("Out of memory getting lxcpath");
-+                      exit(EXIT_FAILURE);
-+              }
-+      }
-+
-+      if (argc > 1) {
-+              if (lxc_safe_int(argv[1], &pipefd) < 0)
-+                      exit(EXIT_FAILURE);
-+              --argc;
-+              ++argv;
-+      }
-+
-+      if (argc != 1 || (persistent != (pipefd == -1))) {
-               fprintf(stderr,
--                      "Usage: lxc-monitord lxcpath sync-pipe-fd\n\n"
-+                      "Usage: lxc-monitord lxcpath sync-pipe-fd\n"
-+                      "       lxc-monitord --daemon lxcpath\n\n"
-                       "NOTE: lxc-monitord is intended for use by lxc internally\n"
-                       "      and does not need to be run by hand\n\n");
-               exit(EXIT_FAILURE);
-@@ -392,9 +419,6 @@ int main(int argc, char *argv[])
-               INFO("Failed to open log file %s, log will be lost", lxcpath);
-       lxc_log_options_no_override();
--      if (lxc_safe_int(argv[2], &pipefd) < 0)
--              exit(EXIT_FAILURE);
--
-       if (sigfillset(&mask) ||
-           sigdelset(&mask, SIGILL)  ||
-           sigdelset(&mask, SIGSEGV) ||
-@@ -427,15 +451,17 @@ int main(int argc, char *argv[])
-               goto on_error;
-       monitord_created = true;
--      /* sync with parent, we're ignoring the return from write
--       * because regardless if it works or not, the following
--       * close will sync us with the parent process. the
--       * if-empty-statement construct is to quiet the
--       * warn-unused-result warning.
--       */
--      if (lxc_write_nointr(pipefd, "S", 1))
--              ;
--      close(pipefd);
-+      if (pipefd != -1) {
-+              /* sync with parent, we're ignoring the return from write
-+               * because regardless if it works or not, the following
-+               * close will sync us with the parent process. the
-+               * if-empty-statement construct is to quiet the
-+               * warn-unused-result warning.
-+               */
-+              if (lxc_write_nointr(pipefd, "S", 1))
-+                      ;
-+              close(pipefd);
-+      }
-       if (lxc_monitord_mainloop_add(&monitor)) {
-               ERROR("Failed to add mainloop handlers");
-@@ -446,7 +472,7 @@ int main(int argc, char *argv[])
-              lxc_raw_getpid(), monitor.lxcpath);
-       for (;;) {
--              ret = lxc_mainloop(&monitor.descr, 1000 * 30);
-+              ret = lxc_mainloop(&monitor.descr, persistent ? -1 : 1000 * 30);
-               if (ret) {
-                       ERROR("mainloop returned an error");
-                       break;
--- 
-2.20.1
-
diff --git a/debian/patches/pve/0007-PVE-Up-start-initutils-make-cgroupns-separation-leve.patch b/debian/patches/pve/0007-PVE-Up-start-initutils-make-cgroupns-separation-leve.patch
new file mode 100644 (file)
index 0000000..655004e
--- /dev/null
@@ -0,0 +1,97 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Wolfgang Bumiller <w.bumiller@proxmox.com>
+Date: Wed, 28 Mar 2018 13:41:46 +0200
+Subject: [PATCH] PVE: [Up] start/initutils: make cgroupns separation level
+ configurable
+
+Adds a new global config variable `lxc.cgroup.protect_limits`
+which controls whether a separation directory for cgroup
+namespaces should be used.
+Can be empty, "privileged", "unprivileged" or "both".
+
+Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
+---
+ src/lxc/initutils.c | 17 +++++++++--------
+ src/lxc/initutils.h |  1 +
+ src/lxc/start.c     | 25 ++++++++++++++-----------
+ 3 files changed, 24 insertions(+), 19 deletions(-)
+
+diff --git a/src/lxc/initutils.c b/src/lxc/initutils.c
+index da3363294..5e33afc58 100644
+--- a/src/lxc/initutils.c
++++ b/src/lxc/initutils.c
+@@ -64,14 +64,15 @@ static char *copy_global_config_value(char *p)
+ const char *lxc_global_config_value(const char *option_name)
+ {
+       static const char * const options[][2] = {
+-              { "lxc.bdev.lvm.vg",        DEFAULT_VG      },
+-              { "lxc.bdev.lvm.thin_pool", DEFAULT_THIN_POOL },
+-              { "lxc.bdev.zfs.root",      DEFAULT_ZFSROOT },
+-              { "lxc.bdev.rbd.rbdpool",   DEFAULT_RBDPOOL },
+-              { "lxc.lxcpath",            NULL            },
+-              { "lxc.default_config",     NULL            },
+-              { "lxc.cgroup.pattern",     NULL            },
+-              { "lxc.cgroup.use",         NULL            },
++              { "lxc.bdev.lvm.vg",           DEFAULT_VG      },
++              { "lxc.bdev.lvm.thin_pool",    DEFAULT_THIN_POOL },
++              { "lxc.bdev.zfs.root",         DEFAULT_ZFSROOT },
++              { "lxc.bdev.rbd.rbdpool",      DEFAULT_RBDPOOL },
++              { "lxc.lxcpath",               NULL            },
++              { "lxc.default_config",        NULL            },
++              { "lxc.cgroup.pattern",        NULL            },
++              { "lxc.cgroup.use",            NULL            },
++              { "lxc.cgroup.protect_limits", DEFAULT_CGPROTECT },
+               { NULL, NULL },
+       };
+diff --git a/src/lxc/initutils.h b/src/lxc/initutils.h
+index 6bf23a706..b542e6015 100644
+--- a/src/lxc/initutils.h
++++ b/src/lxc/initutils.h
+@@ -42,6 +42,7 @@
+ #define DEFAULT_THIN_POOL "lxc"
+ #define DEFAULT_ZFSROOT "lxc"
+ #define DEFAULT_RBDPOOL "lxc"
++#define DEFAULT_CGPROTECT "privileged"
+ #ifndef PR_SET_MM
+ #define PR_SET_MM 35
+diff --git a/src/lxc/start.c b/src/lxc/start.c
+index db0625af5..0ee7f9636 100644
+--- a/src/lxc/start.c
++++ b/src/lxc/start.c
+@@ -1909,17 +1909,20 @@ static int lxc_spawn(struct lxc_handler *handler)
+       TRACE("Set up legacy device cgroup controller limits");
+       if (cgns_supported()) {
+-              if (!cgroup_ops->payload_create(cgroup_ops, handler, true)) {
+-                      ERROR("failed to create inner cgroup separation layer");
+-                      goto out_delete_net;
+-              }
+-              if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid, true)) {
+-                      ERROR("failed to enter inner cgroup separation layer");
+-                      goto out_delete_net;
+-              }
+-              if (!cgroup_ops->chown(cgroup_ops, handler->conf, true)) {
+-                      ERROR("failed chown inner cgroup separation layer");
+-                      goto out_delete_net;
++              const char *tmp = lxc_global_config_value("lxc.cgroup.protect_limits");
++              if (!strcmp(tmp, "both") || !strcmp(tmp, wants_to_map_ids ? "unprivileged" : "privileged")) {
++                      if (!cgroup_ops->payload_create(cgroup_ops, handler, true)) {
++                              ERROR("failed to create inner cgroup separation layer");
++                              goto out_delete_net;
++                      }
++                      if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid, true)) {
++                              ERROR("failed to enter inner cgroup separation layer");
++                              goto out_delete_net;
++                      }
++                      if (!cgroup_ops->chown(cgroup_ops, handler->conf, true)) {
++                              ERROR("failed chown inner cgroup separation layer");
++                              goto out_delete_net;
++                      }
+               }
+       }
+-- 
+2.20.1
+
diff --git a/debian/patches/pve/0008-PVE-Config-Disable-lxc.monitor-cgroup.patch b/debian/patches/pve/0008-PVE-Config-Disable-lxc.monitor-cgroup.patch
deleted file mode 100644 (file)
index 263c16e..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Wolfgang Bumiller <w.bumiller@proxmox.com>
-Date: Wed, 2 Jan 2019 14:37:58 +0100
-Subject: [PATCH] PVE: [Config] Disable lxc.monitor cgroup
-
-When not using relative cgroups this makes lxc unusable
-within systemd service files as the service cgroup becomes
-empty.
-
-Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
----
- src/lxc/start.c | 18 +++++++++---------
- 1 file changed, 9 insertions(+), 9 deletions(-)
-
-diff --git a/src/lxc/start.c b/src/lxc/start.c
-index 1cf792aa2..24f387de6 100644
---- a/src/lxc/start.c
-+++ b/src/lxc/start.c
-@@ -1954,15 +1954,15 @@ int __lxc_start(const char *name, struct lxc_handler *handler,
-               goto out_fini_nonet;
-       }
--      if (!cgroup_ops->monitor_create(cgroup_ops, handler)) {
--              ERROR("Failed to create monitor cgroup");
--              goto out_fini_nonet;
--      }
--
--      if (!cgroup_ops->monitor_enter(cgroup_ops, handler->monitor_pid)) {
--              ERROR("Failed to enter monitor cgroup");
--              goto out_fini_nonet;
--      }
-+      //if (!cgroup_ops->monitor_create(cgroup_ops, handler)) {
-+      //      ERROR("Failed to create monitor cgroup");
-+      //      goto out_fini_nonet;
-+      //}
-+
-+      //if (!cgroup_ops->monitor_enter(cgroup_ops, handler->monitor_pid)) {
-+      //      ERROR("Failed to enter monitor cgroup");
-+      //      goto out_fini_nonet;
-+      //}
-       if (geteuid() == 0 && !lxc_list_empty(&conf->id_map)) {
-               /* If the backing store is a device, mount it here and now. */
--- 
-2.20.1
-
diff --git a/debian/patches/pve/0008-PVE-Config-namespace-separation.patch b/debian/patches/pve/0008-PVE-Config-namespace-separation.patch
new file mode 100644 (file)
index 0000000..56dbe3e
--- /dev/null
@@ -0,0 +1,43 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Wolfgang Bumiller <w.bumiller@proxmox.com>
+Date: Fri, 23 Dec 2016 15:57:24 +0100
+Subject: [PATCH] PVE: [Config] namespace separation
+
+* rename cgroup namespace directory to ns
+* set lxc.cgroup.protect_limits default to 'both'
+
+Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
+---
+ src/lxc/cgroups/cgroup.h | 2 +-
+ src/lxc/initutils.h      | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h
+index 35d207feb..be9df33a2 100644
+--- a/src/lxc/cgroups/cgroup.h
++++ b/src/lxc/cgroups/cgroup.h
+@@ -36,7 +36,7 @@
+  * will be moved into an additional subdirectory "cgns/" inside the cgroup in
+  * order to prevent it from accessing the outer limiting cgroup.
+  */
+-#define CGROUP_NAMESPACE_SUBDIR "cgns"
++#define CGROUP_NAMESPACE_SUBDIR "ns"
+ struct lxc_handler;
+ struct lxc_conf;
+diff --git a/src/lxc/initutils.h b/src/lxc/initutils.h
+index b542e6015..78d3f2b10 100644
+--- a/src/lxc/initutils.h
++++ b/src/lxc/initutils.h
+@@ -42,7 +42,7 @@
+ #define DEFAULT_THIN_POOL "lxc"
+ #define DEFAULT_ZFSROOT "lxc"
+ #define DEFAULT_RBDPOOL "lxc"
+-#define DEFAULT_CGPROTECT "privileged"
++#define DEFAULT_CGPROTECT "both"
+ #ifndef PR_SET_MM
+ #define PR_SET_MM 35
+-- 
+2.20.1
+
diff --git a/debian/patches/pve/0009-PVE-Config-attach-always-use-getent.patch b/debian/patches/pve/0009-PVE-Config-attach-always-use-getent.patch
new file mode 100644 (file)
index 0000000..af8b688
--- /dev/null
@@ -0,0 +1,78 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Wolfgang Bumiller <w.bumiller@proxmox.com>
+Date: Tue, 13 Aug 2019 13:57:22 +0200
+Subject: [PATCH] PVE: [Config] attach: always use getent
+
+In debian buster, some libnss plugins (if installed) can
+cause getpwuid_r to segfault instead of erroring out cleanly.
+To avoid this, stick to always using getent.
+
+Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
+---
+ src/lxc/attach.c | 28 ++--------------------------
+ 1 file changed, 2 insertions(+), 26 deletions(-)
+
+diff --git a/src/lxc/attach.c b/src/lxc/attach.c
+index 80c41fe26..f30f192e3 100644
+--- a/src/lxc/attach.c
++++ b/src/lxc/attach.c
+@@ -1506,12 +1506,8 @@ int lxc_attach_run_command(void *payload)
+ int lxc_attach_run_shell(void* payload)
+ {
+-      __do_free char *buf = NULL;
+       uid_t uid;
+-      struct passwd pwent;
+-      struct passwd *pwentp = NULL;
+       char *user_shell;
+-      size_t bufsize;
+       int ret;
+       /* Ignore payload parameter. */
+@@ -1519,32 +1515,13 @@ int lxc_attach_run_shell(void* payload)
+       uid = getuid();
+-      bufsize = sysconf(_SC_GETPW_R_SIZE_MAX);
+-      if (bufsize == -1)
+-              bufsize = 1024;
+-
+-      buf = malloc(bufsize);
+-      if (buf) {
+-              ret = getpwuid_r(uid, &pwent, buf, bufsize, &pwentp);
+-              if (!pwentp) {
+-                      if (ret == 0)
+-                              WARN("Could not find matched password record");
+-
+-                      WARN("Failed to get password record - %u", uid);
+-              }
+-      }
+-
+       /* This probably happens because of incompatible nss implementations in
+        * host and container (remember, this code is still using the host's
+        * glibc but our mount namespace is in the container) we may try to get
+        * the information by spawning a [getent passwd uid] process and parsing
+        * the result.
+        */
+-      if (!pwentp)
+-              user_shell = lxc_attach_getpwshell(uid);
+-      else
+-              user_shell = pwent.pw_shell;
+-
++      user_shell = lxc_attach_getpwshell(uid);
+       if (user_shell)
+               execlp(user_shell, user_shell, (char *)NULL);
+@@ -1554,8 +1531,7 @@ int lxc_attach_run_shell(void* payload)
+       execlp("/bin/sh", "/bin/sh", (char *)NULL);
+       SYSERROR("Failed to execute shell");
+-      if (!pwentp)
+-              free(user_shell);
++      free(user_shell);
+       return -1;
+ }
+-- 
+2.20.1
+
diff --git a/debian/patches/pve/0009-init-add-ExecReload-to-lxc.service-to-only-reload-pr.patch b/debian/patches/pve/0009-init-add-ExecReload-to-lxc.service-to-only-reload-pr.patch
deleted file mode 100644 (file)
index 7044ced..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Wolfgang Bumiller <w.bumiller@proxmox.com>
-Date: Wed, 10 Jul 2019 14:29:54 +0200
-Subject: [PATCH] init: add ExecReload to lxc.service to only reload profiles
-
-Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
----
- config/init/systemd/lxc.service.in | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/config/init/systemd/lxc.service.in b/config/init/systemd/lxc.service.in
-index 77541917e..e4c086e0a 100644
---- a/config/init/systemd/lxc.service.in
-+++ b/config/init/systemd/lxc.service.in
-@@ -10,6 +10,7 @@ RemainAfterExit=yes
- ExecStartPre=@LIBEXECDIR@/lxc/lxc-apparmor-load
- ExecStart=@LIBEXECDIR@/lxc/lxc-containers start
- ExecStop=@LIBEXECDIR@/lxc/lxc-containers stop
-+ExecReload=@LIBEXECDIR@/lxc/lxc-apparmor-load
- # Environment=BOOTUP=serial
- # Environment=CONSOLETYPE=serial
- Delegate=yes
--- 
-2.20.1
-
diff --git a/debian/patches/pve/0010-PVE-Config-attach-always-use-getent.patch b/debian/patches/pve/0010-PVE-Config-attach-always-use-getent.patch
deleted file mode 100644 (file)
index a96d4c4..0000000
+++ /dev/null
@@ -1,78 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Wolfgang Bumiller <w.bumiller@proxmox.com>
-Date: Tue, 13 Aug 2019 13:57:22 +0200
-Subject: [PATCH] PVE: [Config] attach: always use getent
-
-In debian buster, some libnss plugins (if installed) can
-cause getpwent to segfault instead of erroring out cleanly.
-To avoid this, stick to always using getent.
-
-Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
----
- src/lxc/attach.c | 29 ++---------------------------
- 1 file changed, 2 insertions(+), 27 deletions(-)
-
-diff --git a/src/lxc/attach.c b/src/lxc/attach.c
-index 117e3778f..8b34a412e 100644
---- a/src/lxc/attach.c
-+++ b/src/lxc/attach.c
-@@ -1548,11 +1548,7 @@ int lxc_attach_run_command(void *payload)
- int lxc_attach_run_shell(void* payload)
- {
-       uid_t uid;
--      struct passwd pwent;
--      struct passwd *pwentp = NULL;
-       char *user_shell;
--      char *buf;
--      size_t bufsize;
-       int ret;
-       /* Ignore payload parameter. */
-@@ -1560,32 +1556,13 @@ int lxc_attach_run_shell(void* payload)
-       uid = getuid();
--      bufsize = sysconf(_SC_GETPW_R_SIZE_MAX);
--      if (bufsize == -1)
--              bufsize = 1024;
--
--      buf = malloc(bufsize);
--      if (buf) {
--              ret = getpwuid_r(uid, &pwent, buf, bufsize, &pwentp);
--              if (!pwentp) {
--                      if (ret == 0)
--                              WARN("Could not find matched password record");
--
--                      WARN("Failed to get password record - %u", uid);
--              }
--      }
--
-       /* This probably happens because of incompatible nss implementations in
-        * host and container (remember, this code is still using the host's
-        * glibc but our mount namespace is in the container) we may try to get
-        * the information by spawning a [getent passwd uid] process and parsing
-        * the result.
-        */
--      if (!pwentp)
--              user_shell = lxc_attach_getpwshell(uid);
--      else
--              user_shell = pwent.pw_shell;
--
-+      user_shell = lxc_attach_getpwshell(uid);
-       if (user_shell)
-               execlp(user_shell, user_shell, (char *)NULL);
-@@ -1595,9 +1572,7 @@ int lxc_attach_run_shell(void* payload)
-       execlp("/bin/sh", "/bin/sh", (char *)NULL);
-       SYSERROR("Failed to execute shell");
--      if (!pwentp)
--              free(user_shell);
-+      free(user_shell);
--      free(buf);
-       return -1;
- }
--- 
-2.20.1
-
diff --git a/debian/patches/pve/0010-init-add-ExecReload-to-lxc.service-to-only-reload-pr.patch b/debian/patches/pve/0010-init-add-ExecReload-to-lxc.service-to-only-reload-pr.patch
new file mode 100644 (file)
index 0000000..7044ced
--- /dev/null
@@ -0,0 +1,25 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Wolfgang Bumiller <w.bumiller@proxmox.com>
+Date: Wed, 10 Jul 2019 14:29:54 +0200
+Subject: [PATCH] init: add ExecReload to lxc.service to only reload profiles
+
+Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
+---
+ config/init/systemd/lxc.service.in | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/config/init/systemd/lxc.service.in b/config/init/systemd/lxc.service.in
+index 77541917e..e4c086e0a 100644
+--- a/config/init/systemd/lxc.service.in
++++ b/config/init/systemd/lxc.service.in
+@@ -10,6 +10,7 @@ RemainAfterExit=yes
+ ExecStartPre=@LIBEXECDIR@/lxc/lxc-apparmor-load
+ ExecStart=@LIBEXECDIR@/lxc/lxc-containers start
+ ExecStop=@LIBEXECDIR@/lxc/lxc-containers stop
++ExecReload=@LIBEXECDIR@/lxc/lxc-apparmor-load
+ # Environment=BOOTUP=serial
+ # Environment=CONSOLETYPE=serial
+ Delegate=yes
+-- 
+2.20.1
+
diff --git a/debian/patches/pve/0011-apparmor-generate-ro-bind-remount-rule-list.patch b/debian/patches/pve/0011-apparmor-generate-ro-bind-remount-rule-list.patch
new file mode 100644 (file)
index 0000000..ecc1ca3
--- /dev/null
@@ -0,0 +1,169 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Wolfgang Bumiller <w.bumiller@proxmox.com>
+Date: Fri, 2 Aug 2019 12:57:42 +0200
+Subject: [PATCH] apparmor: generate ro,bind,remount rule list
+
+and update to changes based on lxd
+
+Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
+---
+ src/lxc/lsm/apparmor.c | 114 ++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 100 insertions(+), 14 deletions(-)
+
+diff --git a/src/lxc/lsm/apparmor.c b/src/lxc/lsm/apparmor.c
+index e32b12531..47f825866 100644
+--- a/src/lxc/lsm/apparmor.c
++++ b/src/lxc/lsm/apparmor.c
+@@ -149,6 +149,16 @@ static const char AA_PROFILE_BASE[] =
+ "#  mount options=(rw,make-unbindable) -> **,\n"
+ "#  mount options=(rw,make-runbindable) -> **,\n"
+ "\n"
++"# Allow limited modification of mount propagation\n"
++"  mount options=(rw,make-slave) -> /,\n"
++"  mount options=(rw,make-rslave) -> /,\n"
++"  mount options=(rw,make-shared) -> /,\n"
++"  mount options=(rw,make-rshared) -> /,\n"
++"  mount options=(rw,make-private) -> /,\n"
++"  mount options=(rw,make-rprivate) -> /,\n"
++"  mount options=(rw,make-unbindable) -> /,\n"
++"  mount options=(rw,make-runbindable) -> /,\n"
++"\n"
+ "  # allow bind-mounts of anything except /proc, /sys and /dev\n"
+ "  mount options=(rw,bind) /[^spd]*{,/**},\n"
+ "  mount options=(rw,bind) /d[^e]*{,/**},\n"
+@@ -167,15 +177,18 @@ static const char AA_PROFILE_BASE[] =
+ "  mount options=(rw,bind) /sy[^s]*{,/**},\n"
+ "  mount options=(rw,bind) /sys?*{,/**},\n"
+ "\n"
+-"  # allow various ro-bind-*re*-mounts\n"
+-"  mount options=(ro,remount,bind),\n"
+-"  mount options=(ro,remount,bind,nosuid),\n"
+-"  mount options=(ro,remount,bind,noexec),\n"
+-"  mount options=(ro,remount,bind,nodev),\n"
+-"  mount options=(ro,remount,bind,nosuid,noexec),\n"
+-"  mount options=(ro,remount,bind,noexec,nodev),\n"
+-"  mount options=(ro,remount,bind,nodev,nosuid),\n"
+-"  mount options=(ro,remount,bind,nosuid,noexec,nodev),\n"
++"  # Allow rbind-mounts of anything except /, /dev, /proc and /sys\n"
++"  mount options=(rw,rbind) /[^spd]*{,/**},\n"
++"  mount options=(rw,rbind) /d[^e]*{,/**},\n"
++"  mount options=(rw,rbind) /de[^v]*{,/**},\n"
++"  mount options=(rw,rbind) /dev?*{,/**},\n"
++"  mount options=(rw,rbind) /p[^r]*{,/**},\n"
++"  mount options=(rw,rbind) /pr[^o]*{,/**},\n"
++"  mount options=(rw,rbind) /pro[^c]*{,/**},\n"
++"  mount options=(rw,rbind) /proc?*{,/**},\n"
++"  mount options=(rw,rbind) /s[^y]*{,/**},\n"
++"  mount options=(rw,rbind) /sy[^s]*{,/**},\n"
++"  mount options=(rw,rbind) /sys?*{,/**},\n"
+ "\n"
+ "  # allow moving mounts except for /proc, /sys and /dev\n"
+ "  mount options=(rw,move) /[^spd]*{,/**},\n"
+@@ -341,12 +354,13 @@ static const char AA_PROFILE_NESTING_BASE[] =
+ "\n"
+ "  mount fstype=proc -> /usr/lib/*/lxc/**,\n"
+ "  mount fstype=sysfs -> /usr/lib/*/lxc/**,\n"
+-"  mount options=(rw,bind),\n"
+-"  mount options=(rw,rbind),\n"
+-"  mount options=(rw,make-rshared),\n"
+ "\n"
+-   /* FIXME: What's the state here on apparmor's side? */
+-"  # there doesn't seem to be a way to ask for:\n"
++"  # Allow nested LXD\n"
++"  mount none -> /var/lib/lxd/shmounts/,\n"
++"  mount /var/lib/lxd/shmounts/ -> /var/lib/lxd/shmounts/,\n"
++"  mount options=bind /var/lib/lxd/shmounts/** -> /var/lib/lxd/**,\n"
++"\n"
++"  # FIXME: There doesn't seem to be a way to ask for:\n"
+ "  # mount options=(ro,nosuid,nodev,noexec,remount,bind),\n"
+ "  # as we always get mount to $cdir/proc/sys with those flags denied\n"
+ "  # So allow all mounts until that is straightened out:\n"
+@@ -648,6 +662,76 @@ static bool is_privileged(struct lxc_conf *conf)
+       return lxc_list_empty(&conf->id_map);
+ }
++static const char* AA_ALL_DEST_PATH_LIST[] = {
++      " -> /[^spd]*{,/**},\n",
++      " -> /d[^e]*{,/**},\n",
++      " -> /de[^v]*{,/**},\n",
++      " -> /dev/.[^l]*{,/**},\n",
++      " -> /dev/.l[^x]*{,/**},\n",
++      " -> /dev/.lx[^c]*{,/**},\n",
++      " -> /dev/.lxc?*{,/**},\n",
++      " -> /dev/[^.]*{,/**},\n",
++      " -> /dev?*{,/**},\n",
++      " -> /p[^r]*{,/**},\n",
++      " -> /pr[^o]*{,/**},\n",
++      " -> /pro[^c]*{,/**},\n",
++      " -> /proc?*{,/**},\n",
++      " -> /s[^y]*{,/**},\n",
++      " -> /sy[^s]*{,/**},\n",
++      " -> /sys?*{,/**},\n",
++      NULL,
++};
++
++static void append_remount_rule(char **profile, size_t *size, const char *rule)
++{
++      size_t rule_len = strlen(rule);
++
++      for (const char **dest = AA_ALL_DEST_PATH_LIST; *dest; ++dest) {
++              must_append_sized(profile, size, rule, rule_len);
++              must_append_sized(profile, size, *dest, strlen(*dest));
++      }
++}
++
++static void append_all_remount_rules(char **profile, size_t *size)
++{
++      must_append_sized(profile, size,
++                        "# allow various ro-bind-*re*mounts\n",
++                        sizeof("# allow various ro-bind-*re*mounts\n")-1);
++
++      static struct mntopt_t {
++              const char *opt;
++              size_t len;
++      } mnt_opt_list[] = {
++              { ",nodev", sizeof(",nodev")-1 },
++              { ",nosuid", sizeof(",nosuid")-1 },
++              { ",noexec", sizeof(",noexec")-1 },
++      };
++
++      const size_t opt_count = sizeof(mnt_opt_list) / sizeof(mnt_opt_list[0]);
++
++      char buf[128] = "mount options=(ro,remount,bind";
++      const size_t start = strlen(buf);
++      for (size_t i = 0; i != 1 << opt_count; ++i) {
++              size_t at = start;
++              unsigned opt_bit = 1;
++
++              for (size_t o = 0; o != opt_count; ++o, opt_bit <<= 1) {
++                      if (i & opt_bit) {
++                              struct mntopt_t *opt = &mnt_opt_list[o];
++                              memcpy(&buf[at], opt->opt, opt->len);
++                              at += opt->len;
++                      }
++              }
++
++              memcpy(&buf[at], ")", sizeof(")"));
++              append_remount_rule(profile, size, buf);
++              memcpy(&buf[at], ",noatime)", sizeof(",noatime)"));
++              append_remount_rule(profile, size, buf);
++              memcpy(&buf[at], ",strictatime)", sizeof(",strictatime)"));
++              append_remount_rule(profile, size, buf);
++      }
++}
++
+ static char *get_apparmor_profile_content(struct lxc_conf *conf, const char *lxcpath)
+ {
+       char *profile, *profile_name_full;
+@@ -665,6 +749,8 @@ static char *get_apparmor_profile_content(struct lxc_conf *conf, const char *lxc
+       must_append_sized(&profile, &size, AA_PROFILE_BASE,
+                         STRARRAYLEN(AA_PROFILE_BASE));
++      append_all_remount_rules(&profile, &size);
++
+       if (aa_supports_unix)
+               must_append_sized(&profile, &size, AA_PROFILE_UNIX_SOCKETS,
+                                 STRARRAYLEN(AA_PROFILE_UNIX_SOCKETS));
+-- 
+2.20.1
+
diff --git a/debian/patches/pve/0012-apparmor-Prevent-writes-to-proc-acpi.patch b/debian/patches/pve/0012-apparmor-Prevent-writes-to-proc-acpi.patch
new file mode 100644 (file)
index 0000000..77e1e81
--- /dev/null
@@ -0,0 +1,27 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Wolfgang Bumiller <w.bumiller@proxmox.com>
+Date: Wed, 23 Oct 2019 10:53:21 +0200
+Subject: [PATCH] apparmor: Prevent writes to /proc/acpi/**
+
+Same as #3117.
+
+Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
+---
+ src/lxc/lsm/apparmor.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/src/lxc/lsm/apparmor.c b/src/lxc/lsm/apparmor.c
+index 47f825866..8aebb21af 100644
+--- a/src/lxc/lsm/apparmor.c
++++ b/src/lxc/lsm/apparmor.c
+@@ -121,6 +121,7 @@ static const char AA_PROFILE_BASE[] =
+ "  # block some other dangerous paths\n"
+ "  deny @{PROC}/kcore rwklx,\n"
+ "  deny @{PROC}/sysrq-trigger rwklx,\n"
++"  deny @{PROC}/acpi/** rwklx,\n"
+ "\n"
+ "  # deny writes in /sys except for /sys/fs/cgroup, also allow\n"
+ "  # fusectl, securityfs and debugfs to be mounted there (read-only)\n"
+-- 
+2.20.1
+
index ba6f5afc1615f45ba59bff0adc8e739a128b8df5..7511a9e26692c72890cb7a42617d8ae32765f6c2 100644 (file)
@@ -1,15 +1,12 @@
 pve/0001-PVE-Config-lxc.service-start-after-a-potential-syslo.patch
 pve/0002-PVE-Down-run-lxcnetaddbr-when-instantiating-veths.patch
 pve/0003-PVE-Config-deny-rw-mounting-of-sys-and-proc.patch
-pve/0004-PVE-Up-separate-the-limiting-from-the-namespaced-cgr.patch
-pve/0005-PVE-Up-start-initutils-make-cgroupns-separation-leve.patch
-pve/0006-PVE-Config-namespace-separation.patch
-pve/0007-PVE-Up-possibility-to-run-lxc-monitord-as-a-regular-.patch
-pve/0008-PVE-Config-Disable-lxc.monitor-cgroup.patch
-pve/0009-init-add-ExecReload-to-lxc.service-to-only-reload-pr.patch
-pve/0010-PVE-Config-attach-always-use-getent.patch
-extra/0001-conf-use-SYSERROR-on-lxc_write_to_file-errors.patch
-extra/0002-Revert-conf-remove-extra-MS_BIND-with-sysfs-mixed.patch
-extra/0003-CVE-2019-5736-runC-rexec-callers-as-memfd.patch
-extra/0004-apparmor-generate-ro-bind-remount-rule-list.patch
-extra/0005-attach-don-t-close-stdout-of-getent.patch
+pve/0004-PVE-Up-possibility-to-run-lxc-monitord-as-a-regular-.patch
+pve/0005-PVE-Config-Disable-lxc.monitor-cgroup.patch
+pve/0006-PVE-Up-separate-the-limiting-from-the-namespaced-cgr.patch
+pve/0007-PVE-Up-start-initutils-make-cgroupns-separation-leve.patch
+pve/0008-PVE-Config-namespace-separation.patch
+pve/0009-PVE-Config-attach-always-use-getent.patch
+pve/0010-init-add-ExecReload-to-lxc.service-to-only-reload-pr.patch
+pve/0011-apparmor-generate-ro-bind-remount-rule-list.patch
+pve/0012-apparmor-Prevent-writes-to-proc-acpi.patch
diff --git a/lxc b/lxc
index dfaaf1cf5a9136c2caf9aab147e0f51dcb86bafb..344b8ee293f4d3730a70a6ccaa03d7e4a516ae95 160000 (submodule)
--- a/lxc
+++ b/lxc
@@ -1 +1 @@
-Subproject commit dfaaf1cf5a9136c2caf9aab147e0f51dcb86bafb
+Subproject commit 344b8ee293f4d3730a70a6ccaa03d7e4a516ae95