pkglibexec_SCRIPTS = lxc-apparmor-load
diff --git a/configure.ac b/configure.ac
-index d32fecc06..1dd916248 100644
+index e30ea6f6e..16c5ab8c4 100644
--- a/configure.ac
+++ b/configure.ac
-@@ -842,6 +842,7 @@ AC_CONFIG_FILES([
+@@ -913,6 +913,7 @@ AC_CONFIG_FILES([
config/init/systemd/lxc.service
config/init/systemd/lxc@.service
config/init/systemd/lxc-net.service
+++ /dev/null
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Wolfgang Bumiller <w.bumiller@proxmox.com>
-Date: Thu, 2 Apr 2020 10:01:37 +0200
-Subject: [PATCH] [doc] introduce
- lxc.cgroup.dir.{monitor,container,container.inner}
-
-This is a new approach to #1302 with a container-side
-configuration instead of a global boolean flag.
-
-Contrary to the previous PR using an optional additional
-parameter for the get-cgroup command, this introduces two
-new additional commands to get the limiting cgroup path and
-cgroup2 file descriptor. If the limiting option is not in
-use, these behave identical to their full-path counterparts.
-
-If these variables are used the payload will end up in the
-concatenation of lxc.cgroup.dir.container and
-lxc.cgroup.dir.container.inner (which may be empty), and the
-monitor will end up in lxc.cgruop.dir.monitor. The
-directories are fixed, no retry count logic is applied,
-failing to create these directories will simply be a hard
-error.
-
-Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
----
- doc/lxc.container.conf.sgml.in | 47 +++++++++++++
- src/lxc/commands.c | 5 +-
- src/lxc/conf.c | 3 +
- src/lxc/confile.c | 124 +++++++++++++++++++++++++++++++++
- 4 files changed, 177 insertions(+), 2 deletions(-)
-
-diff --git a/doc/lxc.container.conf.sgml.in b/doc/lxc.container.conf.sgml.in
-index 3e0e55cee..4011f5734 100644
---- a/doc/lxc.container.conf.sgml.in
-+++ b/doc/lxc.container.conf.sgml.in
-@@ -1571,6 +1571,53 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- </para>
- </listitem>
- </varlistentry>
-+ <varlistentry>
-+ <term>
-+ <option>lxc.cgroup.dir.container</option>
-+ </term>
-+ <listitem>
-+ <para>
-+ This is similar to <option>lxc.cgroup.dir</option>, but must be
-+ used together with <option>lxc.cgroup.dir.monitor</option> and
-+ affects only the container's cgroup path. This option is mutually
-+ exclusive with <option>lxc.cgroup.dir</option>.
-+ Note that the final path the container attaches to may be
-+ extended further by the
-+ <option>lxc.cgroup.dir.container.namespace</option> option.
-+ </para>
-+ </listitem>
-+ </varlistentry>
-+ <varlistentry>
-+ <term>
-+ <option>lxc.cgroup.dir.monitor</option>
-+ </term>
-+ <listitem>
-+ <para>
-+ This is the monitor process counterpart to
-+ <option>lxc.cgroup.dir.container</option>.
-+ </para>
-+ </listitem>
-+ </varlistentry>
-+ <varlistentry>
-+ <term>
-+ <option>lxc.cgroup.dir.container.namespace</option>
-+ </term>
-+ <listitem>
-+ <para>
-+ Specify an additional subdirectory where the cgroup namespace
-+ will be created. With this option, the cgroup limits will be
-+ applied to the outer path specified in
-+ <option>lxc.cgroup.dir.container</option>, which is not accessible
-+ from within the container, making it possible to better enforce
-+ limits for privileged containers in a way they cannot override
-+ them.
-+ This only works in conjunction with the
-+ <option>lxc.cgroup.dir.container</option> and
-+ <option>lxc.cgroup.dir.monitor</option> options and has otherwise
-+ no effect.
-+ </para>
-+ </listitem>
-+ </varlistentry>
- <varlistentry>
- <term>
- <option>lxc.cgroup.relative</option>
-diff --git a/src/lxc/commands.c b/src/lxc/commands.c
-index 3046587c7..a9a03ca2c 100644
---- a/src/lxc/commands.c
-+++ b/src/lxc/commands.c
-@@ -719,7 +719,7 @@ static int lxc_cmd_get_limiting_cgroup_callback(int fd, struct lxc_cmd_req *req,
- struct lxc_handler *handler,
- struct lxc_epoll_descr *descr)
- {
-- return ret_errno(ENOSYS);
-+ return lxc_cmd_get_cgroup_callback_do(fd, req, handler, descr, true);
- }
-
- /*
-@@ -1569,7 +1569,8 @@ static int lxc_cmd_get_limiting_cgroup2_fd_callback(int fd,
- struct lxc_handler *handler,
- struct lxc_epoll_descr *descr)
- {
-- return ret_errno(ENOSYS);
-+ return lxc_cmd_get_cgroup2_fd_callback_do(fd, req, handler, descr,
-+ true);
- }
-
- static int lxc_cmd_process(int fd, struct lxc_cmd_req *req,
-diff --git a/src/lxc/conf.c b/src/lxc/conf.c
-index ae4972551..f1f01898f 100644
---- a/src/lxc/conf.c
-+++ b/src/lxc/conf.c
-@@ -3844,6 +3844,9 @@ void lxc_conf_free(struct lxc_conf *conf)
- lxc_clear_apparmor_raw(conf);
- lxc_clear_namespace(conf);
- free(conf->cgroup_meta.dir);
-+ free(conf->cgroup_meta.monitor_dir);
-+ free(conf->cgroup_meta.container_dir);
-+ free(conf->cgroup_meta.namespace_dir);
- free(conf->cgroup_meta.controllers);
- free(conf->shmount.path_host);
- free(conf->shmount.path_cont);
-diff --git a/src/lxc/confile.c b/src/lxc/confile.c
-index da0da5ff9..dab5aaca2 100644
---- a/src/lxc/confile.c
-+++ b/src/lxc/confile.c
-@@ -71,6 +71,9 @@ lxc_config_define(cap_keep);
- lxc_config_define(cgroup_controller);
- lxc_config_define(cgroup2_controller);
- lxc_config_define(cgroup_dir);
-+lxc_config_define(cgroup_monitor_dir);
-+lxc_config_define(cgroup_container_dir);
-+lxc_config_define(cgroup_container_inner_dir);
- lxc_config_define(cgroup_relative);
- lxc_config_define(console_buffer_size);
- lxc_config_define(console_logfile);
-@@ -170,6 +173,9 @@ static struct lxc_config_t config_jump_table[] = {
- { "lxc.cap.drop", set_config_cap_drop, get_config_cap_drop, clr_config_cap_drop, },
- { "lxc.cap.keep", set_config_cap_keep, get_config_cap_keep, clr_config_cap_keep, },
- { "lxc.cgroup2", set_config_cgroup2_controller, get_config_cgroup2_controller, clr_config_cgroup2_controller, },
-+ { "lxc.cgroup.dir.monitor", set_config_cgroup_monitor_dir, get_config_cgroup_monitor_dir, clr_config_cgroup_monitor_dir, },
-+ { "lxc.cgroup.dir.container", set_config_cgroup_container_dir, get_config_cgroup_container_dir, clr_config_cgroup_container_dir, },
-+ { "lxc.cgroup.dir.container.inner",set_config_cgroup_container_inner_dir, get_config_cgroup_container_inner_dir, clr_config_cgroup_container_inner_dir,},
- { "lxc.cgroup.dir", set_config_cgroup_dir, get_config_cgroup_dir, clr_config_cgroup_dir, },
- { "lxc.cgroup.relative", set_config_cgroup_relative, get_config_cgroup_relative, clr_config_cgroup_relative, },
- { "lxc.cgroup", set_config_cgroup_controller, get_config_cgroup_controller, clr_config_cgroup_controller, },
-@@ -1687,6 +1693,48 @@ static int set_config_cgroup_dir(const char *key, const char *value,
- return set_config_string_item(&lxc_conf->cgroup_meta.dir, value);
- }
-
-+static int set_config_cgroup_monitor_dir(const char *key, const char *value,
-+ struct lxc_conf *lxc_conf, void *data)
-+{
-+ if (lxc_config_value_empty(value))
-+ return clr_config_cgroup_monitor_dir(key, lxc_conf, NULL);
-+
-+ return set_config_string_item(&lxc_conf->cgroup_meta.monitor_dir,
-+ value);
-+}
-+
-+static int set_config_cgroup_container_dir(const char *key, const char *value,
-+ struct lxc_conf *lxc_conf,
-+ void *data)
-+{
-+ if (lxc_config_value_empty(value))
-+ return clr_config_cgroup_container_dir(key, lxc_conf, NULL);
-+
-+ return set_config_string_item(&lxc_conf->cgroup_meta.container_dir,
-+ value);
-+}
-+
-+static int set_config_cgroup_container_inner_dir(const char *key,
-+ const char *value,
-+ struct lxc_conf *lxc_conf,
-+ void *data)
-+{
-+ if (lxc_config_value_empty(value))
-+ return clr_config_cgroup_container_inner_dir(key, lxc_conf,
-+ NULL);
-+
-+ if (strchr(value, '/') ||
-+ strcmp(value, ".") == 0 ||
-+ strcmp(value, "..") == 0)
-+ {
-+ ERROR("lxc.cgroup.dir.container.inner must be a single directory name");
-+ return -1;
-+ }
-+
-+ return set_config_string_item(&lxc_conf->cgroup_meta.namespace_dir,
-+ value);
-+}
-+
- static int set_config_cgroup_relative(const char *key, const char *value,
- struct lxc_conf *lxc_conf, void *data)
- {
-@@ -3498,6 +3546,58 @@ static int get_config_cgroup_dir(const char *key, char *retv, int inlen,
- return fulllen;
- }
-
-+static int get_config_cgroup_monitor_dir(const char *key, char *retv, int inlen,
-+ struct lxc_conf *lxc_conf, void *data)
-+{
-+ int len;
-+ int fulllen = 0;
-+
-+ if (!retv)
-+ inlen = 0;
-+ else
-+ memset(retv, 0, inlen);
-+
-+ strprint(retv, inlen, "%s", lxc_conf->cgroup_meta.monitor_dir);
-+
-+ return fulllen;
-+}
-+
-+static int get_config_cgroup_container_dir(const char *key, char *retv,
-+ int inlen,
-+ struct lxc_conf *lxc_conf,
-+ void *data)
-+{
-+ int len;
-+ int fulllen = 0;
-+
-+ if (!retv)
-+ inlen = 0;
-+ else
-+ memset(retv, 0, inlen);
-+
-+ strprint(retv, inlen, "%s", lxc_conf->cgroup_meta.container_dir);
-+
-+ return fulllen;
-+}
-+
-+static int get_config_cgroup_container_inner_dir(const char *key, char *retv,
-+ int inlen,
-+ struct lxc_conf *lxc_conf,
-+ void *data)
-+{
-+ int len;
-+ int fulllen = 0;
-+
-+ if (!retv)
-+ inlen = 0;
-+ else
-+ memset(retv, 0, inlen);
-+
-+ strprint(retv, inlen, "%s", lxc_conf->cgroup_meta.namespace_dir);
-+
-+ return fulllen;
-+}
-+
- static inline int get_config_cgroup_relative(const char *key, char *retv,
- int inlen, struct lxc_conf *lxc_conf,
- void *data)
-@@ -4308,6 +4408,30 @@ static int clr_config_cgroup_dir(const char *key, struct lxc_conf *lxc_conf,
- return 0;
- }
-
-+static int clr_config_cgroup_monitor_dir(const char *key,
-+ struct lxc_conf *lxc_conf,
-+ void *data)
-+{
-+ free_disarm(lxc_conf->cgroup_meta.monitor_dir);
-+ return 0;
-+}
-+
-+static int clr_config_cgroup_container_dir(const char *key,
-+ struct lxc_conf *lxc_conf,
-+ void *data)
-+{
-+ free_disarm(lxc_conf->cgroup_meta.container_dir);
-+ return 0;
-+}
-+
-+static int clr_config_cgroup_container_inner_dir(const char *key,
-+ struct lxc_conf *lxc_conf,
-+ void *data)
-+{
-+ free_disarm(lxc_conf->cgroup_meta.namespace_dir);
-+ return 0;
-+}
-+
- static inline int clr_config_cgroup_relative(const char *key,
- struct lxc_conf *lxc_conf,
- void *data)
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Wolfgang Bumiller <w.bumiller@proxmox.com>
+Date: Thu, 2 Apr 2020 10:01:37 +0200
+Subject: [PATCH] introduce lxc.cgroup.dir.{monitor,container,container.inner}
+
+This is a new approach to #1302 with a container-side
+configuration instead of a global boolean flag.
+
+Contrary to the previous PR using an optional additional
+parameter for the get-cgroup command, this introduces two
+new additional commands to get the limiting cgroup path and
+cgroup2 file descriptor. If the limiting option is not in
+use, these behave identical to their full-path counterparts.
+
+If these variables are used the payload will end up in the
+concatenation of lxc.cgroup.dir.container and
+lxc.cgroup.dir.container.inner (which may be empty), and the
+monitor will end up in lxc.cgroup.dir.monitor. The
+directories are fixed, no retry count logic is applied,
+failing to create these directories will simply be a hard
+error.
+
+Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ doc/lxc.container.conf.sgml.in | 47 +++++++++++++
+ src/lxc/confile.c | 124 +++++++++++++++++++++++++++++++++
+ 2 files changed, 171 insertions(+)
+
+diff --git a/doc/lxc.container.conf.sgml.in b/doc/lxc.container.conf.sgml.in
+index e089fa61d..595cb3972 100644
+--- a/doc/lxc.container.conf.sgml.in
++++ b/doc/lxc.container.conf.sgml.in
+@@ -1757,6 +1757,53 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ </para>
+ </listitem>
+ </varlistentry>
++ <varlistentry>
++ <term>
++ <option>lxc.cgroup.dir.container</option>
++ </term>
++ <listitem>
++ <para>
++ This is similar to <option>lxc.cgroup.dir</option>, but must be
++ used together with <option>lxc.cgroup.dir.monitor</option> and
++ affects only the container's cgroup path. This option is mutually
++ exclusive with <option>lxc.cgroup.dir</option>.
++ Note that the final path the container attaches to may be
++ extended further by the
++ <option>lxc.cgroup.dir.container.namespace</option> option.
++ </para>
++ </listitem>
++ </varlistentry>
++ <varlistentry>
++ <term>
++ <option>lxc.cgroup.dir.monitor</option>
++ </term>
++ <listitem>
++ <para>
++ This is the monitor process counterpart to
++ <option>lxc.cgroup.dir.container</option>.
++ </para>
++ </listitem>
++ </varlistentry>
++ <varlistentry>
++ <term>
++ <option>lxc.cgroup.dir.container.namespace</option>
++ </term>
++ <listitem>
++ <para>
++ Specify an additional subdirectory where the cgroup namespace
++ will be created. With this option, the cgroup limits will be
++ applied to the outer path specified in
++ <option>lxc.cgroup.dir.container</option>, which is not accessible
++ from within the container, making it possible to better enforce
++ limits for privileged containers in a way they cannot override
++ them.
++ This only works in conjunction with the
++ <option>lxc.cgroup.dir.container</option> and
++ <option>lxc.cgroup.dir.monitor</option> options and has otherwise
++ no effect.
++ </para>
++ </listitem>
++ </varlistentry>
+ <varlistentry>
+ <term>
+ <option>lxc.cgroup.relative</option>
+diff --git a/src/lxc/confile.c b/src/lxc/confile.c
+index 37c38fe1e..daf8ee474 100644
+--- a/src/lxc/confile.c
++++ b/src/lxc/confile.c
+@@ -67,6 +67,9 @@ lxc_config_define(cap_keep);
+ lxc_config_define(cgroup_controller);
+ lxc_config_define(cgroup2_controller);
+ lxc_config_define(cgroup_dir);
++lxc_config_define(cgroup_monitor_dir);
++lxc_config_define(cgroup_container_dir);
++lxc_config_define(cgroup_container_inner_dir);
+ lxc_config_define(cgroup_relative);
+ lxc_config_define(console_buffer_size);
+ lxc_config_define(console_logfile);
+@@ -185,6 +188,9 @@ static struct lxc_config_t config_jump_table[] = {
+ { "lxc.cap.drop", true, set_config_cap_drop, get_config_cap_drop, clr_config_cap_drop, },
+ { "lxc.cap.keep", true, set_config_cap_keep, get_config_cap_keep, clr_config_cap_keep, },
+ { "lxc.cgroup2", false, set_config_cgroup2_controller, get_config_cgroup2_controller, clr_config_cgroup2_controller, },
++ { "lxc.cgroup.dir.monitor", true, set_config_cgroup_monitor_dir, get_config_cgroup_monitor_dir, clr_config_cgroup_monitor_dir, },
++ { "lxc.cgroup.dir.container.inner", true, set_config_cgroup_container_inner_dir, get_config_cgroup_container_inner_dir, clr_config_cgroup_container_inner_dir, },
++ { "lxc.cgroup.dir.container", true, set_config_cgroup_container_dir, get_config_cgroup_container_dir, clr_config_cgroup_container_dir, },
+ { "lxc.cgroup.dir", true, set_config_cgroup_dir, get_config_cgroup_dir, clr_config_cgroup_dir, },
+ { "lxc.cgroup.relative", true, set_config_cgroup_relative, get_config_cgroup_relative, clr_config_cgroup_relative, },
+ { "lxc.cgroup", false, set_config_cgroup_controller, get_config_cgroup_controller, clr_config_cgroup_controller, },
+@@ -1795,6 +1801,48 @@ static int set_config_cgroup_dir(const char *key, const char *value,
+ return set_config_path_item(&lxc_conf->cgroup_meta.dir, value);
+ }
+
++static int set_config_cgroup_monitor_dir(const char *key, const char *value,
++ struct lxc_conf *lxc_conf, void *data)
++{
++ if (lxc_config_value_empty(value))
++ return clr_config_cgroup_monitor_dir(key, lxc_conf, NULL);
++
++ return set_config_string_item(&lxc_conf->cgroup_meta.monitor_dir,
++ value);
++}
++
++static int set_config_cgroup_container_dir(const char *key, const char *value,
++ struct lxc_conf *lxc_conf,
++ void *data)
++{
++ if (lxc_config_value_empty(value))
++ return clr_config_cgroup_container_dir(key, lxc_conf, NULL);
++
++ return set_config_string_item(&lxc_conf->cgroup_meta.container_dir,
++ value);
++}
++
++static int set_config_cgroup_container_inner_dir(const char *key,
++ const char *value,
++ struct lxc_conf *lxc_conf,
++ void *data)
++{
++ if (lxc_config_value_empty(value))
++ return clr_config_cgroup_container_inner_dir(key, lxc_conf,
++ NULL);
++
++ if (strchr(value, '/') ||
++ strcmp(value, ".") == 0 ||
++ strcmp(value, "..") == 0)
++ {
++ ERROR("lxc.cgroup.dir.container.inner must be a single directory name");
++ return -1;
++ }
++
++ return set_config_string_item(&lxc_conf->cgroup_meta.namespace_dir,
++ value);
++}
++
+ static int set_config_cgroup_relative(const char *key, const char *value,
+ struct lxc_conf *lxc_conf, void *data)
+ {
+@@ -3654,6 +3702,58 @@ static int get_config_cgroup_dir(const char *key, char *retv, int inlen,
+ return fulllen;
+ }
+
++static int get_config_cgroup_monitor_dir(const char *key, char *retv, int inlen,
++ struct lxc_conf *lxc_conf, void *data)
++{
++ int len;
++ int fulllen = 0;
++
++ if (!retv)
++ inlen = 0;
++ else
++ memset(retv, 0, inlen);
++
++ strprint(retv, inlen, "%s", lxc_conf->cgroup_meta.monitor_dir);
++
++ return fulllen;
++}
++
++static int get_config_cgroup_container_dir(const char *key, char *retv,
++ int inlen,
++ struct lxc_conf *lxc_conf,
++ void *data)
++{
++ int len;
++ int fulllen = 0;
++
++ if (!retv)
++ inlen = 0;
++ else
++ memset(retv, 0, inlen);
++
++ strprint(retv, inlen, "%s", lxc_conf->cgroup_meta.container_dir);
++
++ return fulllen;
++}
++
++static int get_config_cgroup_container_inner_dir(const char *key, char *retv,
++ int inlen,
++ struct lxc_conf *lxc_conf,
++ void *data)
++{
++ int len;
++ int fulllen = 0;
++
++ if (!retv)
++ inlen = 0;
++ else
++ memset(retv, 0, inlen);
++
++ strprint(retv, inlen, "%s", lxc_conf->cgroup_meta.namespace_dir);
++
++ return fulllen;
++}
++
+ static inline int get_config_cgroup_relative(const char *key, char *retv,
+ int inlen, struct lxc_conf *lxc_conf,
+ void *data)
+@@ -4491,6 +4591,30 @@ static int clr_config_cgroup_dir(const char *key, struct lxc_conf *lxc_conf,
+ return 0;
+ }
+
++static int clr_config_cgroup_monitor_dir(const char *key,
++ struct lxc_conf *lxc_conf,
++ void *data)
++{
++ free_disarm(lxc_conf->cgroup_meta.monitor_dir);
++ return 0;
++}
++
++static int clr_config_cgroup_container_dir(const char *key,
++ struct lxc_conf *lxc_conf,
++ void *data)
++{
++ free_disarm(lxc_conf->cgroup_meta.container_dir);
++ return 0;
++}
++
++static int clr_config_cgroup_container_inner_dir(const char *key,
++ struct lxc_conf *lxc_conf,
++ void *data)
++{
++ free_disarm(lxc_conf->cgroup_meta.namespace_dir);
++ return 0;
++}
++
+ static inline int clr_config_cgroup_relative(const char *key,
+ struct lxc_conf *lxc_conf,
+ void *data)
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/doc/lxc.container.conf.sgml.in b/doc/lxc.container.conf.sgml.in
-index 4011f5734..006dcad92 100644
+index 595cb3972..18bf36aaf 100644
--- a/doc/lxc.container.conf.sgml.in
+++ b/doc/lxc.container.conf.sgml.in
-@@ -1583,7 +1583,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+@@ -1769,7 +1769,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
exclusive with <option>lxc.cgroup.dir</option>.
Note that the final path the container attaches to may be
extended further by the
</para>
</listitem>
</varlistentry>
-@@ -1600,7 +1600,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+@@ -1786,7 +1786,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
</varlistentry>
<varlistentry>
<term>
1 file changed, 3 insertions(+), 8 deletions(-)
diff --git a/src/lxc/confile.c b/src/lxc/confile.c
-index dab5aaca2..3a388fed1 100644
+index daf8ee474..ae6e72c22 100644
--- a/src/lxc/confile.c
+++ b/src/lxc/confile.c
-@@ -1720,19 +1720,14 @@ static int set_config_cgroup_container_inner_dir(const char *key,
+@@ -1828,19 +1828,14 @@ static int set_config_cgroup_container_inner_dir(const char *key,
void *data)
{
if (lxc_config_value_empty(value))
2 files changed, 5 insertions(+)
diff --git a/doc/api-extensions.md b/doc/api-extensions.md
-index 4756be3dc..24dea8c9d 100644
+index ff0df50ef..13ba41a62 100644
--- a/doc/api-extensions.md
+++ b/doc/api-extensions.md
-@@ -135,3 +135,7 @@ Retrieve the seccomp notifier fd from a running container.
- ## seccomp\_proxy\_send\_notify\_fd
+@@ -136,6 +136,10 @@ Retrieve the seccomp notifier fd from a running container.
Whether the seccomp notify proxy sends a long a notify fd file descriptor.
-+
+
+## cgroup\_advanced\_isolation
+
+Privileged containers will usually be able to override the cgroup limits given to them. This introduces three new configuration keys `lxc.cgroup.dir.monitor`, `lxc.cgroup.dir.container`, and `lxc.cgroup.dir.container.inner`. The `lxc.cgroup.dir.monitor` and `lxc.cgroup.dir.container` keys can be used to set to place the `monitor` and the `container` into different cgroups. The `lxc.cgroup.dir.container.inner` key can be set to a cgroup that is concatenated with `lxc.cgroup.dir.container`. When `lxc.cgroup.dir.container.inner` is set the container will be placed into the `lxc.cgroup.dir.container.inner` cgroup but the limits will be set in the `lxc.cgroup.dir.container` cgroup. This way privileged containers cannot escape their cgroup limits.
++
+ ## idmapped\_mounts
+
+ Whether this LXC instance can handle idmapped mounts.
diff --git a/src/lxc/api_extensions.h b/src/lxc/api_extensions.h
-index 513b774f0..303abd631 100644
+index 06a4130ba..6653c7299 100644
--- a/src/lxc/api_extensions.h
+++ b/src/lxc/api_extensions.h
-@@ -43,6 +43,7 @@ static char *api_extensions[] = {
+@@ -41,6 +41,7 @@ static char *api_extensions[] = {
"devpts_fd",
"seccomp_notify_fd_active",
"seccomp_proxy_send_notify_fd",
+ "cgroup_advanced_isolation",
+ "idmapped_mounts",
};
- static size_t nr_api_extensions = sizeof(api_extensions) / sizeof(*api_extensions);
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Wolfgang Bumiller <w.bumiller@proxmox.com>
+Date: Fri, 10 Feb 2017 09:13:40 +0100
+Subject: [PATCH] PVE: [Config] lxc.service: start after a potential
+ syslog.service
+
+We could add this as a snippet from pve-container instead.
+
+Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
+---
+ config/init/systemd/lxc.service.in | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/config/init/systemd/lxc.service.in b/config/init/systemd/lxc.service.in
+index 397a6c4d0..6924374d9 100644
+--- a/config/init/systemd/lxc.service.in
++++ b/config/init/systemd/lxc.service.in
+@@ -1,6 +1,6 @@
+ [Unit]
+ Description=LXC Container Initialization and Autoboot Code
+-After=network.target lxc-net.service remote-fs.target
++After=network.target lxc-net.service remote-fs.target syslog.target
+ Wants=lxc-net.service
+ Documentation=man:lxc-autostart man:lxc
+
+++ /dev/null
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Wolfgang Bumiller <w.bumiller@proxmox.com>
-Date: Sun, 5 Apr 2020 15:55:28 +0200
-Subject: [PATCH] confile: fix jump table order
-
-Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
----
- src/lxc/confile.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/lxc/confile.c b/src/lxc/confile.c
-index 3a388fed1..34d85e788 100644
---- a/src/lxc/confile.c
-+++ b/src/lxc/confile.c
-@@ -174,8 +174,8 @@ static struct lxc_config_t config_jump_table[] = {
- { "lxc.cap.keep", set_config_cap_keep, get_config_cap_keep, clr_config_cap_keep, },
- { "lxc.cgroup2", set_config_cgroup2_controller, get_config_cgroup2_controller, clr_config_cgroup2_controller, },
- { "lxc.cgroup.dir.monitor", set_config_cgroup_monitor_dir, get_config_cgroup_monitor_dir, clr_config_cgroup_monitor_dir, },
-- { "lxc.cgroup.dir.container", set_config_cgroup_container_dir, get_config_cgroup_container_dir, clr_config_cgroup_container_dir, },
- { "lxc.cgroup.dir.container.inner",set_config_cgroup_container_inner_dir, get_config_cgroup_container_inner_dir, clr_config_cgroup_container_inner_dir,},
-+ { "lxc.cgroup.dir.container", set_config_cgroup_container_dir, get_config_cgroup_container_dir, clr_config_cgroup_container_dir, },
- { "lxc.cgroup.dir", set_config_cgroup_dir, get_config_cgroup_dir, clr_config_cgroup_dir, },
- { "lxc.cgroup.relative", set_config_cgroup_relative, get_config_cgroup_relative, clr_config_cgroup_relative, },
- { "lxc.cgroup", set_config_cgroup_controller, get_config_cgroup_controller, clr_config_cgroup_controller, },
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
+Date: Wed, 9 Nov 2016 09:14:26 +0100
+Subject: [PATCH] PVE: [Config] deny rw mounting of /sys and /proc
+
+Note that we don't actually make use of this anymore, since
+we switched to the generated profiles which already do this.
+
+this would allow root in a privileged container to change
+the permissions of /sys on the host, which could lock out
+non-root users.
+
+if a rw /sys is desired, set "lxc.mount.auto" accordingly
+---
+ config/apparmor/abstractions/container-base | 6 +++++-
+ config/apparmor/abstractions/container-base.in | 6 +++++-
+ 2 files changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/config/apparmor/abstractions/container-base b/config/apparmor/abstractions/container-base
+index 077476559..fbd70fdf5 100644
+--- a/config/apparmor/abstractions/container-base
++++ b/config/apparmor/abstractions/container-base
+@@ -82,7 +82,6 @@
+ deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/,
+ mount fstype=proc -> /proc/,
+ mount fstype=sysfs -> /sys/,
+- mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/,
+ deny /sys/firmware/efi/efivars/** rwklx,
+ deny /sys/kernel/security/** rwklx,
+ mount options=(move) /sys/fs/cgroup/cgmanager/ -> /sys/fs/cgroup/cgmanager.lower/,
+@@ -91,6 +90,11 @@
+ # deny reads from debugfs
+ deny /sys/kernel/debug/{,**} rwklx,
+
++ # prevent rw mounting of /sys, because that allows changing its global permissions
++ deny mount -> /proc/,
++ deny mount -> /sys/,
++# mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/,
++
+ # allow paths to be made slave, shared, private or unbindable
+ # FIXME: This currently doesn't work due to the apparmor parser treating those as allowing all mounts.
+ # mount options=(rw,make-slave) -> **,
+diff --git a/config/apparmor/abstractions/container-base.in b/config/apparmor/abstractions/container-base.in
+index 2606fb64c..3e61c62ea 100644
+--- a/config/apparmor/abstractions/container-base.in
++++ b/config/apparmor/abstractions/container-base.in
+@@ -83,7 +83,6 @@
+ deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/,
+ mount fstype=proc -> /proc/,
+ mount fstype=sysfs -> /sys/,
+- mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/,
+ deny /sys/firmware/efi/efivars/** rwklx,
+ deny /sys/kernel/security/** rwklx,
+ mount options=(ro, nosuid, nodev, noexec, remount, strictatime) -> /sys/fs/cgroup/,
+@@ -91,6 +90,11 @@
+ # deny reads from debugfs
+ deny /sys/kernel/debug/{,**} rwklx,
+
++ # prevent rw mounting of /sys, because that allows changing its global permissions
++ deny mount -> /proc/,
++ deny mount -> /sys/,
++# mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/,
++
+ # allow paths to be made slave, shared, private or unbindable
+ # FIXME: This currently doesn't work due to the apparmor parser treating those as allowing all mounts.
+ # mount options=(rw,make-slave) -> **,
+++ /dev/null
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Christian Brauner <christian.brauner@ubuntu.com>
-Date: Mon, 13 Apr 2020 14:39:18 +0200
-Subject: [PATCH] cgroups: adhere to boolean return
-
-Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
----
- src/lxc/cgroups/cgfsng.c | 8 +++-----
- 1 file changed, 3 insertions(+), 5 deletions(-)
-
-diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
-index 303c2f6ab..56bb005de 100644
---- a/src/lxc/cgroups/cgfsng.c
-+++ b/src/lxc/cgroups/cgfsng.c
-@@ -1195,11 +1195,9 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
- * line, which is not possible once a subdirectory has been
- * created.
- */
-- if (string_in_list(h->controllers, "devices")) {
-- ret = ops->setup_limits_legacy(ops, conf, true);
-- if (ret < 0)
-- return ret;
-- }
-+ if (string_in_list(h->controllers, "devices") &&
-+ !ops->setup_limits_legacy(ops, conf, true))
-+ return log_error(false, "Failed to setup legacy device limits");
- }
-
- ret = mkdir_eexist_on_last(path, 0755);
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Wolfgang Bumiller <w.bumiller@proxmox.com>
+Date: Tue, 13 Aug 2019 13:57:22 +0200
+Subject: [PATCH] PVE: [Config] attach: always use getent
+
+In debian buster, some libnss plugins (if installed) can
+cause getpwent to segfault instead of erroring out cleanly.
+To avoid this, stick to always using getent.
+
+Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
+---
+ src/lxc/attach.c | 28 ++--------------------------
+ 1 file changed, 2 insertions(+), 26 deletions(-)
+
+diff --git a/src/lxc/attach.c b/src/lxc/attach.c
+index 97eff7f8e..01bd56f45 100644
+--- a/src/lxc/attach.c
++++ b/src/lxc/attach.c
+@@ -1810,12 +1810,8 @@ int lxc_attach_run_command(void *payload)
+
+ int lxc_attach_run_shell(void* payload)
+ {
+- __do_free char *buf = NULL;
+ uid_t uid;
+- struct passwd pwent;
+- struct passwd *pwentp = NULL;
+ char *user_shell;
+- size_t bufsize;
+ int ret;
+
+ /* Ignore payload parameter. */
+@@ -1823,32 +1819,13 @@ int lxc_attach_run_shell(void* payload)
+
+ uid = getuid();
+
+- bufsize = sysconf(_SC_GETPW_R_SIZE_MAX);
+- if (bufsize == -1)
+- bufsize = 1024;
+-
+- buf = malloc(bufsize);
+- if (buf) {
+- ret = getpwuid_r(uid, &pwent, buf, bufsize, &pwentp);
+- if (!pwentp) {
+- if (ret == 0)
+- WARN("Could not find matched password record");
+-
+- WARN("Failed to get password record - %u", uid);
+- }
+- }
+-
+ /* This probably happens because of incompatible nss implementations in
+ * host and container (remember, this code is still using the host's
+ * glibc but our mount namespace is in the container) we may try to get
+ * the information by spawning a [getent passwd uid] process and parsing
+ * the result.
+ */
+- if (!pwentp)
+- user_shell = lxc_attach_getpwshell(uid);
+- else
+- user_shell = pwent.pw_shell;
+-
++ user_shell = lxc_attach_getpwshell(uid);
+ if (user_shell)
+ execlp(user_shell, user_shell, (char *)NULL);
+
+@@ -1858,8 +1835,7 @@ int lxc_attach_run_shell(void* payload)
+ execlp("/bin/sh", "/bin/sh", (char *)NULL);
+
+ SYSERROR("Failed to execute shell");
+- if (!pwentp)
+- free(user_shell);
++ free(user_shell);
+
+ return -1;
+ }
+++ /dev/null
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Wolfgang Bumiller <w.bumiller@proxmox.com>
-Date: Fri, 10 Feb 2017 09:13:40 +0100
-Subject: [PATCH] PVE: [Config] lxc.service: start after a potential
- syslog.service
-
-We could add this as a snippet from pve-container instead.
-
-Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
----
- config/init/systemd/lxc.service.in | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/config/init/systemd/lxc.service.in b/config/init/systemd/lxc.service.in
-index 397a6c4d0..6924374d9 100644
---- a/config/init/systemd/lxc.service.in
-+++ b/config/init/systemd/lxc.service.in
-@@ -1,6 +1,6 @@
- [Unit]
- Description=LXC Container Initialization and Autoboot Code
--After=network.target lxc-net.service remote-fs.target
-+After=network.target lxc-net.service remote-fs.target syslog.target
- Wants=lxc-net.service
- Documentation=man:lxc-autostart man:lxc
-
+++ /dev/null
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
-Date: Wed, 9 Nov 2016 09:14:26 +0100
-Subject: [PATCH] PVE: [Config] deny rw mounting of /sys and /proc
-
-Note that we don't actually make use of this anymore, since
-we switched to the generated profiles which already do this.
-
-this would allow root in a privileged container to change
-the permissions of /sys on the host, which could lock out
-non-root users.
-
-if a rw /sys is desired, set "lxc.mount.auto" accordingly
----
- config/apparmor/abstractions/container-base | 6 +++++-
- config/apparmor/abstractions/container-base.in | 6 +++++-
- 2 files changed, 10 insertions(+), 2 deletions(-)
-
-diff --git a/config/apparmor/abstractions/container-base b/config/apparmor/abstractions/container-base
-index 077476559..fbd70fdf5 100644
---- a/config/apparmor/abstractions/container-base
-+++ b/config/apparmor/abstractions/container-base
-@@ -82,7 +82,6 @@
- deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/,
- mount fstype=proc -> /proc/,
- mount fstype=sysfs -> /sys/,
-- mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/,
- deny /sys/firmware/efi/efivars/** rwklx,
- deny /sys/kernel/security/** rwklx,
- mount options=(move) /sys/fs/cgroup/cgmanager/ -> /sys/fs/cgroup/cgmanager.lower/,
-@@ -91,6 +90,11 @@
- # deny reads from debugfs
- deny /sys/kernel/debug/{,**} rwklx,
-
-+ # prevent rw mounting of /sys, because that allows changing its global permissions
-+ deny mount -> /proc/,
-+ deny mount -> /sys/,
-+# mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/,
-+
- # allow paths to be made slave, shared, private or unbindable
- # FIXME: This currently doesn't work due to the apparmor parser treating those as allowing all mounts.
- # mount options=(rw,make-slave) -> **,
-diff --git a/config/apparmor/abstractions/container-base.in b/config/apparmor/abstractions/container-base.in
-index 2606fb64c..3e61c62ea 100644
---- a/config/apparmor/abstractions/container-base.in
-+++ b/config/apparmor/abstractions/container-base.in
-@@ -83,7 +83,6 @@
- deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/,
- mount fstype=proc -> /proc/,
- mount fstype=sysfs -> /sys/,
-- mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/,
- deny /sys/firmware/efi/efivars/** rwklx,
- deny /sys/kernel/security/** rwklx,
- mount options=(ro, nosuid, nodev, noexec, remount, strictatime) -> /sys/fs/cgroup/,
-@@ -91,6 +90,11 @@
- # deny reads from debugfs
- deny /sys/kernel/debug/{,**} rwklx,
-
-+ # prevent rw mounting of /sys, because that allows changing its global permissions
-+ deny mount -> /proc/,
-+ deny mount -> /sys/,
-+# mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/,
-+
- # allow paths to be made slave, shared, private or unbindable
- # FIXME: This currently doesn't work due to the apparmor parser treating those as allowing all mounts.
- # mount options=(rw,make-slave) -> **,
+++ /dev/null
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Wolfgang Bumiller <w.bumiller@proxmox.com>
-Date: Tue, 13 Aug 2019 13:57:22 +0200
-Subject: [PATCH] PVE: [Config] attach: always use getent
-
-In debian buster, some libnss plugins (if installed) can
-cause getpwent to segfault instead of erroring out cleanly.
-To avoid this, stick to always using getent.
-
-Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
----
- src/lxc/attach.c | 28 ++--------------------------
- 1 file changed, 2 insertions(+), 26 deletions(-)
-
-diff --git a/src/lxc/attach.c b/src/lxc/attach.c
-index befa1580e..7a325f5b1 100644
---- a/src/lxc/attach.c
-+++ b/src/lxc/attach.c
-@@ -1444,12 +1444,8 @@ int lxc_attach_run_command(void *payload)
-
- int lxc_attach_run_shell(void* payload)
- {
-- __do_free char *buf = NULL;
- uid_t uid;
-- struct passwd pwent;
-- struct passwd *pwentp = NULL;
- char *user_shell;
-- size_t bufsize;
- int ret;
-
- /* Ignore payload parameter. */
-@@ -1457,32 +1453,13 @@ int lxc_attach_run_shell(void* payload)
-
- uid = getuid();
-
-- bufsize = sysconf(_SC_GETPW_R_SIZE_MAX);
-- if (bufsize == -1)
-- bufsize = 1024;
--
-- buf = malloc(bufsize);
-- if (buf) {
-- ret = getpwuid_r(uid, &pwent, buf, bufsize, &pwentp);
-- if (!pwentp) {
-- if (ret == 0)
-- WARN("Could not find matched password record");
--
-- WARN("Failed to get password record - %u", uid);
-- }
-- }
--
- /* This probably happens because of incompatible nss implementations in
- * host and container (remember, this code is still using the host's
- * glibc but our mount namespace is in the container) we may try to get
- * the information by spawning a [getent passwd uid] process and parsing
- * the result.
- */
-- if (!pwentp)
-- user_shell = lxc_attach_getpwshell(uid);
-- else
-- user_shell = pwent.pw_shell;
--
-+ user_shell = lxc_attach_getpwshell(uid);
- if (user_shell)
- execlp(user_shell, user_shell, (char *)NULL);
-
-@@ -1492,8 +1469,7 @@ int lxc_attach_run_shell(void* payload)
- execlp("/bin/sh", "/bin/sh", (char *)NULL);
-
- SYSERROR("Failed to execute shell");
-- if (!pwentp)
-- free(user_shell);
-+ free(user_shell);
-
- return -1;
- }
pve/0001-allow-running-lxc-monitord-as-a-system-daemon.patch
-pve/0002-doc-introduce-lxc.cgroup.dir.-monitor-container-cont.patch
+pve/0002-introduce-lxc.cgroup.dir.-monitor-container-containe.patch
pve/0003-doc-s-lxc.cgroup.container.namespace-lxc.cgroup.cont.patch
pve/0004-confile-coding-style-fixes-for-set_config_cgroup_con.patch
pve/0005-api-extensions-add-and-document-cgroup_advanced_isol.patch
pve/0006-doc-Add-lxc.cgroup.dir.-monitor-container-container..patch
-pve/0007-confile-fix-jump-table-order.patch
-pve/0008-cgroups-adhere-to-boolean-return.patch
-pve/0009-PVE-Config-lxc.service-start-after-a-potential-syslo.patch
-pve/0010-PVE-Config-deny-rw-mounting-of-sys-and-proc.patch
-pve/0011-PVE-Config-attach-always-use-getent.patch
+pve/0007-PVE-Config-lxc.service-start-after-a-potential-syslo.patch
+pve/0008-PVE-Config-deny-rw-mounting-of-sys-and-proc.patch
+pve/0009-PVE-Config-attach-always-use-getent.patch
-Subproject commit aff7ab782e168bda7a7a422fbe6a04be2cd40a64
+Subproject commit 37485abd46206ac37cb037e357b12370bbb98576