debian/patches/pve/0004-PVE-Up-separate-the-limiting-from-the-namespaced-cgr.patch

   1 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
   2 From: Wolfgang Bumiller <w.bumiller@proxmox.com>
   3 Date: Wed, 28 Mar 2018 13:37:28 +0200
   4 Subject: [PATCH] PVE: [Up] separate the limiting from the namespaced cgroup
   5  root
   6
   7 When cgroup namespaces are enabled a privileged container
   8 with mixed cgroups has full write access to its own root
   9 cgroup effectively allowing it to overwrite values written
  10 from the outside or configured via lxc.cgroup.*.
  11
  12 This patch causes an additional 'cgns/' directory to be
  13 created in all cgroups if cgroup namespaces and cgfsng are
  14 being used in order to combat this.
  15
  16 Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
  17 ---
  18  src/lxc/cgroups/cgfsng.c | 92 +++++++++++++++++++++++++++++++++++++++---------
  19  src/lxc/cgroups/cgroup.h | 18 +++++++---
  20  src/lxc/commands.c       | 85 +++++++++++++++++++++++++++++++++-----------
  21  src/lxc/commands.h       |  2 ++
  22  src/lxc/criu.c           |  4 +--
  23  src/lxc/start.c          | 28 +++++++++++----
  24  6 files changed, 180 insertions(+), 49 deletions(-)
  25
  26 diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
  27 index 935b868b..9281cee0 100644
  28 --- a/src/lxc/cgroups/cgfsng.c
  29 +++ b/src/lxc/cgroups/cgfsng.c
  30 @@ -818,6 +818,7 @@ static struct hierarchy *add_hierarchy(struct hierarchy ***h, char **clist, char
  31         new->mountpoint = mountpoint;
  32         new->base_cgroup = base_cgroup;
  33         new->fullcgpath = NULL;
  34 +       new->innercgpath = NULL;
  35         new->version = type;
  36
  37         newentry = append_null_to_list((void ***)h);
  38 @@ -1060,6 +1061,9 @@ static int cgroup_rmdir(struct hierarchy **hierarchies,
  39
  40                 free(h->fullcgpath);
  41                 h->fullcgpath = NULL;
  42 +
  43 +               free(h->innercgpath);
  44 +               h->innercgpath = NULL;
  45         }
  46
  47         return 0;
  48 @@ -1071,6 +1075,7 @@ struct generic_userns_exec_data {
  49         struct lxc_conf *conf;
  50         uid_t origuid; /* target uid in parent namespace */
  51         char *path;
  52 +       bool inner;
  53  };
  54
  55  static int cgroup_rmdir_wrapper(void *data)
  56 @@ -1112,6 +1117,7 @@ static void cgfsng_destroy(struct cgroup_ops *ops, struct lxc_handler *handler)
  57         wrap.container_cgroup = ops->container_cgroup;
  58         wrap.hierarchies = ops->hierarchies;
  59         wrap.conf = handler->conf;
  60 +       wrap.inner = false;
  61
  62         if (handler->conf && !lxc_list_empty(&handler->conf->id_map))
  63                 ret = userns_exec_1(handler->conf, cgroup_rmdir_wrapper, &wrap,
  64 @@ -1192,22 +1198,29 @@ on_error:
  65         return bret;
  66  }
  67
  68 -static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname)
  69 +static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname, bool inner)
  70  {
  71         int ret;
  72 +       char *path;
  73
  74 -       h->fullcgpath = must_make_path(h->mountpoint, h->base_cgroup, cgname, NULL);
  75 -       if (dir_exists(h->fullcgpath)) {
  76 +       if (inner) {
  77 +               path = must_make_path(h->fullcgpath, CGROUP_NAMESPACE_SUBDIR, NULL);
  78 +               h->innercgpath = path;
  79 +       } else {
  80 +               path = must_make_path(h->mountpoint, h->base_cgroup, cgname, NULL);
  81 +               h->fullcgpath = path;
  82 +       }
  83 +       if (dir_exists(path)) {
  84                 ERROR("The cgroup \"%s\" already existed", h->fullcgpath);
  85                 return false;
  86         }
  87
  88 -       if (!cg_legacy_handle_cpuset_hierarchy(h, cgname)) {
  89 +       if (!inner && !cg_legacy_handle_cpuset_hierarchy(h, cgname)) {
  90                 ERROR("Failed to handle legacy cpuset controller");
  91                 return false;
  92         }
  93
  94 -       ret = mkdir_p(h->fullcgpath, 0755);
  95 +       ret = mkdir_p(path, 0755);
  96         if (ret < 0) {
  97                 ERROR("Failed to create cgroup \"%s\"", h->fullcgpath);
  98                 return false;
  99 @@ -1228,11 +1241,29 @@ static void remove_path_for_hierarchy(struct hierarchy *h, char *cgname)
 100         h->fullcgpath = NULL;
 101  }
 102
 103 +static inline bool cgfsng_create_inner(struct cgroup_ops *ops)
 104 +{
 105 +       size_t i;
 106 +       bool ret = true;
 107 +       char *cgname = must_make_path(ops->container_cgroup, CGROUP_NAMESPACE_SUBDIR, NULL);
 108 +       for (i = 0; ops->hierarchies[i]; i++) {
 109 +               if (!create_path_for_hierarchy(ops->hierarchies[i], cgname, true)) {
 110 +                       SYSERROR("Failed to create %s namespace subdirectory: %s",
 111 +                                ops->hierarchies[i]->fullcgpath, strerror(errno));
 112 +                       ret = false;
 113 +                       break;
 114 +               }
 115 +       }
 116 +       free(cgname);
 117 +       return ret;
 118 +}
 119 +
 120  /* Try to create the same cgroup in all hierarchies. Start with cgroup_pattern;
 121   * next cgroup_pattern-1, -2, ..., -999.
 122   */
 123  static inline bool cgfsng_create(struct cgroup_ops *ops,
 124 -                                struct lxc_handler *handler)
 125 +                                struct lxc_handler *handler,
 126 +                                bool inner)
 127  {
 128         int i;
 129         size_t len;
 130 @@ -1241,10 +1272,17 @@ static inline bool cgfsng_create(struct cgroup_ops *ops,
 131         struct lxc_conf *conf = handler->conf;
 132
 133         if (ops->container_cgroup) {
 134 +               if (inner)
 135 +                       return cgfsng_create_inner(ops);
 136                 WARN("cgfsng_create called a second time: %s", ops->container_cgroup);
 137                 return false;
 138         }
 139
 140 +       if (inner) {
 141 +               ERROR("cgfsng_create called twice for inner cgroup");
 142 +               return false;
 143 +       }
 144 +
 145         if (!conf)
 146                 return false;
 147
 148 @@ -1285,7 +1323,7 @@ again:
 149         }
 150
 151         for (i = 0; ops->hierarchies[i]; i++) {
 152 -               if (!create_path_for_hierarchy(ops->hierarchies[i], container_cgroup)) {
 153 +               if (!create_path_for_hierarchy(ops->hierarchies[i], container_cgroup, false)) {
 154                         int j;
 155                         ERROR("Failed to create cgroup \"%s\"", ops->hierarchies[i]->fullcgpath);
 156                         free(ops->hierarchies[i]->fullcgpath);
 157 @@ -1307,7 +1345,7 @@ out_free:
 158         return false;
 159  }
 160
 161 -static bool cgfsng_enter(struct cgroup_ops *ops, pid_t pid)
 162 +static bool cgfsng_enter(struct cgroup_ops *ops, pid_t pid, bool inner)
 163  {
 164         int i, len;
 165         char pidstr[25];
 166 @@ -1320,8 +1358,13 @@ static bool cgfsng_enter(struct cgroup_ops *ops, pid_t pid)
 167                 int ret;
 168                 char *fullpath;
 169
 170 -               fullpath = must_make_path(ops->hierarchies[i]->fullcgpath,
 171 -                                         "cgroup.procs", NULL);
 172 +               if (inner)
 173 +                       fullpath = must_make_path(ops->hierarchies[i]->fullcgpath,
 174 +                                                 CGROUP_NAMESPACE_SUBDIR,
 175 +                                                 "cgroup.procs", NULL);
 176 +               else
 177 +                       fullpath = must_make_path(ops->hierarchies[i]->fullcgpath,
 178 +                                                 "cgroup.procs", NULL);
 179                 ret = lxc_write_to_file(fullpath, pidstr, len, false, 0666);
 180                 if (ret != 0) {
 181                         SYSERROR("Failed to enter cgroup \"%s\"", fullpath);
 182 @@ -1395,9 +1438,15 @@ static int chown_cgroup_wrapper(void *data)
 183                 char *fullpath;
 184                 char *path = arg->hierarchies[i]->fullcgpath;
 185
 186 +               if (arg->inner)
 187 +                       path = must_make_path(path, CGROUP_NAMESPACE_SUBDIR, NULL);
 188 +
 189                 ret = chowmod(path, destuid, nsgid, 0775);
 190 -               if (ret < 0)
 191 +               if (ret < 0) {
 192 +                       if (arg->inner)
 193 +                               free(path);
 194                         return -1;
 195 +               }
 196
 197                 /* Failures to chown() these are inconvenient but not
 198                  * detrimental We leave these owned by the container launcher,
 199 @@ -1416,8 +1465,11 @@ static int chown_cgroup_wrapper(void *data)
 200                 (void)chowmod(fullpath, destuid, nsgid, 0664);
 201                 free(fullpath);
 202
 203 -               if (arg->hierarchies[i]->version != CGROUP2_SUPER_MAGIC)
 204 +               if (arg->hierarchies[i]->version != CGROUP2_SUPER_MAGIC) {
 205 +                       if (arg->inner)
 206 +                               free(path);
 207                         continue;
 208 +               }
 209
 210                 fullpath = must_make_path(path, "cgroup.subtree_control", NULL);
 211                 (void)chowmod(fullpath, destuid, nsgid, 0664);
 212 @@ -1426,12 +1478,15 @@ static int chown_cgroup_wrapper(void *data)
 213                 fullpath = must_make_path(path, "cgroup.threads", NULL);
 214                 (void)chowmod(fullpath, destuid, nsgid, 0664);
 215                 free(fullpath);
 216 +
 217 +               if (arg->inner)
 218 +                       free(path);
 219         }
 220
 221         return 0;
 222  }
 223
 224 -static bool cgfsng_chown(struct cgroup_ops *ops, struct lxc_conf *conf)
 225 +static bool cgfsng_chown(struct cgroup_ops *ops, struct lxc_conf *conf, bool inner)
 226  {
 227         struct generic_userns_exec_data wrap;
 228
 229 @@ -1442,6 +1497,7 @@ static bool cgfsng_chown(struct cgroup_ops *ops, struct lxc_conf *conf)
 230         wrap.path = NULL;
 231         wrap.hierarchies = ops->hierarchies;
 232         wrap.conf = conf;
 233 +       wrap.inner = inner;
 234
 235         if (userns_exec_1(conf, chown_cgroup_wrapper, &wrap,
 236                           "chown_cgroup_wrapper") < 0) {
 237 @@ -1821,7 +1877,8 @@ static bool cgfsng_unfreeze(struct cgroup_ops *ops)
 238  }
 239
 240  static const char *cgfsng_get_cgroup(struct cgroup_ops *ops,
 241 -                                    const char *controller)
 242 +                                    const char *controller,
 243 +                                    bool inner)
 244  {
 245         struct hierarchy *h;
 246
 247 @@ -1832,6 +1889,9 @@ static const char *cgfsng_get_cgroup(struct cgroup_ops *ops,
 248                 return NULL;
 249         }
 250
 251 +       if (inner)
 252 +               return h->innercgpath ? h->innercgpath + strlen(h->mountpoint) : NULL;
 253 +
 254         return h->fullcgpath ? h->fullcgpath + strlen(h->mountpoint) : NULL;
 255  }
 256
 257 @@ -1863,7 +1923,7 @@ static int __cg_unified_attach(const struct hierarchy *h, const char *name,
 258         int fret = -1, idx = 0;
 259         char *base_path = NULL, *container_cgroup = NULL, *full_path = NULL;
 260
 261 -       container_cgroup = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
 262 +       container_cgroup = lxc_cmd_get_attach_cgroup_path(name, lxcpath, controller);
 263         /* not running */
 264         if (!container_cgroup)
 265                 return 0;
 266 @@ -1943,7 +2003,7 @@ static bool cgfsng_attach(struct cgroup_ops *ops, const char *name,
 267                         continue;
 268                 }
 269
 270 -               path = lxc_cmd_get_cgroup_path(name, lxcpath, h->controllers[0]);
 271 +               path = lxc_cmd_get_attach_cgroup_path(name, lxcpath, h->controllers[0]);
 272                 /* not running */
 273                 if (!path)
 274                         continue;
 275 diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h
 276 index 8f4af06c..b12c1f4c 100644
 277 --- a/src/lxc/cgroups/cgroup.h
 278 +++ b/src/lxc/cgroups/cgroup.h
 279 @@ -28,6 +28,12 @@
 280  #include <stddef.h>
 281  #include <sys/types.h>
 282
 283 +/* When cgroup namespaces are supported, the container's cgroup namespace
 284 + * will be moved into an additional subdirectory "cgns/" inside the cgroup in
 285 + * order to prevent it from accessing the outer limiting cgroup.
 286 + */
 287 +#define CGROUP_NAMESPACE_SUBDIR "cgns"
 288 +
 289  struct lxc_handler;
 290  struct lxc_conf;
 291  struct lxc_list;
 292 @@ -65,6 +71,9 @@ typedef enum {
 293   * @fullcgpath
 294   * - The full path to the containers cgroup.
 295   *
 296 + * @innercgpath
 297 + * - The full path to the inner CGROUP_NAMESPACE_SUBDIR cgroup below @fullcgpath.
 298 + *
 299   * @version
 300   * - legacy hierarchy
 301   *   If the hierarchy is a legacy hierarchy this will be set to
 302 @@ -78,6 +87,7 @@ struct hierarchy {
 303         char *mountpoint;
 304         char *base_cgroup;
 305         char *fullcgpath;
 306 +       char *innercgpath;
 307         int version;
 308  };
 309
 310 @@ -124,9 +134,9 @@ struct cgroup_ops {
 311
 312         bool (*data_init)(struct cgroup_ops *ops);
 313         void (*destroy)(struct cgroup_ops *ops, struct lxc_handler *handler);
 314 -       bool (*create)(struct cgroup_ops *ops, struct lxc_handler *handler);
 315 -       bool (*enter)(struct cgroup_ops *ops, pid_t pid);
 316 -       const char *(*get_cgroup)(struct cgroup_ops *ops, const char *controller);
 317 +       bool (*create)(struct cgroup_ops *ops, struct lxc_handler *handler, bool inner);
 318 +       bool (*enter)(struct cgroup_ops *ops, pid_t pid, bool inner);
 319 +       const char *(*get_cgroup)(struct cgroup_ops *ops, const char *controller, bool inner);
 320         bool (*escape)(const struct cgroup_ops *ops);
 321         int (*num_hierarchies)(struct cgroup_ops *ops);
 322         bool (*get_hierarchies)(struct cgroup_ops *ops, int n, char ***out);
 323 @@ -137,7 +147,7 @@ struct cgroup_ops {
 324         bool (*unfreeze)(struct cgroup_ops *ops);
 325         bool (*setup_limits)(struct cgroup_ops *ops, struct lxc_conf *conf,
 326                              bool with_devices);
 327 -       bool (*chown)(struct cgroup_ops *ops, struct lxc_conf *conf);
 328 +       bool (*chown)(struct cgroup_ops *ops, struct lxc_conf *conf, bool inner);
 329         bool (*attach)(struct cgroup_ops *ops, const char *name,
 330                        const char *lxcpath, pid_t pid);
 331         bool (*mount)(struct cgroup_ops *ops, struct lxc_handler *handler,
 332 diff --git a/src/lxc/commands.c b/src/lxc/commands.c
 333 index 30d6b604..e1bad635 100644
 334 --- a/src/lxc/commands.c
 335 +++ b/src/lxc/commands.c
 336 @@ -424,20 +424,8 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req,
 337         return lxc_cmd_rsp_send(fd, &rsp);
 338  }
 339
 340 -/*
 341 - * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a
 342 - * particular subsystem. This is the cgroup path relative to the root
 343 - * of the cgroup filesystem.
 344 - *
 345 - * @name      : name of container to connect to
 346 - * @lxcpath   : the lxcpath in which the container is running
 347 - * @subsystem : the subsystem being asked about
 348 - *
 349 - * Returns the path on success, NULL on failure. The caller must free() the
 350 - * returned path.
 351 - */
 352 -char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
 353 -                             const char *subsystem)
 354 +char *do_lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
 355 +                             const char *subsystem, bool inner)
 356  {
 357         int ret, stopped;
 358         struct lxc_cmd_rr cmd = {
 359 @@ -450,8 +438,18 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
 360
 361         cmd.req.data = subsystem;
 362         cmd.req.datalen = 0;
 363 -       if (subsystem)
 364 -               cmd.req.datalen = strlen(subsystem) + 1;
 365 +       if (subsystem) {
 366 +               size_t subsyslen = strlen(subsystem);
 367 +               if (inner) {
 368 +                       char *data = alloca(subsyslen+2);
 369 +                       memcpy(data, subsystem, subsyslen+1);
 370 +                       data[subsyslen+1] = 1;
 371 +                       cmd.req.datalen = subsyslen+2,
 372 +                       cmd.req.data = data;
 373 +               } else {
 374 +                       cmd.req.datalen = subsyslen+1;
 375 +               }
 376 +       }
 377
 378         ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL);
 379         if (ret < 0)
 380 @@ -466,6 +464,42 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
 381         return cmd.rsp.data;
 382  }
 383
 384 +/*
 385 + * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a
 386 + * particular subsystem. This is the cgroup path relative to the root
 387 + * of the cgroup filesystem.
 388 + *
 389 + * @name      : name of container to connect to
 390 + * @lxcpath   : the lxcpath in which the container is running
 391 + * @subsystem : the subsystem being asked about
 392 + *
 393 + * Returns the path on success, NULL on failure. The caller must free() the
 394 + * returned path.
 395 + */
 396 +char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
 397 +       const char *subsystem)
 398 +{
 399 +       return do_lxc_cmd_get_cgroup_path(name, lxcpath, subsystem, false);
 400 +}
 401 +
 402 +/*
 403 + * lxc_cmd_get_attach_cgroup_path: Calculate a container's inner cgroup path
 404 + * for a particular subsystem. This is the cgroup path relative to the root
 405 + * of the cgroup filesystem.
 406 + *
 407 + * @name      : name of container to connect to
 408 + * @lxcpath   : the lxcpath in which the container is running
 409 + * @subsystem : the subsystem being asked about
 410 + *
 411 + * Returns the path on success, NULL on failure. The caller must free() the
 412 + * returned path.
 413 + */
 414 +char *lxc_cmd_get_attach_cgroup_path(const char *name, const char *lxcpath,
 415 +       const char *subsystem)
 416 +{
 417 +       return do_lxc_cmd_get_cgroup_path(name, lxcpath, subsystem, true);
 418 +}
 419 +
 420  static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
 421                                        struct lxc_handler *handler)
 422  {
 423 @@ -473,10 +507,21 @@ static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
 424         struct lxc_cmd_rsp rsp;
 425         struct cgroup_ops *cgroup_ops = handler->cgroup_ops;
 426
 427 -       if (req->datalen > 0)
 428 -               path = cgroup_ops->get_cgroup(cgroup_ops, req->data);
 429 -       else
 430 -               path = cgroup_ops->get_cgroup(cgroup_ops, NULL);
 431 +       if (req->datalen > 0) {
 432 +               const char *subsystem;
 433 +               size_t subsyslen;
 434 +               bool inner = false;
 435 +               subsystem = req->data;
 436 +               subsyslen = strlen(subsystem);
 437 +               if (req->datalen == subsyslen+2)
 438 +                       inner = (subsystem[subsyslen+1] == 1);
 439 +
 440 +               path = cgroup_ops->get_cgroup(cgroup_ops, req->data, inner);
 441 +       } else {
 442 +               // FIXME: cgroup separation for cgroup v2 cannot be handled
 443 +               // like we used to do v1 here... need to figure this out...
 444 +               path = cgroup_ops->get_cgroup(cgroup_ops, NULL, false);
 445 +       }
 446         if (!path)
 447                 return -1;
 448
 449 diff --git a/src/lxc/commands.h b/src/lxc/commands.h
 450 index 816cd748..e16c0d79 100644
 451 --- a/src/lxc/commands.h
 452 +++ b/src/lxc/commands.h
 453 @@ -93,6 +93,8 @@ extern int lxc_cmd_console(const char *name, int *ttynum, int *fd,
 454   */
 455  extern char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
 456                         const char *subsystem);
 457 +extern char *lxc_cmd_get_attach_cgroup_path(const char *name,
 458 +                       const char *lxcpath, const char *subsystem);
 459  extern int lxc_cmd_get_clone_flags(const char *name, const char *lxcpath);
 460  extern char *lxc_cmd_get_config_item(const char *name, const char *item, const char *lxcpath);
 461  extern char *lxc_cmd_get_name(const char *hashed_sock);
 462 diff --git a/src/lxc/criu.c b/src/lxc/criu.c
 463 index c3642162..456d19cf 100644
 464 --- a/src/lxc/criu.c
 465 +++ b/src/lxc/criu.c
 466 @@ -328,7 +328,7 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct criu_opts *opts)
 467                 } else {
 468                         const char *p;
 469
 470 -                       p = cgroup_ops->get_cgroup(cgroup_ops, controllers[0]);
 471 +                       p = cgroup_ops->get_cgroup(cgroup_ops, controllers[0], false);
 472                         if (!p) {
 473                                 ERROR("failed to get cgroup path for %s", controllers[0]);
 474                                 goto err;
 475 @@ -971,7 +971,7 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_
 476                 goto out_fini_handler;
 477         handler->cgroup_ops = cgroup_ops;
 478
 479 -       if (!cgroup_ops->create(cgroup_ops, handler)) {
 480 +       if (!cgroup_ops->create(cgroup_ops, handler, false)) {
 481                 ERROR("failed creating groups");
 482                 goto out_fini_handler;
 483         }
 484 diff --git a/src/lxc/start.c b/src/lxc/start.c
 485 index 739866d8..6944b310 100644
 486 --- a/src/lxc/start.c
 487 +++ b/src/lxc/start.c
 488 @@ -1597,7 +1597,7 @@ static int lxc_spawn(struct lxc_handler *handler)
 489                 }
 490         }
 491
 492 -       if (!cgroup_ops->create(cgroup_ops, handler)) {
 493 +       if (!cgroup_ops->create(cgroup_ops, handler, false)) {
 494                 ERROR("Failed creating cgroups");
 495                 goto out_delete_net;
 496         }
 497 @@ -1691,10 +1691,10 @@ static int lxc_spawn(struct lxc_handler *handler)
 498                 goto out_delete_net;
 499         }
 500
 501 -       if (!cgroup_ops->enter(cgroup_ops, handler->pid))
 502 +       if (!cgroup_ops->enter(cgroup_ops, handler->pid, false))
 503                 goto out_delete_net;
 504
 505 -       if (!cgroup_ops->chown(cgroup_ops, handler->conf))
 506 +       if (!cgroup_ops->chown(cgroup_ops, handler->conf, false))
 507                 goto out_delete_net;
 508
 509         /* Now we're ready to preserve the network namespace */
 510 @@ -1755,16 +1755,30 @@ static int lxc_spawn(struct lxc_handler *handler)
 511                 }
 512         }
 513
 514 -       ret = lxc_sync_barrier_child(handler, LXC_SYNC_CGROUP_UNSHARE);
 515 -       if (ret < 0)
 516 -               goto out_delete_net;
 517 -
 518         if (!cgroup_ops->setup_limits(cgroup_ops, handler->conf, true)) {
 519                 ERROR("Failed to setup legacy device cgroup controller limits");
 520                 goto out_delete_net;
 521         }
 522         TRACE("Set up legacy device cgroup controller limits");
 523
 524 +       if (cgns_supported()) {
 525 +               if (!cgroup_ops->create(cgroup_ops, handler, true)) {
 526 +                       ERROR("failed to create inner cgroup separation layer");
 527 +                       goto out_delete_net;
 528 +               }
 529 +               if (!cgroup_ops->enter(cgroup_ops, handler->pid, true)) {
 530 +                       ERROR("failed to enter inner cgroup separation layer");
 531 +                       goto out_delete_net;
 532 +               }
 533 +               if (!cgroup_ops->chown(cgroup_ops, handler->conf, true)) {
 534 +                       ERROR("failed chown inner cgroup separation layer");
 535 +                       goto out_delete_net;
 536 +               }
 537 +       }
 538 +
 539 +       if (lxc_sync_barrier_child(handler, LXC_SYNC_CGROUP_UNSHARE))
 540 +               goto out_delete_net;
 541 +
 542         if (handler->ns_clone_flags & CLONE_NEWCGROUP) {
 543                 /* Now we're ready to preserve the cgroup namespace */
 544                 ret = lxc_try_preserve_ns(handler->pid, "cgroup");
 545 --
 546 2.11.0
 547