1 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2 From: Wolfgang Bumiller <w.bumiller@proxmox.com>
3 Date: Wed, 25 Jul 2018 12:11:31 +0200
4 Subject: [PATCH] apparmor: profile generation
6 This copies lxd's apparmor profile generation. This tries to
7 detect features such as cgroup namespaces, apparmor
8 namespaces and stacking support, and has profile parts
9 conditionally for unprivileged containers.
11 This introduces the following changes to the configuration:
12 lxc.apparmor.profile = generated
13 The fixed value 'generated' will cause this
14 functionality to be used, otherwise there should be no
15 functional changes happening unless specifically
16 requested with the next key:
17 lxc.apparmor.allow_nesting
18 This is a boolean which, if enabled, causes the
19 following changes: When generated apparmor profiles are
20 used, they will contain the necessary changes to allow
21 creating a nested container. In addition to the usual
22 mount points, /dev/.lxc/proc and /dev/.lxc/sys will
23 contain procfs and sysfs mount points without the lxcfs
24 overlays, which, if generated apparmor profiles are
25 being used, will not be read/writable directly.
27 A list of raw apparmor profile lines to append to the
28 profile. Only valid when using generated profiles.
30 The following apparmor profile lines have not been copied
33 mount /var/lib/lxd/shmounts/ -> /var/lib/lxd/shmounts/,
34 mount none -> /var/lib/lxd/shmounts/,
35 mount options=bind /var/lib/lxd/shmounts/** -> /var/lib/lxd/**,
37 They should be added via lxc.apparmor.raw entries by lxd.
39 In order for apparmor_parser's cache to be of use, this adds
40 a --with-apparmor-cache-dir ./configure option.
42 Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
43 (cherry picked from commit 1800f9247357948fd11c9da73b1943a8a7b6882b)
46 src/lxc/Makefile.am | 1 +
47 src/lxc/conf.c | 43 ++-
49 src/lxc/confile.c | 95 +++++
51 src/lxc/lsm/apparmor.c | 974 ++++++++++++++++++++++++++++++++++++++++++++++---
52 src/lxc/lsm/lsm.c | 30 +-
53 src/lxc/lsm/lsm.h | 8 +-
54 src/lxc/lsm/nop.c | 2 +-
55 src/lxc/lsm/selinux.c | 4 +-
56 src/lxc/start.c | 14 +-
57 12 files changed, 1134 insertions(+), 56 deletions(-)
59 diff --git a/configure.ac b/configure.ac
60 index ea312bf3..739e100a 100644
63 @@ -469,6 +469,13 @@ AC_ARG_WITH([cgroup-pattern],
64 [pattern for container cgroups]
65 )], [], [with_cgroup_pattern=['lxc/%n']])
67 +# The path for the apparmor_parser's cache for generated apparmor profiles
68 +AC_ARG_WITH([apparmor-cache-dir],
70 + [--with-apparmor-cache-dir=dir],
71 + [path for apparmor_parser cache]
72 + )], [], [with_apparmor_cache_dir=['${localstatedir}/cache/lxc/apparmor']])
74 # Container log path. By default, use $lxcpath.
75 AC_MSG_CHECKING([Whether to place logfiles in container config path])
76 AC_ARG_ENABLE([configpath-log],
77 @@ -515,6 +522,7 @@ AS_AC_EXPAND(LXCBINHOOKDIR, "$libexecdir/lxc/hooks")
78 AS_AC_EXPAND(LXCINITDIR, "$libexecdir")
79 AS_AC_EXPAND(LOGPATH, "$with_log_path")
80 AS_AC_EXPAND(RUNTIME_PATH, "$with_runtime_path")
81 +AS_AC_EXPAND(APPARMOR_CACHE_DIR, "$with_apparmor_cache_dir")
82 AC_SUBST(DEFAULT_CGROUP_PATTERN, ["$with_cgroup_pattern"])
84 # We need the install path so criu knows where to reference the hook scripts.
85 diff --git a/src/lxc/Makefile.am b/src/lxc/Makefile.am
86 index c5e46ac2..1359eb3e 100644
87 --- a/src/lxc/Makefile.am
88 +++ b/src/lxc/Makefile.am
89 @@ -174,6 +174,7 @@ AM_CFLAGS = -DLXCROOTFSMOUNT=\"$(LXCROOTFSMOUNT)\" \
90 -DDEFAULT_CGROUP_PATTERN=\"$(DEFAULT_CGROUP_PATTERN)\" \
91 -DRUNTIME_PATH=\"$(RUNTIME_PATH)\" \
92 -DSBINDIR=\"$(SBINDIR)\" \
93 + -DAPPARMOR_CACHE_DIR=\"$(APPARMOR_CACHE_DIR)\" \
94 -I $(top_srcdir)/src \
95 -I $(top_srcdir)/src/lxc \
96 -I $(top_srcdir)/src/lxc/storage \
97 diff --git a/src/lxc/conf.c b/src/lxc/conf.c
98 index d36987c8..df805e6c 100644
101 @@ -2334,7 +2334,23 @@ static int setup_mount(const struct lxc_conf *conf,
105 -FILE *make_anonymous_mount_file(struct lxc_list *mount)
107 + * In order for nested containers to be able to mount /proc and /sys they need
108 + * to see "pure" proc and sysfs mount points with nothing mounted on top
110 + * For this we provide proc and sysfs in /dev/.lxc/{proc,sys} while using an
111 + * apparmor rule to deny access to them. This is mostly for convenience: The
112 + * container's root user can mount them anyway and thus has access to the two
113 + * file systems. But a non-root user in the container should not be allowed to
114 + * access them as a side effect without explicitly allowing it.
116 +static const char nesting_helpers[] =
117 +"proc dev/.lxc/proc proc create=dir,optional\n"
118 +"sys dev/.lxc/sys sysfs create=dir,optional\n"
121 +FILE *make_anonymous_mount_file(struct lxc_list *mount,
122 + bool include_nesting_helpers)
126 @@ -2376,6 +2392,13 @@ FILE *make_anonymous_mount_file(struct lxc_list *mount)
130 + if (include_nesting_helpers) {
131 + ret = lxc_write_nointr(fd, nesting_helpers,
132 + sizeof(nesting_helpers) - 1);
133 + if (ret != sizeof(nesting_helpers) - 1)
137 ret = lseek(fd, 0, SEEK_SET);
140 @@ -2396,7 +2419,7 @@ static int setup_mount_entries(const struct lxc_conf *conf,
144 - f = make_anonymous_mount_file(mount);
145 + f = make_anonymous_mount_file(mount, conf->lsm_aa_allow_nesting);
149 @@ -2712,6 +2735,7 @@ struct lxc_conf *lxc_conf_init(void)
150 lxc_list_init(&new->groups);
151 lxc_list_init(&new->state_clients);
152 new->lsm_aa_profile = NULL;
153 + lxc_list_init(&new->lsm_aa_raw);
154 new->lsm_se_context = NULL;
155 new->tmp_umount_proc = false;
157 @@ -3996,6 +4020,19 @@ void lxc_clear_includes(struct lxc_conf *conf)
161 +int lxc_clear_apparmor_raw(struct lxc_conf *c)
163 + struct lxc_list *it, *next;
165 + lxc_list_for_each_safe (it, &c->lsm_aa_raw, next) {
174 void lxc_conf_free(struct lxc_conf *conf)
177 @@ -4023,6 +4060,7 @@ void lxc_conf_free(struct lxc_conf *conf)
179 lxc_free_networks(&conf->network);
180 free(conf->lsm_aa_profile);
181 + free(conf->lsm_aa_profile_computed);
182 free(conf->lsm_se_context);
183 lxc_seccomp_free(conf);
184 lxc_clear_config_caps(conf);
185 @@ -4039,6 +4077,7 @@ void lxc_conf_free(struct lxc_conf *conf)
186 lxc_clear_limits(conf, "lxc.prlimit");
187 lxc_clear_sysctls(conf, "lxc.sysctl");
188 lxc_clear_procs(conf, "lxc.proc");
189 + lxc_clear_apparmor_raw(conf);
190 free(conf->cgroup_meta.dir);
191 free(conf->cgroup_meta.controllers);
193 diff --git a/src/lxc/conf.h b/src/lxc/conf.h
194 index f7a879c3..f5085d8c 100644
197 @@ -272,7 +272,11 @@ struct lxc_conf {
200 char *lsm_aa_profile;
201 + char *lsm_aa_profile_computed;
202 + bool lsm_aa_profile_created;
203 + unsigned int lsm_aa_allow_nesting;
204 unsigned int lsm_aa_allow_incomplete;
205 + struct lxc_list lsm_aa_raw;
206 char *lsm_se_context;
207 bool tmp_umount_proc;
208 char *seccomp; /* filename with the seccomp rules */
209 @@ -417,7 +421,8 @@ extern int parse_mntopts(const char *mntopts, unsigned long *mntflags,
210 extern void tmp_proc_unmount(struct lxc_conf *lxc_conf);
211 extern void remount_all_slave(void);
212 extern void suggest_default_idmap(void);
213 -extern FILE *make_anonymous_mount_file(struct lxc_list *mount);
214 +extern FILE *make_anonymous_mount_file(struct lxc_list *mount,
215 + bool include_nesting_helpers);
216 extern struct lxc_list *sort_cgroup_settings(struct lxc_list *cgroup_settings);
217 extern unsigned long add_required_remount_flags(const char *s, const char *d,
218 unsigned long flags);
219 @@ -431,5 +436,6 @@ extern int setup_sysctl_parameters(struct lxc_list *sysctls);
220 extern int lxc_clear_sysctls(struct lxc_conf *c, const char *key);
221 extern int setup_proc_filesystem(struct lxc_list *procs, pid_t pid);
222 extern int lxc_clear_procs(struct lxc_conf *c, const char *key);
223 +extern int lxc_clear_apparmor_raw(struct lxc_conf *c);
225 #endif /* __LXC_CONF_H */
226 diff --git a/src/lxc/confile.c b/src/lxc/confile.c
227 index 8a7505da..097b8806 100644
228 --- a/src/lxc/confile.c
229 +++ b/src/lxc/confile.c
230 @@ -84,7 +84,9 @@ lxc_log_define(confile, lxc);
232 lxc_config_define(autodev);
233 lxc_config_define(apparmor_allow_incomplete);
234 +lxc_config_define(apparmor_allow_nesting);
235 lxc_config_define(apparmor_profile);
236 +lxc_config_define(apparmor_raw);
237 lxc_config_define(cap_drop);
238 lxc_config_define(cap_keep);
239 lxc_config_define(cgroup_controller);
240 @@ -158,6 +160,8 @@ static struct lxc_config_t config[] = {
241 { "lxc.arch", set_config_personality, get_config_personality, clr_config_personality, },
242 { "lxc.apparmor.profile", set_config_apparmor_profile, get_config_apparmor_profile, clr_config_apparmor_profile, },
243 { "lxc.apparmor.allow_incomplete", set_config_apparmor_allow_incomplete, get_config_apparmor_allow_incomplete, clr_config_apparmor_allow_incomplete, },
244 + { "lxc.apparmor.allow_nesting", set_config_apparmor_allow_nesting, get_config_apparmor_allow_nesting, clr_config_apparmor_allow_nesting, },
245 + { "lxc.apparmor.raw", set_config_apparmor_raw, get_config_apparmor_raw, clr_config_apparmor_raw, },
246 { "lxc.autodev", set_config_autodev, get_config_autodev, clr_config_autodev, },
247 { "lxc.cap.drop", set_config_cap_drop, get_config_cap_drop, clr_config_cap_drop, },
248 { "lxc.cap.keep", set_config_cap_keep, get_config_cap_keep, clr_config_cap_keep, },
249 @@ -1118,6 +1122,52 @@ static int set_config_apparmor_allow_incomplete(const char *key,
253 +static int set_config_apparmor_allow_nesting(const char *key,
255 + struct lxc_conf *lxc_conf,
258 + if (lxc_config_value_empty(value))
259 + return clr_config_apparmor_allow_nesting(key, lxc_conf, NULL);
261 + if (lxc_safe_uint(value, &lxc_conf->lsm_aa_allow_nesting) < 0)
264 + if (lxc_conf->lsm_aa_allow_nesting > 1)
270 +static int set_config_apparmor_raw(const char *key,
272 + struct lxc_conf *lxc_conf,
276 + struct lxc_list *list;
278 + if (lxc_config_value_empty(value))
279 + return lxc_clear_apparmor_raw(lxc_conf);
281 + list = malloc(sizeof(*list));
287 + elem = strdup(value);
294 + lxc_list_add_tail(&lxc_conf->lsm_aa_raw, list);
299 static int set_config_selinux_context(const char *key, const char *value,
300 struct lxc_conf *lxc_conf, void *data)
302 @@ -2959,6 +3009,34 @@ static int get_config_apparmor_allow_incomplete(const char *key, char *retv,
303 c->lsm_aa_allow_incomplete);
306 +static int get_config_apparmor_allow_nesting(const char *key, char *retv,
307 + int inlen, struct lxc_conf *c,
310 + return lxc_get_conf_int(c, retv, inlen,
311 + c->lsm_aa_allow_nesting);
314 +static int get_config_apparmor_raw(const char *key, char *retv,
315 + int inlen, struct lxc_conf *c,
319 + struct lxc_list *it;
325 + memset(retv, 0, inlen);
327 + lxc_list_for_each(it, &c->lsm_aa_raw) {
328 + strprint(retv, inlen, "%s\n", (char *)it->elem);
334 static int get_config_selinux_context(const char *key, char *retv, int inlen,
335 struct lxc_conf *c, void *data)
337 @@ -3749,6 +3827,21 @@ static inline int clr_config_apparmor_allow_incomplete(const char *key,
341 +static inline int clr_config_apparmor_allow_nesting(const char *key,
342 + struct lxc_conf *c,
345 + c->lsm_aa_allow_nesting = 0;
349 +static inline int clr_config_apparmor_raw(const char *key,
350 + struct lxc_conf *c,
353 + return lxc_clear_apparmor_raw(c);
356 static inline int clr_config_selinux_context(const char *key,
357 struct lxc_conf *c, void *data)
359 @@ -4941,7 +5034,9 @@ int lxc_list_subkeys(struct lxc_conf *conf, const char *key, char *retv,
361 if (!strcmp(key, "lxc.apparmor")) {
362 strprint(retv, inlen, "allow_incomplete\n");
363 + strprint(retv, inlen, "allow_nesting\n");
364 strprint(retv, inlen, "profile\n");
365 + strprint(retv, inlen, "raw\n");
366 } else if (!strcmp(key, "lxc.cgroup")) {
367 strprint(retv, inlen, "dir\n");
368 } else if (!strcmp(key, "lxc.selinux")) {
369 diff --git a/src/lxc/criu.c b/src/lxc/criu.c
370 index 456d19cf..02d301ac 100644
373 @@ -378,7 +378,8 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct criu_opts *opts)
374 DECLARE_ARG(opts->user->action_script);
377 - mnts = make_anonymous_mount_file(&opts->c->lxc_conf->mount_list);
378 + mnts = make_anonymous_mount_file(&opts->c->lxc_conf->mount_list,
379 + opts->c->lxc_conf->lsm_aa_allow_nesting);
383 diff --git a/src/lxc/lsm/apparmor.c b/src/lxc/lsm/apparmor.c
384 index 5fe6d525..ec3f805d 100644
385 --- a/src/lxc/lsm/apparmor.c
386 +++ b/src/lxc/lsm/apparmor.c
390 #include "initutils.h"
394 lxc_log_define(apparmor, lsm);
396 /* set by lsm_apparmor_drv_init if true */
397 static int aa_enabled = 0;
398 +static bool aa_parser_available = false;
399 +static bool aa_supports_unix = false;
400 +static bool aa_can_stack = false;
401 +static bool aa_is_stacked = false;
402 +static bool aa_admin = false;
404 static int mount_features_enabled = 0;
406 @@ -46,6 +53,332 @@ static int mount_features_enabled = 0;
407 #define AA_MOUNT_RESTR "/sys/kernel/security/apparmor/features/mount/mask"
408 #define AA_ENABLED_FILE "/sys/module/apparmor/parameters/enabled"
409 #define AA_UNCHANGED "unchanged"
410 +#define AA_GENERATED "generated"
412 +#define AA_CMD_LOAD 'r'
413 +#define AA_CMD_UNLOAD 'R'
414 +#define AA_CMD_PARSE 'Q'
416 +static const char AA_PROFILE_BASE[] =
417 +" ### Base profile\n"
424 +" # Allow us to receive signals from anywhere.\n"
425 +" signal (receive),\n"
427 +" # Allow us to send signals to ourselves\n"
428 +" signal peer=@{profile_name},\n"
430 +" # Allow other processes to read our /proc entries, futexes, perf tracing and\n"
431 +" # kcmp for now (they will need 'read' in the first place). Administrators can\n"
432 +" # override with:\n"
433 +" # deny ptrace (readby) ...\n"
434 +" ptrace (readby),\n"
436 +" # Allow other processes to trace us by default (they will need 'trace' in\n"
437 +" # the first place). Administrators can override with:\n"
438 +" # deny ptrace (tracedby) ...\n"
439 +" ptrace (tracedby),\n"
441 +" # Allow us to ptrace ourselves\n"
442 +" ptrace peer=@{profile_name},\n"
444 +" # ignore DENIED message on / remount\n"
445 +" deny mount options=(ro, remount) -> /,\n"
446 +" deny mount options=(ro, remount, silent) -> /,\n"
448 +" # allow tmpfs mounts everywhere\n"
449 +" mount fstype=tmpfs,\n"
451 +" # allow hugetlbfs mounts everywhere\n"
452 +" mount fstype=hugetlbfs,\n"
454 +" # allow mqueue mounts everywhere\n"
455 +" mount fstype=mqueue,\n"
457 +" # allow fuse mounts everywhere\n"
458 +" mount fstype=fuse,\n"
459 +" mount fstype=fuse.*,\n"
461 +" # deny access under /proc/bus to avoid e.g. messing with pci devices directly\n"
462 +" deny @{PROC}/bus/** wklx,\n"
464 +" # deny writes in /proc/sys/fs but allow binfmt_misc to be mounted\n"
465 +" mount fstype=binfmt_misc -> /proc/sys/fs/binfmt_misc/,\n"
466 +" deny @{PROC}/sys/fs/** wklx,\n"
468 +" # allow efivars to be mounted, writing to it will be blocked though\n"
469 +" mount fstype=efivarfs -> /sys/firmware/efi/efivars/,\n"
471 +" # block some other dangerous paths\n"
472 +" deny @{PROC}/kcore rwklx,\n"
473 +" deny @{PROC}/sysrq-trigger rwklx,\n"
475 +" # deny writes in /sys except for /sys/fs/cgroup, also allow\n"
476 +" # fusectl, securityfs and debugfs to be mounted there (read-only)\n"
477 +" mount fstype=fusectl -> /sys/fs/fuse/connections/,\n"
478 +" mount fstype=securityfs -> /sys/kernel/security/,\n"
479 +" mount fstype=debugfs -> /sys/kernel/debug/,\n"
480 +" deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/,\n"
481 +" mount fstype=proc -> /proc/,\n"
482 +" mount fstype=sysfs -> /sys/,\n"
483 +" mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/,\n"
484 +" deny /sys/firmware/efi/efivars/** rwklx,\n"
485 +" # note, /sys/kernel/security/** handled below\n"
486 +" mount options=(ro, nosuid, nodev, noexec, remount, strictatime) -> /sys/fs/cgroup/,\n"
488 +" # deny reads from debugfs\n"
489 +" deny /sys/kernel/debug/{,**} rwklx,\n"
491 +" # allow paths to be made slave, shared, private or unbindable\n"
492 +" # FIXME: This currently doesn't work due to the apparmor parser treating those as allowing all mounts.\n"
493 +"# mount options=(rw,make-slave) -> **,\n"
494 +"# mount options=(rw,make-rslave) -> **,\n"
495 +"# mount options=(rw,make-shared) -> **,\n"
496 +"# mount options=(rw,make-rshared) -> **,\n"
497 +"# mount options=(rw,make-private) -> **,\n"
498 +"# mount options=(rw,make-rprivate) -> **,\n"
499 +"# mount options=(rw,make-unbindable) -> **,\n"
500 +"# mount options=(rw,make-runbindable) -> **,\n"
502 +" # allow bind-mounts of anything except /proc, /sys and /dev\n"
503 +" mount options=(rw,bind) /[^spd]*{,/**},\n"
504 +" mount options=(rw,bind) /d[^e]*{,/**},\n"
505 +" mount options=(rw,bind) /de[^v]*{,/**},\n"
506 +" mount options=(rw,bind) /dev/.[^l]*{,/**},\n"
507 +" mount options=(rw,bind) /dev/.l[^x]*{,/**},\n"
508 +" mount options=(rw,bind) /dev/.lx[^c]*{,/**},\n"
509 +" mount options=(rw,bind) /dev/.lxc?*{,/**},\n"
510 +" mount options=(rw,bind) /dev/[^.]*{,/**},\n"
511 +" mount options=(rw,bind) /dev?*{,/**},\n"
512 +" mount options=(rw,bind) /p[^r]*{,/**},\n"
513 +" mount options=(rw,bind) /pr[^o]*{,/**},\n"
514 +" mount options=(rw,bind) /pro[^c]*{,/**},\n"
515 +" mount options=(rw,bind) /proc?*{,/**},\n"
516 +" mount options=(rw,bind) /s[^y]*{,/**},\n"
517 +" mount options=(rw,bind) /sy[^s]*{,/**},\n"
518 +" mount options=(rw,bind) /sys?*{,/**},\n"
520 +" # allow read-only bind-mounts of anything except /proc, /sys and /dev\n"
521 +" mount options=(ro,remount,bind) -> /[^spd]*{,/**},\n"
522 +" mount options=(ro,remount,bind) -> /d[^e]*{,/**},\n"
523 +" mount options=(ro,remount,bind) -> /de[^v]*{,/**},\n"
524 +" mount options=(ro,remount,bind) -> /dev/.[^l]*{,/**},\n"
525 +" mount options=(ro,remount,bind) -> /dev/.l[^x]*{,/**},\n"
526 +" mount options=(ro,remount,bind) -> /dev/.lx[^c]*{,/**},\n"
527 +" mount options=(ro,remount,bind) -> /dev/.lxc?*{,/**},\n"
528 +" mount options=(ro,remount,bind) -> /dev/[^.]*{,/**},\n"
529 +" mount options=(ro,remount,bind) -> /dev?*{,/**},\n"
530 +" mount options=(ro,remount,bind) -> /p[^r]*{,/**},\n"
531 +" mount options=(ro,remount,bind) -> /pr[^o]*{,/**},\n"
532 +" mount options=(ro,remount,bind) -> /pro[^c]*{,/**},\n"
533 +" mount options=(ro,remount,bind) -> /proc?*{,/**},\n"
534 +" mount options=(ro,remount,bind) -> /s[^y]*{,/**},\n"
535 +" mount options=(ro,remount,bind) -> /sy[^s]*{,/**},\n"
536 +" mount options=(ro,remount,bind) -> /sys?*{,/**},\n"
538 +" # allow moving mounts except for /proc, /sys and /dev\n"
539 +" mount options=(rw,move) /[^spd]*{,/**},\n"
540 +" mount options=(rw,move) /d[^e]*{,/**},\n"
541 +" mount options=(rw,move) /de[^v]*{,/**},\n"
542 +" mount options=(rw,move) /dev/.[^l]*{,/**},\n"
543 +" mount options=(rw,move) /dev/.l[^x]*{,/**},\n"
544 +" mount options=(rw,move) /dev/.lx[^c]*{,/**},\n"
545 +" mount options=(rw,move) /dev/.lxc?*{,/**},\n"
546 +" mount options=(rw,move) /dev/[^.]*{,/**},\n"
547 +" mount options=(rw,move) /dev?*{,/**},\n"
548 +" mount options=(rw,move) /p[^r]*{,/**},\n"
549 +" mount options=(rw,move) /pr[^o]*{,/**},\n"
550 +" mount options=(rw,move) /pro[^c]*{,/**},\n"
551 +" mount options=(rw,move) /proc?*{,/**},\n"
552 +" mount options=(rw,move) /s[^y]*{,/**},\n"
553 +" mount options=(rw,move) /sy[^s]*{,/**},\n"
554 +" mount options=(rw,move) /sys?*{,/**},\n"
556 +" # generated by: lxc-generate-aa-rules.py container-rules.base\n"
557 +" deny /proc/sys/[^kn]*{,/**} wklx,\n"
558 +" deny /proc/sys/k[^e]*{,/**} wklx,\n"
559 +" deny /proc/sys/ke[^r]*{,/**} wklx,\n"
560 +" deny /proc/sys/ker[^n]*{,/**} wklx,\n"
561 +" deny /proc/sys/kern[^e]*{,/**} wklx,\n"
562 +" deny /proc/sys/kerne[^l]*{,/**} wklx,\n"
563 +" deny /proc/sys/kernel/[^smhd]*{,/**} wklx,\n"
564 +" deny /proc/sys/kernel/d[^o]*{,/**} wklx,\n"
565 +" deny /proc/sys/kernel/do[^m]*{,/**} wklx,\n"
566 +" deny /proc/sys/kernel/dom[^a]*{,/**} wklx,\n"
567 +" deny /proc/sys/kernel/doma[^i]*{,/**} wklx,\n"
568 +" deny /proc/sys/kernel/domai[^n]*{,/**} wklx,\n"
569 +" deny /proc/sys/kernel/domain[^n]*{,/**} wklx,\n"
570 +" deny /proc/sys/kernel/domainn[^a]*{,/**} wklx,\n"
571 +" deny /proc/sys/kernel/domainna[^m]*{,/**} wklx,\n"
572 +" deny /proc/sys/kernel/domainnam[^e]*{,/**} wklx,\n"
573 +" deny /proc/sys/kernel/domainname?*{,/**} wklx,\n"
574 +" deny /proc/sys/kernel/h[^o]*{,/**} wklx,\n"
575 +" deny /proc/sys/kernel/ho[^s]*{,/**} wklx,\n"
576 +" deny /proc/sys/kernel/hos[^t]*{,/**} wklx,\n"
577 +" deny /proc/sys/kernel/host[^n]*{,/**} wklx,\n"
578 +" deny /proc/sys/kernel/hostn[^a]*{,/**} wklx,\n"
579 +" deny /proc/sys/kernel/hostna[^m]*{,/**} wklx,\n"
580 +" deny /proc/sys/kernel/hostnam[^e]*{,/**} wklx,\n"
581 +" deny /proc/sys/kernel/hostname?*{,/**} wklx,\n"
582 +" deny /proc/sys/kernel/m[^s]*{,/**} wklx,\n"
583 +" deny /proc/sys/kernel/ms[^g]*{,/**} wklx,\n"
584 +" deny /proc/sys/kernel/msg*/** wklx,\n"
585 +" deny /proc/sys/kernel/s[^he]*{,/**} wklx,\n"
586 +" deny /proc/sys/kernel/se[^m]*{,/**} wklx,\n"
587 +" deny /proc/sys/kernel/sem*/** wklx,\n"
588 +" deny /proc/sys/kernel/sh[^m]*{,/**} wklx,\n"
589 +" deny /proc/sys/kernel/shm*/** wklx,\n"
590 +" deny /proc/sys/kernel?*{,/**} wklx,\n"
591 +" deny /proc/sys/n[^e]*{,/**} wklx,\n"
592 +" deny /proc/sys/ne[^t]*{,/**} wklx,\n"
593 +" deny /proc/sys/net?*{,/**} wklx,\n"
594 +" deny /sys/[^fdck]*{,/**} wklx,\n"
595 +" deny /sys/c[^l]*{,/**} wklx,\n"
596 +" deny /sys/cl[^a]*{,/**} wklx,\n"
597 +" deny /sys/cla[^s]*{,/**} wklx,\n"
598 +" deny /sys/clas[^s]*{,/**} wklx,\n"
599 +" deny /sys/class/[^n]*{,/**} wklx,\n"
600 +" deny /sys/class/n[^e]*{,/**} wklx,\n"
601 +" deny /sys/class/ne[^t]*{,/**} wklx,\n"
602 +" deny /sys/class/net?*{,/**} wklx,\n"
603 +" deny /sys/class?*{,/**} wklx,\n"
604 +" deny /sys/d[^e]*{,/**} wklx,\n"
605 +" deny /sys/de[^v]*{,/**} wklx,\n"
606 +" deny /sys/dev[^i]*{,/**} wklx,\n"
607 +" deny /sys/devi[^c]*{,/**} wklx,\n"
608 +" deny /sys/devic[^e]*{,/**} wklx,\n"
609 +" deny /sys/device[^s]*{,/**} wklx,\n"
610 +" deny /sys/devices/[^v]*{,/**} wklx,\n"
611 +" deny /sys/devices/v[^i]*{,/**} wklx,\n"
612 +" deny /sys/devices/vi[^r]*{,/**} wklx,\n"
613 +" deny /sys/devices/vir[^t]*{,/**} wklx,\n"
614 +" deny /sys/devices/virt[^u]*{,/**} wklx,\n"
615 +" deny /sys/devices/virtu[^a]*{,/**} wklx,\n"
616 +" deny /sys/devices/virtua[^l]*{,/**} wklx,\n"
617 +" deny /sys/devices/virtual/[^n]*{,/**} wklx,\n"
618 +" deny /sys/devices/virtual/n[^e]*{,/**} wklx,\n"
619 +" deny /sys/devices/virtual/ne[^t]*{,/**} wklx,\n"
620 +" deny /sys/devices/virtual/net?*{,/**} wklx,\n"
621 +" deny /sys/devices/virtual?*{,/**} wklx,\n"
622 +" deny /sys/devices?*{,/**} wklx,\n"
623 +" deny /sys/f[^s]*{,/**} wklx,\n"
624 +" deny /sys/fs/[^c]*{,/**} wklx,\n"
625 +" deny /sys/fs/c[^g]*{,/**} wklx,\n"
626 +" deny /sys/fs/cg[^r]*{,/**} wklx,\n"
627 +" deny /sys/fs/cgr[^o]*{,/**} wklx,\n"
628 +" deny /sys/fs/cgro[^u]*{,/**} wklx,\n"
629 +" deny /sys/fs/cgrou[^p]*{,/**} wklx,\n"
630 +" deny /sys/fs/cgroup?*{,/**} wklx,\n"
631 +" deny /sys/fs?*{,/**} wklx,\n"
634 +static const char AA_PROFILE_UNIX_SOCKETS[] =
636 +" ### Feature: unix\n"
637 +" # Allow receive via unix sockets from anywhere\n"
638 +" unix (receive),\n"
640 +" # Allow all unix sockets in the container\n"
641 +" unix peer=(label=@{profile_name}),\n"
644 +static const char AA_PROFILE_CGROUP_NAMESPACES[] =
646 +" ### Feature: cgroup namespace\n"
647 +" mount fstype=cgroup -> /sys/fs/cgroup/**,\n"
648 +" mount fstype=cgroup2 -> /sys/fs/cgroup/**,\n"
651 +/* '_BASE' because we still need to append generated change_profile rules */
652 +static const char AA_PROFILE_STACKING_BASE[] =
654 +" ### Feature: apparmor stacking\n"
655 +" ### Configuration: apparmor profile loading (in namespace)\n"
656 +" deny /sys/k[^e]*{,/**} wklx,\n"
657 +" deny /sys/ke[^r]*{,/**} wklx,\n"
658 +" deny /sys/ker[^n]*{,/**} wklx,\n"
659 +" deny /sys/kern[^e]*{,/**} wklx,\n"
660 +" deny /sys/kerne[^l]*{,/**} wklx,\n"
661 +" deny /sys/kernel/[^s]*{,/**} wklx,\n"
662 +" deny /sys/kernel/s[^e]*{,/**} wklx,\n"
663 +" deny /sys/kernel/se[^c]*{,/**} wklx,\n"
664 +" deny /sys/kernel/sec[^u]*{,/**} wklx,\n"
665 +" deny /sys/kernel/secu[^r]*{,/**} wklx,\n"
666 +" deny /sys/kernel/secur[^i]*{,/**} wklx,\n"
667 +" deny /sys/kernel/securi[^t]*{,/**} wklx,\n"
668 +" deny /sys/kernel/securit[^y]*{,/**} wklx,\n"
669 +" deny /sys/kernel/security/[^a]*{,/**} wklx,\n"
670 +" deny /sys/kernel/security/a[^p]*{,/**} wklx,\n"
671 +" deny /sys/kernel/security/ap[^p]*{,/**} wklx,\n"
672 +" deny /sys/kernel/security/app[^a]*{,/**} wklx,\n"
673 +" deny /sys/kernel/security/appa[^r]*{,/**} wklx,\n"
674 +" deny /sys/kernel/security/appar[^m]*{,/**} wklx,\n"
675 +" deny /sys/kernel/security/apparm[^o]*{,/**} wklx,\n"
676 +" deny /sys/kernel/security/apparmo[^r]*{,/**} wklx,\n"
677 +" deny /sys/kernel/security/apparmor?*{,/**} wklx,\n"
678 +" deny /sys/kernel/security?*{,/**} wklx,\n"
679 +" deny /sys/kernel?*{,/**} wklx,\n"
682 +static const char AA_PROFILE_NO_STACKING[] =
684 +" ### Feature: apparmor stacking (not present)\n"
685 +" deny /sys/k*{,/**} rwklx,\n"
688 +/* '_BASE' because we need to append change_profile for stacking */
689 +static const char AA_PROFILE_NESTING_BASE[] =
691 +" ### Configuration: nesting\n"
696 + /* NOTE: See conf.c's "nesting_helpers" for details. */
697 +" deny /dev/.lxc/proc/** rw,\n"
698 +" deny /dev/.lxc/sys/** rw,\n"
700 +" mount fstype=proc -> /usr/lib/*/lxc/**,\n"
701 +" mount fstype=sysfs -> /usr/lib/*/lxc/**,\n"
702 +" mount options=(rw,bind),\n"
703 +" mount options=(rw,rbind),\n"
704 +" mount options=(rw,make-rshared),\n"
706 + /* FIXME: What's the state here on apparmor's side? */
707 +" # there doesn't seem to be a way to ask for:\n"
708 +" # mount options=(ro,nosuid,nodev,noexec,remount,bind),\n"
709 +" # as we always get mount to $cdir/proc/sys with those flags denied\n"
710 +" # So allow all mounts until that is straightened out:\n"
714 +static const char AA_PROFILE_UNPRIVILEGED[] =
716 +" ### Configuration: unprivileged container\n"
719 +" # Allow modifying mount propagation\n"
720 +" mount options=(rw,make-slave) -> **,\n"
721 +" mount options=(rw,make-rslave) -> **,\n"
722 +" mount options=(rw,make-shared) -> **,\n"
723 +" mount options=(rw,make-rshared) -> **,\n"
724 +" mount options=(rw,make-private) -> **,\n"
725 +" mount options=(rw,make-rprivate) -> **,\n"
726 +" mount options=(rw,make-unbindable) -> **,\n"
727 +" mount options=(rw,make-runbindable) -> **,\n"
729 +" # Allow all bind-mounts\n"
730 +" mount options=(rw,bind),\n"
731 +" mount options=(rw,rbind),\n"
733 +" # Allow remounting things read-only\n"
734 +" mount options=(ro,remount),\n"
737 static bool check_mount_feature_enabled(void)
739 @@ -144,11 +477,6 @@ static bool apparmor_am_unconfined(void)
743 -/* aa stacking is not yet supported */
744 -static bool aa_stacking_supported(void) {
748 static bool aa_needs_transition(char *curlabel)
751 @@ -160,61 +488,546 @@ static bool aa_needs_transition(char *curlabel)
755 +static inline void uint64hex(char *buf, uint64_t num)
760 + for (i = 16; i--;) {
761 + char c = (char)(num & 0xf);
762 + buf[i] = c + (c < 0xa ? '0' : 'a' - 0xa);
767 +static inline char *shorten_apparmor_name(char *name)
769 + size_t len = strlen(name);
770 + if (len + 7 > 253) {
772 + hash = fnv_64a_buf(name, len, FNV1A_64_INIT);
773 + name = must_realloc(name, 16 + 1);
774 + uint64hex(name, hash);
780 +/* Replace slashes with hyphens */
781 +static inline void sanitize_path(char *path)
785 + for (i = 0; path[i]; i++)
786 + if (path[i] == '/')
790 +static inline char *apparmor_dir(const char *ctname, const char *lxcpath)
792 + return must_make_path(lxcpath, ctname, "apparmor", NULL);
796 +static inline char *apparmor_profile_full(const char *ctname, const char *lxcpath)
798 + return shorten_apparmor_name(must_concat("lxc-", ctname, "_<", lxcpath, ">", NULL));
801 +/* Like apparmor_profile_full() but with slashes replaced by hyphens */
802 +static inline char *apparmor_namespace(const char *ctname, const char *lxcpath)
806 + full = apparmor_profile_full(ctname, lxcpath);
807 + sanitize_path(full);
812 +/* FIXME: This is currently run only in the context of a constructor (via the
813 + * initial lsm_init() called due to its __attribute__((constructor)), so we
814 + * do not have ERROR/... macros available, so there are some fprintf(stderr)s
817 +static bool check_apparmor_parser_version()
819 + struct lxc_popen_FILE *parserpipe;
821 + int major = 0, minor = 0, micro = 0;
823 + parserpipe = lxc_popen("apparmor_parser --version");
825 + fprintf(stderr, "Failed to run check for apparmor_parser\n");
829 + rc = fscanf(parserpipe->f, "AppArmor parser version %d.%d.%d", &major, &minor, &micro);
831 + lxc_pclose(parserpipe);
832 + /* We stay silent for now as this most likely means the shell
833 + * lxc_popen executed failed to find the apparmor_parser binary.
834 + * See the FIXME comment above for details.
839 + rc = lxc_pclose(parserpipe);
841 + fprintf(stderr, "Error waiting for child process\n");
845 + fprintf(stderr, "'apparmor_parser --version' executed with an error status\n");
849 + aa_supports_unix = (major > 2) ||
850 + (major == 2 && minor > 10) ||
851 + (major == 2 && minor == 10 && micro >= 95);
856 +static bool file_is_yes(const char *path)
860 + char buf[8]; /* we actually just expect "yes" or "no" */
862 + fd = open(path, O_RDONLY | O_CLOEXEC);
866 + rd = read(fd, buf, sizeof(buf));
869 + return rd >= 4 && strncmp(buf, "yes\n", 4) == 0;
872 +static bool apparmor_can_stack()
874 + int major, minor, scanned;
877 + if (!file_is_yes("/sys/kernel/security/apparmor/features/domain/stack"))
880 + f = fopen_cloexec("/sys/kernel/security/apparmor/features/domain/version", "r");
884 + scanned = fscanf(f, "%d.%d", &major, &minor);
889 + return major > 1 || (major == 1 && minor >= 2);
892 +static void must_append_sized_full(char **buf, size_t *bufsz, const char *data,
893 + size_t size, bool append_newline)
895 + size_t newsize = *bufsz + size;
897 + if (append_newline)
900 + *buf = must_realloc(*buf, newsize);
901 + memcpy(*buf + *bufsz, data, size);
903 + if (append_newline)
904 + (*buf)[newsize - 1] = '\n';
909 +static void must_append_sized(char **buf, size_t *bufsz, const char *data, size_t size)
911 + return must_append_sized_full(buf, bufsz, data, size, false);
914 +static bool is_privileged(struct lxc_conf *conf)
916 + return lxc_list_empty(&conf->id_map);
919 +static char *get_apparmor_profile_content(struct lxc_conf *conf, const char *lxcpath)
921 + char *profile, *profile_name_full;
923 + struct lxc_list *it;
925 + profile_name_full = apparmor_profile_full(conf->name, lxcpath);
927 + profile = must_concat(
928 +"#include <tunables/global>\n"
929 +"profile \"", profile_name_full, "\" flags=(attach_disconnected,mediate_deleted) {\n",
931 + size = strlen(profile);
933 + must_append_sized(&profile, &size, AA_PROFILE_BASE,
934 + sizeof(AA_PROFILE_BASE) - 1);
936 + if (aa_supports_unix)
937 + must_append_sized(&profile, &size, AA_PROFILE_UNIX_SOCKETS,
938 + sizeof(AA_PROFILE_UNIX_SOCKETS) - 1);
940 + if (file_exists("/proc/self/ns/cgroup"))
941 + must_append_sized(&profile, &size, AA_PROFILE_CGROUP_NAMESPACES,
942 + sizeof(AA_PROFILE_CGROUP_NAMESPACES) - 1);
944 + if (aa_can_stack && !aa_is_stacked) {
945 + char *namespace, *temp;
947 + must_append_sized(&profile, &size, AA_PROFILE_STACKING_BASE,
948 + sizeof(AA_PROFILE_STACKING_BASE) - 1);
950 + namespace = apparmor_namespace(conf->name, lxcpath);
951 + temp = must_concat(" change_profile -> \":", namespace, ":*\",\n"
952 + " change_profile -> \":", namespace, "://*\",\n",
956 + must_append_sized(&profile, &size, temp, strlen(temp));
959 + must_append_sized(&profile, &size, AA_PROFILE_NO_STACKING,
960 + sizeof(AA_PROFILE_NO_STACKING) - 1);
963 + if (conf->lsm_aa_allow_nesting) {
964 + must_append_sized(&profile, &size, AA_PROFILE_NESTING_BASE,
965 + sizeof(AA_PROFILE_NESTING_BASE) - 1);
967 + if (!aa_can_stack || aa_is_stacked) {
970 + temp = must_concat(" change_profile -> \"",
971 + profile_name_full, "\",\n", NULL);
972 + must_append_sized(&profile, &size, temp, strlen(temp));
977 + if (!is_privileged(conf) || am_host_unpriv())
978 + must_append_sized(&profile, &size, AA_PROFILE_UNPRIVILEGED,
979 + sizeof(AA_PROFILE_UNPRIVILEGED) - 1);
981 + lxc_list_for_each(it, &conf->lsm_aa_raw) {
982 + const char *line = it->elem;
984 + must_append_sized_full(&profile, &size, line, strlen(line), true);
987 + /* include terminating \0 byte */
988 + must_append_sized(&profile, &size, "}\n", 3);
990 + free(profile_name_full);
996 - * apparmor_process_label_set: Set AppArmor process profile
998 - * @label : the profile to set
999 - * @conf : the container configuration to use if @label is NULL
1000 - * @default : use the default profile if @label is NULL
1001 - * @on_exec : this is ignored. Apparmor profile will be changed immediately
1003 - * Returns 0 on success, < 0 on failure
1005 - * Notes: This relies on /proc being available.
1006 + * apparmor_parser names its cache file after the file it parsed. Since there
1007 + * may be multiple containers with the same name but different lxcpaths, we
1008 + * need a sanitized version of the complete profile name as the profile's
1009 + * file name.
1010 + * apparmor_namespace() already gives us exactly that.
1012 -static int apparmor_process_label_set(const char *inlabel, struct lxc_conf *conf,
1013 - bool use_default, bool on_exec)
1014 +static char *make_apparmor_profile_path(const char *ctname, const char *lxcpath)
1016 - int label_fd, ret;
1018 - const char *label = inlabel ? inlabel : conf->lsm_aa_profile;
1020 + char *ret, *filename;
1024 + filename = apparmor_namespace(ctname, lxcpath);
1025 + ret = must_make_path(lxcpath, ctname, "apparmor", filename, NULL);
1031 +static char *make_apparmor_namespace_path(const char *ctname, const char *lxcpath)
1033 + char *ret, *namespace;
1035 + namespace = apparmor_namespace(ctname, lxcpath);
1036 + ret = must_make_path("/sys/kernel/security/apparmor/policy/namespaces", namespace, NULL);
1042 +static bool make_apparmor_namespace(struct lxc_conf *conf, const char *lxcpath)
1046 + if (!aa_can_stack || aa_is_stacked)
1049 + path = make_apparmor_namespace_path(conf->name, lxcpath);
1051 + if (mkdir(path, 0755) < 0 && errno != EEXIST) {
1052 + SYSERROR("Error creating AppArmor namespace: %s", path);
1061 +static void remove_apparmor_namespace(struct lxc_conf *conf, const char *lxcpath)
1065 + path = make_apparmor_namespace_path(conf->name, lxcpath);
1066 + if (rmdir(path) != 0)
1067 + SYSERROR("Error removing AppArmor namespace");
1071 +struct apparmor_parser_args {
1076 +static int apparmor_parser_exec(void *data)
1078 + struct apparmor_parser_args *args = data;
1079 + char cmdbuf[] = { '-', args->cmd, 'W', 'L', 0 };
1081 + execlp("apparmor_parser", "apparmor_parser", cmdbuf, APPARMOR_CACHE_DIR, args->file, NULL);
1086 +static int run_apparmor_parser(char command,
1087 + struct lxc_conf *conf,
1088 + const char *lxcpath)
1090 + char output[MAXPATHLEN];
1092 + struct apparmor_parser_args args = {
1094 + .file = make_apparmor_profile_path(conf->name, lxcpath),
1097 + ret = run_command(output, sizeof(output), apparmor_parser_exec, (void*)&args);
1099 + ERROR("Failed to run apparmor_parser on \"%s\": %s", args.file, output);
1108 +static void remove_apparmor_profile(struct lxc_conf *conf, const char *lxcpath)
1112 + /* It's ok if these deletes fail: if the container was never started,
1113 + * we'll have never written a profile or cached it.
1116 + path = make_apparmor_profile_path(conf->name, lxcpath);
1117 + (void)unlink(path);
1120 + /* Also remove the apparmor/ subdirectory */
1121 + path = apparmor_dir(conf->name, lxcpath);
1122 + (void)rmdir(path);
1126 +static int load_apparmor_profile(struct lxc_conf *conf, const char *lxcpath)
1128 + struct stat profile_sb;
1129 + size_t content_len;
1131 + size_t old_len = 0;
1132 + char *profile_path = NULL, *old_content = NULL, *new_content = NULL;
1133 + int profile_fd = -1;
1135 + if (!make_apparmor_namespace(conf, lxcpath))
1138 + /* In order to avoid forcing a profile parse (potentially slow) on
1139 + * every container start, let's use apparmor's binary policy cache,
1140 + * which checks mtime of the files to figure out if the policy needs to
1143 + * Since it uses mtimes, we shouldn't just always write out our local
1144 + * apparmor template; instead we should check to see whether the
1145 + * template is the same as ours. If it isn't we should write our
1146 + * version out so that the new changes are reflected and we definitely
1147 + * force a recompile.
1150 + profile_path = make_apparmor_profile_path(conf->name, lxcpath);
1151 + profile_fd = open(profile_path, O_RDONLY | O_CLOEXEC);
1152 + if (profile_fd >= 0) {
1153 + if (fstat(profile_fd, &profile_sb) < 0) {
1154 + SYSERROR("Error accessing old profile from %s",
1158 + old_len = profile_sb.st_size;
1159 + old_content = lxc_strmmap(NULL, old_len, PROT_READ,
1160 + MAP_PRIVATE, profile_fd, 0);
1161 + if (!old_content) {
1162 + SYSERROR("Failed to mmap old profile from %s",
1166 + } else if (errno != ENOENT) {
1167 + SYSERROR("Error reading old profile from %s", profile_path);
1171 + new_content = get_apparmor_profile_content(conf, lxcpath);
1175 + content_len = strlen(new_content);
1177 + if (!old_content || old_len != content_len || memcmp(old_content, new_content, content_len) != 0) {
1180 + ret = mkdir_p(APPARMOR_CACHE_DIR, 0755);
1182 + SYSERROR("Error creating AppArmor profile cache directory " APPARMOR_CACHE_DIR);
1186 + path = apparmor_dir(conf->name, lxcpath);
1187 + ret = mkdir_p(path, 0755);
1189 + SYSERROR("Error creating AppArmor profile directory: %s", path);
1195 + ret = lxc_write_to_file(profile_path, new_content, content_len, false, 0600);
1197 + SYSERROR("Error writing profile to %s", profile_path);
1202 + ret = run_apparmor_parser(AA_CMD_LOAD, conf, lxcpath);
1204 + goto out_remove_profile;
1206 + conf->lsm_aa_profile_created = true;
1210 +out_remove_profile:
1211 + remove_apparmor_profile(conf, lxcpath);
1213 + remove_apparmor_namespace(conf, lxcpath);
1215 + if (profile_fd >= 0) {
1217 + lxc_strmunmap(old_content, old_len);
1218 + close(profile_fd);
1220 + free(profile_path);
1221 + free(new_content);
1226 + * Ensure that the container's policy namespace is unloaded to free kernel
1227 + * memory. This also removes the generated profile file from disk.
1229 +static void apparmor_cleanup(struct lxc_conf *conf, const char *lxcpath)
1234 + if (!conf->lsm_aa_profile_created)
1237 + remove_apparmor_namespace(conf, lxcpath);
1238 + (void)run_apparmor_parser(AA_CMD_UNLOAD, conf, lxcpath);
1240 + remove_apparmor_profile(conf, lxcpath);
1243 +static int apparmor_prepare(struct lxc_conf *conf, const char *lxcpath)
1246 + const char *label;
1247 + char *curlabel = NULL, *genlabel = NULL;
1249 + if (!aa_enabled) {
1250 + ERROR("AppArmor not enabled");
1254 + label = conf->lsm_aa_profile;
1256 /* user may request that we just ignore apparmor */
1257 if (label && strcmp(label, AA_UNCHANGED) == 0) {
1258 - INFO("apparmor profile unchanged per user request");
1259 + INFO("AppArmor profile unchanged per user request");
1260 + conf->lsm_aa_profile_computed = must_copy_string(label);
1264 + if (label && strcmp(label, AA_GENERATED) == 0) {
1265 + if (!aa_parser_available) {
1266 + ERROR("Cannot use generated profile: apparmor_parser not available");
1270 + /* auto-generate profile based on available/requested security features */
1271 + if (load_apparmor_profile(conf, lxcpath) != 0) {
1272 + ERROR("Failed to load generated AppArmor profile");
1276 + genlabel = apparmor_profile_full(conf->name, lxcpath);
1278 + ERROR("Failed to build AppArmor profile name");
1282 + if (aa_can_stack && !aa_is_stacked) {
1283 + char *namespace = apparmor_namespace(conf->name, lxcpath);
1284 + size_t llen = strlen(genlabel);
1285 + must_append_sized(&genlabel, &llen, "//&:", sizeof("//&:") - 1);
1286 + must_append_sized(&genlabel, &llen, namespace, strlen(namespace));
1287 + must_append_sized(&genlabel, &llen, ":", sizeof(":")); /* with the nul byte */
1294 curlabel = apparmor_process_label_get(lxc_raw_getpid());
1296 - if (!aa_stacking_supported() && aa_needs_transition(curlabel)) {
1297 + if (!aa_can_stack && aa_needs_transition(curlabel)) {
1298 /* we're already confined, and stacking isn't supported */
1300 if (!label || strcmp(curlabel, label) == 0) {
1301 /* no change requested */
1308 - ERROR("already apparmor confined, but new label requested.");
1311 + ERROR("Already AppArmor confined, but new label requested.");
1317 - if (use_default) {
1318 - if (cgns_supported())
1319 - label = AA_DEF_PROFILE_CGNS;
1321 - label = AA_DEF_PROFILE;
1323 + if (cgns_supported())
1324 + label = AA_DEF_PROFILE_CGNS;
1326 - label = "unconfined";
1327 + label = AA_DEF_PROFILE;
1330 if (!check_mount_feature_enabled() && strcmp(label, "unconfined") != 0) {
1331 @@ -223,30 +1036,78 @@ static int apparmor_process_label_set(const char *inlabel, struct lxc_conf *conf
1332 ERROR("If you really want to start this container, set");
1333 ERROR("lxc.apparmor.allow_incomplete = 1");
1334 ERROR("in your container configuration file");
1340 + conf->lsm_aa_profile_computed = must_copy_string(label);
1347 + apparmor_cleanup(conf, lxcpath);
1354 + * apparmor_process_label_set: Set AppArmor process profile
1356 + * @inlabel : the profile to set
1357 + * @conf : the container configuration to use if @inlabel is NULL
1358 + * (its lsm_aa_profile_computed, set by apparmor_prepare())
1359 + * @on_exec : passed on to the lsm_process_label_fd_get/_set_at helpers
1361 + * Returns 0 on success, < 0 on failure
1363 + * Notes: This relies on /proc being available.
1365 +static int apparmor_process_label_set(const char *inlabel, struct lxc_conf *conf,
1368 + int label_fd, ret;
1370 + const char *label;
1372 + if (!aa_enabled) {
1373 + ERROR("AppArmor not enabled");
1377 + label = inlabel ? inlabel : conf->lsm_aa_profile_computed;
1379 + ERROR("LSM wasn't prepared");
1383 + /* user may request that we just ignore apparmor */
1384 + if (strcmp(label, AA_UNCHANGED) == 0) {
1385 + INFO("AppArmor profile unchanged per user request");
1389 if (strcmp(label, "unconfined") == 0 && apparmor_am_unconfined()) {
1390 - INFO("apparmor profile unchanged");
1391 + INFO("AppArmor profile unchanged");
1394 tid = lxc_raw_gettid();
1395 label_fd = lsm_process_label_fd_get(tid, on_exec);
1397 - SYSERROR("Failed to change apparmor profile to %s", label);
1398 + SYSERROR("Failed to change AppArmor profile to %s", label);
1402 ret = lsm_process_label_set_at(label_fd, label, on_exec);
1405 - ERROR("Failed to change apparmor profile to %s", label);
1406 + ERROR("Failed to change AppArmor profile to %s", label);
1410 - INFO("Changed apparmor profile to %s", label);
1411 + INFO("Changed AppArmor profile to %s", label);
1415 @@ -255,12 +1116,39 @@ static struct lsm_drv apparmor_drv = {
1416 .enabled = apparmor_enabled,
1417 .process_label_get = apparmor_process_label_get,
1418 .process_label_set = apparmor_process_label_set,
1419 + .prepare = apparmor_prepare,
1420 + .cleanup = apparmor_cleanup,
1423 struct lsm_drv *lsm_apparmor_drv_init(void)
1425 + bool have_mac_admin = false;
1427 if (!apparmor_enabled())
1430 + /* We only support generated profiles when apparmor_parser is usable */
1431 + if (!check_apparmor_parser_version())
1434 + aa_parser_available = true;
1436 + aa_can_stack = apparmor_can_stack();
1438 + aa_is_stacked = file_is_yes("/sys/kernel/security/apparmor/.ns_stacked");
1441 + have_mac_admin = lxc_proc_cap_is_set(CAP_MAC_ADMIN, CAP_EFFECTIVE);
1444 + if (!have_mac_admin)
1445 + WARN("Per-container AppArmor profiles are disabled because the mac_admin capability is missing");
1446 + else if (am_host_unpriv() && !aa_is_stacked)
1447 + WARN("Per-container AppArmor profiles are disabled because LXC is running in an unprivileged container without stacking");
1453 return &apparmor_drv;
1455 diff --git a/src/lxc/lsm/lsm.c b/src/lxc/lsm/lsm.c
1456 index 8d7de2db..46e21206 100644
1457 --- a/src/lxc/lsm/lsm.c
1458 +++ b/src/lxc/lsm/lsm.c
1459 @@ -177,11 +177,37 @@ on_error:
1462 int lsm_process_label_set(const char *label, struct lxc_conf *conf,
1463 - bool use_default, bool on_exec)
1467 ERROR("LSM driver not inited");
1470 - return drv->process_label_set(label, conf, use_default, on_exec);
1471 + return drv->process_label_set(label, conf, on_exec);
1474 +int lsm_process_prepare(struct lxc_conf *conf, const char *lxcpath)
1477 + ERROR("LSM driver not inited");
1481 + if (!drv->prepare)
1484 + return drv->prepare(conf, lxcpath);
1487 +void lsm_process_cleanup(struct lxc_conf *conf, const char *lxcpath)
1490 + ERROR("LSM driver not inited");
1494 + if (!drv->cleanup)
1497 + drv->cleanup(conf, lxcpath);
1499 diff --git a/src/lxc/lsm/lsm.h b/src/lxc/lsm/lsm.h
1500 index cafb2ac7..52e656d6 100644
1501 --- a/src/lxc/lsm/lsm.h
1502 +++ b/src/lxc/lsm/lsm.h
1503 @@ -38,17 +38,21 @@ struct lsm_drv {
1504 int (*enabled)(void);
1505 char *(*process_label_get)(pid_t pid);
1506 int (*process_label_set)(const char *label, struct lxc_conf *conf,
1507 - bool use_default, bool on_exec);
1509 + int (*prepare)(struct lxc_conf *conf, const char *lxcpath);
1510 + void (*cleanup)(struct lxc_conf *conf, const char *lxcpath);
1513 extern void lsm_init(void);
1514 extern int lsm_enabled(void);
1515 extern const char *lsm_name(void);
1516 extern char *lsm_process_label_get(pid_t pid);
1517 +extern int lsm_process_prepare(struct lxc_conf *conf, const char *lxcpath);
1518 extern int lsm_process_label_set(const char *label, struct lxc_conf *conf,
1519 - bool use_default, bool on_exec);
1521 extern int lsm_process_label_fd_get(pid_t pid, bool on_exec);
1522 extern int lsm_process_label_set_at(int label_fd, const char *label,
1524 +extern void lsm_process_cleanup(struct lxc_conf *conf, const char *lxcpath);
1526 #endif /* __LXC_LSM_H */
1527 diff --git a/src/lxc/lsm/nop.c b/src/lxc/lsm/nop.c
1528 index 7bb8121b..9397f2bf 100644
1529 --- a/src/lxc/lsm/nop.c
1530 +++ b/src/lxc/lsm/nop.c
1531 @@ -30,7 +30,7 @@ static char *nop_process_label_get(pid_t pid)
1534 static int nop_process_label_set(const char *label, struct lxc_conf *conf,
1535 - bool use_default, bool on_exec)
1540 diff --git a/src/lxc/lsm/selinux.c b/src/lxc/lsm/selinux.c
1541 index c88c18e3..9f7b7bc3 100644
1542 --- a/src/lxc/lsm/selinux.c
1543 +++ b/src/lxc/lsm/selinux.c
1544 @@ -75,15 +75,13 @@ static char *selinux_process_label_get(pid_t pid)
1545 * Notes: This relies on /proc being available.
1547 static int selinux_process_label_set(const char *inlabel, struct lxc_conf *conf,
1548 - bool use_default, bool on_exec)
1554 label = inlabel ? inlabel : conf->lsm_se_context;
1559 label = DEFAULT_LABEL;
1561 diff --git a/src/lxc/start.c b/src/lxc/start.c
1562 index 3343f9bf..6d6dc7ee 100644
1563 --- a/src/lxc/start.c
1564 +++ b/src/lxc/start.c
1565 @@ -863,9 +863,19 @@ int lxc_init(const char *name, struct lxc_handler *handler)
1567 TRACE("Initialized cgroup driver");
1569 + ret = lsm_process_prepare(conf, handler->lxcpath);
1571 + ERROR("Failed to initialize LSM");
1572 + goto out_destroy_cgroups;
1574 + TRACE("Initialized LSM");
1576 INFO("Container \"%s\" is initialized", name);
1579 +out_destroy_cgroups:
1580 + handler->cgroup_ops->destroy(handler->cgroup_ops, handler);
1582 out_delete_terminal:
1583 lxc_terminal_delete(&handler->conf->console);
1585 @@ -956,6 +966,8 @@ void lxc_fini(const char *name, struct lxc_handler *handler)
1586 while (namespace_count--)
1587 free(namespaces[namespace_count]);
1589 + lsm_process_cleanup(handler->conf, handler->lxcpath);
1591 cgroup_ops->destroy(cgroup_ops, handler);
1592 cgroup_exit(cgroup_ops);
1594 @@ -1235,7 +1247,7 @@ static int do_start(void *data)
1597 /* Set the label to change to when we exec(2) the container's init. */
1598 - ret = lsm_process_label_set(NULL, handler->conf, 1, 1);
1599 + ret = lsm_process_label_set(NULL, handler->conf, true);
1601 goto out_warn_father;