git.proxmox.com Git - lxc.git/blob - debian/patches/extra/0010-apparmor-profile-generation.patch
bump version to 3.0.1+pve1-1
[lxc.git] / debian / patches / extra / 0010-apparmor-profile-generation.patch
1 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2 From: Wolfgang Bumiller <w.bumiller@proxmox.com>
3 Date: Wed, 25 Jul 2018 12:11:31 +0200
4 Subject: [PATCH] apparmor: profile generation
5
6 This copies lxd's apparmor profile generation. It tries to
7 detect features such as cgroup namespaces, apparmor
8 namespaces and stacking support, and conditionally includes
9 profile parts for unprivileged containers.
10
11 This introduces the following changes to the configuration:
12 lxc.apparmor.profile = generated
13 The fixed value 'generated' will cause this
14 functionality to be used; otherwise there should be no
15 functional changes happening unless specifically
16 requested with the next key:
17 lxc.apparmor.allow_nesting
18 This is a boolean which, if enabled, causes the
19 following changes: When generated apparmor profiles are
20 used, they will contain the necessary changes to allow
21 creating a nested container. In addition to the usual
22 mount points, /dev/.lxc/proc and /dev/.lxc/sys will
23 contain procfs and sysfs mount points without the lxcfs
24 overlays, which, if generated apparmor profiles are
25 being used, will not be read/writable directly.
26 lxc.apparmor.raw
27 A list of raw apparmor profile lines to append to the
28 profile. Only valid when using generated profiles.
29
30 The following apparmor profile lines have not been copied
31 from lxd:
32
33 mount /var/lib/lxd/shmounts/ -> /var/lib/lxd/shmounts/,
34 mount none -> /var/lib/lxd/shmounts/,
35 mount options=bind /var/lib/lxd/shmounts/** -> /var/lib/lxd/**,
36
37 They should be added via lxc.apparmor.raw entries by lxd.
38
39 In order for apparmor_parser's cache to be of use, this adds
40 a --with-apparmor-cache-dir ./configure option.
41
42 Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
43 (cherry picked from commit 1800f9247357948fd11c9da73b1943a8a7b6882b)
44 ---
45 configure.ac | 8 +
46 src/lxc/Makefile.am | 1 +
47 src/lxc/conf.c | 43 ++-
48 src/lxc/conf.h | 8 +-
49 src/lxc/confile.c | 95 +++++
50 src/lxc/criu.c | 3 +-
51 src/lxc/lsm/apparmor.c | 974 ++++++++++++++++++++++++++++++++++++++++++++++---
52 src/lxc/lsm/lsm.c | 30 +-
53 src/lxc/lsm/lsm.h | 8 +-
54 src/lxc/lsm/nop.c | 2 +-
55 src/lxc/lsm/selinux.c | 4 +-
56 src/lxc/start.c | 14 +-
57 12 files changed, 1134 insertions(+), 56 deletions(-)
58
59 diff --git a/configure.ac b/configure.ac
60 index ea312bf3..739e100a 100644
61 --- a/configure.ac
62 +++ b/configure.ac
63 @@ -469,6 +469,13 @@ AC_ARG_WITH([cgroup-pattern],
64 [pattern for container cgroups]
65 )], [], [with_cgroup_pattern=['lxc/%n']])
66
67 +# The path for the apparmor_parser's cache for generated apparmor profiles
68 +AC_ARG_WITH([apparmor-cache-dir],
69 + [AC_HELP_STRING(
70 + [--with-apparmor-cache-dir=dir],
71 + [path for apparmor_parser cache]
72 + )], [], [with_apparmor_cache_dir=['${localstatedir}/cache/lxc/apparmor']])
73 +
74 # Container log path. By default, use $lxcpath.
75 AC_MSG_CHECKING([Whether to place logfiles in container config path])
76 AC_ARG_ENABLE([configpath-log],
77 @@ -515,6 +522,7 @@ AS_AC_EXPAND(LXCBINHOOKDIR, "$libexecdir/lxc/hooks")
78 AS_AC_EXPAND(LXCINITDIR, "$libexecdir")
79 AS_AC_EXPAND(LOGPATH, "$with_log_path")
80 AS_AC_EXPAND(RUNTIME_PATH, "$with_runtime_path")
81 +AS_AC_EXPAND(APPARMOR_CACHE_DIR, "$with_apparmor_cache_dir")
82 AC_SUBST(DEFAULT_CGROUP_PATTERN, ["$with_cgroup_pattern"])
83
84 # We need the install path so criu knows where to reference the hook scripts.
85 diff --git a/src/lxc/Makefile.am b/src/lxc/Makefile.am
86 index c5e46ac2..1359eb3e 100644
87 --- a/src/lxc/Makefile.am
88 +++ b/src/lxc/Makefile.am
89 @@ -174,6 +174,7 @@ AM_CFLAGS = -DLXCROOTFSMOUNT=\"$(LXCROOTFSMOUNT)\" \
90 -DDEFAULT_CGROUP_PATTERN=\"$(DEFAULT_CGROUP_PATTERN)\" \
91 -DRUNTIME_PATH=\"$(RUNTIME_PATH)\" \
92 -DSBINDIR=\"$(SBINDIR)\" \
93 + -DAPPARMOR_CACHE_DIR=\"$(APPARMOR_CACHE_DIR)\" \
94 -I $(top_srcdir)/src \
95 -I $(top_srcdir)/src/lxc \
96 -I $(top_srcdir)/src/lxc/storage \
97 diff --git a/src/lxc/conf.c b/src/lxc/conf.c
98 index d36987c8..df805e6c 100644
99 --- a/src/lxc/conf.c
100 +++ b/src/lxc/conf.c
101 @@ -2334,7 +2334,23 @@ static int setup_mount(const struct lxc_conf *conf,
102 return ret;
103 }
104
105 -FILE *make_anonymous_mount_file(struct lxc_list *mount)
106 +/*
107 + * In order for nested containers to be able to mount /proc and /sys they need
108 + * to see "pure" proc and sysfs mount points with nothing mounted on top
109 + * (like lxcfs).
110 + * For this we provide proc and sysfs in /dev/.lxc/{proc,sys} while using an
111 + * apparmor rule to deny access to them. This is mostly for convenience: The
112 + * container's root user can mount them anyway and thus has access to the two
113 + * file systems. But a non-root user in the container should not be allowed to
114 + * access them as a side effect without explicitly allowing it.
115 + */
116 +static const char nesting_helpers[] =
117 +"proc dev/.lxc/proc proc create=dir,optional\n"
118 +"sys dev/.lxc/sys sysfs create=dir,optional\n"
119 +;
120 +
121 +FILE *make_anonymous_mount_file(struct lxc_list *mount,
122 + bool include_nesting_helpers)
123 {
124 int ret;
125 char *mount_entry;
126 @@ -2376,6 +2392,13 @@ FILE *make_anonymous_mount_file(struct lxc_list *mount)
127 goto on_error;
128 }
129
130 + if (include_nesting_helpers) {
131 + ret = lxc_write_nointr(fd, nesting_helpers,
132 + sizeof(nesting_helpers) - 1);
133 + if (ret != sizeof(nesting_helpers) - 1)
134 + goto on_error;
135 + }
136 +
137 ret = lseek(fd, 0, SEEK_SET);
138 if (ret < 0)
139 goto on_error;
140 @@ -2396,7 +2419,7 @@ static int setup_mount_entries(const struct lxc_conf *conf,
141 int ret;
142 FILE *f;
143
144 - f = make_anonymous_mount_file(mount);
145 + f = make_anonymous_mount_file(mount, conf->lsm_aa_allow_nesting);
146 if (!f)
147 return -1;
148
149 @@ -2712,6 +2735,7 @@ struct lxc_conf *lxc_conf_init(void)
150 lxc_list_init(&new->groups);
151 lxc_list_init(&new->state_clients);
152 new->lsm_aa_profile = NULL;
153 + lxc_list_init(&new->lsm_aa_raw);
154 new->lsm_se_context = NULL;
155 new->tmp_umount_proc = false;
156
157 @@ -3996,6 +4020,19 @@ void lxc_clear_includes(struct lxc_conf *conf)
158 }
159 }
160
161 +int lxc_clear_apparmor_raw(struct lxc_conf *c)
162 +{
163 + struct lxc_list *it, *next;
164 +
165 + lxc_list_for_each_safe (it, &c->lsm_aa_raw, next) {
166 + lxc_list_del(it);
167 + free(it->elem);
168 + free(it);
169 + }
170 +
171 + return 0;
172 +}
173 +
174 void lxc_conf_free(struct lxc_conf *conf)
175 {
176 if (!conf)
177 @@ -4023,6 +4060,7 @@ void lxc_conf_free(struct lxc_conf *conf)
178 free(conf->syslog);
179 lxc_free_networks(&conf->network);
180 free(conf->lsm_aa_profile);
181 + free(conf->lsm_aa_profile_computed);
182 free(conf->lsm_se_context);
183 lxc_seccomp_free(conf);
184 lxc_clear_config_caps(conf);
185 @@ -4039,6 +4077,7 @@ void lxc_conf_free(struct lxc_conf *conf)
186 lxc_clear_limits(conf, "lxc.prlimit");
187 lxc_clear_sysctls(conf, "lxc.sysctl");
188 lxc_clear_procs(conf, "lxc.proc");
189 + lxc_clear_apparmor_raw(conf);
190 free(conf->cgroup_meta.dir);
191 free(conf->cgroup_meta.controllers);
192 free(conf);
193 diff --git a/src/lxc/conf.h b/src/lxc/conf.h
194 index f7a879c3..f5085d8c 100644
195 --- a/src/lxc/conf.h
196 +++ b/src/lxc/conf.h
197 @@ -272,7 +272,11 @@ struct lxc_conf {
198 };
199
200 char *lsm_aa_profile;
201 + char *lsm_aa_profile_computed;
202 + bool lsm_aa_profile_created;
203 + unsigned int lsm_aa_allow_nesting;
204 unsigned int lsm_aa_allow_incomplete;
205 + struct lxc_list lsm_aa_raw;
206 char *lsm_se_context;
207 bool tmp_umount_proc;
208 char *seccomp; /* filename with the seccomp rules */
209 @@ -417,7 +421,8 @@ extern int parse_mntopts(const char *mntopts, unsigned long *mntflags,
210 extern void tmp_proc_unmount(struct lxc_conf *lxc_conf);
211 extern void remount_all_slave(void);
212 extern void suggest_default_idmap(void);
213 -extern FILE *make_anonymous_mount_file(struct lxc_list *mount);
214 +extern FILE *make_anonymous_mount_file(struct lxc_list *mount,
215 + bool include_nesting_helpers);
216 extern struct lxc_list *sort_cgroup_settings(struct lxc_list *cgroup_settings);
217 extern unsigned long add_required_remount_flags(const char *s, const char *d,
218 unsigned long flags);
219 @@ -431,5 +436,6 @@ extern int setup_sysctl_parameters(struct lxc_list *sysctls);
220 extern int lxc_clear_sysctls(struct lxc_conf *c, const char *key);
221 extern int setup_proc_filesystem(struct lxc_list *procs, pid_t pid);
222 extern int lxc_clear_procs(struct lxc_conf *c, const char *key);
223 +extern int lxc_clear_apparmor_raw(struct lxc_conf *c);
224
225 #endif /* __LXC_CONF_H */
226 diff --git a/src/lxc/confile.c b/src/lxc/confile.c
227 index 8a7505da..097b8806 100644
228 --- a/src/lxc/confile.c
229 +++ b/src/lxc/confile.c
230 @@ -84,7 +84,9 @@ lxc_log_define(confile, lxc);
231
232 lxc_config_define(autodev);
233 lxc_config_define(apparmor_allow_incomplete);
234 +lxc_config_define(apparmor_allow_nesting);
235 lxc_config_define(apparmor_profile);
236 +lxc_config_define(apparmor_raw);
237 lxc_config_define(cap_drop);
238 lxc_config_define(cap_keep);
239 lxc_config_define(cgroup_controller);
240 @@ -158,6 +160,8 @@ static struct lxc_config_t config[] = {
241 { "lxc.arch", set_config_personality, get_config_personality, clr_config_personality, },
242 { "lxc.apparmor.profile", set_config_apparmor_profile, get_config_apparmor_profile, clr_config_apparmor_profile, },
243 { "lxc.apparmor.allow_incomplete", set_config_apparmor_allow_incomplete, get_config_apparmor_allow_incomplete, clr_config_apparmor_allow_incomplete, },
244 + { "lxc.apparmor.allow_nesting", set_config_apparmor_allow_nesting, get_config_apparmor_allow_nesting, clr_config_apparmor_allow_nesting, },
245 + { "lxc.apparmor.raw", set_config_apparmor_raw, get_config_apparmor_raw, clr_config_apparmor_raw, },
246 { "lxc.autodev", set_config_autodev, get_config_autodev, clr_config_autodev, },
247 { "lxc.cap.drop", set_config_cap_drop, get_config_cap_drop, clr_config_cap_drop, },
248 { "lxc.cap.keep", set_config_cap_keep, get_config_cap_keep, clr_config_cap_keep, },
249 @@ -1118,6 +1122,52 @@ static int set_config_apparmor_allow_incomplete(const char *key,
250 return 0;
251 }
252
253 +static int set_config_apparmor_allow_nesting(const char *key,
254 + const char *value,
255 + struct lxc_conf *lxc_conf,
256 + void *data)
257 +{
258 + if (lxc_config_value_empty(value))
259 + return clr_config_apparmor_allow_nesting(key, lxc_conf, NULL);
260 +
261 + if (lxc_safe_uint(value, &lxc_conf->lsm_aa_allow_nesting) < 0)
262 + return -1;
263 +
264 + if (lxc_conf->lsm_aa_allow_nesting > 1)
265 + return -1;
266 +
267 + return 0;
268 +}
269 +
270 +static int set_config_apparmor_raw(const char *key,
271 + const char *value,
272 + struct lxc_conf *lxc_conf,
273 + void *data)
274 +{
275 + char *elem;
276 + struct lxc_list *list;
277 +
278 + if (lxc_config_value_empty(value))
279 + return lxc_clear_apparmor_raw(lxc_conf);
280 +
281 + list = malloc(sizeof(*list));
282 + if (!list) {
283 + errno = ENOMEM;
284 + return -1;
285 + }
286 +
287 + elem = strdup(value);
288 + if (!elem) {
289 + free(list);
290 + return -1;
291 + }
292 + list->elem = elem;
293 +
294 + lxc_list_add_tail(&lxc_conf->lsm_aa_raw, list);
295 +
296 + return 0;
297 +}
298 +
299 static int set_config_selinux_context(const char *key, const char *value,
300 struct lxc_conf *lxc_conf, void *data)
301 {
302 @@ -2959,6 +3009,34 @@ static int get_config_apparmor_allow_incomplete(const char *key, char *retv,
303 c->lsm_aa_allow_incomplete);
304 }
305
306 +static int get_config_apparmor_allow_nesting(const char *key, char *retv,
307 + int inlen, struct lxc_conf *c,
308 + void *data)
309 +{
310 + return lxc_get_conf_int(c, retv, inlen,
311 + c->lsm_aa_allow_nesting);
312 +}
313 +
314 +static int get_config_apparmor_raw(const char *key, char *retv,
315 + int inlen, struct lxc_conf *c,
316 + void *data)
317 +{
318 + int len;
319 + struct lxc_list *it;
320 + int fulllen = 0;
321 +
322 + if (!retv)
323 + inlen = 0;
324 + else
325 + memset(retv, 0, inlen);
326 +
327 + lxc_list_for_each(it, &c->lsm_aa_raw) {
328 + strprint(retv, inlen, "%s\n", (char *)it->elem);
329 + }
330 +
331 + return fulllen;
332 +}
333 +
334 static int get_config_selinux_context(const char *key, char *retv, int inlen,
335 struct lxc_conf *c, void *data)
336 {
337 @@ -3749,6 +3827,21 @@ static inline int clr_config_apparmor_allow_incomplete(const char *key,
338 return 0;
339 }
340
341 +static inline int clr_config_apparmor_allow_nesting(const char *key,
342 + struct lxc_conf *c,
343 + void *data)
344 +{
345 + c->lsm_aa_allow_nesting = 0;
346 + return 0;
347 +}
348 +
349 +static inline int clr_config_apparmor_raw(const char *key,
350 + struct lxc_conf *c,
351 + void *data)
352 +{
353 + return lxc_clear_apparmor_raw(c);
354 +}
355 +
356 static inline int clr_config_selinux_context(const char *key,
357 struct lxc_conf *c, void *data)
358 {
359 @@ -4941,7 +5034,9 @@ int lxc_list_subkeys(struct lxc_conf *conf, const char *key, char *retv,
360
361 if (!strcmp(key, "lxc.apparmor")) {
362 strprint(retv, inlen, "allow_incomplete\n");
363 + strprint(retv, inlen, "allow_nesting\n");
364 strprint(retv, inlen, "profile\n");
365 + strprint(retv, inlen, "raw\n");
366 } else if (!strcmp(key, "lxc.cgroup")) {
367 strprint(retv, inlen, "dir\n");
368 } else if (!strcmp(key, "lxc.selinux")) {
369 diff --git a/src/lxc/criu.c b/src/lxc/criu.c
370 index 456d19cf..02d301ac 100644
371 --- a/src/lxc/criu.c
372 +++ b/src/lxc/criu.c
373 @@ -378,7 +378,8 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct criu_opts *opts)
374 DECLARE_ARG(opts->user->action_script);
375 }
376
377 - mnts = make_anonymous_mount_file(&opts->c->lxc_conf->mount_list);
378 + mnts = make_anonymous_mount_file(&opts->c->lxc_conf->mount_list,
379 + opts->c->lxc_conf->lsm_aa_allow_nesting);
380 if (!mnts)
381 goto err;
382
383 diff --git a/src/lxc/lsm/apparmor.c b/src/lxc/lsm/apparmor.c
384 index 5fe6d525..ec3f805d 100644
385 --- a/src/lxc/lsm/apparmor.c
386 +++ b/src/lxc/lsm/apparmor.c
387 @@ -33,11 +33,18 @@
388 #include "conf.h"
389 #include "utils.h"
390 #include "initutils.h"
391 +#include "caps.h"
392 +#include "parse.h"
393
394 lxc_log_define(apparmor, lsm);
395
396 /* set by lsm_apparmor_drv_init if true */
397 static int aa_enabled = 0;
398 +static bool aa_parser_available = false;
399 +static bool aa_supports_unix = false;
400 +static bool aa_can_stack = false;
401 +static bool aa_is_stacked = false;
402 +static bool aa_admin = false;
403
404 static int mount_features_enabled = 0;
405
406 @@ -46,6 +53,332 @@ static int mount_features_enabled = 0;
407 #define AA_MOUNT_RESTR "/sys/kernel/security/apparmor/features/mount/mask"
408 #define AA_ENABLED_FILE "/sys/module/apparmor/parameters/enabled"
409 #define AA_UNCHANGED "unchanged"
410 +#define AA_GENERATED "generated"
411 +
412 +#define AA_CMD_LOAD 'r'
413 +#define AA_CMD_UNLOAD 'R'
414 +#define AA_CMD_PARSE 'Q'
415 +
416 +static const char AA_PROFILE_BASE[] =
417 +" ### Base profile\n"
418 +" capability,\n"
419 +" dbus,\n"
420 +" file,\n"
421 +" network,\n"
422 +" umount,\n"
423 +"\n"
424 +" # Allow us to receive signals from anywhere.\n"
425 +" signal (receive),\n"
426 +"\n"
427 +" # Allow us to send signals to ourselves\n"
428 +" signal peer=@{profile_name},\n"
429 +"\n"
430 +" # Allow other processes to read our /proc entries, futexes, perf tracing and\n"
431 +" # kcmp for now (they will need 'read' in the first place). Administrators can\n"
432 +" # override with:\n"
433 +" # deny ptrace (readby) ...\n"
434 +" ptrace (readby),\n"
435 +"\n"
436 +" # Allow other processes to trace us by default (they will need 'trace' in\n"
437 +" # the first place). Administrators can override with:\n"
438 +" # deny ptrace (tracedby) ...\n"
439 +" ptrace (tracedby),\n"
440 +"\n"
441 +" # Allow us to ptrace ourselves\n"
442 +" ptrace peer=@{profile_name},\n"
443 +"\n"
444 +" # ignore DENIED message on / remount\n"
445 +" deny mount options=(ro, remount) -> /,\n"
446 +" deny mount options=(ro, remount, silent) -> /,\n"
447 +"\n"
448 +" # allow tmpfs mounts everywhere\n"
449 +" mount fstype=tmpfs,\n"
450 +"\n"
451 +" # allow hugetlbfs mounts everywhere\n"
452 +" mount fstype=hugetlbfs,\n"
453 +"\n"
454 +" # allow mqueue mounts everywhere\n"
455 +" mount fstype=mqueue,\n"
456 +"\n"
457 +" # allow fuse mounts everywhere\n"
458 +" mount fstype=fuse,\n"
459 +" mount fstype=fuse.*,\n"
460 +"\n"
461 +" # deny access under /proc/bus to avoid e.g. messing with pci devices directly\n"
462 +" deny @{PROC}/bus/** wklx,\n"
463 +"\n"
464 +" # deny writes in /proc/sys/fs but allow binfmt_misc to be mounted\n"
465 +" mount fstype=binfmt_misc -> /proc/sys/fs/binfmt_misc/,\n"
466 +" deny @{PROC}/sys/fs/** wklx,\n"
467 +"\n"
468 +" # allow efivars to be mounted, writing to it will be blocked though\n"
469 +" mount fstype=efivarfs -> /sys/firmware/efi/efivars/,\n"
470 +"\n"
471 +" # block some other dangerous paths\n"
472 +" deny @{PROC}/kcore rwklx,\n"
473 +" deny @{PROC}/sysrq-trigger rwklx,\n"
474 +"\n"
475 +" # deny writes in /sys except for /sys/fs/cgroup, also allow\n"
476 +" # fusectl, securityfs and debugfs to be mounted there (read-only)\n"
477 +" mount fstype=fusectl -> /sys/fs/fuse/connections/,\n"
478 +" mount fstype=securityfs -> /sys/kernel/security/,\n"
479 +" mount fstype=debugfs -> /sys/kernel/debug/,\n"
480 +" deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/,\n"
481 +" mount fstype=proc -> /proc/,\n"
482 +" mount fstype=sysfs -> /sys/,\n"
483 +" mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/,\n"
484 +" deny /sys/firmware/efi/efivars/** rwklx,\n"
485 +" # note, /sys/kernel/security/** handled below\n"
486 +" mount options=(ro, nosuid, nodev, noexec, remount, strictatime) -> /sys/fs/cgroup/,\n"
487 +"\n"
488 +" # deny reads from debugfs\n"
489 +" deny /sys/kernel/debug/{,**} rwklx,\n"
490 +"\n"
491 +" # allow paths to be made slave, shared, private or unbindable\n"
492 +" # FIXME: This currently doesn't work due to the apparmor parser treating those as allowing all mounts.\n"
493 +"# mount options=(rw,make-slave) -> **,\n"
494 +"# mount options=(rw,make-rslave) -> **,\n"
495 +"# mount options=(rw,make-shared) -> **,\n"
496 +"# mount options=(rw,make-rshared) -> **,\n"
497 +"# mount options=(rw,make-private) -> **,\n"
498 +"# mount options=(rw,make-rprivate) -> **,\n"
499 +"# mount options=(rw,make-unbindable) -> **,\n"
500 +"# mount options=(rw,make-runbindable) -> **,\n"
501 +"\n"
502 +" # allow bind-mounts of anything except /proc, /sys and /dev\n"
503 +" mount options=(rw,bind) /[^spd]*{,/**},\n"
504 +" mount options=(rw,bind) /d[^e]*{,/**},\n"
505 +" mount options=(rw,bind) /de[^v]*{,/**},\n"
506 +" mount options=(rw,bind) /dev/.[^l]*{,/**},\n"
507 +" mount options=(rw,bind) /dev/.l[^x]*{,/**},\n"
508 +" mount options=(rw,bind) /dev/.lx[^c]*{,/**},\n"
509 +" mount options=(rw,bind) /dev/.lxc?*{,/**},\n"
510 +" mount options=(rw,bind) /dev/[^.]*{,/**},\n"
511 +" mount options=(rw,bind) /dev?*{,/**},\n"
512 +" mount options=(rw,bind) /p[^r]*{,/**},\n"
513 +" mount options=(rw,bind) /pr[^o]*{,/**},\n"
514 +" mount options=(rw,bind) /pro[^c]*{,/**},\n"
515 +" mount options=(rw,bind) /proc?*{,/**},\n"
516 +" mount options=(rw,bind) /s[^y]*{,/**},\n"
517 +" mount options=(rw,bind) /sy[^s]*{,/**},\n"
518 +" mount options=(rw,bind) /sys?*{,/**},\n"
519 +"\n"
520 +" # allow read-only bind-mounts of anything except /proc, /sys and /dev\n"
521 +" mount options=(ro,remount,bind) -> /[^spd]*{,/**},\n"
522 +" mount options=(ro,remount,bind) -> /d[^e]*{,/**},\n"
523 +" mount options=(ro,remount,bind) -> /de[^v]*{,/**},\n"
524 +" mount options=(ro,remount,bind) -> /dev/.[^l]*{,/**},\n"
525 +" mount options=(ro,remount,bind) -> /dev/.l[^x]*{,/**},\n"
526 +" mount options=(ro,remount,bind) -> /dev/.lx[^c]*{,/**},\n"
527 +" mount options=(ro,remount,bind) -> /dev/.lxc?*{,/**},\n"
528 +" mount options=(ro,remount,bind) -> /dev/[^.]*{,/**},\n"
529 +" mount options=(ro,remount,bind) -> /dev?*{,/**},\n"
530 +" mount options=(ro,remount,bind) -> /p[^r]*{,/**},\n"
531 +" mount options=(ro,remount,bind) -> /pr[^o]*{,/**},\n"
532 +" mount options=(ro,remount,bind) -> /pro[^c]*{,/**},\n"
533 +" mount options=(ro,remount,bind) -> /proc?*{,/**},\n"
534 +" mount options=(ro,remount,bind) -> /s[^y]*{,/**},\n"
535 +" mount options=(ro,remount,bind) -> /sy[^s]*{,/**},\n"
536 +" mount options=(ro,remount,bind) -> /sys?*{,/**},\n"
537 +"\n"
538 +" # allow moving mounts except for /proc, /sys and /dev\n"
539 +" mount options=(rw,move) /[^spd]*{,/**},\n"
540 +" mount options=(rw,move) /d[^e]*{,/**},\n"
541 +" mount options=(rw,move) /de[^v]*{,/**},\n"
542 +" mount options=(rw,move) /dev/.[^l]*{,/**},\n"
543 +" mount options=(rw,move) /dev/.l[^x]*{,/**},\n"
544 +" mount options=(rw,move) /dev/.lx[^c]*{,/**},\n"
545 +" mount options=(rw,move) /dev/.lxc?*{,/**},\n"
546 +" mount options=(rw,move) /dev/[^.]*{,/**},\n"
547 +" mount options=(rw,move) /dev?*{,/**},\n"
548 +" mount options=(rw,move) /p[^r]*{,/**},\n"
549 +" mount options=(rw,move) /pr[^o]*{,/**},\n"
550 +" mount options=(rw,move) /pro[^c]*{,/**},\n"
551 +" mount options=(rw,move) /proc?*{,/**},\n"
552 +" mount options=(rw,move) /s[^y]*{,/**},\n"
553 +" mount options=(rw,move) /sy[^s]*{,/**},\n"
554 +" mount options=(rw,move) /sys?*{,/**},\n"
555 +"\n"
556 +" # generated by: lxc-generate-aa-rules.py container-rules.base\n"
557 +" deny /proc/sys/[^kn]*{,/**} wklx,\n"
558 +" deny /proc/sys/k[^e]*{,/**} wklx,\n"
559 +" deny /proc/sys/ke[^r]*{,/**} wklx,\n"
560 +" deny /proc/sys/ker[^n]*{,/**} wklx,\n"
561 +" deny /proc/sys/kern[^e]*{,/**} wklx,\n"
562 +" deny /proc/sys/kerne[^l]*{,/**} wklx,\n"
563 +" deny /proc/sys/kernel/[^smhd]*{,/**} wklx,\n"
564 +" deny /proc/sys/kernel/d[^o]*{,/**} wklx,\n"
565 +" deny /proc/sys/kernel/do[^m]*{,/**} wklx,\n"
566 +" deny /proc/sys/kernel/dom[^a]*{,/**} wklx,\n"
567 +" deny /proc/sys/kernel/doma[^i]*{,/**} wklx,\n"
568 +" deny /proc/sys/kernel/domai[^n]*{,/**} wklx,\n"
569 +" deny /proc/sys/kernel/domain[^n]*{,/**} wklx,\n"
570 +" deny /proc/sys/kernel/domainn[^a]*{,/**} wklx,\n"
571 +" deny /proc/sys/kernel/domainna[^m]*{,/**} wklx,\n"
572 +" deny /proc/sys/kernel/domainnam[^e]*{,/**} wklx,\n"
573 +" deny /proc/sys/kernel/domainname?*{,/**} wklx,\n"
574 +" deny /proc/sys/kernel/h[^o]*{,/**} wklx,\n"
575 +" deny /proc/sys/kernel/ho[^s]*{,/**} wklx,\n"
576 +" deny /proc/sys/kernel/hos[^t]*{,/**} wklx,\n"
577 +" deny /proc/sys/kernel/host[^n]*{,/**} wklx,\n"
578 +" deny /proc/sys/kernel/hostn[^a]*{,/**} wklx,\n"
579 +" deny /proc/sys/kernel/hostna[^m]*{,/**} wklx,\n"
580 +" deny /proc/sys/kernel/hostnam[^e]*{,/**} wklx,\n"
581 +" deny /proc/sys/kernel/hostname?*{,/**} wklx,\n"
582 +" deny /proc/sys/kernel/m[^s]*{,/**} wklx,\n"
583 +" deny /proc/sys/kernel/ms[^g]*{,/**} wklx,\n"
584 +" deny /proc/sys/kernel/msg*/** wklx,\n"
585 +" deny /proc/sys/kernel/s[^he]*{,/**} wklx,\n"
586 +" deny /proc/sys/kernel/se[^m]*{,/**} wklx,\n"
587 +" deny /proc/sys/kernel/sem*/** wklx,\n"
588 +" deny /proc/sys/kernel/sh[^m]*{,/**} wklx,\n"
589 +" deny /proc/sys/kernel/shm*/** wklx,\n"
590 +" deny /proc/sys/kernel?*{,/**} wklx,\n"
591 +" deny /proc/sys/n[^e]*{,/**} wklx,\n"
592 +" deny /proc/sys/ne[^t]*{,/**} wklx,\n"
593 +" deny /proc/sys/net?*{,/**} wklx,\n"
594 +" deny /sys/[^fdck]*{,/**} wklx,\n"
595 +" deny /sys/c[^l]*{,/**} wklx,\n"
596 +" deny /sys/cl[^a]*{,/**} wklx,\n"
597 +" deny /sys/cla[^s]*{,/**} wklx,\n"
598 +" deny /sys/clas[^s]*{,/**} wklx,\n"
599 +" deny /sys/class/[^n]*{,/**} wklx,\n"
600 +" deny /sys/class/n[^e]*{,/**} wklx,\n"
601 +" deny /sys/class/ne[^t]*{,/**} wklx,\n"
602 +" deny /sys/class/net?*{,/**} wklx,\n"
603 +" deny /sys/class?*{,/**} wklx,\n"
604 +" deny /sys/d[^e]*{,/**} wklx,\n"
605 +" deny /sys/de[^v]*{,/**} wklx,\n"
606 +" deny /sys/dev[^i]*{,/**} wklx,\n"
607 +" deny /sys/devi[^c]*{,/**} wklx,\n"
608 +" deny /sys/devic[^e]*{,/**} wklx,\n"
609 +" deny /sys/device[^s]*{,/**} wklx,\n"
610 +" deny /sys/devices/[^v]*{,/**} wklx,\n"
611 +" deny /sys/devices/v[^i]*{,/**} wklx,\n"
612 +" deny /sys/devices/vi[^r]*{,/**} wklx,\n"
613 +" deny /sys/devices/vir[^t]*{,/**} wklx,\n"
614 +" deny /sys/devices/virt[^u]*{,/**} wklx,\n"
615 +" deny /sys/devices/virtu[^a]*{,/**} wklx,\n"
616 +" deny /sys/devices/virtua[^l]*{,/**} wklx,\n"
617 +" deny /sys/devices/virtual/[^n]*{,/**} wklx,\n"
618 +" deny /sys/devices/virtual/n[^e]*{,/**} wklx,\n"
619 +" deny /sys/devices/virtual/ne[^t]*{,/**} wklx,\n"
620 +" deny /sys/devices/virtual/net?*{,/**} wklx,\n"
621 +" deny /sys/devices/virtual?*{,/**} wklx,\n"
622 +" deny /sys/devices?*{,/**} wklx,\n"
623 +" deny /sys/f[^s]*{,/**} wklx,\n"
624 +" deny /sys/fs/[^c]*{,/**} wklx,\n"
625 +" deny /sys/fs/c[^g]*{,/**} wklx,\n"
626 +" deny /sys/fs/cg[^r]*{,/**} wklx,\n"
627 +" deny /sys/fs/cgr[^o]*{,/**} wklx,\n"
628 +" deny /sys/fs/cgro[^u]*{,/**} wklx,\n"
629 +" deny /sys/fs/cgrou[^p]*{,/**} wklx,\n"
630 +" deny /sys/fs/cgroup?*{,/**} wklx,\n"
631 +" deny /sys/fs?*{,/**} wklx,\n"
632 +;
633 +
634 +static const char AA_PROFILE_UNIX_SOCKETS[] =
635 +"\n"
636 +" ### Feature: unix\n"
637 +" # Allow receive via unix sockets from anywhere\n"
638 +" unix (receive),\n"
639 +"\n"
640 +" # Allow all unix sockets in the container\n"
641 +" unix peer=(label=@{profile_name}),\n"
642 +;
643 +
644 +static const char AA_PROFILE_CGROUP_NAMESPACES[] =
645 +"\n"
646 +" ### Feature: cgroup namespace\n"
647 +" mount fstype=cgroup -> /sys/fs/cgroup/**,\n"
648 +" mount fstype=cgroup2 -> /sys/fs/cgroup/**,\n"
649 +;
650 +
651 +/* '_BASE' because we still need to append generated change_profile rules */
652 +static const char AA_PROFILE_STACKING_BASE[] =
653 +"\n"
654 +" ### Feature: apparmor stacking\n"
655 +" ### Configuration: apparmor profile loading (in namespace)\n"
656 +" deny /sys/k[^e]*{,/**} wklx,\n"
657 +" deny /sys/ke[^r]*{,/**} wklx,\n"
658 +" deny /sys/ker[^n]*{,/**} wklx,\n"
659 +" deny /sys/kern[^e]*{,/**} wklx,\n"
660 +" deny /sys/kerne[^l]*{,/**} wklx,\n"
661 +" deny /sys/kernel/[^s]*{,/**} wklx,\n"
662 +" deny /sys/kernel/s[^e]*{,/**} wklx,\n"
663 +" deny /sys/kernel/se[^c]*{,/**} wklx,\n"
664 +" deny /sys/kernel/sec[^u]*{,/**} wklx,\n"
665 +" deny /sys/kernel/secu[^r]*{,/**} wklx,\n"
666 +" deny /sys/kernel/secur[^i]*{,/**} wklx,\n"
667 +" deny /sys/kernel/securi[^t]*{,/**} wklx,\n"
668 +" deny /sys/kernel/securit[^y]*{,/**} wklx,\n"
669 +" deny /sys/kernel/security/[^a]*{,/**} wklx,\n"
670 +" deny /sys/kernel/security/a[^p]*{,/**} wklx,\n"
671 +" deny /sys/kernel/security/ap[^p]*{,/**} wklx,\n"
672 +" deny /sys/kernel/security/app[^a]*{,/**} wklx,\n"
673 +" deny /sys/kernel/security/appa[^r]*{,/**} wklx,\n"
674 +" deny /sys/kernel/security/appar[^m]*{,/**} wklx,\n"
675 +" deny /sys/kernel/security/apparm[^o]*{,/**} wklx,\n"
676 +" deny /sys/kernel/security/apparmo[^r]*{,/**} wklx,\n"
677 +" deny /sys/kernel/security/apparmor?*{,/**} wklx,\n"
678 +" deny /sys/kernel/security?*{,/**} wklx,\n"
679 +" deny /sys/kernel?*{,/**} wklx,\n"
680 +;
681 +
682 +static const char AA_PROFILE_NO_STACKING[] =
683 +"\n"
684 +" ### Feature: apparmor stacking (not present)\n"
685 +" deny /sys/k*{,/**} rwklx,\n"
686 +;
687 +
688 +/* '_BASE' because we need to append change_profile for stacking */
689 +static const char AA_PROFILE_NESTING_BASE[] =
690 +"\n"
691 +" ### Configuration: nesting\n"
692 +" pivot_root,\n"
693 +" ptrace,\n"
694 +" signal,\n"
695 +"\n"
696 + /* NOTE: See conf.c's "nesting_helpers" for details. */
697 +" deny /dev/.lxc/proc/** rw,\n"
698 +" deny /dev/.lxc/sys/** rw,\n"
699 +"\n"
700 +" mount fstype=proc -> /usr/lib/*/lxc/**,\n"
701 +" mount fstype=sysfs -> /usr/lib/*/lxc/**,\n"
702 +" mount options=(rw,bind),\n"
703 +" mount options=(rw,rbind),\n"
704 +" mount options=(rw,make-rshared),\n"
705 +"\n"
706 + /* FIXME: What's the state here on apparmor's side? */
707 +" # there doesn't seem to be a way to ask for:\n"
708 +" # mount options=(ro,nosuid,nodev,noexec,remount,bind),\n"
709 +" # as we always get mount to $cdir/proc/sys with those flags denied\n"
710 +" # So allow all mounts until that is straightened out:\n"
711 +" mount,\n"
712 +;
713 +
714 +static const char AA_PROFILE_UNPRIVILEGED[] =
715 +"\n"
716 +" ### Configuration: unprivileged container\n"
717 +" pivot_root,\n"
718 +"\n"
719 +" # Allow modifying mount propagation\n"
720 +" mount options=(rw,make-slave) -> **,\n"
721 +" mount options=(rw,make-rslave) -> **,\n"
722 +" mount options=(rw,make-shared) -> **,\n"
723 +" mount options=(rw,make-rshared) -> **,\n"
724 +" mount options=(rw,make-private) -> **,\n"
725 +" mount options=(rw,make-rprivate) -> **,\n"
726 +" mount options=(rw,make-unbindable) -> **,\n"
727 +" mount options=(rw,make-runbindable) -> **,\n"
728 +"\n"
729 +" # Allow all bind-mounts\n"
730 +" mount options=(rw,bind),\n"
731 +" mount options=(rw,rbind),\n"
732 +"\n"
733 +" # Allow remounting things read-only\n"
734 +" mount options=(ro,remount),\n"
735 +;
736
737 static bool check_mount_feature_enabled(void)
738 {
739 @@ -144,11 +477,6 @@ static bool apparmor_am_unconfined(void)
740 return ret;
741 }
742
743 -/* aa stacking is not yet supported */
744 -static bool aa_stacking_supported(void) {
745 - return false;
746 -}
747 -
748 static bool aa_needs_transition(char *curlabel)
749 {
750 if (!curlabel)
751 @@ -160,61 +488,546 @@ static bool aa_needs_transition(char *curlabel)
752 return true;
753 }
754
755 +static inline void uint64hex(char *buf, uint64_t num)
756 +{
757 + size_t i;
758 +
759 + buf[16] = 0;
760 + for (i = 16; i--;) {
761 + char c = (char)(num & 0xf);
762 + buf[i] = c + (c < 0xa ? '0' : 'a' - 0xa);
763 + num >>= 4;
764 + }
765 +}
766 +
767 +static inline char *shorten_apparmor_name(char *name)
768 +{
769 + size_t len = strlen(name);
770 + if (len + 7 > 253) {
771 + uint64_t hash;
772 + hash = fnv_64a_buf(name, len, FNV1A_64_INIT);
773 + name = must_realloc(name, 16 + 1);
774 + uint64hex(name, hash);
775 + }
776 +
777 + return name;
778 +}
779 +
780 +/* Replace slashes with hyphens */
781 +static inline void sanitize_path(char *path)
782 +{
783 + size_t i;
784 +
785 + for (i = 0; path[i]; i++)
786 + if (path[i] == '/')
787 + path[i] = '-';
788 +}
789 +
790 +static inline char *apparmor_dir(const char *ctname, const char *lxcpath)
791 +{
792 + return must_make_path(lxcpath, ctname, "apparmor", NULL);
793 +}
794 +
795 +
796 +static inline char *apparmor_profile_full(const char *ctname, const char *lxcpath)
797 +{
798 + return shorten_apparmor_name(must_concat("lxc-", ctname, "_<", lxcpath, ">", NULL));
799 +}
800 +
801 +/* Like apparmor_profile_full() but with slashes replaced by hyphens */
802 +static inline char *apparmor_namespace(const char *ctname, const char *lxcpath)
803 +{
804 + char *full;
805 +
806 + full = apparmor_profile_full(ctname, lxcpath);
807 + sanitize_path(full);
808 +
809 + return full;
810 +}
811 +
812 +/* FIXME: This is currently run only in the context of a constructor (via the
813 + * initial lsm_init() called due to its __attribute__((constructor))), so we
814 + * do not have ERROR/... macros available; hence there are some
815 + * fprintf(stderr)s in there.
816 + */
817 +static bool check_apparmor_parser_version()
818 +{
819 + struct lxc_popen_FILE *parserpipe;
820 + int rc;
821 + int major = 0, minor = 0, micro = 0;
822 +
823 + parserpipe = lxc_popen("apparmor_parser --version");
824 + if (!parserpipe) {
825 + fprintf(stderr, "Failed to run check for apparmor_parser\n");
826 + return false;
827 + }
828 +
829 + rc = fscanf(parserpipe->f, "AppArmor parser version %d.%d.%d", &major, &minor, &micro);
830 + if (rc < 1) {
831 + lxc_pclose(parserpipe);
832 + /* We stay silent for now as this most likely means the shell
833 + * that lxc_popen executed failed to find the apparmor_parser binary.
834 + * See the FIXME comment above for details.
835 + */
836 + return false;
837 + }
838 +
839 + rc = lxc_pclose(parserpipe);
840 + if (rc < 0) {
841 + fprintf(stderr, "Error waiting for child process\n");
842 + return false;
843 + }
844 + if (rc != 0) {
845 + fprintf(stderr, "'apparmor_parser --version' executed with an error status\n");
846 + return false;
847 + }
848 +
849 + aa_supports_unix = (major > 2) ||
850 + (major == 2 && minor > 10) ||
851 + (major == 2 && minor == 10 && micro >= 95);
852 +
853 + return true;
854 +}
855 +
856 +static bool file_is_yes(const char *path)
857 +{
858 + ssize_t rd;
859 + int fd;
860 + char buf[8]; /* we actually just expect "yes" or "no" */
861 +
862 + fd = open(path, O_RDONLY | O_CLOEXEC);
863 + if (fd < 0)
864 + return false;
865 +
866 + rd = read(fd, buf, sizeof(buf));
867 + close(fd);
868 +
869 + return rd >= 4 && strncmp(buf, "yes\n", 4) == 0;
870 +}
871 +
872 +static bool apparmor_can_stack()
873 +{
874 + int major, minor, scanned;
875 + FILE *f;
876 +
877 + if (!file_is_yes("/sys/kernel/security/apparmor/features/domain/stack"))
878 + return false;
879 +
880 + f = fopen_cloexec("/sys/kernel/security/apparmor/features/domain/version", "r");
881 + if (!f)
882 + return false;
883 +
884 + scanned = fscanf(f, "%d.%d", &major, &minor);
885 + fclose(f);
886 + if (scanned != 2)
887 + return false;
888 +
889 + return major > 1 || (major == 1 && minor >= 2);
890 +}
891 +
892 +static void must_append_sized_full(char **buf, size_t *bufsz, const char *data,
893 + size_t size, bool append_newline)
894 +{
895 + size_t newsize = *bufsz + size;
896 +
897 + if (append_newline)
898 + ++newsize;
899 +
900 + *buf = must_realloc(*buf, newsize);
901 + memcpy(*buf + *bufsz, data, size);
902 +
903 + if (append_newline)
904 + (*buf)[newsize - 1] = '\n';
905 +
906 + *bufsz = newsize;
907 +}
908 +
909 +static void must_append_sized(char **buf, size_t *bufsz, const char *data, size_t size)
910 +{
911 + return must_append_sized_full(buf, bufsz, data, size, false);
912 +}
913 +
914 +static bool is_privileged(struct lxc_conf *conf)
915 +{
916 + return lxc_list_empty(&conf->id_map);
917 +}
918 +
919 +static char *get_apparmor_profile_content(struct lxc_conf *conf, const char *lxcpath)
920 +{
921 + char *profile, *profile_name_full;
922 + size_t size;
923 + struct lxc_list *it;
924 +
925 + profile_name_full = apparmor_profile_full(conf->name, lxcpath);
926 +
927 + profile = must_concat(
928 +"#include <tunables/global>\n"
929 +"profile \"", profile_name_full, "\" flags=(attach_disconnected,mediate_deleted) {\n",
930 + NULL);
931 + size = strlen(profile);
932 +
933 + must_append_sized(&profile, &size, AA_PROFILE_BASE,
934 + sizeof(AA_PROFILE_BASE) - 1);
935 +
936 + if (aa_supports_unix)
937 + must_append_sized(&profile, &size, AA_PROFILE_UNIX_SOCKETS,
938 + sizeof(AA_PROFILE_UNIX_SOCKETS) - 1);
939 +
940 + if (file_exists("/proc/self/ns/cgroup"))
941 + must_append_sized(&profile, &size, AA_PROFILE_CGROUP_NAMESPACES,
942 + sizeof(AA_PROFILE_CGROUP_NAMESPACES) - 1);
943 +
944 + if (aa_can_stack && !aa_is_stacked) {
945 + char *namespace, *temp;
946 +
947 + must_append_sized(&profile, &size, AA_PROFILE_STACKING_BASE,
948 + sizeof(AA_PROFILE_STACKING_BASE) - 1);
949 +
950 + namespace = apparmor_namespace(conf->name, lxcpath);
951 + temp = must_concat(" change_profile -> \":", namespace, ":*\",\n"
952 + " change_profile -> \":", namespace, "://*\",\n",
953 + NULL);
954 + free(namespace);
955 +
956 + must_append_sized(&profile, &size, temp, strlen(temp));
957 + free(temp);
958 + } else {
959 + must_append_sized(&profile, &size, AA_PROFILE_NO_STACKING,
960 + sizeof(AA_PROFILE_NO_STACKING) - 1);
961 + }
962 +
963 + if (conf->lsm_aa_allow_nesting) {
964 + must_append_sized(&profile, &size, AA_PROFILE_NESTING_BASE,
965 + sizeof(AA_PROFILE_NESTING_BASE) - 1);
966 +
967 + if (!aa_can_stack || aa_is_stacked) {
968 + char *temp;
969 +
970 + temp = must_concat(" change_profile -> \"",
971 + profile_name_full, "\",\n", NULL);
972 + must_append_sized(&profile, &size, temp, strlen(temp));
973 + free(temp);
974 + }
975 + }
976 +
977 + if (!is_privileged(conf) || am_host_unpriv())
978 + must_append_sized(&profile, &size, AA_PROFILE_UNPRIVILEGED,
979 + sizeof(AA_PROFILE_UNPRIVILEGED) - 1);
980 +
981 + lxc_list_for_each(it, &conf->lsm_aa_raw) {
982 + const char *line = it->elem;
983 +
984 + must_append_sized_full(&profile, &size, line, strlen(line), true);
985 + }
986 +
987 + /* include terminating \0 byte */
988 + must_append_sized(&profile, &size, "}\n", 3);
989 +
990 + free(profile_name_full);
991 +
992 + return profile;
993 +}
994 +
995 /*
996 - * apparmor_process_label_set: Set AppArmor process profile
997 - *
998 - * @label : the profile to set
999 - * @conf : the container configuration to use if @label is NULL
1000 - * @default : use the default profile if @label is NULL
1001 - * @on_exec : this is ignored. Apparmor profile will be changed immediately
1002 - *
1003 - * Returns 0 on success, < 0 on failure
1004 - *
1005 - * Notes: This relies on /proc being available.
1006 + * apparmor_parser names its cache file after the file it parsed. Since there
1007 + * may be multiple containers with the same name but different lxcpaths, the
1008 + * container name alone is not unique. Therefore we need a sanitized version of
1009 + * the complete profile name as the profile file name.
1010 + * We already get this exactly from apparmor_namespace().
1011 */
1012 -static int apparmor_process_label_set(const char *inlabel, struct lxc_conf *conf,
1013 - bool use_default, bool on_exec)
1014 +static char *make_apparmor_profile_path(const char *ctname, const char *lxcpath)
1015 {
1016 - int label_fd, ret;
1017 - pid_t tid;
1018 - const char *label = inlabel ? inlabel : conf->lsm_aa_profile;
1019 - char *curlabel;
1020 + char *ret, *filename;
1021
1022 - if (!aa_enabled)
1023 - return 0;
1024 + filename = apparmor_namespace(ctname, lxcpath);
1025 + ret = must_make_path(lxcpath, ctname, "apparmor", filename, NULL);
1026 + free(filename);
1027 +
1028 + return ret;
1029 +}
1030 +
1031 +static char *make_apparmor_namespace_path(const char *ctname, const char *lxcpath)
1032 +{
1033 + char *ret, *namespace;
1034 +
1035 + namespace = apparmor_namespace(ctname, lxcpath);
1036 + ret = must_make_path("/sys/kernel/security/apparmor/policy/namespaces", namespace, NULL);
1037 + free(namespace);
1038 +
1039 + return ret;
1040 +}
1041 +
1042 +static bool make_apparmor_namespace(struct lxc_conf *conf, const char *lxcpath)
1043 +{
1044 + char *path;
1045 +
1046 + if (!aa_can_stack || aa_is_stacked)
1047 + return true;
1048 +
1049 + path = make_apparmor_namespace_path(conf->name, lxcpath);
1050 + errno = 0;
1051 + if (mkdir(path, 0755) < 0 && errno != EEXIST) {
1052 + SYSERROR("Error creating AppArmor namespace: %s", path);
1053 + free(path);
1054 + return false;
1055 + }
1056 + free(path);
1057 +
1058 + return true;
1059 +}
1060 +
1061 +static void remove_apparmor_namespace(struct lxc_conf *conf, const char *lxcpath)
1062 +{
1063 + char *path;
1064 +
1065 + path = make_apparmor_namespace_path(conf->name, lxcpath);
1066 + if (rmdir(path) != 0)
1067 + SYSERROR("Error removing AppArmor namespace");
1068 + free(path);
1069 +}
1070 +
1071 +struct apparmor_parser_args {
1072 + char cmd;
1073 + char *file;
1074 +};
1075 +
1076 +static int apparmor_parser_exec(void *data)
1077 +{
1078 + struct apparmor_parser_args *args = data;
1079 + char cmdbuf[] = { '-', args->cmd, 'W', 'L', 0 };
1080 +
1081 + execlp("apparmor_parser", "apparmor_parser", cmdbuf, APPARMOR_CACHE_DIR, args->file, NULL);
1082 +
1083 + return -1;
1084 +}
1085 +
1086 +static int run_apparmor_parser(char command,
1087 + struct lxc_conf *conf,
1088 + const char *lxcpath)
1089 +{
1090 + char output[MAXPATHLEN];
1091 + int ret;
1092 + struct apparmor_parser_args args = {
1093 + .cmd = command,
1094 + .file = make_apparmor_profile_path(conf->name, lxcpath),
1095 + };
1096 +
1097 + ret = run_command(output, sizeof(output), apparmor_parser_exec, (void*)&args);
1098 + if (ret < 0) {
1099 + ERROR("Failed to run apparmor_parser on \"%s\": %s", args.file, output);
1100 + ret = -1;
1101 + }
1102 +
1103 +
1104 + free(args.file);
1105 + return ret;
1106 +}
1107 +
1108 +static void remove_apparmor_profile(struct lxc_conf *conf, const char *lxcpath)
1109 +{
1110 + char *path;
1111 +
1112 + /* It's ok if these deletes fail: if the container was never started,
1113 + * we'll have never written a profile or cached it.
1114 + */
1115 +
1116 + path = make_apparmor_profile_path(conf->name, lxcpath);
1117 + (void)unlink(path);
1118 + free(path);
1119 +
1120 + /* Also remove the apparmor/ subdirectory */
1121 + path = apparmor_dir(conf->name, lxcpath);
1122 + (void)rmdir(path);
1123 + free(path);
1124 +}
1125 +
1126 +static int load_apparmor_profile(struct lxc_conf *conf, const char *lxcpath)
1127 +{
1128 + struct stat profile_sb;
1129 + size_t content_len;
1130 + int ret = -1;
1131 + size_t old_len = 0;
1132 + char *profile_path = NULL, *old_content = NULL, *new_content = NULL;
1133 + int profile_fd = -1;
1134 +
1135 + if (!make_apparmor_namespace(conf, lxcpath))
1136 + return -1;
1137 +
1138 + /* In order to avoid forcing a profile parse (potentially slow) on
1139 + * every container start, let's use apparmor's binary policy cache,
1140 + * which checks mtime of the files to figure out if the policy needs to
1141 + * be regenerated.
1142 + *
1143 + * Since it uses mtimes, we shouldn't just always write out our local
1144 + * apparmor template; instead we should check to see whether the
1145 + * template is the same as ours. If it isn't we should write our
1146 + * version out so that the new changes are reflected and we definitely
1147 + * force a recompile.
1148 + */
1149 +
1150 + profile_path = make_apparmor_profile_path(conf->name, lxcpath);
1151 + profile_fd = open(profile_path, O_RDONLY | O_CLOEXEC);
1152 + if (profile_fd >= 0) {
1153 + if (fstat(profile_fd, &profile_sb) < 0) {
1154 + SYSERROR("Error accessing old profile from %s",
1155 + profile_path);
1156 + goto out;
1157 + }
1158 + old_len = profile_sb.st_size;
1159 + old_content = lxc_strmmap(NULL, old_len, PROT_READ,
1160 + MAP_PRIVATE, profile_fd, 0);
1161 + if (!old_content) {
1162 + SYSERROR("Failed to mmap old profile from %s",
1163 + profile_path);
1164 + goto out;
1165 + }
1166 + } else if (errno != ENOENT) {
1167 + SYSERROR("Error reading old profile from %s", profile_path);
1168 + goto out;
1169 + }
1170 +
1171 + new_content = get_apparmor_profile_content(conf, lxcpath);
1172 + if (!new_content)
1173 + goto out;
1174 +
1175 + content_len = strlen(new_content);
1176 +
1177 + if (!old_content || old_len != content_len || memcmp(old_content, new_content, content_len) != 0) {
1178 + char *path;
1179 +
1180 + ret = mkdir_p(APPARMOR_CACHE_DIR, 0755);
1181 + if (ret < 0) {
1182 + SYSERROR("Error creating AppArmor profile cache directory " APPARMOR_CACHE_DIR);
1183 + goto out;
1184 + }
1185 +
1186 + path = apparmor_dir(conf->name, lxcpath);
1187 + ret = mkdir_p(path, 0755);
1188 + if (ret < 0) {
1189 + SYSERROR("Error creating AppArmor profile directory: %s", path);
1190 + free(path);
1191 + goto out;
1192 + }
1193 + free(path);
1194 +
1195 + ret = lxc_write_to_file(profile_path, new_content, content_len, false, 0600);
1196 + if (ret < 0) {
1197 + SYSERROR("Error writing profile to %s", profile_path);
1198 + goto out;
1199 + }
1200 + }
1201 +
1202 + ret = run_apparmor_parser(AA_CMD_LOAD, conf, lxcpath);
1203 + if (ret != 0)
1204 + goto out_remove_profile;
1205 +
1206 + conf->lsm_aa_profile_created = true;
1207 +
1208 + goto out_ok;
1209 +
1210 +out_remove_profile:
1211 + remove_apparmor_profile(conf, lxcpath);
1212 +out:
1213 + remove_apparmor_namespace(conf, lxcpath);
1214 +out_ok:
1215 + if (profile_fd >= 0) {
1216 + if (old_content)
1217 + lxc_strmunmap(old_content, old_len);
1218 + close(profile_fd);
1219 + }
1220 + free(profile_path);
1221 + free(new_content);
1222 + return ret;
1223 +}
1224 +
1225 +/*
1226 + * Ensure that the container's policy namespace is unloaded to free kernel
1227 + * memory. This does not delete the policy from disk or cache.
1228 + */
1229 +static void apparmor_cleanup(struct lxc_conf *conf, const char *lxcpath)
1230 +{
1231 + if (!aa_admin)
1232 + return;
1233 +
1234 + if (!conf->lsm_aa_profile_created)
1235 + return;
1236 +
1237 + remove_apparmor_namespace(conf, lxcpath);
1238 + (void)run_apparmor_parser(AA_CMD_UNLOAD, conf, lxcpath);
1239 +
1240 + remove_apparmor_profile(conf, lxcpath);
1241 +}
1242 +
1243 +static int apparmor_prepare(struct lxc_conf *conf, const char *lxcpath)
1244 +{
1245 + int ret = -1;
1246 + const char *label;
1247 + char *curlabel = NULL, *genlabel = NULL;
1248 +
1249 + if (!aa_enabled) {
1250 + ERROR("AppArmor not enabled");
1251 + return -1;
1252 + }
1253 +
1254 + label = conf->lsm_aa_profile;
1255
1256 /* user may request that we just ignore apparmor */
1257 if (label && strcmp(label, AA_UNCHANGED) == 0) {
1258 - INFO("apparmor profile unchanged per user request");
1259 + INFO("AppArmor profile unchanged per user request");
1260 + conf->lsm_aa_profile_computed = must_copy_string(label);
1261 return 0;
1262 }
1263
1264 + if (label && strcmp(label, AA_GENERATED) == 0) {
1265 + if (!aa_parser_available) {
1266 + ERROR("Cannot use generated profile: apparmor_parser not available");
1267 + goto out;
1268 + }
1269 +
1270 + /* auto-generate profile based on available/requested security features */
1271 + if (load_apparmor_profile(conf, lxcpath) != 0) {
1272 + ERROR("Failed to load generated AppArmor profile");
1273 + goto out;
1274 + }
1275 +
1276 + genlabel = apparmor_profile_full(conf->name, lxcpath);
1277 + if (!genlabel) {
1278 + ERROR("Failed to build AppArmor profile name");
1279 + goto out;
1280 + }
1281 +
1282 + if (aa_can_stack && !aa_is_stacked) {
1283 + char *namespace = apparmor_namespace(conf->name, lxcpath);
1284 + size_t llen = strlen(genlabel);
1285 + must_append_sized(&genlabel, &llen, "//&:", sizeof("//&:") - 1);
1286 + must_append_sized(&genlabel, &llen, namespace, strlen(namespace));
1287 + must_append_sized(&genlabel, &llen, ":", sizeof(":")); /* with the nul byte */
1288 + free(namespace);
1289 + }
1290 +
1291 + label = genlabel;
1292 + }
1293 +
1294 curlabel = apparmor_process_label_get(lxc_raw_getpid());
1295
1296 - if (!aa_stacking_supported() && aa_needs_transition(curlabel)) {
1297 + if (!aa_can_stack && aa_needs_transition(curlabel)) {
1298 /* we're already confined, and stacking isn't supported */
1299
1300 if (!label || strcmp(curlabel, label) == 0) {
1301 /* no change requested */
1302 - free(curlabel);
1303 - return 0;
1304 + ret = 0;
1305 + goto out;
1306 }
1307
1308 - ERROR("already apparmor confined, but new label requested.");
1309 - free(curlabel);
1310 - return -1;
1311 + ERROR("Already AppArmor confined, but new label requested.");
1312 + goto out;
1313 }
1314 - free(curlabel);
1315
1316 if (!label) {
1317 - if (use_default) {
1318 - if (cgns_supported())
1319 - label = AA_DEF_PROFILE_CGNS;
1320 - else
1321 - label = AA_DEF_PROFILE;
1322 - }
1323 + if (cgns_supported())
1324 + label = AA_DEF_PROFILE_CGNS;
1325 else
1326 - label = "unconfined";
1327 + label = AA_DEF_PROFILE;
1328 }
1329
1330 if (!check_mount_feature_enabled() && strcmp(label, "unconfined") != 0) {
1331 @@ -223,30 +1036,78 @@ static int apparmor_process_label_set(const char *inlabel, struct lxc_conf *conf
1332 ERROR("If you really want to start this container, set");
1333 ERROR("lxc.apparmor.allow_incomplete = 1");
1334 ERROR("in your container configuration file");
1335 - return -1;
1336 + goto out;
1337 }
1338 }
1339
1340 + conf->lsm_aa_profile_computed = must_copy_string(label);
1341 + ret = 0;
1342 +
1343 +out:
1344 + if (genlabel) {
1345 + free(genlabel);
1346 + if (ret != 0)
1347 + apparmor_cleanup(conf, lxcpath);
1348 + }
1349 + free(curlabel);
1350 + return ret;
1351 +}
1352 +
1353 +/*
1354 + * apparmor_process_label_set: Set AppArmor process profile
1355 + *
1356 + * @inlabel : the profile to set
1357 + * @conf : the container configuration; its lsm_aa_profile_computed (set by
1358 + * apparmor_prepare()) is used if @inlabel is NULL
1359 + * @on_exec : passed to lsm_process_label_fd_get()/lsm_process_label_set_at()
1360 + *
1361 + * Returns 0 on success, < 0 on failure
1362 + *
1363 + * Notes: This relies on /proc being available.
1364 + */
1365 +static int apparmor_process_label_set(const char *inlabel, struct lxc_conf *conf,
1366 + bool on_exec)
1367 +{
1368 + int label_fd, ret;
1369 + pid_t tid;
1370 + const char *label;
1371 +
1372 + if (!aa_enabled) {
1373 + ERROR("AppArmor not enabled");
1374 + return -1;
1375 + }
1376 +
1377 + label = inlabel ? inlabel : conf->lsm_aa_profile_computed;
1378 + if (!label) {
1379 + ERROR("LSM wasn't prepared");
1380 + return -1;
1381 + }
1382 +
1383 + /* user may request that we just ignore apparmor */
1384 + if (strcmp(label, AA_UNCHANGED) == 0) {
1385 + INFO("AppArmor profile unchanged per user request");
1386 + return 0;
1387 + }
1388
1389 if (strcmp(label, "unconfined") == 0 && apparmor_am_unconfined()) {
1390 - INFO("apparmor profile unchanged");
1391 + INFO("AppArmor profile unchanged");
1392 return 0;
1393 }
1394 tid = lxc_raw_gettid();
1395 label_fd = lsm_process_label_fd_get(tid, on_exec);
1396 if (label_fd < 0) {
1397 - SYSERROR("Failed to change apparmor profile to %s", label);
1398 + SYSERROR("Failed to change AppArmor profile to %s", label);
1399 return -1;
1400 }
1401
1402 ret = lsm_process_label_set_at(label_fd, label, on_exec);
1403 close(label_fd);
1404 if (ret < 0) {
1405 - ERROR("Failed to change apparmor profile to %s", label);
1406 + ERROR("Failed to change AppArmor profile to %s", label);
1407 return -1;
1408 }
1409
1410 - INFO("Changed apparmor profile to %s", label);
1411 + INFO("Changed AppArmor profile to %s", label);
1412 return 0;
1413 }
1414
1415 @@ -255,12 +1116,39 @@ static struct lsm_drv apparmor_drv = {
1416 .enabled = apparmor_enabled,
1417 .process_label_get = apparmor_process_label_get,
1418 .process_label_set = apparmor_process_label_set,
1419 + .prepare = apparmor_prepare,
1420 + .cleanup = apparmor_cleanup,
1421 };
1422
1423 struct lsm_drv *lsm_apparmor_drv_init(void)
1424 {
1425 + bool have_mac_admin = false;
1426 +
1427 if (!apparmor_enabled())
1428 return NULL;
1429 +
1430 + /* We only support generated profiles when apparmor_parser is usable */
1431 + if (!check_apparmor_parser_version())
1432 + goto out;
1433 +
1434 + aa_parser_available = true;
1435 +
1436 + aa_can_stack = apparmor_can_stack();
1437 + if (aa_can_stack)
1438 + aa_is_stacked = file_is_yes("/sys/kernel/security/apparmor/.ns_stacked");
1439 + /* Per-container profiles need CAP_MAC_ADMIN (see the warning below), not CAP_SETGID */
1440 + #if HAVE_LIBCAP
1441 + have_mac_admin = lxc_proc_cap_is_set(CAP_MAC_ADMIN, CAP_EFFECTIVE);
1442 + #endif
1443 +
1444 + if (!have_mac_admin)
1445 + WARN("Per-container AppArmor profiles are disabled because the mac_admin capability is missing");
1446 + else if (am_host_unpriv() && !aa_is_stacked)
1447 + WARN("Per-container AppArmor profiles are disabled because LXC is running in an unprivileged container without stacking");
1448 + else
1449 + aa_admin = true;
1450 +
1451 +out:
1452 aa_enabled = 1;
1453 return &apparmor_drv;
1454 }
1455 diff --git a/src/lxc/lsm/lsm.c b/src/lxc/lsm/lsm.c
1456 index 8d7de2db..46e21206 100644
1457 --- a/src/lxc/lsm/lsm.c
1458 +++ b/src/lxc/lsm/lsm.c
1459 @@ -177,11 +177,37 @@ on_error:
1460 }
1461
1462 int lsm_process_label_set(const char *label, struct lxc_conf *conf,
1463 - bool use_default, bool on_exec)
1464 + bool on_exec)
1465 {
1466 if (!drv) {
1467 ERROR("LSM driver not inited");
1468 return -1;
1469 }
1470 - return drv->process_label_set(label, conf, use_default, on_exec);
1471 + return drv->process_label_set(label, conf, on_exec);
1472 +}
1473 +
1474 +int lsm_process_prepare(struct lxc_conf *conf, const char *lxcpath)
1475 +{
1476 + if (!drv) {
1477 + ERROR("LSM driver not inited");
1478 + return 0;
1479 + }
1480 +
1481 + if (!drv->prepare)
1482 + return 0;
1483 +
1484 + return drv->prepare(conf, lxcpath);
1485 +}
1486 +
1487 +void lsm_process_cleanup(struct lxc_conf *conf, const char *lxcpath)
1488 +{
1489 + if (!drv) {
1490 + ERROR("LSM driver not inited");
1491 + return;
1492 + }
1493 +
1494 + if (!drv->cleanup)
1495 + return;
1496 +
1497 + drv->cleanup(conf, lxcpath);
1498 }
1499 diff --git a/src/lxc/lsm/lsm.h b/src/lxc/lsm/lsm.h
1500 index cafb2ac7..52e656d6 100644
1501 --- a/src/lxc/lsm/lsm.h
1502 +++ b/src/lxc/lsm/lsm.h
1503 @@ -38,17 +38,21 @@ struct lsm_drv {
1504 int (*enabled)(void);
1505 char *(*process_label_get)(pid_t pid);
1506 int (*process_label_set)(const char *label, struct lxc_conf *conf,
1507 - bool use_default, bool on_exec);
1508 + bool on_exec);
1509 + int (*prepare)(struct lxc_conf *conf, const char *lxcpath);
1510 + void (*cleanup)(struct lxc_conf *conf, const char *lxcpath);
1511 };
1512
1513 extern void lsm_init(void);
1514 extern int lsm_enabled(void);
1515 extern const char *lsm_name(void);
1516 extern char *lsm_process_label_get(pid_t pid);
1517 +extern int lsm_process_prepare(struct lxc_conf *conf, const char *lxcpath);
1518 extern int lsm_process_label_set(const char *label, struct lxc_conf *conf,
1519 - bool use_default, bool on_exec);
1520 + bool on_exec);
1521 extern int lsm_process_label_fd_get(pid_t pid, bool on_exec);
1522 extern int lsm_process_label_set_at(int label_fd, const char *label,
1523 bool on_exec);
1524 +extern void lsm_process_cleanup(struct lxc_conf *conf, const char *lxcpath);
1525
1526 #endif /* __LXC_LSM_H */
1527 diff --git a/src/lxc/lsm/nop.c b/src/lxc/lsm/nop.c
1528 index 7bb8121b..9397f2bf 100644
1529 --- a/src/lxc/lsm/nop.c
1530 +++ b/src/lxc/lsm/nop.c
1531 @@ -30,7 +30,7 @@ static char *nop_process_label_get(pid_t pid)
1532 }
1533
1534 static int nop_process_label_set(const char *label, struct lxc_conf *conf,
1535 - bool use_default, bool on_exec)
1536 + bool on_exec)
1537 {
1538 return 0;
1539 }
1540 diff --git a/src/lxc/lsm/selinux.c b/src/lxc/lsm/selinux.c
1541 index c88c18e3..9f7b7bc3 100644
1542 --- a/src/lxc/lsm/selinux.c
1543 +++ b/src/lxc/lsm/selinux.c
1544 @@ -75,15 +75,13 @@ static char *selinux_process_label_get(pid_t pid)
1545 * Notes: This relies on /proc being available.
1546 */
1547 static int selinux_process_label_set(const char *inlabel, struct lxc_conf *conf,
1548 - bool use_default, bool on_exec)
1549 + bool on_exec)
1550 {
1551 int ret;
1552 const char *label;
1553
1554 label = inlabel ? inlabel : conf->lsm_se_context;
1555 if (!label) {
1556 - if (!use_default)
1557 - return -EINVAL;
1558
1559 label = DEFAULT_LABEL;
1560 }
1561 diff --git a/src/lxc/start.c b/src/lxc/start.c
1562 index 3343f9bf..6d6dc7ee 100644
1563 --- a/src/lxc/start.c
1564 +++ b/src/lxc/start.c
1565 @@ -863,9 +863,19 @@ int lxc_init(const char *name, struct lxc_handler *handler)
1566 }
1567 TRACE("Initialized cgroup driver");
1568
1569 + ret = lsm_process_prepare(conf, handler->lxcpath);
1570 + if (ret < 0) {
1571 + ERROR("Failed to initialize LSM");
1572 + goto out_destroy_cgroups;
1573 + }
1574 + TRACE("Initialized LSM");
1575 +
1576 INFO("Container \"%s\" is initialized", name);
1577 return 0;
1578
1579 +out_destroy_cgroups:
1580 + handler->cgroup_ops->destroy(handler->cgroup_ops, handler);
1581 +
1582 out_delete_terminal:
1583 lxc_terminal_delete(&handler->conf->console);
1584
1585 @@ -956,6 +966,8 @@ void lxc_fini(const char *name, struct lxc_handler *handler)
1586 while (namespace_count--)
1587 free(namespaces[namespace_count]);
1588
1589 + lsm_process_cleanup(handler->conf, handler->lxcpath);
1590 +
1591 cgroup_ops->destroy(cgroup_ops, handler);
1592 cgroup_exit(cgroup_ops);
1593
1594 @@ -1235,7 +1247,7 @@ static int do_start(void *data)
1595 }
1596
1597 /* Set the label to change to when we exec(2) the container's init. */
1598 - ret = lsm_process_label_set(NULL, handler->conf, 1, 1);
1599 + ret = lsm_process_label_set(NULL, handler->conf, true);
1600 if (ret < 0)
1601 goto out_warn_father;
1602
1603 --
1604 2.11.0
1605