1 /* SPDX-License-Identifier: LGPL-2.1+ */
11 #include <sys/types.h>
18 #include "initutils.h"
22 #include "raw_syscalls.h"
25 lxc_log_define(apparmor
, lsm
);
27 /* set by lsm_apparmor_drv_init if true */
28 static int aa_enabled
= 0;
29 static bool aa_parser_available
= false;
30 static bool aa_supports_unix
= false;
31 static bool aa_can_stack
= false;
32 static bool aa_is_stacked
= false;
33 static bool aa_admin
= false;
35 static int mount_features_enabled
= 0;
37 #define AA_DEF_PROFILE "lxc-container-default"
38 #define AA_DEF_PROFILE_CGNS "lxc-container-default-cgns"
39 #define AA_MOUNT_RESTR "/sys/kernel/security/apparmor/features/mount/mask"
40 #define AA_ENABLED_FILE "/sys/module/apparmor/parameters/enabled"
41 #define AA_UNCHANGED "unchanged"
42 #define AA_GENERATED "generated"
44 #define AA_CMD_LOAD 'r'
45 #define AA_CMD_UNLOAD 'R'
46 #define AA_CMD_PARSE 'Q'
48 static const char AA_PROFILE_BASE
[] =
56 " # Allow us to receive signals from anywhere.\n"
57 " signal (receive),\n"
59 " # Allow us to send signals to ourselves\n"
60 " signal peer=@{profile_name},\n"
62 " # Allow other processes to read our /proc entries, futexes, perf tracing and\n"
63 " # kcmp for now (they will need 'read' in the first place). Administrators can\n"
65 " # deny ptrace (readby) ...\n"
68 " # Allow other processes to trace us by default (they will need 'trace' in\n"
69 " # the first place). Administrators can override with:\n"
70 " # deny ptrace (tracedby) ...\n"
71 " ptrace (tracedby),\n"
73 " # Allow us to ptrace ourselves\n"
74 " ptrace peer=@{profile_name},\n"
76 " # ignore DENIED message on / remount\n"
77 " deny mount options=(ro, remount) -> /,\n"
78 " deny mount options=(ro, remount, silent) -> /,\n"
80 " # allow tmpfs mounts everywhere\n"
81 " mount fstype=tmpfs,\n"
83 " # allow hugetlbfs mounts everywhere\n"
84 " mount fstype=hugetlbfs,\n"
86 " # allow mqueue mounts everywhere\n"
87 " mount fstype=mqueue,\n"
89 " # allow fuse mounts everywhere\n"
90 " mount fstype=fuse,\n"
91 " mount fstype=fuse.*,\n"
93 " # deny access under /proc/bus to avoid e.g. messing with pci devices directly\n"
94 " deny @{PROC}/bus/** wklx,\n"
96 " # deny writes in /proc/sys/fs but allow binfmt_misc to be mounted\n"
97 " mount fstype=binfmt_misc -> /proc/sys/fs/binfmt_misc/,\n"
98 " deny @{PROC}/sys/fs/** wklx,\n"
100 " # allow efivars to be mounted, writing to it will be blocked though\n"
101 " mount fstype=efivarfs -> /sys/firmware/efi/efivars/,\n"
103 " # block some other dangerous paths\n"
104 " deny @{PROC}/kcore rwklx,\n"
105 " deny @{PROC}/sysrq-trigger rwklx,\n"
106 " deny @{PROC}/acpi/** rwklx,\n"
108 " # deny writes in /sys except for /sys/fs/cgroup, also allow\n"
109 " # fusectl, securityfs and debugfs to be mounted there (read-only)\n"
110 " mount fstype=fusectl -> /sys/fs/fuse/connections/,\n"
111 " mount fstype=securityfs -> /sys/kernel/security/,\n"
112 " mount fstype=debugfs -> /sys/kernel/debug/,\n"
113 " deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/,\n"
114 " mount fstype=proc -> /proc/,\n"
115 " mount fstype=sysfs -> /sys/,\n"
116 " mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/,\n"
117 " deny /sys/firmware/efi/efivars/** rwklx,\n"
118 " # note, /sys/kernel/security/** handled below\n"
119 " mount options=(ro, nosuid, nodev, noexec, remount, strictatime) -> /sys/fs/cgroup/,\n"
121 " # deny reads from debugfs\n"
122 " deny /sys/kernel/debug/{,**} rwklx,\n"
124 " # allow paths to be made slave, shared, private or unbindable\n"
125 " # FIXME: This currently doesn't work due to the apparmor parser treating those as allowing all mounts.\n"
126 "# mount options=(rw,make-slave) -> **,\n"
127 "# mount options=(rw,make-rslave) -> **,\n"
128 "# mount options=(rw,make-shared) -> **,\n"
129 "# mount options=(rw,make-rshared) -> **,\n"
130 "# mount options=(rw,make-private) -> **,\n"
131 "# mount options=(rw,make-rprivate) -> **,\n"
132 "# mount options=(rw,make-unbindable) -> **,\n"
133 "# mount options=(rw,make-runbindable) -> **,\n"
135 "# Allow limited modification of mount propagation\n"
136 " mount options=(rw,make-slave) -> /,\n"
137 " mount options=(rw,make-rslave) -> /,\n"
138 " mount options=(rw,make-shared) -> /,\n"
139 " mount options=(rw,make-rshared) -> /,\n"
140 " mount options=(rw,make-private) -> /,\n"
141 " mount options=(rw,make-rprivate) -> /,\n"
142 " mount options=(rw,make-unbindable) -> /,\n"
143 " mount options=(rw,make-runbindable) -> /,\n"
145 " # allow bind-mounts of anything except /proc, /sys and /dev\n"
146 " mount options=(rw,bind) /[^spd]*{,/**},\n"
147 " mount options=(rw,bind) /d[^e]*{,/**},\n"
148 " mount options=(rw,bind) /de[^v]*{,/**},\n"
149 " mount options=(rw,bind) /dev/.[^l]*{,/**},\n"
150 " mount options=(rw,bind) /dev/.l[^x]*{,/**},\n"
151 " mount options=(rw,bind) /dev/.lx[^c]*{,/**},\n"
152 " mount options=(rw,bind) /dev/.lxc?*{,/**},\n"
153 " mount options=(rw,bind) /dev/[^.]*{,/**},\n"
154 " mount options=(rw,bind) /dev?*{,/**},\n"
155 " mount options=(rw,bind) /p[^r]*{,/**},\n"
156 " mount options=(rw,bind) /pr[^o]*{,/**},\n"
157 " mount options=(rw,bind) /pro[^c]*{,/**},\n"
158 " mount options=(rw,bind) /proc?*{,/**},\n"
159 " mount options=(rw,bind) /s[^y]*{,/**},\n"
160 " mount options=(rw,bind) /sy[^s]*{,/**},\n"
161 " mount options=(rw,bind) /sys?*{,/**},\n"
163 " # Allow rbind-mounts of anything except /, /dev, /proc and /sys\n"
164 " mount options=(rw,rbind) /[^spd]*{,/**},\n"
165 " mount options=(rw,rbind) /d[^e]*{,/**},\n"
166 " mount options=(rw,rbind) /de[^v]*{,/**},\n"
167 " mount options=(rw,rbind) /dev?*{,/**},\n"
168 " mount options=(rw,rbind) /p[^r]*{,/**},\n"
169 " mount options=(rw,rbind) /pr[^o]*{,/**},\n"
170 " mount options=(rw,rbind) /pro[^c]*{,/**},\n"
171 " mount options=(rw,rbind) /proc?*{,/**},\n"
172 " mount options=(rw,rbind) /s[^y]*{,/**},\n"
173 " mount options=(rw,rbind) /sy[^s]*{,/**},\n"
174 " mount options=(rw,rbind) /sys?*{,/**},\n"
176 " # allow moving mounts except for /proc, /sys and /dev\n"
177 " mount options=(rw,move) /[^spd]*{,/**},\n"
178 " mount options=(rw,move) /d[^e]*{,/**},\n"
179 " mount options=(rw,move) /de[^v]*{,/**},\n"
180 " mount options=(rw,move) /dev/.[^l]*{,/**},\n"
181 " mount options=(rw,move) /dev/.l[^x]*{,/**},\n"
182 " mount options=(rw,move) /dev/.lx[^c]*{,/**},\n"
183 " mount options=(rw,move) /dev/.lxc?*{,/**},\n"
184 " mount options=(rw,move) /dev/[^.]*{,/**},\n"
185 " mount options=(rw,move) /dev?*{,/**},\n"
186 " mount options=(rw,move) /p[^r]*{,/**},\n"
187 " mount options=(rw,move) /pr[^o]*{,/**},\n"
188 " mount options=(rw,move) /pro[^c]*{,/**},\n"
189 " mount options=(rw,move) /proc?*{,/**},\n"
190 " mount options=(rw,move) /s[^y]*{,/**},\n"
191 " mount options=(rw,move) /sy[^s]*{,/**},\n"
192 " mount options=(rw,move) /sys?*{,/**},\n"
194 " # generated by: lxc-generate-aa-rules.py container-rules.base\n"
195 " deny /proc/sys/[^kn]*{,/**} wklx,\n"
196 " deny /proc/sys/k[^e]*{,/**} wklx,\n"
197 " deny /proc/sys/ke[^r]*{,/**} wklx,\n"
198 " deny /proc/sys/ker[^n]*{,/**} wklx,\n"
199 " deny /proc/sys/kern[^e]*{,/**} wklx,\n"
200 " deny /proc/sys/kerne[^l]*{,/**} wklx,\n"
201 " deny /proc/sys/kernel/[^smhd]*{,/**} wklx,\n"
202 " deny /proc/sys/kernel/d[^o]*{,/**} wklx,\n"
203 " deny /proc/sys/kernel/do[^m]*{,/**} wklx,\n"
204 " deny /proc/sys/kernel/dom[^a]*{,/**} wklx,\n"
205 " deny /proc/sys/kernel/doma[^i]*{,/**} wklx,\n"
206 " deny /proc/sys/kernel/domai[^n]*{,/**} wklx,\n"
207 " deny /proc/sys/kernel/domain[^n]*{,/**} wklx,\n"
208 " deny /proc/sys/kernel/domainn[^a]*{,/**} wklx,\n"
209 " deny /proc/sys/kernel/domainna[^m]*{,/**} wklx,\n"
210 " deny /proc/sys/kernel/domainnam[^e]*{,/**} wklx,\n"
211 " deny /proc/sys/kernel/domainname?*{,/**} wklx,\n"
212 " deny /proc/sys/kernel/h[^o]*{,/**} wklx,\n"
213 " deny /proc/sys/kernel/ho[^s]*{,/**} wklx,\n"
214 " deny /proc/sys/kernel/hos[^t]*{,/**} wklx,\n"
215 " deny /proc/sys/kernel/host[^n]*{,/**} wklx,\n"
216 " deny /proc/sys/kernel/hostn[^a]*{,/**} wklx,\n"
217 " deny /proc/sys/kernel/hostna[^m]*{,/**} wklx,\n"
218 " deny /proc/sys/kernel/hostnam[^e]*{,/**} wklx,\n"
219 " deny /proc/sys/kernel/hostname?*{,/**} wklx,\n"
220 " deny /proc/sys/kernel/m[^s]*{,/**} wklx,\n"
221 " deny /proc/sys/kernel/ms[^g]*{,/**} wklx,\n"
222 " deny /proc/sys/kernel/msg*/** wklx,\n"
223 " deny /proc/sys/kernel/s[^he]*{,/**} wklx,\n"
224 " deny /proc/sys/kernel/se[^m]*{,/**} wklx,\n"
225 " deny /proc/sys/kernel/sem*/** wklx,\n"
226 " deny /proc/sys/kernel/sh[^m]*{,/**} wklx,\n"
227 " deny /proc/sys/kernel/shm*/** wklx,\n"
228 " deny /proc/sys/kernel?*{,/**} wklx,\n"
229 " deny /proc/sys/n[^e]*{,/**} wklx,\n"
230 " deny /proc/sys/ne[^t]*{,/**} wklx,\n"
231 " deny /proc/sys/net?*{,/**} wklx,\n"
232 " deny /sys/[^fdck]*{,/**} wklx,\n"
233 " deny /sys/c[^l]*{,/**} wklx,\n"
234 " deny /sys/cl[^a]*{,/**} wklx,\n"
235 " deny /sys/cla[^s]*{,/**} wklx,\n"
236 " deny /sys/clas[^s]*{,/**} wklx,\n"
237 " deny /sys/class/[^n]*{,/**} wklx,\n"
238 " deny /sys/class/n[^e]*{,/**} wklx,\n"
239 " deny /sys/class/ne[^t]*{,/**} wklx,\n"
240 " deny /sys/class/net?*{,/**} wklx,\n"
241 " deny /sys/class?*{,/**} wklx,\n"
242 " deny /sys/d[^e]*{,/**} wklx,\n"
243 " deny /sys/de[^v]*{,/**} wklx,\n"
244 " deny /sys/dev[^i]*{,/**} wklx,\n"
245 " deny /sys/devi[^c]*{,/**} wklx,\n"
246 " deny /sys/devic[^e]*{,/**} wklx,\n"
247 " deny /sys/device[^s]*{,/**} wklx,\n"
248 " deny /sys/devices/[^v]*{,/**} wklx,\n"
249 " deny /sys/devices/v[^i]*{,/**} wklx,\n"
250 " deny /sys/devices/vi[^r]*{,/**} wklx,\n"
251 " deny /sys/devices/vir[^t]*{,/**} wklx,\n"
252 " deny /sys/devices/virt[^u]*{,/**} wklx,\n"
253 " deny /sys/devices/virtu[^a]*{,/**} wklx,\n"
254 " deny /sys/devices/virtua[^l]*{,/**} wklx,\n"
255 " deny /sys/devices/virtual/[^n]*{,/**} wklx,\n"
256 " deny /sys/devices/virtual/n[^e]*{,/**} wklx,\n"
257 " deny /sys/devices/virtual/ne[^t]*{,/**} wklx,\n"
258 " deny /sys/devices/virtual/net?*{,/**} wklx,\n"
259 " deny /sys/devices/virtual?*{,/**} wklx,\n"
260 " deny /sys/devices?*{,/**} wklx,\n"
261 " deny /sys/f[^s]*{,/**} wklx,\n"
262 " deny /sys/fs/[^c]*{,/**} wklx,\n"
263 " deny /sys/fs/c[^g]*{,/**} wklx,\n"
264 " deny /sys/fs/cg[^r]*{,/**} wklx,\n"
265 " deny /sys/fs/cgr[^o]*{,/**} wklx,\n"
266 " deny /sys/fs/cgro[^u]*{,/**} wklx,\n"
267 " deny /sys/fs/cgrou[^p]*{,/**} wklx,\n"
268 " deny /sys/fs/cgroup?*{,/**} wklx,\n"
269 " deny /sys/fs?*{,/**} wklx,\n"
272 static const char AA_PROFILE_UNIX_SOCKETS
[] =
274 " ### Feature: unix\n"
275 " # Allow receive via unix sockets from anywhere\n"
278 " # Allow all unix sockets in the container\n"
279 " unix peer=(label=@{profile_name}),\n"
282 static const char AA_PROFILE_CGROUP_NAMESPACES
[] =
284 " ### Feature: cgroup namespace\n"
285 " mount fstype=cgroup -> /sys/fs/cgroup/**,\n"
286 " mount fstype=cgroup2 -> /sys/fs/cgroup/**,\n"
289 /* '_BASE' because we still need to append generated change_profile rules */
290 static const char AA_PROFILE_STACKING_BASE
[] =
292 " ### Feature: apparmor stacking\n"
293 " ### Configuration: apparmor profile loading (in namespace)\n"
294 " deny /sys/k[^e]*{,/**} wklx,\n"
295 " deny /sys/ke[^r]*{,/**} wklx,\n"
296 " deny /sys/ker[^n]*{,/**} wklx,\n"
297 " deny /sys/kern[^e]*{,/**} wklx,\n"
298 " deny /sys/kerne[^l]*{,/**} wklx,\n"
299 " deny /sys/kernel/[^s]*{,/**} wklx,\n"
300 " deny /sys/kernel/s[^e]*{,/**} wklx,\n"
301 " deny /sys/kernel/se[^c]*{,/**} wklx,\n"
302 " deny /sys/kernel/sec[^u]*{,/**} wklx,\n"
303 " deny /sys/kernel/secu[^r]*{,/**} wklx,\n"
304 " deny /sys/kernel/secur[^i]*{,/**} wklx,\n"
305 " deny /sys/kernel/securi[^t]*{,/**} wklx,\n"
306 " deny /sys/kernel/securit[^y]*{,/**} wklx,\n"
307 " deny /sys/kernel/security/[^a]*{,/**} wklx,\n"
308 " deny /sys/kernel/security/a[^p]*{,/**} wklx,\n"
309 " deny /sys/kernel/security/ap[^p]*{,/**} wklx,\n"
310 " deny /sys/kernel/security/app[^a]*{,/**} wklx,\n"
311 " deny /sys/kernel/security/appa[^r]*{,/**} wklx,\n"
312 " deny /sys/kernel/security/appar[^m]*{,/**} wklx,\n"
313 " deny /sys/kernel/security/apparm[^o]*{,/**} wklx,\n"
314 " deny /sys/kernel/security/apparmo[^r]*{,/**} wklx,\n"
315 " deny /sys/kernel/security/apparmor?*{,/**} wklx,\n"
316 " deny /sys/kernel/security?*{,/**} wklx,\n"
317 " deny /sys/kernel?*{,/**} wklx,\n"
320 static const char AA_PROFILE_NO_STACKING
[] =
322 " ### Feature: apparmor stacking (not present)\n"
323 " deny /sys/k*{,/**} rwklx,\n"
326 /* '_BASE' because we need to append change_profile for stacking */
327 static const char AA_PROFILE_NESTING_BASE
[] =
329 " ### Configuration: nesting\n"
334 /* NOTE: See conf.c's "nesting_helpers" for details. */
335 " deny /dev/.lxc/proc/** rw,\n"
336 " deny /dev/.lxc/sys/** rw,\n"
338 " mount fstype=proc -> /usr/lib/*/lxc/**,\n"
339 " mount fstype=sysfs -> /usr/lib/*/lxc/**,\n"
341 " # Allow nested LXD\n"
342 " mount none -> /var/lib/lxd/shmounts/,\n"
343 " mount /var/lib/lxd/shmounts/ -> /var/lib/lxd/shmounts/,\n"
344 " mount options=bind /var/lib/lxd/shmounts/** -> /var/lib/lxd/**,\n"
346 " # FIXME: There doesn't seem to be a way to ask for:\n"
347 " # mount options=(ro,nosuid,nodev,noexec,remount,bind),\n"
348 " # as we always get mount to $cdir/proc/sys with those flags denied\n"
349 " # So allow all mounts until that is straightened out:\n"
353 static const char AA_PROFILE_UNPRIVILEGED
[] =
355 " ### Configuration: unprivileged container\n"
358 " # Allow modifying mount propagation\n"
359 " mount options=(rw,make-slave) -> **,\n"
360 " mount options=(rw,make-rslave) -> **,\n"
361 " mount options=(rw,make-shared) -> **,\n"
362 " mount options=(rw,make-rshared) -> **,\n"
363 " mount options=(rw,make-private) -> **,\n"
364 " mount options=(rw,make-rprivate) -> **,\n"
365 " mount options=(rw,make-unbindable) -> **,\n"
366 " mount options=(rw,make-runbindable) -> **,\n"
368 " # Allow all bind-mounts\n"
369 " mount options=(rw,bind),\n"
370 " mount options=(rw,rbind),\n"
372 " # Allow remounting things read-only\n"
373 " mount options=(ro,remount),\n"
376 static bool check_mount_feature_enabled(void)
378 return mount_features_enabled
== 1;
381 static void load_mount_features_enabled(void)
386 ret
= stat(AA_MOUNT_RESTR
, &statbuf
);
388 mount_features_enabled
= 1;
391 /* aa_getcon is not working right now. Use our hand-rolled version below */
392 static int apparmor_enabled(void)
398 fin
= fopen_cloexec(AA_ENABLED_FILE
, "r");
401 ret
= fscanf(fin
, "%c", &e
);
403 if (ret
== 1 && e
== 'Y') {
404 load_mount_features_enabled();
411 static char *apparmor_process_label_get(pid_t pid
)
413 char path
[100], *space
;
415 char *buf
= NULL
, *newbuf
;
419 ret
= snprintf(path
, 100, "/proc/%d/attr/current", pid
);
420 if (ret
< 0 || ret
>= 100) {
421 ERROR("path name too long");
425 f
= fopen_cloexec(path
, "r");
427 SYSERROR("opening %s", path
);
432 newbuf
= realloc(buf
, sz
);
435 ERROR("out of memory");
441 ret
= fread(buf
, 1, sz
- 1, f
);
444 ERROR("reading %s", path
);
450 space
= strchr(buf
, '\n');
453 space
= strchr(buf
, ' ');
460 * Probably makes sense to reorganize these to only read
463 static bool apparmor_am_unconfined(void)
465 char *p
= apparmor_process_label_get(lxc_raw_getpid());
467 if (!p
|| strcmp(p
, "unconfined") == 0)
473 static bool aa_needs_transition(char *curlabel
)
477 if (strcmp(curlabel
, "unconfined") == 0)
479 if (strcmp(curlabel
, "/usr/bin/lxc-start") == 0)
484 static inline void uint64hex(char *buf
, uint64_t num
)
490 char c
= (char)(num
& 0xf);
491 buf
[i
] = c
+ (c
< 0xa ? '0' : 'a' - 0xa);
496 static inline char *shorten_apparmor_name(char *name
)
498 size_t len
= strlen(name
);
501 hash
= fnv_64a_buf(name
, len
, FNV1A_64_INIT
);
502 name
= must_realloc(name
, 16 + 1);
503 uint64hex(name
, hash
);
/*
 * Rewrite @path in place so that every '/' becomes '-'.
 * All other characters and the terminating NUL are left untouched.
 */
static inline void sanitize_path(char *path)
{
	char *cursor;

	for (cursor = path; *cursor != '\0'; cursor++) {
		if (*cursor == '/')
			*cursor = '-';
	}
}
/*
 * Build the path of the container's "apparmor" directory from @lxcpath,
 * @ctname and the literal component "apparmor" via must_make_path().
 *
 * The result comes from must_make_path(); presumably it is heap-allocated
 * and owned by the caller.
 */
static inline char *apparmor_dir(const char *ctname, const char *lxcpath)
{
	char *dir_path;

	dir_path = must_make_path(lxcpath, ctname, "apparmor", NULL);
	return dir_path;
}
/*
 * Compose the full AppArmor profile name for a container:
 * "lxc-<ctname>_<<lxcpath>>", then hand it to shorten_apparmor_name(),
 * which may replace it with a hash-based name.
 *
 * The returned string is heap-allocated; the caller frees it.
 */
static inline char *apparmor_profile_full(const char *ctname, const char *lxcpath)
{
	char *long_name;

	long_name = must_concat(NULL, "lxc-", ctname, "_<", lxcpath, ">", NULL);
	return shorten_apparmor_name(long_name);
}
530 /* Like apparmor_profile_full() but with slashes replaced by hyphens */
531 static inline char *apparmor_namespace(const char *ctname
, const char *lxcpath
)
535 full
= apparmor_profile_full(ctname
, lxcpath
);
541 /* FIXME: This is currently run only in the context of a constructor (via the
542 * initial lsm_init() called due to its __attribute__((constructor)), so we
543 * do not have ERROR/... macros available, so there are some fprintf(stderr)s
546 static bool check_apparmor_parser_version()
548 struct lxc_popen_FILE
*parserpipe
;
550 int major
= 0, minor
= 0, micro
= 0;
552 parserpipe
= lxc_popen("apparmor_parser --version");
554 fprintf(stderr
, "Failed to run check for apparmor_parser\n");
558 rc
= fscanf(parserpipe
->f
, "AppArmor parser version %d.%d.%d", &major
, &minor
, µ
);
560 lxc_pclose(parserpipe
);
561 /* We stay silent for now as this most likely means the shell
562 * lxc_popen executed failed to find the apparmor_parser binary.
563 * See the FIXME comment above for details.
568 rc
= lxc_pclose(parserpipe
);
570 fprintf(stderr
, "Error waiting for child process\n");
574 fprintf(stderr
, "'apparmor_parser --version' executed with an error status\n");
578 aa_supports_unix
= (major
> 2) ||
579 (major
== 2 && minor
> 10) ||
580 (major
== 2 && minor
== 10 && micro
>= 95);
585 static bool file_is_yes(const char *path
)
589 char buf
[8]; /* we actually just expect "yes" or "no" */
591 fd
= open(path
, O_RDONLY
| O_CLOEXEC
);
595 rd
= lxc_read_nointr(fd
, buf
, sizeof(buf
));
598 return rd
>= 4 && strncmp(buf
, "yes\n", 4) == 0;
601 static bool apparmor_can_stack()
603 int major
, minor
, scanned
;
606 if (!file_is_yes("/sys/kernel/security/apparmor/features/domain/stack"))
609 f
= fopen_cloexec("/sys/kernel/security/apparmor/features/domain/version", "r");
613 scanned
= fscanf(f
, "%d.%d", &major
, &minor
);
618 return major
> 1 || (major
== 1 && minor
>= 2);
621 static void must_append_sized_full(char **buf
, size_t *bufsz
, const char *data
,
622 size_t size
, bool append_newline
)
624 size_t newsize
= *bufsz
+ size
;
629 *buf
= must_realloc(*buf
, newsize
);
630 memcpy(*buf
+ *bufsz
, data
, size
);
633 (*buf
)[newsize
- 1] = '\n';
/*
 * Append @size bytes of @data to the buffer *@buf, whose current length is
 * *@bufsz, without adding a trailing newline.
 *
 * Thin wrapper around must_append_sized_full() with append_newline = false.
 */
static void must_append_sized(char **buf, size_t *bufsz, const char *data, size_t size)
{
	/* A void function must not use "return <expr>;"; just make the call. */
	must_append_sized_full(buf, bufsz, data, size, false);
}
643 static bool is_privileged(struct lxc_conf
*conf
)
645 return lxc_list_empty(&conf
->id_map
);
648 static const char* AA_ALL_DEST_PATH_LIST
[] = {
649 " -> /[^spd]*{,/**},\n",
650 " -> /d[^e]*{,/**},\n",
651 " -> /de[^v]*{,/**},\n",
652 " -> /dev/.[^l]*{,/**},\n",
653 " -> /dev/.l[^x]*{,/**},\n",
654 " -> /dev/.lx[^c]*{,/**},\n",
655 " -> /dev/.lxc?*{,/**},\n",
656 " -> /dev/[^.]*{,/**},\n",
657 " -> /dev?*{,/**},\n",
658 " -> /p[^r]*{,/**},\n",
659 " -> /pr[^o]*{,/**},\n",
660 " -> /pro[^c]*{,/**},\n",
661 " -> /proc?*{,/**},\n",
662 " -> /s[^y]*{,/**},\n",
663 " -> /sy[^s]*{,/**},\n",
664 " -> /sys?*{,/**},\n",
668 static const struct mntopt_t
{
671 } REMOUNT_OPTIONS
[] = {
672 { ",nodev", sizeof(",nodev")-1 },
673 { ",nosuid", sizeof(",nosuid")-1 },
674 { ",noexec", sizeof(",noexec")-1 },
677 static void append_remount_rule(char **profile
, size_t *size
, const char *rule
)
679 size_t rule_len
= strlen(rule
);
681 for (const char **dest
= AA_ALL_DEST_PATH_LIST
; *dest
; ++dest
) {
682 must_append_sized(profile
, size
, rule
, rule_len
);
683 must_append_sized(profile
, size
, *dest
, strlen(*dest
));
687 static void append_all_remount_rules(char **profile
, size_t *size
)
690 * That's 30, and we add at most:
691 * ",nodev,nosuid,noexec,strictatime -> /dev/.lx[^c]*{,/ **},\ n",
692 * which is another ~58, this should be enough:
694 char buf
[128] = " mount options=(ro,remount,bind";
695 const size_t buf_append_pos
= strlen(buf
);
697 const size_t opt_count
= ARRAY_SIZE(REMOUNT_OPTIONS
);
700 must_append_sized(profile
, size
,
701 "# allow various ro-bind-*re*mounts\n",
702 sizeof("# allow various ro-bind-*re*mounts\n")-1);
704 for (opt_bits
= 0; opt_bits
!= 1 << opt_count
; ++opt_bits
) {
705 size_t at
= buf_append_pos
;
709 for (o
= 0; o
!= opt_count
; ++o
, bit
<<= 1) {
710 if (opt_bits
& bit
) {
711 const struct mntopt_t
*opt
= &REMOUNT_OPTIONS
[o
];
712 memcpy(&buf
[at
], opt
->opt
, opt
->len
);
717 memcpy(&buf
[at
], ")", sizeof(")"));
718 append_remount_rule(profile
, size
, buf
);
720 /* noatime and strictatime don't go together */
721 memcpy(&buf
[at
], ",noatime)", sizeof(",noatime)"));
722 append_remount_rule(profile
, size
, buf
);
723 memcpy(&buf
[at
], ",strictatime)", sizeof(",strictatime)"));
724 append_remount_rule(profile
, size
, buf
);
728 static char *get_apparmor_profile_content(struct lxc_conf
*conf
, const char *lxcpath
)
730 char *profile
, *profile_name_full
;
734 profile_name_full
= apparmor_profile_full(conf
->name
, lxcpath
);
736 profile
= must_concat(NULL
,
737 "#include <tunables/global>\n"
738 "profile \"", profile_name_full
, "\" flags=(attach_disconnected,mediate_deleted) {\n",
740 size
= strlen(profile
);
742 must_append_sized(&profile
, &size
, AA_PROFILE_BASE
,
743 STRARRAYLEN(AA_PROFILE_BASE
));
745 append_all_remount_rules(&profile
, &size
);
747 if (aa_supports_unix
)
748 must_append_sized(&profile
, &size
, AA_PROFILE_UNIX_SOCKETS
,
749 STRARRAYLEN(AA_PROFILE_UNIX_SOCKETS
));
751 if (file_exists("/proc/self/ns/cgroup"))
752 must_append_sized(&profile
, &size
, AA_PROFILE_CGROUP_NAMESPACES
,
753 STRARRAYLEN(AA_PROFILE_CGROUP_NAMESPACES
));
755 if (aa_can_stack
&& !aa_is_stacked
) {
756 char *namespace, *temp
;
758 must_append_sized(&profile
, &size
, AA_PROFILE_STACKING_BASE
,
759 STRARRAYLEN(AA_PROFILE_STACKING_BASE
));
761 namespace = apparmor_namespace(conf
->name
, lxcpath
);
762 temp
= must_concat(NULL
, " change_profile -> \":", namespace, ":*\",\n"
763 " change_profile -> \":", namespace, "://*\",\n",
767 must_append_sized(&profile
, &size
, temp
, strlen(temp
));
770 must_append_sized(&profile
, &size
, AA_PROFILE_NO_STACKING
,
771 STRARRAYLEN(AA_PROFILE_NO_STACKING
));
774 if (conf
->lsm_aa_allow_nesting
) {
775 must_append_sized(&profile
, &size
, AA_PROFILE_NESTING_BASE
,
776 STRARRAYLEN(AA_PROFILE_NESTING_BASE
));
778 if (!aa_can_stack
|| aa_is_stacked
) {
781 temp
= must_concat(NULL
, " change_profile -> \"",
782 profile_name_full
, "\",\n", NULL
);
783 must_append_sized(&profile
, &size
, temp
, strlen(temp
));
788 if (!is_privileged(conf
) || am_host_unpriv())
789 must_append_sized(&profile
, &size
, AA_PROFILE_UNPRIVILEGED
,
790 STRARRAYLEN(AA_PROFILE_UNPRIVILEGED
));
792 lxc_list_for_each(it
, &conf
->lsm_aa_raw
) {
793 const char *line
= it
->elem
;
795 must_append_sized_full(&profile
, &size
, line
, strlen(line
), true);
798 /* include terminating \0 byte */
799 must_append_sized(&profile
, &size
, "}\n", 3);
801 free(profile_name_full
);
807 * apparmor_parser creates a cache file using the parsed file's name as a name.
808 * This means there may be multiple containers with the same name but different
809 * lxcpaths. Therefore we need a sanitized version of the complete profile name
810 * as profile file-name.
811 * We already get this exactly from apparmor_namespace().
813 static char *make_apparmor_profile_path(const char *ctname
, const char *lxcpath
)
815 char *ret
, *filename
;
817 filename
= apparmor_namespace(ctname
, lxcpath
);
818 ret
= must_make_path(lxcpath
, ctname
, "apparmor", filename
, NULL
);
824 static char *make_apparmor_namespace_path(const char *ctname
, const char *lxcpath
)
826 char *ret
, *namespace;
828 namespace = apparmor_namespace(ctname
, lxcpath
);
829 ret
= must_make_path("/sys/kernel/security/apparmor/policy/namespaces", namespace, NULL
);
835 static bool make_apparmor_namespace(struct lxc_conf
*conf
, const char *lxcpath
)
839 if (!aa_can_stack
|| aa_is_stacked
)
842 path
= make_apparmor_namespace_path(conf
->name
, lxcpath
);
844 if (mkdir(path
, 0755) < 0 && errno
!= EEXIST
) {
845 SYSERROR("Error creating AppArmor namespace: %s", path
);
854 static void remove_apparmor_namespace(struct lxc_conf
*conf
, const char *lxcpath
)
858 path
= make_apparmor_namespace_path(conf
->name
, lxcpath
);
859 if (rmdir(path
) != 0)
860 SYSERROR("Error removing AppArmor namespace");
864 struct apparmor_parser_args
{
869 static int apparmor_parser_exec(void *data
)
871 struct apparmor_parser_args
*args
= data
;
872 char cmdbuf
[] = { '-', args
->cmd
, 'W', 'L', 0 };
874 execlp("apparmor_parser", "apparmor_parser", cmdbuf
, APPARMOR_CACHE_DIR
, args
->file
, NULL
);
879 static int run_apparmor_parser(char command
,
880 struct lxc_conf
*conf
,
883 char output
[PATH_MAX
];
885 struct apparmor_parser_args args
= {
887 .file
= make_apparmor_profile_path(conf
->name
, lxcpath
),
890 ret
= run_command(output
, sizeof(output
), apparmor_parser_exec
, (void*)&args
);
892 ERROR("Failed to run apparmor_parser on \"%s\": %s", args
.file
, output
);
901 static void remove_apparmor_profile(struct lxc_conf
*conf
, const char *lxcpath
)
905 /* It's ok if these deletes fail: if the container was never started,
906 * we'll have never written a profile or cached it.
909 path
= make_apparmor_profile_path(conf
->name
, lxcpath
);
913 /* Also remove the apparmor/ subdirectory */
914 path
= apparmor_dir(conf
->name
, lxcpath
);
919 static int load_apparmor_profile(struct lxc_conf
*conf
, const char *lxcpath
)
921 struct stat profile_sb
;
925 char *profile_path
= NULL
, *old_content
= NULL
, *new_content
= NULL
;
928 if (!make_apparmor_namespace(conf
, lxcpath
))
931 /* In order to avoid forcing a profile parse (potentially slow) on
932 * every container start, let's use apparmor's binary policy cache,
933 * which checks mtime of the files to figure out if the policy needs to
936 * Since it uses mtimes, we shouldn't just always write out our local
937 * apparmor template; instead we should check to see whether the
938 * template is the same as ours. If it isn't we should write our
939 * version out so that the new changes are reflected and we definitely
943 profile_path
= make_apparmor_profile_path(conf
->name
, lxcpath
);
944 profile_fd
= open(profile_path
, O_RDONLY
| O_CLOEXEC
);
945 if (profile_fd
>= 0) {
946 if (fstat(profile_fd
, &profile_sb
) < 0) {
947 SYSERROR("Error accessing old profile from %s",
951 old_len
= profile_sb
.st_size
;
952 old_content
= lxc_strmmap(NULL
, old_len
, PROT_READ
,
953 MAP_PRIVATE
, profile_fd
, 0);
955 SYSERROR("Failed to mmap old profile from %s",
959 } else if (errno
!= ENOENT
) {
960 SYSERROR("Error reading old profile from %s", profile_path
);
964 new_content
= get_apparmor_profile_content(conf
, lxcpath
);
968 content_len
= strlen(new_content
);
970 if (!old_content
|| old_len
!= content_len
|| memcmp(old_content
, new_content
, content_len
) != 0) {
973 ret
= mkdir_p(APPARMOR_CACHE_DIR
, 0755);
975 SYSERROR("Error creating AppArmor profile cache directory " APPARMOR_CACHE_DIR
);
979 path
= apparmor_dir(conf
->name
, lxcpath
);
980 ret
= mkdir_p(path
, 0755);
982 SYSERROR("Error creating AppArmor profile directory: %s", path
);
988 ret
= lxc_write_to_file(profile_path
, new_content
, content_len
, false, 0600);
990 SYSERROR("Error writing profile to %s", profile_path
);
995 ret
= run_apparmor_parser(AA_CMD_LOAD
, conf
, lxcpath
);
997 goto out_remove_profile
;
999 conf
->lsm_aa_profile_created
= true;
1004 remove_apparmor_profile(conf
, lxcpath
);
1006 remove_apparmor_namespace(conf
, lxcpath
);
1008 if (profile_fd
>= 0) {
1010 lxc_strmunmap(old_content
, old_len
);
1019 * Ensure that the container's policy namespace is unloaded to free kernel
1020 * memory. This does not delete the policy from disk or cache.
1022 static void apparmor_cleanup(struct lxc_conf
*conf
, const char *lxcpath
)
1027 if (!conf
->lsm_aa_profile_created
)
1030 remove_apparmor_namespace(conf
, lxcpath
);
1031 (void)run_apparmor_parser(AA_CMD_UNLOAD
, conf
, lxcpath
);
1033 remove_apparmor_profile(conf
, lxcpath
);
1036 static int apparmor_prepare(struct lxc_conf
*conf
, const char *lxcpath
)
1040 char *curlabel
= NULL
, *genlabel
= NULL
;
1043 ERROR("AppArmor not enabled");
1047 label
= conf
->lsm_aa_profile
;
1049 /* user may request that we just ignore apparmor */
1050 if (label
&& strcmp(label
, AA_UNCHANGED
) == 0) {
1051 INFO("AppArmor profile unchanged per user request");
1052 conf
->lsm_aa_profile_computed
= must_copy_string(label
);
1056 if (label
&& strcmp(label
, AA_GENERATED
) == 0) {
1057 if (!aa_parser_available
) {
1058 ERROR("Cannot use generated profile: apparmor_parser not available");
1062 /* auto-generate profile based on available/requested security features */
1063 if (load_apparmor_profile(conf
, lxcpath
) != 0) {
1064 ERROR("Failed to load generated AppArmor profile");
1068 genlabel
= apparmor_profile_full(conf
->name
, lxcpath
);
1070 ERROR("Failed to build AppArmor profile name");
1074 if (aa_can_stack
&& !aa_is_stacked
) {
1075 char *namespace = apparmor_namespace(conf
->name
, lxcpath
);
1076 size_t llen
= strlen(genlabel
);
1077 must_append_sized(&genlabel
, &llen
, "//&:", STRARRAYLEN("//&:"));
1078 must_append_sized(&genlabel
, &llen
, namespace, strlen(namespace));
1079 must_append_sized(&genlabel
, &llen
, ":", STRARRAYLEN(":") + 1); /* with the nul byte */
1086 curlabel
= apparmor_process_label_get(lxc_raw_getpid());
1088 if (!aa_can_stack
&& aa_needs_transition(curlabel
)) {
1089 /* we're already confined, and stacking isn't supported */
1091 if (!label
|| strcmp(curlabel
, label
) == 0) {
1092 /* no change requested */
1097 ERROR("Already AppArmor confined, but new label requested.");
1102 if (cgns_supported())
1103 label
= AA_DEF_PROFILE_CGNS
;
1105 label
= AA_DEF_PROFILE
;
1108 if (!check_mount_feature_enabled() && strcmp(label
, "unconfined") != 0) {
1109 WARN("Incomplete AppArmor support in your kernel");
1110 if (!conf
->lsm_aa_allow_incomplete
) {
1111 ERROR("If you really want to start this container, set");
1112 ERROR("lxc.apparmor.allow_incomplete = 1");
1113 ERROR("in your container configuration file");
1118 conf
->lsm_aa_profile_computed
= must_copy_string(label
);
1125 apparmor_cleanup(conf
, lxcpath
);
1132 * apparmor_process_label_set: Set AppArmor process profile
1134 * @label : the profile to set
1135 * @conf : the container configuration to use if @label is NULL
1136 * @default : use the default profile if @label is NULL
1137 * @on_exec : this is ignored. Apparmor profile will be changed immediately
1139 * Returns 0 on success, < 0 on failure
1141 * Notes: This relies on /proc being available.
1143 static int apparmor_process_label_set(const char *inlabel
, struct lxc_conf
*conf
,
1151 ERROR("AppArmor not enabled");
1155 label
= inlabel
? inlabel
: conf
->lsm_aa_profile_computed
;
1157 ERROR("LSM wasn't prepared");
1161 /* user may request that we just ignore apparmor */
1162 if (strcmp(label
, AA_UNCHANGED
) == 0) {
1163 INFO("AppArmor profile unchanged per user request");
1167 if (strcmp(label
, "unconfined") == 0 && apparmor_am_unconfined()) {
1168 INFO("AppArmor profile unchanged");
1171 tid
= lxc_raw_gettid();
1172 label_fd
= lsm_process_label_fd_get(tid
, on_exec
);
1174 SYSERROR("Failed to change AppArmor profile to %s", label
);
1178 ret
= lsm_process_label_set_at(label_fd
, label
, on_exec
);
1181 ERROR("Failed to change AppArmor profile to %s", label
);
1185 INFO("Changed AppArmor profile to %s", label
);
1189 static struct lsm_drv apparmor_drv
= {
1191 .enabled
= apparmor_enabled
,
1192 .process_label_get
= apparmor_process_label_get
,
1193 .process_label_set
= apparmor_process_label_set
,
1194 .prepare
= apparmor_prepare
,
1195 .cleanup
= apparmor_cleanup
,
1198 struct lsm_drv
*lsm_apparmor_drv_init(void)
1200 bool have_mac_admin
= false;
1202 if (!apparmor_enabled())
1205 /* We only support generated profiles when apparmor_parser is usable */
1206 if (!check_apparmor_parser_version())
1209 aa_parser_available
= true;
1211 aa_can_stack
= apparmor_can_stack();
1213 aa_is_stacked
= file_is_yes("/sys/kernel/security/apparmor/.ns_stacked");
1216 have_mac_admin
= lxc_proc_cap_is_set(CAP_SETGID
, CAP_EFFECTIVE
);
1219 if (!have_mac_admin
)
1220 WARN("Per-container AppArmor profiles are disabled because the mac_admin capability is missing");
1221 else if (am_host_unpriv() && !aa_is_stacked
)
1222 WARN("Per-container AppArmor profiles are disabled because LXC is running in an unprivileged container without stacking");
1228 return &apparmor_drv
;