]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/lsm/apparmor.c
cgroups: flatten hierarchy
[mirror_lxc.git] / src / lxc / lsm / apparmor.c
CommitLineData
cc73685d 1/* SPDX-License-Identifier: LGPL-2.1+ */
250b1eec 2
d38dd64a
CB
3#ifndef _GNU_SOURCE
4#define _GNU_SOURCE 1
5#endif
6#include <errno.h>
e075f5d9 7#include <stdio.h>
9958532b 8#include <stdlib.h>
e075f5d9 9#include <sys/mount.h>
d38dd64a
CB
10#include <sys/stat.h>
11#include <sys/types.h>
85108024 12#include <sys/vfs.h>
d38dd64a 13#include <unistd.h>
f2363e38 14
d38dd64a 15#include "caps.h"
7aff4f43 16#include "conf.h"
d38dd64a 17#include "config.h"
7e556d18 18#include "initutils.h"
d38dd64a
CB
19#include "log.h"
20#include "lsm.h"
1800f924 21#include "parse.h"
d7b58715 22#include "raw_syscalls.h"
d38dd64a 23#include "utils.h"
e075f5d9 24
ac2cecc4 25lxc_log_define(apparmor, lsm);
e075f5d9 26
7e6966e5
SH
/* set by lsm_apparmor_drv_init if true */
static int aa_enabled = 0;

/*
 * Feature flags consulted throughout this file.
 * aa_supports_unix is computed by check_apparmor_parser_version(); where the
 * remaining flags are assigned is not visible in this part of the file.
 */
static bool aa_parser_available = false;
static bool aa_supports_unix = false;
static bool aa_can_stack = false;
static bool aa_is_stacked = false;
static bool aa_admin = false;

/* Set to 1 by load_mount_features_enabled() when AA_MOUNT_RESTR exists. */
static int mount_features_enabled = 0;

/* Profile names used when the configuration does not request one. */
#define AA_DEF_PROFILE "lxc-container-default"
#define AA_DEF_PROFILE_CGNS "lxc-container-default-cgns"

/* Kernel interface files probed to detect AppArmor and its mount mediation. */
#define AA_MOUNT_RESTR "/sys/kernel/security/apparmor/features/mount/mask"
#define AA_ENABLED_FILE "/sys/module/apparmor/parameters/enabled"

/* Special values of the configured profile name handled by apparmor_prepare(). */
#define AA_UNCHANGED "unchanged"
#define AA_GENERATED "generated"

/* Option letters passed to apparmor_parser as "-<letter>WL". */
#define AA_CMD_LOAD 'r'
#define AA_CMD_UNLOAD 'R'
#define AA_CMD_PARSE 'Q'
47
/*
 * Rules emitted at the top of every generated profile body (see
 * get_apparmor_profile_content()). The text is AppArmor policy language, not
 * C; '#' lines inside the literal are policy comments.
 */
static const char AA_PROFILE_BASE[] =
" ### Base profile\n"
" capability,\n"
" dbus,\n"
" file,\n"
" network,\n"
" umount,\n"
"\n"
" # Allow us to receive signals from anywhere.\n"
" signal (receive),\n"
"\n"
" # Allow us to send signals to ourselves\n"
" signal peer=@{profile_name},\n"
"\n"
" # Allow other processes to read our /proc entries, futexes, perf tracing and\n"
" # kcmp for now (they will need 'read' in the first place). Administrators can\n"
" # override with:\n"
" # deny ptrace (readby) ...\n"
" ptrace (readby),\n"
"\n"
" # Allow other processes to trace us by default (they will need 'trace' in\n"
" # the first place). Administrators can override with:\n"
" # deny ptrace (tracedby) ...\n"
" ptrace (tracedby),\n"
"\n"
" # Allow us to ptrace ourselves\n"
" ptrace peer=@{profile_name},\n"
"\n"
" # ignore DENIED message on / remount\n"
" deny mount options=(ro, remount) -> /,\n"
" deny mount options=(ro, remount, silent) -> /,\n"
"\n"
" # allow tmpfs mounts everywhere\n"
" mount fstype=tmpfs,\n"
"\n"
" # allow hugetlbfs mounts everywhere\n"
" mount fstype=hugetlbfs,\n"
"\n"
" # allow mqueue mounts everywhere\n"
" mount fstype=mqueue,\n"
"\n"
" # allow fuse mounts everywhere\n"
" mount fstype=fuse,\n"
" mount fstype=fuse.*,\n"
"\n"
" # deny access under /proc/bus to avoid e.g. messing with pci devices directly\n"
" deny @{PROC}/bus/** wklx,\n"
"\n"
" # deny writes in /proc/sys/fs but allow binfmt_misc to be mounted\n"
" mount fstype=binfmt_misc -> /proc/sys/fs/binfmt_misc/,\n"
" deny @{PROC}/sys/fs/** wklx,\n"
"\n"
" # allow efivars to be mounted, writing to it will be blocked though\n"
" mount fstype=efivarfs -> /sys/firmware/efi/efivars/,\n"
"\n"
" # block some other dangerous paths\n"
" deny @{PROC}/kcore rwklx,\n"
" deny @{PROC}/sysrq-trigger rwklx,\n"
" deny @{PROC}/acpi/** rwklx,\n"
"\n"
" # deny writes in /sys except for /sys/fs/cgroup, also allow\n"
" # fusectl, securityfs and debugfs to be mounted there (read-only)\n"
" mount fstype=fusectl -> /sys/fs/fuse/connections/,\n"
" mount fstype=securityfs -> /sys/kernel/security/,\n"
" mount fstype=debugfs -> /sys/kernel/debug/,\n"
" deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/,\n"
" mount fstype=proc -> /proc/,\n"
" mount fstype=sysfs -> /sys/,\n"
" mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/,\n"
" deny /sys/firmware/efi/efivars/** rwklx,\n"
" # note, /sys/kernel/security/** handled below\n"
" mount options=(ro, nosuid, nodev, noexec, remount, strictatime) -> /sys/fs/cgroup/,\n"
"\n"
" # deny reads from debugfs\n"
" deny /sys/kernel/debug/{,**} rwklx,\n"
"\n"
" # allow paths to be made slave, shared, private or unbindable\n"
" # FIXME: This currently doesn't work due to the apparmor parser treating those as allowing all mounts.\n"
"# mount options=(rw,make-slave) -> **,\n"
"# mount options=(rw,make-rslave) -> **,\n"
"# mount options=(rw,make-shared) -> **,\n"
"# mount options=(rw,make-rshared) -> **,\n"
"# mount options=(rw,make-private) -> **,\n"
"# mount options=(rw,make-rprivate) -> **,\n"
"# mount options=(rw,make-unbindable) -> **,\n"
"# mount options=(rw,make-runbindable) -> **,\n"
"\n"
" # allow bind-mounts of anything except /proc, /sys and /dev\n"
" mount options=(rw,bind) /[^spd]*{,/**},\n"
" mount options=(rw,bind) /d[^e]*{,/**},\n"
" mount options=(rw,bind) /de[^v]*{,/**},\n"
" mount options=(rw,bind) /dev/.[^l]*{,/**},\n"
" mount options=(rw,bind) /dev/.l[^x]*{,/**},\n"
" mount options=(rw,bind) /dev/.lx[^c]*{,/**},\n"
" mount options=(rw,bind) /dev/.lxc?*{,/**},\n"
" mount options=(rw,bind) /dev/[^.]*{,/**},\n"
" mount options=(rw,bind) /dev?*{,/**},\n"
" mount options=(rw,bind) /p[^r]*{,/**},\n"
" mount options=(rw,bind) /pr[^o]*{,/**},\n"
" mount options=(rw,bind) /pro[^c]*{,/**},\n"
" mount options=(rw,bind) /proc?*{,/**},\n"
" mount options=(rw,bind) /s[^y]*{,/**},\n"
" mount options=(rw,bind) /sy[^s]*{,/**},\n"
" mount options=(rw,bind) /sys?*{,/**},\n"
"\n"
" # allow various ro-bind-*re*-mounts\n"
" mount options=(ro,remount,bind),\n"
" mount options=(ro,remount,bind,nosuid),\n"
" mount options=(ro,remount,bind,noexec),\n"
" mount options=(ro,remount,bind,nodev),\n"
" mount options=(ro,remount,bind,nosuid,noexec),\n"
" mount options=(ro,remount,bind,noexec,nodev),\n"
" mount options=(ro,remount,bind,nodev,nosuid),\n"
" mount options=(ro,remount,bind,nosuid,noexec,nodev),\n"
"\n"
" # allow moving mounts except for /proc, /sys and /dev\n"
" mount options=(rw,move) /[^spd]*{,/**},\n"
" mount options=(rw,move) /d[^e]*{,/**},\n"
" mount options=(rw,move) /de[^v]*{,/**},\n"
" mount options=(rw,move) /dev/.[^l]*{,/**},\n"
" mount options=(rw,move) /dev/.l[^x]*{,/**},\n"
" mount options=(rw,move) /dev/.lx[^c]*{,/**},\n"
" mount options=(rw,move) /dev/.lxc?*{,/**},\n"
" mount options=(rw,move) /dev/[^.]*{,/**},\n"
" mount options=(rw,move) /dev?*{,/**},\n"
" mount options=(rw,move) /p[^r]*{,/**},\n"
" mount options=(rw,move) /pr[^o]*{,/**},\n"
" mount options=(rw,move) /pro[^c]*{,/**},\n"
" mount options=(rw,move) /proc?*{,/**},\n"
" mount options=(rw,move) /s[^y]*{,/**},\n"
" mount options=(rw,move) /sy[^s]*{,/**},\n"
" mount options=(rw,move) /sys?*{,/**},\n"
"\n"
" # generated by: lxc-generate-aa-rules.py container-rules.base\n"
" deny /proc/sys/[^kn]*{,/**} wklx,\n"
" deny /proc/sys/k[^e]*{,/**} wklx,\n"
" deny /proc/sys/ke[^r]*{,/**} wklx,\n"
" deny /proc/sys/ker[^n]*{,/**} wklx,\n"
" deny /proc/sys/kern[^e]*{,/**} wklx,\n"
" deny /proc/sys/kerne[^l]*{,/**} wklx,\n"
" deny /proc/sys/kernel/[^smhd]*{,/**} wklx,\n"
" deny /proc/sys/kernel/d[^o]*{,/**} wklx,\n"
" deny /proc/sys/kernel/do[^m]*{,/**} wklx,\n"
" deny /proc/sys/kernel/dom[^a]*{,/**} wklx,\n"
" deny /proc/sys/kernel/doma[^i]*{,/**} wklx,\n"
" deny /proc/sys/kernel/domai[^n]*{,/**} wklx,\n"
" deny /proc/sys/kernel/domain[^n]*{,/**} wklx,\n"
" deny /proc/sys/kernel/domainn[^a]*{,/**} wklx,\n"
" deny /proc/sys/kernel/domainna[^m]*{,/**} wklx,\n"
" deny /proc/sys/kernel/domainnam[^e]*{,/**} wklx,\n"
" deny /proc/sys/kernel/domainname?*{,/**} wklx,\n"
" deny /proc/sys/kernel/h[^o]*{,/**} wklx,\n"
" deny /proc/sys/kernel/ho[^s]*{,/**} wklx,\n"
" deny /proc/sys/kernel/hos[^t]*{,/**} wklx,\n"
" deny /proc/sys/kernel/host[^n]*{,/**} wklx,\n"
" deny /proc/sys/kernel/hostn[^a]*{,/**} wklx,\n"
" deny /proc/sys/kernel/hostna[^m]*{,/**} wklx,\n"
" deny /proc/sys/kernel/hostnam[^e]*{,/**} wklx,\n"
" deny /proc/sys/kernel/hostname?*{,/**} wklx,\n"
" deny /proc/sys/kernel/m[^s]*{,/**} wklx,\n"
" deny /proc/sys/kernel/ms[^g]*{,/**} wklx,\n"
" deny /proc/sys/kernel/msg*/** wklx,\n"
" deny /proc/sys/kernel/s[^he]*{,/**} wklx,\n"
" deny /proc/sys/kernel/se[^m]*{,/**} wklx,\n"
" deny /proc/sys/kernel/sem*/** wklx,\n"
" deny /proc/sys/kernel/sh[^m]*{,/**} wklx,\n"
" deny /proc/sys/kernel/shm*/** wklx,\n"
" deny /proc/sys/kernel?*{,/**} wklx,\n"
" deny /proc/sys/n[^e]*{,/**} wklx,\n"
" deny /proc/sys/ne[^t]*{,/**} wklx,\n"
" deny /proc/sys/net?*{,/**} wklx,\n"
" deny /sys/[^fdck]*{,/**} wklx,\n"
" deny /sys/c[^l]*{,/**} wklx,\n"
" deny /sys/cl[^a]*{,/**} wklx,\n"
" deny /sys/cla[^s]*{,/**} wklx,\n"
" deny /sys/clas[^s]*{,/**} wklx,\n"
" deny /sys/class/[^n]*{,/**} wklx,\n"
" deny /sys/class/n[^e]*{,/**} wklx,\n"
" deny /sys/class/ne[^t]*{,/**} wklx,\n"
" deny /sys/class/net?*{,/**} wklx,\n"
" deny /sys/class?*{,/**} wklx,\n"
" deny /sys/d[^e]*{,/**} wklx,\n"
" deny /sys/de[^v]*{,/**} wklx,\n"
" deny /sys/dev[^i]*{,/**} wklx,\n"
" deny /sys/devi[^c]*{,/**} wklx,\n"
" deny /sys/devic[^e]*{,/**} wklx,\n"
" deny /sys/device[^s]*{,/**} wklx,\n"
" deny /sys/devices/[^v]*{,/**} wklx,\n"
" deny /sys/devices/v[^i]*{,/**} wklx,\n"
" deny /sys/devices/vi[^r]*{,/**} wklx,\n"
" deny /sys/devices/vir[^t]*{,/**} wklx,\n"
" deny /sys/devices/virt[^u]*{,/**} wklx,\n"
" deny /sys/devices/virtu[^a]*{,/**} wklx,\n"
" deny /sys/devices/virtua[^l]*{,/**} wklx,\n"
" deny /sys/devices/virtual/[^n]*{,/**} wklx,\n"
" deny /sys/devices/virtual/n[^e]*{,/**} wklx,\n"
" deny /sys/devices/virtual/ne[^t]*{,/**} wklx,\n"
" deny /sys/devices/virtual/net?*{,/**} wklx,\n"
" deny /sys/devices/virtual?*{,/**} wklx,\n"
" deny /sys/devices?*{,/**} wklx,\n"
" deny /sys/f[^s]*{,/**} wklx,\n"
" deny /sys/fs/[^c]*{,/**} wklx,\n"
" deny /sys/fs/c[^g]*{,/**} wklx,\n"
" deny /sys/fs/cg[^r]*{,/**} wklx,\n"
" deny /sys/fs/cgr[^o]*{,/**} wklx,\n"
" deny /sys/fs/cgro[^u]*{,/**} wklx,\n"
" deny /sys/fs/cgrou[^p]*{,/**} wklx,\n"
" deny /sys/fs/cgroup?*{,/**} wklx,\n"
" deny /sys/fs?*{,/**} wklx,\n"
;
258
/* Policy fragment appended only when aa_supports_unix is set. */
static const char AA_PROFILE_UNIX_SOCKETS[] =
"\n"
" ### Feature: unix\n"
" # Allow receive via unix sockets from anywhere\n"
" unix (receive),\n"
"\n"
" # Allow all unix sockets in the container\n"
" unix peer=(label=@{profile_name}),\n"
;
268
/* Policy fragment appended when /proc/self/ns/cgroup exists. */
static const char AA_PROFILE_CGROUP_NAMESPACES[] =
"\n"
" ### Feature: cgroup namespace\n"
" mount fstype=cgroup -> /sys/fs/cgroup/**,\n"
" mount fstype=cgroup2 -> /sys/fs/cgroup/**,\n"
;
275
/* '_BASE' because we still need to append generated change_profile rules */
static const char AA_PROFILE_STACKING_BASE[] =
"\n"
" ### Feature: apparmor stacking\n"
" ### Configuration: apparmor profile loading (in namespace)\n"
" deny /sys/k[^e]*{,/**} wklx,\n"
" deny /sys/ke[^r]*{,/**} wklx,\n"
" deny /sys/ker[^n]*{,/**} wklx,\n"
" deny /sys/kern[^e]*{,/**} wklx,\n"
" deny /sys/kerne[^l]*{,/**} wklx,\n"
" deny /sys/kernel/[^s]*{,/**} wklx,\n"
" deny /sys/kernel/s[^e]*{,/**} wklx,\n"
" deny /sys/kernel/se[^c]*{,/**} wklx,\n"
" deny /sys/kernel/sec[^u]*{,/**} wklx,\n"
" deny /sys/kernel/secu[^r]*{,/**} wklx,\n"
" deny /sys/kernel/secur[^i]*{,/**} wklx,\n"
" deny /sys/kernel/securi[^t]*{,/**} wklx,\n"
" deny /sys/kernel/securit[^y]*{,/**} wklx,\n"
" deny /sys/kernel/security/[^a]*{,/**} wklx,\n"
" deny /sys/kernel/security/a[^p]*{,/**} wklx,\n"
" deny /sys/kernel/security/ap[^p]*{,/**} wklx,\n"
" deny /sys/kernel/security/app[^a]*{,/**} wklx,\n"
" deny /sys/kernel/security/appa[^r]*{,/**} wklx,\n"
" deny /sys/kernel/security/appar[^m]*{,/**} wklx,\n"
" deny /sys/kernel/security/apparm[^o]*{,/**} wklx,\n"
" deny /sys/kernel/security/apparmo[^r]*{,/**} wklx,\n"
" deny /sys/kernel/security/apparmor?*{,/**} wklx,\n"
" deny /sys/kernel/security?*{,/**} wklx,\n"
" deny /sys/kernel?*{,/**} wklx,\n"
;
306
/* Policy fragment used in place of AA_PROFILE_STACKING_BASE when the
 * container is not given its own policy namespace.
 */
static const char AA_PROFILE_NO_STACKING[] =
"\n"
" ### Feature: apparmor stacking (not present)\n"
" deny /sys/k*{,/**} rwklx,\n"
;
312
/* '_BASE' because we need to append change_profile for stacking */
static const char AA_PROFILE_NESTING_BASE[] =
"\n"
" ### Configuration: nesting\n"
" pivot_root,\n"
" ptrace,\n"
" signal,\n"
"\n"
 /* NOTE: See conf.c's "nesting_helpers" for details. */
" deny /dev/.lxc/proc/** rw,\n"
" deny /dev/.lxc/sys/** rw,\n"
"\n"
" mount fstype=proc -> /usr/lib/*/lxc/**,\n"
" mount fstype=sysfs -> /usr/lib/*/lxc/**,\n"
" mount options=(rw,bind),\n"
" mount options=(rw,rbind),\n"
" mount options=(rw,make-rshared),\n"
"\n"
 /* FIXME: What's the state here on apparmor's side? */
" # there doesn't seem to be a way to ask for:\n"
" # mount options=(ro,nosuid,nodev,noexec,remount,bind),\n"
" # as we always get mount to $cdir/proc/sys with those flags denied\n"
" # So allow all mounts until that is straightened out:\n"
" mount,\n"
;
338
/* Policy fragment appended when the container has an id map or the host
 * side is unprivileged (see get_apparmor_profile_content()).
 */
static const char AA_PROFILE_UNPRIVILEGED[] =
"\n"
" ### Configuration: unprivileged container\n"
" pivot_root,\n"
"\n"
" # Allow modifying mount propagation\n"
" mount options=(rw,make-slave) -> **,\n"
" mount options=(rw,make-rslave) -> **,\n"
" mount options=(rw,make-shared) -> **,\n"
" mount options=(rw,make-rshared) -> **,\n"
" mount options=(rw,make-private) -> **,\n"
" mount options=(rw,make-rprivate) -> **,\n"
" mount options=(rw,make-unbindable) -> **,\n"
" mount options=(rw,make-runbindable) -> **,\n"
"\n"
" # Allow all bind-mounts\n"
" mount options=(rw,bind),\n"
" mount options=(rw,rbind),\n"
"\n"
" # Allow remounting things read-only\n"
" mount options=(ro,remount),\n"
;
e075f5d9 361
7196c7b3
SH
362static bool check_mount_feature_enabled(void)
363{
364 return mount_features_enabled == 1;
365}
366
367static void load_mount_features_enabled(void)
7aff4f43
SH
368{
369 struct stat statbuf;
370 int ret;
85108024 371
7aff4f43 372 ret = stat(AA_MOUNT_RESTR, &statbuf);
7196c7b3
SH
373 if (ret == 0)
374 mount_features_enabled = 1;
7aff4f43
SH
375}
376
fe4de9a6
DE
377/* aa_getcon is not working right now. Use our hand-rolled version below */
378static int apparmor_enabled(void)
379{
fe4de9a6
DE
380 FILE *fin;
381 char e;
382 int ret;
383
7e556d18 384 fin = fopen_cloexec(AA_ENABLED_FILE, "r");
fe4de9a6
DE
385 if (!fin)
386 return 0;
387 ret = fscanf(fin, "%c", &e);
fe4de9a6 388 fclose(fin);
7196c7b3
SH
389 if (ret == 1 && e == 'Y') {
390 load_mount_features_enabled();
fe4de9a6 391 return 1;
7196c7b3
SH
392 }
393
fe4de9a6
DE
394 return 0;
395}
9958532b 396
/*
 * Read the AppArmor label of @pid from /proc/<pid>/attr/current.
 *
 * Returns a malloc()ed, NUL-terminated string that the caller must free(),
 * or NULL on failure. The label is cut at the first newline and then at the
 * first space (presumably dropping the trailing mode annotation).
 *
 * The file is re-read with a buffer 1024 bytes larger each time the previous
 * buffer was completely filled, so long labels are not truncated.
 */
static char *apparmor_process_label_get(pid_t pid)
{
	char path[100], *sep;
	char *buf = NULL, *newbuf;
	size_t sz = 0, nread;
	int ret;
	FILE *f;

	ret = snprintf(path, sizeof(path), "/proc/%d/attr/current", (int)pid);
	if (ret < 0 || (size_t)ret >= sizeof(path)) {
		ERROR("path name too long");
		return NULL;
	}

	for (;;) {
		f = fopen_cloexec(path, "r");
		if (!f) {
			SYSERROR("opening %s", path);
			free(buf);
			return NULL;
		}

		sz += 1024;
		newbuf = realloc(buf, sz);
		if (!newbuf) {
			free(buf);
			ERROR("out of memory");
			fclose(f);
			return NULL;
		}
		buf = newbuf;
		memset(buf, 0, sz);

		/* fread() returns size_t and never a negative value: errors
		 * must be detected with ferror() before the stream is closed.
		 */
		nread = fread(buf, 1, sz - 1, f);
		if (ferror(f)) {
			fclose(f);
			ERROR("reading %s", path);
			free(buf);
			return NULL;
		}
		fclose(f);

		/* A short read means the whole label fit; a full buffer may
		 * have truncated it, so retry with more room.
		 */
		if (nread < sz - 1)
			break;
	}

	sep = strchr(buf, '\n');
	if (sep)
		*sep = '\0';

	sep = strchr(buf, ' ');
	if (sep)
		*sep = '\0';

	return buf;
}
444
9bfdc0ad
SH
/*
 * Probably makes sense to reorganize these to only read
 * the label once
 */

/*
 * Returns true when the calling process's label is "unconfined" or when the
 * label could not be read at all.
 */
static bool apparmor_am_unconfined(void)
{
	char *label = apparmor_process_label_get(lxc_raw_getpid());
	bool unconfined = !label || strcmp(label, "unconfined") == 0;

	free(label);
	return unconfined;
}
458
374625aa
SH
/*
 * Decide whether @curlabel denotes real confinement.
 *
 * Returns false for a NULL label, for "unconfined" and for the
 * "/usr/bin/lxc-start" label; true for anything else.
 */
static bool aa_needs_transition(char *curlabel)
{
	if (!curlabel)
		return false;

	return strcmp(curlabel, "unconfined") != 0 &&
	       strcmp(curlabel, "/usr/bin/lxc-start") != 0;
}
469
1800f924
WB
/*
 * Write @num as exactly 16 lowercase, zero-padded hexadecimal digits into
 * @buf and NUL-terminate it. @buf must have room for at least 17 bytes.
 */
static inline void uint64hex(char *buf, uint64_t num)
{
	static const char digits[] = "0123456789abcdef";
	size_t i;

	buf[16] = '\0';
	/* Fill from the least significant nibble backwards. */
	for (i = 16; i-- > 0;) {
		buf[i] = digits[num & 0xf];
		num >>= 4;
	}
}
481
482static inline char *shorten_apparmor_name(char *name)
483{
484 size_t len = strlen(name);
485 if (len + 7 > 253) {
486 uint64_t hash;
487 hash = fnv_64a_buf(name, len, FNV1A_64_INIT);
488 name = must_realloc(name, 16 + 1);
489 uint64hex(name, hash);
490 }
491
492 return name;
493}
494
/* Replace slashes with hyphens, in place. @path must be NUL-terminated. */
static inline void sanitize_path(char *path)
{
	char *p;

	for (p = path; *p != '\0'; p++) {
		if (*p == '/')
			*p = '-';
	}
}
504
/* Build "<lxcpath>/<ctname>/apparmor"; the result comes from
 * must_make_path() and is free()d by the callers in this file.
 */
static inline char *apparmor_dir(const char *ctname, const char *lxcpath)
{
	return must_make_path(lxcpath, ctname, "apparmor", NULL);
}
509
510
/*
 * Build the generated profile's name, "lxc-<ctname>_<<lxcpath>>", passed
 * through shorten_apparmor_name(). Callers in this file free() the result.
 */
static inline char *apparmor_profile_full(const char *ctname, const char *lxcpath)
{
	char *name;

	name = must_concat(NULL, "lxc-", ctname, "_<", lxcpath, ">", NULL);

	return shorten_apparmor_name(name);
}
515
/* Like apparmor_profile_full() but with slashes replaced by hyphens */
static inline char *apparmor_namespace(const char *ctname, const char *lxcpath)
{
	char *ns = apparmor_profile_full(ctname, lxcpath);

	sanitize_path(ns);

	return ns;
}
526
527/* FIXME: This is currently run only in the context of a constructor (via the
528 * initial lsm_init() called due to its __attribute__((constructor)), so we
529 * do not have ERROR/... macros available, so there are some fprintf(stderr)s
530 * in there.
531 */
532static bool check_apparmor_parser_version()
533{
534 struct lxc_popen_FILE *parserpipe;
535 int rc;
536 int major = 0, minor = 0, micro = 0;
537
538 parserpipe = lxc_popen("apparmor_parser --version");
539 if (!parserpipe) {
540 fprintf(stderr, "Failed to run check for apparmor_parser\n");
541 return false;
542 }
543
544 rc = fscanf(parserpipe->f, "AppArmor parser version %d.%d.%d", &major, &minor, &micro);
545 if (rc < 1) {
546 lxc_pclose(parserpipe);
547 /* We stay silent for now as this most likely means the shell
548 * lxc_popen executed failed to find the apparmor_parser binary.
549 * See the FIXME comment above for details.
550 */
551 return false;
552 }
553
554 rc = lxc_pclose(parserpipe);
555 if (rc < 0) {
556 fprintf(stderr, "Error waiting for child process\n");
557 return false;
558 }
559 if (rc != 0) {
560 fprintf(stderr, "'apparmor_parser --version' executed with an error status\n");
561 return false;
562 }
563
564 aa_supports_unix = (major > 2) ||
565 (major == 2 && minor > 10) ||
566 (major == 2 && minor == 10 && micro >= 95);
567
568 return true;
569}
570
571static bool file_is_yes(const char *path)
572{
573 ssize_t rd;
574 int fd;
575 char buf[8]; /* we actually just expect "yes" or "no" */
576
577 fd = open(path, O_RDONLY | O_CLOEXEC);
578 if (fd < 0)
579 return false;
580
1fabf7d4 581 rd = lxc_read_nointr(fd, buf, sizeof(buf));
1800f924
WB
582 close(fd);
583
584 return rd >= 4 && strncmp(buf, "yes\n", 4) == 0;
585}
586
/*
 * Returns true when the kernel's AppArmor feature files report
 * domain/stack as "yes" and a domain/version of at least 1.2.
 */
static bool apparmor_can_stack(void)
{
	int major = 0, minor = 0, scanned;
	FILE *f;

	if (!file_is_yes("/sys/kernel/security/apparmor/features/domain/stack"))
		return false;

	f = fopen_cloexec("/sys/kernel/security/apparmor/features/domain/version", "r");
	if (!f)
		return false;

	scanned = fscanf(f, "%d.%d", &major, &minor);
	fclose(f);
	if (scanned != 2)
		return false;

	return major > 1 || (major == 1 && minor >= 2);
}
606
/*
 * Grow *@buf (currently *@bufsz bytes long) and append @size bytes from
 * @data, optionally followed by a single '\n'.
 *
 * *@buf and *@bufsz are updated in place. Nothing is NUL-terminated here;
 * callers that need a terminator include it in @data/@size.
 */
static void must_append_sized_full(char **buf, size_t *bufsz, const char *data,
				   size_t size, bool append_newline)
{
	size_t oldsize = *bufsz;
	size_t newsize = oldsize + size + (append_newline ? 1 : 0);

	*buf = must_realloc(*buf, newsize);
	memcpy(*buf + oldsize, data, size);

	if (append_newline)
		(*buf)[newsize - 1] = '\n';

	*bufsz = newsize;
}
623
/* Append @size bytes of @data to *@buf without a trailing newline; see
 * must_append_sized_full().
 */
static void must_append_sized(char **buf, size_t *bufsz, const char *data, size_t size)
{
	/* No 'return <expr>;' here: ISO C forbids it in a void function. */
	must_append_sized_full(buf, bufsz, data, size, false);
}
628
629static bool is_privileged(struct lxc_conf *conf)
630{
631 return lxc_list_empty(&conf->id_map);
632}
633
634static char *get_apparmor_profile_content(struct lxc_conf *conf, const char *lxcpath)
635{
636 char *profile, *profile_name_full;
637 size_t size;
638 struct lxc_list *it;
639
640 profile_name_full = apparmor_profile_full(conf->name, lxcpath);
641
fe70edee 642 profile = must_concat(NULL,
1800f924
WB
643"#include <tunables/global>\n"
644"profile \"", profile_name_full, "\" flags=(attach_disconnected,mediate_deleted) {\n",
645 NULL);
646 size = strlen(profile);
647
648 must_append_sized(&profile, &size, AA_PROFILE_BASE,
6333c915 649 STRARRAYLEN(AA_PROFILE_BASE));
1800f924
WB
650
651 if (aa_supports_unix)
652 must_append_sized(&profile, &size, AA_PROFILE_UNIX_SOCKETS,
6333c915 653 STRARRAYLEN(AA_PROFILE_UNIX_SOCKETS));
1800f924
WB
654
655 if (file_exists("/proc/self/ns/cgroup"))
656 must_append_sized(&profile, &size, AA_PROFILE_CGROUP_NAMESPACES,
6333c915 657 STRARRAYLEN(AA_PROFILE_CGROUP_NAMESPACES));
1800f924
WB
658
659 if (aa_can_stack && !aa_is_stacked) {
660 char *namespace, *temp;
661
662 must_append_sized(&profile, &size, AA_PROFILE_STACKING_BASE,
6333c915 663 STRARRAYLEN(AA_PROFILE_STACKING_BASE));
1800f924
WB
664
665 namespace = apparmor_namespace(conf->name, lxcpath);
fe70edee 666 temp = must_concat(NULL, " change_profile -> \":", namespace, ":*\",\n"
1800f924
WB
667 " change_profile -> \":", namespace, "://*\",\n",
668 NULL);
669 free(namespace);
670
671 must_append_sized(&profile, &size, temp, strlen(temp));
672 free(temp);
673 } else {
674 must_append_sized(&profile, &size, AA_PROFILE_NO_STACKING,
6333c915 675 STRARRAYLEN(AA_PROFILE_NO_STACKING));
1800f924
WB
676 }
677
678 if (conf->lsm_aa_allow_nesting) {
679 must_append_sized(&profile, &size, AA_PROFILE_NESTING_BASE,
6333c915 680 STRARRAYLEN(AA_PROFILE_NESTING_BASE));
1800f924
WB
681
682 if (!aa_can_stack || aa_is_stacked) {
683 char *temp;
684
fe70edee 685 temp = must_concat(NULL, " change_profile -> \"",
1800f924
WB
686 profile_name_full, "\",\n", NULL);
687 must_append_sized(&profile, &size, temp, strlen(temp));
688 free(temp);
689 }
690 }
691
692 if (!is_privileged(conf) || am_host_unpriv())
693 must_append_sized(&profile, &size, AA_PROFILE_UNPRIVILEGED,
6333c915 694 STRARRAYLEN(AA_PROFILE_UNPRIVILEGED));
1800f924
WB
695
696 lxc_list_for_each(it, &conf->lsm_aa_raw) {
697 const char *line = it->elem;
698
699 must_append_sized_full(&profile, &size, line, strlen(line), true);
700 }
701
702 /* include terminating \0 byte */
703 must_append_sized(&profile, &size, "}\n", 3);
704
705 free(profile_name_full);
706
707 return profile;
708}
709
/*
 * apparmor_parser creates a cache file using the parsed file's name as a name.
 * This means there may be multiple containers with the same name but different
 * lxcpaths. Therefore we need a sanitized version of the complete profile name
 * as profile file-name.
 * We already get this exactly from apparmor_namespace().
 *
 * Returns "<lxcpath>/<ctname>/apparmor/<sanitized profile name>"; callers in
 * this file free() the result.
 */
static char *make_apparmor_profile_path(const char *ctname, const char *lxcpath)
{
	char *filename, *path;

	filename = apparmor_namespace(ctname, lxcpath);
	path = must_make_path(lxcpath, ctname, "apparmor", filename, NULL);
	free(filename);

	return path;
}
727
/*
 * Build the path of the container's policy namespace directory below
 * /sys/kernel/security/apparmor/policy/namespaces. Callers in this file
 * free() the result.
 */
static char *make_apparmor_namespace_path(const char *ctname, const char *lxcpath)
{
	char *namespace, *path;

	namespace = apparmor_namespace(ctname, lxcpath);
	path = must_make_path("/sys/kernel/security/apparmor/policy/namespaces", namespace, NULL);
	free(namespace);

	return path;
}
738
739static bool make_apparmor_namespace(struct lxc_conf *conf, const char *lxcpath)
740{
741 char *path;
742
743 if (!aa_can_stack || aa_is_stacked)
744 return true;
745
746 path = make_apparmor_namespace_path(conf->name, lxcpath);
747 errno = 0;
748 if (mkdir(path, 0755) < 0 && errno != EEXIST) {
749 SYSERROR("Error creating AppArmor namespace: %s", path);
750 free(path);
751 return false;
752 }
753 free(path);
754
755 return true;
756}
757
758static void remove_apparmor_namespace(struct lxc_conf *conf, const char *lxcpath)
759{
760 char *path;
761
762 path = make_apparmor_namespace_path(conf->name, lxcpath);
763 if (rmdir(path) != 0)
764 SYSERROR("Error removing AppArmor namespace");
765 free(path);
766}
767
/* Arguments handed to apparmor_parser_exec() through run_command(). */
struct apparmor_parser_args {
	char cmd;   /* one of the AA_CMD_* option letters */
	char *file; /* path of the profile file to operate on */
};
772
773static int apparmor_parser_exec(void *data)
774{
775 struct apparmor_parser_args *args = data;
776 char cmdbuf[] = { '-', args->cmd, 'W', 'L', 0 };
777
778 execlp("apparmor_parser", "apparmor_parser", cmdbuf, APPARMOR_CACHE_DIR, args->file, NULL);
779
780 return -1;
781}
782
783static int run_apparmor_parser(char command,
784 struct lxc_conf *conf,
785 const char *lxcpath)
786{
85d67fba 787 char output[PATH_MAX];
1800f924
WB
788 int ret;
789 struct apparmor_parser_args args = {
790 .cmd = command,
791 .file = make_apparmor_profile_path(conf->name, lxcpath),
792 };
793
794 ret = run_command(output, sizeof(output), apparmor_parser_exec, (void*)&args);
795 if (ret < 0) {
796 ERROR("Failed to run apparmor_parser on \"%s\": %s", args.file, output);
797 ret = -1;
798 }
799
800
801 free(args.file);
802 return ret;
803}
804
805static void remove_apparmor_profile(struct lxc_conf *conf, const char *lxcpath)
806{
807 char *path;
808
809 /* It's ok if these deletes fail: if the container was never started,
810 * we'll have never written a profile or cached it.
811 */
812
813 path = make_apparmor_profile_path(conf->name, lxcpath);
814 (void)unlink(path);
815 free(path);
816
817 /* Also remove the apparmor/ subdirectory */
818 path = apparmor_dir(conf->name, lxcpath);
819 (void)rmdir(path);
820 free(path);
821}
822
823static int load_apparmor_profile(struct lxc_conf *conf, const char *lxcpath)
824{
825 struct stat profile_sb;
826 size_t content_len;
827 int ret = -1;
828 size_t old_len = 0;
829 char *profile_path = NULL, *old_content = NULL, *new_content = NULL;
830 int profile_fd = -1;
831
832 if (!make_apparmor_namespace(conf, lxcpath))
833 return -1;
834
835 /* In order to avoid forcing a profile parse (potentially slow) on
836 * every container start, let's use apparmor's binary policy cache,
837 * which checks mtime of the files to figure out if the policy needs to
838 * be regenerated.
839 *
840 * Since it uses mtimes, we shouldn't just always write out our local
841 * apparmor template; instead we should check to see whether the
842 * template is the same as ours. If it isn't we should write our
843 * version out so that the new changes are reflected and we definitely
844 * force a recompile.
845 */
846
847 profile_path = make_apparmor_profile_path(conf->name, lxcpath);
848 profile_fd = open(profile_path, O_RDONLY | O_CLOEXEC);
849 if (profile_fd >= 0) {
850 if (fstat(profile_fd, &profile_sb) < 0) {
851 SYSERROR("Error accessing old profile from %s",
852 profile_path);
853 goto out;
854 }
855 old_len = profile_sb.st_size;
856 old_content = lxc_strmmap(NULL, old_len, PROT_READ,
857 MAP_PRIVATE, profile_fd, 0);
858 if (!old_content) {
859 SYSERROR("Failed to mmap old profile from %s",
860 profile_path);
861 goto out;
862 }
863 } else if (errno != ENOENT) {
864 SYSERROR("Error reading old profile from %s", profile_path);
865 goto out;
866 }
867
868 new_content = get_apparmor_profile_content(conf, lxcpath);
869 if (!new_content)
870 goto out;
871
872 content_len = strlen(new_content);
873
874 if (!old_content || old_len != content_len || memcmp(old_content, new_content, content_len) != 0) {
875 char *path;
876
877 ret = mkdir_p(APPARMOR_CACHE_DIR, 0755);
878 if (ret < 0) {
879 SYSERROR("Error creating AppArmor profile cache directory " APPARMOR_CACHE_DIR);
880 goto out;
881 }
882
883 path = apparmor_dir(conf->name, lxcpath);
884 ret = mkdir_p(path, 0755);
885 if (ret < 0) {
886 SYSERROR("Error creating AppArmor profile directory: %s", path);
887 free(path);
888 goto out;
889 }
890 free(path);
891
892 ret = lxc_write_to_file(profile_path, new_content, content_len, false, 0600);
893 if (ret < 0) {
894 SYSERROR("Error writing profile to %s", profile_path);
895 goto out;
896 }
897 }
898
899 ret = run_apparmor_parser(AA_CMD_LOAD, conf, lxcpath);
900 if (ret != 0)
901 goto out_remove_profile;
902
903 conf->lsm_aa_profile_created = true;
904
905 goto out_ok;
906
907out_remove_profile:
908 remove_apparmor_profile(conf, lxcpath);
909out:
910 remove_apparmor_namespace(conf, lxcpath);
911out_ok:
912 if (profile_fd >= 0) {
913 if (old_content)
914 lxc_strmunmap(old_content, old_len);
915 close(profile_fd);
916 }
917 free(profile_path);
918 free(new_content);
919 return ret;
920}
921
922/*
923 * Ensure that the container's policy namespace is unloaded to free kernel
924 * memory. This does not delete the policy from disk or cache.
925 */
926static void apparmor_cleanup(struct lxc_conf *conf, const char *lxcpath)
927{
928 if (!aa_admin)
929 return;
930
931 if (!conf->lsm_aa_profile_created)
932 return;
933
934 remove_apparmor_namespace(conf, lxcpath);
935 (void)run_apparmor_parser(AA_CMD_UNLOAD, conf, lxcpath);
936
937 remove_apparmor_profile(conf, lxcpath);
938}
939
940static int apparmor_prepare(struct lxc_conf *conf, const char *lxcpath)
941{
942 int ret = -1;
943 const char *label;
944 char *curlabel = NULL, *genlabel = NULL;
945
946 if (!aa_enabled) {
947 ERROR("AppArmor not enabled");
948 return -1;
949 }
950
951 label = conf->lsm_aa_profile;
e075f5d9 952
480c876b
SH
953 /* user may request that we just ignore apparmor */
954 if (label && strcmp(label, AA_UNCHANGED) == 0) {
1800f924
WB
955 INFO("AppArmor profile unchanged per user request");
956 conf->lsm_aa_profile_computed = must_copy_string(label);
480c876b
SH
957 return 0;
958 }
959
1800f924
WB
960 if (label && strcmp(label, AA_GENERATED) == 0) {
961 if (!aa_parser_available) {
962 ERROR("Cannot use generated profile: apparmor_parser not available");
963 goto out;
964 }
965
966 /* auto-generate profile based on available/requested security features */
967 if (load_apparmor_profile(conf, lxcpath) != 0) {
968 ERROR("Failed to load generated AppArmor profile");
969 goto out;
970 }
971
972 genlabel = apparmor_profile_full(conf->name, lxcpath);
973 if (!genlabel) {
974 ERROR("Failed to build AppArmor profile name");
975 goto out;
976 }
977
978 if (aa_can_stack && !aa_is_stacked) {
979 char *namespace = apparmor_namespace(conf->name, lxcpath);
980 size_t llen = strlen(genlabel);
6333c915 981 must_append_sized(&genlabel, &llen, "//&:", STRARRAYLEN("//&:"));
1800f924 982 must_append_sized(&genlabel, &llen, namespace, strlen(namespace));
6333c915 983 must_append_sized(&genlabel, &llen, ":", STRARRAYLEN(":") + 1); /* with the nul byte */
1800f924
WB
984 free(namespace);
985 }
986
987 label = genlabel;
988 }
989
0059379f 990 curlabel = apparmor_process_label_get(lxc_raw_getpid());
374625aa 991
1800f924 992 if (!aa_can_stack && aa_needs_transition(curlabel)) {
1a0e70ac 993 /* we're already confined, and stacking isn't supported */
374625aa
SH
994
995 if (!label || strcmp(curlabel, label) == 0) {
1a0e70ac 996 /* no change requested */
1800f924
WB
997 ret = 0;
998 goto out;
9bfdc0ad 999 }
374625aa 1000
1800f924
WB
1001 ERROR("Already AppArmor confined, but new label requested.");
1002 goto out;
9bfdc0ad
SH
1003 }
1004
fe4de9a6 1005 if (!label) {
1800f924
WB
1006 if (cgns_supported())
1007 label = AA_DEF_PROFILE_CGNS;
fe4de9a6 1008 else
1800f924 1009 label = AA_DEF_PROFILE;
7aff4f43
SH
1010 }
1011
7196c7b3 1012 if (!check_mount_feature_enabled() && strcmp(label, "unconfined") != 0) {
7aff4f43
SH
1013 WARN("Incomplete AppArmor support in your kernel");
1014 if (!conf->lsm_aa_allow_incomplete) {
1015 ERROR("If you really want to start this container, set");
69e38e00 1016 ERROR("lxc.apparmor.allow_incomplete = 1");
7aff4f43 1017 ERROR("in your container configuration file");
1800f924 1018 goto out;
7aff4f43 1019 }
e075f5d9 1020 }
e075f5d9 1021
1800f924
WB
1022 conf->lsm_aa_profile_computed = must_copy_string(label);
1023 ret = 0;
1024
1025out:
1026 if (genlabel) {
1027 free(genlabel);
1028 if (ret != 0)
1029 apparmor_cleanup(conf, lxcpath);
1030 }
1031 free(curlabel);
1032 return ret;
1033}
1034
1035/*
1036 * apparmor_process_label_set: Set AppArmor process profile
1037 *
1038 * @label : the profile to set
1039 * @conf : the container configuration to use if @label is NULL
1040 * @default : use the default profile if @label is NULL
1041 * @on_exec : this is ignored. Apparmor profile will be changed immediately
1042 *
1043 * Returns 0 on success, < 0 on failure
1044 *
1045 * Notes: This relies on /proc being available.
1046 */
1047static int apparmor_process_label_set(const char *inlabel, struct lxc_conf *conf,
1048 bool on_exec)
1049{
1050 int label_fd, ret;
1051 pid_t tid;
1052 const char *label;
1053
1054 if (!aa_enabled) {
1055 ERROR("AppArmor not enabled");
1056 return -1;
1057 }
1058
1059 label = inlabel ? inlabel : conf->lsm_aa_profile_computed;
1060 if (!label) {
1061 ERROR("LSM wasn't prepared");
1062 return -1;
1063 }
1064
1065 /* user may request that we just ignore apparmor */
1066 if (strcmp(label, AA_UNCHANGED) == 0) {
1067 INFO("AppArmor profile unchanged per user request");
1068 return 0;
1069 }
7aff4f43 1070
fe4de9a6 1071 if (strcmp(label, "unconfined") == 0 && apparmor_am_unconfined()) {
1800f924 1072 INFO("AppArmor profile unchanged");
e075f5d9
SH
1073 return 0;
1074 }
5288a74f
CB
1075 tid = lxc_raw_gettid();
1076 label_fd = lsm_process_label_fd_get(tid, on_exec);
1077 if (label_fd < 0) {
1800f924 1078 SYSERROR("Failed to change AppArmor profile to %s", label);
b2fe91c7 1079 return -1;
e075f5d9 1080 }
e075f5d9 1081
5288a74f
CB
1082 ret = lsm_process_label_set_at(label_fd, label, on_exec);
1083 close(label_fd);
1084 if (ret < 0) {
1800f924 1085 ERROR("Failed to change AppArmor profile to %s", label);
5288a74f
CB
1086 return -1;
1087 }
1088
1800f924 1089 INFO("Changed AppArmor profile to %s", label);
e075f5d9
SH
1090 return 0;
1091}
1092
fe4de9a6
DE
1093static struct lsm_drv apparmor_drv = {
1094 .name = "AppArmor",
9e4bf8b1 1095 .enabled = apparmor_enabled,
fe4de9a6
DE
1096 .process_label_get = apparmor_process_label_get,
1097 .process_label_set = apparmor_process_label_set,
1800f924
WB
1098 .prepare = apparmor_prepare,
1099 .cleanup = apparmor_cleanup,
fe4de9a6 1100};
9958532b 1101
fe4de9a6 1102struct lsm_drv *lsm_apparmor_drv_init(void)
e075f5d9 1103{
1800f924
WB
1104 bool have_mac_admin = false;
1105
fe4de9a6
DE
1106 if (!apparmor_enabled())
1107 return NULL;
1800f924
WB
1108
1109 /* We only support generated profiles when apparmor_parser is usable */
1110 if (!check_apparmor_parser_version())
1111 goto out;
1112
1113 aa_parser_available = true;
1114
1115 aa_can_stack = apparmor_can_stack();
1116 if (aa_can_stack)
1117 aa_is_stacked = file_is_yes("/sys/kernel/security/apparmor/.ns_stacked");
1118
1119 #if HAVE_LIBCAP
1120 have_mac_admin = lxc_proc_cap_is_set(CAP_SETGID, CAP_EFFECTIVE);
1121 #endif
1122
1123 if (!have_mac_admin)
1124 WARN("Per-container AppArmor profiles are disabled because the mac_admin capability is missing");
1125 else if (am_host_unpriv() && !aa_is_stacked)
1126 WARN("Per-container AppArmor profiles are disabled because LXC is running in an unprivileged container without stacking");
1127 else
1128 aa_admin = true;
1129
1130out:
7e6966e5 1131 aa_enabled = 1;
fe4de9a6 1132 return &apparmor_drv;
e075f5d9 1133}