]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/lsm/apparmor.c
Merge pull request #2676 from 2xsec/bugfix
[mirror_lxc.git] / src / lxc / lsm / apparmor.c
1 /* apparmor
2 *
3 * Copyright © 2012 Serge Hallyn <serge.hallyn@ubuntu.com>.
4 * Copyright © 2012 Canonical Ltd.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #ifndef _GNU_SOURCE
22 #define _GNU_SOURCE 1
23 #endif
24 #include <errno.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <sys/mount.h>
28 #include <sys/stat.h>
29 #include <sys/types.h>
30 #include <sys/vfs.h>
31 #include <unistd.h>
32
33 #include "caps.h"
34 #include "conf.h"
35 #include "config.h"
36 #include "initutils.h"
37 #include "log.h"
38 #include "lsm.h"
39 #include "parse.h"
40 #include "raw_syscalls.h"
41 #include "utils.h"
42
/* Register this file's log category. */
lxc_log_define(apparmor, lsm);

/*
 * Driver-wide state flags.
 *
 * set by lsm_apparmor_drv_init if true
 */
/* Checked by apparmor_prepare(), which refuses to run when this is 0. */
static int aa_enabled = 0;
/* Required by apparmor_prepare() before a generated profile may be used. */
static bool aa_parser_available = false;
/* Set by check_apparmor_parser_version() for parser versions >= 2.10.95. */
static bool aa_supports_unix = false;
/*
 * A per-container policy namespace is only used when aa_can_stack is true and
 * aa_is_stacked is false (see make_apparmor_namespace()).
 */
static bool aa_can_stack = false;
static bool aa_is_stacked = false;
/* apparmor_cleanup() is a no-op unless this is true. */
static bool aa_admin = false;

/* Set to 1 by load_mount_features_enabled() when AA_MOUNT_RESTR exists. */
static int mount_features_enabled = 0;
54
/*
 * Labels apparmor_prepare() falls back to when the configuration names no
 * profile; the _CGNS variant is picked when cgns_supported() is true.
 */
#define AA_DEF_PROFILE "lxc-container-default"
#define AA_DEF_PROFILE_CGNS "lxc-container-default-cgns"
/* Its mere existence is what load_mount_features_enabled() tests for. */
#define AA_MOUNT_RESTR "/sys/kernel/security/apparmor/features/mount/mask"
/* apparmor_enabled() expects the first character of this file to be 'Y'. */
#define AA_ENABLED_FILE "/sys/module/apparmor/parameters/enabled"
/* Special values of conf->lsm_aa_profile recognised by apparmor_prepare(). */
#define AA_UNCHANGED "unchanged"
#define AA_GENERATED "generated"

/*
 * Option letters for apparmor_parser; apparmor_parser_exec() turns the chosen
 * letter into the argument "-<letter>WL".
 */
#define AA_CMD_LOAD 'r'
#define AA_CMD_UNLOAD 'R'
#define AA_CMD_PARSE 'Q'
65
/*
 * Rules that get_apparmor_profile_content() appends to every generated
 * profile, directly after the profile header line.
 */
static const char AA_PROFILE_BASE[] =
" ### Base profile\n"
" capability,\n"
" dbus,\n"
" file,\n"
" network,\n"
" umount,\n"
"\n"
" # Allow us to receive signals from anywhere.\n"
" signal (receive),\n"
"\n"
" # Allow us to send signals to ourselves\n"
" signal peer=@{profile_name},\n"
"\n"
" # Allow other processes to read our /proc entries, futexes, perf tracing and\n"
" # kcmp for now (they will need 'read' in the first place). Administrators can\n"
" # override with:\n"
" # deny ptrace (readby) ...\n"
" ptrace (readby),\n"
"\n"
" # Allow other processes to trace us by default (they will need 'trace' in\n"
" # the first place). Administrators can override with:\n"
" # deny ptrace (tracedby) ...\n"
" ptrace (tracedby),\n"
"\n"
" # Allow us to ptrace ourselves\n"
" ptrace peer=@{profile_name},\n"
"\n"
" # ignore DENIED message on / remount\n"
" deny mount options=(ro, remount) -> /,\n"
" deny mount options=(ro, remount, silent) -> /,\n"
"\n"
" # allow tmpfs mounts everywhere\n"
" mount fstype=tmpfs,\n"
"\n"
" # allow hugetlbfs mounts everywhere\n"
" mount fstype=hugetlbfs,\n"
"\n"
" # allow mqueue mounts everywhere\n"
" mount fstype=mqueue,\n"
"\n"
" # allow fuse mounts everywhere\n"
" mount fstype=fuse,\n"
" mount fstype=fuse.*,\n"
"\n"
" # deny access under /proc/bus to avoid e.g. messing with pci devices directly\n"
" deny @{PROC}/bus/** wklx,\n"
"\n"
" # deny writes in /proc/sys/fs but allow binfmt_misc to be mounted\n"
" mount fstype=binfmt_misc -> /proc/sys/fs/binfmt_misc/,\n"
" deny @{PROC}/sys/fs/** wklx,\n"
"\n"
" # allow efivars to be mounted, writing to it will be blocked though\n"
" mount fstype=efivarfs -> /sys/firmware/efi/efivars/,\n"
"\n"
" # block some other dangerous paths\n"
" deny @{PROC}/kcore rwklx,\n"
" deny @{PROC}/sysrq-trigger rwklx,\n"
"\n"
" # deny writes in /sys except for /sys/fs/cgroup, also allow\n"
" # fusectl, securityfs and debugfs to be mounted there (read-only)\n"
" mount fstype=fusectl -> /sys/fs/fuse/connections/,\n"
" mount fstype=securityfs -> /sys/kernel/security/,\n"
" mount fstype=debugfs -> /sys/kernel/debug/,\n"
" deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/,\n"
" mount fstype=proc -> /proc/,\n"
" mount fstype=sysfs -> /sys/,\n"
" mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/,\n"
" deny /sys/firmware/efi/efivars/** rwklx,\n"
" # note, /sys/kernel/security/** handled below\n"
" mount options=(ro, nosuid, nodev, noexec, remount, strictatime) -> /sys/fs/cgroup/,\n"
"\n"
" # deny reads from debugfs\n"
" deny /sys/kernel/debug/{,**} rwklx,\n"
"\n"
" # allow paths to be made slave, shared, private or unbindable\n"
" # FIXME: This currently doesn't work due to the apparmor parser treating those as allowing all mounts.\n"
"# mount options=(rw,make-slave) -> **,\n"
"# mount options=(rw,make-rslave) -> **,\n"
"# mount options=(rw,make-shared) -> **,\n"
"# mount options=(rw,make-rshared) -> **,\n"
"# mount options=(rw,make-private) -> **,\n"
"# mount options=(rw,make-rprivate) -> **,\n"
"# mount options=(rw,make-unbindable) -> **,\n"
"# mount options=(rw,make-runbindable) -> **,\n"
"\n"
" # allow bind-mounts of anything except /proc, /sys and /dev\n"
" mount options=(rw,bind) /[^spd]*{,/**},\n"
" mount options=(rw,bind) /d[^e]*{,/**},\n"
" mount options=(rw,bind) /de[^v]*{,/**},\n"
" mount options=(rw,bind) /dev/.[^l]*{,/**},\n"
" mount options=(rw,bind) /dev/.l[^x]*{,/**},\n"
" mount options=(rw,bind) /dev/.lx[^c]*{,/**},\n"
" mount options=(rw,bind) /dev/.lxc?*{,/**},\n"
" mount options=(rw,bind) /dev/[^.]*{,/**},\n"
" mount options=(rw,bind) /dev?*{,/**},\n"
" mount options=(rw,bind) /p[^r]*{,/**},\n"
" mount options=(rw,bind) /pr[^o]*{,/**},\n"
" mount options=(rw,bind) /pro[^c]*{,/**},\n"
" mount options=(rw,bind) /proc?*{,/**},\n"
" mount options=(rw,bind) /s[^y]*{,/**},\n"
" mount options=(rw,bind) /sy[^s]*{,/**},\n"
" mount options=(rw,bind) /sys?*{,/**},\n"
"\n"
" # allow read-only bind-mounts of anything except /proc, /sys and /dev\n"
" mount options=(ro,remount,bind) -> /[^spd]*{,/**},\n"
" mount options=(ro,remount,bind) -> /d[^e]*{,/**},\n"
" mount options=(ro,remount,bind) -> /de[^v]*{,/**},\n"
" mount options=(ro,remount,bind) -> /dev/.[^l]*{,/**},\n"
" mount options=(ro,remount,bind) -> /dev/.l[^x]*{,/**},\n"
" mount options=(ro,remount,bind) -> /dev/.lx[^c]*{,/**},\n"
" mount options=(ro,remount,bind) -> /dev/.lxc?*{,/**},\n"
" mount options=(ro,remount,bind) -> /dev/[^.]*{,/**},\n"
" mount options=(ro,remount,bind) -> /dev?*{,/**},\n"
" mount options=(ro,remount,bind) -> /p[^r]*{,/**},\n"
" mount options=(ro,remount,bind) -> /pr[^o]*{,/**},\n"
" mount options=(ro,remount,bind) -> /pro[^c]*{,/**},\n"
" mount options=(ro,remount,bind) -> /proc?*{,/**},\n"
" mount options=(ro,remount,bind) -> /s[^y]*{,/**},\n"
" mount options=(ro,remount,bind) -> /sy[^s]*{,/**},\n"
" mount options=(ro,remount,bind) -> /sys?*{,/**},\n"
"\n"
" # allow moving mounts except for /proc, /sys and /dev\n"
" mount options=(rw,move) /[^spd]*{,/**},\n"
" mount options=(rw,move) /d[^e]*{,/**},\n"
" mount options=(rw,move) /de[^v]*{,/**},\n"
" mount options=(rw,move) /dev/.[^l]*{,/**},\n"
" mount options=(rw,move) /dev/.l[^x]*{,/**},\n"
" mount options=(rw,move) /dev/.lx[^c]*{,/**},\n"
" mount options=(rw,move) /dev/.lxc?*{,/**},\n"
" mount options=(rw,move) /dev/[^.]*{,/**},\n"
" mount options=(rw,move) /dev?*{,/**},\n"
" mount options=(rw,move) /p[^r]*{,/**},\n"
" mount options=(rw,move) /pr[^o]*{,/**},\n"
" mount options=(rw,move) /pro[^c]*{,/**},\n"
" mount options=(rw,move) /proc?*{,/**},\n"
" mount options=(rw,move) /s[^y]*{,/**},\n"
" mount options=(rw,move) /sy[^s]*{,/**},\n"
" mount options=(rw,move) /sys?*{,/**},\n"
"\n"
" # generated by: lxc-generate-aa-rules.py container-rules.base\n"
" deny /proc/sys/[^kn]*{,/**} wklx,\n"
" deny /proc/sys/k[^e]*{,/**} wklx,\n"
" deny /proc/sys/ke[^r]*{,/**} wklx,\n"
" deny /proc/sys/ker[^n]*{,/**} wklx,\n"
" deny /proc/sys/kern[^e]*{,/**} wklx,\n"
" deny /proc/sys/kerne[^l]*{,/**} wklx,\n"
" deny /proc/sys/kernel/[^smhd]*{,/**} wklx,\n"
" deny /proc/sys/kernel/d[^o]*{,/**} wklx,\n"
" deny /proc/sys/kernel/do[^m]*{,/**} wklx,\n"
" deny /proc/sys/kernel/dom[^a]*{,/**} wklx,\n"
" deny /proc/sys/kernel/doma[^i]*{,/**} wklx,\n"
" deny /proc/sys/kernel/domai[^n]*{,/**} wklx,\n"
" deny /proc/sys/kernel/domain[^n]*{,/**} wklx,\n"
" deny /proc/sys/kernel/domainn[^a]*{,/**} wklx,\n"
" deny /proc/sys/kernel/domainna[^m]*{,/**} wklx,\n"
" deny /proc/sys/kernel/domainnam[^e]*{,/**} wklx,\n"
" deny /proc/sys/kernel/domainname?*{,/**} wklx,\n"
" deny /proc/sys/kernel/h[^o]*{,/**} wklx,\n"
" deny /proc/sys/kernel/ho[^s]*{,/**} wklx,\n"
" deny /proc/sys/kernel/hos[^t]*{,/**} wklx,\n"
" deny /proc/sys/kernel/host[^n]*{,/**} wklx,\n"
" deny /proc/sys/kernel/hostn[^a]*{,/**} wklx,\n"
" deny /proc/sys/kernel/hostna[^m]*{,/**} wklx,\n"
" deny /proc/sys/kernel/hostnam[^e]*{,/**} wklx,\n"
" deny /proc/sys/kernel/hostname?*{,/**} wklx,\n"
" deny /proc/sys/kernel/m[^s]*{,/**} wklx,\n"
" deny /proc/sys/kernel/ms[^g]*{,/**} wklx,\n"
" deny /proc/sys/kernel/msg*/** wklx,\n"
" deny /proc/sys/kernel/s[^he]*{,/**} wklx,\n"
" deny /proc/sys/kernel/se[^m]*{,/**} wklx,\n"
" deny /proc/sys/kernel/sem*/** wklx,\n"
" deny /proc/sys/kernel/sh[^m]*{,/**} wklx,\n"
" deny /proc/sys/kernel/shm*/** wklx,\n"
" deny /proc/sys/kernel?*{,/**} wklx,\n"
" deny /proc/sys/n[^e]*{,/**} wklx,\n"
" deny /proc/sys/ne[^t]*{,/**} wklx,\n"
" deny /proc/sys/net?*{,/**} wklx,\n"
" deny /sys/[^fdck]*{,/**} wklx,\n"
" deny /sys/c[^l]*{,/**} wklx,\n"
" deny /sys/cl[^a]*{,/**} wklx,\n"
" deny /sys/cla[^s]*{,/**} wklx,\n"
" deny /sys/clas[^s]*{,/**} wklx,\n"
" deny /sys/class/[^n]*{,/**} wklx,\n"
" deny /sys/class/n[^e]*{,/**} wklx,\n"
" deny /sys/class/ne[^t]*{,/**} wklx,\n"
" deny /sys/class/net?*{,/**} wklx,\n"
" deny /sys/class?*{,/**} wklx,\n"
" deny /sys/d[^e]*{,/**} wklx,\n"
" deny /sys/de[^v]*{,/**} wklx,\n"
" deny /sys/dev[^i]*{,/**} wklx,\n"
" deny /sys/devi[^c]*{,/**} wklx,\n"
" deny /sys/devic[^e]*{,/**} wklx,\n"
" deny /sys/device[^s]*{,/**} wklx,\n"
" deny /sys/devices/[^v]*{,/**} wklx,\n"
" deny /sys/devices/v[^i]*{,/**} wklx,\n"
" deny /sys/devices/vi[^r]*{,/**} wklx,\n"
" deny /sys/devices/vir[^t]*{,/**} wklx,\n"
" deny /sys/devices/virt[^u]*{,/**} wklx,\n"
" deny /sys/devices/virtu[^a]*{,/**} wklx,\n"
" deny /sys/devices/virtua[^l]*{,/**} wklx,\n"
" deny /sys/devices/virtual/[^n]*{,/**} wklx,\n"
" deny /sys/devices/virtual/n[^e]*{,/**} wklx,\n"
" deny /sys/devices/virtual/ne[^t]*{,/**} wklx,\n"
" deny /sys/devices/virtual/net?*{,/**} wklx,\n"
" deny /sys/devices/virtual?*{,/**} wklx,\n"
" deny /sys/devices?*{,/**} wklx,\n"
" deny /sys/f[^s]*{,/**} wklx,\n"
" deny /sys/fs/[^c]*{,/**} wklx,\n"
" deny /sys/fs/c[^g]*{,/**} wklx,\n"
" deny /sys/fs/cg[^r]*{,/**} wklx,\n"
" deny /sys/fs/cgr[^o]*{,/**} wklx,\n"
" deny /sys/fs/cgro[^u]*{,/**} wklx,\n"
" deny /sys/fs/cgrou[^p]*{,/**} wklx,\n"
" deny /sys/fs/cgroup?*{,/**} wklx,\n"
" deny /sys/fs?*{,/**} wklx,\n"
;
283
/*
 * Unix socket rules; get_apparmor_profile_content() appends them only when
 * aa_supports_unix is set.
 */
static const char AA_PROFILE_UNIX_SOCKETS[] =
"\n"
" ### Feature: unix\n"
" # Allow receive via unix sockets from anywhere\n"
" unix (receive),\n"
"\n"
" # Allow all unix sockets in the container\n"
" unix peer=(label=@{profile_name}),\n"
;
293
/*
 * cgroup mount rules; get_apparmor_profile_content() appends them only when
 * /proc/self/ns/cgroup exists.
 */
static const char AA_PROFILE_CGROUP_NAMESPACES[] =
"\n"
" ### Feature: cgroup namespace\n"
" mount fstype=cgroup -> /sys/fs/cgroup/**,\n"
" mount fstype=cgroup2 -> /sys/fs/cgroup/**,\n"
;
300
/*
 * Rules used when the container gets its own policy namespace, i.e. when
 * aa_can_stack is true and aa_is_stacked is false.
 *
 * '_BASE' because we still need to append generated change_profile rules
 */
static const char AA_PROFILE_STACKING_BASE[] =
"\n"
" ### Feature: apparmor stacking\n"
" ### Configuration: apparmor profile loading (in namespace)\n"
" deny /sys/k[^e]*{,/**} wklx,\n"
" deny /sys/ke[^r]*{,/**} wklx,\n"
" deny /sys/ker[^n]*{,/**} wklx,\n"
" deny /sys/kern[^e]*{,/**} wklx,\n"
" deny /sys/kerne[^l]*{,/**} wklx,\n"
" deny /sys/kernel/[^s]*{,/**} wklx,\n"
" deny /sys/kernel/s[^e]*{,/**} wklx,\n"
" deny /sys/kernel/se[^c]*{,/**} wklx,\n"
" deny /sys/kernel/sec[^u]*{,/**} wklx,\n"
" deny /sys/kernel/secu[^r]*{,/**} wklx,\n"
" deny /sys/kernel/secur[^i]*{,/**} wklx,\n"
" deny /sys/kernel/securi[^t]*{,/**} wklx,\n"
" deny /sys/kernel/securit[^y]*{,/**} wklx,\n"
" deny /sys/kernel/security/[^a]*{,/**} wklx,\n"
" deny /sys/kernel/security/a[^p]*{,/**} wklx,\n"
" deny /sys/kernel/security/ap[^p]*{,/**} wklx,\n"
" deny /sys/kernel/security/app[^a]*{,/**} wklx,\n"
" deny /sys/kernel/security/appa[^r]*{,/**} wklx,\n"
" deny /sys/kernel/security/appar[^m]*{,/**} wklx,\n"
" deny /sys/kernel/security/apparm[^o]*{,/**} wklx,\n"
" deny /sys/kernel/security/apparmo[^r]*{,/**} wklx,\n"
" deny /sys/kernel/security/apparmor?*{,/**} wklx,\n"
" deny /sys/kernel/security?*{,/**} wklx,\n"
" deny /sys/kernel?*{,/**} wklx,\n"
;
331
/*
 * Counterpart of AA_PROFILE_STACKING_BASE: appended by
 * get_apparmor_profile_content() when no per-container policy namespace is
 * used.
 */
static const char AA_PROFILE_NO_STACKING[] =
"\n"
" ### Feature: apparmor stacking (not present)\n"
" deny /sys/k*{,/**} rwklx,\n"
;
337
/*
 * Rules appended when conf->lsm_aa_allow_nesting is set.
 *
 * '_BASE' because we need to append change_profile for stacking
 */
static const char AA_PROFILE_NESTING_BASE[] =
"\n"
" ### Configuration: nesting\n"
" pivot_root,\n"
" ptrace,\n"
" signal,\n"
"\n"
/* NOTE: See conf.c's "nesting_helpers" for details. */
" deny /dev/.lxc/proc/** rw,\n"
" deny /dev/.lxc/sys/** rw,\n"
"\n"
" mount fstype=proc -> /usr/lib/*/lxc/**,\n"
" mount fstype=sysfs -> /usr/lib/*/lxc/**,\n"
" mount options=(rw,bind),\n"
" mount options=(rw,rbind),\n"
" mount options=(rw,make-rshared),\n"
"\n"
/* FIXME: What's the state here on apparmor's side? */
" # there doesn't seem to be a way to ask for:\n"
" # mount options=(ro,nosuid,nodev,noexec,remount,bind),\n"
" # as we always get mount to $cdir/proc/sys with those flags denied\n"
" # So allow all mounts until that is straightened out:\n"
" mount,\n"
;
363
/*
 * Rules appended by get_apparmor_profile_content() when the container has an
 * id map (is not privileged) or am_host_unpriv() reports true.
 */
static const char AA_PROFILE_UNPRIVILEGED[] =
"\n"
" ### Configuration: unprivileged container\n"
" pivot_root,\n"
"\n"
" # Allow modifying mount propagation\n"
" mount options=(rw,make-slave) -> **,\n"
" mount options=(rw,make-rslave) -> **,\n"
" mount options=(rw,make-shared) -> **,\n"
" mount options=(rw,make-rshared) -> **,\n"
" mount options=(rw,make-private) -> **,\n"
" mount options=(rw,make-rprivate) -> **,\n"
" mount options=(rw,make-unbindable) -> **,\n"
" mount options=(rw,make-runbindable) -> **,\n"
"\n"
" # Allow all bind-mounts\n"
" mount options=(rw,bind),\n"
" mount options=(rw,rbind),\n"
"\n"
" # Allow remounting things read-only\n"
" mount options=(ro,remount),\n"
;
386
387 static bool check_mount_feature_enabled(void)
388 {
389 return mount_features_enabled == 1;
390 }
391
392 static void load_mount_features_enabled(void)
393 {
394 struct stat statbuf;
395 int ret;
396
397 ret = stat(AA_MOUNT_RESTR, &statbuf);
398 if (ret == 0)
399 mount_features_enabled = 1;
400 }
401
402 /* aa_getcon is not working right now. Use our hand-rolled version below */
403 static int apparmor_enabled(void)
404 {
405 FILE *fin;
406 char e;
407 int ret;
408
409 fin = fopen_cloexec(AA_ENABLED_FILE, "r");
410 if (!fin)
411 return 0;
412 ret = fscanf(fin, "%c", &e);
413 fclose(fin);
414 if (ret == 1 && e == 'Y') {
415 load_mount_features_enabled();
416 return 1;
417 }
418
419 return 0;
420 }
421
/*
 * Read the AppArmor label of process @pid from /proc/<pid>/attr/current.
 *
 * The result is cut at the first newline and then at the first space, so
 * only the leading word of the first line is kept.
 *
 * Returns a heap-allocated, NUL-terminated string the caller must free(),
 * or NULL if the path does not fit, the file cannot be opened or read, or
 * memory runs out.
 */
static char *apparmor_process_label_get(pid_t pid)
{
	char path[100], *sep;
	char *buf = NULL, *newbuf;
	size_t sz = 0, nread;
	int ret;
	FILE *f;

	ret = snprintf(path, sizeof(path), "/proc/%d/attr/current", pid);
	if (ret < 0 || (size_t)ret >= sizeof(path)) {
		ERROR("path name too long");
		return NULL;
	}

	for (;;) {
		f = fopen_cloexec(path, "r");
		if (!f) {
			SYSERROR("opening %s", path);
			free(buf);
			return NULL;
		}

		sz += 1024;
		newbuf = realloc(buf, sz);
		if (!newbuf) {
			free(buf);
			ERROR("out of memory");
			fclose(f);
			return NULL;
		}
		buf = newbuf;
		memset(buf, 0, sz);

		/* one byte is held back so the buffer always stays terminated */
		nread = fread(buf, 1, sz - 1, f);

		/* fread() cannot return a negative count; errors show up in ferror() */
		if (ferror(f)) {
			fclose(f);
			ERROR("reading %s", path);
			free(buf);
			return NULL;
		}
		fclose(f);

		/*
		 * A completely filled buffer means the label may have been
		 * truncated: reopen the file and retry with a larger buffer.
		 */
		if (nread < sz - 1)
			break;
	}

	sep = strchr(buf, '\n');
	if (sep)
		*sep = '\0';

	sep = strchr(buf, ' ');
	if (sep)
		*sep = '\0';

	return buf;
}
469
/*
 * Probably makes sense to reorganize these to only read
 * the label once
 */

/*
 * Report whether the calling process counts as unconfined: true when its
 * label cannot be read or is exactly "unconfined".
 */
static bool apparmor_am_unconfined(void)
{
	bool unconfined;
	char *label;

	label = apparmor_process_label_get(lxc_raw_getpid());
	unconfined = !label || strcmp(label, "unconfined") == 0;
	free(label);

	return unconfined;
}
483
/*
 * Decide whether @curlabel denotes real confinement.
 *
 * Returns false for a NULL label, for "unconfined" and for
 * "/usr/bin/lxc-start"; true for every other label.
 */
static bool aa_needs_transition(char *curlabel)
{
	return curlabel &&
	       strcmp(curlabel, "unconfined") != 0 &&
	       strcmp(curlabel, "/usr/bin/lxc-start") != 0;
}
494
/*
 * Render @num as exactly 16 lowercase hexadecimal digits (zero padded, most
 * significant digit first) into @buf, followed by a NUL byte. @buf must have
 * room for 17 bytes.
 */
static inline void uint64hex(char *buf, uint64_t num)
{
	static const char digits[] = "0123456789abcdef";
	int pos;

	/* fill from the least significant nibble backwards */
	for (pos = 15; pos >= 0; pos--) {
		buf[pos] = digits[num & 0xf];
		num >>= 4;
	}

	buf[16] = '\0';
}
506
507 static inline char *shorten_apparmor_name(char *name)
508 {
509 size_t len = strlen(name);
510 if (len + 7 > 253) {
511 uint64_t hash;
512 hash = fnv_64a_buf(name, len, FNV1A_64_INIT);
513 name = must_realloc(name, 16 + 1);
514 uint64hex(name, hash);
515 }
516
517 return name;
518 }
519
/* Turn every '/' in the NUL-terminated string @path into '-', in place. */
static inline void sanitize_path(char *path)
{
	char *cur;

	for (cur = path; *cur != '\0'; cur++) {
		if (*cur == '/')
			*cur = '-';
	}
}
529
/*
 * Build the path "<lxcpath>/<ctname>/apparmor" via must_make_path().
 * The caller frees the result.
 */
static inline char *apparmor_dir(const char *ctname, const char *lxcpath)
{
	char *dir;

	dir = must_make_path(lxcpath, ctname, "apparmor", NULL);

	return dir;
}
534
535
/*
 * Compose the profile name "lxc-<ctname>_<<lxcpath>>" and pass it through
 * shorten_apparmor_name(). The caller frees the result.
 */
static inline char *apparmor_profile_full(const char *ctname, const char *lxcpath)
{
	char *name;

	name = must_concat("lxc-", ctname, "_<", lxcpath, ">", NULL);

	return shorten_apparmor_name(name);
}
540
/*
 * Same name as apparmor_profile_full() produces, except that every slash is
 * replaced by a hyphen. The caller frees the result.
 */
static inline char *apparmor_namespace(const char *ctname, const char *lxcpath)
{
	char *ns = apparmor_profile_full(ctname, lxcpath);

	sanitize_path(ns);
	return ns;
}
551
552 /* FIXME: This is currently run only in the context of a constructor (via the
553 * initial lsm_init() called due to its __attribute__((constructor)), so we
554 * do not have ERROR/... macros available, so there are some fprintf(stderr)s
555 * in there.
556 */
557 static bool check_apparmor_parser_version()
558 {
559 struct lxc_popen_FILE *parserpipe;
560 int rc;
561 int major = 0, minor = 0, micro = 0;
562
563 parserpipe = lxc_popen("apparmor_parser --version");
564 if (!parserpipe) {
565 fprintf(stderr, "Failed to run check for apparmor_parser\n");
566 return false;
567 }
568
569 rc = fscanf(parserpipe->f, "AppArmor parser version %d.%d.%d", &major, &minor, &micro);
570 if (rc < 1) {
571 lxc_pclose(parserpipe);
572 /* We stay silent for now as this most likely means the shell
573 * lxc_popen executed failed to find the apparmor_parser binary.
574 * See the FIXME comment above for details.
575 */
576 return false;
577 }
578
579 rc = lxc_pclose(parserpipe);
580 if (rc < 0) {
581 fprintf(stderr, "Error waiting for child process\n");
582 return false;
583 }
584 if (rc != 0) {
585 fprintf(stderr, "'apparmor_parser --version' executed with an error status\n");
586 return false;
587 }
588
589 aa_supports_unix = (major > 2) ||
590 (major == 2 && minor > 10) ||
591 (major == 2 && minor == 10 && micro >= 95);
592
593 return true;
594 }
595
596 static bool file_is_yes(const char *path)
597 {
598 ssize_t rd;
599 int fd;
600 char buf[8]; /* we actually just expect "yes" or "no" */
601
602 fd = open(path, O_RDONLY | O_CLOEXEC);
603 if (fd < 0)
604 return false;
605
606 rd = lxc_read_nointr(fd, buf, sizeof(buf));
607 close(fd);
608
609 return rd >= 4 && strncmp(buf, "yes\n", 4) == 0;
610 }
611
/*
 * Report whether the kernel's AppArmor supports profile stacking as needed
 * here: the features/domain/stack file must read "yes" and
 * features/domain/version must hold a "major.minor" version of at least 1.2.
 *
 * Returns false if either file is missing, unreadable or malformed.
 */
static bool apparmor_can_stack(void)
{
	int major = 0, minor = 0, scanned;
	FILE *f;

	if (!file_is_yes("/sys/kernel/security/apparmor/features/domain/stack"))
		return false;

	f = fopen_cloexec("/sys/kernel/security/apparmor/features/domain/version", "r");
	if (!f)
		return false;

	scanned = fscanf(f, "%d.%d", &major, &minor);
	fclose(f);
	/* both components are required */
	if (scanned != 2)
		return false;

	return major > 1 || (major == 1 && minor >= 2);
}
631
/*
 * Grow the heap buffer *@buf, currently *@bufsz bytes long, and copy @size
 * bytes from @data onto its end; with @append_newline a single '\n' is added
 * after the copied bytes. *@buf and *@bufsz are updated to the new buffer and
 * length. No NUL terminator is added unless it is part of @data.
 */
static void must_append_sized_full(char **buf, size_t *bufsz, const char *data,
				   size_t size, bool append_newline)
{
	size_t oldsize = *bufsz;
	size_t total = oldsize + size;

	if (append_newline)
		total++;

	*buf = must_realloc(*buf, total);
	memcpy(&(*buf)[oldsize], data, size);

	if (append_newline)
		(*buf)[total - 1] = '\n';

	*bufsz = total;
}
648
/*
 * Append @size bytes of @data to the heap buffer *@buf (current length
 * *@bufsz) without a trailing newline; see must_append_sized_full().
 */
static void must_append_sized(char **buf, size_t *bufsz, const char *data, size_t size)
{
	/* a void function must not return an expression, so just call through */
	must_append_sized_full(buf, bufsz, data, size, false);
}
653
654 static bool is_privileged(struct lxc_conf *conf)
655 {
656 return lxc_list_empty(&conf->id_map);
657 }
658
659 static char *get_apparmor_profile_content(struct lxc_conf *conf, const char *lxcpath)
660 {
661 char *profile, *profile_name_full;
662 size_t size;
663 struct lxc_list *it;
664
665 profile_name_full = apparmor_profile_full(conf->name, lxcpath);
666
667 profile = must_concat(
668 "#include <tunables/global>\n"
669 "profile \"", profile_name_full, "\" flags=(attach_disconnected,mediate_deleted) {\n",
670 NULL);
671 size = strlen(profile);
672
673 must_append_sized(&profile, &size, AA_PROFILE_BASE,
674 STRARRAYLEN(AA_PROFILE_BASE));
675
676 if (aa_supports_unix)
677 must_append_sized(&profile, &size, AA_PROFILE_UNIX_SOCKETS,
678 STRARRAYLEN(AA_PROFILE_UNIX_SOCKETS));
679
680 if (file_exists("/proc/self/ns/cgroup"))
681 must_append_sized(&profile, &size, AA_PROFILE_CGROUP_NAMESPACES,
682 STRARRAYLEN(AA_PROFILE_CGROUP_NAMESPACES));
683
684 if (aa_can_stack && !aa_is_stacked) {
685 char *namespace, *temp;
686
687 must_append_sized(&profile, &size, AA_PROFILE_STACKING_BASE,
688 STRARRAYLEN(AA_PROFILE_STACKING_BASE));
689
690 namespace = apparmor_namespace(conf->name, lxcpath);
691 temp = must_concat(" change_profile -> \":", namespace, ":*\",\n"
692 " change_profile -> \":", namespace, "://*\",\n",
693 NULL);
694 free(namespace);
695
696 must_append_sized(&profile, &size, temp, strlen(temp));
697 free(temp);
698 } else {
699 must_append_sized(&profile, &size, AA_PROFILE_NO_STACKING,
700 STRARRAYLEN(AA_PROFILE_NO_STACKING));
701 }
702
703 if (conf->lsm_aa_allow_nesting) {
704 must_append_sized(&profile, &size, AA_PROFILE_NESTING_BASE,
705 STRARRAYLEN(AA_PROFILE_NESTING_BASE));
706
707 if (!aa_can_stack || aa_is_stacked) {
708 char *temp;
709
710 temp = must_concat(" change_profile -> \"",
711 profile_name_full, "\",\n", NULL);
712 must_append_sized(&profile, &size, temp, strlen(temp));
713 free(temp);
714 }
715 }
716
717 if (!is_privileged(conf) || am_host_unpriv())
718 must_append_sized(&profile, &size, AA_PROFILE_UNPRIVILEGED,
719 STRARRAYLEN(AA_PROFILE_UNPRIVILEGED));
720
721 lxc_list_for_each(it, &conf->lsm_aa_raw) {
722 const char *line = it->elem;
723
724 must_append_sized_full(&profile, &size, line, strlen(line), true);
725 }
726
727 /* include terminating \0 byte */
728 must_append_sized(&profile, &size, "}\n", 3);
729
730 free(profile_name_full);
731
732 return profile;
733 }
734
/*
 * Build the path of the on-disk profile file:
 * "<lxcpath>/<ctname>/apparmor/<namespace name>".
 *
 * apparmor_parser creates a cache file using the parsed file's name as a name.
 * This means there may be multiple containers with the same name but different
 * lxcpaths. Therefore we need a sanitized version of the complete profile name
 * as profile file-name.
 * We already get this exactly from apparmor_namespace().
 *
 * The caller frees the result.
 */
static char *make_apparmor_profile_path(const char *ctname, const char *lxcpath)
{
	char *fname = apparmor_namespace(ctname, lxcpath);
	char *fullpath = must_make_path(lxcpath, ctname, "apparmor", fname, NULL);

	free(fname);
	return fullpath;
}
752
/*
 * Build the securityfs path of the container's policy namespace, i.e.
 * "/sys/kernel/security/apparmor/policy/namespaces/<namespace name>".
 * The caller frees the result.
 */
static char *make_apparmor_namespace_path(const char *ctname, const char *lxcpath)
{
	char *ns = apparmor_namespace(ctname, lxcpath);
	char *nspath = must_make_path("/sys/kernel/security/apparmor/policy/namespaces", ns, NULL);

	free(ns);
	return nspath;
}
763
764 static bool make_apparmor_namespace(struct lxc_conf *conf, const char *lxcpath)
765 {
766 char *path;
767
768 if (!aa_can_stack || aa_is_stacked)
769 return true;
770
771 path = make_apparmor_namespace_path(conf->name, lxcpath);
772 errno = 0;
773 if (mkdir(path, 0755) < 0 && errno != EEXIST) {
774 SYSERROR("Error creating AppArmor namespace: %s", path);
775 free(path);
776 return false;
777 }
778 free(path);
779
780 return true;
781 }
782
783 static void remove_apparmor_namespace(struct lxc_conf *conf, const char *lxcpath)
784 {
785 char *path;
786
787 path = make_apparmor_namespace_path(conf->name, lxcpath);
788 if (rmdir(path) != 0)
789 SYSERROR("Error removing AppArmor namespace");
790 free(path);
791 }
792
/*
 * Arguments run_apparmor_parser() hands to apparmor_parser_exec() through
 * run_command()'s opaque data pointer.
 */
struct apparmor_parser_args {
	/* option letter (one of the AA_CMD_* values) placed right after '-' */
	char cmd;
	/* path given to apparmor_parser as its last argument */
	char *file;
};
797
798 static int apparmor_parser_exec(void *data)
799 {
800 struct apparmor_parser_args *args = data;
801 char cmdbuf[] = { '-', args->cmd, 'W', 'L', 0 };
802
803 execlp("apparmor_parser", "apparmor_parser", cmdbuf, APPARMOR_CACHE_DIR, args->file, NULL);
804
805 return -1;
806 }
807
808 static int run_apparmor_parser(char command,
809 struct lxc_conf *conf,
810 const char *lxcpath)
811 {
812 char output[PATH_MAX];
813 int ret;
814 struct apparmor_parser_args args = {
815 .cmd = command,
816 .file = make_apparmor_profile_path(conf->name, lxcpath),
817 };
818
819 ret = run_command(output, sizeof(output), apparmor_parser_exec, (void*)&args);
820 if (ret < 0) {
821 ERROR("Failed to run apparmor_parser on \"%s\": %s", args.file, output);
822 ret = -1;
823 }
824
825
826 free(args.file);
827 return ret;
828 }
829
830 static void remove_apparmor_profile(struct lxc_conf *conf, const char *lxcpath)
831 {
832 char *path;
833
834 /* It's ok if these deletes fail: if the container was never started,
835 * we'll have never written a profile or cached it.
836 */
837
838 path = make_apparmor_profile_path(conf->name, lxcpath);
839 (void)unlink(path);
840 free(path);
841
842 /* Also remove the apparmor/ subdirectory */
843 path = apparmor_dir(conf->name, lxcpath);
844 (void)rmdir(path);
845 free(path);
846 }
847
848 static int load_apparmor_profile(struct lxc_conf *conf, const char *lxcpath)
849 {
850 struct stat profile_sb;
851 size_t content_len;
852 int ret = -1;
853 size_t old_len = 0;
854 char *profile_path = NULL, *old_content = NULL, *new_content = NULL;
855 int profile_fd = -1;
856
857 if (!make_apparmor_namespace(conf, lxcpath))
858 return -1;
859
860 /* In order to avoid forcing a profile parse (potentially slow) on
861 * every container start, let's use apparmor's binary policy cache,
862 * which checks mtime of the files to figure out if the policy needs to
863 * be regenerated.
864 *
865 * Since it uses mtimes, we shouldn't just always write out our local
866 * apparmor template; instead we should check to see whether the
867 * template is the same as ours. If it isn't we should write our
868 * version out so that the new changes are reflected and we definitely
869 * force a recompile.
870 */
871
872 profile_path = make_apparmor_profile_path(conf->name, lxcpath);
873 profile_fd = open(profile_path, O_RDONLY | O_CLOEXEC);
874 if (profile_fd >= 0) {
875 if (fstat(profile_fd, &profile_sb) < 0) {
876 SYSERROR("Error accessing old profile from %s",
877 profile_path);
878 goto out;
879 }
880 old_len = profile_sb.st_size;
881 old_content = lxc_strmmap(NULL, old_len, PROT_READ,
882 MAP_PRIVATE, profile_fd, 0);
883 if (!old_content) {
884 SYSERROR("Failed to mmap old profile from %s",
885 profile_path);
886 goto out;
887 }
888 } else if (errno != ENOENT) {
889 SYSERROR("Error reading old profile from %s", profile_path);
890 goto out;
891 }
892
893 new_content = get_apparmor_profile_content(conf, lxcpath);
894 if (!new_content)
895 goto out;
896
897 content_len = strlen(new_content);
898
899 if (!old_content || old_len != content_len || memcmp(old_content, new_content, content_len) != 0) {
900 char *path;
901
902 ret = mkdir_p(APPARMOR_CACHE_DIR, 0755);
903 if (ret < 0) {
904 SYSERROR("Error creating AppArmor profile cache directory " APPARMOR_CACHE_DIR);
905 goto out;
906 }
907
908 path = apparmor_dir(conf->name, lxcpath);
909 ret = mkdir_p(path, 0755);
910 if (ret < 0) {
911 SYSERROR("Error creating AppArmor profile directory: %s", path);
912 free(path);
913 goto out;
914 }
915 free(path);
916
917 ret = lxc_write_to_file(profile_path, new_content, content_len, false, 0600);
918 if (ret < 0) {
919 SYSERROR("Error writing profile to %s", profile_path);
920 goto out;
921 }
922 }
923
924 ret = run_apparmor_parser(AA_CMD_LOAD, conf, lxcpath);
925 if (ret != 0)
926 goto out_remove_profile;
927
928 conf->lsm_aa_profile_created = true;
929
930 goto out_ok;
931
932 out_remove_profile:
933 remove_apparmor_profile(conf, lxcpath);
934 out:
935 remove_apparmor_namespace(conf, lxcpath);
936 out_ok:
937 if (profile_fd >= 0) {
938 if (old_content)
939 lxc_strmunmap(old_content, old_len);
940 close(profile_fd);
941 }
942 free(profile_path);
943 free(new_content);
944 return ret;
945 }
946
947 /*
948 * Ensure that the container's policy namespace is unloaded to free kernel
949 * memory. This does not delete the policy from disk or cache.
950 */
951 static void apparmor_cleanup(struct lxc_conf *conf, const char *lxcpath)
952 {
953 if (!aa_admin)
954 return;
955
956 if (!conf->lsm_aa_profile_created)
957 return;
958
959 remove_apparmor_namespace(conf, lxcpath);
960 (void)run_apparmor_parser(AA_CMD_UNLOAD, conf, lxcpath);
961
962 remove_apparmor_profile(conf, lxcpath);
963 }
964
965 static int apparmor_prepare(struct lxc_conf *conf, const char *lxcpath)
966 {
967 int ret = -1;
968 const char *label;
969 char *curlabel = NULL, *genlabel = NULL;
970
971 if (!aa_enabled) {
972 ERROR("AppArmor not enabled");
973 return -1;
974 }
975
976 label = conf->lsm_aa_profile;
977
978 /* user may request that we just ignore apparmor */
979 if (label && strcmp(label, AA_UNCHANGED) == 0) {
980 INFO("AppArmor profile unchanged per user request");
981 conf->lsm_aa_profile_computed = must_copy_string(label);
982 return 0;
983 }
984
985 if (label && strcmp(label, AA_GENERATED) == 0) {
986 if (!aa_parser_available) {
987 ERROR("Cannot use generated profile: apparmor_parser not available");
988 goto out;
989 }
990
991 /* auto-generate profile based on available/requested security features */
992 if (load_apparmor_profile(conf, lxcpath) != 0) {
993 ERROR("Failed to load generated AppArmor profile");
994 goto out;
995 }
996
997 genlabel = apparmor_profile_full(conf->name, lxcpath);
998 if (!genlabel) {
999 ERROR("Failed to build AppArmor profile name");
1000 goto out;
1001 }
1002
1003 if (aa_can_stack && !aa_is_stacked) {
1004 char *namespace = apparmor_namespace(conf->name, lxcpath);
1005 size_t llen = strlen(genlabel);
1006 must_append_sized(&genlabel, &llen, "//&:", STRARRAYLEN("//&:"));
1007 must_append_sized(&genlabel, &llen, namespace, strlen(namespace));
1008 must_append_sized(&genlabel, &llen, ":", STRARRAYLEN(":") + 1); /* with the nul byte */
1009 free(namespace);
1010 }
1011
1012 label = genlabel;
1013 }
1014
1015 curlabel = apparmor_process_label_get(lxc_raw_getpid());
1016
1017 if (!aa_can_stack && aa_needs_transition(curlabel)) {
1018 /* we're already confined, and stacking isn't supported */
1019
1020 if (!label || strcmp(curlabel, label) == 0) {
1021 /* no change requested */
1022 ret = 0;
1023 goto out;
1024 }
1025
1026 ERROR("Already AppArmor confined, but new label requested.");
1027 goto out;
1028 }
1029
1030 if (!label) {
1031 if (cgns_supported())
1032 label = AA_DEF_PROFILE_CGNS;
1033 else
1034 label = AA_DEF_PROFILE;
1035 }
1036
1037 if (!check_mount_feature_enabled() && strcmp(label, "unconfined") != 0) {
1038 WARN("Incomplete AppArmor support in your kernel");
1039 if (!conf->lsm_aa_allow_incomplete) {
1040 ERROR("If you really want to start this container, set");
1041 ERROR("lxc.apparmor.allow_incomplete = 1");
1042 ERROR("in your container configuration file");
1043 goto out;
1044 }
1045 }
1046
1047 conf->lsm_aa_profile_computed = must_copy_string(label);
1048 ret = 0;
1049
1050 out:
1051 if (genlabel) {
1052 free(genlabel);
1053 if (ret != 0)
1054 apparmor_cleanup(conf, lxcpath);
1055 }
1056 free(curlabel);
1057 return ret;
1058 }
1059
1060 /*
1061 * apparmor_process_label_set: Set AppArmor process profile
1062 *
1063 * @label : the profile to set
1064 * @conf : the container configuration to use if @label is NULL
1065 * @default : use the default profile if @label is NULL
1066 * @on_exec : this is ignored. Apparmor profile will be changed immediately
1067 *
1068 * Returns 0 on success, < 0 on failure
1069 *
1070 * Notes: This relies on /proc being available.
1071 */
1072 static int apparmor_process_label_set(const char *inlabel, struct lxc_conf *conf,
1073 bool on_exec)
1074 {
1075 int label_fd, ret;
1076 pid_t tid;
1077 const char *label;
1078
1079 if (!aa_enabled) {
1080 ERROR("AppArmor not enabled");
1081 return -1;
1082 }
1083
1084 label = inlabel ? inlabel : conf->lsm_aa_profile_computed;
1085 if (!label) {
1086 ERROR("LSM wasn't prepared");
1087 return -1;
1088 }
1089
1090 /* user may request that we just ignore apparmor */
1091 if (strcmp(label, AA_UNCHANGED) == 0) {
1092 INFO("AppArmor profile unchanged per user request");
1093 return 0;
1094 }
1095
1096 if (strcmp(label, "unconfined") == 0 && apparmor_am_unconfined()) {
1097 INFO("AppArmor profile unchanged");
1098 return 0;
1099 }
1100 tid = lxc_raw_gettid();
1101 label_fd = lsm_process_label_fd_get(tid, on_exec);
1102 if (label_fd < 0) {
1103 SYSERROR("Failed to change AppArmor profile to %s", label);
1104 return -1;
1105 }
1106
1107 ret = lsm_process_label_set_at(label_fd, label, on_exec);
1108 close(label_fd);
1109 if (ret < 0) {
1110 ERROR("Failed to change AppArmor profile to %s", label);
1111 return -1;
1112 }
1113
1114 INFO("Changed AppArmor profile to %s", label);
1115 return 0;
1116 }
1117
1118 static struct lsm_drv apparmor_drv = {
1119 .name = "AppArmor",
1120 .enabled = apparmor_enabled,
1121 .process_label_get = apparmor_process_label_get,
1122 .process_label_set = apparmor_process_label_set,
1123 .prepare = apparmor_prepare,
1124 .cleanup = apparmor_cleanup,
1125 };
1126
1127 struct lsm_drv *lsm_apparmor_drv_init(void)
1128 {
1129 bool have_mac_admin = false;
1130
1131 if (!apparmor_enabled())
1132 return NULL;
1133
1134 /* We only support generated profiles when apparmor_parser is usable */
1135 if (!check_apparmor_parser_version())
1136 goto out;
1137
1138 aa_parser_available = true;
1139
1140 aa_can_stack = apparmor_can_stack();
1141 if (aa_can_stack)
1142 aa_is_stacked = file_is_yes("/sys/kernel/security/apparmor/.ns_stacked");
1143
1144 #if HAVE_LIBCAP
1145 have_mac_admin = lxc_proc_cap_is_set(CAP_SETGID, CAP_EFFECTIVE);
1146 #endif
1147
1148 if (!have_mac_admin)
1149 WARN("Per-container AppArmor profiles are disabled because the mac_admin capability is missing");
1150 else if (am_host_unpriv() && !aa_is_stacked)
1151 WARN("Per-container AppArmor profiles are disabled because LXC is running in an unprivileged container without stacking");
1152 else
1153 aa_admin = true;
1154
1155 out:
1156 aa_enabled = 1;
1157 return &apparmor_drv;
1158 }