]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/seccomp.c
seccomp: leak fixup
[mirror_lxc.git] / src / lxc / seccomp.c
1 /*
2 * lxc: linux Container library
3 *
4 * (C) Copyright Canonical, Inc. 2012
5 *
6 * Authors:
7 * Serge Hallyn <serge.hallyn@canonical.com>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #define _GNU_SOURCE
25 #include <errno.h>
26 #include <seccomp.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <sys/mount.h>
30 #include <sys/utsname.h>
31
32 #include "config.h"
33 #include "log.h"
34 #include "lxcseccomp.h"
35 #include "utils.h"
36
37 #ifdef __MIPSEL__
38 #define MIPS_ARCH_O32 lxc_seccomp_arch_mipsel
39 #define MIPS_ARCH_N64 lxc_seccomp_arch_mipsel64
40 #else
41 #define MIPS_ARCH_O32 lxc_seccomp_arch_mips
42 #define MIPS_ARCH_N64 lxc_seccomp_arch_mips64
43 #endif
44
45 lxc_log_define(lxc_seccomp, lxc);
46
47 static int parse_config_v1(FILE *f, struct lxc_conf *conf)
48 {
49 int ret = 0;
50 size_t line_bufsz = 0;
51 char *line = NULL;
52
53 while (getline(&line, &line_bufsz, f) != -1) {
54 int nr;
55
56 ret = sscanf(line, "%d", &nr);
57 if (ret != 1) {
58 ret = -1;
59 break;
60 }
61
62 #if HAVE_SCMP_FILTER_CTX
63 ret = seccomp_rule_add(conf->seccomp_ctx, SCMP_ACT_ALLOW, nr, 0);
64 #else
65 ret = seccomp_rule_add(SCMP_ACT_ALLOW, nr, 0);
66 #endif
67 if (ret < 0) {
68 ERROR("Failed loading allow rule for %d", nr);
69 break;
70 }
71 }
72 free(line);
73
74 return ret;
75 }
76
77 #if HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH
78 static const char *get_action_name(uint32_t action)
79 {
80 /* The upper 16 bits indicate the type of the seccomp action. */
81 switch (action & 0xffff0000) {
82 case SCMP_ACT_KILL:
83 return "kill";
84 case SCMP_ACT_ALLOW:
85 return "allow";
86 case SCMP_ACT_TRAP:
87 return "trap";
88 case SCMP_ACT_ERRNO(0):
89 return "errno";
90 }
91
92 return "invalid action";
93 }
94
95 static uint32_t get_v2_default_action(char *line)
96 {
97 uint32_t ret_action = -1;
98
99 while (*line == ' ')
100 line++;
101
102 /* After 'whitelist' or 'blacklist' comes default behavior. */
103 if (strncmp(line, "kill", 4) == 0) {
104 ret_action = SCMP_ACT_KILL;
105 } else if (strncmp(line, "errno", 5) == 0) {
106 int e, ret;
107
108 ret = sscanf(line + 5, "%d", &e);
109 if (ret != 1) {
110 ERROR("Failed to parse errno value from %s", line);
111 return -2;
112 }
113
114 ret_action = SCMP_ACT_ERRNO(e);
115 } else if (strncmp(line, "allow", 5) == 0) {
116 ret_action = SCMP_ACT_ALLOW;
117 } else if (strncmp(line, "trap", 4) == 0) {
118 ret_action = SCMP_ACT_TRAP;
119 }
120
121 return ret_action;
122 }
123
/*
 * Determine the action of a rule line of the form "<syscall> [action] ...".
 *
 * Returns @def_action when nothing, a comment, or an unrecognized keyword
 * follows the syscall name; the parsed action otherwise; and (uint32_t)-1
 * when the action keyword is malformed.
 */
static uint32_t get_v2_action(char *line, uint32_t def_action)
{
	uint32_t action;
	char *cursor;

	cursor = strchr(line, ' ');
	if (!cursor)
		return def_action;

	/* Step over the separator and any further padding. */
	cursor += strspn(cursor, " ");
	if (*cursor == '\0' || *cursor == '#')
		return def_action;

	action = get_v2_default_action(cursor);
	if (action == (uint32_t)-2)
		return -1;

	if (action == (uint32_t)-1)
		return def_action;

	return action;
}
150
/* One parsed "[index,value,op(,valueTwo)]" argument filter of a v2 rule. */
struct seccomp_v2_rule_args {
	/* Which syscall argument is compared; the parser rejects values >= 6. */
	uint32_t index;
	/* Second field of the bracket expression. */
	uint64_t value;
	/* Optional fourth field; only passed to SCMP_CMP for SCMP_CMP_MASKED_EQ. */
	uint64_t mask;
	/* Comparison operator. */
	enum scmp_compare op;
};
157
/* A fully parsed v2 rule line: the action plus its argument filters. */
struct seccomp_v2_rule {
	/* seccomp action applied when the rule matches. */
	uint32_t action;
	/* Number of valid entries in args_value. */
	uint32_t args_num;
	/* Argument filters, filled from index 0 upwards. */
	struct seccomp_v2_rule_args args_value[6];
};
163
164 static enum scmp_compare parse_v2_rule_op(char *s)
165 {
166 if (strcmp(s, "SCMP_CMP_NE") == 0 || strcmp(s, "!=") == 0)
167 return SCMP_CMP_NE;
168 else if (strcmp(s, "SCMP_CMP_LT") == 0 || strcmp(s, "<") == 0)
169 return SCMP_CMP_LT;
170 else if (strcmp(s, "SCMP_CMP_LE") == 0 || strcmp(s, "<=") == 0)
171 return SCMP_CMP_LE;
172 else if (strcmp(s, "SCMP_CMP_EQ") == 0 || strcmp(s, "==") == 0)
173 return SCMP_CMP_EQ;
174 else if (strcmp(s, "SCMP_CMP_GE") == 0 || strcmp(s, ">=") == 0)
175 return SCMP_CMP_GE;
176 else if (strcmp(s, "SCMP_CMP_GT") == 0 || strcmp(s, ">") == 0)
177 return SCMP_CMP_GT;
178 else if (strcmp(s, "SCMP_CMP_MASKED_EQ") == 0 || strcmp(s, "&=") == 0)
179 return SCMP_CMP_MASKED_EQ;
180
181 return _SCMP_CMP_MAX;
182 }
183
184 /*
185 * This function is used to parse the args string into the structure.
186 * args string format:[index,value,op,valueTwo] or [index,value,op]
187 * index: the index for syscall arguments (type uint)
188 * value: the value for syscall arguments (type uint64)
189 * op: the operator for syscall arguments(string),
190 a valid list of constants as of libseccomp v2.3.2 is
191 SCMP_CMP_NE,SCMP_CMP_LE,SCMP_CMP_LE, SCMP_CMP_EQ, SCMP_CMP_GE,
192 SCMP_CMP_GT, SCMP_CMP_MASKED_EQ, or !=,<=,==,>=,>,&=
193 * valueTwo: the value for syscall arguments only used for mask eq (type uint64, optional)
194 * Returns 0 on success, < 0 otherwise.
195 */
196 static int get_seccomp_arg_value(char *key, struct seccomp_v2_rule_args *rule_args)
197 {
198 int ret = 0;
199 uint32_t index = 0;
200 uint64_t mask = 0, value = 0;
201 enum scmp_compare op = 0;
202 char *tmp = NULL;
203 char s[31] = {0}, v[24] = {0}, m[24] = {0};
204
205 tmp = strchr(key, '[');
206 if (!tmp) {
207 ERROR("Failed to interpret args");
208 return -1;
209 }
210
211 ret = sscanf(tmp, "[%i,%23[^,],%30[^0-9^,],%23[^,]", &index, v, s, m);
212 if ((ret != 3 && ret != 4) || index >= 6) {
213 ERROR("Failed to interpret args value");
214 return -1;
215 }
216
217 ret = lxc_safe_uint64(v, &value);
218 if (ret < 0) {
219 ERROR("Invalid argument value");
220 return -1;
221 }
222
223 ret = lxc_safe_uint64(m, &mask);
224 if (ret < 0) {
225 ERROR("Invalid argument mask");
226 return -1;
227 }
228
229 op = parse_v2_rule_op(s);
230 if (op == _SCMP_CMP_MAX) {
231 ERROR("Failed to interpret args operator value");
232 return -1;
233 }
234
235 rule_args->index = index;
236 rule_args->value = value;
237 rule_args->mask = mask;
238 rule_args->op = op;
239 return 0;
240 }
241
242 /* This function is used to parse the seccomp rule entry.
243 * @line : seccomp rule entry string.
244 * @def_action : default action used in the case if the 'line' contain non valid action.
245 * @rules : output struct.
246 * Returns 0 on success, < 0 otherwise.
247 */
248 static int parse_v2_rules(char *line, uint32_t def_action,
249 struct seccomp_v2_rule *rules)
250 {
251 int i = 0, ret = -1;
252 char *key = NULL, *saveptr = NULL, *tmp = NULL;
253
254 tmp = strdup(line);
255 if (!tmp)
256 return -1;
257
258 /* read optional action which follows the syscall */
259 rules->action = get_v2_action(tmp, def_action);
260
261 ret = 0;
262 rules->args_num = 0;
263 if (!strchr(tmp, '['))
264 goto out;
265
266 ret = -1;
267 for ((key = strtok_r(tmp, "]", &saveptr)), i = 0; key && i < 6;
268 (key = strtok_r(NULL, "]", &saveptr)), i++) {
269 ret = get_seccomp_arg_value(key, &rules->args_value[i]);
270 if (ret < 0)
271 goto out;
272
273 rules->args_num++;
274 }
275
276 ret = 0;
277
278 out:
279 free(tmp);
280
281 return ret;
282 }
283 #endif
284
285 #if HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH
/*
 * Architectures the policy parser distinguishes.
 *
 * lxc_seccomp_arch_all is the section value for "[all]" and the initial
 * section on hosts for which compat contexts are created.
 * lxc_seccomp_arch_unknown is returned by get_hostarch() for unrecognized
 * machines and marks policy sections that do not apply to the host.
 */
enum lxc_hostarch_t {
	lxc_seccomp_arch_all = 0,
	lxc_seccomp_arch_native,
	lxc_seccomp_arch_i386,
	lxc_seccomp_arch_x32,
	lxc_seccomp_arch_amd64,
	lxc_seccomp_arch_arm,
	lxc_seccomp_arch_arm64,
	lxc_seccomp_arch_ppc64,
	lxc_seccomp_arch_ppc64le,
	lxc_seccomp_arch_ppc,
	lxc_seccomp_arch_mips,
	lxc_seccomp_arch_mips64,
	lxc_seccomp_arch_mips64n32,
	lxc_seccomp_arch_mipsel,
	lxc_seccomp_arch_mipsel64,
	lxc_seccomp_arch_mipsel64n32,
	lxc_seccomp_arch_s390x,
	lxc_seccomp_arch_unknown = 999,
};
306
307 int get_hostarch(void)
308 {
309 struct utsname uts;
310 if (uname(&uts) < 0) {
311 SYSERROR("Failed to read host arch");
312 return -1;
313 }
314
315 if (strcmp(uts.machine, "i686") == 0)
316 return lxc_seccomp_arch_i386;
317 /* no x32 kernels */
318 else if (strcmp(uts.machine, "x86_64") == 0)
319 return lxc_seccomp_arch_amd64;
320 else if (strncmp(uts.machine, "armv7", 5) == 0)
321 return lxc_seccomp_arch_arm;
322 else if (strncmp(uts.machine, "aarch64", 7) == 0)
323 return lxc_seccomp_arch_arm64;
324 else if (strncmp(uts.machine, "ppc64le", 7) == 0)
325 return lxc_seccomp_arch_ppc64le;
326 else if (strncmp(uts.machine, "ppc64", 5) == 0)
327 return lxc_seccomp_arch_ppc64;
328 else if (strncmp(uts.machine, "ppc", 3) == 0)
329 return lxc_seccomp_arch_ppc;
330 else if (strncmp(uts.machine, "mips64", 6) == 0)
331 return MIPS_ARCH_N64;
332 else if (strncmp(uts.machine, "mips", 4) == 0)
333 return MIPS_ARCH_O32;
334 else if (strncmp(uts.machine, "s390x", 5) == 0)
335 return lxc_seccomp_arch_s390x;
336
337 return lxc_seccomp_arch_unknown;
338 }
339
340 scmp_filter_ctx get_new_ctx(enum lxc_hostarch_t n_arch,
341 uint32_t default_policy_action, bool *needs_merge)
342 {
343 int ret;
344 uint32_t arch;
345 scmp_filter_ctx ctx;
346
347 switch (n_arch) {
348 case lxc_seccomp_arch_i386:
349 arch = SCMP_ARCH_X86;
350 break;
351 case lxc_seccomp_arch_x32:
352 arch = SCMP_ARCH_X32;
353 break;
354 case lxc_seccomp_arch_amd64:
355 arch = SCMP_ARCH_X86_64;
356 break;
357 case lxc_seccomp_arch_arm:
358 arch = SCMP_ARCH_ARM;
359 break;
360 #ifdef SCMP_ARCH_AARCH64
361 case lxc_seccomp_arch_arm64:
362 arch = SCMP_ARCH_AARCH64;
363 break;
364 #endif
365 #ifdef SCMP_ARCH_PPC64LE
366 case lxc_seccomp_arch_ppc64le:
367 arch = SCMP_ARCH_PPC64LE;
368 break;
369 #endif
370 #ifdef SCMP_ARCH_PPC64
371 case lxc_seccomp_arch_ppc64:
372 arch = SCMP_ARCH_PPC64;
373 break;
374 #endif
375 #ifdef SCMP_ARCH_PPC
376 case lxc_seccomp_arch_ppc:
377 arch = SCMP_ARCH_PPC;
378 break;
379 #endif
380 #ifdef SCMP_ARCH_MIPS
381 case lxc_seccomp_arch_mips:
382 arch = SCMP_ARCH_MIPS;
383 break;
384 case lxc_seccomp_arch_mips64:
385 arch = SCMP_ARCH_MIPS64;
386 break;
387 case lxc_seccomp_arch_mips64n32:
388 arch = SCMP_ARCH_MIPS64N32;
389 break;
390 case lxc_seccomp_arch_mipsel:
391 arch = SCMP_ARCH_MIPSEL;
392 break;
393 case lxc_seccomp_arch_mipsel64:
394 arch = SCMP_ARCH_MIPSEL64;
395 break;
396 case lxc_seccomp_arch_mipsel64n32:
397 arch = SCMP_ARCH_MIPSEL64N32;
398 break;
399 #endif
400 #ifdef SCMP_ARCH_S390X
401 case lxc_seccomp_arch_s390x:
402 arch = SCMP_ARCH_S390X;
403 break;
404 #endif
405 default:
406 return NULL;
407 }
408
409 ctx = seccomp_init(default_policy_action);
410 if (!ctx) {
411 ERROR("Error initializing seccomp context");
412 return NULL;
413 }
414
415 ret = seccomp_attr_set(ctx, SCMP_FLTATR_CTL_NNP, 0);
416 if (ret < 0) {
417 ERROR("%s - Failed to turn off no-new-privs", strerror(-ret));
418 seccomp_release(ctx);
419 return NULL;
420 }
421
422 #ifdef SCMP_FLTATR_ATL_TSKIP
423 ret = seccomp_attr_set(ctx, SCMP_FLTATR_ATL_TSKIP, 1);
424 if (ret < 0)
425 WARN("%s - Failed to turn on seccomp nop-skip, continuing", strerror(-ret));
426 #endif
427
428 ret = seccomp_arch_exist(ctx, arch);
429 if (ret < 0) {
430 if (ret != -EEXIST) {
431 ERROR("%s - Failed to determine whether arch %d is "
432 "already present in the main seccomp context",
433 strerror(-ret), (int)n_arch);
434 seccomp_release(ctx);
435 return NULL;
436 }
437
438 ret = seccomp_arch_add(ctx, arch);
439 if (ret != 0) {
440 ERROR("%s - Failed to add arch %d to main seccomp context",
441 strerror(-ret), (int)n_arch);
442 seccomp_release(ctx);
443 return NULL;
444 }
445 TRACE("Added arch %d to main seccomp context", (int)n_arch);
446
447 ret = seccomp_arch_remove(ctx, SCMP_ARCH_NATIVE);
448 if (ret != 0) {
449 ERROR("Failed to remove native arch from main seccomp context");
450 seccomp_release(ctx);
451 return NULL;
452 }
453 TRACE("Removed native arch from main seccomp context");
454
455 *needs_merge = true;
456 } else {
457 *needs_merge = false;
458 TRACE("Arch %d already present in main seccomp context", (int)n_arch);
459 }
460
461 return ctx;
462 }
463
464 bool do_resolve_add_rule(uint32_t arch, char *line, scmp_filter_ctx ctx,
465 struct seccomp_v2_rule *rule)
466 {
467 int i, nr, ret;
468 struct scmp_arg_cmp arg_cmp[6];
469
470 ret = seccomp_arch_exist(ctx, arch);
471 if (arch && ret != 0) {
472 ERROR("%s - Seccomp: rule and context arch do not match (arch %d)", strerror(-ret), arch);
473 return false;
474 }
475
476 /*get the syscall name*/
477 char *p = strchr(line, ' ');
478 if (p)
479 *p = '\0';
480
481 if (strncmp(line, "reject_force_umount", 19) == 0) {
482 ret = seccomp_rule_add_exact(ctx, SCMP_ACT_ERRNO(EACCES),
483 SCMP_SYS(umount2), 1,
484 SCMP_A1(SCMP_CMP_MASKED_EQ, MNT_FORCE, MNT_FORCE));
485 if (ret < 0) {
486 ERROR("%s - Failed loading rule to reject force umount", strerror(-ret));
487 return false;
488 }
489
490 INFO("Set seccomp rule to reject force umounts");
491 return true;
492 }
493
494 nr = seccomp_syscall_resolve_name(line);
495 if (nr == __NR_SCMP_ERROR) {
496 WARN("Failed to resolve syscall \"%s\"", line);
497 WARN("This syscall will NOT be blacklisted");
498 return true;
499 }
500
501 if (nr < 0) {
502 WARN("Got negative return value %d for syscall \"%s\"", nr, line);
503 WARN("This syscall will NOT be blacklisted");
504 return true;
505 }
506
507 memset(&arg_cmp, 0, sizeof(arg_cmp));
508 for (i = 0; i < rule->args_num; i++) {
509 INFO("arg_cmp[%d]: SCMP_CMP(%u, %llu, %llu, %llu)", i,
510 rule->args_value[i].index,
511 (long long unsigned int)rule->args_value[i].op,
512 (long long unsigned int)rule->args_value[i].mask,
513 (long long unsigned int)rule->args_value[i].value);
514
515 if (SCMP_CMP_MASKED_EQ == rule->args_value[i].op)
516 arg_cmp[i] = SCMP_CMP(rule->args_value[i].index,
517 rule->args_value[i].op,
518 rule->args_value[i].mask,
519 rule->args_value[i].value);
520 else
521 arg_cmp[i] = SCMP_CMP(rule->args_value[i].index,
522 rule->args_value[i].op,
523 rule->args_value[i].value);
524 }
525
526 ret = seccomp_rule_add_exact_array(ctx, rule->action, nr,
527 rule->args_num, arg_cmp);
528 if (ret < 0) {
529 ERROR("%s - Failed loading rule for %s (nr %d action %d (%s))",
530 strerror(-ret), line, nr, rule->action,
531 get_action_name(rule->action));
532 return false;
533 }
534
535 return true;
536 }
537
538 /*
539 * v2 consists of
540 * [x86]
541 * open
542 * read
543 * write
544 * close
545 * # a comment
546 * [x86_64]
547 * open
548 * read
549 * write
550 * close
551 */
552 static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
553 {
554 int ret;
555 char *p;
556 enum lxc_hostarch_t cur_rule_arch, native_arch;
557 size_t line_bufsz = 0;
558 bool blacklist = false;
559 char *rule_line = NULL;
560 uint32_t default_policy_action = -1, default_rule_action = -1;
561 struct seccomp_v2_rule rule;
562 struct scmp_ctx_info {
563 uint32_t architectures[3];
564 scmp_filter_ctx contexts[3];
565 bool needs_merge[3];
566 } ctx;
567
568 if (strncmp(line, "blacklist", 9) == 0)
569 blacklist = true;
570 else if (strncmp(line, "whitelist", 9) != 0) {
571 ERROR("Bad seccomp policy style \"%s\"", line);
572 return -1;
573 }
574
575 p = strchr(line, ' ');
576 if (p) {
577 default_policy_action = get_v2_default_action(p + 1);
578 if (default_policy_action == -2)
579 return -1;
580 }
581
582 /* for blacklist, allow any syscall which has no rule */
583 if (blacklist) {
584 if (default_policy_action == -1)
585 default_policy_action = SCMP_ACT_ALLOW;
586
587 if (default_rule_action == -1)
588 default_rule_action = SCMP_ACT_KILL;
589 } else {
590 if (default_policy_action == -1)
591 default_policy_action = SCMP_ACT_KILL;
592
593 if (default_rule_action == -1)
594 default_rule_action = SCMP_ACT_ALLOW;
595 }
596
597 memset(&ctx, 0, sizeof(ctx));
598 ctx.architectures[0] = SCMP_ARCH_NATIVE;
599 ctx.architectures[1] = SCMP_ARCH_NATIVE;
600 ctx.architectures[2] = SCMP_ARCH_NATIVE;
601 native_arch = get_hostarch();
602 cur_rule_arch = native_arch;
603 if (native_arch == lxc_seccomp_arch_amd64) {
604 cur_rule_arch = lxc_seccomp_arch_all;
605
606 ctx.architectures[0] = SCMP_ARCH_X86;
607 ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_i386,
608 default_policy_action,
609 &ctx.needs_merge[0]);
610 if (!ctx.contexts[0])
611 goto bad;
612
613 ctx.architectures[1] = SCMP_ARCH_X32;
614 ctx.contexts[1] = get_new_ctx(lxc_seccomp_arch_x32,
615 default_policy_action,
616 &ctx.needs_merge[1]);
617 if (!ctx.contexts[1])
618 goto bad;
619
620 ctx.architectures[2] = SCMP_ARCH_X86_64;
621 ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_amd64,
622 default_policy_action,
623 &ctx.needs_merge[2]);
624 if (!ctx.contexts[2])
625 goto bad;
626 #ifdef SCMP_ARCH_PPC
627 } else if (native_arch == lxc_seccomp_arch_ppc64) {
628 cur_rule_arch = lxc_seccomp_arch_all;
629
630 ctx.architectures[0] = SCMP_ARCH_PPC;
631 ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_ppc,
632 default_policy_action,
633 &ctx.needs_merge[0]);
634 if (!ctx.contexts[0])
635 goto bad;
636
637 ctx.architectures[2] = SCMP_ARCH_PPC64;
638 ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_ppc64,
639 default_policy_action,
640 &ctx.needs_merge[2]);
641 if (!ctx.contexts[2])
642 goto bad;
643 #endif
644 #ifdef SCMP_ARCH_ARM
645 } else if (native_arch == lxc_seccomp_arch_arm64) {
646 cur_rule_arch = lxc_seccomp_arch_all;
647
648 ctx.architectures[0] = SCMP_ARCH_ARM;
649 ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_arm,
650 default_policy_action,
651 &ctx.needs_merge[0]);
652 if (!ctx.contexts[0])
653 goto bad;
654
655 #ifdef SCMP_ARCH_AARCH64
656 ctx.architectures[2] = SCMP_ARCH_AARCH64;
657 ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_arm64,
658 default_policy_action,
659 &ctx.needs_merge[2]);
660 if (!ctx.contexts[2])
661 goto bad;
662 #endif
663 #endif
664 #ifdef SCMP_ARCH_MIPS
665 } else if (native_arch == lxc_seccomp_arch_mips64) {
666 cur_rule_arch = lxc_seccomp_arch_all;
667
668 ctx.architectures[0] = SCMP_ARCH_MIPS;
669 ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_mips,
670 default_policy_action,
671 &ctx.needs_merge[0]);
672 if (!ctx.contexts[0])
673 goto bad;
674
675 ctx.architectures[1] = SCMP_ARCH_MIPS64N32;
676 ctx.contexts[1] = get_new_ctx(lxc_seccomp_arch_mips64n32,
677 default_policy_action,
678 &ctx.needs_merge[1]);
679 if (!ctx.contexts[1])
680 goto bad;
681
682 ctx.architectures[2] = SCMP_ARCH_MIPS64;
683 ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_mips64,
684 default_policy_action,
685 &ctx.needs_merge[2]);
686 if (!ctx.contexts[2])
687 goto bad;
688 } else if (native_arch == lxc_seccomp_arch_mipsel64) {
689 cur_rule_arch = lxc_seccomp_arch_all;
690
691 ctx.architectures[0] = SCMP_ARCH_MIPSEL;
692 ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_mipsel,
693 default_policy_action,
694 &ctx.needs_merge[0]);
695 if (!ctx.contexts[0])
696 goto bad;
697
698 ctx.architectures[1] = SCMP_ARCH_MIPSEL64N32;
699 ctx.contexts[1] = get_new_ctx(lxc_seccomp_arch_mipsel64n32,
700 default_policy_action,
701 &ctx.needs_merge[1]);
702 if (!ctx.contexts[1])
703 goto bad;
704
705 ctx.architectures[2] = SCMP_ARCH_MIPSEL64;
706 ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_mipsel64,
707 default_policy_action,
708 &ctx.needs_merge[2]);
709 if (!ctx.contexts[2])
710 goto bad;
711 #endif
712 }
713
714 if (default_policy_action != SCMP_ACT_KILL) {
715 ret = seccomp_reset(conf->seccomp_ctx, default_policy_action);
716 if (ret != 0) {
717 ERROR("Error re-initializing Seccomp");
718 return -1;
719 }
720
721 ret = seccomp_attr_set(conf->seccomp_ctx, SCMP_FLTATR_CTL_NNP, 0);
722 if (ret < 0) {
723 ERROR("%s - Failed to turn off no-new-privs", strerror(-ret));
724 return -1;
725 }
726
727 #ifdef SCMP_FLTATR_ATL_TSKIP
728 ret = seccomp_attr_set(conf->seccomp_ctx, SCMP_FLTATR_ATL_TSKIP, 1);
729 if (ret < 0)
730 WARN("%s - Failed to turn on seccomp nop-skip, continuing", strerror(-ret));
731 #endif
732 }
733
734 while (getline(&rule_line, &line_bufsz, f) != -1) {
735 if (line[0] == '#')
736 continue;
737
738 if (line[0] == '\0')
739 continue;
740
741 remove_trailing_newlines(line);
742
743 INFO("Processing \"%s\"", line);
744 if (line[0] == '[') {
745 /* Read the architecture for next set of rules. */
746 if (strcmp(line, "[x86]") == 0 ||
747 strcmp(line, "[X86]") == 0) {
748 if (native_arch != lxc_seccomp_arch_i386 &&
749 native_arch != lxc_seccomp_arch_amd64) {
750 cur_rule_arch = lxc_seccomp_arch_unknown;
751 continue;
752 }
753
754 cur_rule_arch = lxc_seccomp_arch_i386;
755 } else if (strcmp(line, "[x32]") == 0 ||
756 strcmp(line, "[X32]") == 0) {
757 if (native_arch != lxc_seccomp_arch_amd64) {
758 cur_rule_arch = lxc_seccomp_arch_unknown;
759 continue;
760 }
761
762 cur_rule_arch = lxc_seccomp_arch_x32;
763 } else if (strcmp(line, "[X86_64]") == 0 ||
764 strcmp(line, "[x86_64]") == 0) {
765 if (native_arch != lxc_seccomp_arch_amd64) {
766 cur_rule_arch = lxc_seccomp_arch_unknown;
767 continue;
768 }
769
770 cur_rule_arch = lxc_seccomp_arch_amd64;
771 } else if (strcmp(line, "[all]") == 0 ||
772 strcmp(line, "[ALL]") == 0) {
773 cur_rule_arch = lxc_seccomp_arch_all;
774 }
775 #ifdef SCMP_ARCH_ARM
776 else if (strcmp(line, "[arm]") == 0 ||
777 strcmp(line, "[ARM]") == 0) {
778 if (native_arch != lxc_seccomp_arch_arm &&
779 native_arch != lxc_seccomp_arch_arm64) {
780 cur_rule_arch = lxc_seccomp_arch_unknown;
781 continue;
782 }
783
784 cur_rule_arch = lxc_seccomp_arch_arm;
785 }
786 #endif
787 #ifdef SCMP_ARCH_AARCH64
788 else if (strcmp(line, "[arm64]") == 0 ||
789 strcmp(line, "[ARM64]") == 0) {
790 if (native_arch != lxc_seccomp_arch_arm64) {
791 cur_rule_arch = lxc_seccomp_arch_unknown;
792 continue;
793 }
794
795 cur_rule_arch = lxc_seccomp_arch_arm64;
796 }
797 #endif
798 #ifdef SCMP_ARCH_PPC64LE
799 else if (strcmp(line, "[ppc64le]") == 0 ||
800 strcmp(line, "[PPC64LE]") == 0) {
801 if (native_arch != lxc_seccomp_arch_ppc64le) {
802 cur_rule_arch = lxc_seccomp_arch_unknown;
803 continue;
804 }
805
806 cur_rule_arch = lxc_seccomp_arch_ppc64le;
807 }
808 #endif
809 #ifdef SCMP_ARCH_PPC64
810 else if (strcmp(line, "[ppc64]") == 0 ||
811 strcmp(line, "[PPC64]") == 0) {
812 if (native_arch != lxc_seccomp_arch_ppc64) {
813 cur_rule_arch = lxc_seccomp_arch_unknown;
814 continue;
815 }
816
817 cur_rule_arch = lxc_seccomp_arch_ppc64;
818 }
819 #endif
820 #ifdef SCMP_ARCH_PPC
821 else if (strcmp(line, "[ppc]") == 0 ||
822 strcmp(line, "[PPC]") == 0) {
823 if (native_arch != lxc_seccomp_arch_ppc &&
824 native_arch != lxc_seccomp_arch_ppc64) {
825 cur_rule_arch = lxc_seccomp_arch_unknown;
826 continue;
827 }
828
829 cur_rule_arch = lxc_seccomp_arch_ppc;
830 }
831 #endif
832 #ifdef SCMP_ARCH_MIPS
833 else if (strcmp(line, "[mips64]") == 0 ||
834 strcmp(line, "[MIPS64]") == 0) {
835 if (native_arch != lxc_seccomp_arch_mips64) {
836 cur_rule_arch = lxc_seccomp_arch_unknown;
837 continue;
838 }
839
840 cur_rule_arch = lxc_seccomp_arch_mips64;
841 } else if (strcmp(line, "[mips64n32]") == 0 ||
842 strcmp(line, "[MIPS64N32]") == 0) {
843 if (native_arch != lxc_seccomp_arch_mips64) {
844 cur_rule_arch = lxc_seccomp_arch_unknown;
845 continue;
846 }
847
848 cur_rule_arch = lxc_seccomp_arch_mips64n32;
849 } else if (strcmp(line, "[mips]") == 0 ||
850 strcmp(line, "[MIPS]") == 0) {
851 if (native_arch != lxc_seccomp_arch_mips &&
852 native_arch != lxc_seccomp_arch_mips64) {
853 cur_rule_arch = lxc_seccomp_arch_unknown;
854 continue;
855 }
856
857 cur_rule_arch = lxc_seccomp_arch_mips;
858 } else if (strcmp(line, "[mipsel64]") == 0 ||
859 strcmp(line, "[MIPSEL64]") == 0) {
860 if (native_arch != lxc_seccomp_arch_mipsel64) {
861 cur_rule_arch = lxc_seccomp_arch_unknown;
862 continue;
863 }
864
865 cur_rule_arch = lxc_seccomp_arch_mipsel64;
866 } else if (strcmp(line, "[mipsel64n32]") == 0 ||
867 strcmp(line, "[MIPSEL64N32]") == 0) {
868 if (native_arch != lxc_seccomp_arch_mipsel64) {
869 cur_rule_arch = lxc_seccomp_arch_unknown;
870 continue;
871 }
872
873 cur_rule_arch = lxc_seccomp_arch_mipsel64n32;
874 } else if (strcmp(line, "[mipsel]") == 0 ||
875 strcmp(line, "[MIPSEL]") == 0) {
876 if (native_arch != lxc_seccomp_arch_mipsel &&
877 native_arch != lxc_seccomp_arch_mipsel64) {
878 cur_rule_arch = lxc_seccomp_arch_unknown;
879 continue;
880 }
881
882 cur_rule_arch = lxc_seccomp_arch_mipsel;
883 }
884 #endif
885 #ifdef SCMP_ARCH_S390X
886 else if (strcmp(line, "[s390x]") == 0 ||
887 strcmp(line, "[S390X]") == 0) {
888 if (native_arch != lxc_seccomp_arch_s390x) {
889 cur_rule_arch = lxc_seccomp_arch_unknown;
890 continue;
891 }
892
893 cur_rule_arch = lxc_seccomp_arch_s390x;
894 #endif
895 } else {
896 goto bad_arch;
897 }
898
899 continue;
900 }
901
902 /* irrelevant arch - i.e. arm on i386 */
903 if (cur_rule_arch == lxc_seccomp_arch_unknown)
904 continue;
905
906 memset(&rule, 0, sizeof(rule));
907 /* read optional action which follows the syscall */
908 ret = parse_v2_rules(line, default_rule_action, &rule);
909 if (ret != 0) {
910 ERROR("Failed to interpret seccomp rule");
911 goto bad_rule;
912 }
913
914 if (!do_resolve_add_rule(SCMP_ARCH_NATIVE, line,
915 conf->seccomp_ctx, &rule))
916 goto bad_rule;
917
918 INFO("Added native rule for arch %d for %s action %d(%s)",
919 SCMP_ARCH_NATIVE, line, rule.action,
920 get_action_name(rule.action));
921
922 if (ctx.architectures[0] != SCMP_ARCH_NATIVE) {
923 if (!do_resolve_add_rule(ctx.architectures[0], line,
924 ctx.contexts[0], &rule))
925 goto bad_rule;
926
927 INFO("Added compat rule for arch %d for %s action %d(%s)",
928 ctx.architectures[0], line, rule.action,
929 get_action_name(rule.action));
930 }
931
932 if (ctx.architectures[1] != SCMP_ARCH_NATIVE) {
933 if (!do_resolve_add_rule(ctx.architectures[1], line,
934 ctx.contexts[1], &rule))
935 goto bad_rule;
936
937 INFO("Added compat rule for arch %d for %s action %d(%s)",
938 ctx.architectures[1], line, rule.action,
939 get_action_name(rule.action));
940 }
941
942 if (ctx.architectures[2] != SCMP_ARCH_NATIVE) {
943 if (!do_resolve_add_rule(ctx.architectures[2], line,
944 ctx.contexts[2], &rule))
945 goto bad_rule;
946
947 INFO("Added native rule for arch %d for %s action %d(%s)",
948 ctx.architectures[2], line, rule.action,
949 get_action_name(rule.action));
950 }
951 }
952
953 INFO("Merging compat seccomp contexts into main context");
954 if (ctx.contexts[0]) {
955 if (ctx.needs_merge[0]) {
956 ret = seccomp_merge(conf->seccomp_ctx, ctx.contexts[0]);
957 if (ret < 0) {
958 ERROR("Failed to merge first compat seccomp "
959 "context into main context");
960 goto bad;
961 }
962
963 TRACE("Merged first compat seccomp context into main context");
964 } else {
965 seccomp_release(ctx.contexts[0]);
966 ctx.contexts[0] = NULL;
967 }
968 }
969
970 if (ctx.contexts[1]) {
971 if (ctx.needs_merge[1]) {
972 ret = seccomp_merge(conf->seccomp_ctx, ctx.contexts[1]);
973 if (ret < 0) {
974 ERROR("Failed to merge first compat seccomp "
975 "context into main context");
976 goto bad;
977 }
978
979 TRACE("Merged second compat seccomp context into main context");
980 } else {
981 seccomp_release(ctx.contexts[1]);
982 ctx.contexts[1] = NULL;
983 }
984 }
985
986 if (ctx.contexts[2]) {
987 if (ctx.needs_merge[2]) {
988 ret = seccomp_merge(conf->seccomp_ctx, ctx.contexts[2]);
989 if (ret < 0) {
990 ERROR("Failed to merge third compat seccomp "
991 "context into main context");
992 goto bad;
993 }
994
995 TRACE("Merged third compat seccomp context into main context");
996 } else {
997 seccomp_release(ctx.contexts[2]);
998 ctx.contexts[2] = NULL;
999 }
1000 }
1001
1002 free(rule_line);
1003 return 0;
1004
1005 bad_arch:
1006 ERROR("Unsupported architecture \"%s\"", line);
1007
1008 bad_rule:
1009 bad:
1010 if (ctx.contexts[0])
1011 seccomp_release(ctx.contexts[0]);
1012
1013 if (ctx.contexts[1])
1014 seccomp_release(ctx.contexts[1]);
1015
1016 if (ctx.contexts[2])
1017 seccomp_release(ctx.contexts[2]);
1018
1019 free(rule_line);
1020
1021 return -1;
1022 }
1023 #else /* HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH */
/*
 * Fallback used when the v2 parser is compiled out: version 2 policies are
 * always rejected.
 */
static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
{
	(void)f;
	(void)line;
	(void)conf;

	return -1;
}
1028 #endif /* HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH */
1029
/*
 * Parse a seccomp policy file.
 *
 * The first line holds the policy language version, which must be 1 or 2.
 * The second line holds the directives: it must contain "whitelist" for
 * version 1, and must not contain "debug" (not implemented) for any
 * version. The rest of the file is handed to the parser for that version,
 * whose result is returned.
 *
 * Returns -1 when the header is invalid.
 */
static int parse_config(FILE *f, struct lxc_conf *conf)
{
	char directives[MAXPATHLEN];
	int version;

	if (fscanf(f, "%d\n", &version) != 1 ||
	    (version != 1 && version != 2)) {
		ERROR("Invalid version");
		return -1;
	}

	if (fgets(directives, MAXPATHLEN, f) == NULL) {
		ERROR("Invalid config file");
		return -1;
	}

	if (version == 1 && strstr(directives, "whitelist") == NULL) {
		ERROR("Only whitelist policy is supported");
		return -1;
	}

	if (strstr(directives, "debug") != NULL) {
		ERROR("Debug not yet implemented");
		return -1;
	}

	return version == 1 ? parse_config_v1(f, conf)
			    : parse_config_v2(f, directives, conf);
}
1069
/*
 * use_seccomp: decide whether a seccomp policy should be applied.
 *
 * Reads /proc/self/status and looks for the "Seccomp:" line.
 * Returns false if
 *   1. no such line exists (seccomp is not enabled in the kernel), or
 *   2. the line carries a non-zero value (this task is already confined).
 * Returns true otherwise, including when the status file cannot be opened.
 */
static bool use_seccomp(void)
{
	FILE *status;
	char *buf = NULL;
	size_t buf_len = 0;
	bool seen = false, confined = false;

	status = fopen("/proc/self/status", "r");
	if (!status)
		return true;

	/* Stop reading as soon as the Seccomp line has been handled. */
	while (!seen && getline(&buf, &buf_len, status) != -1) {
		int mode;

		if (strncmp(buf, "Seccomp:", 8) != 0)
			continue;

		seen = true;
		if (sscanf(buf + 8, "%d", &mode) == 1 && mode != 0)
			confined = true;
	}

	free(buf);
	fclose(status);

	if (!seen) {
		INFO("Seccomp is not enabled in the kernel");
		return false;
	}

	if (confined) {
		INFO("Already seccomp-confined, not loading new policy");
		return false;
	}

	return true;
}
1115
1116 int lxc_read_seccomp_config(struct lxc_conf *conf)
1117 {
1118 int check_seccomp_attr_set, ret;
1119 FILE *f;
1120
1121 if (!conf->seccomp)
1122 return 0;
1123
1124 if (!use_seccomp())
1125 return 0;
1126
1127 #if HAVE_SCMP_FILTER_CTX
1128 /* XXX for debug, pass in SCMP_ACT_TRAP */
1129 conf->seccomp_ctx = seccomp_init(SCMP_ACT_KILL);
1130 ret = !conf->seccomp_ctx;
1131 #else
1132 ret = seccomp_init(SCMP_ACT_KILL) < 0;
1133 #endif
1134 if (ret) {
1135 ERROR("Failed initializing seccomp");
1136 return -1;
1137 }
1138
1139 /* turn off no-new-privs. We don't want it in lxc, and it breaks
1140 * with apparmor */
1141 #if HAVE_SCMP_FILTER_CTX
1142 check_seccomp_attr_set = seccomp_attr_set(conf->seccomp_ctx, SCMP_FLTATR_CTL_NNP, 0);
1143 #else
1144 check_seccomp_attr_set = seccomp_attr_set(SCMP_FLTATR_CTL_NNP, 0);
1145 #endif
1146 if (check_seccomp_attr_set) {
1147 ERROR("%s - Failed to turn off no-new-privs", strerror(-check_seccomp_attr_set));
1148 return -1;
1149 }
1150 #ifdef SCMP_FLTATR_ATL_TSKIP
1151 check_seccomp_attr_set = seccomp_attr_set(conf->seccomp_ctx, SCMP_FLTATR_ATL_TSKIP, 1);
1152 if (check_seccomp_attr_set < 0)
1153 WARN("%s - Failed to turn on seccomp nop-skip, continuing",
1154 strerror(-check_seccomp_attr_set));
1155 #endif
1156
1157 f = fopen(conf->seccomp, "r");
1158 if (!f) {
1159 SYSERROR("Failed to open seccomp policy file %s", conf->seccomp);
1160 return -1;
1161 }
1162
1163 ret = parse_config(f, conf);
1164 fclose(f);
1165
1166 return ret;
1167 }
1168
1169 int lxc_seccomp_load(struct lxc_conf *conf)
1170 {
1171 int ret;
1172
1173 if (!conf->seccomp)
1174 return 0;
1175
1176 if (!use_seccomp())
1177 return 0;
1178
1179 #if HAVE_SCMP_FILTER_CTX
1180 ret = seccomp_load(conf->seccomp_ctx);
1181 #else
1182 ret = seccomp_load();
1183 #endif
1184 if (ret < 0) {
1185 ERROR("%s- Error loading the seccomp policy", strerror(-ret));
1186 return -1;
1187 }
1188
1189 /* After load seccomp filter into the kernel successfully, export the current seccomp
1190 * filter to log file */
1191 #if HAVE_SCMP_FILTER_CTX
1192 if ((lxc_log_get_level() <= LXC_LOG_LEVEL_TRACE ||
1193 conf->loglevel <= LXC_LOG_LEVEL_TRACE) &&
1194 lxc_log_fd >= 0) {
1195 ret = seccomp_export_pfc(conf->seccomp_ctx, lxc_log_fd);
1196 /* Just give an warning when export error */
1197 if (ret < 0)
1198 WARN("%s - Failed to export seccomp filter to log file", strerror(-ret));
1199 }
1200 #endif
1201
1202 return 0;
1203 }
1204
1205 void lxc_seccomp_free(struct lxc_conf *conf)
1206 {
1207 free(conf->seccomp);
1208 conf->seccomp = NULL;
1209
1210 #if HAVE_SCMP_FILTER_CTX
1211 if (conf->seccomp_ctx) {
1212 seccomp_release(conf->seccomp_ctx);
1213 conf->seccomp_ctx = NULL;
1214 }
1215 #endif
1216 }