]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/seccomp.c
seccomp: get_v2_action()
[mirror_lxc.git] / src / lxc / seccomp.c
1 /*
2 * lxc: linux Container library
3 *
4 * (C) Copyright Canonical, Inc. 2012
5 *
6 * Authors:
7 * Serge Hallyn <serge.hallyn@canonical.com>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #define _GNU_SOURCE
25 #include <errno.h>
26 #include <seccomp.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <sys/mount.h>
30 #include <sys/utsname.h>
31
32 #include "config.h"
33 #include "log.h"
34 #include "lxcseccomp.h"
35 #include "utils.h"
36
37 lxc_log_define(lxc_seccomp, lxc);
38
39 static int parse_config_v1(FILE *f, struct lxc_conf *conf)
40 {
41 int ret = 0;
42 size_t line_bufsz = 0;
43 char *line = NULL;
44
45 while (getline(&line, &line_bufsz, f) != -1) {
46 int nr;
47
48 ret = sscanf(line, "%d", &nr);
49 if (ret != 1)
50 return -1;
51
52 #if HAVE_SCMP_FILTER_CTX
53 ret = seccomp_rule_add(conf->seccomp_ctx, SCMP_ACT_ALLOW, nr, 0);
54 #else
55 ret = seccomp_rule_add(SCMP_ACT_ALLOW, nr, 0);
56 #endif
57 if (ret < 0) {
58 ERROR("Failed loading allow rule for %d", nr);
59 break;
60 }
61 }
62 free(line);
63
64 return ret;
65 }
66
67 #if HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH
68 static const char *get_action_name(uint32_t action)
69 {
70 /* The upper 16 bits indicate the type of the seccomp action. */
71 switch (action & 0xffff0000) {
72 case SCMP_ACT_KILL:
73 return "kill";
74 case SCMP_ACT_ALLOW:
75 return "allow";
76 case SCMP_ACT_TRAP:
77 return "trap";
78 case SCMP_ACT_ERRNO(0):
79 return "errno";
80 }
81
82 return "invalid action";
83 }
84
85 static uint32_t get_v2_default_action(char *line)
86 {
87 uint32_t ret_action = -1;
88
89 while (*line == ' ')
90 line++;
91
92 /* After 'whitelist' or 'blacklist' comes default behavior. */
93 if (strncmp(line, "kill", 4) == 0) {
94 ret_action = SCMP_ACT_KILL;
95 } else if (strncmp(line, "errno", 5) == 0) {
96 int e, ret;
97
98 ret = sscanf(line + 5, "%d", &e);
99 if (ret != 1) {
100 ERROR("Failed to parse errno value from %s", line);
101 return -2;
102 }
103
104 ret_action = SCMP_ACT_ERRNO(e);
105 } else if (strncmp(line, "allow", 5) == 0) {
106 ret_action = SCMP_ACT_ALLOW;
107 } else if (strncmp(line, "trap", 4) == 0) {
108 ret_action = SCMP_ACT_TRAP;
109 }
110
111 return ret_action;
112 }
113
/* Read the optional action that follows the syscall name on a rule line.
 *
 * Returns @def_action when the line has no text after the first space, when
 * that text starts a comment ('#'), or when it is not a known action
 * keyword. Returns (uint32_t)-1 when an "errno" action has an unparsable
 * number. Otherwise returns the parsed action.
 */
static uint32_t get_v2_action(char *line, uint32_t def_action)
{
	uint32_t action;
	char *cursor;

	cursor = strchr(line, ' ');
	if (!cursor)
		return def_action;

	/* Step over the separator and any padding that follows it. */
	cursor += strspn(cursor, " ");

	if (*cursor == '\0' || *cursor == '#')
		return def_action;

	action = get_v2_default_action(cursor);
	if (action == (uint32_t)-2)
		return -1;

	if (action == (uint32_t)-1)
		return def_action;

	return action;
}
140
/* One argument comparison attached to a v2 seccomp rule. */
struct v2_rule_args {
	/* index of the syscall argument to compare (the parser rejects >= 6) */
	uint32_t index;
	/* value the argument is compared against */
	uint64_t value;
	/* mask, only passed to libseccomp for SCMP_CMP_MASKED_EQ */
	uint64_t mask;
	/* comparison operator */
	enum scmp_compare op;
};
147
/* A parsed v2 rule line: the action plus its argument comparisons. */
struct seccomp_v2_rule {
	/* seccomp action applied when the rule matches */
	uint32_t action;
	/* number of valid entries in args_value */
	uint32_t args_num;
	/* argument comparisons, at most 6 */
	struct v2_rule_args args_value[6];
};
153
154 static enum scmp_compare parse_v2_rule_op(char *s)
155 {
156 if (strcmp(s, "SCMP_CMP_NE") == 0 || strcmp(s, "!=") == 0)
157 return SCMP_CMP_NE;
158 else if (strcmp(s, "SCMP_CMP_LT") == 0 || strcmp(s, "<") == 0)
159 return SCMP_CMP_LT;
160 else if (strcmp(s, "SCMP_CMP_LE") == 0 || strcmp(s, "<=") == 0)
161 return SCMP_CMP_LE;
162 else if (strcmp(s, "SCMP_CMP_EQ") == 0 || strcmp(s, "==") == 0)
163 return SCMP_CMP_EQ;
164 else if (strcmp(s, "SCMP_CMP_GE") == 0 || strcmp(s, ">=") == 0)
165 return SCMP_CMP_GE;
166 else if (strcmp(s, "SCMP_CMP_GT") == 0 || strcmp(s, ">") == 0)
167 return SCMP_CMP_GT;
168 else if (strcmp(s, "SCMP_CMP_MASKED_EQ") == 0 || strcmp(s, "&=") == 0)
169 return SCMP_CMP_MASKED_EQ;
170
171 return _SCMP_CMP_MAX;
172 }
173
174 /* This function is used to parse the args string into the structure.
175 * args string format:[index,value,op,valueTwo] or [index,value,op]
176 * index: the index for syscall arguments (type uint)
177 * value: the value for syscall arguments (type uint64)
178 * op: the operator for syscall arguments(string),
179 a valid list of constants as of libseccomp v2.3.2 is
180 SCMP_CMP_NE,SCMP_CMP_LE,SCMP_CMP_LE, SCMP_CMP_EQ, SCMP_CMP_GE,
181 SCMP_CMP_GT, SCMP_CMP_MASKED_EQ, or !=,<=,==,>=,>,&=
182 * valueTwo: the value for syscall arguments only used for mask eq (type uint64, optional)
183 * Returns 0 on success, < 0 otherwise.
184 */
185 static int get_seccomp_arg_value(char *key, struct v2_rule_args *rule_args)
186 {
187 int ret = 0;
188 uint64_t value = 0;
189 uint64_t mask = 0;
190 enum scmp_compare op = 0;
191 uint32_t index = 0;
192 char s[31] = {0}, v[24] = {0}, m[24] = {0};
193 char *tmp = NULL;
194
195 tmp = strchr(key, '[');
196 if (!tmp) {
197 ERROR("Failed to interpret args");
198 return -1;
199 }
200 ret = sscanf(tmp, "[%i,%23[^,],%30[^0-9^,],%23[^,]", &index, v, s, m);
201 if ((ret != 3 && ret != 4) || index >= 6) {
202 ERROR("Failed to interpret args value");
203 return -1;
204 }
205
206 ret = lxc_safe_uint64(v, &value);
207 if (ret < 0) {
208 ERROR("Invalid argument value");
209 return -1;
210 }
211
212 ret = lxc_safe_uint64(v, &mask);
213 if (ret < 0) {
214 ERROR("Invalid argument mask");
215 return -1;
216 }
217
218 op = parse_v2_rule_op(s);
219 if (op == _SCMP_CMP_MAX) {
220 ERROR("Failed to interpret args operator value");
221 return -1;
222 }
223
224 rule_args->index = index;
225 rule_args->value = value;
226 rule_args->mask = mask;
227 rule_args->op = op;
228 return 0;
229 }
230
231 /* This function is used to parse the seccomp rule entry.
232 * @line : seccomp rule entry string.
233 * @def_action : default action used in the case if the 'line' contain non valid action.
234 * @rules : output struct.
235 * Returns 0 on success, < 0 otherwise.
236 */
237 static int parse_v2_rules(char *line, uint32_t def_action, struct seccomp_v2_rule *rules)
238 {
239 int ret = 0 ;
240 int i = 0;
241 char *tmp = NULL;
242 char *key = NULL;
243 char *saveptr = NULL;
244
245 tmp = strdup(line);
246 if (!tmp)
247 return -1;
248
249 /* read optional action which follows the syscall */
250 rules->action = get_v2_action(tmp, def_action);
251 if (rules->action == -1) {
252 ERROR("Failed to interpret action");
253 ret = -1;
254 goto out;
255 }
256
257 rules->args_num = 0;
258 if (!strchr(tmp, '[')) {
259 ret = 0;
260 goto out;
261 }
262
263 for ((key = strtok_r(tmp, "]", &saveptr)), i = 0; key && i < 6; (key = strtok_r(NULL, "]", &saveptr)), i++) {
264 ret = get_seccomp_arg_value(key, &rules->args_value[i]);
265 if (ret < 0) {
266 ret = -1;
267 goto out;
268 }
269 rules->args_num++;
270 }
271
272 ret = 0;
273 out:
274 free(tmp);
275 return ret;
276 }
277
278 #endif
279
280 #if HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH
/* Architectures the policy parser distinguishes. get_hostarch() reports the
 * host as one of these, and "[arch]" section headers in a v2 policy select
 * one as the current rule architecture.
 */
enum lxc_hostarch_t {
	/* selected by an "[all]" section header */
	lxc_seccomp_arch_all = 0,
	lxc_seccomp_arch_native,
	lxc_seccomp_arch_i386,
	lxc_seccomp_arch_x32,
	lxc_seccomp_arch_amd64,
	lxc_seccomp_arch_arm,
	lxc_seccomp_arch_arm64,
	lxc_seccomp_arch_ppc64,
	lxc_seccomp_arch_ppc64le,
	lxc_seccomp_arch_ppc,
	lxc_seccomp_arch_mips,
	lxc_seccomp_arch_mips64,
	lxc_seccomp_arch_mips64n32,
	lxc_seccomp_arch_mipsel,
	lxc_seccomp_arch_mipsel64,
	lxc_seccomp_arch_mipsel64n32,
	lxc_seccomp_arch_s390x,
	/* unrecognised host, or a policy section that does not apply to it */
	lxc_seccomp_arch_unknown = 999,
};
301
/* Values get_hostarch() returns for "mips*" / "mips64*" machine names: the
 * "mipsel" variants when __MIPSEL__ is defined (presumably set by the
 * compiler for little-endian MIPS targets -- TODO confirm), the plain "mips"
 * variants otherwise.
 */
#ifdef __MIPSEL__
# define MIPS_ARCH_O32 lxc_seccomp_arch_mipsel
# define MIPS_ARCH_N64 lxc_seccomp_arch_mipsel64
#else
# define MIPS_ARCH_O32 lxc_seccomp_arch_mips
# define MIPS_ARCH_N64 lxc_seccomp_arch_mips64
#endif
309
310 int get_hostarch(void)
311 {
312 struct utsname uts;
313 if (uname(&uts) < 0) {
314 SYSERROR("Failed to read host arch");
315 return -1;
316 }
317 if (strcmp(uts.machine, "i686") == 0)
318 return lxc_seccomp_arch_i386;
319 /* no x32 kernels */
320 else if (strcmp(uts.machine, "x86_64") == 0)
321 return lxc_seccomp_arch_amd64;
322 else if (strncmp(uts.machine, "armv7", 5) == 0)
323 return lxc_seccomp_arch_arm;
324 else if (strncmp(uts.machine, "aarch64", 7) == 0)
325 return lxc_seccomp_arch_arm64;
326 else if (strncmp(uts.machine, "ppc64le", 7) == 0)
327 return lxc_seccomp_arch_ppc64le;
328 else if (strncmp(uts.machine, "ppc64", 5) == 0)
329 return lxc_seccomp_arch_ppc64;
330 else if (strncmp(uts.machine, "ppc", 3) == 0)
331 return lxc_seccomp_arch_ppc;
332 else if (strncmp(uts.machine, "mips64", 6) == 0)
333 return MIPS_ARCH_N64;
334 else if (strncmp(uts.machine, "mips", 4) == 0)
335 return MIPS_ARCH_O32;
336 else if (strncmp(uts.machine, "s390x", 5) == 0)
337 return lxc_seccomp_arch_s390x;
338 return lxc_seccomp_arch_unknown;
339 }
340
341 scmp_filter_ctx get_new_ctx(enum lxc_hostarch_t n_arch, uint32_t default_policy_action, bool *needs_merge)
342 {
343 scmp_filter_ctx ctx;
344 int ret;
345 uint32_t arch;
346
347 switch(n_arch) {
348 case lxc_seccomp_arch_i386: arch = SCMP_ARCH_X86; break;
349 case lxc_seccomp_arch_x32: arch = SCMP_ARCH_X32; break;
350 case lxc_seccomp_arch_amd64: arch = SCMP_ARCH_X86_64; break;
351 case lxc_seccomp_arch_arm: arch = SCMP_ARCH_ARM; break;
352 #ifdef SCMP_ARCH_AARCH64
353 case lxc_seccomp_arch_arm64: arch = SCMP_ARCH_AARCH64; break;
354 #endif
355 #ifdef SCMP_ARCH_PPC64LE
356 case lxc_seccomp_arch_ppc64le: arch = SCMP_ARCH_PPC64LE; break;
357 #endif
358 #ifdef SCMP_ARCH_PPC64
359 case lxc_seccomp_arch_ppc64: arch = SCMP_ARCH_PPC64; break;
360 #endif
361 #ifdef SCMP_ARCH_PPC
362 case lxc_seccomp_arch_ppc: arch = SCMP_ARCH_PPC; break;
363 #endif
364 #ifdef SCMP_ARCH_MIPS
365 case lxc_seccomp_arch_mips: arch = SCMP_ARCH_MIPS; break;
366 case lxc_seccomp_arch_mips64: arch = SCMP_ARCH_MIPS64; break;
367 case lxc_seccomp_arch_mips64n32: arch = SCMP_ARCH_MIPS64N32; break;
368 case lxc_seccomp_arch_mipsel: arch = SCMP_ARCH_MIPSEL; break;
369 case lxc_seccomp_arch_mipsel64: arch = SCMP_ARCH_MIPSEL64; break;
370 case lxc_seccomp_arch_mipsel64n32: arch = SCMP_ARCH_MIPSEL64N32; break;
371 #endif
372 #ifdef SCMP_ARCH_S390X
373 case lxc_seccomp_arch_s390x: arch = SCMP_ARCH_S390X; break;
374 #endif
375 default: return NULL;
376 }
377
378 if ((ctx = seccomp_init(default_policy_action)) == NULL) {
379 ERROR("Error initializing seccomp context");
380 return NULL;
381 }
382 if (seccomp_attr_set(ctx, SCMP_FLTATR_CTL_NNP, 0)) {
383 ERROR("Failed to turn off no-new-privs");
384 seccomp_release(ctx);
385 return NULL;
386 }
387 #ifdef SCMP_FLTATR_ATL_TSKIP
388 if (seccomp_attr_set(ctx, SCMP_FLTATR_ATL_TSKIP, 1)) {
389 WARN("Failed to turn on seccomp nop-skip, continuing");
390 }
391 #endif
392
393 ret = seccomp_arch_exist(ctx, arch);
394 if (ret < 0) {
395 if (ret != -EEXIST) {
396 ERROR("%s - Failed to determine whether arch %d is "
397 "already present in the main seccomp context",
398 strerror(-ret), (int)n_arch);
399 seccomp_release(ctx);
400 return NULL;
401 }
402
403 ret = seccomp_arch_add(ctx, arch);
404 if (ret != 0) {
405 ERROR("%s - Failed to add arch %d to main seccomp context",
406 strerror(-ret), (int)n_arch);
407 seccomp_release(ctx);
408 return NULL;
409 }
410 TRACE("Added arch %d to main seccomp context", (int)n_arch);
411
412 ret = seccomp_arch_remove(ctx, SCMP_ARCH_NATIVE);
413 if (ret != 0) {
414 ERROR("Failed to remove native arch from main seccomp context");
415 seccomp_release(ctx);
416 return NULL;
417 }
418 TRACE("Removed native arch from main seccomp context");
419
420 *needs_merge = true;
421 } else {
422 *needs_merge = false;
423 TRACE("Arch %d already present in main seccomp context", (int)n_arch);
424 }
425
426 return ctx;
427 }
428
429 bool do_resolve_add_rule(uint32_t arch, char *line, scmp_filter_ctx ctx,
430 struct seccomp_v2_rule *rule)
431 {
432 int nr, ret, i;
433 struct scmp_arg_cmp arg_cmp[6];
434
435 memset(arg_cmp, 0 ,sizeof(arg_cmp));
436
437 ret = seccomp_arch_exist(ctx, arch);
438 if (arch && ret != 0) {
439 ERROR("BUG: Seccomp: rule and context arch do not match (arch "
440 "%d): %s",
441 arch, strerror(-ret));
442 return false;
443 }
444
445 /*get the syscall name*/
446 char *p = strchr(line, ' ');
447 if (p)
448 *p = '\0';
449
450 if (strncmp(line, "reject_force_umount", 19) == 0) {
451 INFO("Setting Seccomp rule to reject force umounts");
452 ret = seccomp_rule_add_exact(ctx, SCMP_ACT_ERRNO(EACCES), SCMP_SYS(umount2),
453 1, SCMP_A1(SCMP_CMP_MASKED_EQ , MNT_FORCE , MNT_FORCE ));
454 if (ret < 0) {
455 ERROR("Failed (%d) loading rule to reject force "
456 "umount: %s",
457 ret, strerror(-ret));
458 return false;
459 }
460 return true;
461 }
462
463 nr = seccomp_syscall_resolve_name(line);
464 if (nr == __NR_SCMP_ERROR) {
465 WARN("Seccomp: failed to resolve syscall: %s", line);
466 WARN("This syscall will NOT be blacklisted");
467 return true;
468 }
469 if (nr < 0) {
470 WARN("Seccomp: got negative for syscall: %d: %s", nr, line);
471 WARN("This syscall will NOT be blacklisted");
472 return true;
473 }
474
475 for (i = 0; i < rule->args_num; i++) {
476 INFO("arg_cmp[%d]:SCMP_CMP(%u, %llu, %llu, %llu)", i,
477 rule->args_value[i].index,
478 (long long unsigned int)rule->args_value[i].op,
479 (long long unsigned int)rule->args_value[i].mask,
480 (long long unsigned int)rule->args_value[i].value);
481
482 if (SCMP_CMP_MASKED_EQ == rule->args_value[i].op)
483 arg_cmp[i] = SCMP_CMP(rule->args_value[i].index, rule->args_value[i].op, rule->args_value[i].mask, rule->args_value[i].value);
484 else
485 arg_cmp[i] = SCMP_CMP(rule->args_value[i].index, rule->args_value[i].op, rule->args_value[i].value);
486 }
487
488 ret = seccomp_rule_add_exact_array(ctx, rule->action, nr, rule->args_num, arg_cmp);
489 if (ret < 0) {
490 ERROR("Failed (%d) loading rule for %s (nr %d action %d(%s)): %s",
491 ret, line, nr, rule->action, get_action_name(rule->action), strerror(-ret));
492 return false;
493 }
494 return true;
495 }
496
497 /*
498 * v2 consists of
499 * [x86]
500 * open
501 * read
502 * write
503 * close
504 * # a comment
505 * [x86_64]
506 * open
507 * read
508 * write
509 * close
510 */
511 static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
512 {
513 char *p;
514 int ret;
515 bool blacklist = false;
516 uint32_t default_policy_action = -1, default_rule_action = -1;
517 enum lxc_hostarch_t native_arch = get_hostarch(),
518 cur_rule_arch = native_arch;
519 struct seccomp_v2_rule rule;
520 struct scmp_ctx_info {
521 uint32_t architectures[3];
522 scmp_filter_ctx contexts[3];
523 bool needs_merge[3];
524 } ctx;
525
526 if (strncmp(line, "blacklist", 9) == 0)
527 blacklist = true;
528 else if (strncmp(line, "whitelist", 9) != 0) {
529 ERROR("Bad seccomp policy style: %s", line);
530 return -1;
531 }
532
533 if ((p = strchr(line, ' '))) {
534 default_policy_action = get_v2_default_action(p + 1);
535 if (default_policy_action == -2)
536 return -1;
537 }
538
539 /* for blacklist, allow any syscall which has no rule */
540 if (blacklist) {
541 if (default_policy_action == -1)
542 default_policy_action = SCMP_ACT_ALLOW;
543 if (default_rule_action == -1)
544 default_rule_action = SCMP_ACT_KILL;
545 } else {
546 if (default_policy_action == -1)
547 default_policy_action = SCMP_ACT_KILL;
548 if (default_rule_action == -1)
549 default_rule_action = SCMP_ACT_ALLOW;
550 }
551
552 memset(&ctx, 0, sizeof(ctx));
553 ctx.architectures[0] = SCMP_ARCH_NATIVE;
554 ctx.architectures[1] = SCMP_ARCH_NATIVE;
555 ctx.architectures[2] = SCMP_ARCH_NATIVE;
556 if (native_arch == lxc_seccomp_arch_amd64) {
557 cur_rule_arch = lxc_seccomp_arch_all;
558
559 ctx.architectures[0] = SCMP_ARCH_X86;
560 ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_i386,
561 default_policy_action,
562 &ctx.needs_merge[0]);
563 if (!ctx.contexts[0])
564 goto bad;
565
566 ctx.architectures[1] = SCMP_ARCH_X32;
567 ctx.contexts[1] = get_new_ctx(lxc_seccomp_arch_x32,
568 default_policy_action,
569 &ctx.needs_merge[1]);
570 if (!ctx.contexts[1])
571 goto bad;
572
573 ctx.architectures[2] = SCMP_ARCH_X86_64;
574 ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_amd64,
575 default_policy_action,
576 &ctx.needs_merge[2]);
577 if (!ctx.contexts[2])
578 goto bad;
579 #ifdef SCMP_ARCH_PPC
580 } else if (native_arch == lxc_seccomp_arch_ppc64) {
581 cur_rule_arch = lxc_seccomp_arch_all;
582
583 ctx.architectures[0] = SCMP_ARCH_PPC;
584 ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_ppc,
585 default_policy_action,
586 &ctx.needs_merge[0]);
587 if (!ctx.contexts[0])
588 goto bad;
589
590 ctx.architectures[2] = SCMP_ARCH_PPC64;
591 ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_ppc64,
592 default_policy_action,
593 &ctx.needs_merge[2]);
594 if (!ctx.contexts[2])
595 goto bad;
596 #endif
597 #ifdef SCMP_ARCH_ARM
598 } else if (native_arch == lxc_seccomp_arch_arm64) {
599 cur_rule_arch = lxc_seccomp_arch_all;
600
601 ctx.architectures[0] = SCMP_ARCH_ARM;
602 ctx.contexts[0] =
603 get_new_ctx(lxc_seccomp_arch_arm, default_policy_action,
604 &ctx.needs_merge[0]);
605 if (!ctx.contexts[0])
606 goto bad;
607
608 #ifdef SCMP_ARCH_AARCH64
609 ctx.architectures[2] = SCMP_ARCH_AARCH64;
610 ctx.contexts[2] =
611 get_new_ctx(lxc_seccomp_arch_arm64, default_policy_action,
612 &ctx.needs_merge[2]);
613 if (!ctx.contexts[2])
614 goto bad;
615 #endif
616 #endif
617 #ifdef SCMP_ARCH_MIPS
618 } else if (native_arch == lxc_seccomp_arch_mips64) {
619 cur_rule_arch = lxc_seccomp_arch_all;
620
621 ctx.architectures[0] = SCMP_ARCH_MIPS;
622 ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_mips,
623 default_policy_action,
624 &ctx.needs_merge[0]);
625 if (!ctx.contexts[0])
626 goto bad;
627
628 ctx.architectures[1] = SCMP_ARCH_MIPS64N32;
629 ctx.contexts[1] = get_new_ctx(lxc_seccomp_arch_mips64n32,
630 default_policy_action,
631 &ctx.needs_merge[1]);
632 if (!ctx.contexts[1])
633 goto bad;
634
635 ctx.architectures[2] = SCMP_ARCH_MIPS64;
636 ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_mips64,
637 default_policy_action,
638 &ctx.needs_merge[2]);
639 if (!ctx.contexts[2])
640 goto bad;
641 } else if (native_arch == lxc_seccomp_arch_mipsel64) {
642 cur_rule_arch = lxc_seccomp_arch_all;
643
644 ctx.architectures[0] = SCMP_ARCH_MIPSEL;
645 ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_mipsel,
646 default_policy_action,
647 &ctx.needs_merge[0]);
648 if (!ctx.contexts[0])
649 goto bad;
650
651 ctx.architectures[1] = SCMP_ARCH_MIPSEL64N32;
652 ctx.contexts[1] = get_new_ctx(lxc_seccomp_arch_mipsel64n32,
653 default_policy_action,
654 &ctx.needs_merge[1]);
655 if (!ctx.contexts[1])
656 goto bad;
657
658 ctx.architectures[2] = SCMP_ARCH_MIPSEL64;
659 ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_mipsel64,
660 default_policy_action,
661 &ctx.needs_merge[2]);
662 if (!ctx.contexts[2])
663 goto bad;
664 #endif
665 }
666
667 if (default_policy_action != SCMP_ACT_KILL) {
668 ret = seccomp_reset(conf->seccomp_ctx, default_policy_action);
669 if (ret != 0) {
670 ERROR("Error re-initializing Seccomp");
671 return -1;
672 }
673 if (seccomp_attr_set(conf->seccomp_ctx, SCMP_FLTATR_CTL_NNP, 0)) {
674 ERROR("Failed to turn off no-new-privs");
675 return -1;
676 }
677 #ifdef SCMP_FLTATR_ATL_TSKIP
678 if (seccomp_attr_set(conf->seccomp_ctx, SCMP_FLTATR_ATL_TSKIP, 1)) {
679 WARN("Failed to turn on seccomp nop-skip, continuing");
680 }
681 #endif
682 }
683
684 while (fgets(line, 1024, f)) {
685
686 if (line[0] == '#')
687 continue;
688 if (strlen(line) == 0)
689 continue;
690 remove_trailing_newlines(line);
691 INFO("processing: .%s", line);
692 if (line[0] == '[') {
693 /* Read the architecture for next set of rules. */
694 if (strcmp(line, "[x86]") == 0 ||
695 strcmp(line, "[X86]") == 0) {
696 if (native_arch != lxc_seccomp_arch_i386 &&
697 native_arch != lxc_seccomp_arch_amd64) {
698 cur_rule_arch = lxc_seccomp_arch_unknown;
699 continue;
700 }
701 cur_rule_arch = lxc_seccomp_arch_i386;
702 } else if (strcmp(line, "[x32]") == 0 ||
703 strcmp(line, "[X32]") == 0) {
704 if (native_arch != lxc_seccomp_arch_amd64) {
705 cur_rule_arch = lxc_seccomp_arch_unknown;
706 continue;
707 }
708 cur_rule_arch = lxc_seccomp_arch_x32;
709 } else if (strcmp(line, "[X86_64]") == 0 ||
710 strcmp(line, "[x86_64]") == 0) {
711 if (native_arch != lxc_seccomp_arch_amd64) {
712 cur_rule_arch = lxc_seccomp_arch_unknown;
713 continue;
714 }
715 cur_rule_arch = lxc_seccomp_arch_amd64;
716 } else if (strcmp(line, "[all]") == 0 ||
717 strcmp(line, "[ALL]") == 0) {
718 cur_rule_arch = lxc_seccomp_arch_all;
719 }
720 #ifdef SCMP_ARCH_ARM
721 else if (strcmp(line, "[arm]") == 0 ||
722 strcmp(line, "[ARM]") == 0) {
723 if (native_arch != lxc_seccomp_arch_arm &&
724 native_arch != lxc_seccomp_arch_arm64) {
725 cur_rule_arch = lxc_seccomp_arch_unknown;
726 continue;
727 }
728 cur_rule_arch = lxc_seccomp_arch_arm;
729 }
730 #endif
731 #ifdef SCMP_ARCH_AARCH64
732 else if (strcmp(line, "[arm64]") == 0 ||
733 strcmp(line, "[ARM64]") == 0) {
734 if (native_arch != lxc_seccomp_arch_arm64) {
735 cur_rule_arch = lxc_seccomp_arch_unknown;
736 continue;
737 }
738 cur_rule_arch = lxc_seccomp_arch_arm64;
739 }
740 #endif
741 #ifdef SCMP_ARCH_PPC64LE
742 else if (strcmp(line, "[ppc64le]") == 0 ||
743 strcmp(line, "[PPC64LE]") == 0) {
744 if (native_arch != lxc_seccomp_arch_ppc64le) {
745 cur_rule_arch = lxc_seccomp_arch_unknown;
746 continue;
747 }
748 cur_rule_arch = lxc_seccomp_arch_ppc64le;
749 }
750 #endif
751 #ifdef SCMP_ARCH_PPC64
752 else if (strcmp(line, "[ppc64]") == 0 ||
753 strcmp(line, "[PPC64]") == 0) {
754 if (native_arch != lxc_seccomp_arch_ppc64) {
755 cur_rule_arch = lxc_seccomp_arch_unknown;
756 continue;
757 }
758 cur_rule_arch = lxc_seccomp_arch_ppc64;
759 }
760 #endif
761 #ifdef SCMP_ARCH_PPC
762 else if (strcmp(line, "[ppc]") == 0 ||
763 strcmp(line, "[PPC]") == 0) {
764 if (native_arch != lxc_seccomp_arch_ppc &&
765 native_arch != lxc_seccomp_arch_ppc64) {
766 cur_rule_arch = lxc_seccomp_arch_unknown;
767 continue;
768 }
769 cur_rule_arch = lxc_seccomp_arch_ppc;
770 }
771 #endif
772 #ifdef SCMP_ARCH_MIPS
773 else if (strcmp(line, "[mips64]") == 0 ||
774 strcmp(line, "[MIPS64]") == 0) {
775 if (native_arch != lxc_seccomp_arch_mips64) {
776 cur_rule_arch = lxc_seccomp_arch_unknown;
777 continue;
778 }
779 cur_rule_arch = lxc_seccomp_arch_mips64;
780 } else if (strcmp(line, "[mips64n32]") == 0 ||
781 strcmp(line, "[MIPS64N32]") == 0) {
782 if (native_arch != lxc_seccomp_arch_mips64) {
783 cur_rule_arch = lxc_seccomp_arch_unknown;
784 continue;
785 }
786 cur_rule_arch = lxc_seccomp_arch_mips64n32;
787 } else if (strcmp(line, "[mips]") == 0 ||
788 strcmp(line, "[MIPS]") == 0) {
789 if (native_arch != lxc_seccomp_arch_mips &&
790 native_arch != lxc_seccomp_arch_mips64) {
791 cur_rule_arch = lxc_seccomp_arch_unknown;
792 continue;
793 }
794 cur_rule_arch = lxc_seccomp_arch_mips;
795 } else if (strcmp(line, "[mipsel64]") == 0 ||
796 strcmp(line, "[MIPSEL64]") == 0) {
797 if (native_arch != lxc_seccomp_arch_mipsel64) {
798 cur_rule_arch = lxc_seccomp_arch_unknown;
799 continue;
800 }
801 cur_rule_arch = lxc_seccomp_arch_mipsel64;
802 } else if (strcmp(line, "[mipsel64n32]") == 0 ||
803 strcmp(line, "[MIPSEL64N32]") == 0) {
804 if (native_arch != lxc_seccomp_arch_mipsel64) {
805 cur_rule_arch = lxc_seccomp_arch_unknown;
806 continue;
807 }
808 cur_rule_arch = lxc_seccomp_arch_mipsel64n32;
809 } else if (strcmp(line, "[mipsel]") == 0 ||
810 strcmp(line, "[MIPSEL]") == 0) {
811 if (native_arch != lxc_seccomp_arch_mipsel &&
812 native_arch != lxc_seccomp_arch_mipsel64) {
813 cur_rule_arch = lxc_seccomp_arch_unknown;
814 continue;
815 }
816 cur_rule_arch = lxc_seccomp_arch_mipsel;
817 }
818 #endif
819 #ifdef SCMP_ARCH_S390X
820 else if (strcmp(line, "[s390x]") == 0 ||
821 strcmp(line, "[S390X]") == 0) {
822 if (native_arch != lxc_seccomp_arch_s390x) {
823 cur_rule_arch = lxc_seccomp_arch_unknown;
824 continue;
825 }
826 cur_rule_arch = lxc_seccomp_arch_s390x;
827 }
828 #endif
829 else
830 goto bad_arch;
831
832 continue;
833 }
834
835 /* irrelevant arch - i.e. arm on i386 */
836 if (cur_rule_arch == lxc_seccomp_arch_unknown)
837 continue;
838
839 memset(&rule, 0, sizeof(rule));
840 /* read optional action which follows the syscall */
841 ret = parse_v2_rules(line, default_rule_action, &rule);
842 if (ret != 0) {
843 ERROR("Failed to interpret seccomp rule");
844 goto bad_rule;
845 }
846
847 if (!do_resolve_add_rule(SCMP_ARCH_NATIVE, line,
848 conf->seccomp_ctx, &rule))
849 goto bad_rule;
850 INFO("Added native rule for arch %d for %s action %d(%s)",
851 SCMP_ARCH_NATIVE, line, rule.action,
852 get_action_name(rule.action));
853
854 if (ctx.architectures[0] != SCMP_ARCH_NATIVE) {
855 if (!do_resolve_add_rule(ctx.architectures[0], line,
856 ctx.contexts[0], &rule))
857 goto bad_rule;
858 INFO("Added compat rule for arch %d for %s action %d(%s)",
859 ctx.architectures[0], line, rule.action,
860 get_action_name(rule.action));
861 }
862
863 if (ctx.architectures[1] != SCMP_ARCH_NATIVE) {
864 if (!do_resolve_add_rule(ctx.architectures[1], line,
865 ctx.contexts[1], &rule))
866 goto bad_rule;
867 INFO("Added compat rule for arch %d for %s action %d(%s)",
868 ctx.architectures[1], line, rule.action,
869 get_action_name(rule.action));
870 }
871
872 if (ctx.architectures[2] != SCMP_ARCH_NATIVE) {
873 if (!do_resolve_add_rule(ctx.architectures[2], line,
874 ctx.contexts[2], &rule))
875 goto bad_rule;
876 INFO("Added native rule for arch %d for %s action %d(%s)",
877 ctx.architectures[2], line, rule.action,
878 get_action_name(rule.action));
879 }
880 }
881
882 INFO("Merging compat seccomp contexts into main context");
883 if (ctx.contexts[0]) {
884 if (ctx.needs_merge[0]) {
885 ret = seccomp_merge(conf->seccomp_ctx, ctx.contexts[0]);
886 if (ret < 0) {
887 ERROR("Failed to merge first compat seccomp "
888 "context into main context");
889 goto bad;
890 }
891 TRACE("Merged first compat seccomp context into main context");
892 } else {
893 seccomp_release(ctx.contexts[0]);
894 ctx.contexts[0] = NULL;
895 }
896 }
897
898 if (ctx.contexts[1]) {
899 if (ctx.needs_merge[1]) {
900 ret = seccomp_merge(conf->seccomp_ctx, ctx.contexts[1]);
901 if (ret < 0) {
902 ERROR("Failed to merge first compat seccomp "
903 "context into main context");
904 goto bad;
905 }
906 TRACE("Merged second compat seccomp context into main context");
907 } else {
908 seccomp_release(ctx.contexts[1]);
909 ctx.contexts[1] = NULL;
910 }
911 }
912
913 if (ctx.contexts[2]) {
914 if (ctx.needs_merge[2]) {
915 ret = seccomp_merge(conf->seccomp_ctx, ctx.contexts[2]);
916 if (ret < 0) {
917 ERROR("Failed to merge third compat seccomp "
918 "context into main context");
919 goto bad;
920 }
921 TRACE("Merged third compat seccomp context into main context");
922 } else {
923 seccomp_release(ctx.contexts[2]);
924 ctx.contexts[2] = NULL;
925 }
926 }
927
928 return 0;
929
930 bad_arch:
931 ERROR("Unsupported arch: %s.", line);
932 bad_rule:
933 bad:
934 if (ctx.contexts[0])
935 seccomp_release(ctx.contexts[0]);
936 if (ctx.contexts[1])
937 seccomp_release(ctx.contexts[1]);
938 if (ctx.contexts[2])
939 seccomp_release(ctx.contexts[2]);
940
941 return -1;
942 }
943 #else /* HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH */
/* Fallback used when HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH is not set:
 * version 2 policies cannot be parsed, so always fail.
 */
static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
{
	return -1;
}
948 #endif /* HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH */
949
/*
 * Read the policy header and hand the rest of the file to the parser for
 * the declared version.
 *
 * The first line of the config file holds the policy language version,
 * which must be '1' or '2'. The second line holds the directives: it must
 * include 'whitelist' (version 1 or 2) or 'blacklist' (version 2 only) and
 * may include 'debug', which is rejected because it is not yet supported.
 * The policy itself follows.
 *
 * Returns -1 on an invalid header, otherwise the result of the version
 * specific parser.
 */
static int parse_config(FILE *f, struct lxc_conf *conf)
{
	char line[1024];
	int version;

	if (fscanf(f, "%d\n", &version) != 1 || (version != 1 && version != 2)) {
		ERROR("Invalid version");
		return -1;
	}

	if (!fgets(line, sizeof(line), f)) {
		ERROR("Invalid config file");
		return -1;
	}

	if (version == 1 && !strstr(line, "whitelist")) {
		ERROR("Only whitelist policy is supported");
		return -1;
	}

	if (strstr(line, "debug")) {
		ERROR("Debug not yet implemented");
		return -1;
	}

	return version == 1 ? parse_config_v1(f, conf)
			    : parse_config_v2(f, line, conf);
}
986
/*
 * use_seccomp: decide whether a seccomp policy defined for the container
 * should be applied.
 *
 * Returns false if
 * 1. /proc/self/status has no "Seccomp:" line (seccomp is not enabled in
 *    the kernel)
 * 2. the "Seccomp:" line reports a non-zero value (a seccomp policy is
 *    already enabled for this task)
 * Returns true otherwise, including when /proc/self/status cannot be opened.
 */
static bool use_seccomp(void)
{
	char buf[1024];
	bool has_seccomp_line = false;
	bool confined = false;
	FILE *status;

	status = fopen("/proc/self/status", "r");
	if (!status)
		return true;

	/* Stop reading as soon as the Seccomp line has been seen. */
	while (!has_seccomp_line && fgets(buf, sizeof(buf), status)) {
		int mode;

		if (strncmp(buf, "Seccomp:", 8) != 0)
			continue;

		has_seccomp_line = true;
		if (sscanf(buf + 8, "%d", &mode) == 1 && mode != 0)
			confined = true;
	}

	fclose(status);

	if (!has_seccomp_line) { /* no Seccomp line, no seccomp in kernel */
		INFO("Seccomp is not enabled in the kernel");
		return false;
	}

	if (confined) { /* already seccomp-confined */
		INFO("Already seccomp-confined, not loading new policy");
		return false;
	}

	return true;
}
1026
1027 int lxc_read_seccomp_config(struct lxc_conf *conf)
1028 {
1029 FILE *f;
1030 int ret;
1031 int check_seccomp_attr_set;
1032
1033 if (!conf->seccomp)
1034 return 0;
1035
1036 if (!use_seccomp())
1037 return 0;
1038 #if HAVE_SCMP_FILTER_CTX
1039 /* XXX for debug, pass in SCMP_ACT_TRAP */
1040 conf->seccomp_ctx = seccomp_init(SCMP_ACT_KILL);
1041 ret = !conf->seccomp_ctx;
1042 #else
1043 ret = seccomp_init(SCMP_ACT_KILL) < 0;
1044 #endif
1045 if (ret) {
1046 ERROR("Failed initializing seccomp");
1047 return -1;
1048 }
1049
1050 /* turn off no-new-privs. We don't want it in lxc, and it breaks
1051 * with apparmor */
1052 #if HAVE_SCMP_FILTER_CTX
1053 check_seccomp_attr_set = seccomp_attr_set(conf->seccomp_ctx, SCMP_FLTATR_CTL_NNP, 0);
1054 #else
1055 check_seccomp_attr_set = seccomp_attr_set(SCMP_FLTATR_CTL_NNP, 0);
1056 #endif
1057 if (check_seccomp_attr_set) {
1058 ERROR("Failed to turn off no-new-privs");
1059 return -1;
1060 }
1061 #ifdef SCMP_FLTATR_ATL_TSKIP
1062 if (seccomp_attr_set(conf->seccomp_ctx, SCMP_FLTATR_ATL_TSKIP, 1)) {
1063 WARN("Failed to turn on seccomp nop-skip, continuing");
1064 }
1065 #endif
1066
1067 f = fopen(conf->seccomp, "r");
1068 if (!f) {
1069 SYSERROR("Failed to open seccomp policy file %s", conf->seccomp);
1070 return -1;
1071 }
1072 ret = parse_config(f, conf);
1073 fclose(f);
1074 return ret;
1075 }
1076
1077 int lxc_seccomp_load(struct lxc_conf *conf)
1078 {
1079 int ret;
1080 if (!conf->seccomp)
1081 return 0;
1082 if (!use_seccomp())
1083 return 0;
1084 ret = seccomp_load(
1085 #if HAVE_SCMP_FILTER_CTX
1086 conf->seccomp_ctx
1087 #endif
1088 );
1089 if (ret < 0) {
1090 ERROR("Error loading the seccomp policy: %s", strerror(-ret));
1091 return -1;
1092 }
1093
1094 /* After load seccomp filter into the kernel successfully, export the current seccomp
1095 * filter to log file */
1096 #if HAVE_SCMP_FILTER_CTX
1097 if ((lxc_log_get_level() <= LXC_LOG_LEVEL_TRACE || conf->loglevel <= LXC_LOG_LEVEL_TRACE) &&
1098 lxc_log_fd >= 0) {
1099 ret = seccomp_export_pfc(conf->seccomp_ctx, lxc_log_fd);
1100 /* Just give an warning when export error */
1101 if (ret < 0)
1102 WARN("Failed to export seccomp filter to log file: %s", strerror(-ret));
1103 }
1104 #endif
1105 return 0;
1106 }
1107
1108 void lxc_seccomp_free(struct lxc_conf *conf)
1109 {
1110 free(conf->seccomp);
1111 conf->seccomp = NULL;
1112 #if HAVE_SCMP_FILTER_CTX
1113 if (conf->seccomp_ctx) {
1114 seccomp_release(conf->seccomp_ctx);
1115 conf->seccomp_ctx = NULL;
1116 }
1117 #endif
1118 }