]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/seccomp.c
cgroups: remove freezer_state()
[mirror_lxc.git] / src / lxc / seccomp.c
CommitLineData
8f2c3a70
SH
1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright Canonical, Inc. 2012
5 *
6 * Authors:
7 * Serge Hallyn <serge.hallyn@canonical.com>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
8f2c3a70
SH
22 */
23
24#define _GNU_SOURCE
567b2049 25#include <errno.h>
8f2c3a70
SH
26#include <stdio.h>
27#include <stdlib.h>
28#include <seccomp.h>
6166fa6d 29#include <sys/mount.h>
567b2049 30#include <sys/utsname.h>
f2363e38 31
769872f9 32#include "config.h"
8f2c3a70 33#include "log.h"
567b2049 34#include "lxcseccomp.h"
8f2c3a70
SH
35
36lxc_log_define(lxc_seccomp, lxc);
37
50798138
SH
38static int parse_config_v1(FILE *f, struct lxc_conf *conf)
39{
40 char line[1024];
41 int ret;
42
43 while (fgets(line, 1024, f)) {
44 int nr;
45 ret = sscanf(line, "%d", &nr);
46 if (ret != 1)
47 return -1;
48 ret = seccomp_rule_add(
49#if HAVE_SCMP_FILTER_CTX
f06c6207 50 conf->seccomp_ctx,
50798138 51#endif
f06c6207 52 SCMP_ACT_ALLOW, nr, 0);
50798138 53 if (ret < 0) {
3ee26d19 54 ERROR("Failed loading allow rule for %d", nr);
50798138
SH
55 return ret;
56 }
57 }
58 return 0;
59}
60
2b0ae718 61#if HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH
50798138
SH
/*
 * Strip every trailing '\n' from the NUL-terminated string @l, in place.
 *
 * Newlines that are followed by other characters are left alone. An empty
 * string, or a string made only of newlines, ends up empty.
 */
static void remove_trailing_newlines(char *l)
{
	size_t len = strlen(l);

	/*
	 * Walk backwards by index so that no pointer before the start of the
	 * buffer is ever formed, even when the whole string is consumed.
	 */
	while (len > 0 && l[len - 1] == '\n')
		l[--len] = '\0';
}
71
72static uint32_t get_v2_default_action(char *line)
73{
74 uint32_t ret_action = -1;
75
f06c6207
CB
76 while (*line == ' ')
77 line++;
1a0e70ac 78 /* After 'whitelist' or 'blacklist' comes default behavior. */
50798138
SH
79 if (strncmp(line, "kill", 4) == 0)
80 ret_action = SCMP_ACT_KILL;
81 else if (strncmp(line, "errno", 5) == 0) {
82 int e;
f06c6207 83 if (sscanf(line + 5, "%d", &e) != 1) {
3ee26d19 84 ERROR("Bad errno value in %s", line);
50798138
SH
85 return -2;
86 }
87 ret_action = SCMP_ACT_ERRNO(e);
88 } else if (strncmp(line, "allow", 5) == 0)
89 ret_action = SCMP_ACT_ALLOW;
90 else if (strncmp(line, "trap", 4) == 0)
91 ret_action = SCMP_ACT_TRAP;
92 return ret_action;
93}
94
4836330b 95static const char *get_action_name(uint32_t action)
96{
1a0e70ac 97 /* The upper 16 bits indicate the type of the seccomp action. */
4836330b 98 switch(action & 0xffff0000){
99 case SCMP_ACT_KILL:
100 return "kill";
101 case SCMP_ACT_ALLOW:
102 return "allow";
103 case SCMP_ACT_TRAP:
104 return "trap";
105 case SCMP_ACT_ERRNO(0):
106 return "errno";
107 default:
108 return "invalid action";
109 }
110}
111
/*
 * Extract the optional action that follows the syscall name on a rule line.
 *
 * @line       : rule text, "<syscall> [action] ...".
 * @def_action : returned when the line carries no action, when what follows
 *               the name is a '#' comment, or when the word after the name
 *               is not an action keyword.
 *
 * Returns the action to use, or (uint32_t)-1 when an "errno" action has a
 * malformed value.
 */
static uint32_t get_v2_action(char *line, uint32_t def_action)
{
	uint32_t parsed;
	char *cursor = strchr(line, ' ');

	if (!cursor)
		return def_action;

	/* Step over the separator and any further spaces. */
	do {
		cursor++;
	} while (*cursor == ' ');

	if (*cursor == '\0' || *cursor == '#')
		return def_action;

	parsed = get_v2_default_action(cursor);
	if (parsed == (uint32_t)-2)
		return (uint32_t)-1;

	if (parsed == (uint32_t)-1)
		return def_action;

	return parsed;
}
3ee26d19
L
131
132struct v2_rule_args {
133 uint32_t index;
134 uint64_t value;
135 uint64_t mask;
136 enum scmp_compare op;
137};
138
139struct seccomp_v2_rule {
140 uint32_t action;
141 uint32_t args_num;
142 struct v2_rule_args args_value[6];
143};
144
145static enum scmp_compare parse_v2_rule_op(char *s)
146{
3ee26d19 147 if (strcmp(s, "SCMP_CMP_NE") == 0 || strcmp(s, "!=") == 0)
29cb2617 148 return SCMP_CMP_NE;
3ee26d19 149 else if (strcmp(s, "SCMP_CMP_LT") == 0 || strcmp(s, "<") == 0)
29cb2617 150 return SCMP_CMP_LT;
3ee26d19 151 else if (strcmp(s, "SCMP_CMP_LE") == 0 || strcmp(s, "<=") == 0)
29cb2617 152 return SCMP_CMP_LE;
3ee26d19 153 else if (strcmp(s, "SCMP_CMP_EQ") == 0 || strcmp(s, "==") == 0)
29cb2617 154 return SCMP_CMP_EQ;
3ee26d19 155 else if (strcmp(s, "SCMP_CMP_GE") == 0 || strcmp(s, ">=") == 0)
29cb2617 156 return SCMP_CMP_GE;
3ee26d19 157 else if (strcmp(s, "SCMP_CMP_GT") == 0 || strcmp(s, ">") == 0)
29cb2617 158 return SCMP_CMP_GT;
3ee26d19 159 else if (strcmp(s, "SCMP_CMP_MASKED_EQ") == 0 || strcmp(s, "&=") == 0)
29cb2617 160 return SCMP_CMP_MASKED_EQ;
3ee26d19 161
29cb2617 162 return _SCMP_CMP_MAX;
3ee26d19
L
163}
164
165/* This function is used to parse the args string into the structure.
166 * args string format:[index,value,op,valueTwo] or [index,value,op]
167 * For one arguments, [index,value,valueTwo,op]
168 * index: the index for syscall arguments (type uint)
169 * value: the value for syscall arguments (type uint64)
170 * op: the operator for syscall arguments(string),
171 a valid list of constants as of libseccomp v2.3.2 is
172 SCMP_CMP_NE,SCMP_CMP_LE,SCMP_CMP_LE, SCMP_CMP_EQ, SCMP_CMP_GE,
173 SCMP_CMP_GT, SCMP_CMP_MASKED_EQ, or !=,<=,==,>=,>,&=
174 * valueTwo: the value for syscall arguments only used for mask eq (type uint64, optional)
175 * Returns 0 on success, < 0 otherwise.
176 */
177static int get_seccomp_arg_value(char *key, struct v2_rule_args *rule_args)
178{
179 int ret = 0;
180 uint64_t value = 0;
181 uint64_t mask = 0;
182 enum scmp_compare op = 0;
183 uint32_t index = 0;
184 char s[30] = {0};
185 char *tmp = NULL;
186
187 memset(s, 0, sizeof(s));
188 tmp = strchr(key, '[');
189 if (!tmp) {
190 ERROR("Failed to interpret args");
191 return -1;
192 }
193 ret = sscanf(tmp, "[%i,%lli,%30[^0-9^,],%lli", &index, (long long unsigned int *)&value, s, (long long unsigned int *)&mask);
194 if ((ret != 3 && ret != 4) || index >= 6) {
195 ERROR("Failed to interpret args value");
196 return -1;
197 }
198
199 op = parse_v2_rule_op(s);
200 if (op == _SCMP_CMP_MAX) {
201 ERROR("Failed to interpret args operator value");
202 return -1;
203 }
204
205 rule_args->index = index;
206 rule_args->value = value;
207 rule_args->mask = mask;
208 rule_args->op = op;
209 return 0;
210}
211
212/* This function is used to parse the seccomp rule entry.
213 * @line : seccomp rule entry string.
214 * @def_action : default action used in the case if the 'line' contain non valid action.
215 * @rules : output struct.
216 * Returns 0 on success, < 0 otherwise.
217 */
218static int parse_v2_rules(char *line, uint32_t def_action, struct seccomp_v2_rule *rules)
219{
220 int ret = 0 ;
221 int i = 0;
222 char *tmp = NULL;
223 char *key = NULL;
224 char *saveptr = NULL;
225
226 tmp = strdup(line);
227 if (!tmp)
228 return -1;
229
230 /* read optional action which follows the syscall */
231 rules->action = get_v2_action(tmp, def_action);
232 if (rules->action == -1) {
233 ERROR("Failed to interpret action");
234 ret = -1;
235 goto out;
236 }
237
238 rules->args_num = 0;
239 if (!strchr(tmp, '[')) {
240 ret = 0;
241 goto out;
242 }
243
244 for ((key = strtok_r(tmp, "]", &saveptr)), i = 0; key && i < 6; (key = strtok_r(NULL, "]", &saveptr)), i++) {
245 ret = get_seccomp_arg_value(key, &rules->args_value[i]);
246 if (ret < 0) {
247 ret = -1;
248 goto out;
249 }
250 rules->args_num++;
251 }
252
253 ret = 0;
254out:
255 free(tmp);
256 return ret;
257}
258
2b0ae718 259#endif
50798138 260
d58c6ad0
SH
261#if HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH
/* Architectures a policy section or a seccomp context can refer to. */
enum lxc_hostarch_t {
	lxc_seccomp_arch_all = 0,	/* "[all]" section */
	lxc_seccomp_arch_native,

	/* x86 family */
	lxc_seccomp_arch_i386,
	lxc_seccomp_arch_x32,
	lxc_seccomp_arch_amd64,

	/* arm family */
	lxc_seccomp_arch_arm,
	lxc_seccomp_arch_arm64,

	/* powerpc family */
	lxc_seccomp_arch_ppc64,
	lxc_seccomp_arch_ppc64le,
	lxc_seccomp_arch_ppc,

	/* mips family, big endian then little endian */
	lxc_seccomp_arch_mips,
	lxc_seccomp_arch_mips64,
	lxc_seccomp_arch_mips64n32,
	lxc_seccomp_arch_mipsel,
	lxc_seccomp_arch_mipsel64,
	lxc_seccomp_arch_mipsel64n32,

	lxc_seccomp_arch_s390x,

	lxc_seccomp_arch_unknown = 999,
};

/* Pick the MIPS variants that match the endianness this file is built for. */
#if defined(__MIPSEL__)
# define MIPS_ARCH_O32 lxc_seccomp_arch_mipsel
# define MIPS_ARCH_N64 lxc_seccomp_arch_mipsel64
#else
# define MIPS_ARCH_O32 lxc_seccomp_arch_mips
# define MIPS_ARCH_N64 lxc_seccomp_arch_mips64
#endif
290
d58c6ad0
SH
291int get_hostarch(void)
292{
293 struct utsname uts;
294 if (uname(&uts) < 0) {
3ee26d19 295 SYSERROR("Failed to read host arch");
d58c6ad0
SH
296 return -1;
297 }
298 if (strcmp(uts.machine, "i686") == 0)
299 return lxc_seccomp_arch_i386;
1a0e70ac 300 /* no x32 kernels */
d58c6ad0
SH
301 else if (strcmp(uts.machine, "x86_64") == 0)
302 return lxc_seccomp_arch_amd64;
303 else if (strncmp(uts.machine, "armv7", 5) == 0)
304 return lxc_seccomp_arch_arm;
9d291dd2
BP
305 else if (strncmp(uts.machine, "aarch64", 7) == 0)
306 return lxc_seccomp_arch_arm64;
b4067426
BP
307 else if (strncmp(uts.machine, "ppc64le", 7) == 0)
308 return lxc_seccomp_arch_ppc64le;
309 else if (strncmp(uts.machine, "ppc64", 5) == 0)
310 return lxc_seccomp_arch_ppc64;
311 else if (strncmp(uts.machine, "ppc", 3) == 0)
312 return lxc_seccomp_arch_ppc;
2ccd9eda
JC
313 else if (strncmp(uts.machine, "mips64", 6) == 0)
314 return MIPS_ARCH_N64;
315 else if (strncmp(uts.machine, "mips", 4) == 0)
316 return MIPS_ARCH_O32;
be038e49
CB
317 else if (strncmp(uts.machine, "s390x", 5) == 0)
318 return lxc_seccomp_arch_s390x;
d58c6ad0
SH
319 return lxc_seccomp_arch_unknown;
320}
321
eca6736e 322scmp_filter_ctx get_new_ctx(enum lxc_hostarch_t n_arch, uint32_t default_policy_action, bool *needs_merge)
d58c6ad0
SH
323{
324 scmp_filter_ctx ctx;
325 int ret;
326 uint32_t arch;
327
328 switch(n_arch) {
329 case lxc_seccomp_arch_i386: arch = SCMP_ARCH_X86; break;
11de80d6 330 case lxc_seccomp_arch_x32: arch = SCMP_ARCH_X32; break;
d58c6ad0
SH
331 case lxc_seccomp_arch_amd64: arch = SCMP_ARCH_X86_64; break;
332 case lxc_seccomp_arch_arm: arch = SCMP_ARCH_ARM; break;
9d291dd2
BP
333#ifdef SCMP_ARCH_AARCH64
334 case lxc_seccomp_arch_arm64: arch = SCMP_ARCH_AARCH64; break;
335#endif
b4067426
BP
336#ifdef SCMP_ARCH_PPC64LE
337 case lxc_seccomp_arch_ppc64le: arch = SCMP_ARCH_PPC64LE; break;
338#endif
339#ifdef SCMP_ARCH_PPC64
340 case lxc_seccomp_arch_ppc64: arch = SCMP_ARCH_PPC64; break;
341#endif
342#ifdef SCMP_ARCH_PPC
343 case lxc_seccomp_arch_ppc: arch = SCMP_ARCH_PPC; break;
2ccd9eda
JC
344#endif
345#ifdef SCMP_ARCH_MIPS
346 case lxc_seccomp_arch_mips: arch = SCMP_ARCH_MIPS; break;
347 case lxc_seccomp_arch_mips64: arch = SCMP_ARCH_MIPS64; break;
348 case lxc_seccomp_arch_mips64n32: arch = SCMP_ARCH_MIPS64N32; break;
349 case lxc_seccomp_arch_mipsel: arch = SCMP_ARCH_MIPSEL; break;
350 case lxc_seccomp_arch_mipsel64: arch = SCMP_ARCH_MIPSEL64; break;
351 case lxc_seccomp_arch_mipsel64n32: arch = SCMP_ARCH_MIPSEL64N32; break;
be038e49
CB
352#endif
353#ifdef SCMP_ARCH_S390X
354 case lxc_seccomp_arch_s390x: arch = SCMP_ARCH_S390X; break;
b4067426 355#endif
d58c6ad0
SH
356 default: return NULL;
357 }
358
359 if ((ctx = seccomp_init(default_policy_action)) == NULL) {
3ee26d19 360 ERROR("Error initializing seccomp context");
d58c6ad0
SH
361 return NULL;
362 }
363 if (seccomp_attr_set(ctx, SCMP_FLTATR_CTL_NNP, 0)) {
3ee26d19 364 ERROR("Failed to turn off no-new-privs");
d58c6ad0
SH
365 seccomp_release(ctx);
366 return NULL;
367 }
127c5293
SH
368#ifdef SCMP_FLTATR_ATL_TSKIP
369 if (seccomp_attr_set(ctx, SCMP_FLTATR_ATL_TSKIP, 1)) {
370 WARN("Failed to turn on seccomp nop-skip, continuing");
371 }
372#endif
b5ed021b 373
adfee3a8
CB
374 ret = seccomp_arch_exist(ctx, arch);
375 if (ret < 0) {
376 if (ret != -EEXIST) {
377 ERROR("%s - Failed to determine whether arch %d is "
378 "already present in the main seccomp context",
379 strerror(-ret), (int)n_arch);
380 seccomp_release(ctx);
381 return NULL;
382 }
383
b5ed021b
CB
384 ret = seccomp_arch_add(ctx, arch);
385 if (ret != 0) {
adfee3a8
CB
386 ERROR("%s - Failed to add arch %d to main seccomp context",
387 strerror(-ret), (int)n_arch);
b5ed021b
CB
388 seccomp_release(ctx);
389 return NULL;
390 }
adfee3a8 391 TRACE("Added arch %d to main seccomp context", (int)n_arch);
b5ed021b 392
adfee3a8
CB
393 ret = seccomp_arch_remove(ctx, SCMP_ARCH_NATIVE);
394 if (ret != 0) {
395 ERROR("Failed to remove native arch from main seccomp context");
b5ed021b
CB
396 seccomp_release(ctx);
397 return NULL;
398 }
adfee3a8 399 TRACE("Removed native arch from main seccomp context");
eca6736e
CB
400
401 *needs_merge = true;
adfee3a8 402 } else {
eca6736e 403 *needs_merge = false;
adfee3a8 404 TRACE("Arch %d already present in main seccomp context", (int)n_arch);
d58c6ad0
SH
405 }
406
407 return ctx;
408}
409
410bool do_resolve_add_rule(uint32_t arch, char *line, scmp_filter_ctx ctx,
3ee26d19 411 struct seccomp_v2_rule *rule)
d58c6ad0 412{
3ee26d19
L
413 int nr, ret, i;
414 struct scmp_arg_cmp arg_cmp[6];
415
416 memset(arg_cmp, 0 ,sizeof(arg_cmp));
d58c6ad0 417
f06c6207
CB
418 ret = seccomp_arch_exist(ctx, arch);
419 if (arch && ret != 0) {
420 ERROR("BUG: Seccomp: rule and context arch do not match (arch "
3ee26d19 421 "%d): %s",
f06c6207 422 arch, strerror(-ret));
d58c6ad0
SH
423 return false;
424 }
6166fa6d 425
3ee26d19
L
426 /*get the syscall name*/
427 char *p = strchr(line, ' ');
428 if (p)
429 *p = '\0';
430
6166fa6d 431 if (strncmp(line, "reject_force_umount", 19) == 0) {
3ee26d19 432 INFO("Setting Seccomp rule to reject force umounts");
6166fa6d
SH
433 ret = seccomp_rule_add_exact(ctx, SCMP_ACT_ERRNO(EACCES), SCMP_SYS(umount2),
434 1, SCMP_A1(SCMP_CMP_MASKED_EQ , MNT_FORCE , MNT_FORCE ));
435 if (ret < 0) {
f06c6207 436 ERROR("Failed (%d) loading rule to reject force "
3ee26d19 437 "umount: %s",
f06c6207 438 ret, strerror(-ret));
6166fa6d
SH
439 return false;
440 }
441 return true;
442 }
443
cd75548b 444 nr = seccomp_syscall_resolve_name(line);
d58c6ad0 445 if (nr == __NR_SCMP_ERROR) {
3ee26d19
L
446 WARN("Seccomp: failed to resolve syscall: %s", line);
447 WARN("This syscall will NOT be blacklisted");
d58c6ad0
SH
448 return true;
449 }
450 if (nr < 0) {
3ee26d19
L
451 WARN("Seccomp: got negative for syscall: %d: %s", nr, line);
452 WARN("This syscall will NOT be blacklisted");
d58c6ad0
SH
453 return true;
454 }
3ee26d19
L
455
456 for (i = 0; i < rule->args_num; i++) {
457 INFO("arg_cmp[%d]:SCMP_CMP(%u, %llu, %llu, %llu)", i,
458 rule->args_value[i].index,
459 (long long unsigned int)rule->args_value[i].op,
460 (long long unsigned int)rule->args_value[i].mask,
461 (long long unsigned int)rule->args_value[i].value);
462
463 if (SCMP_CMP_MASKED_EQ == rule->args_value[i].op)
464 arg_cmp[i] = SCMP_CMP(rule->args_value[i].index, rule->args_value[i].op, rule->args_value[i].mask, rule->args_value[i].value);
465 else
466 arg_cmp[i] = SCMP_CMP(rule->args_value[i].index, rule->args_value[i].op, rule->args_value[i].value);
467 }
468
469 ret = seccomp_rule_add_exact_array(ctx, rule->action, nr, rule->args_num, arg_cmp);
d58c6ad0 470 if (ret < 0) {
3ee26d19
L
471 ERROR("Failed (%d) loading rule for %s (nr %d action %d(%s)): %s",
472 ret, line, nr, rule->action, get_action_name(rule->action), strerror(-ret));
d58c6ad0
SH
473 return false;
474 }
475 return true;
476}
477
50798138
SH
478/*
479 * v2 consists of
480 * [x86]
481 * open
482 * read
483 * write
484 * close
485 * # a comment
486 * [x86_64]
487 * open
488 * read
489 * write
490 * close
491 */
492static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
493{
50798138
SH
494 char *p;
495 int ret;
50798138 496 bool blacklist = false;
3ee26d19 497 uint32_t default_policy_action = -1, default_rule_action = -1;
d58c6ad0
SH
498 enum lxc_hostarch_t native_arch = get_hostarch(),
499 cur_rule_arch = native_arch;
3ee26d19 500 struct seccomp_v2_rule rule;
eca6736e
CB
501 struct scmp_ctx_info {
502 uint32_t architectures[3];
503 scmp_filter_ctx contexts[3];
504 bool needs_merge[3];
505 } ctx;
50798138
SH
506
507 if (strncmp(line, "blacklist", 9) == 0)
508 blacklist = true;
509 else if (strncmp(line, "whitelist", 9) != 0) {
3ee26d19 510 ERROR("Bad seccomp policy style: %s", line);
50798138
SH
511 return -1;
512 }
513
514 if ((p = strchr(line, ' '))) {
f06c6207 515 default_policy_action = get_v2_default_action(p + 1);
50798138
SH
516 if (default_policy_action == -2)
517 return -1;
518 }
519
520 /* for blacklist, allow any syscall which has no rule */
521 if (blacklist) {
522 if (default_policy_action == -1)
523 default_policy_action = SCMP_ACT_ALLOW;
524 if (default_rule_action == -1)
525 default_rule_action = SCMP_ACT_KILL;
526 } else {
527 if (default_policy_action == -1)
528 default_policy_action = SCMP_ACT_KILL;
529 if (default_rule_action == -1)
530 default_rule_action = SCMP_ACT_ALLOW;
531 }
532
eca6736e
CB
533 memset(&ctx, 0, sizeof(ctx));
534 ctx.architectures[0] = SCMP_ARCH_NATIVE;
535 ctx.architectures[1] = SCMP_ARCH_NATIVE;
536 ctx.architectures[2] = SCMP_ARCH_NATIVE;
d58c6ad0
SH
537 if (native_arch == lxc_seccomp_arch_amd64) {
538 cur_rule_arch = lxc_seccomp_arch_all;
eca6736e
CB
539
540 ctx.architectures[0] = SCMP_ARCH_X86;
541 ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_i386,
542 default_policy_action,
543 &ctx.needs_merge[0]);
544 if (!ctx.contexts[0])
545 goto bad;
546
547 ctx.architectures[1] = SCMP_ARCH_X32;
548 ctx.contexts[1] = get_new_ctx(lxc_seccomp_arch_x32,
549 default_policy_action,
550 &ctx.needs_merge[1]);
551 if (!ctx.contexts[1])
552 goto bad;
553
554 ctx.architectures[2] = SCMP_ARCH_X86_64;
555 ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_amd64,
556 default_policy_action,
557 &ctx.needs_merge[2]);
558 if (!ctx.contexts[2])
ab5e52f6 559 goto bad;
ca399594 560#ifdef SCMP_ARCH_PPC
7635139a
SH
561 } else if (native_arch == lxc_seccomp_arch_ppc64) {
562 cur_rule_arch = lxc_seccomp_arch_all;
eca6736e
CB
563
564 ctx.architectures[0] = SCMP_ARCH_PPC;
565 ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_ppc,
566 default_policy_action,
567 &ctx.needs_merge[0]);
568 if (!ctx.contexts[0])
569 goto bad;
570
571 ctx.architectures[2] = SCMP_ARCH_PPC64;
572 ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_ppc64,
573 default_policy_action,
574 &ctx.needs_merge[2]);
575 if (!ctx.contexts[2])
7635139a 576 goto bad;
ca399594
CB
577#endif
578#ifdef SCMP_ARCH_ARM
7635139a
SH
579 } else if (native_arch == lxc_seccomp_arch_arm64) {
580 cur_rule_arch = lxc_seccomp_arch_all;
eca6736e
CB
581
582 ctx.architectures[0] = SCMP_ARCH_ARM;
583 ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_arm,
584 default_policy_action,
585 &ctx.needs_merge[0]);
586 if (!ctx.contexts[0])
587 goto bad;
588
589 ctx.architectures[2] = SCMP_ARCH_AARCH64;
590 ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_arm64,
591 default_policy_action,
592 &ctx.needs_merge[2]);
593 if (!ctx.contexts[2])
2ccd9eda
JC
594 goto bad;
595#endif
596#ifdef SCMP_ARCH_MIPS
597 } else if (native_arch == lxc_seccomp_arch_mips64) {
598 cur_rule_arch = lxc_seccomp_arch_all;
eca6736e
CB
599
600 ctx.architectures[0] = SCMP_ARCH_MIPS;
601 ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_mips,
602 default_policy_action,
603 &ctx.needs_merge[0]);
604 if (!ctx.contexts[0])
605 goto bad;
606
607 ctx.architectures[1] = SCMP_ARCH_MIPS64N32;
608 ctx.contexts[1] = get_new_ctx(lxc_seccomp_arch_mips64n32,
609 default_policy_action,
610 &ctx.needs_merge[1]);
611 if (!ctx.contexts[1])
612 goto bad;
613
614 ctx.architectures[2] = SCMP_ARCH_MIPS64;
615 ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_mips64,
616 default_policy_action,
617 &ctx.needs_merge[2]);
618 if (!ctx.contexts[2])
2ccd9eda
JC
619 goto bad;
620 } else if (native_arch == lxc_seccomp_arch_mipsel64) {
621 cur_rule_arch = lxc_seccomp_arch_all;
eca6736e
CB
622
623 ctx.architectures[0] = SCMP_ARCH_MIPSEL;
624 ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_mipsel,
625 default_policy_action,
626 &ctx.needs_merge[0]);
627 if (!ctx.contexts[0])
628 goto bad;
629
630 ctx.architectures[1] = SCMP_ARCH_MIPSEL64N32;
631 ctx.contexts[1] = get_new_ctx(lxc_seccomp_arch_mipsel64n32,
632 default_policy_action,
633 &ctx.needs_merge[1]);
634 if (!ctx.contexts[1])
635 goto bad;
636
637 ctx.architectures[2] = SCMP_ARCH_MIPSEL64;
638 ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_mipsel64,
639 default_policy_action,
640 &ctx.needs_merge[2]);
641 if (!ctx.contexts[2])
7635139a 642 goto bad;
be038e49 643#endif
d58c6ad0
SH
644 }
645
50798138
SH
646 if (default_policy_action != SCMP_ACT_KILL) {
647 ret = seccomp_reset(conf->seccomp_ctx, default_policy_action);
648 if (ret != 0) {
3ee26d19 649 ERROR("Error re-initializing Seccomp");
50798138
SH
650 return -1;
651 }
652 if (seccomp_attr_set(conf->seccomp_ctx, SCMP_FLTATR_CTL_NNP, 0)) {
3ee26d19 653 ERROR("Failed to turn off no-new-privs");
50798138
SH
654 return -1;
655 }
127c5293
SH
656#ifdef SCMP_FLTATR_ATL_TSKIP
657 if (seccomp_attr_set(conf->seccomp_ctx, SCMP_FLTATR_ATL_TSKIP, 1)) {
658 WARN("Failed to turn on seccomp nop-skip, continuing");
659 }
660#endif
50798138
SH
661 }
662
663 while (fgets(line, 1024, f)) {
50798138
SH
664
665 if (line[0] == '#')
666 continue;
667 if (strlen(line) == 0)
668 continue;
669 remove_trailing_newlines(line);
3ee26d19 670 INFO("processing: .%s", line);
50798138 671 if (line[0] == '[') {
1a0e70ac 672 /* Read the architecture for next set of rules. */
50798138 673 if (strcmp(line, "[x86]") == 0 ||
f06c6207 674 strcmp(line, "[X86]") == 0) {
d58c6ad0 675 if (native_arch != lxc_seccomp_arch_i386 &&
7e84441e 676 native_arch != lxc_seccomp_arch_amd64) {
d58c6ad0
SH
677 cur_rule_arch = lxc_seccomp_arch_unknown;
678 continue;
679 }
680 cur_rule_arch = lxc_seccomp_arch_i386;
11de80d6
AB
681 } else if (strcmp(line, "[x32]") == 0 ||
682 strcmp(line, "[X32]") == 0) {
683 if (native_arch != lxc_seccomp_arch_amd64) {
684 cur_rule_arch = lxc_seccomp_arch_unknown;
685 continue;
686 }
687 cur_rule_arch = lxc_seccomp_arch_x32;
d58c6ad0 688 } else if (strcmp(line, "[X86_64]") == 0 ||
f06c6207 689 strcmp(line, "[x86_64]") == 0) {
d58c6ad0
SH
690 if (native_arch != lxc_seccomp_arch_amd64) {
691 cur_rule_arch = lxc_seccomp_arch_unknown;
692 continue;
693 }
694 cur_rule_arch = lxc_seccomp_arch_amd64;
d58c6ad0 695 } else if (strcmp(line, "[all]") == 0 ||
f06c6207 696 strcmp(line, "[ALL]") == 0) {
d58c6ad0 697 cur_rule_arch = lxc_seccomp_arch_all;
d58c6ad0 698 }
2b0ae718 699#ifdef SCMP_ARCH_ARM
50798138 700 else if (strcmp(line, "[arm]") == 0 ||
f06c6207 701 strcmp(line, "[ARM]") == 0) {
7635139a 702 if (native_arch != lxc_seccomp_arch_arm &&
7e84441e 703 native_arch != lxc_seccomp_arch_arm64) {
d58c6ad0
SH
704 cur_rule_arch = lxc_seccomp_arch_unknown;
705 continue;
706 }
707 cur_rule_arch = lxc_seccomp_arch_arm;
d58c6ad0 708 }
b4067426 709#endif
9d291dd2
BP
710#ifdef SCMP_ARCH_AARCH64
711 else if (strcmp(line, "[arm64]") == 0 ||
f06c6207 712 strcmp(line, "[ARM64]") == 0) {
9d291dd2
BP
713 if (native_arch != lxc_seccomp_arch_arm64) {
714 cur_rule_arch = lxc_seccomp_arch_unknown;
715 continue;
716 }
717 cur_rule_arch = lxc_seccomp_arch_arm64;
718 }
719#endif
b4067426
BP
720#ifdef SCMP_ARCH_PPC64LE
721 else if (strcmp(line, "[ppc64le]") == 0 ||
f06c6207 722 strcmp(line, "[PPC64LE]") == 0) {
b4067426
BP
723 if (native_arch != lxc_seccomp_arch_ppc64le) {
724 cur_rule_arch = lxc_seccomp_arch_unknown;
725 continue;
726 }
727 cur_rule_arch = lxc_seccomp_arch_ppc64le;
728 }
729#endif
730#ifdef SCMP_ARCH_PPC64
731 else if (strcmp(line, "[ppc64]") == 0 ||
f06c6207 732 strcmp(line, "[PPC64]") == 0) {
b4067426
BP
733 if (native_arch != lxc_seccomp_arch_ppc64) {
734 cur_rule_arch = lxc_seccomp_arch_unknown;
735 continue;
736 }
737 cur_rule_arch = lxc_seccomp_arch_ppc64;
738 }
739#endif
740#ifdef SCMP_ARCH_PPC
741 else if (strcmp(line, "[ppc]") == 0 ||
f06c6207 742 strcmp(line, "[PPC]") == 0) {
7635139a 743 if (native_arch != lxc_seccomp_arch_ppc &&
7e84441e 744 native_arch != lxc_seccomp_arch_ppc64) {
b4067426
BP
745 cur_rule_arch = lxc_seccomp_arch_unknown;
746 continue;
747 }
748 cur_rule_arch = lxc_seccomp_arch_ppc;
749 }
2ccd9eda
JC
750#endif
751#ifdef SCMP_ARCH_MIPS
752 else if (strcmp(line, "[mips64]") == 0 ||
f06c6207 753 strcmp(line, "[MIPS64]") == 0) {
2ccd9eda
JC
754 if (native_arch != lxc_seccomp_arch_mips64) {
755 cur_rule_arch = lxc_seccomp_arch_unknown;
756 continue;
757 }
758 cur_rule_arch = lxc_seccomp_arch_mips64;
759 } else if (strcmp(line, "[mips64n32]") == 0 ||
f06c6207 760 strcmp(line, "[MIPS64N32]") == 0) {
2ccd9eda
JC
761 if (native_arch != lxc_seccomp_arch_mips64) {
762 cur_rule_arch = lxc_seccomp_arch_unknown;
763 continue;
764 }
765 cur_rule_arch = lxc_seccomp_arch_mips64n32;
766 } else if (strcmp(line, "[mips]") == 0 ||
f06c6207 767 strcmp(line, "[MIPS]") == 0) {
2ccd9eda 768 if (native_arch != lxc_seccomp_arch_mips &&
7e84441e 769 native_arch != lxc_seccomp_arch_mips64) {
2ccd9eda
JC
770 cur_rule_arch = lxc_seccomp_arch_unknown;
771 continue;
772 }
773 cur_rule_arch = lxc_seccomp_arch_mips;
774 } else if (strcmp(line, "[mipsel64]") == 0 ||
f06c6207 775 strcmp(line, "[MIPSEL64]") == 0) {
2ccd9eda
JC
776 if (native_arch != lxc_seccomp_arch_mipsel64) {
777 cur_rule_arch = lxc_seccomp_arch_unknown;
778 continue;
779 }
780 cur_rule_arch = lxc_seccomp_arch_mipsel64;
781 } else if (strcmp(line, "[mipsel64n32]") == 0 ||
f06c6207 782 strcmp(line, "[MIPSEL64N32]") == 0) {
2ccd9eda
JC
783 if (native_arch != lxc_seccomp_arch_mipsel64) {
784 cur_rule_arch = lxc_seccomp_arch_unknown;
785 continue;
786 }
787 cur_rule_arch = lxc_seccomp_arch_mipsel64n32;
788 } else if (strcmp(line, "[mipsel]") == 0 ||
f06c6207 789 strcmp(line, "[MIPSEL]") == 0) {
2ccd9eda 790 if (native_arch != lxc_seccomp_arch_mipsel &&
7e84441e 791 native_arch != lxc_seccomp_arch_mipsel64) {
2ccd9eda
JC
792 cur_rule_arch = lxc_seccomp_arch_unknown;
793 continue;
794 }
795 cur_rule_arch = lxc_seccomp_arch_mipsel;
796 }
be038e49
CB
797#endif
798#ifdef SCMP_ARCH_S390X
799 else if (strcmp(line, "[s390x]") == 0 ||
f06c6207 800 strcmp(line, "[S390X]") == 0) {
be038e49
CB
801 if (native_arch != lxc_seccomp_arch_s390x) {
802 cur_rule_arch = lxc_seccomp_arch_unknown;
803 continue;
804 }
805 cur_rule_arch = lxc_seccomp_arch_s390x;
806 }
2b0ae718 807#endif
50798138
SH
808 else
809 goto bad_arch;
d58c6ad0 810
50798138
SH
811 continue;
812 }
813
d58c6ad0
SH
814 /* irrelevant arch - i.e. arm on i386 */
815 if (cur_rule_arch == lxc_seccomp_arch_unknown)
816 continue;
817
3ee26d19 818 memset(&rule, 0, sizeof(rule));
d58c6ad0 819 /* read optional action which follows the syscall */
3ee26d19
L
820 ret = parse_v2_rules(line, default_rule_action, &rule);
821 if (ret != 0) {
822 ERROR("Failed to interpret seccomp rule");
50798138
SH
823 goto bad_rule;
824 }
d58c6ad0 825
eca6736e
CB
826 if (!do_resolve_add_rule(SCMP_ARCH_NATIVE, line,
827 conf->seccomp_ctx, &rule))
828 goto bad_rule;
829 INFO("Added native rule for arch %d for %s action %d(%s)",
830 SCMP_ARCH_NATIVE, line, rule.action,
831 get_action_name(rule.action));
832
833 if (ctx.architectures[0] != SCMP_ARCH_NATIVE) {
834 if (!do_resolve_add_rule(ctx.architectures[0], line,
835 ctx.contexts[0], &rule))
d58c6ad0 836 goto bad_rule;
eca6736e
CB
837 INFO("Added compat rule for arch %d for %s action %d(%s)",
838 ctx.architectures[0], line, rule.action,
3ee26d19 839 get_action_name(rule.action));
eca6736e 840 }
94d56054 841
eca6736e
CB
842 if (ctx.architectures[1] != SCMP_ARCH_NATIVE) {
843 if (!do_resolve_add_rule(ctx.architectures[1], line,
844 ctx.contexts[1], &rule))
d6417887 845 goto bad_rule;
eca6736e
CB
846 INFO("Added compat rule for arch %d for %s action %d(%s)",
847 ctx.architectures[1], line, rule.action,
3ee26d19 848 get_action_name(rule.action));
eca6736e
CB
849 }
850
851 if (ctx.architectures[2] != SCMP_ARCH_NATIVE) {
852 if (!do_resolve_add_rule(ctx.architectures[2], line,
853 ctx.contexts[2], &rule))
d6417887 854 goto bad_rule;
94d56054 855 INFO("Added native rule for arch %d for %s action %d(%s)",
eca6736e 856 ctx.architectures[2], line, rule.action,
3ee26d19 857 get_action_name(rule.action));
50798138
SH
858 }
859 }
d58c6ad0 860
d648e178 861 INFO("Merging compat seccomp contexts into main context");
eca6736e
CB
862 if (ctx.contexts[0]) {
863 if (ctx.needs_merge[0]) {
864 ret = seccomp_merge(conf->seccomp_ctx, ctx.contexts[0]);
b5ed021b 865 if (ret < 0) {
d648e178
CB
866 ERROR("Failed to merge first compat seccomp "
867 "context into main context");
b5ed021b
CB
868 goto bad;
869 }
870 TRACE("Merged first compat seccomp context into main context");
d648e178 871 } else {
eca6736e
CB
872 seccomp_release(ctx.contexts[0]);
873 ctx.contexts[0] = NULL;
b5ed021b 874 }
d648e178 875 }
b5ed021b 876
eca6736e
CB
877 if (ctx.contexts[1]) {
878 if (ctx.needs_merge[1]) {
879 ret = seccomp_merge(conf->seccomp_ctx, ctx.contexts[1]);
b5ed021b 880 if (ret < 0) {
d648e178
CB
881 ERROR("Failed to merge first compat seccomp "
882 "context into main context");
b5ed021b
CB
883 goto bad;
884 }
885 TRACE("Merged second compat seccomp context into main context");
d648e178 886 } else {
eca6736e
CB
887 seccomp_release(ctx.contexts[1]);
888 ctx.contexts[1] = NULL;
889 }
890 }
891
892 if (ctx.contexts[2]) {
893 if (ctx.needs_merge[2]) {
894 ret = seccomp_merge(conf->seccomp_ctx, ctx.contexts[2]);
895 if (ret < 0) {
896 ERROR("Failed to merge third compat seccomp "
897 "context into main context");
898 goto bad;
899 }
900 TRACE("Merged third compat seccomp context into main context");
901 } else {
902 seccomp_release(ctx.contexts[2]);
903 ctx.contexts[2] = NULL;
50798138
SH
904 }
905 }
6166fa6d 906
50798138
SH
907 return 0;
908
909bad_arch:
f06c6207 910 ERROR("Unsupported arch: %s.", line);
50798138 911bad_rule:
d58c6ad0 912bad:
eca6736e
CB
913 if (ctx.contexts[0])
914 seccomp_release(ctx.contexts[0]);
915 if (ctx.contexts[1])
916 seccomp_release(ctx.contexts[1]);
917 if (ctx.contexts[2])
918 seccomp_release(ctx.contexts[2]);
919
50798138 920 return -1;
d58c6ad0
SH
921}
922#else /* HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH */
/*
 * Version 2 policies cannot be parsed when
 * HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH is not set: always fail.
 */
static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
{
	const int unsupported = -1;

	return unsupported;
}
d58c6ad0 927#endif /* HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH */
50798138 928
8f2c3a70
SH
/*
 * Parse a seccomp policy file.
 *
 * The first line holds the policy language version, which must be 1 or 2.
 * The second line holds the directives: 'whitelist' (version 1 or 2) or
 * 'blacklist' (version 2 only). A 'debug' directive is recognised but
 * rejected because it is not implemented. The policy itself follows and is
 * handed to the parser for that version.
 *
 * Returns the result of the version specific parser, or -1 when the header
 * lines are invalid.
 */
static int parse_config(FILE *f, struct lxc_conf *conf)
{
	char directives[1024];
	int version;

	if (fscanf(f, "%d\n", &version) != 1 || version < 1 || version > 2) {
		ERROR("Invalid version");
		return -1;
	}

	if (!fgets(directives, 1024, f)) {
		ERROR("Invalid config file");
		return -1;
	}

	if (version == 1 && !strstr(directives, "whitelist")) {
		ERROR("Only whitelist policy is supported");
		return -1;
	}

	if (strstr(directives, "debug")) {
		ERROR("Debug not yet implemented");
		return -1;
	}

	switch (version) {
	case 1:
		return parse_config_v1(f, conf);
	default:
		return parse_config_v2(f, directives, conf);
	}
}
965
cd75548b
SH
/*
 * use_seccomp: tell whether a seccomp policy defined for the container
 * should be applied.
 *
 * Looks for the "Seccomp:" line in /proc/self/status and returns false if
 *   1. the line is missing (seccomp is not enabled in the kernel), or
 *   2. its value is non-zero (a policy is already enabled for this task).
 * Returns true otherwise, including when the status file cannot be opened.
 */
static bool use_seccomp(void)
{
	char buf[1024];
	bool have_seccomp_line = false;
	bool confined = false;
	int mode;
	FILE *status;

	status = fopen("/proc/self/status", "r");
	if (!status)
		return true;

	/* Stop reading as soon as the Seccomp line has been seen. */
	while (!have_seccomp_line && fgets(buf, 1024, status)) {
		if (strncmp(buf, "Seccomp:", 8) != 0)
			continue;

		have_seccomp_line = true;
		if (sscanf(buf + 8, "%d", &mode) == 1 && mode != 0)
			confined = true;
	}

	fclose(status);

	if (!have_seccomp_line) {
		/* no Seccomp line, no seccomp in kernel */
		INFO("Seccomp is not enabled in the kernel");
		return false;
	}

	if (confined) {
		/* already seccomp-confined */
		INFO("Already seccomp-confined, not loading new policy");
		return false;
	}

	return true;
}
1005
8f2c3a70
SH
1006int lxc_read_seccomp_config(struct lxc_conf *conf)
1007{
1008 FILE *f;
1009 int ret;
727c3073 1010 int check_seccomp_attr_set;
8f2c3a70 1011
769872f9
SH
1012 if (!conf->seccomp)
1013 return 0;
1014
cd75548b 1015 if (!use_seccomp())
d58c6ad0 1016 return 0;
769872f9
SH
1017#if HAVE_SCMP_FILTER_CTX
1018 /* XXX for debug, pass in SCMP_ACT_TRAP */
50798138 1019 conf->seccomp_ctx = seccomp_init(SCMP_ACT_KILL);
769872f9
SH
1020 ret = !conf->seccomp_ctx;
1021#else
50798138 1022 ret = seccomp_init(SCMP_ACT_KILL) < 0;
769872f9
SH
1023#endif
1024 if (ret) {
3ee26d19 1025 ERROR("Failed initializing seccomp");
8f2c3a70
SH
1026 return -1;
1027 }
8f2c3a70 1028
127c5293 1029/* turn off no-new-privs. We don't want it in lxc, and it breaks
f06c6207 1030 * with apparmor */
769872f9 1031#if HAVE_SCMP_FILTER_CTX
f06c6207 1032 check_seccomp_attr_set = seccomp_attr_set(conf->seccomp_ctx, SCMP_FLTATR_CTL_NNP, 0);
727c3073 1033#else
f06c6207 1034 check_seccomp_attr_set = seccomp_attr_set(SCMP_FLTATR_CTL_NNP, 0);
769872f9 1035#endif
727c3073 1036 if (check_seccomp_attr_set) {
3ee26d19 1037 ERROR("Failed to turn off no-new-privs");
8f2c3a70
SH
1038 return -1;
1039 }
127c5293
SH
1040#ifdef SCMP_FLTATR_ATL_TSKIP
1041 if (seccomp_attr_set(conf->seccomp_ctx, SCMP_FLTATR_ATL_TSKIP, 1)) {
1042 WARN("Failed to turn on seccomp nop-skip, continuing");
1043 }
1044#endif
8f2c3a70
SH
1045
1046 f = fopen(conf->seccomp, "r");
1047 if (!f) {
3ee26d19 1048 SYSERROR("Failed to open seccomp policy file %s", conf->seccomp);
8f2c3a70
SH
1049 return -1;
1050 }
1051 ret = parse_config(f, conf);
1052 fclose(f);
1053 return ret;
1054}
1055
1056int lxc_seccomp_load(struct lxc_conf *conf)
1057{
1058 int ret;
1059 if (!conf->seccomp)
1060 return 0;
cd75548b 1061 if (!use_seccomp())
d58c6ad0 1062 return 0;
769872f9
SH
1063 ret = seccomp_load(
1064#if HAVE_SCMP_FILTER_CTX
f06c6207 1065 conf->seccomp_ctx
769872f9 1066#endif
f06c6207 1067 );
8f2c3a70 1068 if (ret < 0) {
3ee26d19 1069 ERROR("Error loading the seccomp policy: %s", strerror(-ret));
8f2c3a70
SH
1070 return -1;
1071 }
5107af32 1072
1073/* After load seccomp filter into the kernel successfully, export the current seccomp
1074 * filter to log file */
1075#if HAVE_SCMP_FILTER_CTX
4b73005c 1076 if ((lxc_log_get_level() <= LXC_LOG_LEVEL_TRACE || conf->loglevel <= LXC_LOG_LEVEL_TRACE) &&
5107af32 1077 lxc_log_fd >= 0) {
1078 ret = seccomp_export_pfc(conf->seccomp_ctx, lxc_log_fd);
1079 /* Just give an warning when export error */
1080 if (ret < 0)
3ee26d19 1081 WARN("Failed to export seccomp filter to log file: %s", strerror(-ret));
5107af32 1082 }
1083#endif
8f2c3a70
SH
1084 return 0;
1085}
769872f9 1086
f06c6207
CB
1087void lxc_seccomp_free(struct lxc_conf *conf)
1088{
f10fad2f
ME
1089 free(conf->seccomp);
1090 conf->seccomp = NULL;
769872f9
SH
1091#if HAVE_SCMP_FILTER_CTX
1092 if (conf->seccomp_ctx) {
1093 seccomp_release(conf->seccomp_ctx);
1094 conf->seccomp_ctx = NULL;
1095 }
1096#endif
1097}