]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/seccomp.c
seccomp: fix complication when !HAVE_DECL_SECCOMP_NOTIFY_FD
[mirror_lxc.git] / src / lxc / seccomp.c
CommitLineData
cc73685d 1/* SPDX-License-Identifier: LGPL-2.1+ */
8f2c3a70 2
d38dd64a
CB
3#ifndef _GNU_SOURCE
4#define _GNU_SOURCE 1
5#endif
567b2049 6#include <errno.h>
ccf8d128 7#include <seccomp.h>
8f2c3a70
SH
8#include <stdio.h>
9#include <stdlib.h>
e4353a7f 10#include <sys/epoll.h>
6166fa6d 11#include <sys/mount.h>
567b2049 12#include <sys/utsname.h>
f2363e38 13
e35b7bf8 14#include "af_unix.h"
c3e3c21a 15#include "commands.h"
769872f9 16#include "config.h"
8f2c3a70 17#include "log.h"
cdb2a47f 18#include "lxccontainer.h"
567b2049 19#include "lxcseccomp.h"
c3e3c21a 20#include "mainloop.h"
cdb2a47f 21#include "memory_utils.h"
eacebcc3 22#include "utils.h"
8f2c3a70 23
0b5c590d
CB
24#ifdef __MIPSEL__
25#define MIPS_ARCH_O32 lxc_seccomp_arch_mipsel
26#define MIPS_ARCH_N64 lxc_seccomp_arch_mipsel64
27#else
28#define MIPS_ARCH_O32 lxc_seccomp_arch_mips
29#define MIPS_ARCH_N64 lxc_seccomp_arch_mips64
30#endif
31
4a094eec
WB
32#ifndef SECCOMP_GET_NOTIF_SIZES
33#define SECCOMP_GET_NOTIF_SIZES 3
34#endif
35
ac2cecc4 36lxc_log_define(seccomp, lxc);
8f2c3a70 37
4a094eec
WB
#if HAVE_DECL_SECCOMP_NOTIFY_FD
/*
 * Issue the raw seccomp system call.
 *
 * @operation, @flags and @args are handed to syscall() unchanged. The result
 * of syscall() is returned, narrowed to int.
 */
static inline int __seccomp(unsigned int operation, unsigned int flags,
			    void *args)
{
	long rc;

	rc = syscall(__NR_seccomp, operation, flags, args);

	return rc;
}
#endif
45
9dbd8ff3 46static int parse_config_v1(FILE *f, char *line, size_t *line_bufsz, struct lxc_conf *conf)
50798138 47{
ccf8d128 48 int ret = 0;
50798138 49
9dbd8ff3 50 while (getline(&line, line_bufsz, f) != -1) {
50798138 51 int nr;
ccf8d128 52
50798138 53 ret = sscanf(line, "%d", &nr);
97a9b258
WB
54 if (ret != 1) {
55 ret = -1;
56 break;
57 }
ccf8d128 58
50798138 59#if HAVE_SCMP_FILTER_CTX
c3e3c21a 60 ret = seccomp_rule_add(conf->seccomp.seccomp_ctx, SCMP_ACT_ALLOW, nr, 0);
ccf8d128
CB
61#else
62 ret = seccomp_rule_add(SCMP_ACT_ALLOW, nr, 0);
50798138 63#endif
50798138 64 if (ret < 0) {
3ee26d19 65 ERROR("Failed loading allow rule for %d", nr);
ccf8d128 66 break;
50798138
SH
67 }
68 }
ccf8d128
CB
69 free(line);
70
71 return ret;
50798138
SH
72}
73
2b0ae718 74#if HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH
1ab6b4a1
CB
75static const char *get_action_name(uint32_t action)
76{
77 /* The upper 16 bits indicate the type of the seccomp action. */
78 switch (action & 0xffff0000) {
79 case SCMP_ACT_KILL:
80 return "kill";
81 case SCMP_ACT_ALLOW:
82 return "allow";
83 case SCMP_ACT_TRAP:
84 return "trap";
85 case SCMP_ACT_ERRNO(0):
86 return "errno";
d7d2d2d9 87#if HAVE_DECL_SECCOMP_NOTIFY_FD
02ca9d75 88 case SCMP_ACT_NOTIFY:
cdb2a47f
CB
89 return "notify";
90#endif
1ab6b4a1
CB
91 }
92
93 return "invalid action";
94}
95
50798138
SH
96static uint32_t get_v2_default_action(char *line)
97{
98 uint32_t ret_action = -1;
99
f06c6207
CB
100 while (*line == ' ')
101 line++;
30448a13 102
78522aa9 103 /* After 'allowlist' or 'denylist' comes default behavior. */
becc8d20 104 if (strnequal(line, "kill", 4)) {
50798138 105 ret_action = SCMP_ACT_KILL;
becc8d20 106 } else if (strnequal(line, "errno", 5)) {
30448a13
CB
107 int e, ret;
108
109 ret = sscanf(line + 5, "%d", &e);
110 if (ret != 1) {
111 ERROR("Failed to parse errno value from %s", line);
50798138
SH
112 return -2;
113 }
30448a13 114
50798138 115 ret_action = SCMP_ACT_ERRNO(e);
becc8d20 116 } else if (strnequal(line, "allow", 5)) {
50798138 117 ret_action = SCMP_ACT_ALLOW;
becc8d20 118 } else if (strnequal(line, "trap", 4)) {
50798138 119 ret_action = SCMP_ACT_TRAP;
d7d2d2d9 120#if HAVE_DECL_SECCOMP_NOTIFY_FD
becc8d20 121 } else if (strnequal(line, "notify", 6)) {
02ca9d75 122 ret_action = SCMP_ACT_NOTIFY;
cdb2a47f 123#endif
7474b5b3 124 } else if (line[0]) {
54a051c1 125 ERROR("Unrecognized seccomp action \"%s\"", line);
7474b5b3 126 return -2;
30448a13
CB
127 }
128
50798138
SH
129 return ret_action;
130}
131
/*
 * Read the optional action that follows the syscall name in a rule line.
 *
 * @line       : rule text, "<syscall> [action ...]".
 * @def_action : action to use when the line names none.
 *
 * Returns @def_action when there is no blank, nothing after the blanks, or a
 * '#' right after them; (uint32_t)-1 when the action text is invalid;
 * otherwise the parsed action.
 */
static uint32_t get_v2_action(char *line, uint32_t def_action)
{
	uint32_t action;
	char *cursor;

	cursor = strchr(line, ' ');
	if (!cursor)
		return def_action;

	/* Step over the separating blank and any blanks that follow it. */
	do {
		cursor++;
	} while (*cursor == ' ');

	if (*cursor == '\0' || *cursor == '#')
		return def_action;

	action = get_v2_default_action(cursor);

	/* -2 from the keyword parser means "invalid", -1 means "none given". */
	if (action == (uint32_t)-2)
		return -1;

	if (action == (uint32_t)-1)
		return def_action;

	return action;
}
3ee26d19 158
63a49b03 159struct seccomp_v2_rule_args {
3ee26d19
L
160 uint32_t index;
161 uint64_t value;
162 uint64_t mask;
163 enum scmp_compare op;
164};
165
166struct seccomp_v2_rule {
167 uint32_t action;
168 uint32_t args_num;
63a49b03 169 struct seccomp_v2_rule_args args_value[6];
3ee26d19
L
170};
171
172static enum scmp_compare parse_v2_rule_op(char *s)
173{
dc2c2622 174 if (strequal(s, "SCMP_CMP_NE") || strequal(s, "!="))
29cb2617 175 return SCMP_CMP_NE;
dc2c2622 176 else if (strequal(s, "SCMP_CMP_LT") || strequal(s, "<"))
29cb2617 177 return SCMP_CMP_LT;
dc2c2622 178 else if (strequal(s, "SCMP_CMP_LE") || strequal(s, "<="))
29cb2617 179 return SCMP_CMP_LE;
dc2c2622 180 else if (strequal(s, "SCMP_CMP_EQ") || strequal(s, "=="))
29cb2617 181 return SCMP_CMP_EQ;
dc2c2622 182 else if (strequal(s, "SCMP_CMP_GE") || strequal(s, ">="))
29cb2617 183 return SCMP_CMP_GE;
dc2c2622 184 else if (strequal(s, "SCMP_CMP_GT") || strequal(s, ">"))
29cb2617 185 return SCMP_CMP_GT;
dc2c2622 186 else if (strequal(s, "SCMP_CMP_MASKED_EQ") || strequal(s, "&="))
29cb2617 187 return SCMP_CMP_MASKED_EQ;
3ee26d19 188
29cb2617 189 return _SCMP_CMP_MAX;
3ee26d19
L
190}
191
63a49b03
CB
192/*
193 * This function is used to parse the args string into the structure.
73e3cb9a 194 * args string format:[index,value,op,mask] or [index,value,op]
3ee26d19
L
195 * index: the index for syscall arguments (type uint)
196 * value: the value for syscall arguments (type uint64)
197 * op: the operator for syscall arguments(string),
198 a valid list of constants as of libseccomp v2.3.2 is
199 SCMP_CMP_NE,SCMP_CMP_LE,SCMP_CMP_LE, SCMP_CMP_EQ, SCMP_CMP_GE,
200 SCMP_CMP_GT, SCMP_CMP_MASKED_EQ, or !=,<=,==,>=,>,&=
73e3cb9a 201 * mask: the mask to apply on "value" for SCMP_CMP_MASKED_EQ (type uint64, optional)
3ee26d19
L
202 * Returns 0 on success, < 0 otherwise.
203 */
63a49b03 204static int get_seccomp_arg_value(char *key, struct seccomp_v2_rule_args *rule_args)
3ee26d19
L
205{
206 int ret = 0;
3ee26d19 207 uint32_t index = 0;
63a49b03
CB
208 uint64_t mask = 0, value = 0;
209 enum scmp_compare op = 0;
3ee26d19 210 char *tmp = NULL;
f42183e6 211 char s[31] = {0}, v[24] = {0}, m[24] = {'0'};
3ee26d19 212
3ee26d19
L
213 tmp = strchr(key, '[');
214 if (!tmp) {
215 ERROR("Failed to interpret args");
216 return -1;
217 }
63a49b03 218
eacebcc3 219 ret = sscanf(tmp, "[%i,%23[^,],%30[^0-9^,],%23[^,]", &index, v, s, m);
3ee26d19
L
220 if ((ret != 3 && ret != 4) || index >= 6) {
221 ERROR("Failed to interpret args value");
222 return -1;
223 }
224
573ad77f 225 ret = lxc_safe_uint64(v, &value, 0);
eacebcc3
FA
226 if (ret < 0) {
227 ERROR("Invalid argument value");
228 return -1;
229 }
230
573ad77f 231 ret = lxc_safe_uint64(m, &mask, 0);
eacebcc3
FA
232 if (ret < 0) {
233 ERROR("Invalid argument mask");
234 return -1;
235 }
236
3ee26d19
L
237 op = parse_v2_rule_op(s);
238 if (op == _SCMP_CMP_MAX) {
239 ERROR("Failed to interpret args operator value");
240 return -1;
241 }
242
243 rule_args->index = index;
244 rule_args->value = value;
245 rule_args->mask = mask;
246 rule_args->op = op;
247 return 0;
248}
249
250/* This function is used to parse the seccomp rule entry.
251 * @line : seccomp rule entry string.
252 * @def_action : default action used in the case if the 'line' contain non valid action.
253 * @rules : output struct.
254 * Returns 0 on success, < 0 otherwise.
255 */
f67c94d0
CB
256static int parse_v2_rules(char *line, uint32_t def_action,
257 struct seccomp_v2_rule *rules)
3ee26d19 258{
f67c94d0
CB
259 int i = 0, ret = -1;
260 char *key = NULL, *saveptr = NULL, *tmp = NULL;
3ee26d19
L
261
262 tmp = strdup(line);
263 if (!tmp)
264 return -1;
265
266 /* read optional action which follows the syscall */
267 rules->action = get_v2_action(tmp, def_action);
f858dd50
WB
268 if (rules->action == -1) {
269 ERROR("Failed to interpret action");
270 ret = -1;
54a051c1 271 goto on_error;
f858dd50 272 }
3ee26d19 273
f67c94d0 274 ret = 0;
3ee26d19 275 rules->args_num = 0;
f67c94d0 276 if (!strchr(tmp, '['))
54a051c1 277 goto on_error;
3ee26d19 278
f67c94d0
CB
279 ret = -1;
280 for ((key = strtok_r(tmp, "]", &saveptr)), i = 0; key && i < 6;
281 (key = strtok_r(NULL, "]", &saveptr)), i++) {
3ee26d19 282 ret = get_seccomp_arg_value(key, &rules->args_value[i]);
f67c94d0 283 if (ret < 0)
54a051c1 284 goto on_error;
f67c94d0 285
3ee26d19
L
286 rules->args_num++;
287 }
288
289 ret = 0;
f67c94d0 290
54a051c1 291on_error:
3ee26d19 292 free(tmp);
f67c94d0 293
3ee26d19
L
294 return ret;
295}
2b0ae718 296#endif
50798138 297
d58c6ad0 298#if HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH
/* Architectures the seccomp policy parser distinguishes. */
enum lxc_hostarch_t {
	/* Rules apply to every context that was set up. */
	lxc_seccomp_arch_all = 0,
	lxc_seccomp_arch_native,

	/* x86 family */
	lxc_seccomp_arch_i386,
	lxc_seccomp_arch_x32,
	lxc_seccomp_arch_amd64,

	/* ARM family */
	lxc_seccomp_arch_arm,
	lxc_seccomp_arch_arm64,

	/* PowerPC family */
	lxc_seccomp_arch_ppc64,
	lxc_seccomp_arch_ppc64le,
	lxc_seccomp_arch_ppc,

	/* MIPS family */
	lxc_seccomp_arch_mips,
	lxc_seccomp_arch_mips64,
	lxc_seccomp_arch_mips64n32,
	lxc_seccomp_arch_mipsel,
	lxc_seccomp_arch_mipsel64,
	lxc_seccomp_arch_mipsel64n32,

	/* s390 family */
	lxc_seccomp_arch_s390x,
	lxc_seccomp_arch_s390,

	/* Machine string not recognized, or section not relevant for this host. */
	lxc_seccomp_arch_unknown = 999,
};
320
59eac805 321static int get_hostarch(void)
d58c6ad0
SH
322{
323 struct utsname uts;
324 if (uname(&uts) < 0) {
3ee26d19 325 SYSERROR("Failed to read host arch");
d58c6ad0
SH
326 return -1;
327 }
0197fe2e 328
dc2c2622 329 if (strequal(uts.machine, "i686"))
d58c6ad0 330 return lxc_seccomp_arch_i386;
1a0e70ac 331 /* no x32 kernels */
dc2c2622 332 else if (strequal(uts.machine, "x86_64"))
d58c6ad0 333 return lxc_seccomp_arch_amd64;
becc8d20 334 else if (strnequal(uts.machine, "armv7", 5))
d58c6ad0 335 return lxc_seccomp_arch_arm;
becc8d20 336 else if (strnequal(uts.machine, "aarch64", 7))
9d291dd2 337 return lxc_seccomp_arch_arm64;
becc8d20 338 else if (strnequal(uts.machine, "ppc64le", 7))
b4067426 339 return lxc_seccomp_arch_ppc64le;
becc8d20 340 else if (strnequal(uts.machine, "ppc64", 5))
b4067426 341 return lxc_seccomp_arch_ppc64;
becc8d20 342 else if (strnequal(uts.machine, "ppc", 3))
b4067426 343 return lxc_seccomp_arch_ppc;
becc8d20 344 else if (strnequal(uts.machine, "mips64", 6))
2ccd9eda 345 return MIPS_ARCH_N64;
becc8d20 346 else if (strnequal(uts.machine, "mips", 4))
2ccd9eda 347 return MIPS_ARCH_O32;
becc8d20 348 else if (strnequal(uts.machine, "s390x", 5))
be038e49 349 return lxc_seccomp_arch_s390x;
becc8d20 350 else if (strnequal(uts.machine, "s390", 4))
3c3fab00 351 return lxc_seccomp_arch_s390;
d58c6ad0
SH
352 return lxc_seccomp_arch_unknown;
353}
354
59eac805
CB
355static scmp_filter_ctx get_new_ctx(enum lxc_hostarch_t n_arch, uint32_t default_policy_action,
356 bool *needs_merge)
d58c6ad0 357{
d58c6ad0
SH
358 int ret;
359 uint32_t arch;
04263914 360 scmp_filter_ctx ctx;
d58c6ad0 361
04263914
CB
362 switch (n_arch) {
363 case lxc_seccomp_arch_i386:
364 arch = SCMP_ARCH_X86;
365 break;
366 case lxc_seccomp_arch_x32:
367 arch = SCMP_ARCH_X32;
368 break;
369 case lxc_seccomp_arch_amd64:
370 arch = SCMP_ARCH_X86_64;
371 break;
372 case lxc_seccomp_arch_arm:
373 arch = SCMP_ARCH_ARM;
374 break;
9d291dd2 375#ifdef SCMP_ARCH_AARCH64
04263914
CB
376 case lxc_seccomp_arch_arm64:
377 arch = SCMP_ARCH_AARCH64;
378 break;
9d291dd2 379#endif
b4067426 380#ifdef SCMP_ARCH_PPC64LE
04263914
CB
381 case lxc_seccomp_arch_ppc64le:
382 arch = SCMP_ARCH_PPC64LE;
383 break;
b4067426
BP
384#endif
385#ifdef SCMP_ARCH_PPC64
04263914
CB
386 case lxc_seccomp_arch_ppc64:
387 arch = SCMP_ARCH_PPC64;
388 break;
b4067426
BP
389#endif
390#ifdef SCMP_ARCH_PPC
04263914
CB
391 case lxc_seccomp_arch_ppc:
392 arch = SCMP_ARCH_PPC;
393 break;
2ccd9eda
JC
394#endif
395#ifdef SCMP_ARCH_MIPS
04263914
CB
396 case lxc_seccomp_arch_mips:
397 arch = SCMP_ARCH_MIPS;
398 break;
399 case lxc_seccomp_arch_mips64:
400 arch = SCMP_ARCH_MIPS64;
401 break;
402 case lxc_seccomp_arch_mips64n32:
403 arch = SCMP_ARCH_MIPS64N32;
404 break;
405 case lxc_seccomp_arch_mipsel:
406 arch = SCMP_ARCH_MIPSEL;
407 break;
408 case lxc_seccomp_arch_mipsel64:
409 arch = SCMP_ARCH_MIPSEL64;
410 break;
411 case lxc_seccomp_arch_mipsel64n32:
412 arch = SCMP_ARCH_MIPSEL64N32;
413 break;
be038e49
CB
414#endif
415#ifdef SCMP_ARCH_S390X
04263914
CB
416 case lxc_seccomp_arch_s390x:
417 arch = SCMP_ARCH_S390X;
418 break;
3c3fab00 419#endif
420#ifdef SCMP_ARCH_S390
421 case lxc_seccomp_arch_s390:
422 arch = SCMP_ARCH_S390;
423 break;
b4067426 424#endif
04263914
CB
425 default:
426 return NULL;
d58c6ad0
SH
427 }
428
04263914
CB
429 ctx = seccomp_init(default_policy_action);
430 if (!ctx) {
3ee26d19 431 ERROR("Error initializing seccomp context");
d58c6ad0
SH
432 return NULL;
433 }
04263914
CB
434
435 ret = seccomp_attr_set(ctx, SCMP_FLTATR_CTL_NNP, 0);
436 if (ret < 0) {
6d1400b5 437 errno = -ret;
438 SYSERROR("Failed to turn off no-new-privs");
d58c6ad0
SH
439 seccomp_release(ctx);
440 return NULL;
441 }
04263914 442
127c5293 443#ifdef SCMP_FLTATR_ATL_TSKIP
04263914 444 ret = seccomp_attr_set(ctx, SCMP_FLTATR_ATL_TSKIP, 1);
a24c5678 445 if (ret < 0) {
446 errno = -ret;
447 SYSWARN("Failed to turn on seccomp nop-skip, continuing");
448 }
127c5293 449#endif
b5ed021b 450
adfee3a8
CB
451 ret = seccomp_arch_exist(ctx, arch);
452 if (ret < 0) {
453 if (ret != -EEXIST) {
6d1400b5 454 errno = -ret;
455 SYSERROR("Failed to determine whether arch %d is "
456 "already present in the main seccomp context",
457 (int)n_arch);
adfee3a8
CB
458 seccomp_release(ctx);
459 return NULL;
460 }
461
b5ed021b
CB
462 ret = seccomp_arch_add(ctx, arch);
463 if (ret != 0) {
6d1400b5 464 errno = -ret;
465 SYSERROR("Failed to add arch %d to main seccomp context",
466 (int)n_arch);
b5ed021b
CB
467 seccomp_release(ctx);
468 return NULL;
469 }
adfee3a8 470 TRACE("Added arch %d to main seccomp context", (int)n_arch);
b5ed021b 471
adfee3a8
CB
472 ret = seccomp_arch_remove(ctx, SCMP_ARCH_NATIVE);
473 if (ret != 0) {
474 ERROR("Failed to remove native arch from main seccomp context");
b5ed021b
CB
475 seccomp_release(ctx);
476 return NULL;
477 }
adfee3a8 478 TRACE("Removed native arch from main seccomp context");
3e9671a1
CB
479
480 *needs_merge = true;
adfee3a8 481 } else {
3e9671a1 482 *needs_merge = false;
adfee3a8 483 TRACE("Arch %d already present in main seccomp context", (int)n_arch);
d58c6ad0
SH
484 }
485
486 return ctx;
487}
488
0ff0d23e
RJ
/* Outcome of trying to add a single rule to a seccomp context. */
enum lxc_seccomp_rule_status_t {
	/* The rule was added. */
	lxc_seccomp_rule_added = 0,
	/* Adding the rule failed. */
	lxc_seccomp_rule_err,
	/* The syscall name could not be resolved on the host's native arch. */
	lxc_seccomp_rule_undefined_syscall,
	/* The syscall is not available on the requested architecture. */
	lxc_seccomp_rule_unsupported_arch,
};
495
496static enum lxc_seccomp_rule_status_t do_resolve_add_rule(uint32_t arch, char *line, scmp_filter_ctx ctx,
59eac805 497 struct seccomp_v2_rule *rule)
d58c6ad0 498{
ad9a5b72 499 int i, nr, ret;
3ee26d19
L
500 struct scmp_arg_cmp arg_cmp[6];
501
f06c6207
CB
502 ret = seccomp_arch_exist(ctx, arch);
503 if (arch && ret != 0) {
6d1400b5 504 errno = -ret;
505 SYSERROR("Seccomp: rule and context arch do not match (arch %d)", arch);
0ff0d23e 506 return lxc_seccomp_rule_err;
d58c6ad0 507 }
6166fa6d 508
3ee26d19
L
509 /*get the syscall name*/
510 char *p = strchr(line, ' ');
511 if (p)
512 *p = '\0';
513
becc8d20 514 if (strnequal(line, "reject_force_umount", 19)) {
ad9a5b72
CB
515 ret = seccomp_rule_add_exact(ctx, SCMP_ACT_ERRNO(EACCES),
516 SCMP_SYS(umount2), 1,
517 SCMP_A1(SCMP_CMP_MASKED_EQ, MNT_FORCE, MNT_FORCE));
6166fa6d 518 if (ret < 0) {
6d1400b5 519 errno = -ret;
520 SYSERROR("Failed loading rule to reject force umount");
0ff0d23e 521 return lxc_seccomp_rule_err;
6166fa6d 522 }
ad9a5b72
CB
523
524 INFO("Set seccomp rule to reject force umounts");
0ff0d23e 525 return lxc_seccomp_rule_added;
6166fa6d
SH
526 }
527
cd75548b 528 nr = seccomp_syscall_resolve_name(line);
d58c6ad0 529 if (nr == __NR_SCMP_ERROR) {
0ff0d23e
RJ
530 INFO("The syscall[%s] is is undefined on host native arch", line);
531 return lxc_seccomp_rule_undefined_syscall;
d58c6ad0 532 }
ad9a5b72 533
0ff0d23e
RJ
534 // The syscall resolves to a pseudo syscall and may be available on compat archs.
535 if (nr < 0 && arch == SCMP_ARCH_NATIVE) {
536 DEBUG("The syscall[%d:%s] is a pseudo syscall and not available on host native arch.", nr, line);
537 return lxc_seccomp_rule_unsupported_arch;
d58c6ad0 538 }
3ee26d19 539
fbec5f83 540 if (arch != SCMP_ARCH_NATIVE && seccomp_syscall_resolve_name_arch(arch, line) < 0) {
0ff0d23e
RJ
541 DEBUG("The syscall[%d:%s] is not supported on compat arch[%u]", nr, line, arch);
542 return lxc_seccomp_rule_unsupported_arch;
fbec5f83
RJ
543 }
544
ad9a5b72 545 memset(&arg_cmp, 0, sizeof(arg_cmp));
3ee26d19 546 for (i = 0; i < rule->args_num; i++) {
ad9a5b72
CB
547 INFO("arg_cmp[%d]: SCMP_CMP(%u, %llu, %llu, %llu)", i,
548 rule->args_value[i].index,
549 (long long unsigned int)rule->args_value[i].op,
550 (long long unsigned int)rule->args_value[i].mask,
551 (long long unsigned int)rule->args_value[i].value);
3ee26d19
L
552
553 if (SCMP_CMP_MASKED_EQ == rule->args_value[i].op)
ad9a5b72
CB
554 arg_cmp[i] = SCMP_CMP(rule->args_value[i].index,
555 rule->args_value[i].op,
556 rule->args_value[i].mask,
557 rule->args_value[i].value);
3ee26d19 558 else
ad9a5b72
CB
559 arg_cmp[i] = SCMP_CMP(rule->args_value[i].index,
560 rule->args_value[i].op,
561 rule->args_value[i].value);
3ee26d19
L
562 }
563
0ff0d23e
RJ
564 INFO("Adding %s rule for syscall[%d:%s] action[%d:%s] arch[%u]",
565 (arch == SCMP_ARCH_NATIVE) ? "native" : "compat",
566 nr, line, rule->action, get_action_name(rule->action), arch);
567
ad9a5b72
CB
568 ret = seccomp_rule_add_exact_array(ctx, rule->action, nr,
569 rule->args_num, arg_cmp);
d58c6ad0 570 if (ret < 0) {
6d1400b5 571 errno = -ret;
0ff0d23e
RJ
572 SYSERROR("Failed to add rule for syscall[%d:%s] action[%d:%s] arch[%u]",
573 nr, line, rule->action, get_action_name(rule->action), arch);
574 return lxc_seccomp_rule_err;
d58c6ad0 575 }
ad9a5b72 576
0ff0d23e 577 return lxc_seccomp_rule_added;
d58c6ad0
SH
578}
579
78522aa9
CB
580/*
581 * It is unfortunate, but we can't simply remove those terms since this would
582 * break way too many users.
583 */
584#define BACKWARDCOMPAT_TERMINOLOGY_DENYLIST "blacklist"
585#define BACKWARDCOMPAT_TERMINOLOGY_ALLOWLIST "whitelist"
586
587static inline bool is_denylist(const char *type)
588{
589 return strnequal(type, "denylist", STRLITERALLEN("denylist")) ||
590 strnequal(type, BACKWARDCOMPAT_TERMINOLOGY_DENYLIST,
591 STRLITERALLEN(BACKWARDCOMPAT_TERMINOLOGY_DENYLIST));
592}
593
594static inline bool is_allowlist(const char *type)
595{
596 return strnequal(type, "allowlist", STRLITERALLEN("allowlist")) ||
597 strnequal(type, BACKWARDCOMPAT_TERMINOLOGY_ALLOWLIST,
598 STRLITERALLEN(BACKWARDCOMPAT_TERMINOLOGY_ALLOWLIST));
599}
600
50798138
SH
601/*
602 * v2 consists of
603 * [x86]
604 * open
605 * read
606 * write
607 * close
608 * # a comment
609 * [x86_64]
610 * open
611 * read
612 * write
613 * close
614 */
9dbd8ff3 615static int parse_config_v2(FILE *f, char *line, size_t *line_bufsz, struct lxc_conf *conf)
50798138 616{
50798138 617 int ret;
9c3798eb 618 char *p;
3e9671a1 619 enum lxc_hostarch_t cur_rule_arch, native_arch;
78522aa9 620 bool denylist = false;
3ee26d19 621 uint32_t default_policy_action = -1, default_rule_action = -1;
3ee26d19 622 struct seccomp_v2_rule rule;
3e9671a1
CB
623 struct scmp_ctx_info {
624 uint32_t architectures[3];
625 scmp_filter_ctx contexts[3];
626 bool needs_merge[3];
627 } ctx;
50798138 628
78522aa9
CB
629 if (is_denylist(line))
630 denylist = true;
631 else if (!is_allowlist(line))
632 return log_error(-EINVAL, "Bad seccomp policy style \"%s\"", line);
50798138 633
9c3798eb
CB
634 p = strchr(line, ' ');
635 if (p) {
f06c6207 636 default_policy_action = get_v2_default_action(p + 1);
50798138
SH
637 if (default_policy_action == -2)
638 return -1;
639 }
640
78522aa9
CB
641 /* for denylist, allow any syscall which has no rule */
642 if (denylist) {
50798138
SH
643 if (default_policy_action == -1)
644 default_policy_action = SCMP_ACT_ALLOW;
9c3798eb 645
50798138
SH
646 if (default_rule_action == -1)
647 default_rule_action = SCMP_ACT_KILL;
648 } else {
649 if (default_policy_action == -1)
650 default_policy_action = SCMP_ACT_KILL;
9c3798eb 651
50798138
SH
652 if (default_rule_action == -1)
653 default_rule_action = SCMP_ACT_ALLOW;
654 }
655
15044cd1
RJ
656 DEBUG("Host native arch is [%u]", seccomp_arch_native());
657
eca6736e
CB
658 memset(&ctx, 0, sizeof(ctx));
659 ctx.architectures[0] = SCMP_ARCH_NATIVE;
660 ctx.architectures[1] = SCMP_ARCH_NATIVE;
661 ctx.architectures[2] = SCMP_ARCH_NATIVE;
9c3798eb
CB
662 native_arch = get_hostarch();
663 cur_rule_arch = native_arch;
d58c6ad0
SH
664 if (native_arch == lxc_seccomp_arch_amd64) {
665 cur_rule_arch = lxc_seccomp_arch_all;
eca6736e
CB
666
667 ctx.architectures[0] = SCMP_ARCH_X86;
668 ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_i386,
3e9671a1
CB
669 default_policy_action,
670 &ctx.needs_merge[0]);
eca6736e
CB
671 if (!ctx.contexts[0])
672 goto bad;
673
674 ctx.architectures[1] = SCMP_ARCH_X32;
675 ctx.contexts[1] = get_new_ctx(lxc_seccomp_arch_x32,
3e9671a1
CB
676 default_policy_action,
677 &ctx.needs_merge[1]);
eca6736e
CB
678 if (!ctx.contexts[1])
679 goto bad;
680
681 ctx.architectures[2] = SCMP_ARCH_X86_64;
682 ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_amd64,
3e9671a1
CB
683 default_policy_action,
684 &ctx.needs_merge[2]);
eca6736e 685 if (!ctx.contexts[2])
ab5e52f6 686 goto bad;
ca399594 687#ifdef SCMP_ARCH_PPC
7635139a
SH
688 } else if (native_arch == lxc_seccomp_arch_ppc64) {
689 cur_rule_arch = lxc_seccomp_arch_all;
eca6736e
CB
690
691 ctx.architectures[0] = SCMP_ARCH_PPC;
692 ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_ppc,
3e9671a1
CB
693 default_policy_action,
694 &ctx.needs_merge[0]);
eca6736e
CB
695 if (!ctx.contexts[0])
696 goto bad;
697
3e9671a1
CB
698 ctx.architectures[2] = SCMP_ARCH_PPC64;
699 ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_ppc64,
700 default_policy_action,
701 &ctx.needs_merge[2]);
702 if (!ctx.contexts[2])
7635139a 703 goto bad;
ca399594
CB
704#endif
705#ifdef SCMP_ARCH_ARM
7635139a
SH
706 } else if (native_arch == lxc_seccomp_arch_arm64) {
707 cur_rule_arch = lxc_seccomp_arch_all;
eca6736e
CB
708
709 ctx.architectures[0] = SCMP_ARCH_ARM;
9c3798eb 710 ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_arm,
3e9671a1
CB
711 default_policy_action,
712 &ctx.needs_merge[0]);
eca6736e
CB
713 if (!ctx.contexts[0])
714 goto bad;
715
b1c428f9 716#ifdef SCMP_ARCH_AARCH64
3e9671a1
CB
717 ctx.architectures[2] = SCMP_ARCH_AARCH64;
718 ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_arm64,
719 default_policy_action,
720 &ctx.needs_merge[2]);
721 if (!ctx.contexts[2])
2ccd9eda
JC
722 goto bad;
723#endif
b1c428f9 724#endif
2ccd9eda
JC
725#ifdef SCMP_ARCH_MIPS
726 } else if (native_arch == lxc_seccomp_arch_mips64) {
727 cur_rule_arch = lxc_seccomp_arch_all;
eca6736e
CB
728
729 ctx.architectures[0] = SCMP_ARCH_MIPS;
730 ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_mips,
3e9671a1
CB
731 default_policy_action,
732 &ctx.needs_merge[0]);
eca6736e
CB
733 if (!ctx.contexts[0])
734 goto bad;
735
736 ctx.architectures[1] = SCMP_ARCH_MIPS64N32;
737 ctx.contexts[1] = get_new_ctx(lxc_seccomp_arch_mips64n32,
3e9671a1
CB
738 default_policy_action,
739 &ctx.needs_merge[1]);
eca6736e
CB
740 if (!ctx.contexts[1])
741 goto bad;
742
743 ctx.architectures[2] = SCMP_ARCH_MIPS64;
744 ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_mips64,
3e9671a1
CB
745 default_policy_action,
746 &ctx.needs_merge[2]);
eca6736e 747 if (!ctx.contexts[2])
2ccd9eda
JC
748 goto bad;
749 } else if (native_arch == lxc_seccomp_arch_mipsel64) {
750 cur_rule_arch = lxc_seccomp_arch_all;
eca6736e
CB
751
752 ctx.architectures[0] = SCMP_ARCH_MIPSEL;
753 ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_mipsel,
3e9671a1
CB
754 default_policy_action,
755 &ctx.needs_merge[0]);
eca6736e
CB
756 if (!ctx.contexts[0])
757 goto bad;
758
759 ctx.architectures[1] = SCMP_ARCH_MIPSEL64N32;
760 ctx.contexts[1] = get_new_ctx(lxc_seccomp_arch_mipsel64n32,
3e9671a1
CB
761 default_policy_action,
762 &ctx.needs_merge[1]);
eca6736e
CB
763 if (!ctx.contexts[1])
764 goto bad;
765
766 ctx.architectures[2] = SCMP_ARCH_MIPSEL64;
767 ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_mipsel64,
3e9671a1
CB
768 default_policy_action,
769 &ctx.needs_merge[2]);
eca6736e 770 if (!ctx.contexts[2])
7635139a 771 goto bad;
be038e49 772#endif
d58c6ad0
SH
773 }
774
50798138 775 if (default_policy_action != SCMP_ACT_KILL) {
c3e3c21a 776 ret = seccomp_reset(conf->seccomp.seccomp_ctx, default_policy_action);
50798138 777 if (ret != 0) {
3ee26d19 778 ERROR("Error re-initializing Seccomp");
50798138
SH
779 return -1;
780 }
9c3798eb 781
c3e3c21a 782 ret = seccomp_attr_set(conf->seccomp.seccomp_ctx, SCMP_FLTATR_CTL_NNP, 0);
9c3798eb 783 if (ret < 0) {
6d1400b5 784 errno = -ret;
785 SYSERROR("Failed to turn off no-new-privs");
50798138
SH
786 return -1;
787 }
9c3798eb 788
127c5293 789#ifdef SCMP_FLTATR_ATL_TSKIP
c3e3c21a 790 ret = seccomp_attr_set(conf->seccomp.seccomp_ctx, SCMP_FLTATR_ATL_TSKIP, 1);
a24c5678 791 if (ret < 0) {
792 errno = -ret;
793 SYSWARN("Failed to turn on seccomp nop-skip, continuing");
794 }
127c5293 795#endif
50798138
SH
796 }
797
9dbd8ff3 798 while (getline(&line, line_bufsz, f) != -1) {
50798138
SH
799 if (line[0] == '#')
800 continue;
9c3798eb
CB
801
802 if (line[0] == '\0')
50798138 803 continue;
9c3798eb 804
50798138 805 remove_trailing_newlines(line);
9c3798eb
CB
806
807 INFO("Processing \"%s\"", line);
50798138 808 if (line[0] == '[') {
1a0e70ac 809 /* Read the architecture for next set of rules. */
dc2c2622
CB
810 if (strequal(line, "[x86]") ||
811 strequal(line, "[X86]")) {
d58c6ad0 812 if (native_arch != lxc_seccomp_arch_i386 &&
7e84441e 813 native_arch != lxc_seccomp_arch_amd64) {
d58c6ad0
SH
814 cur_rule_arch = lxc_seccomp_arch_unknown;
815 continue;
816 }
9c3798eb 817
d58c6ad0 818 cur_rule_arch = lxc_seccomp_arch_i386;
dc2c2622
CB
819 } else if (strequal(line, "[x32]") ||
820 strequal(line, "[X32]")) {
11de80d6
AB
821 if (native_arch != lxc_seccomp_arch_amd64) {
822 cur_rule_arch = lxc_seccomp_arch_unknown;
823 continue;
824 }
9c3798eb 825
11de80d6 826 cur_rule_arch = lxc_seccomp_arch_x32;
dc2c2622
CB
827 } else if (strequal(line, "[X86_64]") ||
828 strequal(line, "[x86_64]")) {
d58c6ad0
SH
829 if (native_arch != lxc_seccomp_arch_amd64) {
830 cur_rule_arch = lxc_seccomp_arch_unknown;
831 continue;
832 }
9c3798eb 833
d58c6ad0 834 cur_rule_arch = lxc_seccomp_arch_amd64;
dc2c2622
CB
835 } else if (strequal(line, "[all]") ||
836 strequal(line, "[ALL]")) {
d58c6ad0 837 cur_rule_arch = lxc_seccomp_arch_all;
d58c6ad0 838 }
2b0ae718 839#ifdef SCMP_ARCH_ARM
dc2c2622
CB
840 else if (strequal(line, "[arm]") ||
841 strequal(line, "[ARM]")) {
7635139a 842 if (native_arch != lxc_seccomp_arch_arm &&
7e84441e 843 native_arch != lxc_seccomp_arch_arm64) {
d58c6ad0
SH
844 cur_rule_arch = lxc_seccomp_arch_unknown;
845 continue;
846 }
9c3798eb 847
d58c6ad0 848 cur_rule_arch = lxc_seccomp_arch_arm;
d58c6ad0 849 }
b4067426 850#endif
9d291dd2 851#ifdef SCMP_ARCH_AARCH64
dc2c2622
CB
852 else if (strequal(line, "[arm64]") ||
853 strequal(line, "[ARM64]")) {
9d291dd2
BP
854 if (native_arch != lxc_seccomp_arch_arm64) {
855 cur_rule_arch = lxc_seccomp_arch_unknown;
856 continue;
857 }
9c3798eb 858
9d291dd2
BP
859 cur_rule_arch = lxc_seccomp_arch_arm64;
860 }
861#endif
b4067426 862#ifdef SCMP_ARCH_PPC64LE
dc2c2622
CB
863 else if (strequal(line, "[ppc64le]") ||
864 strequal(line, "[PPC64LE]")) {
b4067426
BP
865 if (native_arch != lxc_seccomp_arch_ppc64le) {
866 cur_rule_arch = lxc_seccomp_arch_unknown;
867 continue;
868 }
9c3798eb 869
b4067426
BP
870 cur_rule_arch = lxc_seccomp_arch_ppc64le;
871 }
872#endif
873#ifdef SCMP_ARCH_PPC64
dc2c2622
CB
874 else if (strequal(line, "[ppc64]") ||
875 strequal(line, "[PPC64]")) {
b4067426
BP
876 if (native_arch != lxc_seccomp_arch_ppc64) {
877 cur_rule_arch = lxc_seccomp_arch_unknown;
878 continue;
879 }
9c3798eb 880
b4067426
BP
881 cur_rule_arch = lxc_seccomp_arch_ppc64;
882 }
883#endif
884#ifdef SCMP_ARCH_PPC
dc2c2622
CB
885 else if (strequal(line, "[ppc]") ||
886 strequal(line, "[PPC]")) {
7635139a 887 if (native_arch != lxc_seccomp_arch_ppc &&
7e84441e 888 native_arch != lxc_seccomp_arch_ppc64) {
b4067426
BP
889 cur_rule_arch = lxc_seccomp_arch_unknown;
890 continue;
891 }
9c3798eb 892
b4067426
BP
893 cur_rule_arch = lxc_seccomp_arch_ppc;
894 }
2ccd9eda
JC
895#endif
896#ifdef SCMP_ARCH_MIPS
dc2c2622
CB
897 else if (strequal(line, "[mips64]") ||
898 strequal(line, "[MIPS64]")) {
2ccd9eda
JC
899 if (native_arch != lxc_seccomp_arch_mips64) {
900 cur_rule_arch = lxc_seccomp_arch_unknown;
901 continue;
902 }
9c3798eb 903
2ccd9eda 904 cur_rule_arch = lxc_seccomp_arch_mips64;
dc2c2622
CB
905 } else if (strequal(line, "[mips64n32]") ||
906 strequal(line, "[MIPS64N32]")) {
2ccd9eda
JC
907 if (native_arch != lxc_seccomp_arch_mips64) {
908 cur_rule_arch = lxc_seccomp_arch_unknown;
909 continue;
910 }
9c3798eb 911
2ccd9eda 912 cur_rule_arch = lxc_seccomp_arch_mips64n32;
dc2c2622
CB
913 } else if (strequal(line, "[mips]") ||
914 strequal(line, "[MIPS]")) {
2ccd9eda 915 if (native_arch != lxc_seccomp_arch_mips &&
7e84441e 916 native_arch != lxc_seccomp_arch_mips64) {
2ccd9eda
JC
917 cur_rule_arch = lxc_seccomp_arch_unknown;
918 continue;
919 }
9c3798eb 920
2ccd9eda 921 cur_rule_arch = lxc_seccomp_arch_mips;
dc2c2622
CB
922 } else if (strequal(line, "[mipsel64]") ||
923 strequal(line, "[MIPSEL64]")) {
2ccd9eda
JC
924 if (native_arch != lxc_seccomp_arch_mipsel64) {
925 cur_rule_arch = lxc_seccomp_arch_unknown;
926 continue;
927 }
9c3798eb 928
2ccd9eda 929 cur_rule_arch = lxc_seccomp_arch_mipsel64;
dc2c2622
CB
930 } else if (strequal(line, "[mipsel64n32]") ||
931 strequal(line, "[MIPSEL64N32]")) {
2ccd9eda
JC
932 if (native_arch != lxc_seccomp_arch_mipsel64) {
933 cur_rule_arch = lxc_seccomp_arch_unknown;
934 continue;
935 }
9c3798eb 936
2ccd9eda 937 cur_rule_arch = lxc_seccomp_arch_mipsel64n32;
dc2c2622
CB
938 } else if (strequal(line, "[mipsel]") ||
939 strequal(line, "[MIPSEL]")) {
2ccd9eda 940 if (native_arch != lxc_seccomp_arch_mipsel &&
7e84441e 941 native_arch != lxc_seccomp_arch_mipsel64) {
2ccd9eda
JC
942 cur_rule_arch = lxc_seccomp_arch_unknown;
943 continue;
944 }
9c3798eb 945
2ccd9eda
JC
946 cur_rule_arch = lxc_seccomp_arch_mipsel;
947 }
be038e49
CB
948#endif
949#ifdef SCMP_ARCH_S390X
dc2c2622
CB
950 else if (strequal(line, "[s390x]") ||
951 strequal(line, "[S390X]")) {
be038e49
CB
952 if (native_arch != lxc_seccomp_arch_s390x) {
953 cur_rule_arch = lxc_seccomp_arch_unknown;
954 continue;
955 }
9c3798eb 956
be038e49 957 cur_rule_arch = lxc_seccomp_arch_s390x;
b8bcbe9b 958 }
3c3fab00 959#endif
960#ifdef SCMP_ARCH_S390
dc2c2622
CB
961 else if (strequal(line, "[s390]") ||
962 strequal(line, "[S390]")) {
3c3fab00 963 if (native_arch != lxc_seccomp_arch_s390) {
964 cur_rule_arch = lxc_seccomp_arch_unknown;
965 continue;
966 }
967
968 cur_rule_arch = lxc_seccomp_arch_s390;
969 }
2b0ae718 970#endif
b8bcbe9b 971 else {
50798138 972 goto bad_arch;
9c3798eb 973 }
d58c6ad0 974
50798138
SH
975 continue;
976 }
977
d58c6ad0
SH
978 /* irrelevant arch - i.e. arm on i386 */
979 if (cur_rule_arch == lxc_seccomp_arch_unknown)
980 continue;
981
3ee26d19 982 memset(&rule, 0, sizeof(rule));
d58c6ad0 983 /* read optional action which follows the syscall */
3ee26d19
L
984 ret = parse_v2_rules(line, default_rule_action, &rule);
985 if (ret != 0) {
986 ERROR("Failed to interpret seccomp rule");
50798138
SH
987 goto bad_rule;
988 }
d58c6ad0 989
d7d2d2d9 990#if HAVE_DECL_SECCOMP_NOTIFY_FD
02ca9d75 991 if ((rule.action == SCMP_ACT_NOTIFY) &&
c3e3c21a 992 !conf->seccomp.notifier.wants_supervision) {
c3e3c21a 993 conf->seccomp.notifier.wants_supervision = true;
2e5bcac3 994 TRACE("Set SECCOMP_FILTER_FLAG_NEW_LISTENER attribute");
cdb2a47f
CB
995 }
996#endif
997
9c3798eb 998
0ff0d23e
RJ
999 ret = do_resolve_add_rule(SCMP_ARCH_NATIVE, line,
1000 conf->seccomp.seccomp_ctx, &rule);
1001 if (ret == lxc_seccomp_rule_err)
1002 goto bad_rule;
1003 if (ret == lxc_seccomp_rule_undefined_syscall)
1004 continue;
94d56054 1005
15044cd1
RJ
1006 for (int i = 0; i < 3; i++ ) {
1007 uint32_t arch = ctx.architectures[i];
1008 if (arch != SCMP_ARCH_NATIVE && arch != seccomp_arch_native()) {
1009 if (lxc_seccomp_rule_err == do_resolve_add_rule(arch, line,
1010 ctx.contexts[i], &rule))
1011 goto bad_rule;
1012 }
3e9671a1 1013 }
f1bcfc79 1014
50798138 1015 }
d58c6ad0 1016
d648e178 1017 INFO("Merging compat seccomp contexts into main context");
eca6736e
CB
1018 if (ctx.contexts[0]) {
1019 if (ctx.needs_merge[0]) {
c3e3c21a 1020 ret = seccomp_merge(conf->seccomp.seccomp_ctx, ctx.contexts[0]);
b5ed021b 1021 if (ret < 0) {
3e9671a1
CB
1022 ERROR("Failed to merge first compat seccomp "
1023 "context into main context");
b5ed021b
CB
1024 goto bad;
1025 }
9c3798eb 1026
b5ed021b 1027 TRACE("Merged first compat seccomp context into main context");
d648e178 1028 } else {
eca6736e
CB
1029 seccomp_release(ctx.contexts[0]);
1030 ctx.contexts[0] = NULL;
b5ed021b 1031 }
d648e178 1032 }
b5ed021b 1033
eca6736e
CB
1034 if (ctx.contexts[1]) {
1035 if (ctx.needs_merge[1]) {
c3e3c21a 1036 ret = seccomp_merge(conf->seccomp.seccomp_ctx, ctx.contexts[1]);
b5ed021b 1037 if (ret < 0) {
3e9671a1
CB
1038 ERROR("Failed to merge first compat seccomp "
1039 "context into main context");
b5ed021b
CB
1040 goto bad;
1041 }
9c3798eb 1042
b5ed021b 1043 TRACE("Merged second compat seccomp context into main context");
d648e178 1044 } else {
eca6736e
CB
1045 seccomp_release(ctx.contexts[1]);
1046 ctx.contexts[1] = NULL;
1047 }
1048 }
1049
1050 if (ctx.contexts[2]) {
1051 if (ctx.needs_merge[2]) {
c3e3c21a 1052 ret = seccomp_merge(conf->seccomp.seccomp_ctx, ctx.contexts[2]);
eca6736e 1053 if (ret < 0) {
3e9671a1
CB
1054 ERROR("Failed to merge third compat seccomp "
1055 "context into main context");
eca6736e
CB
1056 goto bad;
1057 }
9c3798eb 1058
eca6736e
CB
1059 TRACE("Merged third compat seccomp context into main context");
1060 } else {
1061 seccomp_release(ctx.contexts[2]);
1062 ctx.contexts[2] = NULL;
50798138
SH
1063 }
1064 }
6166fa6d 1065
9dbd8ff3 1066 free(line);
50798138
SH
1067 return 0;
1068
1069bad_arch:
9c3798eb
CB
1070 ERROR("Unsupported architecture \"%s\"", line);
1071
50798138 1072bad_rule:
d58c6ad0 1073bad:
eca6736e
CB
1074 if (ctx.contexts[0])
1075 seccomp_release(ctx.contexts[0]);
9c3798eb 1076
eca6736e
CB
1077 if (ctx.contexts[1])
1078 seccomp_release(ctx.contexts[1]);
9c3798eb 1079
eca6736e
CB
1080 if (ctx.contexts[2])
1081 seccomp_release(ctx.contexts[2]);
1082
9dbd8ff3 1083 free(line);
9c3798eb 1084
50798138 1085 return -1;
d58c6ad0
SH
1086}
1087#else /* HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH */
/*
 * Fallback built when HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH is not set:
 * version 2 policies cannot be parsed in that configuration, so this always
 * fails with -1.
 *
 * The parameter list matches the four-argument call made by parse_config().
 * parse_config() returns the parser's result without freeing the line buffer
 * itself, so the buffer is released here to avoid leaking it.
 */
static int parse_config_v2(FILE *f, char *line, size_t *line_bufsz, struct lxc_conf *conf)
{
	(void)f;
	(void)line_bufsz;
	(void)conf;

	free(line);
	return -1;
}
d58c6ad0 1092#endif /* HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH */
50798138 1093
8f2c3a70
SH
/*
 * Parse the header of a seccomp policy file and dispatch to the parser for
 * its version.
 *
 * Layout of the file:
 *   - first line: policy language version, which must be 1 or 2
 *   - second line: directives
 *   - remaining lines: the policy itself, handled by the version parser
 *
 * Checks done here on the directive line: a version 1 policy must contain
 * "allowlist", and "debug" is rejected for every version because it is not
 * implemented yet. Everything else is left to parse_config_v1() or
 * parse_config_v2(), which receive the directive line buffer.
 *
 * Returns the version parser's result, or -1 if the header is invalid.
 */
static int parse_config(FILE *f, struct lxc_conf *conf)
{
	char *directives = NULL;
	size_t directives_sz = 0;
	int version;

	if (fscanf(f, "%d\n", &version) != 1 || !(version == 1 || version == 2)) {
		ERROR("Invalid version");
		return -1;
	}

	if (getline(&directives, &directives_sz, f) == -1) {
		ERROR("Invalid config file");
	} else if (version == 1 && strstr(directives, "allowlist") == NULL) {
		ERROR("Only allowlist policy is supported");
	} else if (strstr(directives, "debug") != NULL) {
		ERROR("Debug not yet implemented");
	} else if (version == 1) {
		return parse_config_v1(f, directives, &directives_sz, conf);
	} else {
		return parse_config_v2(f, directives, &directives_sz, conf);
	}

	/* Only the rejected-header cases fall through to here. */
	free(directives);
	return -1;
}
1138
cd75548b
SH
1139/*
1140 * use_seccomp: return true if we should try and apply a seccomp policy
1141 * if defined for the container.
1142 * This will return false if
1143 * 1. seccomp is not enabled in the kernel
1144 * 2. a seccomp policy is already enabled for this task
1145 */
50d86993 1146static bool use_seccomp(const struct lxc_conf *conf)
d58c6ad0 1147{
4110345b
CB
1148 __do_free char *line = NULL;
1149 __do_fclose FILE *f = NULL;
d58c6ad0 1150 int ret, v;
6ca8172d 1151 size_t line_bufsz = 0;
6ca8172d 1152 bool already_enabled = false, found = false;
d58c6ad0 1153
c3e3c21a 1154 if (conf->seccomp.allow_nesting > 0)
50d86993
CB
1155 return true;
1156
4110345b 1157 f = fopen("/proc/self/status", "re");
d58c6ad0 1158 if (!f)
cd75548b 1159 return true;
d58c6ad0 1160
6ca8172d 1161 while (getline(&line, &line_bufsz, f) != -1) {
becc8d20 1162 if (strnequal(line, "Seccomp:", 8)) {
cd75548b 1163 found = true;
6ca8172d 1164
f06c6207 1165 ret = sscanf(line + 8, "%d", &v);
cd75548b
SH
1166 if (ret == 1 && v != 0)
1167 already_enabled = true;
6ca8172d 1168
cd75548b 1169 break;
d58c6ad0
SH
1170 }
1171 }
6ca8172d
CB
1172
1173 if (!found) {
3ee26d19 1174 INFO("Seccomp is not enabled in the kernel");
cd75548b
SH
1175 return false;
1176 }
6ca8172d
CB
1177
1178 if (already_enabled) {
3ee26d19 1179 INFO("Already seccomp-confined, not loading new policy");
cd75548b
SH
1180 return false;
1181 }
6ca8172d 1182
cd75548b 1183 return true;
d58c6ad0
SH
1184}
1185
8f2c3a70
SH
1186int lxc_read_seccomp_config(struct lxc_conf *conf)
1187{
4110345b 1188 __do_fclose FILE *f = NULL;
cf6624c1 1189 int ret;
8f2c3a70 1190
c3e3c21a 1191 if (!conf->seccomp.seccomp)
769872f9
SH
1192 return 0;
1193
50d86993 1194 if (!use_seccomp(conf))
d58c6ad0 1195 return 0;
47f6d547 1196
769872f9
SH
1197#if HAVE_SCMP_FILTER_CTX
1198 /* XXX for debug, pass in SCMP_ACT_TRAP */
c3e3c21a
CB
1199 conf->seccomp.seccomp_ctx = seccomp_init(SCMP_ACT_KILL);
1200 ret = !conf->seccomp.seccomp_ctx;
769872f9 1201#else
50798138 1202 ret = seccomp_init(SCMP_ACT_KILL) < 0;
769872f9
SH
1203#endif
1204 if (ret) {
3ee26d19 1205 ERROR("Failed initializing seccomp");
8f2c3a70
SH
1206 return -1;
1207 }
8f2c3a70 1208
47f6d547 1209/* turn off no-new-privs. We don't want it in lxc, and it breaks
f06c6207 1210 * with apparmor */
769872f9 1211#if HAVE_SCMP_FILTER_CTX
c3e3c21a 1212 ret = seccomp_attr_set(conf->seccomp.seccomp_ctx, SCMP_FLTATR_CTL_NNP, 0);
727c3073 1213#else
cf6624c1 1214 ret = seccomp_attr_set(SCMP_FLTATR_CTL_NNP, 0);
769872f9 1215#endif
cf6624c1 1216 if (ret < 0) {
6d1400b5 1217 errno = -ret;
1218 SYSERROR("Failed to turn off no-new-privs");
8f2c3a70
SH
1219 return -1;
1220 }
a24c5678 1221
127c5293 1222#ifdef SCMP_FLTATR_ATL_TSKIP
c3e3c21a 1223 ret = seccomp_attr_set(conf->seccomp.seccomp_ctx, SCMP_FLTATR_ATL_TSKIP, 1);
a24c5678 1224 if (ret < 0) {
1225 errno = -ret;
1226 SYSWARN("Failed to turn on seccomp nop-skip, continuing");
1227 }
127c5293 1228#endif
8f2c3a70 1229
4110345b 1230 f = fopen(conf->seccomp.seccomp, "re");
8f2c3a70 1231 if (!f) {
c3e3c21a 1232 SYSERROR("Failed to open seccomp policy file %s", conf->seccomp.seccomp);
8f2c3a70
SH
1233 return -1;
1234 }
47f6d547 1235
4110345b 1236 return parse_config(f, conf);
8f2c3a70
SH
1237}
1238
1239int lxc_seccomp_load(struct lxc_conf *conf)
1240{
1241 int ret;
47f6d547 1242
c3e3c21a 1243 if (!conf->seccomp.seccomp)
8f2c3a70 1244 return 0;
47f6d547 1245
50d86993 1246 if (!use_seccomp(conf))
d58c6ad0 1247 return 0;
47f6d547 1248
769872f9 1249#if HAVE_SCMP_FILTER_CTX
c3e3c21a 1250 ret = seccomp_load(conf->seccomp.seccomp_ctx);
47f6d547
CB
1251#else
1252 ret = seccomp_load();
769872f9 1253#endif
8f2c3a70 1254 if (ret < 0) {
6d1400b5 1255 errno = -ret;
1256 SYSERROR("Error loading the seccomp policy");
8f2c3a70
SH
1257 return -1;
1258 }
5107af32 1259
1260/* After load seccomp filter into the kernel successfully, export the current seccomp
1261 * filter to log file */
1262#if HAVE_SCMP_FILTER_CTX
de96cd60 1263 if (lxc_log_trace()) {
25a8b256
CB
1264 int fd_log;
1265
1266 fd_log = lxc_log_get_fd();
1267 if (fd_log >= 0) {
1268 ret = seccomp_export_pfc(conf->seccomp.seccomp_ctx, fd_log);
1269 if (ret < 0) {
1270 errno = -ret;
1271 SYSWARN("Failed to export seccomp filter to log file");
1272 }
a24c5678 1273 }
5107af32 1274 }
1275#endif
47f6d547 1276
d7d2d2d9 1277#if HAVE_DECL_SECCOMP_NOTIFY_FD
c3e3c21a 1278 if (conf->seccomp.notifier.wants_supervision) {
da9c8317 1279 ret = seccomp_notify_fd(conf->seccomp.seccomp_ctx);
cdb2a47f
CB
1280 if (ret < 0) {
1281 errno = -ret;
1282 return -1;
1283 }
1284
a60c98aa
CB
1285 if (fd_make_nonblocking(ret))
1286 return log_error_errno(-1, errno, "Failed to make seccomp listener fd non-blocking");;
1287
c3e3c21a 1288 conf->seccomp.notifier.notify_fd = ret;
cdb2a47f
CB
1289 TRACE("Retrieved new seccomp listener fd %d", ret);
1290 }
1291#endif
1292
8f2c3a70
SH
1293 return 0;
1294}
769872f9 1295
c3e3c21a 1296void lxc_seccomp_free(struct lxc_seccomp *seccomp)
f06c6207 1297{
c3e3c21a 1298 free_disarm(seccomp->seccomp);
47f6d547 1299
769872f9 1300#if HAVE_SCMP_FILTER_CTX
c3e3c21a
CB
1301 if (seccomp->seccomp_ctx) {
1302 seccomp_release(seccomp->seccomp_ctx);
1303 seccomp->seccomp_ctx = NULL;
769872f9
SH
1304 }
1305#endif
cdb2a47f 1306
d7d2d2d9 1307#if HAVE_DECL_SECCOMP_NOTIFY_FD
c3e3c21a
CB
1308 close_prot_errno_disarm(seccomp->notifier.notify_fd);
1309 close_prot_errno_disarm(seccomp->notifier.proxy_fd);
99656206 1310 seccomp_notify_free(seccomp->notifier.req_buf, seccomp->notifier.rsp_buf);
c3e3c21a
CB
1311 seccomp->notifier.req_buf = NULL;
1312 seccomp->notifier.rsp_buf = NULL;
5dd07023 1313 free_disarm(seccomp->notifier.cookie);
cdb2a47f
CB
1314#endif
1315}
1316
d7d2d2d9 1317#if HAVE_DECL_SECCOMP_NOTIFY_FD
e35b7bf8
CB
1318static int seccomp_notify_reconnect(struct lxc_handler *handler)
1319{
f62cf1d4 1320 __do_close int notify_fd = -EBADF;
e35b7bf8 1321
c3e3c21a 1322 close_prot_errno_disarm(handler->conf->seccomp.notifier.proxy_fd);
e35b7bf8 1323
045ee721
WB
1324 notify_fd = lxc_unix_connect_type(
1325 &handler->conf->seccomp.notifier.proxy_addr, SOCK_SEQPACKET);
e35b7bf8
CB
1326 if (notify_fd < 0) {
1327 SYSERROR("Failed to reconnect to seccomp proxy");
1328 return -1;
1329 }
1330
1331 /* 30 second timeout */
1332 if (lxc_socket_set_timeout(notify_fd, 30, 30)) {
1333 SYSERROR("Failed to set socket timeout");
1334 return -1;
1335 }
c3e3c21a 1336 handler->conf->seccomp.notifier.proxy_fd = move_fd(notify_fd);
e35b7bf8
CB
1337 return 0;
1338}
1339#endif
1340
d7d2d2d9 1341#if HAVE_DECL_SECCOMP_NOTIFY_FD
651e63a7
WB
1342static void seccomp_notify_default_answer(int fd, struct seccomp_notif *req,
1343 struct seccomp_notif_resp *resp,
1344 struct lxc_handler *handler)
e35b7bf8
CB
1345{
1346 resp->id = req->id;
1347 resp->error = -ENOSYS;
dc70d7e4
CB
1348 resp->val = 0;
1349 resp->flags = 0;
e35b7bf8 1350
3c216fe2 1351 if (seccomp_notify_respond(fd, resp))
50926f4b
CB
1352 SYSERROR("Failed to send default message to seccomp notification with id(%llu)",
1353 (long long unsigned int)resp->id);
dc70d7e4 1354 else
50926f4b
CB
1355 TRACE("Sent default response for seccomp notification with id(%llu)",
1356 (long long unsigned int)resp->id);
dc70d7e4 1357 memset(resp, 0, handler->conf->seccomp.notifier.sizes.seccomp_notif_resp);
e35b7bf8
CB
1358}
1359#endif
1360
543d2f83
CB
/*
 * Mainloop cleanup callback for a seccomp notify fd.
 *
 * @fd:   the descriptor being cleaned up
 * @data: the struct lxc_handler registered together with the fd
 *
 * Always returns 0. Without HAVE_DECL_SECCOMP_NOTIFY_FD this is a no-op.
 */
int seccomp_notify_cleanup_handler(int fd, void *data)
{
#if HAVE_DECL_SECCOMP_NOTIFY_FD
	struct lxc_handler *handler = data;
	struct lxc_conf *cfg = handler->conf;

	/* TODO: Make sure that we don't need to free any memory in here. */

	/*
	 * Only the container's main notify_fd is recorded in the config. Any
	 * other fd was registered by someone through the command socket
	 * (e.g. for attach), and in that case the container's config is left
	 * untouched.
	 * NOTE(review): move_fd() presumably resets the stored descriptor -
	 * confirm against its definition.
	 */
	if (cfg->seccomp.notifier.notify_fd == fd)
		fd = move_fd(cfg->seccomp.notifier.notify_fd);
#endif
	return 0;
}
1379
cdb2a47f 1380int seccomp_notify_handler(int fd, uint32_t events, void *data,
3298b37d 1381 struct lxc_async_descr *descr)
cdb2a47f
CB
1382{
1383
d7d2d2d9 1384#if HAVE_DECL_SECCOMP_NOTIFY_FD
f62cf1d4
CB
1385 __do_close int fd_pid = -EBADF;
1386 __do_close int fd_mem = -EBADF;
8a99ab01 1387 int ret;
e35b7bf8 1388 ssize_t bytes;
ec49d30f 1389 int send_fd_list[3];
4a094eec
WB
1390 struct iovec iov[4];
1391 size_t iov_len, msg_base_size, msg_full_size;
18847d37
CB
1392 char mem_path[6 /* /proc/ */
1393 + INTTYPE_TO_STRLEN(int64_t)
1394 + 3 /* mem */
1395 + 1 /* \0 */];
3745ee0e 1396 bool reconnected = false;
cdb2a47f
CB
1397 struct lxc_handler *hdlr = data;
1398 struct lxc_conf *conf = hdlr->conf;
c3e3c21a
CB
1399 struct seccomp_notif *req = conf->seccomp.notifier.req_buf;
1400 struct seccomp_notif_resp *resp = conf->seccomp.notifier.rsp_buf;
1401 int listener_proxy_fd = conf->seccomp.notifier.proxy_fd;
37046066 1402 struct seccomp_notify_proxy_msg msg = {0};
4a094eec 1403 char *cookie = conf->seccomp.notifier.cookie;
a76fe490 1404 __u64 req_id;
cdb2a47f 1405
543d2f83
CB
1406 if (events & EPOLLHUP)
1407 return log_trace(LXC_MAINLOOP_DISARM, "Removing seccomp notifier fd %d", fd);
b2acb9dc 1408
dc70d7e4 1409 memset(req, 0, conf->seccomp.notifier.sizes.seccomp_notif);
e3998402 1410 ret = seccomp_notify_receive(fd, req);
e35b7bf8 1411 if (ret) {
0d724ab4
CB
1412 if (errno == ENOENT)
1413 TRACE("Intercepted system call aborted");
1414 else
1415 SYSERROR("Failed to read seccomp notification");
e35b7bf8
CB
1416 goto out;
1417 }
cdb2a47f 1418
5357b872 1419 if (listener_proxy_fd < 0) {
ed3a98c4
WB
1420 ret = -1;
1421 /* Same condition as for the initial setup_proxy() */
1422 if (conf->seccomp.notifier.wants_supervision &&
1423 conf->seccomp.notifier.proxy_addr.sun_path[1] != '\0') {
1424 ret = seccomp_notify_reconnect(hdlr);
1425 }
1426 if (ret) {
1427 ERROR("No seccomp proxy registered");
651e63a7
WB
1428 seccomp_notify_default_answer(fd, req, resp, hdlr);
1429 goto out;
ed3a98c4
WB
1430 }
1431 listener_proxy_fd = conf->seccomp.notifier.proxy_fd;
5357b872
WB
1432 }
1433
4a094eec
WB
1434 /* remember the ID in case we receive garbage from the proxy */
1435 resp->id = req_id = req->id;
50926f4b 1436 TRACE("Received seccomp notification with id(%llu)", (long long unsigned int)req_id);
4a094eec 1437
8a6bea94
CB
1438 ret = strnprintf(mem_path, sizeof(mem_path), "/proc/%d", req->pid);
1439 if (ret < 0) {
1440 seccomp_notify_default_answer(fd, req, resp, hdlr);
1441 SYSERROR("Failed to create path to process's proc directory");
1442 goto out;
1443 }
1444
637996a4
WB
1445 fd_pid = open(mem_path, O_RDONLY | O_DIRECTORY | O_CLOEXEC);
1446 if (fd_pid < 0) {
1447 seccomp_notify_default_answer(fd, req, resp, hdlr);
1448 SYSERROR("Failed to open process pidfd for seccomp notify request");
1449 goto out;
1450 }
1451
8a6bea94
CB
1452 ret = strnprintf(mem_path, sizeof(mem_path), "/proc/%d/mem", req->pid);
1453 if (ret < 0) {
1454 seccomp_notify_default_answer(fd, req, resp, hdlr);
1455 SYSERROR("Failed to create path to process's virtual memory");
1456 goto out;
1457 }
1458
aad859c4 1459 fd_mem = open(mem_path, O_RDWR | O_CLOEXEC);
5ed06d3a 1460 if (fd_mem < 0) {
651e63a7 1461 seccomp_notify_default_answer(fd, req, resp, hdlr);
5ed06d3a
CB
1462 SYSERROR("Failed to open process memory for seccomp notify request");
1463 goto out;
1464 }
1465
1466 /*
1467 * Make sure that the fd for /proc/<pid>/mem we just opened still
1468 * refers to the correct process's memory.
1469 */
72b101ae 1470 ret = seccomp_notify_id_valid(fd, req->id);
5ed06d3a 1471 if (ret < 0) {
651e63a7 1472 seccomp_notify_default_answer(fd, req, resp, hdlr);
50926f4b 1473 SYSERROR("Invalid seccomp notify request id(%llu)", (long long unsigned int)req->id);
5ed06d3a
CB
1474 goto out;
1475 }
1476
cdb2a47f
CB
1477 msg.monitor_pid = hdlr->monitor_pid;
1478 msg.init_pid = hdlr->pid;
4a094eec
WB
1479 memcpy(&msg.sizes, &conf->seccomp.notifier.sizes, sizeof(msg.sizes));
1480
1481 msg_base_size = 0;
1482 iov[0].iov_base = &msg;
1483 msg_base_size += (iov[0].iov_len = sizeof(msg));
1484 iov[1].iov_base = req;
1485 msg_base_size += (iov[1].iov_len = msg.sizes.seccomp_notif);
1486 iov[2].iov_base = resp;
1487 msg_base_size += (iov[2].iov_len = msg.sizes.seccomp_notif_resp);
1488 msg_full_size = msg_base_size;
1489
1490 if (cookie) {
1491 size_t len = strlen(cookie);
1492
1493 msg.cookie_len = (uint64_t)len;
1494
1495 iov[3].iov_base = cookie;
1496 msg_full_size += (iov[3].iov_len = len);
1497
1498 iov_len = 4;
1499 } else {
1500 iov_len = 3;
1501 }
cdb2a47f 1502
637996a4
WB
1503 send_fd_list[0] = fd_pid;
1504 send_fd_list[1] = fd_mem;
ec49d30f 1505 send_fd_list[2] = fd;
637996a4 1506
3745ee0e 1507retry:
ec49d30f 1508 bytes = lxc_abstract_unix_send_fds_iov(listener_proxy_fd, send_fd_list, 3, iov, iov_len);
8a99ab01
WB
1509 if (bytes != (ssize_t)msg_full_size) {
1510 SYSERROR("Failed to forward message to seccomp proxy");
3745ee0e
WB
1511 if (!reconnected) {
1512 ret = seccomp_notify_reconnect(hdlr);
1513 if (ret == 0) {
1514 reconnected = true;
1515 goto retry;
1516 }
1517 }
1518
651e63a7 1519 seccomp_notify_default_answer(fd, req, resp, hdlr);
8a99ab01
WB
1520 goto out;
1521 }
e35b7bf8 1522
5ed06d3a
CB
1523 close_prot_errno_disarm(fd_mem);
1524
f910c9e5
WB
1525 if (msg.__reserved != 0) {
1526 ERROR("Proxy filled reserved data in response");
1527 seccomp_notify_default_answer(fd, req, resp, hdlr);
1528 goto out;
1529 }
1530
4a094eec 1531 if (resp->id != req_id) {
1c01dc2c 1532 ERROR("Proxy returned response with invalid id(%llu) != id(%llu)",
50926f4b 1533 (long long unsigned int)resp->id, (long long unsigned int)req_id);
4a094eec 1534 resp->id = req_id;
651e63a7 1535 seccomp_notify_default_answer(fd, req, resp, hdlr);
4a094eec
WB
1536 goto out;
1537 }
1538
cbbdd1dd 1539 bytes = lxc_recvmsg_nointr_iov(listener_proxy_fd, iov, iov_len, MSG_TRUNC);
8a99ab01
WB
1540 if (bytes != (ssize_t)msg_base_size) {
1541 SYSERROR("Failed to receive message from seccomp proxy");
651e63a7 1542 seccomp_notify_default_answer(fd, req, resp, hdlr);
8a99ab01
WB
1543 goto out;
1544 }
cdb2a47f 1545
a76fe490 1546 if (resp->id != req_id) {
1c01dc2c 1547 ERROR("Proxy returned response with invalid id(%llu) != id(%llu)",
50926f4b 1548 (long long unsigned int)resp->id, (long long unsigned int)req_id);
a76fe490
CB
1549 resp->id = req_id;
1550 }
1551
3c216fe2 1552 ret = seccomp_notify_respond(fd, resp);
cdb2a47f 1553 if (ret)
e35b7bf8 1554 SYSERROR("Failed to send seccomp notification");
a76fe490 1555 else
50926f4b
CB
1556 TRACE("Sent response for seccomp notification with id(%llu)",
1557 (long long unsigned int)resp->id);
dc70d7e4 1558 memset(resp, 0, conf->seccomp.notifier.sizes.seccomp_notif_resp);
cdb2a47f 1559
e35b7bf8 1560out:
cdb2a47f 1561#endif
f7a97743 1562 return LXC_MAINLOOP_CONTINUE;
769872f9 1563}
c3e3c21a
CB
1564
1565void seccomp_conf_init(struct lxc_conf *conf)
1566{
1567 conf->seccomp.seccomp = NULL;
1568#if HAVE_SCMP_FILTER_CTX
1569 conf->seccomp.allow_nesting = 0;
1570 memset(&conf->seccomp.seccomp_ctx, 0, sizeof(conf->seccomp.seccomp_ctx));
1571#endif /* HAVE_SCMP_FILTER_CTX */
d7d2d2d9 1572#if HAVE_DECL_SECCOMP_NOTIFY_FD
c3e3c21a
CB
1573 conf->seccomp.notifier.wants_supervision = false;
1574 conf->seccomp.notifier.notify_fd = -EBADF;
1575 conf->seccomp.notifier.proxy_fd = -EBADF;
1576 memset(&conf->seccomp.notifier.proxy_addr, 0,
1577 sizeof(conf->seccomp.notifier.proxy_addr));
1578 conf->seccomp.notifier.req_buf = NULL;
1579 conf->seccomp.notifier.rsp_buf = NULL;
5dd07023 1580 conf->seccomp.notifier.cookie = NULL;
c3e3c21a
CB
1581#endif
1582}
1583
/*
 * Connect to the seccomp proxy and register the notify fd with the mainloop.
 *
 * Only acts when supervision was requested and a proxy address is
 * configured. In that case it connects to the proxy (SOCK_SEQPACKET,
 * 30 second timeouts), queries the kernel's notify struct sizes, allocates
 * the request/response buffers and adds seccomp->notifier.notify_fd to
 * @descr with seccomp_notify_handler() and seccomp_notify_cleanup_handler().
 * On success the proxy connection is stored in seccomp->notifier.proxy_fd.
 *
 * Returns 0 on success (or when there is nothing to do) and -1 on failure.
 * Without HAVE_DECL_SECCOMP_NOTIFY_FD this always returns 0.
 */
int lxc_seccomp_setup_proxy(struct lxc_seccomp *seccomp,
			    struct lxc_async_descr *descr,
			    struct lxc_handler *handler)
{
#if HAVE_DECL_SECCOMP_NOTIFY_FD
	if (seccomp->notifier.wants_supervision &&
	    seccomp->notifier.proxy_addr.sun_path[1] != '\0') {
		__do_close int notify_fd = -EBADF;
		int ret;

		notify_fd = lxc_unix_connect_type(&seccomp->notifier.proxy_addr,
						  SOCK_SEQPACKET);
		if (notify_fd < 0) {
			SYSERROR("Failed to connect to seccomp proxy");
			return -1;
		}

		/* 30 second timeout */
		ret = lxc_socket_set_timeout(notify_fd, 30, 30);
		if (ret) {
			SYSERROR("Failed to set timeouts for seccomp proxy");
			return -1;
		}

		ret = __seccomp(SECCOMP_GET_NOTIF_SIZES, 0,
				&seccomp->notifier.sizes);
		if (ret) {
			SYSERROR("Failed to query seccomp notify struct sizes");
			return -1;
		}

		ret = seccomp_notify_alloc(&seccomp->notifier.req_buf,
					   &seccomp->notifier.rsp_buf);
		if (ret) {
			ERROR("Failed to allocate seccomp notify request and response buffers");
			/*
			 * libseccomp reports failures as negative errno
			 * values; store the positive code, as the rest of
			 * this file does.
			 */
			errno = ret < 0 ? -ret : ret;
			return -1;
		}

		ret = lxc_mainloop_add_handler(descr, seccomp->notifier.notify_fd,
					       seccomp_notify_handler,
					       seccomp_notify_cleanup_handler,
					       handler,
					       "seccomp_notify_handler");
		if (ret < 0) {
			/* Report the fd that was being registered, not the proxy socket. */
			ERROR("Failed to add seccomp notify handler for %d to mainloop",
			      seccomp->notifier.notify_fd);
			return -1;
		}

		seccomp->notifier.proxy_fd = move_fd(notify_fd);
	}
#endif
	return 0;
}
1639
/*
 * Send the seccomp notify fd over @socket_fd when supervision was requested,
 * then close the local copy.
 *
 * Returns 0 on success or when there is nothing to send, -1 if sending
 * fails. Without HAVE_DECL_SECCOMP_NOTIFY_FD this always returns 0.
 */
int lxc_seccomp_send_notifier_fd(struct lxc_seccomp *seccomp, int socket_fd)
{
#if HAVE_DECL_SECCOMP_NOTIFY_FD
	int sent;

	if (!seccomp->notifier.wants_supervision)
		return 0;

	sent = lxc_abstract_unix_send_fds(socket_fd,
					  &seccomp->notifier.notify_fd, 1,
					  NULL, 0);
	if (sent < 0)
		return -1;

	/* The fd has been handed over; drop our copy. */
	close_prot_errno_disarm(seccomp->notifier.notify_fd);
#endif
	return 0;
}
1653
/*
 * Receive the seccomp notify fd from @socket_fd into
 * seccomp->notifier.notify_fd when supervision was requested.
 *
 * Returns 0 on success or when there is nothing to receive, -1 if receiving
 * fails. Without HAVE_DECL_SECCOMP_NOTIFY_FD this always returns 0.
 */
int lxc_seccomp_recv_notifier_fd(struct lxc_seccomp *seccomp, int socket_fd)
{
#if HAVE_DECL_SECCOMP_NOTIFY_FD
	if (!seccomp->notifier.wants_supervision)
		return 0;

	if (lxc_abstract_unix_recv_one_fd(socket_fd,
					  &seccomp->notifier.notify_fd,
					  NULL, 0) < 0)
		return -1;
#endif
	return 0;
}
1669
/*
 * Register the seccomp notify fd as a listener with the container identified
 * by @name and @lxcpath when supervision was requested. The local notify fd
 * is closed afterwards whether or not registration succeeded.
 *
 * Returns 0 on success or when there is nothing to register, -1 if the
 * command fails. Without HAVE_DECL_SECCOMP_NOTIFY_FD this always returns 0.
 */
int lxc_seccomp_add_notifier(const char *name, const char *lxcpath,
			     struct lxc_seccomp *seccomp)
{
#if HAVE_DECL_SECCOMP_NOTIFY_FD
	int rc;

	if (!seccomp->notifier.wants_supervision)
		return 0;

	rc = lxc_cmd_seccomp_notify_add_listener(name, lxcpath,
						 seccomp->notifier.notify_fd,
						 -1, 0);

	/* Close before checking rc so the fd is released on both outcomes. */
	close_prot_errno_disarm(seccomp->notifier.notify_fd);
	if (rc < 0)
		return -1;
#endif
	return 0;
}