]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/seccomp.c
lxccontainer: properly cleanup on mount injection failure
[mirror_lxc.git] / src / lxc / seccomp.c
1 /*
2 * lxc: linux Container library
3 *
4 * (C) Copyright Canonical, Inc. 2012
5 *
6 * Authors:
7 * Serge Hallyn <serge.hallyn@canonical.com>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #ifndef _GNU_SOURCE
25 #define _GNU_SOURCE 1
26 #endif
27 #include <errno.h>
28 #include <seccomp.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <sys/mount.h>
32 #include <sys/utsname.h>
33
34 #include "af_unix.h"
35 #include "commands.h"
36 #include "config.h"
37 #include "log.h"
38 #include "lxccontainer.h"
39 #include "lxcseccomp.h"
40 #include "mainloop.h"
41 #include "memory_utils.h"
42 #include "utils.h"
43
/*
 * Select the lxc_seccomp_arch_* constants that get_hostarch() returns for
 * machines reporting "mips" (MIPS_ARCH_O32) and "mips64" (MIPS_ARCH_N64):
 * the "mipsel" variants when the compiler defines __MIPSEL__, the plain
 * "mips" variants otherwise.
 */
#ifdef __MIPSEL__
#define MIPS_ARCH_O32 lxc_seccomp_arch_mipsel
#define MIPS_ARCH_N64 lxc_seccomp_arch_mipsel64
#else
#define MIPS_ARCH_O32 lxc_seccomp_arch_mips
#define MIPS_ARCH_N64 lxc_seccomp_arch_mips64
#endif

/*
 * NOTE(review): presumably sets up the "seccomp" log category used by the
 * ERROR/WARN/INFO/TRACE macros in this file - confirm against log.h.
 */
lxc_log_define(seccomp, lxc);
53
54 static int parse_config_v1(FILE *f, char *line, size_t *line_bufsz, struct lxc_conf *conf)
55 {
56 int ret = 0;
57
58 while (getline(&line, line_bufsz, f) != -1) {
59 int nr;
60
61 ret = sscanf(line, "%d", &nr);
62 if (ret != 1) {
63 ret = -1;
64 break;
65 }
66
67 #if HAVE_SCMP_FILTER_CTX
68 ret = seccomp_rule_add(conf->seccomp.seccomp_ctx, SCMP_ACT_ALLOW, nr, 0);
69 #else
70 ret = seccomp_rule_add(SCMP_ACT_ALLOW, nr, 0);
71 #endif
72 if (ret < 0) {
73 ERROR("Failed loading allow rule for %d", nr);
74 break;
75 }
76 }
77 free(line);
78
79 return ret;
80 }
81
82 #if HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH
83 static const char *get_action_name(uint32_t action)
84 {
85 /* The upper 16 bits indicate the type of the seccomp action. */
86 switch (action & 0xffff0000) {
87 case SCMP_ACT_KILL:
88 return "kill";
89 case SCMP_ACT_ALLOW:
90 return "allow";
91 case SCMP_ACT_TRAP:
92 return "trap";
93 case SCMP_ACT_ERRNO(0):
94 return "errno";
95 #if HAVE_DECL_SECCOMP_NOTIFY_FD
96 case SCMP_ACT_NOTIFY:
97 return "notify";
98 #endif
99 }
100
101 return "invalid action";
102 }
103
104 static uint32_t get_v2_default_action(char *line)
105 {
106 uint32_t ret_action = -1;
107
108 while (*line == ' ')
109 line++;
110
111 /* After 'whitelist' or 'blacklist' comes default behavior. */
112 if (strncmp(line, "kill", 4) == 0) {
113 ret_action = SCMP_ACT_KILL;
114 } else if (strncmp(line, "errno", 5) == 0) {
115 int e, ret;
116
117 ret = sscanf(line + 5, "%d", &e);
118 if (ret != 1) {
119 ERROR("Failed to parse errno value from %s", line);
120 return -2;
121 }
122
123 ret_action = SCMP_ACT_ERRNO(e);
124 } else if (strncmp(line, "allow", 5) == 0) {
125 ret_action = SCMP_ACT_ALLOW;
126 } else if (strncmp(line, "trap", 4) == 0) {
127 ret_action = SCMP_ACT_TRAP;
128 #if HAVE_DECL_SECCOMP_NOTIFY_FD
129 } else if (strncmp(line, "notify", 6) == 0) {
130 ret_action = SCMP_ACT_NOTIFY;
131 #endif
132 } else if (line[0]) {
133 ERROR("Unrecognized seccomp action \"%s\"", line);
134 return -2;
135 }
136
137 return ret_action;
138 }
139
/*
 * Extract the optional action that follows the syscall name on a rule line.
 *
 * @line       : rule line; the action is whatever follows the first space.
 * @def_action : action to use when the line names none (no space, only
 *               blanks, or a '#' comment after the syscall name).
 *
 * Returns the parsed action, @def_action when none is given, or (uint32_t)-1
 * when the action text cannot be interpreted.
 */
static uint32_t get_v2_action(char *line, uint32_t def_action)
{
	uint32_t parsed;
	char *cursor = strchr(line, ' ');

	if (!cursor)
		return def_action;

	/* Step over the separator and any blanks that follow it. */
	do {
		cursor++;
	} while (*cursor == ' ');

	if (*cursor == '\0' || *cursor == '#')
		return def_action;

	parsed = get_v2_default_action(cursor);
	if (parsed == (uint32_t)-2)
		return -1;

	if (parsed == (uint32_t)-1)
		return def_action;

	return parsed;
}
166
/*
 * One parsed syscall-argument comparison of a version 2 rule, as filled in by
 * get_seccomp_arg_value().
 */
struct seccomp_v2_rule_args {
	/* Position of the syscall argument; the parser rejects values >= 6. */
	uint32_t index;
	/* Value the argument is compared against. */
	uint64_t value;
	/* Mask; do_resolve_add_rule() only uses it for SCMP_CMP_MASKED_EQ. */
	uint64_t mask;
	/* Comparison operator. */
	enum scmp_compare op;
};
173
/* A parsed version 2 rule: the action plus up to six argument comparisons. */
struct seccomp_v2_rule {
	/* Seccomp action applied when the rule matches. */
	uint32_t action;
	/* Number of valid entries in args_value. */
	uint32_t args_num;
	/* Argument comparisons; only the first args_num entries are used. */
	struct seccomp_v2_rule_args args_value[6];
};
179
180 static enum scmp_compare parse_v2_rule_op(char *s)
181 {
182 if (strcmp(s, "SCMP_CMP_NE") == 0 || strcmp(s, "!=") == 0)
183 return SCMP_CMP_NE;
184 else if (strcmp(s, "SCMP_CMP_LT") == 0 || strcmp(s, "<") == 0)
185 return SCMP_CMP_LT;
186 else if (strcmp(s, "SCMP_CMP_LE") == 0 || strcmp(s, "<=") == 0)
187 return SCMP_CMP_LE;
188 else if (strcmp(s, "SCMP_CMP_EQ") == 0 || strcmp(s, "==") == 0)
189 return SCMP_CMP_EQ;
190 else if (strcmp(s, "SCMP_CMP_GE") == 0 || strcmp(s, ">=") == 0)
191 return SCMP_CMP_GE;
192 else if (strcmp(s, "SCMP_CMP_GT") == 0 || strcmp(s, ">") == 0)
193 return SCMP_CMP_GT;
194 else if (strcmp(s, "SCMP_CMP_MASKED_EQ") == 0 || strcmp(s, "&=") == 0)
195 return SCMP_CMP_MASKED_EQ;
196
197 return _SCMP_CMP_MAX;
198 }
199
200 /*
201 * This function is used to parse the args string into the structure.
202 * args string format:[index,value,op,mask] or [index,value,op]
203 * index: the index for syscall arguments (type uint)
204 * value: the value for syscall arguments (type uint64)
205 * op: the operator for syscall arguments(string),
206 a valid list of constants as of libseccomp v2.3.2 is
207 SCMP_CMP_NE,SCMP_CMP_LE,SCMP_CMP_LE, SCMP_CMP_EQ, SCMP_CMP_GE,
208 SCMP_CMP_GT, SCMP_CMP_MASKED_EQ, or !=,<=,==,>=,>,&=
209 * mask: the mask to apply on "value" for SCMP_CMP_MASKED_EQ (type uint64, optional)
210 * Returns 0 on success, < 0 otherwise.
211 */
212 static int get_seccomp_arg_value(char *key, struct seccomp_v2_rule_args *rule_args)
213 {
214 int ret = 0;
215 uint32_t index = 0;
216 uint64_t mask = 0, value = 0;
217 enum scmp_compare op = 0;
218 char *tmp = NULL;
219 char s[31] = {0}, v[24] = {0}, m[24] = {'0'};
220
221 tmp = strchr(key, '[');
222 if (!tmp) {
223 ERROR("Failed to interpret args");
224 return -1;
225 }
226
227 ret = sscanf(tmp, "[%i,%23[^,],%30[^0-9^,],%23[^,]", &index, v, s, m);
228 if ((ret != 3 && ret != 4) || index >= 6) {
229 ERROR("Failed to interpret args value");
230 return -1;
231 }
232
233 ret = lxc_safe_uint64(v, &value, 0);
234 if (ret < 0) {
235 ERROR("Invalid argument value");
236 return -1;
237 }
238
239 ret = lxc_safe_uint64(m, &mask, 0);
240 if (ret < 0) {
241 ERROR("Invalid argument mask");
242 return -1;
243 }
244
245 op = parse_v2_rule_op(s);
246 if (op == _SCMP_CMP_MAX) {
247 ERROR("Failed to interpret args operator value");
248 return -1;
249 }
250
251 rule_args->index = index;
252 rule_args->value = value;
253 rule_args->mask = mask;
254 rule_args->op = op;
255 return 0;
256 }
257
258 /* This function is used to parse the seccomp rule entry.
259 * @line : seccomp rule entry string.
260 * @def_action : default action used in the case if the 'line' contain non valid action.
261 * @rules : output struct.
262 * Returns 0 on success, < 0 otherwise.
263 */
264 static int parse_v2_rules(char *line, uint32_t def_action,
265 struct seccomp_v2_rule *rules)
266 {
267 int i = 0, ret = -1;
268 char *key = NULL, *saveptr = NULL, *tmp = NULL;
269
270 tmp = strdup(line);
271 if (!tmp)
272 return -1;
273
274 /* read optional action which follows the syscall */
275 rules->action = get_v2_action(tmp, def_action);
276 if (rules->action == -1) {
277 ERROR("Failed to interpret action");
278 ret = -1;
279 goto on_error;
280 }
281
282 ret = 0;
283 rules->args_num = 0;
284 if (!strchr(tmp, '['))
285 goto on_error;
286
287 ret = -1;
288 for ((key = strtok_r(tmp, "]", &saveptr)), i = 0; key && i < 6;
289 (key = strtok_r(NULL, "]", &saveptr)), i++) {
290 ret = get_seccomp_arg_value(key, &rules->args_value[i]);
291 if (ret < 0)
292 goto on_error;
293
294 rules->args_num++;
295 }
296
297 ret = 0;
298
299 on_error:
300 free(tmp);
301
302 return ret;
303 }
304 #endif
305
306 #if HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH
/*
 * Architectures known to the seccomp policy parser. The values are spelled
 * out explicitly; they are the same ones implicit numbering would assign.
 */
enum lxc_hostarch_t {
	lxc_seccomp_arch_all         = 0,
	lxc_seccomp_arch_native      = 1,
	lxc_seccomp_arch_i386        = 2,
	lxc_seccomp_arch_x32         = 3,
	lxc_seccomp_arch_amd64       = 4,
	lxc_seccomp_arch_arm         = 5,
	lxc_seccomp_arch_arm64       = 6,
	lxc_seccomp_arch_ppc64       = 7,
	lxc_seccomp_arch_ppc64le     = 8,
	lxc_seccomp_arch_ppc         = 9,
	lxc_seccomp_arch_mips        = 10,
	lxc_seccomp_arch_mips64      = 11,
	lxc_seccomp_arch_mips64n32   = 12,
	lxc_seccomp_arch_mipsel      = 13,
	lxc_seccomp_arch_mipsel64    = 14,
	lxc_seccomp_arch_mipsel64n32 = 15,
	lxc_seccomp_arch_s390x       = 16,
	lxc_seccomp_arch_unknown     = 999,
};
327
328 int get_hostarch(void)
329 {
330 struct utsname uts;
331 if (uname(&uts) < 0) {
332 SYSERROR("Failed to read host arch");
333 return -1;
334 }
335
336 if (strcmp(uts.machine, "i686") == 0)
337 return lxc_seccomp_arch_i386;
338 /* no x32 kernels */
339 else if (strcmp(uts.machine, "x86_64") == 0)
340 return lxc_seccomp_arch_amd64;
341 else if (strncmp(uts.machine, "armv7", 5) == 0)
342 return lxc_seccomp_arch_arm;
343 else if (strncmp(uts.machine, "aarch64", 7) == 0)
344 return lxc_seccomp_arch_arm64;
345 else if (strncmp(uts.machine, "ppc64le", 7) == 0)
346 return lxc_seccomp_arch_ppc64le;
347 else if (strncmp(uts.machine, "ppc64", 5) == 0)
348 return lxc_seccomp_arch_ppc64;
349 else if (strncmp(uts.machine, "ppc", 3) == 0)
350 return lxc_seccomp_arch_ppc;
351 else if (strncmp(uts.machine, "mips64", 6) == 0)
352 return MIPS_ARCH_N64;
353 else if (strncmp(uts.machine, "mips", 4) == 0)
354 return MIPS_ARCH_O32;
355 else if (strncmp(uts.machine, "s390x", 5) == 0)
356 return lxc_seccomp_arch_s390x;
357
358 return lxc_seccomp_arch_unknown;
359 }
360
361 scmp_filter_ctx get_new_ctx(enum lxc_hostarch_t n_arch,
362 uint32_t default_policy_action, bool *needs_merge)
363 {
364 int ret;
365 uint32_t arch;
366 scmp_filter_ctx ctx;
367
368 switch (n_arch) {
369 case lxc_seccomp_arch_i386:
370 arch = SCMP_ARCH_X86;
371 break;
372 case lxc_seccomp_arch_x32:
373 arch = SCMP_ARCH_X32;
374 break;
375 case lxc_seccomp_arch_amd64:
376 arch = SCMP_ARCH_X86_64;
377 break;
378 case lxc_seccomp_arch_arm:
379 arch = SCMP_ARCH_ARM;
380 break;
381 #ifdef SCMP_ARCH_AARCH64
382 case lxc_seccomp_arch_arm64:
383 arch = SCMP_ARCH_AARCH64;
384 break;
385 #endif
386 #ifdef SCMP_ARCH_PPC64LE
387 case lxc_seccomp_arch_ppc64le:
388 arch = SCMP_ARCH_PPC64LE;
389 break;
390 #endif
391 #ifdef SCMP_ARCH_PPC64
392 case lxc_seccomp_arch_ppc64:
393 arch = SCMP_ARCH_PPC64;
394 break;
395 #endif
396 #ifdef SCMP_ARCH_PPC
397 case lxc_seccomp_arch_ppc:
398 arch = SCMP_ARCH_PPC;
399 break;
400 #endif
401 #ifdef SCMP_ARCH_MIPS
402 case lxc_seccomp_arch_mips:
403 arch = SCMP_ARCH_MIPS;
404 break;
405 case lxc_seccomp_arch_mips64:
406 arch = SCMP_ARCH_MIPS64;
407 break;
408 case lxc_seccomp_arch_mips64n32:
409 arch = SCMP_ARCH_MIPS64N32;
410 break;
411 case lxc_seccomp_arch_mipsel:
412 arch = SCMP_ARCH_MIPSEL;
413 break;
414 case lxc_seccomp_arch_mipsel64:
415 arch = SCMP_ARCH_MIPSEL64;
416 break;
417 case lxc_seccomp_arch_mipsel64n32:
418 arch = SCMP_ARCH_MIPSEL64N32;
419 break;
420 #endif
421 #ifdef SCMP_ARCH_S390X
422 case lxc_seccomp_arch_s390x:
423 arch = SCMP_ARCH_S390X;
424 break;
425 #endif
426 default:
427 return NULL;
428 }
429
430 ctx = seccomp_init(default_policy_action);
431 if (!ctx) {
432 ERROR("Error initializing seccomp context");
433 return NULL;
434 }
435
436 ret = seccomp_attr_set(ctx, SCMP_FLTATR_CTL_NNP, 0);
437 if (ret < 0) {
438 errno = -ret;
439 SYSERROR("Failed to turn off no-new-privs");
440 seccomp_release(ctx);
441 return NULL;
442 }
443
444 #ifdef SCMP_FLTATR_ATL_TSKIP
445 ret = seccomp_attr_set(ctx, SCMP_FLTATR_ATL_TSKIP, 1);
446 if (ret < 0) {
447 errno = -ret;
448 SYSWARN("Failed to turn on seccomp nop-skip, continuing");
449 }
450 #endif
451
452 ret = seccomp_arch_exist(ctx, arch);
453 if (ret < 0) {
454 if (ret != -EEXIST) {
455 errno = -ret;
456 SYSERROR("Failed to determine whether arch %d is "
457 "already present in the main seccomp context",
458 (int)n_arch);
459 seccomp_release(ctx);
460 return NULL;
461 }
462
463 ret = seccomp_arch_add(ctx, arch);
464 if (ret != 0) {
465 errno = -ret;
466 SYSERROR("Failed to add arch %d to main seccomp context",
467 (int)n_arch);
468 seccomp_release(ctx);
469 return NULL;
470 }
471 TRACE("Added arch %d to main seccomp context", (int)n_arch);
472
473 ret = seccomp_arch_remove(ctx, SCMP_ARCH_NATIVE);
474 if (ret != 0) {
475 ERROR("Failed to remove native arch from main seccomp context");
476 seccomp_release(ctx);
477 return NULL;
478 }
479 TRACE("Removed native arch from main seccomp context");
480
481 *needs_merge = true;
482 } else {
483 *needs_merge = false;
484 TRACE("Arch %d already present in main seccomp context", (int)n_arch);
485 }
486
487 return ctx;
488 }
489
490 bool do_resolve_add_rule(uint32_t arch, char *line, scmp_filter_ctx ctx,
491 struct seccomp_v2_rule *rule)
492 {
493 int i, nr, ret;
494 struct scmp_arg_cmp arg_cmp[6];
495
496 ret = seccomp_arch_exist(ctx, arch);
497 if (arch && ret != 0) {
498 errno = -ret;
499 SYSERROR("Seccomp: rule and context arch do not match (arch %d)", arch);
500 return false;
501 }
502
503 /*get the syscall name*/
504 char *p = strchr(line, ' ');
505 if (p)
506 *p = '\0';
507
508 if (strncmp(line, "reject_force_umount", 19) == 0) {
509 ret = seccomp_rule_add_exact(ctx, SCMP_ACT_ERRNO(EACCES),
510 SCMP_SYS(umount2), 1,
511 SCMP_A1(SCMP_CMP_MASKED_EQ, MNT_FORCE, MNT_FORCE));
512 if (ret < 0) {
513 errno = -ret;
514 SYSERROR("Failed loading rule to reject force umount");
515 return false;
516 }
517
518 INFO("Set seccomp rule to reject force umounts");
519 return true;
520 }
521
522 nr = seccomp_syscall_resolve_name(line);
523 if (nr == __NR_SCMP_ERROR) {
524 WARN("Failed to resolve syscall \"%s\"", line);
525 WARN("This syscall will NOT be handled by seccomp");
526 return true;
527 }
528
529 if (nr < 0) {
530 WARN("Got negative return value %d for syscall \"%s\"", nr, line);
531 WARN("This syscall will NOT be handled by seccomp");
532 return true;
533 }
534
535 memset(&arg_cmp, 0, sizeof(arg_cmp));
536 for (i = 0; i < rule->args_num; i++) {
537 INFO("arg_cmp[%d]: SCMP_CMP(%u, %llu, %llu, %llu)", i,
538 rule->args_value[i].index,
539 (long long unsigned int)rule->args_value[i].op,
540 (long long unsigned int)rule->args_value[i].mask,
541 (long long unsigned int)rule->args_value[i].value);
542
543 if (SCMP_CMP_MASKED_EQ == rule->args_value[i].op)
544 arg_cmp[i] = SCMP_CMP(rule->args_value[i].index,
545 rule->args_value[i].op,
546 rule->args_value[i].mask,
547 rule->args_value[i].value);
548 else
549 arg_cmp[i] = SCMP_CMP(rule->args_value[i].index,
550 rule->args_value[i].op,
551 rule->args_value[i].value);
552 }
553
554 ret = seccomp_rule_add_exact_array(ctx, rule->action, nr,
555 rule->args_num, arg_cmp);
556 if (ret < 0) {
557 errno = -ret;
558 SYSERROR("Failed loading rule for %s (nr %d action %d (%s))",
559 line, nr, rule->action, get_action_name(rule->action));
560 return false;
561 }
562
563 return true;
564 }
565
566 /*
567 * v2 consists of
568 * [x86]
569 * open
570 * read
571 * write
572 * close
573 * # a comment
574 * [x86_64]
575 * open
576 * read
577 * write
578 * close
579 */
580 static int parse_config_v2(FILE *f, char *line, size_t *line_bufsz, struct lxc_conf *conf)
581 {
582 int ret;
583 char *p;
584 enum lxc_hostarch_t cur_rule_arch, native_arch;
585 bool blacklist = false;
586 uint32_t default_policy_action = -1, default_rule_action = -1;
587 struct seccomp_v2_rule rule;
588 struct scmp_ctx_info {
589 uint32_t architectures[3];
590 scmp_filter_ctx contexts[3];
591 bool needs_merge[3];
592 } ctx;
593
594 if (strncmp(line, "blacklist", 9) == 0)
595 blacklist = true;
596 else if (strncmp(line, "whitelist", 9) != 0) {
597 ERROR("Bad seccomp policy style \"%s\"", line);
598 return -1;
599 }
600
601 p = strchr(line, ' ');
602 if (p) {
603 default_policy_action = get_v2_default_action(p + 1);
604 if (default_policy_action == -2)
605 return -1;
606 }
607
608 /* for blacklist, allow any syscall which has no rule */
609 if (blacklist) {
610 if (default_policy_action == -1)
611 default_policy_action = SCMP_ACT_ALLOW;
612
613 if (default_rule_action == -1)
614 default_rule_action = SCMP_ACT_KILL;
615 } else {
616 if (default_policy_action == -1)
617 default_policy_action = SCMP_ACT_KILL;
618
619 if (default_rule_action == -1)
620 default_rule_action = SCMP_ACT_ALLOW;
621 }
622
623 memset(&ctx, 0, sizeof(ctx));
624 ctx.architectures[0] = SCMP_ARCH_NATIVE;
625 ctx.architectures[1] = SCMP_ARCH_NATIVE;
626 ctx.architectures[2] = SCMP_ARCH_NATIVE;
627 native_arch = get_hostarch();
628 cur_rule_arch = native_arch;
629 if (native_arch == lxc_seccomp_arch_amd64) {
630 cur_rule_arch = lxc_seccomp_arch_all;
631
632 ctx.architectures[0] = SCMP_ARCH_X86;
633 ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_i386,
634 default_policy_action,
635 &ctx.needs_merge[0]);
636 if (!ctx.contexts[0])
637 goto bad;
638
639 ctx.architectures[1] = SCMP_ARCH_X32;
640 ctx.contexts[1] = get_new_ctx(lxc_seccomp_arch_x32,
641 default_policy_action,
642 &ctx.needs_merge[1]);
643 if (!ctx.contexts[1])
644 goto bad;
645
646 ctx.architectures[2] = SCMP_ARCH_X86_64;
647 ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_amd64,
648 default_policy_action,
649 &ctx.needs_merge[2]);
650 if (!ctx.contexts[2])
651 goto bad;
652 #ifdef SCMP_ARCH_PPC
653 } else if (native_arch == lxc_seccomp_arch_ppc64) {
654 cur_rule_arch = lxc_seccomp_arch_all;
655
656 ctx.architectures[0] = SCMP_ARCH_PPC;
657 ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_ppc,
658 default_policy_action,
659 &ctx.needs_merge[0]);
660 if (!ctx.contexts[0])
661 goto bad;
662
663 ctx.architectures[2] = SCMP_ARCH_PPC64;
664 ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_ppc64,
665 default_policy_action,
666 &ctx.needs_merge[2]);
667 if (!ctx.contexts[2])
668 goto bad;
669 #endif
670 #ifdef SCMP_ARCH_ARM
671 } else if (native_arch == lxc_seccomp_arch_arm64) {
672 cur_rule_arch = lxc_seccomp_arch_all;
673
674 ctx.architectures[0] = SCMP_ARCH_ARM;
675 ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_arm,
676 default_policy_action,
677 &ctx.needs_merge[0]);
678 if (!ctx.contexts[0])
679 goto bad;
680
681 #ifdef SCMP_ARCH_AARCH64
682 ctx.architectures[2] = SCMP_ARCH_AARCH64;
683 ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_arm64,
684 default_policy_action,
685 &ctx.needs_merge[2]);
686 if (!ctx.contexts[2])
687 goto bad;
688 #endif
689 #endif
690 #ifdef SCMP_ARCH_MIPS
691 } else if (native_arch == lxc_seccomp_arch_mips64) {
692 cur_rule_arch = lxc_seccomp_arch_all;
693
694 ctx.architectures[0] = SCMP_ARCH_MIPS;
695 ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_mips,
696 default_policy_action,
697 &ctx.needs_merge[0]);
698 if (!ctx.contexts[0])
699 goto bad;
700
701 ctx.architectures[1] = SCMP_ARCH_MIPS64N32;
702 ctx.contexts[1] = get_new_ctx(lxc_seccomp_arch_mips64n32,
703 default_policy_action,
704 &ctx.needs_merge[1]);
705 if (!ctx.contexts[1])
706 goto bad;
707
708 ctx.architectures[2] = SCMP_ARCH_MIPS64;
709 ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_mips64,
710 default_policy_action,
711 &ctx.needs_merge[2]);
712 if (!ctx.contexts[2])
713 goto bad;
714 } else if (native_arch == lxc_seccomp_arch_mipsel64) {
715 cur_rule_arch = lxc_seccomp_arch_all;
716
717 ctx.architectures[0] = SCMP_ARCH_MIPSEL;
718 ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_mipsel,
719 default_policy_action,
720 &ctx.needs_merge[0]);
721 if (!ctx.contexts[0])
722 goto bad;
723
724 ctx.architectures[1] = SCMP_ARCH_MIPSEL64N32;
725 ctx.contexts[1] = get_new_ctx(lxc_seccomp_arch_mipsel64n32,
726 default_policy_action,
727 &ctx.needs_merge[1]);
728 if (!ctx.contexts[1])
729 goto bad;
730
731 ctx.architectures[2] = SCMP_ARCH_MIPSEL64;
732 ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_mipsel64,
733 default_policy_action,
734 &ctx.needs_merge[2]);
735 if (!ctx.contexts[2])
736 goto bad;
737 #endif
738 }
739
740 if (default_policy_action != SCMP_ACT_KILL) {
741 ret = seccomp_reset(conf->seccomp.seccomp_ctx, default_policy_action);
742 if (ret != 0) {
743 ERROR("Error re-initializing Seccomp");
744 return -1;
745 }
746
747 ret = seccomp_attr_set(conf->seccomp.seccomp_ctx, SCMP_FLTATR_CTL_NNP, 0);
748 if (ret < 0) {
749 errno = -ret;
750 SYSERROR("Failed to turn off no-new-privs");
751 return -1;
752 }
753
754 #ifdef SCMP_FLTATR_ATL_TSKIP
755 ret = seccomp_attr_set(conf->seccomp.seccomp_ctx, SCMP_FLTATR_ATL_TSKIP, 1);
756 if (ret < 0) {
757 errno = -ret;
758 SYSWARN("Failed to turn on seccomp nop-skip, continuing");
759 }
760 #endif
761 }
762
763 while (getline(&line, line_bufsz, f) != -1) {
764 if (line[0] == '#')
765 continue;
766
767 if (line[0] == '\0')
768 continue;
769
770 remove_trailing_newlines(line);
771
772 INFO("Processing \"%s\"", line);
773 if (line[0] == '[') {
774 /* Read the architecture for next set of rules. */
775 if (strcmp(line, "[x86]") == 0 ||
776 strcmp(line, "[X86]") == 0) {
777 if (native_arch != lxc_seccomp_arch_i386 &&
778 native_arch != lxc_seccomp_arch_amd64) {
779 cur_rule_arch = lxc_seccomp_arch_unknown;
780 continue;
781 }
782
783 cur_rule_arch = lxc_seccomp_arch_i386;
784 } else if (strcmp(line, "[x32]") == 0 ||
785 strcmp(line, "[X32]") == 0) {
786 if (native_arch != lxc_seccomp_arch_amd64) {
787 cur_rule_arch = lxc_seccomp_arch_unknown;
788 continue;
789 }
790
791 cur_rule_arch = lxc_seccomp_arch_x32;
792 } else if (strcmp(line, "[X86_64]") == 0 ||
793 strcmp(line, "[x86_64]") == 0) {
794 if (native_arch != lxc_seccomp_arch_amd64) {
795 cur_rule_arch = lxc_seccomp_arch_unknown;
796 continue;
797 }
798
799 cur_rule_arch = lxc_seccomp_arch_amd64;
800 } else if (strcmp(line, "[all]") == 0 ||
801 strcmp(line, "[ALL]") == 0) {
802 cur_rule_arch = lxc_seccomp_arch_all;
803 }
804 #ifdef SCMP_ARCH_ARM
805 else if (strcmp(line, "[arm]") == 0 ||
806 strcmp(line, "[ARM]") == 0) {
807 if (native_arch != lxc_seccomp_arch_arm &&
808 native_arch != lxc_seccomp_arch_arm64) {
809 cur_rule_arch = lxc_seccomp_arch_unknown;
810 continue;
811 }
812
813 cur_rule_arch = lxc_seccomp_arch_arm;
814 }
815 #endif
816 #ifdef SCMP_ARCH_AARCH64
817 else if (strcmp(line, "[arm64]") == 0 ||
818 strcmp(line, "[ARM64]") == 0) {
819 if (native_arch != lxc_seccomp_arch_arm64) {
820 cur_rule_arch = lxc_seccomp_arch_unknown;
821 continue;
822 }
823
824 cur_rule_arch = lxc_seccomp_arch_arm64;
825 }
826 #endif
827 #ifdef SCMP_ARCH_PPC64LE
828 else if (strcmp(line, "[ppc64le]") == 0 ||
829 strcmp(line, "[PPC64LE]") == 0) {
830 if (native_arch != lxc_seccomp_arch_ppc64le) {
831 cur_rule_arch = lxc_seccomp_arch_unknown;
832 continue;
833 }
834
835 cur_rule_arch = lxc_seccomp_arch_ppc64le;
836 }
837 #endif
838 #ifdef SCMP_ARCH_PPC64
839 else if (strcmp(line, "[ppc64]") == 0 ||
840 strcmp(line, "[PPC64]") == 0) {
841 if (native_arch != lxc_seccomp_arch_ppc64) {
842 cur_rule_arch = lxc_seccomp_arch_unknown;
843 continue;
844 }
845
846 cur_rule_arch = lxc_seccomp_arch_ppc64;
847 }
848 #endif
849 #ifdef SCMP_ARCH_PPC
850 else if (strcmp(line, "[ppc]") == 0 ||
851 strcmp(line, "[PPC]") == 0) {
852 if (native_arch != lxc_seccomp_arch_ppc &&
853 native_arch != lxc_seccomp_arch_ppc64) {
854 cur_rule_arch = lxc_seccomp_arch_unknown;
855 continue;
856 }
857
858 cur_rule_arch = lxc_seccomp_arch_ppc;
859 }
860 #endif
861 #ifdef SCMP_ARCH_MIPS
862 else if (strcmp(line, "[mips64]") == 0 ||
863 strcmp(line, "[MIPS64]") == 0) {
864 if (native_arch != lxc_seccomp_arch_mips64) {
865 cur_rule_arch = lxc_seccomp_arch_unknown;
866 continue;
867 }
868
869 cur_rule_arch = lxc_seccomp_arch_mips64;
870 } else if (strcmp(line, "[mips64n32]") == 0 ||
871 strcmp(line, "[MIPS64N32]") == 0) {
872 if (native_arch != lxc_seccomp_arch_mips64) {
873 cur_rule_arch = lxc_seccomp_arch_unknown;
874 continue;
875 }
876
877 cur_rule_arch = lxc_seccomp_arch_mips64n32;
878 } else if (strcmp(line, "[mips]") == 0 ||
879 strcmp(line, "[MIPS]") == 0) {
880 if (native_arch != lxc_seccomp_arch_mips &&
881 native_arch != lxc_seccomp_arch_mips64) {
882 cur_rule_arch = lxc_seccomp_arch_unknown;
883 continue;
884 }
885
886 cur_rule_arch = lxc_seccomp_arch_mips;
887 } else if (strcmp(line, "[mipsel64]") == 0 ||
888 strcmp(line, "[MIPSEL64]") == 0) {
889 if (native_arch != lxc_seccomp_arch_mipsel64) {
890 cur_rule_arch = lxc_seccomp_arch_unknown;
891 continue;
892 }
893
894 cur_rule_arch = lxc_seccomp_arch_mipsel64;
895 } else if (strcmp(line, "[mipsel64n32]") == 0 ||
896 strcmp(line, "[MIPSEL64N32]") == 0) {
897 if (native_arch != lxc_seccomp_arch_mipsel64) {
898 cur_rule_arch = lxc_seccomp_arch_unknown;
899 continue;
900 }
901
902 cur_rule_arch = lxc_seccomp_arch_mipsel64n32;
903 } else if (strcmp(line, "[mipsel]") == 0 ||
904 strcmp(line, "[MIPSEL]") == 0) {
905 if (native_arch != lxc_seccomp_arch_mipsel &&
906 native_arch != lxc_seccomp_arch_mipsel64) {
907 cur_rule_arch = lxc_seccomp_arch_unknown;
908 continue;
909 }
910
911 cur_rule_arch = lxc_seccomp_arch_mipsel;
912 }
913 #endif
914 #ifdef SCMP_ARCH_S390X
915 else if (strcmp(line, "[s390x]") == 0 ||
916 strcmp(line, "[S390X]") == 0) {
917 if (native_arch != lxc_seccomp_arch_s390x) {
918 cur_rule_arch = lxc_seccomp_arch_unknown;
919 continue;
920 }
921
922 cur_rule_arch = lxc_seccomp_arch_s390x;
923 }
924 #endif
925 else {
926 goto bad_arch;
927 }
928
929 continue;
930 }
931
932 /* irrelevant arch - i.e. arm on i386 */
933 if (cur_rule_arch == lxc_seccomp_arch_unknown)
934 continue;
935
936 memset(&rule, 0, sizeof(rule));
937 /* read optional action which follows the syscall */
938 ret = parse_v2_rules(line, default_rule_action, &rule);
939 if (ret != 0) {
940 ERROR("Failed to interpret seccomp rule");
941 goto bad_rule;
942 }
943
944 #if HAVE_DECL_SECCOMP_NOTIFY_FD
945 if ((rule.action == SCMP_ACT_NOTIFY) &&
946 !conf->seccomp.notifier.wants_supervision) {
947 conf->seccomp.notifier.wants_supervision = true;
948 TRACE("Set SECCOMP_FILTER_FLAG_NEW_LISTENER attribute");
949 }
950 #endif
951
952 if (!do_resolve_add_rule(SCMP_ARCH_NATIVE, line,
953 conf->seccomp.seccomp_ctx, &rule))
954 goto bad_rule;
955
956 INFO("Added native rule for arch %d for %s action %d(%s)",
957 SCMP_ARCH_NATIVE, line, rule.action,
958 get_action_name(rule.action));
959
960 if (ctx.architectures[0] != SCMP_ARCH_NATIVE) {
961 if (!do_resolve_add_rule(ctx.architectures[0], line,
962 ctx.contexts[0], &rule))
963 goto bad_rule;
964
965 INFO("Added compat rule for arch %d for %s action %d(%s)",
966 ctx.architectures[0], line, rule.action,
967 get_action_name(rule.action));
968 }
969
970 if (ctx.architectures[1] != SCMP_ARCH_NATIVE) {
971 if (!do_resolve_add_rule(ctx.architectures[1], line,
972 ctx.contexts[1], &rule))
973 goto bad_rule;
974
975 INFO("Added compat rule for arch %d for %s action %d(%s)",
976 ctx.architectures[1], line, rule.action,
977 get_action_name(rule.action));
978 }
979
980 if (ctx.architectures[2] != SCMP_ARCH_NATIVE) {
981 if (!do_resolve_add_rule(ctx.architectures[2], line,
982 ctx.contexts[2], &rule))
983 goto bad_rule;
984
985 INFO("Added native rule for arch %d for %s action %d(%s)",
986 ctx.architectures[2], line, rule.action,
987 get_action_name(rule.action));
988 }
989 }
990
991 INFO("Merging compat seccomp contexts into main context");
992 if (ctx.contexts[0]) {
993 if (ctx.needs_merge[0]) {
994 ret = seccomp_merge(conf->seccomp.seccomp_ctx, ctx.contexts[0]);
995 if (ret < 0) {
996 ERROR("Failed to merge first compat seccomp "
997 "context into main context");
998 goto bad;
999 }
1000
1001 TRACE("Merged first compat seccomp context into main context");
1002 } else {
1003 seccomp_release(ctx.contexts[0]);
1004 ctx.contexts[0] = NULL;
1005 }
1006 }
1007
1008 if (ctx.contexts[1]) {
1009 if (ctx.needs_merge[1]) {
1010 ret = seccomp_merge(conf->seccomp.seccomp_ctx, ctx.contexts[1]);
1011 if (ret < 0) {
1012 ERROR("Failed to merge first compat seccomp "
1013 "context into main context");
1014 goto bad;
1015 }
1016
1017 TRACE("Merged second compat seccomp context into main context");
1018 } else {
1019 seccomp_release(ctx.contexts[1]);
1020 ctx.contexts[1] = NULL;
1021 }
1022 }
1023
1024 if (ctx.contexts[2]) {
1025 if (ctx.needs_merge[2]) {
1026 ret = seccomp_merge(conf->seccomp.seccomp_ctx, ctx.contexts[2]);
1027 if (ret < 0) {
1028 ERROR("Failed to merge third compat seccomp "
1029 "context into main context");
1030 goto bad;
1031 }
1032
1033 TRACE("Merged third compat seccomp context into main context");
1034 } else {
1035 seccomp_release(ctx.contexts[2]);
1036 ctx.contexts[2] = NULL;
1037 }
1038 }
1039
1040 free(line);
1041 return 0;
1042
1043 bad_arch:
1044 ERROR("Unsupported architecture \"%s\"", line);
1045
1046 bad_rule:
1047 bad:
1048 if (ctx.contexts[0])
1049 seccomp_release(ctx.contexts[0]);
1050
1051 if (ctx.contexts[1])
1052 seccomp_release(ctx.contexts[1]);
1053
1054 if (ctx.contexts[2])
1055 seccomp_release(ctx.contexts[2]);
1056
1057 free(line);
1058
1059 return -1;
1060 }
1061 #else /* HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH */
/*
 * Fallback used when HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH is not set:
 * version 2 policies cannot be parsed, so this always fails.
 *
 * The signature matches the four-argument call in parse_config(). As that
 * caller hands over ownership of @line, it is freed here before returning.
 *
 * Returns -1.
 */
static int parse_config_v2(FILE *f, char *line, size_t *line_bufsz, struct lxc_conf *conf)
{
	(void)f;
	(void)line_bufsz;
	(void)conf;

	free(line);
	return -1;
}
1066 #endif /* HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH */
1067
/*
 * Parse a seccomp policy file.
 *
 * The first line holds the policy language version, which must be 1 or 2.
 * The second line holds the directives: 'whitelist' (version 1 or 2) or
 * 'blacklist' (version 2 only). 'debug' is recognised but rejected because it
 * is not implemented yet. The policy itself follows and is handed to the
 * version specific parser together with the directive line; that parser
 * becomes responsible for freeing the line buffer.
 *
 * Returns the result of the version specific parser, or -1 if the header is
 * invalid.
 */
static int parse_config(FILE *f, struct lxc_conf *conf)
{
	int version;
	size_t line_bufsz = 0;
	char *line = NULL;

	if (fscanf(f, "%d\n", &version) != 1 || (version != 1 && version != 2)) {
		ERROR("Invalid version");
		return -1;
	}

	if (getline(&line, &line_bufsz, f) == -1) {
		ERROR("Invalid config file");
	} else if (version == 1 && !strstr(line, "whitelist")) {
		ERROR("Only whitelist policy is supported");
	} else if (strstr(line, "debug")) {
		ERROR("Debug not yet implemented");
	} else if (version == 1) {
		return parse_config_v1(f, line, &line_bufsz, conf);
	} else {
		return parse_config_v2(f, line, &line_bufsz, conf);
	}

	/* Only the rejected-header cases get here. */
	free(line);
	return -1;
}
1112
1113 /*
1114 * use_seccomp: return true if we should try and apply a seccomp policy
1115 * if defined for the container.
1116 * This will return false if
1117 * 1. seccomp is not enabled in the kernel
1118 * 2. a seccomp policy is already enabled for this task
1119 */
1120 static bool use_seccomp(const struct lxc_conf *conf)
1121 {
1122 int ret, v;
1123 FILE *f;
1124 size_t line_bufsz = 0;
1125 char *line = NULL;
1126 bool already_enabled = false, found = false;
1127
1128 if (conf->seccomp.allow_nesting > 0)
1129 return true;
1130
1131 f = fopen("/proc/self/status", "r");
1132 if (!f)
1133 return true;
1134
1135 while (getline(&line, &line_bufsz, f) != -1) {
1136 if (strncmp(line, "Seccomp:", 8) == 0) {
1137 found = true;
1138
1139 ret = sscanf(line + 8, "%d", &v);
1140 if (ret == 1 && v != 0)
1141 already_enabled = true;
1142
1143 break;
1144 }
1145 }
1146 free(line);
1147 fclose(f);
1148
1149 if (!found) {
1150 INFO("Seccomp is not enabled in the kernel");
1151 return false;
1152 }
1153
1154 if (already_enabled) {
1155 INFO("Already seccomp-confined, not loading new policy");
1156 return false;
1157 }
1158
1159 return true;
1160 }
1161
1162 int lxc_read_seccomp_config(struct lxc_conf *conf)
1163 {
1164 int ret;
1165 FILE *f;
1166
1167 if (!conf->seccomp.seccomp)
1168 return 0;
1169
1170 if (!use_seccomp(conf))
1171 return 0;
1172
1173 #if HAVE_SCMP_FILTER_CTX
1174 /* XXX for debug, pass in SCMP_ACT_TRAP */
1175 conf->seccomp.seccomp_ctx = seccomp_init(SCMP_ACT_KILL);
1176 ret = !conf->seccomp.seccomp_ctx;
1177 #else
1178 ret = seccomp_init(SCMP_ACT_KILL) < 0;
1179 #endif
1180 if (ret) {
1181 ERROR("Failed initializing seccomp");
1182 return -1;
1183 }
1184
1185 /* turn off no-new-privs. We don't want it in lxc, and it breaks
1186 * with apparmor */
1187 #if HAVE_SCMP_FILTER_CTX
1188 ret = seccomp_attr_set(conf->seccomp.seccomp_ctx, SCMP_FLTATR_CTL_NNP, 0);
1189 #else
1190 ret = seccomp_attr_set(SCMP_FLTATR_CTL_NNP, 0);
1191 #endif
1192 if (ret < 0) {
1193 errno = -ret;
1194 SYSERROR("Failed to turn off no-new-privs");
1195 return -1;
1196 }
1197
1198 #ifdef SCMP_FLTATR_ATL_TSKIP
1199 ret = seccomp_attr_set(conf->seccomp.seccomp_ctx, SCMP_FLTATR_ATL_TSKIP, 1);
1200 if (ret < 0) {
1201 errno = -ret;
1202 SYSWARN("Failed to turn on seccomp nop-skip, continuing");
1203 }
1204 #endif
1205
1206 f = fopen(conf->seccomp.seccomp, "r");
1207 if (!f) {
1208 SYSERROR("Failed to open seccomp policy file %s", conf->seccomp.seccomp);
1209 return -1;
1210 }
1211
1212 ret = parse_config(f, conf);
1213 fclose(f);
1214
1215 return ret;
1216 }
1217
1218 int lxc_seccomp_load(struct lxc_conf *conf)
1219 {
1220 int ret;
1221
1222 if (!conf->seccomp.seccomp)
1223 return 0;
1224
1225 if (!use_seccomp(conf))
1226 return 0;
1227
1228 #if HAVE_SCMP_FILTER_CTX
1229 ret = seccomp_load(conf->seccomp.seccomp_ctx);
1230 #else
1231 ret = seccomp_load();
1232 #endif
1233 if (ret < 0) {
1234 errno = -ret;
1235 SYSERROR("Error loading the seccomp policy");
1236 return -1;
1237 }
1238
1239 /* After load seccomp filter into the kernel successfully, export the current seccomp
1240 * filter to log file */
1241 #if HAVE_SCMP_FILTER_CTX
1242 if ((lxc_log_get_level() <= LXC_LOG_LEVEL_TRACE ||
1243 conf->loglevel <= LXC_LOG_LEVEL_TRACE) &&
1244 lxc_log_fd >= 0) {
1245 ret = seccomp_export_pfc(conf->seccomp.seccomp_ctx, lxc_log_fd);
1246 /* Just give an warning when export error */
1247 if (ret < 0) {
1248 errno = -ret;
1249 SYSWARN("Failed to export seccomp filter to log file");
1250 }
1251 }
1252 #endif
1253
1254 #if HAVE_DECL_SECCOMP_NOTIFY_FD
1255 if (conf->seccomp.notifier.wants_supervision) {
1256 ret = seccomp_notify_fd(conf->seccomp.seccomp_ctx);
1257 if (ret < 0) {
1258 errno = -ret;
1259 return -1;
1260 }
1261
1262 conf->seccomp.notifier.notify_fd = ret;
1263 TRACE("Retrieved new seccomp listener fd %d", ret);
1264 }
1265 #endif
1266
1267 return 0;
1268 }
1269
1270 void lxc_seccomp_free(struct lxc_seccomp *seccomp)
1271 {
1272 free_disarm(seccomp->seccomp);
1273
1274 #if HAVE_SCMP_FILTER_CTX
1275 if (seccomp->seccomp_ctx) {
1276 seccomp_release(seccomp->seccomp_ctx);
1277 seccomp->seccomp_ctx = NULL;
1278 }
1279 #endif
1280
1281 #if HAVE_DECL_SECCOMP_NOTIFY_FD
1282 close_prot_errno_disarm(seccomp->notifier.notify_fd);
1283 close_prot_errno_disarm(seccomp->notifier.proxy_fd);
1284 seccomp_notify_free(seccomp->notifier.req_buf, seccomp->notifier.rsp_buf);
1285 seccomp->notifier.req_buf = NULL;
1286 seccomp->notifier.rsp_buf = NULL;
1287 #endif
1288 }
1289
#if HAVE_DECL_SECCOMP_NOTIFY_FD
/*
 * seccomp_notify_reconnect: drop the current seccomp proxy connection and
 * connect to the configured proxy address again.
 *
 * The old proxy fd is closed first. On success the new socket, with
 * timeouts of 30 seconds, is stored in the handler's configuration and 0
 * is returned. On failure -1 is returned and no proxy fd is stored.
 */
static int seccomp_notify_reconnect(struct lxc_handler *handler)
{
	__do_close_prot_errno int proxy_fd = -EBADF;
	struct lxc_conf *conf = handler->conf;
	int ret;

	close_prot_errno_disarm(conf->seccomp.notifier.proxy_fd);

	proxy_fd = lxc_unix_connect(&conf->seccomp.notifier.proxy_addr);
	if (proxy_fd < 0) {
		SYSERROR("Failed to reconnect to seccomp proxy");
		return -1;
	}

	/* 30 second timeout */
	ret = lxc_socket_set_timeout(proxy_fd, 30, 30);
	if (ret != 0) {
		SYSERROR("Failed to set socket timeout");
		return -1;
	}

	/* Hand the fd over so the cleanup attribute no longer closes it. */
	conf->seccomp.notifier.proxy_fd = move_fd(proxy_fd);

	return 0;
}
#endif
1312
#if HAVE_DECL_SECCOMP_NOTIFY_FD
/*
 * seccomp_notify_default_answer: answer the notification @req with
 * -ENOSYS and then reconnect to the seccomp proxy.
 *
 * A failure to send the answer is only logged. The return value is the
 * result of seccomp_notify_reconnect().
 */
static int seccomp_notify_default_answer(int fd, struct seccomp_notif *req,
					 struct seccomp_notif_resp *resp,
					 struct lxc_handler *handler)
{
	int ret;

	resp->error = -ENOSYS;
	resp->id = req->id;

	ret = seccomp_notify_respond(fd, resp);
	if (ret != 0)
		SYSERROR("Failed to send default message to seccomp");

	return seccomp_notify_reconnect(handler);
}
#endif
1327
/*
 * seccomp_notify_handler: mainloop callback for the seccomp notify fd.
 *
 * Reads one notification from @fd, opens /proc/<pid>/mem of the task that
 * triggered it, forwards the request together with that fd to the seccomp
 * proxy, waits for the proxy's reply and relays the reply via @fd.
 *
 * Whenever the request cannot be handed to the proxy, or no complete
 * reply arrives, the request is answered exactly once with the default
 * answer (-ENOSYS, see seccomp_notify_default_answer(), which also
 * reconnects to the proxy for later requests) and handling of this
 * notification stops. It must not continue: the proxy fd cached in this
 * function is stale after a reconnect and the request has already been
 * answered.
 *
 * @fd:     seccomp notify fd to read from and respond on
 * @events: not used
 * @data:   the container's struct lxc_handler
 * @descr:  not used
 *
 * Returns 0 after a notification was processed, whether or not handling
 * it succeeded, the result of minus_one_set_errno(EINVAL) when no proxy
 * fd is registered, and -ENOSYS when built without seccomp notify
 * support.
 */
int seccomp_notify_handler(int fd, uint32_t events, void *data,
			   struct lxc_epoll_descr *descr)
{

#if HAVE_DECL_SECCOMP_NOTIFY_FD
	__do_close_prot_errno int fd_mem = -EBADF;
	int ret;
	ssize_t bytes;
	char mem_path[6 /* /proc/ */
		      + INTTYPE_TO_STRLEN(int64_t)
		      + 4 /* /mem */
		      + 1 /* \0 */];
	struct lxc_handler *hdlr = data;
	struct lxc_conf *conf = hdlr->conf;
	struct seccomp_notif *req = conf->seccomp.notifier.req_buf;
	struct seccomp_notif_resp *resp = conf->seccomp.notifier.rsp_buf;
	int listener_proxy_fd = conf->seccomp.notifier.proxy_fd;
	struct seccomp_notify_proxy_msg msg = {0};

	if (listener_proxy_fd < 0) {
		ERROR("No seccomp proxy registered");
		return minus_one_set_errno(EINVAL);
	}

	ret = seccomp_notify_receive(fd, req);
	if (ret) {
		SYSERROR("Failed to read seccomp notification");
		goto out;
	}

	snprintf(mem_path, sizeof(mem_path), "/proc/%d/mem", req->pid);
	fd_mem = open(mem_path, O_RDONLY | O_CLOEXEC);
	if (fd_mem < 0) {
		/* Log first: sending the default answer may clobber errno. */
		SYSERROR("Failed to open process memory for seccomp notify request");
		(void)seccomp_notify_default_answer(fd, req, resp, hdlr);
		goto out;
	}

	/*
	 * Make sure that the fd for /proc/<pid>/mem we just opened still
	 * refers to the correct process's memory.
	 */
	ret = seccomp_notify_id_valid(fd, req->id);
	if (ret < 0) {
		/* Log first: sending the default answer may clobber errno. */
		SYSERROR("Invalid seccomp notify request id");
		(void)seccomp_notify_default_answer(fd, req, resp, hdlr);
		goto out;
	}

	memcpy(&msg.req, req, sizeof(msg.req));
	msg.monitor_pid = hdlr->monitor_pid;
	msg.init_pid = hdlr->pid;

	bytes = lxc_unix_send_fds(listener_proxy_fd, &fd_mem, 1, &msg,
				  sizeof(msg));
	if (bytes != (ssize_t)sizeof(msg)) {
		SYSERROR("Failed to forward message to seccomp proxy");
		/*
		 * The request is answered here; stop even if the reconnect
		 * worked, it only matters for the next notification.
		 */
		(void)seccomp_notify_default_answer(fd, req, resp, hdlr);
		goto out;
	}

	/* The proxy holds its own copy of the fd now. */
	close_prot_errno_disarm(fd_mem);

	bytes = lxc_recv_nointr(listener_proxy_fd, &msg, sizeof(msg), 0);
	if (bytes != (ssize_t)sizeof(msg)) {
		SYSERROR("Failed to receive message from seccomp proxy");
		/* Same as above: answer once, then stop. */
		(void)seccomp_notify_default_answer(fd, req, resp, hdlr);
		goto out;
	}

	memcpy(resp, &msg.resp, sizeof(*resp));
	ret = seccomp_notify_respond(fd, resp);
	if (ret)
		SYSERROR("Failed to send seccomp notification");

out:
	return 0;
#else
	return -ENOSYS;
#endif
}
1415
1416 void seccomp_conf_init(struct lxc_conf *conf)
1417 {
1418 conf->seccomp.seccomp = NULL;
1419 #if HAVE_SCMP_FILTER_CTX
1420 conf->seccomp.allow_nesting = 0;
1421 memset(&conf->seccomp.seccomp_ctx, 0, sizeof(conf->seccomp.seccomp_ctx));
1422 #endif /* HAVE_SCMP_FILTER_CTX */
1423 #if HAVE_DECL_SECCOMP_NOTIFY_FD
1424 conf->seccomp.notifier.wants_supervision = false;
1425 conf->seccomp.notifier.notify_fd = -EBADF;
1426 conf->seccomp.notifier.proxy_fd = -EBADF;
1427 memset(&conf->seccomp.notifier.proxy_addr, 0,
1428 sizeof(conf->seccomp.notifier.proxy_addr));
1429 conf->seccomp.notifier.req_buf = NULL;
1430 conf->seccomp.notifier.rsp_buf = NULL;
1431 #endif
1432 }
1433
/*
 * lxc_seccomp_setup_proxy: connect to the seccomp proxy and register the
 * seccomp notify fd with the mainloop.
 *
 * Nothing is done unless supervision is requested and a proxy address is
 * set (checked via the second byte of sun_path). Otherwise the function
 * connects to the proxy, sets 30 second socket timeouts, allocates the
 * notify request/response buffers and adds seccomp->notifier.notify_fd
 * to @descr with seccomp_notify_handler() as callback and @handler as
 * its data. Only when all of that worked is the proxy socket stored in
 * seccomp->notifier.proxy_fd; on any failure it is closed again.
 *
 * Returns 0 on success or when there is nothing to do, -1 on failure.
 */
int lxc_seccomp_setup_proxy(struct lxc_seccomp *seccomp,
			    struct lxc_epoll_descr *descr,
			    struct lxc_handler *handler)
{
#if HAVE_DECL_SECCOMP_NOTIFY_FD
	if (seccomp->notifier.wants_supervision &&
	    seccomp->notifier.proxy_addr.sun_path[1] != '\0') {
		__do_close_prot_errno int proxy_fd = -EBADF;
		int ret;

		proxy_fd = lxc_unix_connect(&seccomp->notifier.proxy_addr);
		if (proxy_fd < 0) {
			SYSERROR("Failed to connect to seccomp proxy");
			return -1;
		}

		/* 30 second timeout */
		ret = lxc_socket_set_timeout(proxy_fd, 30, 30);
		if (ret) {
			SYSERROR("Failed to set timeouts for seccomp proxy");
			return -1;
		}

		ret = seccomp_notify_alloc(&seccomp->notifier.req_buf,
					   &seccomp->notifier.rsp_buf);
		if (ret) {
			ERROR("Failed to allocate seccomp notify request and response buffers");
			/* libseccomp reports errors as negative errno values. */
			errno = ret < 0 ? -ret : ret;
			return -1;
		}

		ret = lxc_mainloop_add_handler(descr,
					       seccomp->notifier.notify_fd,
					       seccomp_notify_handler, handler);
		if (ret < 0) {
			/* Report the fd that was being registered, not the proxy socket. */
			ERROR("Failed to add seccomp notify handler for %d to mainloop",
			      seccomp->notifier.notify_fd);
			return -1;
		}

		/* Hand the fd over so the cleanup attribute no longer closes it. */
		seccomp->notifier.proxy_fd = move_fd(proxy_fd);
	}
#endif
	return 0;
}
1479
/*
 * lxc_seccomp_send_notifier_fd: pass the seccomp notify fd over
 * @socket_fd when supervision is requested.
 *
 * After a successful send the local copy of the notify fd is closed.
 * Returns -1 if sending fails, 0 otherwise (including when supervision is
 * not requested or notify support is not compiled in).
 */
int lxc_seccomp_send_notifier_fd(struct lxc_seccomp *seccomp, int socket_fd)
{
#if HAVE_DECL_SECCOMP_NOTIFY_FD
	if (!seccomp->notifier.wants_supervision)
		return 0;

	if (lxc_abstract_unix_send_fds(socket_fd, &seccomp->notifier.notify_fd,
				       1, NULL, 0) < 0)
		return -1;

	close_prot_errno_disarm(seccomp->notifier.notify_fd);
#endif
	return 0;
}
1493
/*
 * lxc_seccomp_recv_notifier_fd: receive the seccomp notify fd from
 * @socket_fd into seccomp->notifier.notify_fd when supervision is
 * requested.
 *
 * Returns -1 if receiving fails, 0 otherwise (including when supervision
 * is not requested or notify support is not compiled in).
 */
int lxc_seccomp_recv_notifier_fd(struct lxc_seccomp *seccomp, int socket_fd)
{
#if HAVE_DECL_SECCOMP_NOTIFY_FD
	int rc;

	if (!seccomp->notifier.wants_supervision)
		return 0;

	rc = lxc_abstract_unix_recv_fds(socket_fd, &seccomp->notifier.notify_fd,
					1, NULL, 0);
	if (rc < 0)
		return -1;
#endif
	return 0;
}
1509
/*
 * lxc_seccomp_add_notifier: when supervision is requested, register the
 * seccomp notify fd as a listener for container @name in @lxcpath via
 * lxc_cmd_seccomp_notify_add_listener().
 *
 * The local notify fd is closed afterwards whether or not the command
 * succeeded. Returns -1 if the command fails, 0 otherwise (including when
 * supervision is not requested or notify support is not compiled in).
 */
int lxc_seccomp_add_notifier(const char *name, const char *lxcpath,
			     struct lxc_seccomp *seccomp)
{

#if HAVE_DECL_SECCOMP_NOTIFY_FD
	int rc;

	if (!seccomp->notifier.wants_supervision)
		return 0;

	rc = lxc_cmd_seccomp_notify_add_listener(name, lxcpath,
						 seccomp->notifier.notify_fd,
						 -1, 0);
	close_prot_errno_disarm(seccomp->notifier.notify_fd);
	if (rc < 0)
		return -1;
#endif
	return 0;
}