]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/seccomp.c
fd: only add valid fd to mainloop
[mirror_lxc.git] / src / lxc / seccomp.c
1 /*
2 * lxc: linux Container library
3 *
4 * (C) Copyright Canonical, Inc. 2012
5 *
6 * Authors:
7 * Serge Hallyn <serge.hallyn@canonical.com>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #ifndef _GNU_SOURCE
25 #define _GNU_SOURCE 1
26 #endif
27 #include <errno.h>
28 #include <seccomp.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <sys/mount.h>
32 #include <sys/utsname.h>
33
34 #include "af_unix.h"
35 #include "commands.h"
36 #include "config.h"
37 #include "log.h"
38 #include "lxccontainer.h"
39 #include "lxcseccomp.h"
40 #include "mainloop.h"
41 #include "memory_utils.h"
42 #include "utils.h"
43
/* Choose the lxc_seccomp_arch_* constants that get_hostarch() reports for
 * "mips" and "mips64" machines: the *el variants when the compiler defines
 * __MIPSEL__, the plain variants otherwise.
 */
#if defined(__MIPSEL__)
#define MIPS_ARCH_O32 lxc_seccomp_arch_mipsel
#define MIPS_ARCH_N64 lxc_seccomp_arch_mipsel64
#else
#define MIPS_ARCH_O32 lxc_seccomp_arch_mips
#define MIPS_ARCH_N64 lxc_seccomp_arch_mips64
#endif

/* Provide a default when no included header defines the constant. */
#if !defined(SECCOMP_GET_NOTIF_SIZES)
#define SECCOMP_GET_NOTIF_SIZES 3
#endif
55
56 lxc_log_define(seccomp, lxc);
57
58 #if HAVE_DECL_SECCOMP_NOTIFY_FD
/* Thin wrapper around the raw seccomp system call.
 *
 * Forwards @operation, @flags and @args to syscall(__NR_seccomp, ...) and
 * returns whatever it returns. When __NR_seccomp is not defined at build
 * time the call always fails with -1 and errno set to ENOSYS.
 */
static inline int __seccomp(unsigned int operation, unsigned int flags,
			    void *args)
{
#ifndef __NR_seccomp
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_seccomp, operation, flags, args);
#endif
}
69 #endif
70
71 static int parse_config_v1(FILE *f, char *line, size_t *line_bufsz, struct lxc_conf *conf)
72 {
73 int ret = 0;
74
75 while (getline(&line, line_bufsz, f) != -1) {
76 int nr;
77
78 ret = sscanf(line, "%d", &nr);
79 if (ret != 1) {
80 ret = -1;
81 break;
82 }
83
84 #if HAVE_SCMP_FILTER_CTX
85 ret = seccomp_rule_add(conf->seccomp.seccomp_ctx, SCMP_ACT_ALLOW, nr, 0);
86 #else
87 ret = seccomp_rule_add(SCMP_ACT_ALLOW, nr, 0);
88 #endif
89 if (ret < 0) {
90 ERROR("Failed loading allow rule for %d", nr);
91 break;
92 }
93 }
94 free(line);
95
96 return ret;
97 }
98
99 #if HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH
100 static const char *get_action_name(uint32_t action)
101 {
102 /* The upper 16 bits indicate the type of the seccomp action. */
103 switch (action & 0xffff0000) {
104 case SCMP_ACT_KILL:
105 return "kill";
106 case SCMP_ACT_ALLOW:
107 return "allow";
108 case SCMP_ACT_TRAP:
109 return "trap";
110 case SCMP_ACT_ERRNO(0):
111 return "errno";
112 #if HAVE_DECL_SECCOMP_NOTIFY_FD
113 case SCMP_ACT_NOTIFY:
114 return "notify";
115 #endif
116 }
117
118 return "invalid action";
119 }
120
121 static uint32_t get_v2_default_action(char *line)
122 {
123 uint32_t ret_action = -1;
124
125 while (*line == ' ')
126 line++;
127
128 /* After 'whitelist' or 'blacklist' comes default behavior. */
129 if (strncmp(line, "kill", 4) == 0) {
130 ret_action = SCMP_ACT_KILL;
131 } else if (strncmp(line, "errno", 5) == 0) {
132 int e, ret;
133
134 ret = sscanf(line + 5, "%d", &e);
135 if (ret != 1) {
136 ERROR("Failed to parse errno value from %s", line);
137 return -2;
138 }
139
140 ret_action = SCMP_ACT_ERRNO(e);
141 } else if (strncmp(line, "allow", 5) == 0) {
142 ret_action = SCMP_ACT_ALLOW;
143 } else if (strncmp(line, "trap", 4) == 0) {
144 ret_action = SCMP_ACT_TRAP;
145 #if HAVE_DECL_SECCOMP_NOTIFY_FD
146 } else if (strncmp(line, "notify", 6) == 0) {
147 ret_action = SCMP_ACT_NOTIFY;
148 #endif
149 } else if (line[0]) {
150 ERROR("Unrecognized seccomp action \"%s\"", line);
151 return -2;
152 }
153
154 return ret_action;
155 }
156
/* Read the optional action that follows the syscall name on a rule line.
 *
 * @line       : rule line, "<syscall> [action ...]".
 * @def_action : returned when the line carries no action, i.e. there is no
 *               space, or only spaces or a '#' comment follow the name.
 *
 * Returns the parsed action, @def_action as described above, or
 * (uint32_t)-1 when the text after the name is not a valid action.
 */
static uint32_t get_v2_action(char *line, uint32_t def_action)
{
	uint32_t action;
	char *cursor = strchr(line, ' ');

	if (!cursor)
		return def_action;

	/* step over the separator and any padding that follows it */
	do {
		cursor++;
	} while (*cursor == ' ');

	if (*cursor == '\0' || *cursor == '#')
		return def_action;

	action = get_v2_default_action(cursor);
	if (action == (uint32_t)-2)
		return -1;

	if (action == (uint32_t)-1)
		return def_action;

	return action;
}
183
184 struct seccomp_v2_rule_args {
185 uint32_t index;
186 uint64_t value;
187 uint64_t mask;
188 enum scmp_compare op;
189 };
190
191 struct seccomp_v2_rule {
192 uint32_t action;
193 uint32_t args_num;
194 struct seccomp_v2_rule_args args_value[6];
195 };
196
197 static enum scmp_compare parse_v2_rule_op(char *s)
198 {
199 if (strcmp(s, "SCMP_CMP_NE") == 0 || strcmp(s, "!=") == 0)
200 return SCMP_CMP_NE;
201 else if (strcmp(s, "SCMP_CMP_LT") == 0 || strcmp(s, "<") == 0)
202 return SCMP_CMP_LT;
203 else if (strcmp(s, "SCMP_CMP_LE") == 0 || strcmp(s, "<=") == 0)
204 return SCMP_CMP_LE;
205 else if (strcmp(s, "SCMP_CMP_EQ") == 0 || strcmp(s, "==") == 0)
206 return SCMP_CMP_EQ;
207 else if (strcmp(s, "SCMP_CMP_GE") == 0 || strcmp(s, ">=") == 0)
208 return SCMP_CMP_GE;
209 else if (strcmp(s, "SCMP_CMP_GT") == 0 || strcmp(s, ">") == 0)
210 return SCMP_CMP_GT;
211 else if (strcmp(s, "SCMP_CMP_MASKED_EQ") == 0 || strcmp(s, "&=") == 0)
212 return SCMP_CMP_MASKED_EQ;
213
214 return _SCMP_CMP_MAX;
215 }
216
217 /*
218 * This function is used to parse the args string into the structure.
219 * args string format:[index,value,op,mask] or [index,value,op]
220 * index: the index for syscall arguments (type uint)
221 * value: the value for syscall arguments (type uint64)
222 * op: the operator for syscall arguments(string),
223 a valid list of constants as of libseccomp v2.3.2 is
224 SCMP_CMP_NE,SCMP_CMP_LE,SCMP_CMP_LE, SCMP_CMP_EQ, SCMP_CMP_GE,
225 SCMP_CMP_GT, SCMP_CMP_MASKED_EQ, or !=,<=,==,>=,>,&=
226 * mask: the mask to apply on "value" for SCMP_CMP_MASKED_EQ (type uint64, optional)
227 * Returns 0 on success, < 0 otherwise.
228 */
229 static int get_seccomp_arg_value(char *key, struct seccomp_v2_rule_args *rule_args)
230 {
231 int ret = 0;
232 uint32_t index = 0;
233 uint64_t mask = 0, value = 0;
234 enum scmp_compare op = 0;
235 char *tmp = NULL;
236 char s[31] = {0}, v[24] = {0}, m[24] = {'0'};
237
238 tmp = strchr(key, '[');
239 if (!tmp) {
240 ERROR("Failed to interpret args");
241 return -1;
242 }
243
244 ret = sscanf(tmp, "[%i,%23[^,],%30[^0-9^,],%23[^,]", &index, v, s, m);
245 if ((ret != 3 && ret != 4) || index >= 6) {
246 ERROR("Failed to interpret args value");
247 return -1;
248 }
249
250 ret = lxc_safe_uint64(v, &value, 0);
251 if (ret < 0) {
252 ERROR("Invalid argument value");
253 return -1;
254 }
255
256 ret = lxc_safe_uint64(m, &mask, 0);
257 if (ret < 0) {
258 ERROR("Invalid argument mask");
259 return -1;
260 }
261
262 op = parse_v2_rule_op(s);
263 if (op == _SCMP_CMP_MAX) {
264 ERROR("Failed to interpret args operator value");
265 return -1;
266 }
267
268 rule_args->index = index;
269 rule_args->value = value;
270 rule_args->mask = mask;
271 rule_args->op = op;
272 return 0;
273 }
274
275 /* This function is used to parse the seccomp rule entry.
276 * @line : seccomp rule entry string.
277 * @def_action : default action used in the case if the 'line' contain non valid action.
278 * @rules : output struct.
279 * Returns 0 on success, < 0 otherwise.
280 */
281 static int parse_v2_rules(char *line, uint32_t def_action,
282 struct seccomp_v2_rule *rules)
283 {
284 int i = 0, ret = -1;
285 char *key = NULL, *saveptr = NULL, *tmp = NULL;
286
287 tmp = strdup(line);
288 if (!tmp)
289 return -1;
290
291 /* read optional action which follows the syscall */
292 rules->action = get_v2_action(tmp, def_action);
293 if (rules->action == -1) {
294 ERROR("Failed to interpret action");
295 ret = -1;
296 goto on_error;
297 }
298
299 ret = 0;
300 rules->args_num = 0;
301 if (!strchr(tmp, '['))
302 goto on_error;
303
304 ret = -1;
305 for ((key = strtok_r(tmp, "]", &saveptr)), i = 0; key && i < 6;
306 (key = strtok_r(NULL, "]", &saveptr)), i++) {
307 ret = get_seccomp_arg_value(key, &rules->args_value[i]);
308 if (ret < 0)
309 goto on_error;
310
311 rules->args_num++;
312 }
313
314 ret = 0;
315
316 on_error:
317 free(tmp);
318
319 return ret;
320 }
321 #endif
322
323 #if HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH
/* Architectures a seccomp rule section or the host can refer to. The values
 * are spelled out so they stay stable when entries are added.
 */
enum lxc_hostarch_t {
	lxc_seccomp_arch_all         = 0,
	lxc_seccomp_arch_native      = 1,
	lxc_seccomp_arch_i386        = 2,
	lxc_seccomp_arch_x32         = 3,
	lxc_seccomp_arch_amd64       = 4,
	lxc_seccomp_arch_arm         = 5,
	lxc_seccomp_arch_arm64       = 6,
	lxc_seccomp_arch_ppc64       = 7,
	lxc_seccomp_arch_ppc64le     = 8,
	lxc_seccomp_arch_ppc         = 9,
	lxc_seccomp_arch_mips        = 10,
	lxc_seccomp_arch_mips64      = 11,
	lxc_seccomp_arch_mips64n32   = 12,
	lxc_seccomp_arch_mipsel      = 13,
	lxc_seccomp_arch_mipsel64    = 14,
	lxc_seccomp_arch_mipsel64n32 = 15,
	lxc_seccomp_arch_s390x       = 16,
	lxc_seccomp_arch_s390        = 17,
	/* host or section that matches none of the above */
	lxc_seccomp_arch_unknown     = 999,
};
345
346 int get_hostarch(void)
347 {
348 struct utsname uts;
349 if (uname(&uts) < 0) {
350 SYSERROR("Failed to read host arch");
351 return -1;
352 }
353
354 if (strcmp(uts.machine, "i686") == 0)
355 return lxc_seccomp_arch_i386;
356 /* no x32 kernels */
357 else if (strcmp(uts.machine, "x86_64") == 0)
358 return lxc_seccomp_arch_amd64;
359 else if (strncmp(uts.machine, "armv7", 5) == 0)
360 return lxc_seccomp_arch_arm;
361 else if (strncmp(uts.machine, "aarch64", 7) == 0)
362 return lxc_seccomp_arch_arm64;
363 else if (strncmp(uts.machine, "ppc64le", 7) == 0)
364 return lxc_seccomp_arch_ppc64le;
365 else if (strncmp(uts.machine, "ppc64", 5) == 0)
366 return lxc_seccomp_arch_ppc64;
367 else if (strncmp(uts.machine, "ppc", 3) == 0)
368 return lxc_seccomp_arch_ppc;
369 else if (strncmp(uts.machine, "mips64", 6) == 0)
370 return MIPS_ARCH_N64;
371 else if (strncmp(uts.machine, "mips", 4) == 0)
372 return MIPS_ARCH_O32;
373 else if (strncmp(uts.machine, "s390x", 5) == 0)
374 return lxc_seccomp_arch_s390x;
375 else if (strncmp(uts.machine, "s390", 4) == 0)
376 return lxc_seccomp_arch_s390;
377 return lxc_seccomp_arch_unknown;
378 }
379
380 scmp_filter_ctx get_new_ctx(enum lxc_hostarch_t n_arch,
381 uint32_t default_policy_action, bool *needs_merge)
382 {
383 int ret;
384 uint32_t arch;
385 scmp_filter_ctx ctx;
386
387 switch (n_arch) {
388 case lxc_seccomp_arch_i386:
389 arch = SCMP_ARCH_X86;
390 break;
391 case lxc_seccomp_arch_x32:
392 arch = SCMP_ARCH_X32;
393 break;
394 case lxc_seccomp_arch_amd64:
395 arch = SCMP_ARCH_X86_64;
396 break;
397 case lxc_seccomp_arch_arm:
398 arch = SCMP_ARCH_ARM;
399 break;
400 #ifdef SCMP_ARCH_AARCH64
401 case lxc_seccomp_arch_arm64:
402 arch = SCMP_ARCH_AARCH64;
403 break;
404 #endif
405 #ifdef SCMP_ARCH_PPC64LE
406 case lxc_seccomp_arch_ppc64le:
407 arch = SCMP_ARCH_PPC64LE;
408 break;
409 #endif
410 #ifdef SCMP_ARCH_PPC64
411 case lxc_seccomp_arch_ppc64:
412 arch = SCMP_ARCH_PPC64;
413 break;
414 #endif
415 #ifdef SCMP_ARCH_PPC
416 case lxc_seccomp_arch_ppc:
417 arch = SCMP_ARCH_PPC;
418 break;
419 #endif
420 #ifdef SCMP_ARCH_MIPS
421 case lxc_seccomp_arch_mips:
422 arch = SCMP_ARCH_MIPS;
423 break;
424 case lxc_seccomp_arch_mips64:
425 arch = SCMP_ARCH_MIPS64;
426 break;
427 case lxc_seccomp_arch_mips64n32:
428 arch = SCMP_ARCH_MIPS64N32;
429 break;
430 case lxc_seccomp_arch_mipsel:
431 arch = SCMP_ARCH_MIPSEL;
432 break;
433 case lxc_seccomp_arch_mipsel64:
434 arch = SCMP_ARCH_MIPSEL64;
435 break;
436 case lxc_seccomp_arch_mipsel64n32:
437 arch = SCMP_ARCH_MIPSEL64N32;
438 break;
439 #endif
440 #ifdef SCMP_ARCH_S390X
441 case lxc_seccomp_arch_s390x:
442 arch = SCMP_ARCH_S390X;
443 break;
444 #endif
445 #ifdef SCMP_ARCH_S390
446 case lxc_seccomp_arch_s390:
447 arch = SCMP_ARCH_S390;
448 break;
449 #endif
450 default:
451 return NULL;
452 }
453
454 ctx = seccomp_init(default_policy_action);
455 if (!ctx) {
456 ERROR("Error initializing seccomp context");
457 return NULL;
458 }
459
460 ret = seccomp_attr_set(ctx, SCMP_FLTATR_CTL_NNP, 0);
461 if (ret < 0) {
462 errno = -ret;
463 SYSERROR("Failed to turn off no-new-privs");
464 seccomp_release(ctx);
465 return NULL;
466 }
467
468 #ifdef SCMP_FLTATR_ATL_TSKIP
469 ret = seccomp_attr_set(ctx, SCMP_FLTATR_ATL_TSKIP, 1);
470 if (ret < 0) {
471 errno = -ret;
472 SYSWARN("Failed to turn on seccomp nop-skip, continuing");
473 }
474 #endif
475
476 ret = seccomp_arch_exist(ctx, arch);
477 if (ret < 0) {
478 if (ret != -EEXIST) {
479 errno = -ret;
480 SYSERROR("Failed to determine whether arch %d is "
481 "already present in the main seccomp context",
482 (int)n_arch);
483 seccomp_release(ctx);
484 return NULL;
485 }
486
487 ret = seccomp_arch_add(ctx, arch);
488 if (ret != 0) {
489 errno = -ret;
490 SYSERROR("Failed to add arch %d to main seccomp context",
491 (int)n_arch);
492 seccomp_release(ctx);
493 return NULL;
494 }
495 TRACE("Added arch %d to main seccomp context", (int)n_arch);
496
497 ret = seccomp_arch_remove(ctx, SCMP_ARCH_NATIVE);
498 if (ret != 0) {
499 ERROR("Failed to remove native arch from main seccomp context");
500 seccomp_release(ctx);
501 return NULL;
502 }
503 TRACE("Removed native arch from main seccomp context");
504
505 *needs_merge = true;
506 } else {
507 *needs_merge = false;
508 TRACE("Arch %d already present in main seccomp context", (int)n_arch);
509 }
510
511 return ctx;
512 }
513
514 bool do_resolve_add_rule(uint32_t arch, char *line, scmp_filter_ctx ctx,
515 struct seccomp_v2_rule *rule)
516 {
517 int i, nr, ret;
518 struct scmp_arg_cmp arg_cmp[6];
519
520 ret = seccomp_arch_exist(ctx, arch);
521 if (arch && ret != 0) {
522 errno = -ret;
523 SYSERROR("Seccomp: rule and context arch do not match (arch %d)", arch);
524 return false;
525 }
526
527 /*get the syscall name*/
528 char *p = strchr(line, ' ');
529 if (p)
530 *p = '\0';
531
532 if (strncmp(line, "reject_force_umount", 19) == 0) {
533 ret = seccomp_rule_add_exact(ctx, SCMP_ACT_ERRNO(EACCES),
534 SCMP_SYS(umount2), 1,
535 SCMP_A1(SCMP_CMP_MASKED_EQ, MNT_FORCE, MNT_FORCE));
536 if (ret < 0) {
537 errno = -ret;
538 SYSERROR("Failed loading rule to reject force umount");
539 return false;
540 }
541
542 INFO("Set seccomp rule to reject force umounts");
543 return true;
544 }
545
546 nr = seccomp_syscall_resolve_name(line);
547 if (nr == __NR_SCMP_ERROR) {
548 WARN("Failed to resolve syscall \"%s\"", line);
549 WARN("This syscall will NOT be handled by seccomp");
550 return true;
551 }
552
553 if (nr < 0) {
554 WARN("Got negative return value %d for syscall \"%s\"", nr, line);
555 WARN("This syscall will NOT be handled by seccomp");
556 return true;
557 }
558
559 memset(&arg_cmp, 0, sizeof(arg_cmp));
560 for (i = 0; i < rule->args_num; i++) {
561 INFO("arg_cmp[%d]: SCMP_CMP(%u, %llu, %llu, %llu)", i,
562 rule->args_value[i].index,
563 (long long unsigned int)rule->args_value[i].op,
564 (long long unsigned int)rule->args_value[i].mask,
565 (long long unsigned int)rule->args_value[i].value);
566
567 if (SCMP_CMP_MASKED_EQ == rule->args_value[i].op)
568 arg_cmp[i] = SCMP_CMP(rule->args_value[i].index,
569 rule->args_value[i].op,
570 rule->args_value[i].mask,
571 rule->args_value[i].value);
572 else
573 arg_cmp[i] = SCMP_CMP(rule->args_value[i].index,
574 rule->args_value[i].op,
575 rule->args_value[i].value);
576 }
577
578 ret = seccomp_rule_add_exact_array(ctx, rule->action, nr,
579 rule->args_num, arg_cmp);
580 if (ret < 0) {
581 errno = -ret;
582 SYSERROR("Failed loading rule for %s (nr %d action %d (%s))",
583 line, nr, rule->action, get_action_name(rule->action));
584 return false;
585 }
586
587 return true;
588 }
589
590 /*
591 * v2 consists of
592 * [x86]
593 * open
594 * read
595 * write
596 * close
597 * # a comment
598 * [x86_64]
599 * open
600 * read
601 * write
602 * close
603 */
604 static int parse_config_v2(FILE *f, char *line, size_t *line_bufsz, struct lxc_conf *conf)
605 {
606 int ret;
607 char *p;
608 enum lxc_hostarch_t cur_rule_arch, native_arch;
609 bool blacklist = false;
610 uint32_t default_policy_action = -1, default_rule_action = -1;
611 struct seccomp_v2_rule rule;
612 struct scmp_ctx_info {
613 uint32_t architectures[3];
614 scmp_filter_ctx contexts[3];
615 bool needs_merge[3];
616 } ctx;
617
618 if (strncmp(line, "blacklist", 9) == 0)
619 blacklist = true;
620 else if (strncmp(line, "whitelist", 9) != 0) {
621 ERROR("Bad seccomp policy style \"%s\"", line);
622 return -1;
623 }
624
625 p = strchr(line, ' ');
626 if (p) {
627 default_policy_action = get_v2_default_action(p + 1);
628 if (default_policy_action == -2)
629 return -1;
630 }
631
632 /* for blacklist, allow any syscall which has no rule */
633 if (blacklist) {
634 if (default_policy_action == -1)
635 default_policy_action = SCMP_ACT_ALLOW;
636
637 if (default_rule_action == -1)
638 default_rule_action = SCMP_ACT_KILL;
639 } else {
640 if (default_policy_action == -1)
641 default_policy_action = SCMP_ACT_KILL;
642
643 if (default_rule_action == -1)
644 default_rule_action = SCMP_ACT_ALLOW;
645 }
646
647 memset(&ctx, 0, sizeof(ctx));
648 ctx.architectures[0] = SCMP_ARCH_NATIVE;
649 ctx.architectures[1] = SCMP_ARCH_NATIVE;
650 ctx.architectures[2] = SCMP_ARCH_NATIVE;
651 native_arch = get_hostarch();
652 cur_rule_arch = native_arch;
653 if (native_arch == lxc_seccomp_arch_amd64) {
654 cur_rule_arch = lxc_seccomp_arch_all;
655
656 ctx.architectures[0] = SCMP_ARCH_X86;
657 ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_i386,
658 default_policy_action,
659 &ctx.needs_merge[0]);
660 if (!ctx.contexts[0])
661 goto bad;
662
663 ctx.architectures[1] = SCMP_ARCH_X32;
664 ctx.contexts[1] = get_new_ctx(lxc_seccomp_arch_x32,
665 default_policy_action,
666 &ctx.needs_merge[1]);
667 if (!ctx.contexts[1])
668 goto bad;
669
670 ctx.architectures[2] = SCMP_ARCH_X86_64;
671 ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_amd64,
672 default_policy_action,
673 &ctx.needs_merge[2]);
674 if (!ctx.contexts[2])
675 goto bad;
676 #ifdef SCMP_ARCH_PPC
677 } else if (native_arch == lxc_seccomp_arch_ppc64) {
678 cur_rule_arch = lxc_seccomp_arch_all;
679
680 ctx.architectures[0] = SCMP_ARCH_PPC;
681 ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_ppc,
682 default_policy_action,
683 &ctx.needs_merge[0]);
684 if (!ctx.contexts[0])
685 goto bad;
686
687 ctx.architectures[2] = SCMP_ARCH_PPC64;
688 ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_ppc64,
689 default_policy_action,
690 &ctx.needs_merge[2]);
691 if (!ctx.contexts[2])
692 goto bad;
693 #endif
694 #ifdef SCMP_ARCH_ARM
695 } else if (native_arch == lxc_seccomp_arch_arm64) {
696 cur_rule_arch = lxc_seccomp_arch_all;
697
698 ctx.architectures[0] = SCMP_ARCH_ARM;
699 ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_arm,
700 default_policy_action,
701 &ctx.needs_merge[0]);
702 if (!ctx.contexts[0])
703 goto bad;
704
705 #ifdef SCMP_ARCH_AARCH64
706 ctx.architectures[2] = SCMP_ARCH_AARCH64;
707 ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_arm64,
708 default_policy_action,
709 &ctx.needs_merge[2]);
710 if (!ctx.contexts[2])
711 goto bad;
712 #endif
713 #endif
714 #ifdef SCMP_ARCH_MIPS
715 } else if (native_arch == lxc_seccomp_arch_mips64) {
716 cur_rule_arch = lxc_seccomp_arch_all;
717
718 ctx.architectures[0] = SCMP_ARCH_MIPS;
719 ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_mips,
720 default_policy_action,
721 &ctx.needs_merge[0]);
722 if (!ctx.contexts[0])
723 goto bad;
724
725 ctx.architectures[1] = SCMP_ARCH_MIPS64N32;
726 ctx.contexts[1] = get_new_ctx(lxc_seccomp_arch_mips64n32,
727 default_policy_action,
728 &ctx.needs_merge[1]);
729 if (!ctx.contexts[1])
730 goto bad;
731
732 ctx.architectures[2] = SCMP_ARCH_MIPS64;
733 ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_mips64,
734 default_policy_action,
735 &ctx.needs_merge[2]);
736 if (!ctx.contexts[2])
737 goto bad;
738 } else if (native_arch == lxc_seccomp_arch_mipsel64) {
739 cur_rule_arch = lxc_seccomp_arch_all;
740
741 ctx.architectures[0] = SCMP_ARCH_MIPSEL;
742 ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_mipsel,
743 default_policy_action,
744 &ctx.needs_merge[0]);
745 if (!ctx.contexts[0])
746 goto bad;
747
748 ctx.architectures[1] = SCMP_ARCH_MIPSEL64N32;
749 ctx.contexts[1] = get_new_ctx(lxc_seccomp_arch_mipsel64n32,
750 default_policy_action,
751 &ctx.needs_merge[1]);
752 if (!ctx.contexts[1])
753 goto bad;
754
755 ctx.architectures[2] = SCMP_ARCH_MIPSEL64;
756 ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_mipsel64,
757 default_policy_action,
758 &ctx.needs_merge[2]);
759 if (!ctx.contexts[2])
760 goto bad;
761 #endif
762 }
763
764 if (default_policy_action != SCMP_ACT_KILL) {
765 ret = seccomp_reset(conf->seccomp.seccomp_ctx, default_policy_action);
766 if (ret != 0) {
767 ERROR("Error re-initializing Seccomp");
768 return -1;
769 }
770
771 ret = seccomp_attr_set(conf->seccomp.seccomp_ctx, SCMP_FLTATR_CTL_NNP, 0);
772 if (ret < 0) {
773 errno = -ret;
774 SYSERROR("Failed to turn off no-new-privs");
775 return -1;
776 }
777
778 #ifdef SCMP_FLTATR_ATL_TSKIP
779 ret = seccomp_attr_set(conf->seccomp.seccomp_ctx, SCMP_FLTATR_ATL_TSKIP, 1);
780 if (ret < 0) {
781 errno = -ret;
782 SYSWARN("Failed to turn on seccomp nop-skip, continuing");
783 }
784 #endif
785 }
786
787 while (getline(&line, line_bufsz, f) != -1) {
788 if (line[0] == '#')
789 continue;
790
791 if (line[0] == '\0')
792 continue;
793
794 remove_trailing_newlines(line);
795
796 INFO("Processing \"%s\"", line);
797 if (line[0] == '[') {
798 /* Read the architecture for next set of rules. */
799 if (strcmp(line, "[x86]") == 0 ||
800 strcmp(line, "[X86]") == 0) {
801 if (native_arch != lxc_seccomp_arch_i386 &&
802 native_arch != lxc_seccomp_arch_amd64) {
803 cur_rule_arch = lxc_seccomp_arch_unknown;
804 continue;
805 }
806
807 cur_rule_arch = lxc_seccomp_arch_i386;
808 } else if (strcmp(line, "[x32]") == 0 ||
809 strcmp(line, "[X32]") == 0) {
810 if (native_arch != lxc_seccomp_arch_amd64) {
811 cur_rule_arch = lxc_seccomp_arch_unknown;
812 continue;
813 }
814
815 cur_rule_arch = lxc_seccomp_arch_x32;
816 } else if (strcmp(line, "[X86_64]") == 0 ||
817 strcmp(line, "[x86_64]") == 0) {
818 if (native_arch != lxc_seccomp_arch_amd64) {
819 cur_rule_arch = lxc_seccomp_arch_unknown;
820 continue;
821 }
822
823 cur_rule_arch = lxc_seccomp_arch_amd64;
824 } else if (strcmp(line, "[all]") == 0 ||
825 strcmp(line, "[ALL]") == 0) {
826 cur_rule_arch = lxc_seccomp_arch_all;
827 }
828 #ifdef SCMP_ARCH_ARM
829 else if (strcmp(line, "[arm]") == 0 ||
830 strcmp(line, "[ARM]") == 0) {
831 if (native_arch != lxc_seccomp_arch_arm &&
832 native_arch != lxc_seccomp_arch_arm64) {
833 cur_rule_arch = lxc_seccomp_arch_unknown;
834 continue;
835 }
836
837 cur_rule_arch = lxc_seccomp_arch_arm;
838 }
839 #endif
840 #ifdef SCMP_ARCH_AARCH64
841 else if (strcmp(line, "[arm64]") == 0 ||
842 strcmp(line, "[ARM64]") == 0) {
843 if (native_arch != lxc_seccomp_arch_arm64) {
844 cur_rule_arch = lxc_seccomp_arch_unknown;
845 continue;
846 }
847
848 cur_rule_arch = lxc_seccomp_arch_arm64;
849 }
850 #endif
851 #ifdef SCMP_ARCH_PPC64LE
852 else if (strcmp(line, "[ppc64le]") == 0 ||
853 strcmp(line, "[PPC64LE]") == 0) {
854 if (native_arch != lxc_seccomp_arch_ppc64le) {
855 cur_rule_arch = lxc_seccomp_arch_unknown;
856 continue;
857 }
858
859 cur_rule_arch = lxc_seccomp_arch_ppc64le;
860 }
861 #endif
862 #ifdef SCMP_ARCH_PPC64
863 else if (strcmp(line, "[ppc64]") == 0 ||
864 strcmp(line, "[PPC64]") == 0) {
865 if (native_arch != lxc_seccomp_arch_ppc64) {
866 cur_rule_arch = lxc_seccomp_arch_unknown;
867 continue;
868 }
869
870 cur_rule_arch = lxc_seccomp_arch_ppc64;
871 }
872 #endif
873 #ifdef SCMP_ARCH_PPC
874 else if (strcmp(line, "[ppc]") == 0 ||
875 strcmp(line, "[PPC]") == 0) {
876 if (native_arch != lxc_seccomp_arch_ppc &&
877 native_arch != lxc_seccomp_arch_ppc64) {
878 cur_rule_arch = lxc_seccomp_arch_unknown;
879 continue;
880 }
881
882 cur_rule_arch = lxc_seccomp_arch_ppc;
883 }
884 #endif
885 #ifdef SCMP_ARCH_MIPS
886 else if (strcmp(line, "[mips64]") == 0 ||
887 strcmp(line, "[MIPS64]") == 0) {
888 if (native_arch != lxc_seccomp_arch_mips64) {
889 cur_rule_arch = lxc_seccomp_arch_unknown;
890 continue;
891 }
892
893 cur_rule_arch = lxc_seccomp_arch_mips64;
894 } else if (strcmp(line, "[mips64n32]") == 0 ||
895 strcmp(line, "[MIPS64N32]") == 0) {
896 if (native_arch != lxc_seccomp_arch_mips64) {
897 cur_rule_arch = lxc_seccomp_arch_unknown;
898 continue;
899 }
900
901 cur_rule_arch = lxc_seccomp_arch_mips64n32;
902 } else if (strcmp(line, "[mips]") == 0 ||
903 strcmp(line, "[MIPS]") == 0) {
904 if (native_arch != lxc_seccomp_arch_mips &&
905 native_arch != lxc_seccomp_arch_mips64) {
906 cur_rule_arch = lxc_seccomp_arch_unknown;
907 continue;
908 }
909
910 cur_rule_arch = lxc_seccomp_arch_mips;
911 } else if (strcmp(line, "[mipsel64]") == 0 ||
912 strcmp(line, "[MIPSEL64]") == 0) {
913 if (native_arch != lxc_seccomp_arch_mipsel64) {
914 cur_rule_arch = lxc_seccomp_arch_unknown;
915 continue;
916 }
917
918 cur_rule_arch = lxc_seccomp_arch_mipsel64;
919 } else if (strcmp(line, "[mipsel64n32]") == 0 ||
920 strcmp(line, "[MIPSEL64N32]") == 0) {
921 if (native_arch != lxc_seccomp_arch_mipsel64) {
922 cur_rule_arch = lxc_seccomp_arch_unknown;
923 continue;
924 }
925
926 cur_rule_arch = lxc_seccomp_arch_mipsel64n32;
927 } else if (strcmp(line, "[mipsel]") == 0 ||
928 strcmp(line, "[MIPSEL]") == 0) {
929 if (native_arch != lxc_seccomp_arch_mipsel &&
930 native_arch != lxc_seccomp_arch_mipsel64) {
931 cur_rule_arch = lxc_seccomp_arch_unknown;
932 continue;
933 }
934
935 cur_rule_arch = lxc_seccomp_arch_mipsel;
936 }
937 #endif
938 #ifdef SCMP_ARCH_S390X
939 else if (strcmp(line, "[s390x]") == 0 ||
940 strcmp(line, "[S390X]") == 0) {
941 if (native_arch != lxc_seccomp_arch_s390x) {
942 cur_rule_arch = lxc_seccomp_arch_unknown;
943 continue;
944 }
945
946 cur_rule_arch = lxc_seccomp_arch_s390x;
947 }
948 #endif
949 #ifdef SCMP_ARCH_S390
950 else if (strcmp(line, "[s390]") == 0 ||
951 strcmp(line, "[S390]") == 0) {
952 if (native_arch != lxc_seccomp_arch_s390) {
953 cur_rule_arch = lxc_seccomp_arch_unknown;
954 continue;
955 }
956
957 cur_rule_arch = lxc_seccomp_arch_s390;
958 }
959 #endif
960 else {
961 goto bad_arch;
962 }
963
964 continue;
965 }
966
967 /* irrelevant arch - i.e. arm on i386 */
968 if (cur_rule_arch == lxc_seccomp_arch_unknown)
969 continue;
970
971 memset(&rule, 0, sizeof(rule));
972 /* read optional action which follows the syscall */
973 ret = parse_v2_rules(line, default_rule_action, &rule);
974 if (ret != 0) {
975 ERROR("Failed to interpret seccomp rule");
976 goto bad_rule;
977 }
978
979 #if HAVE_DECL_SECCOMP_NOTIFY_FD
980 if ((rule.action == SCMP_ACT_NOTIFY) &&
981 !conf->seccomp.notifier.wants_supervision) {
982 conf->seccomp.notifier.wants_supervision = true;
983 TRACE("Set SECCOMP_FILTER_FLAG_NEW_LISTENER attribute");
984 }
985 #endif
986
987 if (!do_resolve_add_rule(SCMP_ARCH_NATIVE, line,
988 conf->seccomp.seccomp_ctx, &rule))
989 goto bad_rule;
990
991 INFO("Added native rule for arch %d for %s action %d(%s)",
992 SCMP_ARCH_NATIVE, line, rule.action,
993 get_action_name(rule.action));
994
995 if (ctx.architectures[0] != SCMP_ARCH_NATIVE) {
996 if (!do_resolve_add_rule(ctx.architectures[0], line,
997 ctx.contexts[0], &rule))
998 goto bad_rule;
999
1000 INFO("Added compat rule for arch %d for %s action %d(%s)",
1001 ctx.architectures[0], line, rule.action,
1002 get_action_name(rule.action));
1003 }
1004
1005 if (ctx.architectures[1] != SCMP_ARCH_NATIVE) {
1006 if (!do_resolve_add_rule(ctx.architectures[1], line,
1007 ctx.contexts[1], &rule))
1008 goto bad_rule;
1009
1010 INFO("Added compat rule for arch %d for %s action %d(%s)",
1011 ctx.architectures[1], line, rule.action,
1012 get_action_name(rule.action));
1013 }
1014
1015 if (ctx.architectures[2] != SCMP_ARCH_NATIVE) {
1016 if (!do_resolve_add_rule(ctx.architectures[2], line,
1017 ctx.contexts[2], &rule))
1018 goto bad_rule;
1019
1020 INFO("Added native rule for arch %d for %s action %d(%s)",
1021 ctx.architectures[2], line, rule.action,
1022 get_action_name(rule.action));
1023 }
1024 }
1025
1026 INFO("Merging compat seccomp contexts into main context");
1027 if (ctx.contexts[0]) {
1028 if (ctx.needs_merge[0]) {
1029 ret = seccomp_merge(conf->seccomp.seccomp_ctx, ctx.contexts[0]);
1030 if (ret < 0) {
1031 ERROR("Failed to merge first compat seccomp "
1032 "context into main context");
1033 goto bad;
1034 }
1035
1036 TRACE("Merged first compat seccomp context into main context");
1037 } else {
1038 seccomp_release(ctx.contexts[0]);
1039 ctx.contexts[0] = NULL;
1040 }
1041 }
1042
1043 if (ctx.contexts[1]) {
1044 if (ctx.needs_merge[1]) {
1045 ret = seccomp_merge(conf->seccomp.seccomp_ctx, ctx.contexts[1]);
1046 if (ret < 0) {
1047 ERROR("Failed to merge first compat seccomp "
1048 "context into main context");
1049 goto bad;
1050 }
1051
1052 TRACE("Merged second compat seccomp context into main context");
1053 } else {
1054 seccomp_release(ctx.contexts[1]);
1055 ctx.contexts[1] = NULL;
1056 }
1057 }
1058
1059 if (ctx.contexts[2]) {
1060 if (ctx.needs_merge[2]) {
1061 ret = seccomp_merge(conf->seccomp.seccomp_ctx, ctx.contexts[2]);
1062 if (ret < 0) {
1063 ERROR("Failed to merge third compat seccomp "
1064 "context into main context");
1065 goto bad;
1066 }
1067
1068 TRACE("Merged third compat seccomp context into main context");
1069 } else {
1070 seccomp_release(ctx.contexts[2]);
1071 ctx.contexts[2] = NULL;
1072 }
1073 }
1074
1075 free(line);
1076 return 0;
1077
1078 bad_arch:
1079 ERROR("Unsupported architecture \"%s\"", line);
1080
1081 bad_rule:
1082 bad:
1083 if (ctx.contexts[0])
1084 seccomp_release(ctx.contexts[0]);
1085
1086 if (ctx.contexts[1])
1087 seccomp_release(ctx.contexts[1]);
1088
1089 if (ctx.contexts[2])
1090 seccomp_release(ctx.contexts[2]);
1091
1092 free(line);
1093
1094 return -1;
1095 }
1096 #else /* HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH */
/* Fallback used when the v2 policy parser is compiled out: version 2
 * policies cannot be parsed in this build.
 *
 * The parameter list matches the full implementation so that the call in
 * parse_config() compiles in both configurations. The caller hands over
 * @line, so it is freed here. Always returns -1.
 */
static int parse_config_v2(FILE *f, char *line, size_t *line_bufsz, struct lxc_conf *conf)
{
	(void)f;
	(void)line_bufsz;
	(void)conf;

	free(line);

	return -1;
}
1101 #endif /* HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH */
1102
/*
 * Layout of a policy file:
 *   line 1: policy language version, must be '1' or '2'
 *   line 2: directives; these must include 'whitelist' (version 1 or 2) or
 *           'blacklist' (version 2 only). 'debug' is recognised but not yet
 *           supported and is rejected.
 *   rest:   the policy itself, subject to the directives
 *
 * The directive line is handed to the version specific parser, which takes
 * over the buffer. Returns that parser's result, or -1 if the header is
 * invalid.
 */
static int parse_config(FILE *f, struct lxc_conf *conf)
{
	int version;
	size_t line_bufsz = 0;
	char *line = NULL;

	if (fscanf(f, "%d\n", &version) != 1 || (version != 1 && version != 2)) {
		ERROR("Invalid version");
		return -1;
	}

	if (getline(&line, &line_bufsz, f) == -1) {
		ERROR("Invalid config file");
	} else if (version == 1 && !strstr(line, "whitelist")) {
		ERROR("Only whitelist policy is supported");
	} else if (strstr(line, "debug")) {
		ERROR("Debug not yet implemented");
	} else if (version == 1) {
		return parse_config_v1(f, line, &line_bufsz, conf);
	} else {
		return parse_config_v2(f, line, &line_bufsz, conf);
	}

	/* only the rejected-header cases end up here */
	free(line);
	return -1;
}
1147
1148 /*
1149 * use_seccomp: return true if we should try and apply a seccomp policy
1150 * if defined for the container.
1151 * This will return false if
1152 * 1. seccomp is not enabled in the kernel
1153 * 2. a seccomp policy is already enabled for this task
1154 */
1155 static bool use_seccomp(const struct lxc_conf *conf)
1156 {
1157 int ret, v;
1158 FILE *f;
1159 size_t line_bufsz = 0;
1160 char *line = NULL;
1161 bool already_enabled = false, found = false;
1162
1163 if (conf->seccomp.allow_nesting > 0)
1164 return true;
1165
1166 f = fopen("/proc/self/status", "r");
1167 if (!f)
1168 return true;
1169
1170 while (getline(&line, &line_bufsz, f) != -1) {
1171 if (strncmp(line, "Seccomp:", 8) == 0) {
1172 found = true;
1173
1174 ret = sscanf(line + 8, "%d", &v);
1175 if (ret == 1 && v != 0)
1176 already_enabled = true;
1177
1178 break;
1179 }
1180 }
1181 free(line);
1182 fclose(f);
1183
1184 if (!found) {
1185 INFO("Seccomp is not enabled in the kernel");
1186 return false;
1187 }
1188
1189 if (already_enabled) {
1190 INFO("Already seccomp-confined, not loading new policy");
1191 return false;
1192 }
1193
1194 return true;
1195 }
1196
1197 int lxc_read_seccomp_config(struct lxc_conf *conf)
1198 {
1199 int ret;
1200 FILE *f;
1201
1202 if (!conf->seccomp.seccomp)
1203 return 0;
1204
1205 if (!use_seccomp(conf))
1206 return 0;
1207
1208 #if HAVE_SCMP_FILTER_CTX
1209 /* XXX for debug, pass in SCMP_ACT_TRAP */
1210 conf->seccomp.seccomp_ctx = seccomp_init(SCMP_ACT_KILL);
1211 ret = !conf->seccomp.seccomp_ctx;
1212 #else
1213 ret = seccomp_init(SCMP_ACT_KILL) < 0;
1214 #endif
1215 if (ret) {
1216 ERROR("Failed initializing seccomp");
1217 return -1;
1218 }
1219
1220 /* turn off no-new-privs. We don't want it in lxc, and it breaks
1221 * with apparmor */
1222 #if HAVE_SCMP_FILTER_CTX
1223 ret = seccomp_attr_set(conf->seccomp.seccomp_ctx, SCMP_FLTATR_CTL_NNP, 0);
1224 #else
1225 ret = seccomp_attr_set(SCMP_FLTATR_CTL_NNP, 0);
1226 #endif
1227 if (ret < 0) {
1228 errno = -ret;
1229 SYSERROR("Failed to turn off no-new-privs");
1230 return -1;
1231 }
1232
1233 #ifdef SCMP_FLTATR_ATL_TSKIP
1234 ret = seccomp_attr_set(conf->seccomp.seccomp_ctx, SCMP_FLTATR_ATL_TSKIP, 1);
1235 if (ret < 0) {
1236 errno = -ret;
1237 SYSWARN("Failed to turn on seccomp nop-skip, continuing");
1238 }
1239 #endif
1240
1241 f = fopen(conf->seccomp.seccomp, "r");
1242 if (!f) {
1243 SYSERROR("Failed to open seccomp policy file %s", conf->seccomp.seccomp);
1244 return -1;
1245 }
1246
1247 ret = parse_config(f, conf);
1248 fclose(f);
1249
1250 return ret;
1251 }
1252
1253 int lxc_seccomp_load(struct lxc_conf *conf)
1254 {
1255 int ret;
1256
1257 if (!conf->seccomp.seccomp)
1258 return 0;
1259
1260 if (!use_seccomp(conf))
1261 return 0;
1262
1263 #if HAVE_SCMP_FILTER_CTX
1264 ret = seccomp_load(conf->seccomp.seccomp_ctx);
1265 #else
1266 ret = seccomp_load();
1267 #endif
1268 if (ret < 0) {
1269 errno = -ret;
1270 SYSERROR("Error loading the seccomp policy");
1271 return -1;
1272 }
1273
1274 /* After load seccomp filter into the kernel successfully, export the current seccomp
1275 * filter to log file */
1276 #if HAVE_SCMP_FILTER_CTX
1277 if ((lxc_log_get_level() <= LXC_LOG_LEVEL_TRACE ||
1278 conf->loglevel <= LXC_LOG_LEVEL_TRACE) &&
1279 lxc_log_fd >= 0) {
1280 ret = seccomp_export_pfc(conf->seccomp.seccomp_ctx, lxc_log_fd);
1281 /* Just give an warning when export error */
1282 if (ret < 0) {
1283 errno = -ret;
1284 SYSWARN("Failed to export seccomp filter to log file");
1285 }
1286 }
1287 #endif
1288
1289 #if HAVE_DECL_SECCOMP_NOTIFY_FD
1290 if (conf->seccomp.notifier.wants_supervision) {
1291 ret = seccomp_notify_fd(conf->seccomp.seccomp_ctx);
1292 if (ret < 0) {
1293 errno = -ret;
1294 return -1;
1295 }
1296
1297 conf->seccomp.notifier.notify_fd = ret;
1298 TRACE("Retrieved new seccomp listener fd %d", ret);
1299 }
1300 #endif
1301
1302 return 0;
1303 }
1304
1305 void lxc_seccomp_free(struct lxc_seccomp *seccomp)
1306 {
1307 free_disarm(seccomp->seccomp);
1308
1309 #if HAVE_SCMP_FILTER_CTX
1310 if (seccomp->seccomp_ctx) {
1311 seccomp_release(seccomp->seccomp_ctx);
1312 seccomp->seccomp_ctx = NULL;
1313 }
1314 #endif
1315
1316 #if HAVE_DECL_SECCOMP_NOTIFY_FD
1317 close_prot_errno_disarm(seccomp->notifier.notify_fd);
1318 close_prot_errno_disarm(seccomp->notifier.proxy_fd);
1319 seccomp_notify_free(seccomp->notifier.req_buf, seccomp->notifier.rsp_buf);
1320 seccomp->notifier.req_buf = NULL;
1321 seccomp->notifier.rsp_buf = NULL;
1322 #endif
1323 }
1324
#if HAVE_DECL_SECCOMP_NOTIFY_FD
/*
 * Drop the current connection to the seccomp proxy and open a new
 * SOCK_SEQPACKET connection to the configured proxy address.
 *
 * On success the new socket is stored in the handler's config and 0 is
 * returned. On failure -1 is returned and the stored proxy fd stays closed.
 */
static int seccomp_notify_reconnect(struct lxc_handler *handler)
{
	__do_close_prot_errno int proxy_sock = -EBADF;
	struct lxc_conf *conf = handler->conf;

	close_prot_errno_disarm(conf->seccomp.notifier.proxy_fd);

	proxy_sock = lxc_unix_connect_type(&conf->seccomp.notifier.proxy_addr,
					   SOCK_SEQPACKET);
	if (proxy_sock < 0) {
		SYSERROR("Failed to reconnect to seccomp proxy");
		return -1;
	}

	/* 30 second timeout */
	if (lxc_socket_set_timeout(proxy_sock, 30, 30) != 0) {
		SYSERROR("Failed to set socket timeout");
		return -1;
	}

	/* Hand the socket over so the cleanup attribute leaves it open. */
	conf->seccomp.notifier.proxy_fd = move_fd(proxy_sock);
	return 0;
}
#endif
1348
#if HAVE_DECL_SECCOMP_NOTIFY_FD
/*
 * Answer a seccomp notification ourselves by failing the intercepted syscall
 * with ENOSYS. Used whenever the request cannot be handed to, or properly
 * answered by, the seccomp proxy.
 *
 * @fd:      seccomp notify fd the answer is written to
 * @req:     the notification being answered; only its id is used
 * @resp:    response buffer that is filled in and sent
 * @handler: unused
 *
 * The response buffer is shared with the proxy exchange in
 * seccomp_notify_handler(), where it is a receive target, so it may still
 * contain whatever a proxy wrote into it. Every field is therefore set
 * explicitly instead of relying on the buffer's previous content. Failures
 * are logged but not reported to the caller.
 */
static void seccomp_notify_default_answer(int fd, struct seccomp_notif *req,
					  struct seccomp_notif_resp *resp,
					  struct lxc_handler *handler)
{
	resp->id = req->id;
	resp->error = -ENOSYS;
	resp->val = 0;
	resp->flags = 0;

	if (seccomp_notify_respond(fd, resp))
		SYSERROR("Failed to send default message to seccomp");
}
#endif
1361
/*
 * Mainloop callback for the seccomp notify fd.
 *
 * Reads one notification from @fd, forwards it to the seccomp proxy together
 * with file descriptors for /proc/<pid> and /proc/<pid>/mem of the task that
 * triggered it, waits for the proxy's reply and relays that reply to the
 * kernel with seccomp_notify_respond(). If anything goes wrong after the
 * notification has been read, seccomp_notify_default_answer() is sent
 * instead.
 *
 * @fd:     seccomp notify fd
 * @events: unused
 * @data:   the struct lxc_handler of the container
 * @descr:  unused
 *
 * Returns 0 in every case when seccomp notify support is compiled in
 * (failures are only logged), -ENOSYS otherwise.
 */
int seccomp_notify_handler(int fd, uint32_t events, void *data,
			   struct lxc_epoll_descr *descr)
{
#if HAVE_DECL_SECCOMP_NOTIFY_FD
	__do_close_prot_errno int fd_pid = -EBADF;
	__do_close_prot_errno int fd_mem = -EBADF;
	int ret;
	ssize_t bytes;
	int send_fd_list[2];
	struct iovec iov[4];
	size_t iov_len, msg_base_size, msg_full_size;
	char mem_path[6 /* /proc/ */
		      + INTTYPE_TO_STRLEN(int64_t)
		      + 4 /* /mem */
		      + 1 /* \0 */];
	bool reconnected = false;
	struct lxc_handler *hdlr = data;
	struct lxc_conf *conf = hdlr->conf;
	struct seccomp_notif *req = conf->seccomp.notifier.req_buf;
	struct seccomp_notif_resp *resp = conf->seccomp.notifier.rsp_buf;
	int listener_proxy_fd = conf->seccomp.notifier.proxy_fd;
	struct seccomp_notify_proxy_msg msg = {0};
	char *cookie = conf->seccomp.notifier.cookie;
	uint64_t req_id;

	/* Without a notification there is nothing that could be answered. */
	ret = seccomp_notify_receive(fd, req);
	if (ret) {
		SYSERROR("Failed to read seccomp notification");
		goto out;
	}

	if (listener_proxy_fd < 0) {
		ret = -1;
		/* Same condition as for the initial setup_proxy() */
		if (conf->seccomp.notifier.wants_supervision &&
		    conf->seccomp.notifier.proxy_addr.sun_path[1] != '\0') {
			ret = seccomp_notify_reconnect(hdlr);
		}
		if (ret) {
			ERROR("No seccomp proxy registered");
			seccomp_notify_default_answer(fd, req, resp, hdlr);
			goto out;
		}
		listener_proxy_fd = conf->seccomp.notifier.proxy_fd;
	}

	/* remember the ID in case we receive garbage from the proxy */
	resp->id = req_id = req->id;

	/*
	 * In the failure paths below the error is logged before the default
	 * answer is sent so that the errno of the failing call is what gets
	 * reported.
	 */
	snprintf(mem_path, sizeof(mem_path), "/proc/%d", req->pid);
	fd_pid = open(mem_path, O_RDONLY | O_DIRECTORY | O_CLOEXEC);
	if (fd_pid < 0) {
		SYSERROR("Failed to open process pidfd for seccomp notify request");
		seccomp_notify_default_answer(fd, req, resp, hdlr);
		goto out;
	}

	snprintf(mem_path, sizeof(mem_path), "/proc/%d/mem", req->pid);
	fd_mem = open(mem_path, O_RDWR | O_CLOEXEC);
	if (fd_mem < 0) {
		SYSERROR("Failed to open process memory for seccomp notify request");
		seccomp_notify_default_answer(fd, req, resp, hdlr);
		goto out;
	}

	/*
	 * Make sure that the fd for /proc/<pid>/mem we just opened still
	 * refers to the correct process's memory.
	 */
	ret = seccomp_notify_id_valid(fd, req->id);
	if (ret < 0) {
		SYSERROR("Invalid seccomp notify request id");
		seccomp_notify_default_answer(fd, req, resp, hdlr);
		goto out;
	}

	msg.monitor_pid = hdlr->monitor_pid;
	msg.init_pid = hdlr->pid;
	memcpy(&msg.sizes, &conf->seccomp.notifier.sizes, sizeof(msg.sizes));

	/*
	 * The message consists of the proxy header, the raw notification and
	 * the (pre-filled) response, optionally followed by the cookie. The
	 * reply is expected to contain the first three parts only.
	 */
	msg_base_size = 0;
	iov[0].iov_base = &msg;
	msg_base_size += (iov[0].iov_len = sizeof(msg));
	iov[1].iov_base = req;
	msg_base_size += (iov[1].iov_len = msg.sizes.seccomp_notif);
	iov[2].iov_base = resp;
	msg_base_size += (iov[2].iov_len = msg.sizes.seccomp_notif_resp);
	msg_full_size = msg_base_size;

	if (cookie) {
		size_t len = strlen(cookie);

		msg.cookie_len = (uint64_t)len;

		iov[3].iov_base = cookie;
		msg_full_size += (iov[3].iov_len = len);

		iov_len = 4;
	} else {
		iov_len = 3;
	}

	send_fd_list[0] = fd_pid;
	send_fd_list[1] = fd_mem;

retry:
	bytes = lxc_abstract_unix_send_fds_iov(listener_proxy_fd, send_fd_list,
					       2, iov, iov_len);
	if (bytes != (ssize_t)msg_full_size) {
		SYSERROR("Failed to forward message to seccomp proxy");
		/* Try exactly one reconnect before giving up. */
		if (!reconnected) {
			ret = seccomp_notify_reconnect(hdlr);
			if (ret == 0) {
				reconnected = true;
				/*
				 * seccomp_notify_reconnect() closed the old
				 * socket and stored a new one, so resend on
				 * the new one, not on the stale fd number.
				 */
				listener_proxy_fd = conf->seccomp.notifier.proxy_fd;
				goto retry;
			}
		}

		seccomp_notify_default_answer(fd, req, resp, hdlr);
		goto out;
	}

	close_prot_errno_disarm(fd_mem);

	bytes = lxc_recvmsg_nointr_iov(listener_proxy_fd, iov, iov_len, MSG_TRUNC);

	/*
	 * The reply was received into msg, req and resp. Restore the id the
	 * kernel gave us in req so that any default answer sent from here on
	 * refers to the right notification even if the proxy sent garbage.
	 */
	req->id = req_id;

	if (bytes != (ssize_t)msg_base_size) {
		SYSERROR("Failed to receive message from seccomp proxy");
		seccomp_notify_default_answer(fd, req, resp, hdlr);
		goto out;
	}

	/*
	 * Validate the reply. These checks are only meaningful once the
	 * proxy's data has actually been received.
	 */
	if (msg.__reserved != 0) {
		ERROR("Proxy filled reserved data in response");
		seccomp_notify_default_answer(fd, req, resp, hdlr);
		goto out;
	}

	if (resp->id != req_id) {
		resp->id = req_id;
		ERROR("Proxy returned response with illegal id");
		seccomp_notify_default_answer(fd, req, resp, hdlr);
		goto out;
	}

	ret = seccomp_notify_respond(fd, resp);
	if (ret)
		SYSERROR("Failed to send seccomp notification");

out:
	return 0;
#else
	return -ENOSYS;
#endif
}
1517
1518 void seccomp_conf_init(struct lxc_conf *conf)
1519 {
1520 conf->seccomp.seccomp = NULL;
1521 #if HAVE_SCMP_FILTER_CTX
1522 conf->seccomp.allow_nesting = 0;
1523 memset(&conf->seccomp.seccomp_ctx, 0, sizeof(conf->seccomp.seccomp_ctx));
1524 #endif /* HAVE_SCMP_FILTER_CTX */
1525 #if HAVE_DECL_SECCOMP_NOTIFY_FD
1526 conf->seccomp.notifier.wants_supervision = false;
1527 conf->seccomp.notifier.notify_fd = -EBADF;
1528 conf->seccomp.notifier.proxy_fd = -EBADF;
1529 memset(&conf->seccomp.notifier.proxy_addr, 0,
1530 sizeof(conf->seccomp.notifier.proxy_addr));
1531 conf->seccomp.notifier.req_buf = NULL;
1532 conf->seccomp.notifier.rsp_buf = NULL;
1533 #endif
1534 }
1535
/*
 * Connect to the seccomp proxy and register the seccomp notify fd with the
 * mainloop.
 *
 * Nothing is done (and 0 is returned) unless supervision is wanted and a
 * proxy address is configured. Otherwise this
 *   - connects to the proxy address with a SOCK_SEQPACKET socket,
 *   - sets its timeouts,
 *   - queries the kernel's seccomp notify struct sizes,
 *   - allocates the notification request/response buffers and
 *   - adds seccomp_notify_handler() for the notify fd to @descr.
 *
 * @seccomp: seccomp state of the container
 * @descr:   mainloop the notify handler is added to
 * @handler: passed to seccomp_notify_handler() as its data argument
 *
 * Returns 0 on success and -1 on failure. The proxy socket is only stored in
 * @seccomp once every step has succeeded; on failure it is closed again.
 */
int lxc_seccomp_setup_proxy(struct lxc_seccomp *seccomp,
			    struct lxc_epoll_descr *descr,
			    struct lxc_handler *handler)
{
#if HAVE_DECL_SECCOMP_NOTIFY_FD
	/*
	 * NOTE(review): sun_path[1] rather than sun_path[0] is tested,
	 * presumably because the proxy address is an abstract socket whose
	 * first byte is NUL - confirm against where proxy_addr is filled in.
	 */
	if (seccomp->notifier.wants_supervision &&
	    seccomp->notifier.proxy_addr.sun_path[1] != '\0') {
		__do_close_prot_errno int proxy_fd = -EBADF;
		int ret;

		proxy_fd = lxc_unix_connect_type(&seccomp->notifier.proxy_addr,
						 SOCK_SEQPACKET);
		if (proxy_fd < 0) {
			SYSERROR("Failed to connect to seccomp proxy");
			return -1;
		}

		/* 30 second timeout */
		ret = lxc_socket_set_timeout(proxy_fd, 30, 30);
		if (ret) {
			SYSERROR("Failed to set timeouts for seccomp proxy");
			return -1;
		}

		ret = __seccomp(SECCOMP_GET_NOTIF_SIZES, 0,
				&seccomp->notifier.sizes);
		if (ret) {
			SYSERROR("Failed to query seccomp notify struct sizes");
			return -1;
		}

		ret = seccomp_notify_alloc(&seccomp->notifier.req_buf,
					   &seccomp->notifier.rsp_buf);
		if (ret) {
			ERROR("Failed to allocate seccomp notify request and response buffers");
			/*
			 * libseccomp reports failures as negative errno
			 * values, as handled for its other calls in this file.
			 */
			errno = -ret;
			return -1;
		}

		/* The fd watched by the mainloop is the seccomp notify fd. */
		ret = lxc_mainloop_add_handler(descr,
					       seccomp->notifier.notify_fd,
					       seccomp_notify_handler, handler);
		if (ret < 0) {
			ERROR("Failed to add seccomp notify handler for %d to mainloop",
			      seccomp->notifier.notify_fd);
			return -1;
		}

		/* Keep the proxy connection open beyond this scope. */
		seccomp->notifier.proxy_fd = move_fd(proxy_fd);
	}
#endif
	return 0;
}
1589
/*
 * Send the seccomp notify fd over @socket_fd and close the local copy
 * afterwards.
 *
 * Does nothing unless supervision is wanted. Returns -1 if sending fails (the
 * fd is left open in that case) and 0 otherwise.
 */
int lxc_seccomp_send_notifier_fd(struct lxc_seccomp *seccomp, int socket_fd)
{
#if HAVE_DECL_SECCOMP_NOTIFY_FD
	if (!seccomp->notifier.wants_supervision)
		return 0;

	if (lxc_abstract_unix_send_fds(socket_fd, &seccomp->notifier.notify_fd,
				       1, NULL, 0) < 0)
		return -1;

	/* The fd has been passed on; the local copy is closed here. */
	close_prot_errno_disarm(seccomp->notifier.notify_fd);
#endif
	return 0;
}
1603
/*
 * Receive the seccomp notify fd from @socket_fd and store it in @seccomp.
 *
 * Does nothing unless supervision is wanted. Returns -1 if receiving fails
 * and 0 otherwise.
 */
int lxc_seccomp_recv_notifier_fd(struct lxc_seccomp *seccomp, int socket_fd)
{
#if HAVE_DECL_SECCOMP_NOTIFY_FD
	if (!seccomp->notifier.wants_supervision)
		return 0;

	if (lxc_abstract_unix_recv_fds(socket_fd, &seccomp->notifier.notify_fd,
				       1, NULL, 0) < 0)
		return -1;
#endif
	return 0;
}
1619
/*
 * Register the seccomp notify fd as a listener with the container identified
 * by @name and @lxcpath via lxc_cmd_seccomp_notify_add_listener().
 *
 * Does nothing unless supervision is wanted. The local notify fd is closed
 * whether or not the registration succeeded. Returns -1 if the registration
 * failed and 0 otherwise.
 */
int lxc_seccomp_add_notifier(const char *name, const char *lxcpath,
			     struct lxc_seccomp *seccomp)
{
#if HAVE_DECL_SECCOMP_NOTIFY_FD
	int ret;

	if (!seccomp->notifier.wants_supervision)
		return 0;

	ret = lxc_cmd_seccomp_notify_add_listener(name, lxcpath,
						  seccomp->notifier.notify_fd,
						  -1, 0);

	/* Close before looking at the result so the fd never outlives the call. */
	close_prot_errno_disarm(seccomp->notifier.notify_fd);

	if (ret < 0)
		return -1;
#endif
	return 0;
}