2 * lxc: linux Container library
4 * (C) Copyright Canonical, Inc. 2012
7 * Serge Hallyn <serge.hallyn@canonical.com>
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
31 #include <sys/mount.h>
32 #include <sys/utsname.h>
38 #include "lxccontainer.h"
39 #include "lxcseccomp.h"
41 #include "memory_utils.h"
45 #define MIPS_ARCH_O32 lxc_seccomp_arch_mipsel
46 #define MIPS_ARCH_N64 lxc_seccomp_arch_mipsel64
48 #define MIPS_ARCH_O32 lxc_seccomp_arch_mips
49 #define MIPS_ARCH_N64 lxc_seccomp_arch_mips64
52 lxc_log_define(seccomp
, lxc
);
54 static int parse_config_v1(FILE *f
, char *line
, size_t *line_bufsz
, struct lxc_conf
*conf
)
58 while (getline(&line
, line_bufsz
, f
) != -1) {
61 ret
= sscanf(line
, "%d", &nr
);
67 #if HAVE_SCMP_FILTER_CTX
68 ret
= seccomp_rule_add(conf
->seccomp
.seccomp_ctx
, SCMP_ACT_ALLOW
, nr
, 0);
70 ret
= seccomp_rule_add(SCMP_ACT_ALLOW
, nr
, 0);
73 ERROR("Failed loading allow rule for %d", nr
);
82 #if HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH
83 static const char *get_action_name(uint32_t action
)
85 /* The upper 16 bits indicate the type of the seccomp action. */
86 switch (action
& 0xffff0000) {
93 case SCMP_ACT_ERRNO(0):
95 #if HAVE_DECL_SECCOMP_NOTIFY_FD
101 return "invalid action";
104 static uint32_t get_v2_default_action(char *line
)
106 uint32_t ret_action
= -1;
111 /* After 'whitelist' or 'blacklist' comes default behavior. */
112 if (strncmp(line
, "kill", 4) == 0) {
113 ret_action
= SCMP_ACT_KILL
;
114 } else if (strncmp(line
, "errno", 5) == 0) {
117 ret
= sscanf(line
+ 5, "%d", &e
);
119 ERROR("Failed to parse errno value from %s", line
);
123 ret_action
= SCMP_ACT_ERRNO(e
);
124 } else if (strncmp(line
, "allow", 5) == 0) {
125 ret_action
= SCMP_ACT_ALLOW
;
126 } else if (strncmp(line
, "trap", 4) == 0) {
127 ret_action
= SCMP_ACT_TRAP
;
128 #if HAVE_DECL_SECCOMP_NOTIFY_FD
129 } else if (strncmp(line
, "notify", 6) == 0) {
130 ret_action
= SCMP_ACT_NOTIFY
;
132 } else if (line
[0]) {
133 ERROR("Unrecognized seccomp action \"%s\"", line
);
140 static uint32_t get_v2_action(char *line
, uint32_t def_action
)
145 p
= strchr(line
, ' ');
153 if (!*p
|| *p
== '#')
156 ret
= get_v2_default_action(p
);
167 struct seccomp_v2_rule_args
{
171 enum scmp_compare op
;
174 struct seccomp_v2_rule
{
177 struct seccomp_v2_rule_args args_value
[6];
180 static enum scmp_compare
parse_v2_rule_op(char *s
)
182 if (strcmp(s
, "SCMP_CMP_NE") == 0 || strcmp(s
, "!=") == 0)
184 else if (strcmp(s
, "SCMP_CMP_LT") == 0 || strcmp(s
, "<") == 0)
186 else if (strcmp(s
, "SCMP_CMP_LE") == 0 || strcmp(s
, "<=") == 0)
188 else if (strcmp(s
, "SCMP_CMP_EQ") == 0 || strcmp(s
, "==") == 0)
190 else if (strcmp(s
, "SCMP_CMP_GE") == 0 || strcmp(s
, ">=") == 0)
192 else if (strcmp(s
, "SCMP_CMP_GT") == 0 || strcmp(s
, ">") == 0)
194 else if (strcmp(s
, "SCMP_CMP_MASKED_EQ") == 0 || strcmp(s
, "&=") == 0)
195 return SCMP_CMP_MASKED_EQ
;
197 return _SCMP_CMP_MAX
;
201 * This function is used to parse the args string into the structure.
202 * args string format:[index,value,op,mask] or [index,value,op]
203 * index: the index for syscall arguments (type uint)
204 * value: the value for syscall arguments (type uint64)
205 * op: the operator for syscall arguments(string),
206 a valid list of constants as of libseccomp v2.3.2 is
207 SCMP_CMP_NE, SCMP_CMP_LT, SCMP_CMP_LE, SCMP_CMP_EQ, SCMP_CMP_GE,
208 SCMP_CMP_GT, SCMP_CMP_MASKED_EQ, or !=, <, <=, ==, >=, >, &=
209 * mask: the mask to apply on "value" for SCMP_CMP_MASKED_EQ (type uint64, optional)
210 * Returns 0 on success, < 0 otherwise.
212 static int get_seccomp_arg_value(char *key
, struct seccomp_v2_rule_args
*rule_args
)
216 uint64_t mask
= 0, value
= 0;
217 enum scmp_compare op
= 0;
219 char s
[31] = {0}, v
[24] = {0}, m
[24] = {'0'};
221 tmp
= strchr(key
, '[');
223 ERROR("Failed to interpret args");
227 ret
= sscanf(tmp
, "[%i,%23[^,],%30[^0-9^,],%23[^,]", &index
, v
, s
, m
);
228 if ((ret
!= 3 && ret
!= 4) || index
>= 6) {
229 ERROR("Failed to interpret args value");
233 ret
= lxc_safe_uint64(v
, &value
, 0);
235 ERROR("Invalid argument value");
239 ret
= lxc_safe_uint64(m
, &mask
, 0);
241 ERROR("Invalid argument mask");
245 op
= parse_v2_rule_op(s
);
246 if (op
== _SCMP_CMP_MAX
) {
247 ERROR("Failed to interpret args operator value");
251 rule_args
->index
= index
;
252 rule_args
->value
= value
;
253 rule_args
->mask
= mask
;
258 /* This function is used to parse the seccomp rule entry.
259 * @line : seccomp rule entry string.
260 * @def_action : default action, used when 'line' does not contain a valid action.
261 * @rules : output struct.
262 * Returns 0 on success, < 0 otherwise.
264 static int parse_v2_rules(char *line
, uint32_t def_action
,
265 struct seccomp_v2_rule
*rules
)
268 char *key
= NULL
, *saveptr
= NULL
, *tmp
= NULL
;
274 /* read optional action which follows the syscall */
275 rules
->action
= get_v2_action(tmp
, def_action
);
276 if (rules
->action
== -1) {
277 ERROR("Failed to interpret action");
284 if (!strchr(tmp
, '['))
288 for ((key
= strtok_r(tmp
, "]", &saveptr
)), i
= 0; key
&& i
< 6;
289 (key
= strtok_r(NULL
, "]", &saveptr
)), i
++) {
290 ret
= get_seccomp_arg_value(key
, &rules
->args_value
[i
]);
306 #if HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH
/*
 * lxc-internal architecture identifiers used while parsing a version 2
 * seccomp policy.
 *
 * get_hostarch() reports the host as one of these values, the "[arch]"
 * section headers of a policy select one as the current rule architecture,
 * and get_new_ctx() maps them onto libseccomp SCMP_ARCH_* tokens.
 */
enum lxc_hostarch_t {
	lxc_seccomp_arch_all = 0,	/* "[all]" section */
	lxc_seccomp_arch_native,
	lxc_seccomp_arch_i386,
	lxc_seccomp_arch_x32,
	lxc_seccomp_arch_amd64,
	lxc_seccomp_arch_arm,
	lxc_seccomp_arch_arm64,
	lxc_seccomp_arch_ppc64,
	lxc_seccomp_arch_ppc64le,
	lxc_seccomp_arch_ppc,
	lxc_seccomp_arch_mips,
	lxc_seccomp_arch_mips64,
	lxc_seccomp_arch_mips64n32,
	lxc_seccomp_arch_mipsel,
	lxc_seccomp_arch_mipsel64,
	lxc_seccomp_arch_mipsel64n32,
	lxc_seccomp_arch_s390x,
	/* Unrecognized host, or a section that does not apply to the host. */
	lxc_seccomp_arch_unknown = 999,
};
328 int get_hostarch(void)
331 if (uname(&uts
) < 0) {
332 SYSERROR("Failed to read host arch");
336 if (strcmp(uts
.machine
, "i686") == 0)
337 return lxc_seccomp_arch_i386
;
339 else if (strcmp(uts
.machine
, "x86_64") == 0)
340 return lxc_seccomp_arch_amd64
;
341 else if (strncmp(uts
.machine
, "armv7", 5) == 0)
342 return lxc_seccomp_arch_arm
;
343 else if (strncmp(uts
.machine
, "aarch64", 7) == 0)
344 return lxc_seccomp_arch_arm64
;
345 else if (strncmp(uts
.machine
, "ppc64le", 7) == 0)
346 return lxc_seccomp_arch_ppc64le
;
347 else if (strncmp(uts
.machine
, "ppc64", 5) == 0)
348 return lxc_seccomp_arch_ppc64
;
349 else if (strncmp(uts
.machine
, "ppc", 3) == 0)
350 return lxc_seccomp_arch_ppc
;
351 else if (strncmp(uts
.machine
, "mips64", 6) == 0)
352 return MIPS_ARCH_N64
;
353 else if (strncmp(uts
.machine
, "mips", 4) == 0)
354 return MIPS_ARCH_O32
;
355 else if (strncmp(uts
.machine
, "s390x", 5) == 0)
356 return lxc_seccomp_arch_s390x
;
358 return lxc_seccomp_arch_unknown
;
361 scmp_filter_ctx
get_new_ctx(enum lxc_hostarch_t n_arch
,
362 uint32_t default_policy_action
, bool *needs_merge
)
369 case lxc_seccomp_arch_i386
:
370 arch
= SCMP_ARCH_X86
;
372 case lxc_seccomp_arch_x32
:
373 arch
= SCMP_ARCH_X32
;
375 case lxc_seccomp_arch_amd64
:
376 arch
= SCMP_ARCH_X86_64
;
378 case lxc_seccomp_arch_arm
:
379 arch
= SCMP_ARCH_ARM
;
381 #ifdef SCMP_ARCH_AARCH64
382 case lxc_seccomp_arch_arm64
:
383 arch
= SCMP_ARCH_AARCH64
;
386 #ifdef SCMP_ARCH_PPC64LE
387 case lxc_seccomp_arch_ppc64le
:
388 arch
= SCMP_ARCH_PPC64LE
;
391 #ifdef SCMP_ARCH_PPC64
392 case lxc_seccomp_arch_ppc64
:
393 arch
= SCMP_ARCH_PPC64
;
397 case lxc_seccomp_arch_ppc
:
398 arch
= SCMP_ARCH_PPC
;
401 #ifdef SCMP_ARCH_MIPS
402 case lxc_seccomp_arch_mips
:
403 arch
= SCMP_ARCH_MIPS
;
405 case lxc_seccomp_arch_mips64
:
406 arch
= SCMP_ARCH_MIPS64
;
408 case lxc_seccomp_arch_mips64n32
:
409 arch
= SCMP_ARCH_MIPS64N32
;
411 case lxc_seccomp_arch_mipsel
:
412 arch
= SCMP_ARCH_MIPSEL
;
414 case lxc_seccomp_arch_mipsel64
:
415 arch
= SCMP_ARCH_MIPSEL64
;
417 case lxc_seccomp_arch_mipsel64n32
:
418 arch
= SCMP_ARCH_MIPSEL64N32
;
421 #ifdef SCMP_ARCH_S390X
422 case lxc_seccomp_arch_s390x
:
423 arch
= SCMP_ARCH_S390X
;
430 ctx
= seccomp_init(default_policy_action
);
432 ERROR("Error initializing seccomp context");
436 ret
= seccomp_attr_set(ctx
, SCMP_FLTATR_CTL_NNP
, 0);
439 SYSERROR("Failed to turn off no-new-privs");
440 seccomp_release(ctx
);
444 #ifdef SCMP_FLTATR_ATL_TSKIP
445 ret
= seccomp_attr_set(ctx
, SCMP_FLTATR_ATL_TSKIP
, 1);
448 SYSWARN("Failed to turn on seccomp nop-skip, continuing");
452 ret
= seccomp_arch_exist(ctx
, arch
);
454 if (ret
!= -EEXIST
) {
456 SYSERROR("Failed to determine whether arch %d is "
457 "already present in the main seccomp context",
459 seccomp_release(ctx
);
463 ret
= seccomp_arch_add(ctx
, arch
);
466 SYSERROR("Failed to add arch %d to main seccomp context",
468 seccomp_release(ctx
);
471 TRACE("Added arch %d to main seccomp context", (int)n_arch
);
473 ret
= seccomp_arch_remove(ctx
, SCMP_ARCH_NATIVE
);
475 ERROR("Failed to remove native arch from main seccomp context");
476 seccomp_release(ctx
);
479 TRACE("Removed native arch from main seccomp context");
483 *needs_merge
= false;
484 TRACE("Arch %d already present in main seccomp context", (int)n_arch
);
490 bool do_resolve_add_rule(uint32_t arch
, char *line
, scmp_filter_ctx ctx
,
491 struct seccomp_v2_rule
*rule
)
494 struct scmp_arg_cmp arg_cmp
[6];
496 ret
= seccomp_arch_exist(ctx
, arch
);
497 if (arch
&& ret
!= 0) {
499 SYSERROR("Seccomp: rule and context arch do not match (arch %d)", arch
);
503 /*get the syscall name*/
504 char *p
= strchr(line
, ' ');
508 if (strncmp(line
, "reject_force_umount", 19) == 0) {
509 ret
= seccomp_rule_add_exact(ctx
, SCMP_ACT_ERRNO(EACCES
),
510 SCMP_SYS(umount2
), 1,
511 SCMP_A1(SCMP_CMP_MASKED_EQ
, MNT_FORCE
, MNT_FORCE
));
514 SYSERROR("Failed loading rule to reject force umount");
518 INFO("Set seccomp rule to reject force umounts");
522 nr
= seccomp_syscall_resolve_name(line
);
523 if (nr
== __NR_SCMP_ERROR
) {
524 WARN("Failed to resolve syscall \"%s\"", line
);
525 WARN("This syscall will NOT be handled by seccomp");
530 WARN("Got negative return value %d for syscall \"%s\"", nr
, line
);
531 WARN("This syscall will NOT be handled by seccomp");
535 memset(&arg_cmp
, 0, sizeof(arg_cmp
));
536 for (i
= 0; i
< rule
->args_num
; i
++) {
537 INFO("arg_cmp[%d]: SCMP_CMP(%u, %llu, %llu, %llu)", i
,
538 rule
->args_value
[i
].index
,
539 (long long unsigned int)rule
->args_value
[i
].op
,
540 (long long unsigned int)rule
->args_value
[i
].mask
,
541 (long long unsigned int)rule
->args_value
[i
].value
);
543 if (SCMP_CMP_MASKED_EQ
== rule
->args_value
[i
].op
)
544 arg_cmp
[i
] = SCMP_CMP(rule
->args_value
[i
].index
,
545 rule
->args_value
[i
].op
,
546 rule
->args_value
[i
].mask
,
547 rule
->args_value
[i
].value
);
549 arg_cmp
[i
] = SCMP_CMP(rule
->args_value
[i
].index
,
550 rule
->args_value
[i
].op
,
551 rule
->args_value
[i
].value
);
554 ret
= seccomp_rule_add_exact_array(ctx
, rule
->action
, nr
,
555 rule
->args_num
, arg_cmp
);
558 SYSERROR("Failed loading rule for %s (nr %d action %d (%s))",
559 line
, nr
, rule
->action
, get_action_name(rule
->action
));
580 static int parse_config_v2(FILE *f
, char *line
, size_t *line_bufsz
, struct lxc_conf
*conf
)
584 enum lxc_hostarch_t cur_rule_arch
, native_arch
;
585 bool blacklist
= false;
586 uint32_t default_policy_action
= -1, default_rule_action
= -1;
587 struct seccomp_v2_rule rule
;
588 struct scmp_ctx_info
{
589 uint32_t architectures
[3];
590 scmp_filter_ctx contexts
[3];
594 if (strncmp(line
, "blacklist", 9) == 0)
596 else if (strncmp(line
, "whitelist", 9) != 0) {
597 ERROR("Bad seccomp policy style \"%s\"", line
);
601 p
= strchr(line
, ' ');
603 default_policy_action
= get_v2_default_action(p
+ 1);
604 if (default_policy_action
== -2)
608 /* for blacklist, allow any syscall which has no rule */
610 if (default_policy_action
== -1)
611 default_policy_action
= SCMP_ACT_ALLOW
;
613 if (default_rule_action
== -1)
614 default_rule_action
= SCMP_ACT_KILL
;
616 if (default_policy_action
== -1)
617 default_policy_action
= SCMP_ACT_KILL
;
619 if (default_rule_action
== -1)
620 default_rule_action
= SCMP_ACT_ALLOW
;
623 memset(&ctx
, 0, sizeof(ctx
));
624 ctx
.architectures
[0] = SCMP_ARCH_NATIVE
;
625 ctx
.architectures
[1] = SCMP_ARCH_NATIVE
;
626 ctx
.architectures
[2] = SCMP_ARCH_NATIVE
;
627 native_arch
= get_hostarch();
628 cur_rule_arch
= native_arch
;
629 if (native_arch
== lxc_seccomp_arch_amd64
) {
630 cur_rule_arch
= lxc_seccomp_arch_all
;
632 ctx
.architectures
[0] = SCMP_ARCH_X86
;
633 ctx
.contexts
[0] = get_new_ctx(lxc_seccomp_arch_i386
,
634 default_policy_action
,
635 &ctx
.needs_merge
[0]);
636 if (!ctx
.contexts
[0])
639 ctx
.architectures
[1] = SCMP_ARCH_X32
;
640 ctx
.contexts
[1] = get_new_ctx(lxc_seccomp_arch_x32
,
641 default_policy_action
,
642 &ctx
.needs_merge
[1]);
643 if (!ctx
.contexts
[1])
646 ctx
.architectures
[2] = SCMP_ARCH_X86_64
;
647 ctx
.contexts
[2] = get_new_ctx(lxc_seccomp_arch_amd64
,
648 default_policy_action
,
649 &ctx
.needs_merge
[2]);
650 if (!ctx
.contexts
[2])
653 } else if (native_arch
== lxc_seccomp_arch_ppc64
) {
654 cur_rule_arch
= lxc_seccomp_arch_all
;
656 ctx
.architectures
[0] = SCMP_ARCH_PPC
;
657 ctx
.contexts
[0] = get_new_ctx(lxc_seccomp_arch_ppc
,
658 default_policy_action
,
659 &ctx
.needs_merge
[0]);
660 if (!ctx
.contexts
[0])
663 ctx
.architectures
[2] = SCMP_ARCH_PPC64
;
664 ctx
.contexts
[2] = get_new_ctx(lxc_seccomp_arch_ppc64
,
665 default_policy_action
,
666 &ctx
.needs_merge
[2]);
667 if (!ctx
.contexts
[2])
671 } else if (native_arch
== lxc_seccomp_arch_arm64
) {
672 cur_rule_arch
= lxc_seccomp_arch_all
;
674 ctx
.architectures
[0] = SCMP_ARCH_ARM
;
675 ctx
.contexts
[0] = get_new_ctx(lxc_seccomp_arch_arm
,
676 default_policy_action
,
677 &ctx
.needs_merge
[0]);
678 if (!ctx
.contexts
[0])
681 #ifdef SCMP_ARCH_AARCH64
682 ctx
.architectures
[2] = SCMP_ARCH_AARCH64
;
683 ctx
.contexts
[2] = get_new_ctx(lxc_seccomp_arch_arm64
,
684 default_policy_action
,
685 &ctx
.needs_merge
[2]);
686 if (!ctx
.contexts
[2])
690 #ifdef SCMP_ARCH_MIPS
691 } else if (native_arch
== lxc_seccomp_arch_mips64
) {
692 cur_rule_arch
= lxc_seccomp_arch_all
;
694 ctx
.architectures
[0] = SCMP_ARCH_MIPS
;
695 ctx
.contexts
[0] = get_new_ctx(lxc_seccomp_arch_mips
,
696 default_policy_action
,
697 &ctx
.needs_merge
[0]);
698 if (!ctx
.contexts
[0])
701 ctx
.architectures
[1] = SCMP_ARCH_MIPS64N32
;
702 ctx
.contexts
[1] = get_new_ctx(lxc_seccomp_arch_mips64n32
,
703 default_policy_action
,
704 &ctx
.needs_merge
[1]);
705 if (!ctx
.contexts
[1])
708 ctx
.architectures
[2] = SCMP_ARCH_MIPS64
;
709 ctx
.contexts
[2] = get_new_ctx(lxc_seccomp_arch_mips64
,
710 default_policy_action
,
711 &ctx
.needs_merge
[2]);
712 if (!ctx
.contexts
[2])
714 } else if (native_arch
== lxc_seccomp_arch_mipsel64
) {
715 cur_rule_arch
= lxc_seccomp_arch_all
;
717 ctx
.architectures
[0] = SCMP_ARCH_MIPSEL
;
718 ctx
.contexts
[0] = get_new_ctx(lxc_seccomp_arch_mipsel
,
719 default_policy_action
,
720 &ctx
.needs_merge
[0]);
721 if (!ctx
.contexts
[0])
724 ctx
.architectures
[1] = SCMP_ARCH_MIPSEL64N32
;
725 ctx
.contexts
[1] = get_new_ctx(lxc_seccomp_arch_mipsel64n32
,
726 default_policy_action
,
727 &ctx
.needs_merge
[1]);
728 if (!ctx
.contexts
[1])
731 ctx
.architectures
[2] = SCMP_ARCH_MIPSEL64
;
732 ctx
.contexts
[2] = get_new_ctx(lxc_seccomp_arch_mipsel64
,
733 default_policy_action
,
734 &ctx
.needs_merge
[2]);
735 if (!ctx
.contexts
[2])
740 if (default_policy_action
!= SCMP_ACT_KILL
) {
741 ret
= seccomp_reset(conf
->seccomp
.seccomp_ctx
, default_policy_action
);
743 ERROR("Error re-initializing Seccomp");
747 ret
= seccomp_attr_set(conf
->seccomp
.seccomp_ctx
, SCMP_FLTATR_CTL_NNP
, 0);
750 SYSERROR("Failed to turn off no-new-privs");
754 #ifdef SCMP_FLTATR_ATL_TSKIP
755 ret
= seccomp_attr_set(conf
->seccomp
.seccomp_ctx
, SCMP_FLTATR_ATL_TSKIP
, 1);
758 SYSWARN("Failed to turn on seccomp nop-skip, continuing");
763 while (getline(&line
, line_bufsz
, f
) != -1) {
770 remove_trailing_newlines(line
);
772 INFO("Processing \"%s\"", line
);
773 if (line
[0] == '[') {
774 /* Read the architecture for next set of rules. */
775 if (strcmp(line
, "[x86]") == 0 ||
776 strcmp(line
, "[X86]") == 0) {
777 if (native_arch
!= lxc_seccomp_arch_i386
&&
778 native_arch
!= lxc_seccomp_arch_amd64
) {
779 cur_rule_arch
= lxc_seccomp_arch_unknown
;
783 cur_rule_arch
= lxc_seccomp_arch_i386
;
784 } else if (strcmp(line
, "[x32]") == 0 ||
785 strcmp(line
, "[X32]") == 0) {
786 if (native_arch
!= lxc_seccomp_arch_amd64
) {
787 cur_rule_arch
= lxc_seccomp_arch_unknown
;
791 cur_rule_arch
= lxc_seccomp_arch_x32
;
792 } else if (strcmp(line
, "[X86_64]") == 0 ||
793 strcmp(line
, "[x86_64]") == 0) {
794 if (native_arch
!= lxc_seccomp_arch_amd64
) {
795 cur_rule_arch
= lxc_seccomp_arch_unknown
;
799 cur_rule_arch
= lxc_seccomp_arch_amd64
;
800 } else if (strcmp(line
, "[all]") == 0 ||
801 strcmp(line
, "[ALL]") == 0) {
802 cur_rule_arch
= lxc_seccomp_arch_all
;
805 else if (strcmp(line
, "[arm]") == 0 ||
806 strcmp(line
, "[ARM]") == 0) {
807 if (native_arch
!= lxc_seccomp_arch_arm
&&
808 native_arch
!= lxc_seccomp_arch_arm64
) {
809 cur_rule_arch
= lxc_seccomp_arch_unknown
;
813 cur_rule_arch
= lxc_seccomp_arch_arm
;
816 #ifdef SCMP_ARCH_AARCH64
817 else if (strcmp(line
, "[arm64]") == 0 ||
818 strcmp(line
, "[ARM64]") == 0) {
819 if (native_arch
!= lxc_seccomp_arch_arm64
) {
820 cur_rule_arch
= lxc_seccomp_arch_unknown
;
824 cur_rule_arch
= lxc_seccomp_arch_arm64
;
827 #ifdef SCMP_ARCH_PPC64LE
828 else if (strcmp(line
, "[ppc64le]") == 0 ||
829 strcmp(line
, "[PPC64LE]") == 0) {
830 if (native_arch
!= lxc_seccomp_arch_ppc64le
) {
831 cur_rule_arch
= lxc_seccomp_arch_unknown
;
835 cur_rule_arch
= lxc_seccomp_arch_ppc64le
;
838 #ifdef SCMP_ARCH_PPC64
839 else if (strcmp(line
, "[ppc64]") == 0 ||
840 strcmp(line
, "[PPC64]") == 0) {
841 if (native_arch
!= lxc_seccomp_arch_ppc64
) {
842 cur_rule_arch
= lxc_seccomp_arch_unknown
;
846 cur_rule_arch
= lxc_seccomp_arch_ppc64
;
850 else if (strcmp(line
, "[ppc]") == 0 ||
851 strcmp(line
, "[PPC]") == 0) {
852 if (native_arch
!= lxc_seccomp_arch_ppc
&&
853 native_arch
!= lxc_seccomp_arch_ppc64
) {
854 cur_rule_arch
= lxc_seccomp_arch_unknown
;
858 cur_rule_arch
= lxc_seccomp_arch_ppc
;
861 #ifdef SCMP_ARCH_MIPS
862 else if (strcmp(line
, "[mips64]") == 0 ||
863 strcmp(line
, "[MIPS64]") == 0) {
864 if (native_arch
!= lxc_seccomp_arch_mips64
) {
865 cur_rule_arch
= lxc_seccomp_arch_unknown
;
869 cur_rule_arch
= lxc_seccomp_arch_mips64
;
870 } else if (strcmp(line
, "[mips64n32]") == 0 ||
871 strcmp(line
, "[MIPS64N32]") == 0) {
872 if (native_arch
!= lxc_seccomp_arch_mips64
) {
873 cur_rule_arch
= lxc_seccomp_arch_unknown
;
877 cur_rule_arch
= lxc_seccomp_arch_mips64n32
;
878 } else if (strcmp(line
, "[mips]") == 0 ||
879 strcmp(line
, "[MIPS]") == 0) {
880 if (native_arch
!= lxc_seccomp_arch_mips
&&
881 native_arch
!= lxc_seccomp_arch_mips64
) {
882 cur_rule_arch
= lxc_seccomp_arch_unknown
;
886 cur_rule_arch
= lxc_seccomp_arch_mips
;
887 } else if (strcmp(line
, "[mipsel64]") == 0 ||
888 strcmp(line
, "[MIPSEL64]") == 0) {
889 if (native_arch
!= lxc_seccomp_arch_mipsel64
) {
890 cur_rule_arch
= lxc_seccomp_arch_unknown
;
894 cur_rule_arch
= lxc_seccomp_arch_mipsel64
;
895 } else if (strcmp(line
, "[mipsel64n32]") == 0 ||
896 strcmp(line
, "[MIPSEL64N32]") == 0) {
897 if (native_arch
!= lxc_seccomp_arch_mipsel64
) {
898 cur_rule_arch
= lxc_seccomp_arch_unknown
;
902 cur_rule_arch
= lxc_seccomp_arch_mipsel64n32
;
903 } else if (strcmp(line
, "[mipsel]") == 0 ||
904 strcmp(line
, "[MIPSEL]") == 0) {
905 if (native_arch
!= lxc_seccomp_arch_mipsel
&&
906 native_arch
!= lxc_seccomp_arch_mipsel64
) {
907 cur_rule_arch
= lxc_seccomp_arch_unknown
;
911 cur_rule_arch
= lxc_seccomp_arch_mipsel
;
914 #ifdef SCMP_ARCH_S390X
915 else if (strcmp(line
, "[s390x]") == 0 ||
916 strcmp(line
, "[S390X]") == 0) {
917 if (native_arch
!= lxc_seccomp_arch_s390x
) {
918 cur_rule_arch
= lxc_seccomp_arch_unknown
;
922 cur_rule_arch
= lxc_seccomp_arch_s390x
;
932 /* irrelevant arch - i.e. arm on i386 */
933 if (cur_rule_arch
== lxc_seccomp_arch_unknown
)
936 memset(&rule
, 0, sizeof(rule
));
937 /* read optional action which follows the syscall */
938 ret
= parse_v2_rules(line
, default_rule_action
, &rule
);
940 ERROR("Failed to interpret seccomp rule");
944 #if HAVE_DECL_SECCOMP_NOTIFY_FD
945 if ((rule
.action
== SCMP_ACT_NOTIFY
) &&
946 !conf
->seccomp
.notifier
.wants_supervision
) {
947 ret
= seccomp_attr_set(conf
->seccomp
.seccomp_ctx
,
948 SCMP_FLTATR_NEW_LISTENER
, 1);
952 conf
->seccomp
.notifier
.wants_supervision
= true;
953 TRACE("Set SCMP_FLTATR_NEW_LISTENER attribute");
957 if (!do_resolve_add_rule(SCMP_ARCH_NATIVE
, line
,
958 conf
->seccomp
.seccomp_ctx
, &rule
))
961 INFO("Added native rule for arch %d for %s action %d(%s)",
962 SCMP_ARCH_NATIVE
, line
, rule
.action
,
963 get_action_name(rule
.action
));
965 if (ctx
.architectures
[0] != SCMP_ARCH_NATIVE
) {
966 if (!do_resolve_add_rule(ctx
.architectures
[0], line
,
967 ctx
.contexts
[0], &rule
))
970 INFO("Added compat rule for arch %d for %s action %d(%s)",
971 ctx
.architectures
[0], line
, rule
.action
,
972 get_action_name(rule
.action
));
975 if (ctx
.architectures
[1] != SCMP_ARCH_NATIVE
) {
976 if (!do_resolve_add_rule(ctx
.architectures
[1], line
,
977 ctx
.contexts
[1], &rule
))
980 INFO("Added compat rule for arch %d for %s action %d(%s)",
981 ctx
.architectures
[1], line
, rule
.action
,
982 get_action_name(rule
.action
));
985 if (ctx
.architectures
[2] != SCMP_ARCH_NATIVE
) {
986 if (!do_resolve_add_rule(ctx
.architectures
[2], line
,
987 ctx
.contexts
[2], &rule
))
990 INFO("Added native rule for arch %d for %s action %d(%s)",
991 ctx
.architectures
[2], line
, rule
.action
,
992 get_action_name(rule
.action
));
996 INFO("Merging compat seccomp contexts into main context");
997 if (ctx
.contexts
[0]) {
998 if (ctx
.needs_merge
[0]) {
999 ret
= seccomp_merge(conf
->seccomp
.seccomp_ctx
, ctx
.contexts
[0]);
1001 ERROR("Failed to merge first compat seccomp "
1002 "context into main context");
1006 TRACE("Merged first compat seccomp context into main context");
1008 seccomp_release(ctx
.contexts
[0]);
1009 ctx
.contexts
[0] = NULL
;
1013 if (ctx
.contexts
[1]) {
1014 if (ctx
.needs_merge
[1]) {
1015 ret
= seccomp_merge(conf
->seccomp
.seccomp_ctx
, ctx
.contexts
[1]);
1017 ERROR("Failed to merge first compat seccomp "
1018 "context into main context");
1022 TRACE("Merged second compat seccomp context into main context");
1024 seccomp_release(ctx
.contexts
[1]);
1025 ctx
.contexts
[1] = NULL
;
1029 if (ctx
.contexts
[2]) {
1030 if (ctx
.needs_merge
[2]) {
1031 ret
= seccomp_merge(conf
->seccomp
.seccomp_ctx
, ctx
.contexts
[2]);
1033 ERROR("Failed to merge third compat seccomp "
1034 "context into main context");
1038 TRACE("Merged third compat seccomp context into main context");
1040 seccomp_release(ctx
.contexts
[2]);
1041 ctx
.contexts
[2] = NULL
;
1049 ERROR("Unsupported architecture \"%s\"", line
);
1053 if (ctx
.contexts
[0])
1054 seccomp_release(ctx
.contexts
[0]);
1056 if (ctx
.contexts
[1])
1057 seccomp_release(ctx
.contexts
[1]);
1059 if (ctx
.contexts
[2])
1060 seccomp_release(ctx
.contexts
[2]);
1066 #else /* HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH */
1067 static int parse_config_v2(FILE *f
, char *line
, struct lxc_conf
*conf
)
1071 #endif /* HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH */
1074 * The first line of the config file has a policy language version
1075 * the second line has some directives
1076 * then comes policy subject to the directives
1077 * right now version must be '1' or '2'
1078 * the directives must include 'whitelist'(version == 1 or 2) or 'blacklist'
1079 * (version == 2) and can include 'debug' (though debug is not yet supported).
1081 static int parse_config(FILE *f
, struct lxc_conf
*conf
)
1084 size_t line_bufsz
= 0;
1087 ret
= fscanf(f
, "%d\n", &version
);
1088 if (ret
!= 1 || (version
!= 1 && version
!= 2)) {
1089 ERROR("Invalid version");
1093 if (getline(&line
, &line_bufsz
, f
) == -1) {
1094 ERROR("Invalid config file");
1098 if (version
== 1 && !strstr(line
, "whitelist")) {
1099 ERROR("Only whitelist policy is supported");
1103 if (strstr(line
, "debug")) {
1104 ERROR("Debug not yet implemented");
1109 return parse_config_v1(f
, line
, &line_bufsz
, conf
);
1111 return parse_config_v2(f
, line
, &line_bufsz
, conf
);
1119 * use_seccomp: return true if we should try and apply a seccomp policy
1120 * if defined for the container.
1121 * This will return false if
1122 * 1. seccomp is not enabled in the kernel
1123 * 2. a seccomp policy is already enabled for this task
1125 static bool use_seccomp(const struct lxc_conf
*conf
)
1129 size_t line_bufsz
= 0;
1131 bool already_enabled
= false, found
= false;
1133 if (conf
->seccomp
.allow_nesting
> 0)
1136 f
= fopen("/proc/self/status", "r");
1140 while (getline(&line
, &line_bufsz
, f
) != -1) {
1141 if (strncmp(line
, "Seccomp:", 8) == 0) {
1144 ret
= sscanf(line
+ 8, "%d", &v
);
1145 if (ret
== 1 && v
!= 0)
1146 already_enabled
= true;
1155 INFO("Seccomp is not enabled in the kernel");
1159 if (already_enabled
) {
1160 INFO("Already seccomp-confined, not loading new policy");
1167 int lxc_read_seccomp_config(struct lxc_conf
*conf
)
1172 if (!conf
->seccomp
.seccomp
)
1175 if (!use_seccomp(conf
))
1178 #if HAVE_SCMP_FILTER_CTX
1179 /* XXX for debug, pass in SCMP_ACT_TRAP */
1180 conf
->seccomp
.seccomp_ctx
= seccomp_init(SCMP_ACT_KILL
);
1181 ret
= !conf
->seccomp
.seccomp_ctx
;
1183 ret
= seccomp_init(SCMP_ACT_KILL
) < 0;
1186 ERROR("Failed initializing seccomp");
1190 /* turn off no-new-privs. We don't want it in lxc, and it breaks
1192 #if HAVE_SCMP_FILTER_CTX
1193 ret
= seccomp_attr_set(conf
->seccomp
.seccomp_ctx
, SCMP_FLTATR_CTL_NNP
, 0);
1195 ret
= seccomp_attr_set(SCMP_FLTATR_CTL_NNP
, 0);
1199 SYSERROR("Failed to turn off no-new-privs");
1203 #ifdef SCMP_FLTATR_ATL_TSKIP
1204 ret
= seccomp_attr_set(conf
->seccomp
.seccomp_ctx
, SCMP_FLTATR_ATL_TSKIP
, 1);
1207 SYSWARN("Failed to turn on seccomp nop-skip, continuing");
1211 f
= fopen(conf
->seccomp
.seccomp
, "r");
1213 SYSERROR("Failed to open seccomp policy file %s", conf
->seccomp
.seccomp
);
1217 ret
= parse_config(f
, conf
);
1223 int lxc_seccomp_load(struct lxc_conf
*conf
)
1227 if (!conf
->seccomp
.seccomp
)
1230 if (!use_seccomp(conf
))
1233 #if HAVE_SCMP_FILTER_CTX
1234 ret
= seccomp_load(conf
->seccomp
.seccomp_ctx
);
1236 ret
= seccomp_load();
1240 SYSERROR("Error loading the seccomp policy");
1244 /* After load seccomp filter into the kernel successfully, export the current seccomp
1245 * filter to log file */
1246 #if HAVE_SCMP_FILTER_CTX
1247 if ((lxc_log_get_level() <= LXC_LOG_LEVEL_TRACE
||
1248 conf
->loglevel
<= LXC_LOG_LEVEL_TRACE
) &&
1250 ret
= seccomp_export_pfc(conf
->seccomp
.seccomp_ctx
, lxc_log_fd
);
1251 /* Just give an warning when export error */
1254 SYSWARN("Failed to export seccomp filter to log file");
1259 #if HAVE_DECL_SECCOMP_NOTIFY_FD
1260 if (conf
->seccomp
.notifier
.wants_supervision
) {
1261 ret
= seccomp_notify_fd(conf
->seccomp
.seccomp_ctx
);
1267 conf
->seccomp
.notifier
.notify_fd
= ret
;
1268 TRACE("Retrieved new seccomp listener fd %d", ret
);
1275 void lxc_seccomp_free(struct lxc_seccomp
*seccomp
)
1277 free_disarm(seccomp
->seccomp
);
1279 #if HAVE_SCMP_FILTER_CTX
1280 if (seccomp
->seccomp_ctx
) {
1281 seccomp_release(seccomp
->seccomp_ctx
);
1282 seccomp
->seccomp_ctx
= NULL
;
1286 #if HAVE_DECL_SECCOMP_NOTIFY_FD
1287 close_prot_errno_disarm(seccomp
->notifier
.notify_fd
);
1288 close_prot_errno_disarm(seccomp
->notifier
.proxy_fd
);
1289 seccomp_notify_free(seccomp
->notifier
.req_buf
, seccomp
->notifier
.rsp_buf
);
1290 seccomp
->notifier
.req_buf
= NULL
;
1291 seccomp
->notifier
.rsp_buf
= NULL
;
1295 #if HAVE_DECL_SECCOMP_NOTIFY_FD
1296 static int seccomp_notify_reconnect(struct lxc_handler
*handler
)
1298 __do_close_prot_errno
int notify_fd
= -EBADF
;
1300 close_prot_errno_disarm(handler
->conf
->seccomp
.notifier
.proxy_fd
);
1302 notify_fd
= lxc_unix_connect(&handler
->conf
->seccomp
.notifier
.proxy_addr
);
1303 if (notify_fd
< 0) {
1304 SYSERROR("Failed to reconnect to seccomp proxy");
1308 /* 30 second timeout */
1309 if (lxc_socket_set_timeout(notify_fd
, 30, 30)) {
1310 SYSERROR("Failed to set socket timeout");
1313 handler
->conf
->seccomp
.notifier
.proxy_fd
= move_fd(notify_fd
);
1318 #if HAVE_DECL_SECCOMP_NOTIFY_FD
1319 static int seccomp_notify_default_answer(int fd
, struct seccomp_notif
*req
,
1320 struct seccomp_notif_resp
*resp
,
1321 struct lxc_handler
*handler
)
1324 resp
->error
= -ENOSYS
;
1326 if (seccomp_notify_respond(fd
, resp
))
1327 SYSERROR("Failed to send default message to seccomp");
1329 return seccomp_notify_reconnect(handler
);
1333 int seccomp_notify_handler(int fd
, uint32_t events
, void *data
,
1334 struct lxc_epoll_descr
*descr
)
1337 #if HAVE_DECL_SECCOMP_NOTIFY_FD
1338 __do_close_prot_errno
int fd_mem
= -EBADF
;
1339 int reconnect_count
, ret
;
1341 char mem_path
[6 /* /proc/ */
1342 + INTTYPE_TO_STRLEN(int64_t)
1345 struct lxc_handler
*hdlr
= data
;
1346 struct lxc_conf
*conf
= hdlr
->conf
;
1347 struct seccomp_notif
*req
= conf
->seccomp
.notifier
.req_buf
;
1348 struct seccomp_notif_resp
*resp
= conf
->seccomp
.notifier
.rsp_buf
;
1349 int listener_proxy_fd
= conf
->seccomp
.notifier
.proxy_fd
;
1350 struct seccomp_notify_proxy_msg msg
= {0};
1352 if (listener_proxy_fd
< 0) {
1353 ERROR("No seccomp proxy registered");
1354 return minus_one_set_errno(EINVAL
);
1357 ret
= seccomp_notify_receive(fd
, req
);
1359 SYSERROR("Failed to read seccomp notification");
1363 snprintf(mem_path
, sizeof(mem_path
), "/proc/%d/mem", req
->pid
);
1364 fd_mem
= open(mem_path
, O_RDONLY
| O_CLOEXEC
);
1366 (void)seccomp_notify_default_answer(fd
, req
, resp
, hdlr
);
1367 SYSERROR("Failed to open process memory for seccomp notify request");
1372 * Make sure that the fd for /proc/<pid>/mem we just opened still
1373 * refers to the correct process's memory.
1375 ret
= seccomp_notify_id_valid(fd
, req
->id
);
1377 (void)seccomp_notify_default_answer(fd
, req
, resp
, hdlr
);
1378 SYSERROR("Invalid seccomp notify request id");
1382 memcpy(&msg
.req
, req
, sizeof(msg
.req
));
1383 msg
.monitor_pid
= hdlr
->monitor_pid
;
1384 msg
.init_pid
= hdlr
->pid
;
1386 reconnect_count
= 0;
1388 bytes
= lxc_unix_send_fds(listener_proxy_fd
, &fd_mem
, 1, &msg
,
1390 if (bytes
!= (ssize_t
)sizeof(msg
)) {
1391 SYSERROR("Failed to forward message to seccomp proxy");
1392 if (seccomp_notify_default_answer(fd
, req
, resp
, hdlr
))
1395 } while (reconnect_count
++);
1397 close_prot_errno_disarm(fd_mem
);
1399 reconnect_count
= 0;
1401 bytes
= lxc_recv_nointr(listener_proxy_fd
, &msg
, sizeof(msg
), 0);
1402 if (bytes
!= (ssize_t
)sizeof(msg
)) {
1403 SYSERROR("Failed to receive message from seccomp proxy");
1404 if (seccomp_notify_default_answer(fd
, req
, resp
, hdlr
))
1407 } while (reconnect_count
++);
1409 memcpy(resp
, &msg
.resp
, sizeof(*resp
));
1410 ret
= seccomp_notify_respond(fd
, resp
);
1412 SYSERROR("Failed to send seccomp notification");
1421 void seccomp_conf_init(struct lxc_conf
*conf
)
1423 conf
->seccomp
.seccomp
= NULL
;
1424 #if HAVE_SCMP_FILTER_CTX
1425 conf
->seccomp
.allow_nesting
= 0;
1426 memset(&conf
->seccomp
.seccomp_ctx
, 0, sizeof(conf
->seccomp
.seccomp_ctx
));
1427 #endif /* HAVE_SCMP_FILTER_CTX */
1428 #if HAVE_DECL_SECCOMP_NOTIFY_FD
1429 conf
->seccomp
.notifier
.wants_supervision
= false;
1430 conf
->seccomp
.notifier
.notify_fd
= -EBADF
;
1431 conf
->seccomp
.notifier
.proxy_fd
= -EBADF
;
1432 memset(&conf
->seccomp
.notifier
.proxy_addr
, 0,
1433 sizeof(conf
->seccomp
.notifier
.proxy_addr
));
1434 conf
->seccomp
.notifier
.req_buf
= NULL
;
1435 conf
->seccomp
.notifier
.rsp_buf
= NULL
;
1439 int lxc_seccomp_setup_proxy(struct lxc_seccomp
*seccomp
,
1440 struct lxc_epoll_descr
*descr
,
1441 struct lxc_handler
*handler
)
1443 #if HAVE_DECL_SECCOMP_NOTIFY_FD
1444 if (seccomp
->notifier
.wants_supervision
&&
1445 seccomp
->notifier
.proxy_addr
.sun_path
[1] != '\0') {
1446 __do_close_prot_errno
int notify_fd
= -EBADF
;
1449 notify_fd
= lxc_unix_connect(&seccomp
->notifier
.proxy_addr
);
1450 if (notify_fd
< 0) {
1451 SYSERROR("Failed to connect to seccomp proxy");
1455 /* 30 second timeout */
1456 ret
= lxc_socket_set_timeout(notify_fd
, 30, 30);
1458 SYSERROR("Failed to set timeouts for seccomp proxy");
1462 ret
= seccomp_notify_alloc(&seccomp
->notifier
.req_buf
,
1463 &seccomp
->notifier
.rsp_buf
);
1465 ERROR("Failed to allocate seccomp notify request and response buffers");
1470 ret
= lxc_mainloop_add_handler(descr
,
1471 seccomp
->notifier
.notify_fd
,
1472 seccomp_notify_handler
, handler
);
1474 ERROR("Failed to add seccomp notify handler for %d to mainloop",
1479 seccomp
->notifier
.proxy_fd
= move_fd(notify_fd
);
1485 int lxc_seccomp_send_notifier_fd(struct lxc_seccomp
*seccomp
, int socket_fd
)
1487 #if HAVE_DECL_SECCOMP_NOTIFY_FD
1488 if (seccomp
->notifier
.wants_supervision
) {
1489 if (lxc_abstract_unix_send_fds(socket_fd
,
1490 &seccomp
->notifier
.notify_fd
, 1,
1493 close_prot_errno_disarm(seccomp
->notifier
.notify_fd
);
1499 int lxc_seccomp_recv_notifier_fd(struct lxc_seccomp
*seccomp
, int socket_fd
)
1501 #if HAVE_DECL_SECCOMP_NOTIFY_FD
1502 if (seccomp
->notifier
.wants_supervision
) {
1505 ret
= lxc_abstract_unix_recv_fds(socket_fd
,
1506 &seccomp
->notifier
.notify_fd
,
1515 int lxc_seccomp_add_notifier(const char *name
, const char *lxcpath
,
1516 struct lxc_seccomp
*seccomp
)
1519 #if HAVE_DECL_SECCOMP_NOTIFY_FD
1520 if (seccomp
->notifier
.wants_supervision
) {
1523 ret
= lxc_cmd_seccomp_notify_add_listener(name
, lxcpath
,
1524 seccomp
->notifier
.notify_fd
,
1526 close_prot_errno_disarm(seccomp
->notifier
.notify_fd
);