git.proxmox.com Git - mirror_lxc.git/blobdiff - src/lxc/seccomp.c
seccomp: s/seccomp_notif_alloc/seccomp_notify_alloc/g
[mirror_lxc.git] / src / lxc / seccomp.c
index eeb9c8bf39a9cb8f7728e7a356bf7d85f2d0bbc3..cb3e57c69956d947b178447bac4c5ebb6966c893 100644 (file)
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#define _GNU_SOURCE
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#endif
 #include <errno.h>
+#include <seccomp.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <seccomp.h>
 #include <sys/mount.h>
 #include <sys/utsname.h>
 
+#include "af_unix.h"
+#include "commands.h"
 #include "config.h"
 #include "log.h"
+#include "lxccontainer.h"
 #include "lxcseccomp.h"
+#include "mainloop.h"
+#include "memory_utils.h"
+#include "utils.h"
+
+#ifdef __MIPSEL__
+#define MIPS_ARCH_O32 lxc_seccomp_arch_mipsel
+#define MIPS_ARCH_N64 lxc_seccomp_arch_mipsel64
+#else
+#define MIPS_ARCH_O32 lxc_seccomp_arch_mips
+#define MIPS_ARCH_N64 lxc_seccomp_arch_mips64
+#endif
 
-lxc_log_define(lxc_seccomp, lxc);
+lxc_log_define(seccomp, lxc);
 
-static int parse_config_v1(FILE *f, struct lxc_conf *conf)
+static int parse_config_v1(FILE *f, char *line, size_t *line_bufsz, struct lxc_conf *conf)
 {
-       char line[1024];
-       int ret;
+       int ret = 0;
 
-       while (fgets(line, 1024, f)) {
+       while (getline(&line, line_bufsz, f) != -1) {
                int nr;
+
                ret = sscanf(line, "%d", &nr);
-               if (ret != 1)
-                       return -1;
-               ret = seccomp_rule_add(
+               if (ret != 1) {
+                       ret = -1;
+                       break;
+               }
+
 #if HAVE_SCMP_FILTER_CTX
-                   conf->seccomp_ctx,
+               ret = seccomp_rule_add(conf->seccomp.seccomp_ctx, SCMP_ACT_ALLOW, nr, 0);
+#else
+               ret = seccomp_rule_add(SCMP_ACT_ALLOW, nr, 0);
 #endif
-                   SCMP_ACT_ALLOW, nr, 0);
                if (ret < 0) {
                        ERROR("Failed loading allow rule for %d", nr);
-                       return ret;
+                       break;
                }
        }
-       return 0;
+       free(line);
+
+       return ret;
 }
 
 #if HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH
-static void remove_trailing_newlines(char *l)
+static const char *get_action_name(uint32_t action)
 {
-       char *p = l;
+       /* The upper 16 bits indicate the type of the seccomp action. */
+       switch (action & 0xffff0000) {
+       case SCMP_ACT_KILL:
+               return "kill";
+       case SCMP_ACT_ALLOW:
+               return "allow";
+       case SCMP_ACT_TRAP:
+               return "trap";
+       case SCMP_ACT_ERRNO(0):
+               return "errno";
+#if HAVE_DECL_SECCOMP_NOTIF_GET_FD
+       case SCMP_ACT_NOTIFY:
+               return "notify";
+#endif
+       }
 
-       while (*p)
-               p++;
-       while (--p >= l && *p == '\n')
-               *p = '\0';
+       return "invalid action";
 }
 
 static uint32_t get_v2_default_action(char *line)
@@ -75,61 +107,64 @@ static uint32_t get_v2_default_action(char *line)
 
        while (*line == ' ')
                line++;
+
        /* After 'whitelist' or 'blacklist' comes default behavior. */
-       if (strncmp(line, "kill", 4) == 0)
+       if (strncmp(line, "kill", 4) == 0) {
                ret_action = SCMP_ACT_KILL;
-       else if (strncmp(line, "errno", 5) == 0) {
-               int e;
-               if (sscanf(line + 5, "%d", &e) != 1) {
-                       ERROR("Bad errno value in %s", line);
+       } else if (strncmp(line, "errno", 5) == 0) {
+               int e, ret;
+
+               ret = sscanf(line + 5, "%d", &e);
+               if (ret != 1) {
+                       ERROR("Failed to parse errno value from %s", line);
                        return -2;
                }
+
                ret_action = SCMP_ACT_ERRNO(e);
-       } else if (strncmp(line, "allow", 5) == 0)
+       } else if (strncmp(line, "allow", 5) == 0) {
                ret_action = SCMP_ACT_ALLOW;
-       else if (strncmp(line, "trap", 4) == 0)
+       } else if (strncmp(line, "trap", 4) == 0) {
                ret_action = SCMP_ACT_TRAP;
-       return ret_action;
-}
-
-static const char *get_action_name(uint32_t action)
-{
-       /* The upper 16 bits indicate the type of the seccomp action. */
-       switch(action & 0xffff0000){
-       case SCMP_ACT_KILL:
-               return "kill";
-       case SCMP_ACT_ALLOW:
-               return "allow";
-       case SCMP_ACT_TRAP:
-               return "trap";
-       case SCMP_ACT_ERRNO(0):
-               return "errno";
-       default:
-               return "invalid action";
+#if HAVE_DECL_SECCOMP_NOTIF_GET_FD
+       } else if (strncmp(line, "notify", 6) == 0) {
+               ret_action = SCMP_ACT_NOTIFY;
+#endif
+       } else if (line[0]) {
+               ERROR("Unrecognized seccomp action \"%s\"", line);
+               return -2;
        }
+
+       return ret_action;
 }
 
 static uint32_t get_v2_action(char *line, uint32_t def_action)
 {
-       char *p = strchr(line, ' ');
+       char *p;
        uint32_t ret;
 
+       p = strchr(line, ' ');
        if (!p)
                return def_action;
        p++;
+
        while (*p == ' ')
                p++;
+
        if (!*p || *p == '#')
                return def_action;
+
        ret = get_v2_default_action(p);
-       switch(ret) {
-       case -2: return -1;
-       case -1: return def_action;
-       default: return ret;
+       switch (ret) {
+       case -2:
+               return -1;
+       case -1:
+               return def_action;
        }
+
+       return ret;
 }
 
-struct v2_rule_args {
+struct seccomp_v2_rule_args {
        uint32_t index;
        uint64_t value;
        uint64_t mask;
@@ -139,7 +174,7 @@ struct v2_rule_args {
 struct seccomp_v2_rule {
        uint32_t action;
        uint32_t args_num;
-       struct v2_rule_args args_value[6];
+       struct seccomp_v2_rule_args args_value[6];
 };
 
 static enum scmp_compare parse_v2_rule_op(char *s)
@@ -162,40 +197,51 @@ static enum scmp_compare parse_v2_rule_op(char *s)
        return _SCMP_CMP_MAX;
 }
 
-/* This function is used to parse the args string into the structure.
- * args string format:[index,value,op,valueTwo] or [index,value,op]
- * For one arguments, [index,value,valueTwo,op]
+/*
+ * This function is used to parse the args string into the structure.
+ * args string format:[index,value,op,mask] or [index,value,op]
  * index: the index for syscall arguments (type uint)
  * value: the value for syscall arguments (type uint64)
  * op: the operator for syscall arguments(string),
         a valid list of constants as of libseccomp v2.3.2 is
-        SCMP_CMP_NE,SCMP_CMP_LE,SCMP_CMP_LE, SCMP_CMP_EQ, SCMP_CMP_GE,
+        SCMP_CMP_NE, SCMP_CMP_LT, SCMP_CMP_LE, SCMP_CMP_EQ, SCMP_CMP_GE,
         SCMP_CMP_GT, SCMP_CMP_MASKED_EQ, or !=,<=,==,>=,>,&=
- * valueTwo: the value for syscall arguments only used for mask eq (type uint64, optional)
+ * mask: the mask to apply on "value" for SCMP_CMP_MASKED_EQ (type uint64, optional)
  * Returns 0 on success, < 0 otherwise.
  */
-static int get_seccomp_arg_value(char *key, struct v2_rule_args *rule_args)
+static int get_seccomp_arg_value(char *key, struct seccomp_v2_rule_args *rule_args)
 {
        int ret = 0;
-       uint64_t value = 0;
-       uint64_t mask = 0;
-       enum scmp_compare op = 0;
        uint32_t index = 0;
-       char s[30] = {0};
+       uint64_t mask = 0, value = 0;
+       enum scmp_compare op = 0;
        char *tmp = NULL;
+       char s[31] = {0}, v[24] = {0}, m[24] = {'0'};
 
-       memset(s, 0, sizeof(s));
        tmp = strchr(key, '[');
        if (!tmp) {
                ERROR("Failed to interpret args");
                return -1;
        }
-       ret = sscanf(tmp, "[%i,%lli,%30[^0-9^,],%lli", &index, (long long unsigned int *)&value, s, (long long unsigned int *)&mask);
+
+       ret = sscanf(tmp, "[%i,%23[^,],%30[^0-9^,],%23[^,]", &index, v, s, m);
        if ((ret != 3 && ret != 4) || index >= 6) {
                ERROR("Failed to interpret args value");
                return -1;
        }
 
+       ret = lxc_safe_uint64(v, &value, 0);
+       if (ret < 0) {
+               ERROR("Invalid argument value");
+               return -1;
+       }
+
+       ret = lxc_safe_uint64(m, &mask, 0);
+       if (ret < 0) {
+               ERROR("Invalid argument mask");
+               return -1;
+       }
+
        op = parse_v2_rule_op(s);
        if (op == _SCMP_CMP_MAX) {
                ERROR("Failed to interpret args operator value");
@@ -215,13 +261,11 @@ static int get_seccomp_arg_value(char *key, struct v2_rule_args *rule_args)
  * @rules      : output struct.
  * Returns 0 on success, < 0 otherwise.
  */
-static int parse_v2_rules(char *line, uint32_t def_action, struct seccomp_v2_rule *rules)
+static int parse_v2_rules(char *line, uint32_t def_action,
+                         struct seccomp_v2_rule *rules)
 {
-       int ret = 0 ;
-       int i = 0;
-       char *tmp = NULL;
-       char *key = NULL;
-       char *saveptr = NULL;
+       int i = 0, ret = -1;
+       char *key = NULL, *saveptr = NULL, *tmp = NULL;
 
        tmp = strdup(line);
        if (!tmp)
@@ -232,30 +276,31 @@ static int parse_v2_rules(char *line, uint32_t def_action, struct seccomp_v2_rul
        if (rules->action == -1) {
                ERROR("Failed to interpret action");
                ret = -1;
-               goto out;
+               goto on_error;
        }
 
+       ret = 0;
        rules->args_num = 0;
-       if (!strchr(tmp, '[')) {
-               ret = 0;
-               goto out;
-       }
+       if (!strchr(tmp, '['))
+               goto on_error;
 
-       for ((key = strtok_r(tmp, "]", &saveptr)), i = 0; key && i < 6; (key = strtok_r(NULL, "]", &saveptr)), i++) {
+       ret = -1;
+       for ((key = strtok_r(tmp, "]", &saveptr)), i = 0; key && i < 6;
+            (key = strtok_r(NULL, "]", &saveptr)), i++) {
                ret = get_seccomp_arg_value(key, &rules->args_value[i]);
-               if (ret < 0) {
-                       ret = -1;
-                       goto out;
-               }
+               if (ret < 0)
+                       goto on_error;
+
                rules->args_num++;
        }
 
        ret = 0;
-out:
+
+on_error:
        free(tmp);
+
        return ret;
 }
-
 #endif
 
 #if HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH
@@ -280,14 +325,6 @@ enum lxc_hostarch_t {
        lxc_seccomp_arch_unknown = 999,
 };
 
-#ifdef __MIPSEL__
-# define MIPS_ARCH_O32 lxc_seccomp_arch_mipsel
-# define MIPS_ARCH_N64 lxc_seccomp_arch_mipsel64
-#else
-# define MIPS_ARCH_O32 lxc_seccomp_arch_mips
-# define MIPS_ARCH_N64 lxc_seccomp_arch_mips64
-#endif
-
 int get_hostarch(void)
 {
        struct utsname uts;
@@ -295,6 +332,7 @@ int get_hostarch(void)
                SYSERROR("Failed to read host arch");
                return -1;
        }
+
        if (strcmp(uts.machine, "i686") == 0)
                return lxc_seccomp_arch_i386;
        /* no x32 kernels */
@@ -316,75 +354,117 @@ int get_hostarch(void)
                return MIPS_ARCH_O32;
        else if (strncmp(uts.machine, "s390x", 5) == 0)
                return lxc_seccomp_arch_s390x;
+
        return lxc_seccomp_arch_unknown;
 }
 
-scmp_filter_ctx get_new_ctx(enum lxc_hostarch_t n_arch, uint32_t default_policy_action, bool *needs_merge)
+scmp_filter_ctx get_new_ctx(enum lxc_hostarch_t n_arch,
+                           uint32_t default_policy_action, bool *needs_merge)
 {
-       scmp_filter_ctx ctx;
        int ret;
        uint32_t arch;
+       scmp_filter_ctx ctx;
 
-       switch(n_arch) {
-       case lxc_seccomp_arch_i386: arch = SCMP_ARCH_X86; break;
-       case lxc_seccomp_arch_x32: arch = SCMP_ARCH_X32; break;
-       case lxc_seccomp_arch_amd64: arch = SCMP_ARCH_X86_64; break;
-       case lxc_seccomp_arch_arm: arch = SCMP_ARCH_ARM; break;
+       switch (n_arch) {
+       case lxc_seccomp_arch_i386:
+               arch = SCMP_ARCH_X86;
+               break;
+       case lxc_seccomp_arch_x32:
+               arch = SCMP_ARCH_X32;
+               break;
+       case lxc_seccomp_arch_amd64:
+               arch = SCMP_ARCH_X86_64;
+               break;
+       case lxc_seccomp_arch_arm:
+               arch = SCMP_ARCH_ARM;
+               break;
 #ifdef SCMP_ARCH_AARCH64
-       case lxc_seccomp_arch_arm64: arch = SCMP_ARCH_AARCH64; break;
+       case lxc_seccomp_arch_arm64:
+               arch = SCMP_ARCH_AARCH64;
+               break;
 #endif
 #ifdef SCMP_ARCH_PPC64LE
-       case lxc_seccomp_arch_ppc64le: arch = SCMP_ARCH_PPC64LE; break;
+       case lxc_seccomp_arch_ppc64le:
+               arch = SCMP_ARCH_PPC64LE;
+               break;
 #endif
 #ifdef SCMP_ARCH_PPC64
-       case lxc_seccomp_arch_ppc64: arch = SCMP_ARCH_PPC64; break;
+       case lxc_seccomp_arch_ppc64:
+               arch = SCMP_ARCH_PPC64;
+               break;
 #endif
 #ifdef SCMP_ARCH_PPC
-       case lxc_seccomp_arch_ppc: arch = SCMP_ARCH_PPC; break;
+       case lxc_seccomp_arch_ppc:
+               arch = SCMP_ARCH_PPC;
+               break;
 #endif
 #ifdef SCMP_ARCH_MIPS
-       case lxc_seccomp_arch_mips: arch = SCMP_ARCH_MIPS; break;
-       case lxc_seccomp_arch_mips64: arch = SCMP_ARCH_MIPS64; break;
-       case lxc_seccomp_arch_mips64n32: arch = SCMP_ARCH_MIPS64N32; break;
-       case lxc_seccomp_arch_mipsel: arch = SCMP_ARCH_MIPSEL; break;
-       case lxc_seccomp_arch_mipsel64: arch = SCMP_ARCH_MIPSEL64; break;
-       case lxc_seccomp_arch_mipsel64n32: arch = SCMP_ARCH_MIPSEL64N32; break;
+       case lxc_seccomp_arch_mips:
+               arch = SCMP_ARCH_MIPS;
+               break;
+       case lxc_seccomp_arch_mips64:
+               arch = SCMP_ARCH_MIPS64;
+               break;
+       case lxc_seccomp_arch_mips64n32:
+               arch = SCMP_ARCH_MIPS64N32;
+               break;
+       case lxc_seccomp_arch_mipsel:
+               arch = SCMP_ARCH_MIPSEL;
+               break;
+       case lxc_seccomp_arch_mipsel64:
+               arch = SCMP_ARCH_MIPSEL64;
+               break;
+       case lxc_seccomp_arch_mipsel64n32:
+               arch = SCMP_ARCH_MIPSEL64N32;
+               break;
 #endif
 #ifdef SCMP_ARCH_S390X
-       case lxc_seccomp_arch_s390x: arch = SCMP_ARCH_S390X; break;
+       case lxc_seccomp_arch_s390x:
+               arch = SCMP_ARCH_S390X;
+               break;
 #endif
-       default: return NULL;
+       default:
+               return NULL;
        }
 
-       if ((ctx = seccomp_init(default_policy_action)) == NULL) {
+       ctx = seccomp_init(default_policy_action);
+       if (!ctx) {
                ERROR("Error initializing seccomp context");
                return NULL;
        }
-       if (seccomp_attr_set(ctx, SCMP_FLTATR_CTL_NNP, 0)) {
-               ERROR("Failed to turn off no-new-privs");
+
+       ret = seccomp_attr_set(ctx, SCMP_FLTATR_CTL_NNP, 0);
+       if (ret < 0) {
+               errno = -ret;
+               SYSERROR("Failed to turn off no-new-privs");
                seccomp_release(ctx);
                return NULL;
        }
+
 #ifdef SCMP_FLTATR_ATL_TSKIP
-       if (seccomp_attr_set(ctx, SCMP_FLTATR_ATL_TSKIP, 1)) {
-               WARN("Failed to turn on seccomp nop-skip, continuing");
+       ret = seccomp_attr_set(ctx, SCMP_FLTATR_ATL_TSKIP, 1);
+       if (ret < 0) {
+               errno = -ret;
+               SYSWARN("Failed to turn on seccomp nop-skip, continuing");
        }
 #endif
 
        ret = seccomp_arch_exist(ctx, arch);
        if (ret < 0) {
                if (ret != -EEXIST) {
-                       ERROR("%s - Failed to determine whether arch %d is "
-                             "already present in the main seccomp context",
-                              strerror(-ret), (int)n_arch);
+                       errno = -ret;
+                       SYSERROR("Failed to determine whether arch %d is "
+                                "already present in the main seccomp context",
+                                (int)n_arch);
                        seccomp_release(ctx);
                        return NULL;
                }
 
                ret = seccomp_arch_add(ctx, arch);
                if (ret != 0) {
-                       ERROR("%s - Failed to add arch %d to main seccomp context",
-                             strerror(-ret), (int)n_arch);
+                       errno = -ret;
+                       SYSERROR("Failed to add arch %d to main seccomp context",
+                                (int)n_arch);
                        seccomp_release(ctx);
                        return NULL;
                }
@@ -408,18 +488,15 @@ scmp_filter_ctx get_new_ctx(enum lxc_hostarch_t n_arch, uint32_t default_policy_
 }
 
 bool do_resolve_add_rule(uint32_t arch, char *line, scmp_filter_ctx ctx,
-                       struct seccomp_v2_rule *rule)
+                        struct seccomp_v2_rule *rule)
 {
-       int nr, ret, i;
+       int i, nr, ret;
        struct scmp_arg_cmp arg_cmp[6];
 
-       memset(arg_cmp, 0 ,sizeof(arg_cmp));
-
        ret = seccomp_arch_exist(ctx, arch);
        if (arch && ret != 0) {
-               ERROR("BUG: Seccomp: rule and context arch do not match (arch "
-                     "%d): %s",
-                     arch, strerror(-ret));
+               errno = -ret;
+               SYSERROR("Seccomp: rule and context arch do not match (arch %d)", arch);
                return false;
        }
 
@@ -429,49 +506,60 @@ bool do_resolve_add_rule(uint32_t arch, char *line, scmp_filter_ctx ctx,
                *p = '\0';
 
        if (strncmp(line, "reject_force_umount", 19) == 0) {
-               INFO("Setting Seccomp rule to reject force umounts");
-               ret = seccomp_rule_add_exact(ctx, SCMP_ACT_ERRNO(EACCES), SCMP_SYS(umount2),
-                               1, SCMP_A1(SCMP_CMP_MASKED_EQ , MNT_FORCE , MNT_FORCE ));
+               ret = seccomp_rule_add_exact(ctx, SCMP_ACT_ERRNO(EACCES),
+                                            SCMP_SYS(umount2), 1,
+                                            SCMP_A1(SCMP_CMP_MASKED_EQ, MNT_FORCE, MNT_FORCE));
                if (ret < 0) {
-                       ERROR("Failed (%d) loading rule to reject force "
-                             "umount: %s",
-                             ret, strerror(-ret));
+                       errno = -ret;
+                       SYSERROR("Failed loading rule to reject force umount");
                        return false;
                }
+
+               INFO("Set seccomp rule to reject force umounts");
                return true;
        }
 
        nr = seccomp_syscall_resolve_name(line);
        if (nr == __NR_SCMP_ERROR) {
-               WARN("Seccomp: failed to resolve syscall: %s", line);
-               WARN("This syscall will NOT be blacklisted");
+               WARN("Failed to resolve syscall \"%s\"", line);
+               WARN("This syscall will NOT be handled by seccomp");
                return true;
        }
+
        if (nr < 0) {
-               WARN("Seccomp: got negative for syscall: %d: %s", nr, line);
-               WARN("This syscall will NOT be blacklisted");
+               WARN("Got negative return value %d for syscall \"%s\"", nr, line);
+               WARN("This syscall will NOT be handled by seccomp");
                return true;
        }
 
+       memset(&arg_cmp, 0, sizeof(arg_cmp));
        for (i = 0; i < rule->args_num; i++) {
-               INFO("arg_cmp[%d]:SCMP_CMP(%u, %llu, %llu, %llu)", i,
-                     rule->args_value[i].index,
-                     (long long unsigned int)rule->args_value[i].op,
-                     (long long unsigned int)rule->args_value[i].mask,
-                     (long long unsigned int)rule->args_value[i].value);
+               INFO("arg_cmp[%d]: SCMP_CMP(%u, %llu, %llu, %llu)", i,
+                    rule->args_value[i].index,
+                    (long long unsigned int)rule->args_value[i].op,
+                    (long long unsigned int)rule->args_value[i].mask,
+                    (long long unsigned int)rule->args_value[i].value);
 
                if (SCMP_CMP_MASKED_EQ == rule->args_value[i].op)
-                       arg_cmp[i] = SCMP_CMP(rule->args_value[i].index, rule->args_value[i].op, rule->args_value[i].mask, rule->args_value[i].value);
+                       arg_cmp[i] = SCMP_CMP(rule->args_value[i].index,
+                                             rule->args_value[i].op,
+                                             rule->args_value[i].mask,
+                                             rule->args_value[i].value);
                else
-                       arg_cmp[i] = SCMP_CMP(rule->args_value[i].index, rule->args_value[i].op, rule->args_value[i].value);
+                       arg_cmp[i] = SCMP_CMP(rule->args_value[i].index,
+                                             rule->args_value[i].op,
+                                             rule->args_value[i].value);
        }
 
-       ret = seccomp_rule_add_exact_array(ctx, rule->action, nr, rule->args_num, arg_cmp);
+       ret = seccomp_rule_add_exact_array(ctx, rule->action, nr,
+                                          rule->args_num, arg_cmp);
        if (ret < 0) {
-               ERROR("Failed (%d) loading rule for %s (nr %d action %d(%s)): %s",
-                     ret, line, nr, rule->action, get_action_name(rule->action), strerror(-ret));
+               errno = -ret;
+               SYSERROR("Failed loading rule for %s (nr %d action %d (%s))",
+                        line, nr, rule->action, get_action_name(rule->action));
                return false;
        }
+
        return true;
 }
 
@@ -489,14 +577,13 @@ bool do_resolve_add_rule(uint32_t arch, char *line, scmp_filter_ctx ctx,
  * write
  * close
  */
-static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
+static int parse_config_v2(FILE *f, char *line, size_t *line_bufsz, struct lxc_conf *conf)
 {
-       char *p;
        int ret;
+       char *p;
+       enum lxc_hostarch_t cur_rule_arch, native_arch;
        bool blacklist = false;
        uint32_t default_policy_action = -1, default_rule_action = -1;
-       enum lxc_hostarch_t native_arch = get_hostarch(),
-                           cur_rule_arch = native_arch;
        struct seccomp_v2_rule rule;
        struct scmp_ctx_info {
                uint32_t architectures[3];
@@ -507,11 +594,12 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
        if (strncmp(line, "blacklist", 9) == 0)
                blacklist = true;
        else if (strncmp(line, "whitelist", 9) != 0) {
-               ERROR("Bad seccomp policy style: %s", line);
+               ERROR("Bad seccomp policy style \"%s\"", line);
                return -1;
        }
 
-       if ((p = strchr(line, ' '))) {
+       p = strchr(line, ' ');
+       if (p) {
                default_policy_action = get_v2_default_action(p + 1);
                if (default_policy_action == -2)
                        return -1;
@@ -521,11 +609,13 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
        if (blacklist) {
                if (default_policy_action == -1)
                        default_policy_action = SCMP_ACT_ALLOW;
+
                if (default_rule_action == -1)
                        default_rule_action = SCMP_ACT_KILL;
        } else {
                if (default_policy_action == -1)
                        default_policy_action = SCMP_ACT_KILL;
+
                if (default_rule_action == -1)
                        default_rule_action = SCMP_ACT_ALLOW;
        }
@@ -534,6 +624,8 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
        ctx.architectures[0] = SCMP_ARCH_NATIVE;
        ctx.architectures[1] = SCMP_ARCH_NATIVE;
        ctx.architectures[2] = SCMP_ARCH_NATIVE;
+       native_arch = get_hostarch();
+       cur_rule_arch = native_arch;
        if (native_arch == lxc_seccomp_arch_amd64) {
                cur_rule_arch = lxc_seccomp_arch_all;
 
@@ -580,17 +672,17 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
                cur_rule_arch = lxc_seccomp_arch_all;
 
                ctx.architectures[0] = SCMP_ARCH_ARM;
-               ctx.contexts[0] =
-                   get_new_ctx(lxc_seccomp_arch_arm, default_policy_action,
-                               &ctx.needs_merge[0]);
+               ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_arm,
+                                             default_policy_action,
+                                             &ctx.needs_merge[0]);
                if (!ctx.contexts[0])
                        goto bad;
 
 #ifdef SCMP_ARCH_AARCH64
                ctx.architectures[2] = SCMP_ARCH_AARCH64;
-               ctx.contexts[2] =
-                   get_new_ctx(lxc_seccomp_arch_arm64, default_policy_action,
-                               &ctx.needs_merge[2]);
+               ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_arm64,
+                                             default_policy_action,
+                                             &ctx.needs_merge[2]);
                if (!ctx.contexts[2])
                        goto bad;
 #endif
@@ -646,30 +738,38 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
        }
 
        if (default_policy_action != SCMP_ACT_KILL) {
-               ret = seccomp_reset(conf->seccomp_ctx, default_policy_action);
+               ret = seccomp_reset(conf->seccomp.seccomp_ctx, default_policy_action);
                if (ret != 0) {
                        ERROR("Error re-initializing Seccomp");
                        return -1;
                }
-               if (seccomp_attr_set(conf->seccomp_ctx, SCMP_FLTATR_CTL_NNP, 0)) {
-                       ERROR("Failed to turn off no-new-privs");
+
+               ret = seccomp_attr_set(conf->seccomp.seccomp_ctx, SCMP_FLTATR_CTL_NNP, 0);
+               if (ret < 0) {
+                       errno = -ret;
+                       SYSERROR("Failed to turn off no-new-privs");
                        return -1;
                }
+
 #ifdef SCMP_FLTATR_ATL_TSKIP
-               if (seccomp_attr_set(conf->seccomp_ctx, SCMP_FLTATR_ATL_TSKIP, 1)) {
-                       WARN("Failed to turn on seccomp nop-skip, continuing");
+               ret = seccomp_attr_set(conf->seccomp.seccomp_ctx, SCMP_FLTATR_ATL_TSKIP, 1);
+               if (ret < 0) {
+                       errno = -ret;
+                       SYSWARN("Failed to turn on seccomp nop-skip, continuing");
                }
 #endif
        }
 
-       while (fgets(line, 1024, f)) {
-
+       while (getline(&line, line_bufsz, f) != -1) {
                if (line[0] == '#')
                        continue;
-               if (strlen(line) == 0)
+
+               if (line[0] == '\0')
                        continue;
+
                remove_trailing_newlines(line);
-               INFO("processing: .%s", line);
+
+               INFO("Processing \"%s\"", line);
                if (line[0] == '[') {
                        /* Read the architecture for next set of rules. */
                        if (strcmp(line, "[x86]") == 0 ||
@@ -679,6 +779,7 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
                                        cur_rule_arch = lxc_seccomp_arch_unknown;
                                        continue;
                                }
+
                                cur_rule_arch = lxc_seccomp_arch_i386;
                        } else if (strcmp(line, "[x32]") == 0 ||
                                   strcmp(line, "[X32]") == 0) {
@@ -686,6 +787,7 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
                                        cur_rule_arch = lxc_seccomp_arch_unknown;
                                        continue;
                                }
+
                                cur_rule_arch = lxc_seccomp_arch_x32;
                        } else if (strcmp(line, "[X86_64]") == 0 ||
                                   strcmp(line, "[x86_64]") == 0) {
@@ -693,6 +795,7 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
                                        cur_rule_arch = lxc_seccomp_arch_unknown;
                                        continue;
                                }
+
                                cur_rule_arch = lxc_seccomp_arch_amd64;
                        } else if (strcmp(line, "[all]") == 0 ||
                                   strcmp(line, "[ALL]") == 0) {
@@ -706,6 +809,7 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
                                        cur_rule_arch = lxc_seccomp_arch_unknown;
                                        continue;
                                }
+
                                cur_rule_arch = lxc_seccomp_arch_arm;
                        }
 #endif
@@ -716,6 +820,7 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
                                        cur_rule_arch = lxc_seccomp_arch_unknown;
                                        continue;
                                }
+
                                cur_rule_arch = lxc_seccomp_arch_arm64;
                        }
 #endif
@@ -726,6 +831,7 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
                                        cur_rule_arch = lxc_seccomp_arch_unknown;
                                        continue;
                                }
+
                                cur_rule_arch = lxc_seccomp_arch_ppc64le;
                        }
 #endif
@@ -736,6 +842,7 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
                                        cur_rule_arch = lxc_seccomp_arch_unknown;
                                        continue;
                                }
+
                                cur_rule_arch = lxc_seccomp_arch_ppc64;
                        }
 #endif
@@ -747,6 +854,7 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
                                        cur_rule_arch = lxc_seccomp_arch_unknown;
                                        continue;
                                }
+
                                cur_rule_arch = lxc_seccomp_arch_ppc;
                        }
 #endif
@@ -757,6 +865,7 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
                                        cur_rule_arch = lxc_seccomp_arch_unknown;
                                        continue;
                                }
+
                                cur_rule_arch = lxc_seccomp_arch_mips64;
                        } else if (strcmp(line, "[mips64n32]") == 0 ||
                                   strcmp(line, "[MIPS64N32]") == 0) {
@@ -764,6 +873,7 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
                                        cur_rule_arch = lxc_seccomp_arch_unknown;
                                        continue;
                                }
+
                                cur_rule_arch = lxc_seccomp_arch_mips64n32;
                        } else if (strcmp(line, "[mips]") == 0 ||
                                   strcmp(line, "[MIPS]") == 0) {
@@ -772,6 +882,7 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
                                        cur_rule_arch = lxc_seccomp_arch_unknown;
                                        continue;
                                }
+
                                cur_rule_arch = lxc_seccomp_arch_mips;
                        } else if (strcmp(line, "[mipsel64]") == 0 ||
                                   strcmp(line, "[MIPSEL64]") == 0) {
@@ -779,6 +890,7 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
                                        cur_rule_arch = lxc_seccomp_arch_unknown;
                                        continue;
                                }
+
                                cur_rule_arch = lxc_seccomp_arch_mipsel64;
                        } else if (strcmp(line, "[mipsel64n32]") == 0 ||
                                   strcmp(line, "[MIPSEL64N32]") == 0) {
@@ -786,6 +898,7 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
                                        cur_rule_arch = lxc_seccomp_arch_unknown;
                                        continue;
                                }
+
                                cur_rule_arch = lxc_seccomp_arch_mipsel64n32;
                        } else if (strcmp(line, "[mipsel]") == 0 ||
                                   strcmp(line, "[MIPSEL]") == 0) {
@@ -794,6 +907,7 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
                                        cur_rule_arch = lxc_seccomp_arch_unknown;
                                        continue;
                                }
+
                                cur_rule_arch = lxc_seccomp_arch_mipsel;
                        }
 #endif
@@ -804,11 +918,13 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
                                        cur_rule_arch = lxc_seccomp_arch_unknown;
                                        continue;
                                }
+
                                cur_rule_arch = lxc_seccomp_arch_s390x;
                        }
 #endif
-                       else
+                       else {
                                goto bad_arch;
+                       }
 
                        continue;
                }
@@ -825,9 +941,23 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
                        goto bad_rule;
                }
 
+#if HAVE_DECL_SECCOMP_NOTIF_GET_FD
+               if ((rule.action == SCMP_ACT_NOTIFY) &&
+                   !conf->seccomp.notifier.wants_supervision) {
+                       ret = seccomp_attr_set(conf->seccomp.seccomp_ctx,
+                                              SCMP_FLTATR_NEW_LISTENER, 1);
+                       if (ret)
+                               goto bad_rule;
+
+                       conf->seccomp.notifier.wants_supervision = true;
+                       TRACE("Set SCMP_FLTATR_NEW_LISTENER attribute");
+               }
+#endif
+
                if (!do_resolve_add_rule(SCMP_ARCH_NATIVE, line,
-                                        conf->seccomp_ctx, &rule))
+                                        conf->seccomp.seccomp_ctx, &rule))
                        goto bad_rule;
+
                INFO("Added native rule for arch %d for %s action %d(%s)",
                     SCMP_ARCH_NATIVE, line, rule.action,
                     get_action_name(rule.action));
@@ -836,6 +966,7 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
                        if (!do_resolve_add_rule(ctx.architectures[0], line,
                                                 ctx.contexts[0], &rule))
                                goto bad_rule;
+
                        INFO("Added compat rule for arch %d for %s action %d(%s)",
                             ctx.architectures[0], line, rule.action,
                             get_action_name(rule.action));
@@ -845,6 +976,7 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
                        if (!do_resolve_add_rule(ctx.architectures[1], line,
                                                 ctx.contexts[1], &rule))
                                goto bad_rule;
+
                        INFO("Added compat rule for arch %d for %s action %d(%s)",
                             ctx.architectures[1], line, rule.action,
                             get_action_name(rule.action));
@@ -854,6 +986,7 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
                        if (!do_resolve_add_rule(ctx.architectures[2], line,
                                                ctx.contexts[2], &rule))
                                goto bad_rule;
+
                        INFO("Added native rule for arch %d for %s action %d(%s)",
                             ctx.architectures[2], line, rule.action,
                             get_action_name(rule.action));
@@ -863,12 +996,13 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
        INFO("Merging compat seccomp contexts into main context");
        if (ctx.contexts[0]) {
                if (ctx.needs_merge[0]) {
-                       ret = seccomp_merge(conf->seccomp_ctx, ctx.contexts[0]);
+                       ret = seccomp_merge(conf->seccomp.seccomp_ctx, ctx.contexts[0]);
                        if (ret < 0) {
                                ERROR("Failed to merge first compat seccomp "
                                      "context into main context");
                                goto bad;
                        }
+
                        TRACE("Merged first compat seccomp context into main context");
                } else {
                        seccomp_release(ctx.contexts[0]);
@@ -878,12 +1012,13 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
 
        if (ctx.contexts[1]) {
                if (ctx.needs_merge[1]) {
-                       ret = seccomp_merge(conf->seccomp_ctx, ctx.contexts[1]);
+                       ret = seccomp_merge(conf->seccomp.seccomp_ctx, ctx.contexts[1]);
                        if (ret < 0) {
                                ERROR("Failed to merge first compat seccomp "
                                      "context into main context");
                                goto bad;
                        }
+
                        TRACE("Merged second compat seccomp context into main context");
                } else {
                        seccomp_release(ctx.contexts[1]);
@@ -893,12 +1028,13 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
 
        if (ctx.contexts[2]) {
                if (ctx.needs_merge[2]) {
-                       ret = seccomp_merge(conf->seccomp_ctx, ctx.contexts[2]);
+                       ret = seccomp_merge(conf->seccomp.seccomp_ctx, ctx.contexts[2]);
                        if (ret < 0) {
                                ERROR("Failed to merge third compat seccomp "
                                      "context into main context");
                                goto bad;
                        }
+
                        TRACE("Merged third compat seccomp context into main context");
                } else {
                        seccomp_release(ctx.contexts[2]);
@@ -906,19 +1042,25 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
                }
        }
 
+       free(line);
        return 0;
 
 bad_arch:
-       ERROR("Unsupported arch: %s.", line);
+       ERROR("Unsupported architecture \"%s\"", line);
+
 bad_rule:
 bad:
        if (ctx.contexts[0])
                seccomp_release(ctx.contexts[0]);
+
        if (ctx.contexts[1])
                seccomp_release(ctx.contexts[1]);
+
        if (ctx.contexts[2])
                seccomp_release(ctx.contexts[2]);
 
+       free(line);
+
        return -1;
 }
 #else /* HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH */
@@ -938,7 +1080,8 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
  */
 static int parse_config(FILE *f, struct lxc_conf *conf)
 {
-       char line[1024];
+       /* Buffer and capacity are managed by getline(); both start out empty. */
+       char *line = NULL;
+       size_t line_bufsz = 0;
        int ret, version;
 
        ret = fscanf(f, "%d\n", &version);
@@ -946,23 +1089,30 @@ static int parse_config(FILE *f, struct lxc_conf *conf)
                ERROR("Invalid version");
                return -1;
        }
-       if (!fgets(line, 1024, f)) {
+
+       /*
+        * Read the line following the version number; it is scanned for the
+        * "whitelist" and "debug" keywords below.
+        */
+       if (getline(&line, &line_bufsz, f) == -1) {
                ERROR("Invalid config file");
-               return -1;
+               goto bad_line;
        }
+
        if (version == 1 && !strstr(line, "whitelist")) {
                ERROR("Only whitelist policy is supported");
-               return -1;
+               goto bad_line;
        }
 
        if (strstr(line, "debug")) {
                ERROR("Debug not yet implemented");
-               return -1;
+               goto bad_line;
        }
 
+       /*
+        * From here on the heap buffer is handed to the version specific
+        * parser and is not freed in this function.
+        * NOTE(review): both parsers are assumed to free "line" before they
+        * return - confirm for every exit path.
+        */
        if (version == 1)
-               return parse_config_v1(f, conf);
-       return parse_config_v2(f, line, conf);
+               return parse_config_v1(f, line, &line_bufsz, conf);
+
+       return parse_config_v2(f, line, &line_bufsz, conf);
+
+       /* Early failure: no parser took the buffer, so release it here. */
+bad_line:
+       free(line);
+       return -1;
 }
 
 /*
@@ -972,54 +1122,63 @@ static int parse_config(FILE *f, struct lxc_conf *conf)
  *   1. seccomp is not enabled in the kernel
  *   2. a seccomp policy is already enabled for this task
  */
-static bool use_seccomp(void)
+static bool use_seccomp(const struct lxc_conf *conf)
 {
-       FILE *f = fopen("/proc/self/status", "r");
-       char line[1024];
-       bool already_enabled = false;
-       bool found = false;
        int ret, v;
+       FILE *f;
+       size_t line_bufsz = 0;
+       char *line = NULL;
+       bool already_enabled = false, found = false;
 
+       /* Nesting was requested: answer "yes" without looking at /proc. */
+       if (conf->seccomp.allow_nesting > 0)
+               return true;
+
+       f = fopen("/proc/self/status", "r");
+       /* The status file cannot be inspected: default to using seccomp. */
        if (!f)
                return true;
 
-       while (fgets(line, 1024, f)) {
+       /* Find the "Seccomp:" field; a non-zero value marks an active filter. */
+       while (getline(&line, &line_bufsz, f) != -1) {
                if (strncmp(line, "Seccomp:", 8) == 0) {
                        found = true;
+
                        ret = sscanf(line + 8, "%d", &v);
                        if (ret == 1 && v != 0)
                                already_enabled = true;
+
                        break;
                }
        }
-
+       /* getline() allocated the buffer (free(NULL) is fine if it never did). */
+       free(line);
        fclose(f);
-       if (!found) { /* no Seccomp line, no seccomp in kernel */
+
+       /* No "Seccomp:" line at all. */
+       if (!found) {
                INFO("Seccomp is not enabled in the kernel");
                return false;
        }
-       if (already_enabled) { /* already seccomp-confined */
+
+       /* This task already runs under a seccomp policy. */
+       if (already_enabled) {
                INFO("Already seccomp-confined, not loading new policy");
                return false;
        }
+
        return true;
 }
 
+/*
+ * Initialise a libseccomp context with a default action of SCMP_ACT_KILL and
+ * fill it from the policy file named by conf->seccomp.seccomp.
+ *
+ * Returns 0 when no policy file is configured, when use_seccomp() says no,
+ * or when parse_config() succeeds; returns -1 on any failure.
+ */
 int lxc_read_seccomp_config(struct lxc_conf *conf)
 {
-       FILE *f;
        int ret;
-       int check_seccomp_attr_set;
+       FILE *f;
 
-       if (!conf->seccomp)
+       if (!conf->seccomp.seccomp)
                return 0;
 
-       if (!use_seccomp())
+       if (!use_seccomp(conf))
                return 0;
+
 #if HAVE_SCMP_FILTER_CTX
        /* XXX for debug, pass in SCMP_ACT_TRAP */
-       conf->seccomp_ctx = seccomp_init(SCMP_ACT_KILL);
-       ret = !conf->seccomp_ctx;
+       conf->seccomp.seccomp_ctx = seccomp_init(SCMP_ACT_KILL);
+       ret = !conf->seccomp.seccomp_ctx;
 #else
        ret = seccomp_init(SCMP_ACT_KILL) < 0;
 #endif
@@ -1028,72 +1187,346 @@ int lxc_read_seccomp_config(struct lxc_conf *conf)
                return -1;
        }
 
-/* turn off no-new-privs.  We don't want it in lxc, and it breaks
+/* turn off no-new-privs. We don't want it in lxc, and it breaks
  * with apparmor */
 #if HAVE_SCMP_FILTER_CTX
-       check_seccomp_attr_set = seccomp_attr_set(conf->seccomp_ctx, SCMP_FLTATR_CTL_NNP, 0);
+       ret = seccomp_attr_set(conf->seccomp.seccomp_ctx, SCMP_FLTATR_CTL_NNP, 0);
 #else
-       check_seccomp_attr_set = seccomp_attr_set(SCMP_FLTATR_CTL_NNP, 0);
+       ret = seccomp_attr_set(SCMP_FLTATR_CTL_NNP, 0);
 #endif
-       if (check_seccomp_attr_set) {
-               ERROR("Failed to turn off no-new-privs");
+       if (ret < 0) {
+               /* The negative return value is turned into an errno code for SYSERROR. */
+               errno = -ret;
+               SYSERROR("Failed to turn off no-new-privs");
                return -1;
        }
+
 #ifdef SCMP_FLTATR_ATL_TSKIP
-       if (seccomp_attr_set(conf->seccomp_ctx, SCMP_FLTATR_ATL_TSKIP, 1)) {
-               WARN("Failed to turn on seccomp nop-skip, continuing");
+       /* Best effort only: a failure here is logged and otherwise ignored. */
+       ret = seccomp_attr_set(conf->seccomp.seccomp_ctx, SCMP_FLTATR_ATL_TSKIP, 1);
+       if (ret < 0) {
+               errno = -ret;
+               SYSWARN("Failed to turn on seccomp nop-skip, continuing");
        }
 #endif
 
-       f = fopen(conf->seccomp, "r");
+       f = fopen(conf->seccomp.seccomp, "r");
        if (!f) {
-               SYSERROR("Failed to open seccomp policy file %s", conf->seccomp);
+               SYSERROR("Failed to open seccomp policy file %s", conf->seccomp.seccomp);
                return -1;
        }
+
        ret = parse_config(f, conf);
        fclose(f);
+
        return ret;
 }
 
+/*
+ * Load the seccomp context stored in conf into the kernel.
+ *
+ * Does nothing and returns 0 when no policy file is configured or when
+ * use_seccomp() says no. After a successful load the filter is exported to
+ * the log fd when trace logging is active, and - if a rule requested
+ * supervision - the seccomp notify fd is stored in
+ * conf->seccomp.notifier.notify_fd.
+ *
+ * Returns 0 on success and -1 on failure, with errno set from the negated
+ * libseccomp return value.
+ */
 int lxc_seccomp_load(struct lxc_conf *conf)
 {
        int ret;
-       if (!conf->seccomp)
+
+       if (!conf->seccomp.seccomp)
                return 0;
-       if (!use_seccomp())
+
+       if (!use_seccomp(conf))
                return 0;
-       ret = seccomp_load(
+
 #if HAVE_SCMP_FILTER_CTX
-           conf->seccomp_ctx
+       ret = seccomp_load(conf->seccomp.seccomp_ctx);
+#else
+       ret = seccomp_load();
 #endif
-           );
        if (ret < 0) {
-               ERROR("Error loading the seccomp policy: %s", strerror(-ret));
+               errno = -ret;
+               SYSERROR("Error loading the seccomp policy");
                return -1;
        }
 
 /* After load seccomp filter into the kernel successfully, export the current seccomp
  * filter to log file */
 #if HAVE_SCMP_FILTER_CTX
-       if ((lxc_log_get_level() <= LXC_LOG_LEVEL_TRACE || conf->loglevel <= LXC_LOG_LEVEL_TRACE) &&
+       if ((lxc_log_get_level() <= LXC_LOG_LEVEL_TRACE ||
+            conf->loglevel <= LXC_LOG_LEVEL_TRACE) &&
            lxc_log_fd >= 0) {
-               ret = seccomp_export_pfc(conf->seccomp_ctx, lxc_log_fd);
+               ret = seccomp_export_pfc(conf->seccomp.seccomp_ctx, lxc_log_fd);
                /* Just give an warning when export error */
-               if (ret < 0)
-                       WARN("Failed to export seccomp filter to log file: %s", strerror(-ret));
+               if (ret < 0) {
+                       errno = -ret;
+                       SYSWARN("Failed to export seccomp filter to log file");
+               }
        }
 #endif
+
+#if HAVE_DECL_SECCOMP_NOTIF_GET_FD
+       if (conf->seccomp.notifier.wants_supervision) {
+               ret = seccomp_notify_fd(conf->seccomp.seccomp_ctx);
+               if (ret < 0) {
+                       /* Log like every other failure path in this function. */
+                       errno = -ret;
+                       SYSERROR("Failed to retrieve seccomp notify fd");
+                       return -1;
+               }
+
+               conf->seccomp.notifier.notify_fd = ret;
+               TRACE("Retrieved new seccomp listener fd %d", ret);
+       }
+#endif
+
        return 0;
 }
 
-void lxc_seccomp_free(struct lxc_conf *conf)
+/*
+ * Release everything held by a struct lxc_seccomp: the policy path string,
+ * the libseccomp context, the notify and proxy file descriptors and the
+ * notification request/response buffers. Pointers are reset to NULL so that
+ * calling this function again is harmless.
+ */
+void lxc_seccomp_free(struct lxc_seccomp *seccomp)
 {
-       free(conf->seccomp);
-       conf->seccomp = NULL;
+       free_disarm(seccomp->seccomp);
+
 #if HAVE_SCMP_FILTER_CTX
-       if (conf->seccomp_ctx) {
-               seccomp_release(conf->seccomp_ctx);
-               conf->seccomp_ctx = NULL;
+       if (seccomp->seccomp_ctx) {
+               seccomp_release(seccomp->seccomp_ctx);
+               seccomp->seccomp_ctx = NULL;
        }
 #endif
+
+#if HAVE_DECL_SECCOMP_NOTIF_GET_FD
+       close_prot_errno_disarm(seccomp->notifier.notify_fd);
+       close_prot_errno_disarm(seccomp->notifier.proxy_fd);
+       /*
+        * Counterpart of seccomp_notify_alloc(); the libseccomp API is named
+        * seccomp_notify_*, not seccomp_notif_*.
+        */
+       seccomp_notify_free(seccomp->notifier.req_buf, seccomp->notifier.rsp_buf);
+       seccomp->notifier.req_buf = NULL;
+       seccomp->notifier.rsp_buf = NULL;
+#endif
+}
+
+#if HAVE_DECL_SECCOMP_NOTIF_GET_FD
+/*
+ * Replace the connection to the seccomp proxy: the stored proxy fd is closed,
+ * a new socket is connected to the configured proxy address, given a 30
+ * second timeout and stored as the new proxy fd.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+static int seccomp_notify_reconnect(struct lxc_handler *handler)
+{
+       __do_close_prot_errno int sock_fd = -EBADF;
+       struct lxc_conf *conf = handler->conf;
+       int ret;
+
+       /* The old connection goes away first. */
+       close_prot_errno_disarm(conf->seccomp.notifier.proxy_fd);
+
+       sock_fd = lxc_unix_connect(&conf->seccomp.notifier.proxy_addr);
+       if (sock_fd < 0) {
+               SYSERROR("Failed to reconnect to seccomp proxy");
+               return -1;
+       }
+
+       /* 30 second timeout */
+       ret = lxc_socket_set_timeout(sock_fd, 30, 30);
+       if (ret != 0) {
+               SYSERROR("Failed to set socket timeout");
+               return -1;
+       }
+
+       /* The fd is handed over to the notifier state. */
+       conf->seccomp.notifier.proxy_fd = move_fd(sock_fd);
+       return 0;
+}
+#endif
+
+#if HAVE_DECL_SECCOMP_NOTIF_GET_FD
+/*
+ * Answer the notification "req" with -ENOSYS on behalf of the proxy and then
+ * try to reconnect to the proxy.
+ *
+ * A failure to send the answer is only logged. The return value is that of
+ * seccomp_notify_reconnect(): 0 on success, -1 on failure.
+ */
+static int seccomp_notify_default_answer(int fd, struct seccomp_notif *req,
+                                        struct seccomp_notif_resp *resp,
+                                        struct lxc_handler *handler)
+{
+       resp->id = req->id;
+       resp->error = -ENOSYS;
+       /*
+        * The response buffer is reused between requests and may still carry
+        * values copied in from an earlier proxy reply; clear them so they
+        * are not sent along with the default answer.
+        */
+       resp->val = 0;
+       resp->flags = 0;
+
+       if (seccomp_notify_respond(fd, resp))
+               SYSERROR("Failed to send default message to seccomp");
+
+       return seccomp_notify_reconnect(handler);
+}
+#endif
+
+/*
+ * Mainloop callback for the seccomp notify fd.
+ *
+ * Receives one notification from "fd", forwards it together with an fd for
+ * /proc/<pid>/mem of the notifying task to the seccomp proxy, waits for the
+ * proxy's reply and passes that reply back to the kernel. Whenever the proxy
+ * cannot be used for a request, the request is answered with the default
+ * answer (-ENOSYS) instead and processing of that request stops.
+ *
+ * "data" must point to the struct lxc_handler of the container; "events" and
+ * "descr" are unused.
+ *
+ * Returns 0 once a notification was processed, whether it succeeded or not.
+ * When no proxy fd is registered it returns the result of
+ * minus_one_set_errno(EINVAL), presumably -1 with errno set. Built without
+ * notify support it returns -ENOSYS.
+ */
+int seccomp_notify_handler(int fd, uint32_t events, void *data,
+                          struct lxc_epoll_descr *descr)
+{
+
+#if HAVE_DECL_SECCOMP_NOTIF_GET_FD
+       __do_close_prot_errno int fd_mem = -EBADF;
+       int ret;
+       ssize_t bytes;
+       char mem_path[6 /* /proc/ */
+                     + INTTYPE_TO_STRLEN(int64_t)
+                     + 4 /* /mem */
+                     + 1 /* \0 */];
+       struct lxc_handler *hdlr = data;
+       struct lxc_conf *conf = hdlr->conf;
+       struct seccomp_notif *req = conf->seccomp.notifier.req_buf;
+       struct seccomp_notif_resp *resp = conf->seccomp.notifier.rsp_buf;
+       int listener_proxy_fd = conf->seccomp.notifier.proxy_fd;
+       struct seccomp_notify_proxy_msg msg = {0};
+
+       if (listener_proxy_fd < 0) {
+               ERROR("No seccomp proxy registered");
+               return minus_one_set_errno(EINVAL);
+       }
+
+       ret = seccomp_notify_receive(fd, req);
+       if (ret) {
+               SYSERROR("Failed to read seccomp notification");
+               goto out;
+       }
+
+       snprintf(mem_path, sizeof(mem_path), "/proc/%d/mem", req->pid);
+       fd_mem = open(mem_path, O_RDONLY | O_CLOEXEC);
+       if (fd_mem < 0) {
+               /* Log first: sending the default answer may change errno. */
+               SYSERROR("Failed to open process memory for seccomp notify request");
+               (void)seccomp_notify_default_answer(fd, req, resp, hdlr);
+               goto out;
+       }
+
+       /*
+        * Make sure that the fd for /proc/<pid>/mem we just opened still
+        * refers to the correct process's memory.
+        */
+       ret = seccomp_notify_id_valid(fd, req->id);
+       if (ret < 0) {
+               SYSERROR("Invalid seccomp notify request id");
+               (void)seccomp_notify_default_answer(fd, req, resp, hdlr);
+               goto out;
+       }
+
+       memcpy(&msg.req, req, sizeof(msg.req));
+       msg.monitor_pid = hdlr->monitor_pid;
+       msg.init_pid = hdlr->pid;
+
+       bytes = lxc_unix_send_fds(listener_proxy_fd, &fd_mem, 1, &msg,
+                                 sizeof(msg));
+       if (bytes != (ssize_t)sizeof(msg)) {
+               SYSERROR("Failed to forward message to seccomp proxy");
+               /*
+                * The default answer replies to this request and replaces the
+                * proxy connection, so listener_proxy_fd is stale afterwards.
+                * Stop here instead of waiting for a reply that cannot come.
+                */
+               (void)seccomp_notify_default_answer(fd, req, resp, hdlr);
+               goto out;
+       }
+
+       /* The proxy has its own copy of the fd now. */
+       close_prot_errno_disarm(fd_mem);
+
+       bytes = lxc_recv_nointr(listener_proxy_fd, &msg, sizeof(msg), 0);
+       if (bytes != (ssize_t)sizeof(msg)) {
+               SYSERROR("Failed to receive message from seccomp proxy");
+               /* Already answered by default: never respond a second time. */
+               (void)seccomp_notify_default_answer(fd, req, resp, hdlr);
+               goto out;
+       }
+
+       memcpy(resp, &msg.resp, sizeof(*resp));
+       ret = seccomp_notify_respond(fd, resp);
+       if (ret)
+               SYSERROR("Failed to send seccomp notification");
+
+out:
+       return 0;
+#else
+       return -ENOSYS;
+#endif
+}
+
+/*
+ * Put the seccomp part of a container configuration into its pristine state:
+ * no policy file, no libseccomp context, nesting disabled, supervision not
+ * requested, no file descriptors, an all-zero proxy address and no
+ * notification buffers.
+ */
+void seccomp_conf_init(struct lxc_conf *conf)
+{
+       struct lxc_seccomp *seccomp = &conf->seccomp;
+
+       seccomp->seccomp = NULL;
+
+#if HAVE_SCMP_FILTER_CTX
+       seccomp->seccomp_ctx = NULL;
+       seccomp->allow_nesting = 0;
+#endif /* HAVE_SCMP_FILTER_CTX */
+
+#if HAVE_DECL_SECCOMP_NOTIF_GET_FD
+       seccomp->notifier.req_buf = NULL;
+       seccomp->notifier.rsp_buf = NULL;
+       seccomp->notifier.proxy_fd = -EBADF;
+       seccomp->notifier.notify_fd = -EBADF;
+       seccomp->notifier.wants_supervision = false;
+       memset(&seccomp->notifier.proxy_addr, 0,
+              sizeof(seccomp->notifier.proxy_addr));
+#endif
+}
+
+/*
+ * Connect to the seccomp proxy and register the seccomp notify fd with the
+ * mainloop.
+ *
+ * Only acts when supervision was requested and a proxy address is
+ * configured. In that case it connects to the proxy with a 30 second
+ * timeout, allocates the notification request/response buffers and adds
+ * seccomp->notifier.notify_fd to "descr" with seccomp_notify_handler() as
+ * callback and "handler" as its data. On success the proxy socket is stored
+ * in seccomp->notifier.proxy_fd.
+ *
+ * Returns 0 on success, or when there is nothing to do, and -1 on failure.
+ */
+int lxc_seccomp_setup_proxy(struct lxc_seccomp *seccomp,
+                           struct lxc_epoll_descr *descr,
+                           struct lxc_handler *handler)
+{
+#if HAVE_DECL_SECCOMP_NOTIF_GET_FD
+       if (seccomp->notifier.wants_supervision &&
+           seccomp->notifier.proxy_addr.sun_path[1] != '\0') {
+               __do_close_prot_errno int proxy_fd = -EBADF;
+               int ret;
+
+               proxy_fd = lxc_unix_connect(&seccomp->notifier.proxy_addr);
+               if (proxy_fd < 0) {
+                       SYSERROR("Failed to connect to seccomp proxy");
+                       return -1;
+               }
+
+               /* 30 second timeout */
+               ret = lxc_socket_set_timeout(proxy_fd, 30, 30);
+               if (ret) {
+                       SYSERROR("Failed to set timeouts for seccomp proxy");
+                       return -1;
+               }
+
+               ret = seccomp_notify_alloc(&seccomp->notifier.req_buf,
+                                         &seccomp->notifier.rsp_buf);
+               if (ret) {
+                       ERROR("Failed to allocate seccomp notify request and response buffers");
+                       /* libseccomp reports errors as negative errno values. */
+                       errno = (ret < 0) ? -ret : ret;
+                       return -1;
+               }
+
+               ret = lxc_mainloop_add_handler(descr,
+                                              seccomp->notifier.notify_fd,
+                                              seccomp_notify_handler, handler);
+               if (ret < 0) {
+                       /* Report the fd that was registered, not the proxy socket. */
+                       ERROR("Failed to add seccomp notify handler for %d to mainloop",
+                             seccomp->notifier.notify_fd);
+                       return -1;
+               }
+
+               seccomp->notifier.proxy_fd = move_fd(proxy_fd);
+       }
+#endif
+       return 0;
+}
+
+/*
+ * When supervision was requested, send the seccomp notify fd over
+ * "socket_fd" and close the local copy afterwards.
+ *
+ * Returns 0 on success, or when there is nothing to send, and -1 when
+ * sending fails (the fd is left open in that case).
+ */
+int lxc_seccomp_send_notifier_fd(struct lxc_seccomp *seccomp, int socket_fd)
+{
+#if HAVE_DECL_SECCOMP_NOTIF_GET_FD
+       int ret;
+
+       if (!seccomp->notifier.wants_supervision)
+               return 0;
+
+       ret = lxc_abstract_unix_send_fds(socket_fd,
+                                        &seccomp->notifier.notify_fd, 1,
+                                        NULL, 0);
+       if (ret < 0)
+               return -1;
+
+       close_prot_errno_disarm(seccomp->notifier.notify_fd);
+#endif
+       return 0;
+}
+
+/*
+ * When supervision was requested, receive the seccomp notify fd from
+ * "socket_fd" into seccomp->notifier.notify_fd.
+ *
+ * Returns 0 on success, or when there is nothing to receive, and -1 when
+ * receiving fails.
+ */
+int lxc_seccomp_recv_notifier_fd(struct lxc_seccomp *seccomp, int socket_fd)
+{
+#if HAVE_DECL_SECCOMP_NOTIF_GET_FD
+       if (seccomp->notifier.wants_supervision &&
+           lxc_abstract_unix_recv_fds(socket_fd,
+                                      &seccomp->notifier.notify_fd,
+                                      1, NULL, 0) < 0)
+               return -1;
+#endif
+       return 0;
+}
+
+/*
+ * When supervision was requested, pass the seccomp notify fd to
+ * lxc_cmd_seccomp_notify_add_listener() for the container identified by
+ * "name" and "lxcpath". The local copy of the fd is closed afterwards,
+ * whether or not the command succeeded.
+ *
+ * Returns 0 on success, or when there is nothing to do, and -1 when the
+ * command fails.
+ */
+int lxc_seccomp_add_notifier(const char *name, const char *lxcpath,
+                            struct lxc_seccomp *seccomp)
+{
+
+#if HAVE_DECL_SECCOMP_NOTIF_GET_FD
+       int err;
+
+       if (!seccomp->notifier.wants_supervision)
+               return 0;
+
+       err = lxc_cmd_seccomp_notify_add_listener(name, lxcpath,
+                                                 seccomp->notifier.notify_fd,
+                                                 -1, 0);
+       close_prot_errno_disarm(seccomp->notifier.notify_fd);
+       if (err < 0)
+               return -1;
+#endif
+       return 0;
 }