seccomp: Add filter flag to opt-out of SSB mitigation
index 98b59b5db90baae53b091afe807c133e6027d081..bb1cc737456eaf86014d2e9f395b188d75cba3bb 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
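
The hunks below fold several related changes into kernel/seccomp.c: a per-filter log flag and the SECCOMP_RET_LOG action, the actions_avail/actions_logged sysctls, the SECCOMP_GET_ACTION_AVAIL operation, and the SECCOMP_FILTER_FLAG_SPEC_ALLOW flag named in the subject, which lets a filter opt out of the automatic Speculative Store Bypass mitigation. A minimal userspace sketch (not part of the patch; the flag value is assumed from the uapi headers) of installing a permissive filter with that flag:

/*
 * Hypothetical sketch: install an allow-all filter while opting out of the
 * automatic SSB mitigation via SECCOMP_FILTER_FLAG_SPEC_ALLOW.
 */
#include <stddef.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <linux/filter.h>
#include <linux/seccomp.h>

#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
#define SECCOMP_FILTER_FLAG_SPEC_ALLOW	(1UL << 2)	/* assumed value */
#endif

int main(void)
{
	struct sock_filter insns[] = {
		/* Allow every syscall; a real filter would inspect seccomp_data. */
		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = sizeof(insns) / sizeof(insns[0]),
		.filter = insns,
	};

	/* Required before installing a filter without CAP_SYS_ADMIN. */
	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0))
		return 1;

	/* Opt out of the default mitigation applied in seccomp_assign_mode(). */
	if (syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
		    SECCOMP_FILTER_FLAG_SPEC_ALLOW, &prog))
		return 1;

	puts("filter installed without forcing PR_SPEC_FORCE_DISABLE");
	return 0;
}
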
 #include <linux/audit.h>
 #include <linux/compat.h>
 #include <linux/coredump.h>
+#include <linux/kmemleak.h>
+#include <linux/nospec.h>
+#include <linux/prctl.h>
 #include <linux/sched.h>
 #include <linux/sched/task_stack.h>
 #include <linux/seccomp.h>
 #include <linux/slab.h>
 #include <linux/syscalls.h>
+#include <linux/sysctl.h>
 
 #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
 #include <asm/syscall.h>
@@ -42,6 +46,7 @@
  *         get/put helpers should be used when accessing an instance
  *         outside of a lifetime-guarded section.  In general, this
  *         is only needed for handling filters shared across tasks.
+ * @log: true if all actions except for SECCOMP_RET_ALLOW should be logged
  * @prev: points to a previously installed, or inherited, filter
  * @prog: the BPF program to evaluate
  *
@@ -57,6 +62,7 @@
  */
 struct seccomp_filter {
        refcount_t usage;
+       bool log;
        struct seccomp_filter *prev;
        struct bpf_prog *prog;
 };
@@ -171,16 +177,20 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
 /**
  * seccomp_run_filters - evaluates all seccomp filters against @sd
  * @sd: optional seccomp data to be passed to filters
+ * @match: stores struct seccomp_filter that resulted in the return value,
+ *         unless filter returned SECCOMP_RET_ALLOW, in which case it will
+ *         be unchanged.
  *
  * Returns valid seccomp BPF response codes.
  */
-static u32 seccomp_run_filters(const struct seccomp_data *sd)
+static u32 seccomp_run_filters(const struct seccomp_data *sd,
+                              struct seccomp_filter **match)
 {
        struct seccomp_data sd_local;
        u32 ret = SECCOMP_RET_ALLOW;
        /* Make sure cross-thread synced filter points somewhere sane. */
        struct seccomp_filter *f =
-                       lockless_dereference(current->seccomp.filter);
+                       READ_ONCE(current->seccomp.filter);
 
        /* Ensure unexpected behavior doesn't result in failing open. */
        if (unlikely(WARN_ON(f == NULL)))
@@ -198,8 +208,10 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd)
        for (; f; f = f->prev) {
                u32 cur_ret = BPF_PROG_RUN(f->prog, sd);
 
-               if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
+               if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) {
                        ret = cur_ret;
+                       *match = f;
+               }
        }
        return ret;
 }
@@ -215,8 +227,22 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
        return true;
 }
 
+/*
+ * If a given speculation mitigation is opt-in (prctl()-controlled),
+ * select it, by disabling speculation (enabling mitigation).
+ */
+static inline void spec_mitigate(struct task_struct *task,
+                                unsigned long which)
+{
+       int state = arch_prctl_spec_ctrl_get(task, which);
+
+       if (state > 0 && (state & PR_SPEC_PRCTL))
+               arch_prctl_spec_ctrl_set(task, which, PR_SPEC_FORCE_DISABLE);
+}
+
 static inline void seccomp_assign_mode(struct task_struct *task,
-                                      unsigned long seccomp_mode)
+                                      unsigned long seccomp_mode,
+                                      unsigned long flags)
 {
        assert_spin_locked(&task->sighand->siglock);
 
@@ -226,6 +252,9 @@ static inline void seccomp_assign_mode(struct task_struct *task,
         * filter) is set.
         */
        smp_mb__before_atomic();
+       /* Assume default seccomp processes want spec flaw mitigation. */
+       if ((flags & SECCOMP_FILTER_FLAG_SPEC_ALLOW) == 0)
+               spec_mitigate(task, PR_SPEC_STORE_BYPASS);
        set_tsk_thread_flag(task, TIF_SECCOMP);
 }
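
spec_mitigate() drives the same arch hooks as the speculation prctl, so the state seccomp forces by default is the one a task could also request for itself with PR_SET_SPECULATION_CTRL. A hedged userspace sketch of that equivalent prctl() path (constants assumed from <linux/prctl.h>; not part of this patch):

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_SPEC_STORE_BYPASS
#define PR_GET_SPECULATION_CTRL	52
#define PR_SET_SPECULATION_CTRL	53
#define PR_SPEC_STORE_BYPASS	0
#define PR_SPEC_PRCTL		(1UL << 0)
#define PR_SPEC_FORCE_DISABLE	(1UL << 3)
#endif

int main(void)
{
	long state = prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0);

	/* Mirrors the kernel check: only prctl()-controlled tasks can be toggled. */
	if (state > 0 && (state & PR_SPEC_PRCTL))
		prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
		      PR_SPEC_FORCE_DISABLE, 0, 0);

	printf("SSB state before forcing disable: %ld\n", state);
	return 0;
}
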
 
@@ -293,7 +322,7 @@ static inline pid_t seccomp_can_sync_threads(void)
  * without dropping the locks.
  *
  */
-static inline void seccomp_sync_threads(void)
+static inline void seccomp_sync_threads(unsigned long flags)
 {
        struct task_struct *thread, *caller;
 
@@ -334,7 +363,8 @@ static inline void seccomp_sync_threads(void)
                 * allow one thread to transition the other.
                 */
                if (thread->seccomp.mode == SECCOMP_MODE_DISABLED)
-                       seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
+                       seccomp_assign_mode(thread, SECCOMP_MODE_FILTER,
+                                           flags);
        }
 }
 
@@ -444,6 +474,10 @@ static long seccomp_attach_filter(unsigned int flags,
                        return ret;
        }
 
+       /* Set log flag, if present. */
+       if (flags & SECCOMP_FILTER_FLAG_LOG)
+               filter->log = true;
+
        /*
         * If there is an existing filter, make it the prev and don't drop its
         * task reference.
@@ -453,19 +487,24 @@ static long seccomp_attach_filter(unsigned int flags,
 
        /* Now that the new filter is in place, synchronize to all threads. */
        if (flags & SECCOMP_FILTER_FLAG_TSYNC)
-               seccomp_sync_threads();
+               seccomp_sync_threads(flags);
 
        return 0;
 }
 
+void __get_seccomp_filter(struct seccomp_filter *filter)
+{
+       /* Reference count is bounded by the number of total processes. */
+       refcount_inc(&filter->usage);
+}
+
 /* get_seccomp_filter - increments the reference count of the filter on @tsk */
 void get_seccomp_filter(struct task_struct *tsk)
 {
        struct seccomp_filter *orig = tsk->seccomp.filter;
        if (!orig)
                return;
-       /* Reference count is bounded by the number of total processes. */
-       refcount_inc(&orig->usage);
+       __get_seccomp_filter(orig);
 }
 
 static inline void seccomp_filter_free(struct seccomp_filter *filter)
@@ -476,10 +515,8 @@ static inline void seccomp_filter_free(struct seccomp_filter *filter)
        }
 }
 
-/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */
-void put_seccomp_filter(struct task_struct *tsk)
+static void __put_seccomp_filter(struct seccomp_filter *orig)
 {
-       struct seccomp_filter *orig = tsk->seccomp.filter;
        /* Clean up single-reference branches iteratively. */
        while (orig && refcount_dec_and_test(&orig->usage)) {
                struct seccomp_filter *freeme = orig;
@@ -488,6 +525,12 @@ void put_seccomp_filter(struct task_struct *tsk)
        }
 }
 
+/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */
+void put_seccomp_filter(struct task_struct *tsk)
+{
+       __put_seccomp_filter(tsk->seccomp.filter);
+}
+
 static void seccomp_init_siginfo(siginfo_t *info, int syscall, int reason)
 {
        memset(info, 0, sizeof(*info));
@@ -514,6 +557,58 @@ static void seccomp_send_sigsys(int syscall, int reason)
 }
 #endif /* CONFIG_SECCOMP_FILTER */
 
+/* For use with seccomp_actions_logged */
+#define SECCOMP_LOG_KILL               (1 << 0)
+#define SECCOMP_LOG_TRAP               (1 << 2)
+#define SECCOMP_LOG_ERRNO              (1 << 3)
+#define SECCOMP_LOG_TRACE              (1 << 4)
+#define SECCOMP_LOG_LOG                        (1 << 5)
+#define SECCOMP_LOG_ALLOW              (1 << 6)
+
+static u32 seccomp_actions_logged = SECCOMP_LOG_KILL  | SECCOMP_LOG_TRAP  |
+                                   SECCOMP_LOG_ERRNO | SECCOMP_LOG_TRACE |
+                                   SECCOMP_LOG_LOG;
+
+static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
+                              bool requested)
+{
+       bool log = false;
+
+       switch (action) {
+       case SECCOMP_RET_ALLOW:
+               break;
+       case SECCOMP_RET_TRAP:
+               log = requested && seccomp_actions_logged & SECCOMP_LOG_TRAP;
+               break;
+       case SECCOMP_RET_ERRNO:
+               log = requested && seccomp_actions_logged & SECCOMP_LOG_ERRNO;
+               break;
+       case SECCOMP_RET_TRACE:
+               log = requested && seccomp_actions_logged & SECCOMP_LOG_TRACE;
+               break;
+       case SECCOMP_RET_LOG:
+               log = seccomp_actions_logged & SECCOMP_LOG_LOG;
+               break;
+       case SECCOMP_RET_KILL:
+       default:
+               log = seccomp_actions_logged & SECCOMP_LOG_KILL;
+       }
+
+       /*
+        * Force an audit message to be emitted when the action is RET_KILL,
+        * RET_LOG, or the FILTER_FLAG_LOG bit was set and the action is
+        * allowed to be logged by the admin.
+        */
+       if (log)
+               return __audit_seccomp(syscall, signr, action);
+
+       /*
+        * Let the audit subsystem decide if the action should be audited based
+        * on whether the current task itself is being audited.
+        */
+       return audit_seccomp(syscall, signr, action);
+}
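+
+seccomp_log() only honours the filter's log flag ("requested") for RET_TRAP, RET_ERRNO and RET_TRACE; RET_LOG and RET_KILL are logged whenever the admin permits them via actions_logged. A hypothetical userspace filter exercising the new pieces, returning SECCOMP_RET_LOG for one syscall and installed with SECCOMP_FILTER_FLAG_LOG (values assumed from the uapi headers; the arch check is omitted for brevity), might look like:
+
+#include <stddef.h>
+#include <unistd.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+
+#ifndef SECCOMP_RET_LOG
+#define SECCOMP_RET_LOG		0x7ffc0000U	/* assumed value */
+#endif
+#ifndef SECCOMP_FILTER_FLAG_LOG
+#define SECCOMP_FILTER_FLAG_LOG	(1UL << 1)	/* assumed value */
+#endif
+
+int install_logging_filter(void)
+{
+	struct sock_filter insns[] = {
+		/* Load the syscall number from seccomp_data. */
+		BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+			 offsetof(struct seccomp_data, nr)),
+		/* Log uname(2) invocations, allow everything else. */
+		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_uname, 0, 1),
+		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_LOG),
+		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = sizeof(insns) / sizeof(insns[0]),
+		.filter = insns,
+	};
+
+	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0))
+		return -1;
+	return syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
+		       SECCOMP_FILTER_FLAG_LOG, &prog);
+}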
+
 /*
  * Secure computing mode 1 allows only read/write/exit/sigreturn.
  * To be fully secure this must be combined with rlimit
@@ -539,7 +634,7 @@ static void __secure_computing_strict(int this_syscall)
 #ifdef SECCOMP_DEBUG
        dump_stack();
 #endif
-       audit_seccomp(this_syscall, SIGKILL, SECCOMP_RET_KILL);
+       seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL, true);
        do_exit(SIGKILL);
 }
 
@@ -566,6 +661,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
                            const bool recheck_after_trace)
 {
        u32 filter_ret, action;
+       struct seccomp_filter *match = NULL;
        int data;
 
        /*
@@ -574,7 +670,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
         */
        rmb();
 
-       filter_ret = seccomp_run_filters(sd);
+       filter_ret = seccomp_run_filters(sd, &match);
        data = filter_ret & SECCOMP_RET_DATA;
        action = filter_ret & SECCOMP_RET_ACTION;
 
@@ -637,12 +733,21 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 
                return 0;
 
+       case SECCOMP_RET_LOG:
+               seccomp_log(this_syscall, 0, action, true);
+               return 0;
+
        case SECCOMP_RET_ALLOW:
+               /*
+                * Note that the "match" filter will always be NULL for
+                * this action since SECCOMP_RET_ALLOW is the starting
+                * state in seccomp_run_filters().
+                */
                return 0;
 
        case SECCOMP_RET_KILL:
        default:
-               audit_seccomp(this_syscall, SIGSYS, action);
+               seccomp_log(this_syscall, SIGSYS, action, true);
                /* Dump core only if this is the last remaining thread. */
                if (get_nr_threads(current) == 1) {
                        siginfo_t info;
@@ -659,7 +764,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
        unreachable();
 
 skip:
-       audit_seccomp(this_syscall, 0, action);
+       seccomp_log(this_syscall, 0, action, match ? match->log : false);
        return -1;
 }
 #else
@@ -719,7 +824,7 @@ static long seccomp_set_mode_strict(void)
 #ifdef TIF_NOTSC
        disable_TSC();
 #endif
-       seccomp_assign_mode(current, seccomp_mode);
+       seccomp_assign_mode(current, seccomp_mode, 0);
        ret = 0;
 
 out:
@@ -777,7 +882,7 @@ static long seccomp_set_mode_filter(unsigned int flags,
        /* Do not free the successfully attached filter. */
        prepared = NULL;
 
-       seccomp_assign_mode(current, seccomp_mode);
+       seccomp_assign_mode(current, seccomp_mode, flags);
 out:
        spin_unlock_irq(&current->sighand->siglock);
        if (flags & SECCOMP_FILTER_FLAG_TSYNC)
@@ -794,6 +899,28 @@ static inline long seccomp_set_mode_filter(unsigned int flags,
 }
 #endif
 
+static long seccomp_get_action_avail(const char __user *uaction)
+{
+       u32 action;
+
+       if (copy_from_user(&action, uaction, sizeof(action)))
+               return -EFAULT;
+
+       switch (action) {
+       case SECCOMP_RET_KILL:
+       case SECCOMP_RET_TRAP:
+       case SECCOMP_RET_ERRNO:
+       case SECCOMP_RET_TRACE:
+       case SECCOMP_RET_LOG:
+       case SECCOMP_RET_ALLOW:
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return 0;
+}
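+
+seccomp_get_action_avail() gives userspace a forward-compatible way to test for an action before building a filter around it. A hedged probe sketch (op and action values assumed from the uapi headers; not part of this patch):
+
+#include <stdint.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <linux/seccomp.h>
+
+#ifndef SECCOMP_GET_ACTION_AVAIL
+#define SECCOMP_GET_ACTION_AVAIL	2	/* assumed op number */
+#endif
+#ifndef SECCOMP_RET_LOG
+#define SECCOMP_RET_LOG			0x7ffc0000U	/* assumed value */
+#endif
+
+int main(void)
+{
+	uint32_t action = SECCOMP_RET_LOG;
+
+	/* Returns 0 if the kernel recognizes the action, -EOPNOTSUPP otherwise. */
+	if (syscall(__NR_seccomp, SECCOMP_GET_ACTION_AVAIL, 0, &action) == 0)
+		puts("SECCOMP_RET_LOG is available");
+	else
+		puts("SECCOMP_RET_LOG is not supported on this kernel");
+	return 0;
+}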
+
 /* Common entry point for both prctl and syscall. */
 static long do_seccomp(unsigned int op, unsigned int flags,
                       const char __user *uargs)
@@ -805,6 +932,11 @@ static long do_seccomp(unsigned int op, unsigned int flags,
                return seccomp_set_mode_strict();
        case SECCOMP_SET_MODE_FILTER:
                return seccomp_set_mode_filter(flags, uargs);
+       case SECCOMP_GET_ACTION_AVAIL:
+               if (flags != 0)
+                       return -EINVAL;
+
+               return seccomp_get_action_avail(uargs);
        default:
                return -EINVAL;
        }
@@ -908,13 +1040,13 @@ long seccomp_get_filter(struct task_struct *task, unsigned long filter_off,
        if (!data)
                goto out;
 
-       get_seccomp_filter(task);
+       __get_seccomp_filter(filter);
        spin_unlock_irq(&task->sighand->siglock);
 
        if (copy_to_user(data, fprog->filter, bpf_classic_proglen(fprog)))
                ret = -EFAULT;
 
-       put_seccomp_filter(task);
+       __put_seccomp_filter(filter);
        return ret;
 
 out:
@@ -922,3 +1054,181 @@ out:
        return ret;
 }
 #endif
+
+#ifdef CONFIG_SYSCTL
+
+/* Human readable action names for friendly sysctl interaction */
+#define SECCOMP_RET_KILL_NAME          "kill"
+#define SECCOMP_RET_TRAP_NAME          "trap"
+#define SECCOMP_RET_ERRNO_NAME         "errno"
+#define SECCOMP_RET_TRACE_NAME         "trace"
+#define SECCOMP_RET_LOG_NAME           "log"
+#define SECCOMP_RET_ALLOW_NAME         "allow"
+
+static const char seccomp_actions_avail[] = SECCOMP_RET_KILL_NAME      " "
+                                           SECCOMP_RET_TRAP_NAME       " "
+                                           SECCOMP_RET_ERRNO_NAME      " "
+                                           SECCOMP_RET_TRACE_NAME      " "
+                                           SECCOMP_RET_LOG_NAME        " "
+                                           SECCOMP_RET_ALLOW_NAME;
+
+struct seccomp_log_name {
+       u32             log;
+       const char      *name;
+};
+
+static const struct seccomp_log_name seccomp_log_names[] = {
+       { SECCOMP_LOG_KILL, SECCOMP_RET_KILL_NAME },
+       { SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME },
+       { SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME },
+       { SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME },
+       { SECCOMP_LOG_LOG, SECCOMP_RET_LOG_NAME },
+       { SECCOMP_LOG_ALLOW, SECCOMP_RET_ALLOW_NAME },
+       { }
+};
+
+static bool seccomp_names_from_actions_logged(char *names, size_t size,
+                                             u32 actions_logged)
+{
+       const struct seccomp_log_name *cur;
+       bool append_space = false;
+
+       for (cur = seccomp_log_names; cur->name && size; cur++) {
+               ssize_t ret;
+
+               if (!(actions_logged & cur->log))
+                       continue;
+
+               if (append_space) {
+                       ret = strscpy(names, " ", size);
+                       if (ret < 0)
+                               return false;
+
+                       names += ret;
+                       size -= ret;
+               } else
+                       append_space = true;
+
+               ret = strscpy(names, cur->name, size);
+               if (ret < 0)
+                       return false;
+
+               names += ret;
+               size -= ret;
+       }
+
+       return true;
+}
+
+static bool seccomp_action_logged_from_name(u32 *action_logged,
+                                           const char *name)
+{
+       const struct seccomp_log_name *cur;
+
+       for (cur = seccomp_log_names; cur->name; cur++) {
+               if (!strcmp(cur->name, name)) {
+                       *action_logged = cur->log;
+                       return true;
+               }
+       }
+
+       return false;
+}
+
+static bool seccomp_actions_logged_from_names(u32 *actions_logged, char *names)
+{
+       char *name;
+
+       *actions_logged = 0;
+       while ((name = strsep(&names, " ")) && *name) {
+               u32 action_logged = 0;
+
+               if (!seccomp_action_logged_from_name(&action_logged, name))
+                       return false;
+
+               *actions_logged |= action_logged;
+       }
+
+       return true;
+}
+
+static int seccomp_actions_logged_handler(struct ctl_table *ro_table, int write,
+                                         void __user *buffer, size_t *lenp,
+                                         loff_t *ppos)
+{
+       char names[sizeof(seccomp_actions_avail)];
+       struct ctl_table table;
+       int ret;
+
+       if (write && !capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       memset(names, 0, sizeof(names));
+
+       if (!write) {
+               if (!seccomp_names_from_actions_logged(names, sizeof(names),
+                                                      seccomp_actions_logged))
+                       return -EINVAL;
+       }
+
+       table = *ro_table;
+       table.data = names;
+       table.maxlen = sizeof(names);
+       ret = proc_dostring(&table, write, buffer, lenp, ppos);
+       if (ret)
+               return ret;
+
+       if (write) {
+               u32 actions_logged;
+
+               if (!seccomp_actions_logged_from_names(&actions_logged,
+                                                      table.data))
+                       return -EINVAL;
+
+               if (actions_logged & SECCOMP_LOG_ALLOW)
+                       return -EINVAL;
+
+               seccomp_actions_logged = actions_logged;
+       }
+
+       return 0;
+}
+
+static struct ctl_path seccomp_sysctl_path[] = {
+       { .procname = "kernel", },
+       { .procname = "seccomp", },
+       { }
+};
+
+static struct ctl_table seccomp_sysctl_table[] = {
+       {
+               .procname       = "actions_avail",
+               .data           = (void *) &seccomp_actions_avail,
+               .maxlen         = sizeof(seccomp_actions_avail),
+               .mode           = 0444,
+               .proc_handler   = proc_dostring,
+       },
+       {
+               .procname       = "actions_logged",
+               .mode           = 0644,
+               .proc_handler   = seccomp_actions_logged_handler,
+       },
+       { }
+};
+
+static int __init seccomp_sysctl_init(void)
+{
+       struct ctl_table_header *hdr;
+
+       hdr = register_sysctl_paths(seccomp_sysctl_path, seccomp_sysctl_table);
+       if (!hdr)
+               pr_warn("seccomp: sysctl registration failed\n");
+       else
+               kmemleak_not_leak(hdr);
+
+       return 0;
+}
+
+device_initcall(seccomp_sysctl_init)
+
+#endif /* CONFIG_SYSCTL */
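
For completeness, a small sketch (not part of the patch) reading the two sysctls registered above; actions_avail is read-only, while actions_logged accepts a space-separated subset of those names, excluding "allow", when written by a CAP_SYS_ADMIN task:

#include <stdio.h>

static void show(const char *path, const char *label)
{
	char buf[128];
	FILE *f = fopen(path, "r");

	if (f && fgets(buf, sizeof(buf), f))
		printf("%s: %s", label, buf);
	if (f)
		fclose(f);
}

int main(void)
{
	show("/proc/sys/kernel/seccomp/actions_avail", "available actions");
	show("/proc/sys/kernel/seccomp/actions_logged", "logged actions");
	return 0;
}
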