git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/commitdiff
Merge tag 'seccomp-3.17' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux...
author James Morris <james.l.morris@oracle.com>
Sat, 19 Jul 2014 07:40:49 +0000 (17:40 +1000)
committer James Morris <james.l.morris@oracle.com>
Sat, 19 Jul 2014 07:40:49 +0000 (17:40 +1000)
22 files changed:
MAINTAINERS
arch/Kconfig
arch/arm/include/uapi/asm/unistd.h
arch/arm/kernel/calls.S
arch/mips/include/uapi/asm/unistd.h
arch/mips/kernel/scall32-o32.S
arch/mips/kernel/scall64-64.S
arch/mips/kernel/scall64-n32.S
arch/mips/kernel/scall64-o32.S
arch/x86/syscalls/syscall_32.tbl
arch/x86/syscalls/syscall_64.tbl
fs/exec.c
include/linux/sched.h
include/linux/seccomp.h
include/linux/syscalls.h
include/uapi/asm-generic/unistd.h
include/uapi/linux/seccomp.h
kernel/fork.c
kernel/seccomp.c
kernel/sys.c
kernel/sys_ni.c
security/apparmor/domain.c

index e31c87474739329ec0b2458ad26a40f7251f3874..55762cba851644205f18e535499ee88efbcab0a8 100644 (file)
@@ -7953,6 +7953,16 @@ S:       Maintained
 F:     drivers/mmc/host/sdhci.*
 F:     drivers/mmc/host/sdhci-pltfm.[ch]
 
+SECURE COMPUTING
+M:     Kees Cook <keescook@chromium.org>
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git seccomp
+S:     Supported
+F:     kernel/seccomp.c
+F:     include/uapi/linux/seccomp.h
+F:     include/linux/seccomp.h
+K:     \bsecure_computing
+K:     \bTIF_SECCOMP\b
+
 SECURE DIGITAL HOST CONTROLLER INTERFACE, OPEN FIRMWARE BINDINGS (SDHCI-OF)
 M:     Anton Vorontsov <anton@enomsg.org>
 L:     linuxppc-dev@lists.ozlabs.org
index 97ff872c7accf99b3779314718fe31fd60ee1d42..0eae9df35b884190f7f988702c64158b9a66cda0 100644 (file)
@@ -321,6 +321,7 @@ config HAVE_ARCH_SECCOMP_FILTER
          - secure_computing is called from a ptrace_event()-safe context
          - secure_computing return value is checked and a return value of -1
            results in the system call being skipped immediately.
+         - seccomp syscall wired up
 
 config SECCOMP_FILTER
        def_bool y
index ba94446c72d9127633de59545a3691390ecdfc5d..e21b4a069701345c3663add69307ca161e41399c 100644 (file)
 #define __NR_sched_setattr             (__NR_SYSCALL_BASE+380)
 #define __NR_sched_getattr             (__NR_SYSCALL_BASE+381)
 #define __NR_renameat2                 (__NR_SYSCALL_BASE+382)
+#define __NR_seccomp                   (__NR_SYSCALL_BASE+383)
 
 /*
  * This may need to be greater than __NR_last_syscall+1 in order to
index 8f51bdcdacbbf6675933f38fb595adbdc825f4c2..bea85f97f3633b48eb307a154910b563f40187c6 100644 (file)
 /* 380 */      CALL(sys_sched_setattr)
                CALL(sys_sched_getattr)
                CALL(sys_renameat2)
+               CALL(sys_seccomp)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
 #define syscalls_counted
index 5805414777e0c66abd878a3b798c56809e0e57c5..9bc13eaf9d6776e8e9e2dd2dbde8401784dfb45c 100644 (file)
 #define __NR_sched_setattr             (__NR_Linux + 349)
 #define __NR_sched_getattr             (__NR_Linux + 350)
 #define __NR_renameat2                 (__NR_Linux + 351)
+#define __NR_seccomp                   (__NR_Linux + 352)
 
 /*
  * Offset of the last Linux o32 flavoured syscall
  */
-#define __NR_Linux_syscalls            351
+#define __NR_Linux_syscalls            352
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
 
 #define __NR_O32_Linux                 4000
-#define __NR_O32_Linux_syscalls                351
+#define __NR_O32_Linux_syscalls                352
 
 #if _MIPS_SIM == _MIPS_SIM_ABI64
 
 #define __NR_sched_setattr             (__NR_Linux + 309)
 #define __NR_sched_getattr             (__NR_Linux + 310)
 #define __NR_renameat2                 (__NR_Linux + 311)
+#define __NR_seccomp                   (__NR_Linux + 312)
 
 /*
  * Offset of the last Linux 64-bit flavoured syscall
  */
-#define __NR_Linux_syscalls            311
+#define __NR_Linux_syscalls            312
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */
 
 #define __NR_64_Linux                  5000
-#define __NR_64_Linux_syscalls         311
+#define __NR_64_Linux_syscalls         312
 
 #if _MIPS_SIM == _MIPS_SIM_NABI32
 
 #define __NR_sched_setattr             (__NR_Linux + 313)
 #define __NR_sched_getattr             (__NR_Linux + 314)
 #define __NR_renameat2                 (__NR_Linux + 315)
+#define __NR_seccomp                   (__NR_Linux + 316)
 
 /*
  * Offset of the last N32 flavoured syscall
  */
-#define __NR_Linux_syscalls            315
+#define __NR_Linux_syscalls            316
 
 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */
 
 #define __NR_N32_Linux                 6000
-#define __NR_N32_Linux_syscalls                315
+#define __NR_N32_Linux_syscalls                316
 
 #endif /* _UAPI_ASM_UNISTD_H */
index 3245474f19d5cdec5563a3841ecdcd3760f98e17..ab02d14f1b5cd65e3689f58c4ee79facccfbc92b 100644 (file)
@@ -578,3 +578,4 @@ EXPORT(sys_call_table)
        PTR     sys_sched_setattr
        PTR     sys_sched_getattr               /* 4350 */
        PTR     sys_renameat2
+       PTR     sys_seccomp
index be2fedd4ae33193937010b376e62c7d9e327022d..010dccf128ec25bd192cc6195d3ccc5b72ee305b 100644 (file)
@@ -431,4 +431,5 @@ EXPORT(sys_call_table)
        PTR     sys_sched_setattr
        PTR     sys_sched_getattr               /* 5310 */
        PTR     sys_renameat2
+       PTR     sys_seccomp
        .size   sys_call_table,.-sys_call_table
index c1dbcda4b816844cc64821aa8d64a79d5c9a38f3..c3b3b6525df567cbb6276e24cfd905d7a1cc256b 100644 (file)
@@ -424,4 +424,5 @@ EXPORT(sysn32_call_table)
        PTR     sys_sched_setattr
        PTR     sys_sched_getattr
        PTR     sys_renameat2                   /* 6315 */
+       PTR     sys_seccomp
        .size   sysn32_call_table,.-sysn32_call_table
index f1343ccd7ed7e58d14563c4413ede6517ff60853..bb1550b1f5012bba69c470ba7e57dd1f9bea03e0 100644 (file)
@@ -557,4 +557,5 @@ EXPORT(sys32_call_table)
        PTR     sys_sched_setattr
        PTR     sys_sched_getattr               /* 4350 */
        PTR     sys_renameat2
+       PTR     sys_seccomp
        .size   sys32_call_table,.-sys32_call_table
index d6b867921612f69f8800fbf309652ade49170a89..7527eac241228ea9fe436cd763da67c0f12ae448 100644 (file)
 351    i386    sched_setattr           sys_sched_setattr
 352    i386    sched_getattr           sys_sched_getattr
 353    i386    renameat2               sys_renameat2
+354    i386    seccomp                 sys_seccomp
index ec255a1646d2da7629c79cd2e0014d341c3b6259..16272a6c12b740c85efaf762382d7b925b69e373 100644 (file)
 314    common  sched_setattr           sys_sched_setattr
 315    common  sched_getattr           sys_sched_getattr
 316    common  renameat2               sys_renameat2
+317    common  seccomp                 sys_seccomp
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
index a3d33fe592d6d95619506b6f4aac33621284d3f8..ab1f1200ce5d8b45534e0a3e0da47045116dec4e 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1216,7 +1216,7 @@ EXPORT_SYMBOL(install_exec_creds);
 /*
  * determine how safe it is to execute the proposed program
  * - the caller must hold ->cred_guard_mutex to protect against
- *   PTRACE_ATTACH
+ *   PTRACE_ATTACH or seccomp thread-sync
  */
 static void check_unsafe_exec(struct linux_binprm *bprm)
 {
@@ -1234,7 +1234,7 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
         * This isn't strictly necessary, but it makes it harder for LSMs to
         * mess up.
         */
-       if (current->no_new_privs)
+       if (task_no_new_privs(current))
                bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS;
 
        t = p;
@@ -1272,7 +1272,7 @@ int prepare_binprm(struct linux_binprm *bprm)
        bprm->cred->egid = current_egid();
 
        if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
-           !current->no_new_privs &&
+           !task_no_new_privs(current) &&
            kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
            kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
                /* Set-uid? */
index 306f4f0c987a006f43f520413f7de3a780f98a23..0fd19055bb6450119467837f18924c2c848eafb1 100644 (file)
@@ -1307,13 +1307,12 @@ struct task_struct {
                                 * execve */
        unsigned in_iowait:1;
 
-       /* task may not gain privileges */
-       unsigned no_new_privs:1;
-
        /* Revert to default priority/policy when forking */
        unsigned sched_reset_on_fork:1;
        unsigned sched_contributes_to_load:1;
 
+       unsigned long atomic_flags; /* Flags needing atomic access. */
+
        pid_t pid;
        pid_t tgid;
 
@@ -1967,6 +1966,19 @@ static inline void memalloc_noio_restore(unsigned int flags)
        current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags;
 }
 
+/* Per-process atomic flags. */
+#define PFA_NO_NEW_PRIVS 0x00000001    /* May not gain new privileges. */
+
+static inline bool task_no_new_privs(struct task_struct *p)
+{
+       return test_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags);
+}
+
+static inline void task_set_no_new_privs(struct task_struct *p)
+{
+       set_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags);
+}
+
 /*
  * task->jobctl flags
  */
index 4054b0994071cada2d95bdbe0964a3b22cfe2590..5d586a45a319f20cc1c91412428e81beb0517e60 100644 (file)
@@ -3,6 +3,8 @@
 
 #include <uapi/linux/seccomp.h>
 
+#define SECCOMP_FILTER_FLAG_MASK       (SECCOMP_FILTER_FLAG_TSYNC)
+
 #ifdef CONFIG_SECCOMP
 
 #include <linux/thread_info.h>
@@ -14,11 +16,11 @@ struct seccomp_filter;
  *
  * @mode:  indicates one of the valid values above for controlled
  *         system calls available to a process.
- * @filter: The metadata and ruleset for determining what system calls
- *          are allowed for a task.
+ * @filter: must always point to a valid seccomp-filter or NULL as it is
+ *          accessed without locking during system call entry.
  *
  *          @filter must only be accessed from the context of current as there
- *          is no locking.
+ *          is no read locking.
  */
 struct seccomp {
        int mode;
index b0881a0ed322a98253b5a14503002f1bd50a772f..1713977ee26f8115c2800bd2e5ddae1473a6d057 100644 (file)
@@ -866,4 +866,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid,
 asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type,
                         unsigned long idx1, unsigned long idx2);
 asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags);
+asmlinkage long sys_seccomp(unsigned int op, unsigned int flags,
+                           const char __user *uargs);
 #endif
index 3336406080874bf2bd06cb1f0563aea6d8e56650..65acbf0e2867f1f1d3e316af86c70a89a89a7955 100644 (file)
@@ -699,9 +699,11 @@ __SYSCALL(__NR_sched_setattr, sys_sched_setattr)
 __SYSCALL(__NR_sched_getattr, sys_sched_getattr)
 #define __NR_renameat2 276
 __SYSCALL(__NR_renameat2, sys_renameat2)
+#define __NR_seccomp 277
+__SYSCALL(__NR_seccomp, sys_seccomp)
 
 #undef __NR_syscalls
-#define __NR_syscalls 277
+#define __NR_syscalls 278
 
 /*
  * All syscalls below here should go away really,
index ac2dc9f7297367c0529ed70e7ed02a8be8f4c27f..0f238a43ff1e7e5ebd8bd746a055fa910c10b51d 100644 (file)
 #define SECCOMP_MODE_STRICT    1 /* uses hard-coded filter. */
 #define SECCOMP_MODE_FILTER    2 /* uses user-supplied filter. */
 
+/* Valid operations for seccomp syscall. */
+#define SECCOMP_SET_MODE_STRICT        0
+#define SECCOMP_SET_MODE_FILTER        1
+
+/* Valid flags for SECCOMP_SET_MODE_FILTER */
+#define SECCOMP_FILTER_FLAG_TSYNC      1
+
 /*
  * All BPF programs must return a 32-bit value.
  * The bottom 16-bits are for optional return data.
index 6a13c46cd87dbe72bc830bf109256fa458f22ad1..ed4bc339c9dcba0b4d3cb03a6ddef585a64a5a30 100644 (file)
@@ -315,6 +315,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
                goto free_ti;
 
        tsk->stack = ti;
+#ifdef CONFIG_SECCOMP
+       /*
+        * We must handle setting up seccomp filters once we're under
+        * the sighand lock in case orig has changed between now and
+        * then. Until then, filter must be NULL to avoid messing up
+        * the usage counts on the error path calling free_task.
+        */
+       tsk->seccomp.filter = NULL;
+#endif
 
        setup_thread_stack(tsk, orig);
        clear_user_return_notifier(tsk);
@@ -1081,6 +1090,39 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
        return 0;
 }
 
+static void copy_seccomp(struct task_struct *p)
+{
+#ifdef CONFIG_SECCOMP
+       /*
+        * Must be called with sighand->lock held, which is common to
+        * all threads in the group. Holding cred_guard_mutex is not
+        * needed because this new task is not yet running and cannot
+        * be racing exec.
+        */
+       BUG_ON(!spin_is_locked(&current->sighand->siglock));
+
+       /* Ref-count the new filter user, and assign it. */
+       get_seccomp_filter(current);
+       p->seccomp = current->seccomp;
+
+       /*
+        * Explicitly enable no_new_privs here in case it got set
+        * between the task_struct being duplicated and holding the
+        * sighand lock. The seccomp state and nnp must be in sync.
+        */
+       if (task_no_new_privs(current))
+               task_set_no_new_privs(p);
+
+       /*
+        * If the parent gained a seccomp mode after copying thread
+        * flags and between before we held the sighand lock, we have
+        * to manually enable the seccomp thread flag here.
+        */
+       if (p->seccomp.mode != SECCOMP_MODE_DISABLED)
+               set_tsk_thread_flag(p, TIF_SECCOMP);
+#endif
+}
+
 SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
 {
        current->clear_child_tid = tidptr;
@@ -1196,7 +1238,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                goto fork_out;
 
        ftrace_graph_init_task(p);
-       get_seccomp_filter(p);
 
        rt_mutex_init_task(p);
 
@@ -1436,6 +1477,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
        spin_lock(&current->sighand->siglock);
 
+       /*
+        * Copy seccomp details explicitly here, in case they were changed
+        * before holding sighand lock.
+        */
+       copy_seccomp(p);
+
        /*
         * Process group and session signals need to be delivered to just the
         * parent before the fork or both the parent and the child after the
index 301bbc24739c9ff8fd58fe95149ad242da8e50b7..74f4601791715257acdb1299a67d5345f5a8eb9e 100644 (file)
 #include <linux/compat.h>
 #include <linux/sched.h>
 #include <linux/seccomp.h>
+#include <linux/slab.h>
+#include <linux/syscalls.h>
 
 /* #define SECCOMP_DEBUG 1 */
 
 #ifdef CONFIG_SECCOMP_FILTER
 #include <asm/syscall.h>
 #include <linux/filter.h>
+#include <linux/pid.h>
 #include <linux/ptrace.h>
 #include <linux/security.h>
-#include <linux/slab.h>
 #include <linux/tracehook.h>
 #include <linux/uaccess.h>
 
@@ -172,21 +174,24 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
  */
 static u32 seccomp_run_filters(int syscall)
 {
-       struct seccomp_filter *f;
+       struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter);
        struct seccomp_data sd;
        u32 ret = SECCOMP_RET_ALLOW;
 
        /* Ensure unexpected behavior doesn't result in failing open. */
-       if (WARN_ON(current->seccomp.filter == NULL))
+       if (unlikely(WARN_ON(f == NULL)))
                return SECCOMP_RET_KILL;
 
+       /* Make sure cross-thread synced filter points somewhere sane. */
+       smp_read_barrier_depends();
+
        populate_seccomp_data(&sd);
 
        /*
         * All filters in the list are evaluated and the lowest BPF return
         * value always takes priority (ignoring the DATA).
         */
-       for (f = current->seccomp.filter; f; f = f->prev) {
+       for (; f; f = f->prev) {
                u32 cur_ret = SK_RUN_FILTER(f->prog, (void *)&sd);
 
                if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
@@ -194,29 +199,159 @@ static u32 seccomp_run_filters(int syscall)
        }
        return ret;
 }
+#endif /* CONFIG_SECCOMP_FILTER */
+
+static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
+{
+       BUG_ON(!spin_is_locked(&current->sighand->siglock));
+
+       if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
+               return false;
+
+       return true;
+}
+
+static inline void seccomp_assign_mode(struct task_struct *task,
+                                      unsigned long seccomp_mode)
+{
+       BUG_ON(!spin_is_locked(&task->sighand->siglock));
+
+       task->seccomp.mode = seccomp_mode;
+       /*
+        * Make sure TIF_SECCOMP cannot be set before the mode (and
+        * filter) is set.
+        */
+       smp_mb__before_atomic();
+       set_tsk_thread_flag(task, TIF_SECCOMP);
+}
+
+#ifdef CONFIG_SECCOMP_FILTER
+/* Returns 1 if the parent is an ancestor of the child. */
+static int is_ancestor(struct seccomp_filter *parent,
+                      struct seccomp_filter *child)
+{
+       /* NULL is the root ancestor. */
+       if (parent == NULL)
+               return 1;
+       for (; child; child = child->prev)
+               if (child == parent)
+                       return 1;
+       return 0;
+}
 
 /**
- * seccomp_attach_filter: Attaches a seccomp filter to current.
+ * seccomp_can_sync_threads: checks if all threads can be synchronized
+ *
+ * Expects sighand and cred_guard_mutex locks to be held.
+ *
+ * Returns 0 on success, -ve on error, or the pid of a thread which was
+ * either not in the correct seccomp mode or it did not have an ancestral
+ * seccomp filter.
+ */
+static inline pid_t seccomp_can_sync_threads(void)
+{
+       struct task_struct *thread, *caller;
+
+       BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
+       BUG_ON(!spin_is_locked(&current->sighand->siglock));
+
+       /* Validate all threads being eligible for synchronization. */
+       caller = current;
+       for_each_thread(caller, thread) {
+               pid_t failed;
+
+               /* Skip current, since it is initiating the sync. */
+               if (thread == caller)
+                       continue;
+
+               if (thread->seccomp.mode == SECCOMP_MODE_DISABLED ||
+                   (thread->seccomp.mode == SECCOMP_MODE_FILTER &&
+                    is_ancestor(thread->seccomp.filter,
+                                caller->seccomp.filter)))
+                       continue;
+
+               /* Return the first thread that cannot be synchronized. */
+               failed = task_pid_vnr(thread);
+               /* If the pid cannot be resolved, then return -ESRCH */
+               if (unlikely(WARN_ON(failed == 0)))
+                       failed = -ESRCH;
+               return failed;
+       }
+
+       return 0;
+}
+
+/**
+ * seccomp_sync_threads: sets all threads to use current's filter
+ *
+ * Expects sighand and cred_guard_mutex locks to be held, and for
+ * seccomp_can_sync_threads() to have returned success already
+ * without dropping the locks.
+ *
+ */
+static inline void seccomp_sync_threads(void)
+{
+       struct task_struct *thread, *caller;
+
+       BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
+       BUG_ON(!spin_is_locked(&current->sighand->siglock));
+
+       /* Synchronize all threads. */
+       caller = current;
+       for_each_thread(caller, thread) {
+               /* Skip current, since it needs no changes. */
+               if (thread == caller)
+                       continue;
+
+               /* Get a task reference for the new leaf node. */
+               get_seccomp_filter(caller);
+               /*
+                * Drop the task reference to the shared ancestor since
+                * current's path will hold a reference.  (This also
+                * allows a put before the assignment.)
+                */
+               put_seccomp_filter(thread);
+               smp_store_release(&thread->seccomp.filter,
+                                 caller->seccomp.filter);
+               /*
+                * Opt the other thread into seccomp if needed.
+                * As threads are considered to be trust-realm
+                * equivalent (see ptrace_may_access), it is safe to
+                * allow one thread to transition the other.
+                */
+               if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) {
+                       /*
+                        * Don't let an unprivileged task work around
+                        * the no_new_privs restriction by creating
+                        * a thread that sets it up, enters seccomp,
+                        * then dies.
+                        */
+                       if (task_no_new_privs(caller))
+                               task_set_no_new_privs(thread);
+
+                       seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
+               }
+       }
+}
+
+/**
+ * seccomp_prepare_filter: Prepares a seccomp filter for use.
  * @fprog: BPF program to install
  *
- * Returns 0 on success or an errno on failure.
+ * Returns filter on success or an ERR_PTR on failure.
  */
-static long seccomp_attach_filter(struct sock_fprog *fprog)
+static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
 {
        struct seccomp_filter *filter;
-       unsigned long fp_size = fprog->len * sizeof(struct sock_filter);
-       unsigned long total_insns = fprog->len;
+       unsigned long fp_size;
        struct sock_filter *fp;
        int new_len;
        long ret;
 
        if (fprog->len == 0 || fprog->len > BPF_MAXINSNS)
-               return -EINVAL;
-
-       for (filter = current->seccomp.filter; filter; filter = filter->prev)
-               total_insns += filter->prog->len + 4;  /* include a 4 instr penalty */
-       if (total_insns > MAX_INSNS_PER_PATH)
-               return -ENOMEM;
+               return ERR_PTR(-EINVAL);
+       BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter));
+       fp_size = fprog->len * sizeof(struct sock_filter);
 
        /*
         * Installing a seccomp filter requires that the task has
@@ -224,14 +359,14 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
         * This avoids scenarios where unprivileged tasks can affect the
         * behavior of privileged children.
         */
-       if (!current->no_new_privs &&
+       if (!task_no_new_privs(current) &&
            security_capable_noaudit(current_cred(), current_user_ns(),
                                     CAP_SYS_ADMIN) != 0)
-               return -EACCES;
+               return ERR_PTR(-EACCES);
 
        fp = kzalloc(fp_size, GFP_KERNEL|__GFP_NOWARN);
        if (!fp)
-               return -ENOMEM;
+               return ERR_PTR(-ENOMEM);
 
        /* Copy the instructions from fprog. */
        ret = -EFAULT;
@@ -275,13 +410,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
 
        sk_filter_select_runtime(filter->prog);
 
-       /*
-        * If there is an existing filter, make it the prev and don't drop its
-        * task reference.
-        */
-       filter->prev = current->seccomp.filter;
-       current->seccomp.filter = filter;
-       return 0;
+       return filter;
 
 free_filter_prog:
        kfree(filter->prog);
@@ -289,19 +418,20 @@ free_filter:
        kfree(filter);
 free_prog:
        kfree(fp);
-       return ret;
+       return ERR_PTR(ret);
 }
 
 /**
- * seccomp_attach_user_filter - attaches a user-supplied sock_fprog
+ * seccomp_prepare_user_filter - prepares a user-supplied sock_fprog
  * @user_filter: pointer to the user data containing a sock_fprog.
  *
  * Returns 0 on success and non-zero otherwise.
  */
-static long seccomp_attach_user_filter(char __user *user_filter)
+static struct seccomp_filter *
+seccomp_prepare_user_filter(const char __user *user_filter)
 {
        struct sock_fprog fprog;
-       long ret = -EFAULT;
+       struct seccomp_filter *filter = ERR_PTR(-EFAULT);
 
 #ifdef CONFIG_COMPAT
        if (is_compat_task()) {
@@ -314,9 +444,56 @@ static long seccomp_attach_user_filter(char __user *user_filter)
 #endif
        if (copy_from_user(&fprog, user_filter, sizeof(fprog)))
                goto out;
-       ret = seccomp_attach_filter(&fprog);
+       filter = seccomp_prepare_filter(&fprog);
 out:
-       return ret;
+       return filter;
+}
+
+/**
+ * seccomp_attach_filter: validate and attach filter
+ * @flags:  flags to change filter behavior
+ * @filter: seccomp filter to add to the current process
+ *
+ * Caller must be holding current->sighand->siglock lock.
+ *
+ * Returns 0 on success, -ve on error.
+ */
+static long seccomp_attach_filter(unsigned int flags,
+                                 struct seccomp_filter *filter)
+{
+       unsigned long total_insns;
+       struct seccomp_filter *walker;
+
+       BUG_ON(!spin_is_locked(&current->sighand->siglock));
+
+       /* Validate resulting filter length. */
+       total_insns = filter->prog->len;
+       for (walker = current->seccomp.filter; walker; walker = walker->prev)
+               total_insns += walker->prog->len + 4;  /* 4 instr penalty */
+       if (total_insns > MAX_INSNS_PER_PATH)
+               return -ENOMEM;
+
+       /* If thread sync has been requested, check that it is possible. */
+       if (flags & SECCOMP_FILTER_FLAG_TSYNC) {
+               int ret;
+
+               ret = seccomp_can_sync_threads();
+               if (ret)
+                       return ret;
+       }
+
+       /*
+        * If there is an existing filter, make it the prev and don't drop its
+        * task reference.
+        */
+       filter->prev = current->seccomp.filter;
+       current->seccomp.filter = filter;
+
+       /* Now that the new filter is in place, synchronize to all threads. */
+       if (flags & SECCOMP_FILTER_FLAG_TSYNC)
+               seccomp_sync_threads();
+
+       return 0;
 }
 
 /* get_seccomp_filter - increments the reference count of the filter on @tsk */
@@ -329,6 +506,14 @@ void get_seccomp_filter(struct task_struct *tsk)
        atomic_inc(&orig->usage);
 }
 
+static inline void seccomp_filter_free(struct seccomp_filter *filter)
+{
+       if (filter) {
+               sk_filter_free(filter->prog);
+               kfree(filter);
+       }
+}
+
 /* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */
 void put_seccomp_filter(struct task_struct *tsk)
 {
@@ -337,8 +522,7 @@ void put_seccomp_filter(struct task_struct *tsk)
        while (orig && atomic_dec_and_test(&orig->usage)) {
                struct seccomp_filter *freeme = orig;
                orig = orig->prev;
-               sk_filter_free(freeme->prog);
-               kfree(freeme);
+               seccomp_filter_free(freeme);
        }
 }
 
@@ -382,12 +566,17 @@ static int mode1_syscalls_32[] = {
 
 int __secure_computing(int this_syscall)
 {
-       int mode = current->seccomp.mode;
        int exit_sig = 0;
        int *syscall;
        u32 ret;
 
-       switch (mode) {
+       /*
+        * Make sure that any changes to mode from another thread have
+        * been seen after TIF_SECCOMP was seen.
+        */
+       rmb();
+
+       switch (current->seccomp.mode) {
        case SECCOMP_MODE_STRICT:
                syscall = mode1_syscalls;
 #ifdef CONFIG_COMPAT
@@ -473,47 +662,152 @@ long prctl_get_seccomp(void)
 }
 
 /**
- * prctl_set_seccomp: configures current->seccomp.mode
- * @seccomp_mode: requested mode to use
- * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
+ * seccomp_set_mode_strict: internal function for setting strict seccomp
  *
- * This function may be called repeatedly with a @seccomp_mode of
- * SECCOMP_MODE_FILTER to install additional filters.  Every filter
- * successfully installed will be evaluated (in reverse order) for each system
- * call the task makes.
+ * Once current->seccomp.mode is non-zero, it may not be changed.
+ *
+ * Returns 0 on success or -EINVAL on failure.
+ */
+static long seccomp_set_mode_strict(void)
+{
+       const unsigned long seccomp_mode = SECCOMP_MODE_STRICT;
+       long ret = -EINVAL;
+
+       spin_lock_irq(&current->sighand->siglock);
+
+       if (!seccomp_may_assign_mode(seccomp_mode))
+               goto out;
+
+#ifdef TIF_NOTSC
+       disable_TSC();
+#endif
+       seccomp_assign_mode(current, seccomp_mode);
+       ret = 0;
+
+out:
+       spin_unlock_irq(&current->sighand->siglock);
+
+       return ret;
+}
+
+#ifdef CONFIG_SECCOMP_FILTER
+/**
+ * seccomp_set_mode_filter: internal function for setting seccomp filter
+ * @flags:  flags to change filter behavior
+ * @filter: struct sock_fprog containing filter
+ *
+ * This function may be called repeatedly to install additional filters.
+ * Every filter successfully installed will be evaluated (in reverse order)
+ * for each system call the task makes.
  *
  * Once current->seccomp.mode is non-zero, it may not be changed.
  *
  * Returns 0 on success or -EINVAL on failure.
  */
-long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
+static long seccomp_set_mode_filter(unsigned int flags,
+                                   const char __user *filter)
 {
+       const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
+       struct seccomp_filter *prepared = NULL;
        long ret = -EINVAL;
 
-       if (current->seccomp.mode &&
-           current->seccomp.mode != seccomp_mode)
+       /* Validate flags. */
+       if (flags & ~SECCOMP_FILTER_FLAG_MASK)
+               return -EINVAL;
+
+       /* Prepare the new filter before holding any locks. */
+       prepared = seccomp_prepare_user_filter(filter);
+       if (IS_ERR(prepared))
+               return PTR_ERR(prepared);
+
+       /*
+        * Make sure we cannot change seccomp or nnp state via TSYNC
+        * while another thread is in the middle of calling exec.
+        */
+       if (flags & SECCOMP_FILTER_FLAG_TSYNC &&
+           mutex_lock_killable(&current->signal->cred_guard_mutex))
+               goto out_free;
+
+       spin_lock_irq(&current->sighand->siglock);
+
+       if (!seccomp_may_assign_mode(seccomp_mode))
+               goto out;
+
+       ret = seccomp_attach_filter(flags, prepared);
+       if (ret)
                goto out;
+       /* Do not free the successfully attached filter. */
+       prepared = NULL;
+
+       seccomp_assign_mode(current, seccomp_mode);
+out:
+       spin_unlock_irq(&current->sighand->siglock);
+       if (flags & SECCOMP_FILTER_FLAG_TSYNC)
+               mutex_unlock(&current->signal->cred_guard_mutex);
+out_free:
+       seccomp_filter_free(prepared);
+       return ret;
+}
+#else
+static inline long seccomp_set_mode_filter(unsigned int flags,
+                                          const char __user *filter)
+{
+       return -EINVAL;
+}
+#endif
+
+/* Common entry point for both prctl and syscall. */
+static long do_seccomp(unsigned int op, unsigned int flags,
+                      const char __user *uargs)
+{
+       switch (op) {
+       case SECCOMP_SET_MODE_STRICT:
+               if (flags != 0 || uargs != NULL)
+                       return -EINVAL;
+               return seccomp_set_mode_strict();
+       case SECCOMP_SET_MODE_FILTER:
+               return seccomp_set_mode_filter(flags, uargs);
+       default:
+               return -EINVAL;
+       }
+}
+
+SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags,
+                        const char __user *, uargs)
+{
+       return do_seccomp(op, flags, uargs);
+}
+
+/**
+ * prctl_set_seccomp: configures current->seccomp.mode
+ * @seccomp_mode: requested mode to use
+ * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
+ *
+ * Returns 0 on success or -EINVAL on failure.
+ */
+long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
+{
+       unsigned int op;
+       char __user *uargs;
 
        switch (seccomp_mode) {
        case SECCOMP_MODE_STRICT:
-               ret = 0;
-#ifdef TIF_NOTSC
-               disable_TSC();
-#endif
+               op = SECCOMP_SET_MODE_STRICT;
+               /*
+                * Setting strict mode through prctl always ignored filter,
+                * so make sure it is always NULL here to pass the internal
+                * check in do_seccomp().
+                */
+               uargs = NULL;
                break;
-#ifdef CONFIG_SECCOMP_FILTER
        case SECCOMP_MODE_FILTER:
-               ret = seccomp_attach_user_filter(filter);
-               if (ret)
-                       goto out;
+               op = SECCOMP_SET_MODE_FILTER;
+               uargs = filter;
                break;
-#endif
        default:
-               goto out;
+               return -EINVAL;
        }
 
-       current->seccomp.mode = seccomp_mode;
-       set_thread_flag(TIF_SECCOMP);
-out:
-       return ret;
+       /* prctl interface doesn't have flags, so they are always zero. */
+       return do_seccomp(op, 0, uargs);
 }
index 66a751ebf9d9c77be801d263d0844d40a8b52ad0..ce8129192a26029da0d5b33b75daaeb85b1d08e5 100644 (file)
@@ -1990,12 +1990,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
                if (arg2 != 1 || arg3 || arg4 || arg5)
                        return -EINVAL;
 
-               current->no_new_privs = 1;
+               task_set_no_new_privs(current);
                break;
        case PR_GET_NO_NEW_PRIVS:
                if (arg2 || arg3 || arg4 || arg5)
                        return -EINVAL;
-               return current->no_new_privs ? 1 : 0;
+               return task_no_new_privs(current) ? 1 : 0;
        case PR_GET_THP_DISABLE:
                if (arg2 || arg3 || arg4 || arg5)
                        return -EINVAL;
index 36441b51b5dff20bb63792d3ad7ff7e0d30908bf..2904a21059145b61e55a3fee9204a64fce48c9ea 100644 (file)
@@ -213,3 +213,6 @@ cond_syscall(compat_sys_open_by_handle_at);
 
 /* compare kernel pointers */
 cond_syscall(sys_kcmp);
+
+/* operate on Secure Computing state */
+cond_syscall(sys_seccomp);
index 452567d3a08e7ccfc5a7e3ae20ae95307554f85c..d97cba3e3849bcdbd577f45720bba4ea95a4935c 100644 (file)
@@ -621,7 +621,7 @@ int aa_change_hat(const char *hats[], int count, u64 token, bool permtest)
         * There is no exception for unconfined as change_hat is not
         * available.
         */
-       if (current->no_new_privs)
+       if (task_no_new_privs(current))
                return -EPERM;
 
        /* released below */
@@ -776,7 +776,7 @@ int aa_change_profile(const char *ns_name, const char *hname, bool onexec,
         * no_new_privs is set because this aways results in a reduction
         * of permissions.
         */
-       if (current->no_new_privs && !unconfined(profile)) {
+       if (task_no_new_privs(current) && !unconfined(profile)) {
                put_cred(cred);
                return -EPERM;
        }