git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/commitdiff
seccomp: allow TSYNC and USER_NOTIF together
author: Tycho Andersen <tycho@tycho.ws>
Wed, 4 Mar 2020 18:05:17 +0000 (11:05 -0700)
committer: Kees Cook <keescook@chromium.org>
Wed, 4 Mar 2020 22:48:54 +0000 (14:48 -0800)
The restriction introduced in 7a0df7fbc145 ("seccomp: Make NEW_LISTENER and
TSYNC flags exclusive") is mostly artificial: there is enough information
in a seccomp user notification to tell which thread triggered a
notification. The restriction was introduced because TSYNC makes the
syscall return a thread-id on failure, and NEW_LISTENER returns an fd, and
there's no way to distinguish between these two cases (well, I suppose the
caller could check all fds it has, then do the syscall, and if the return
value was an fd that already existed, then it must be a thread id, but
bleh).

Matthew would like to use these two flags together in the Chrome sandbox
which wants to use TSYNC for video drivers and NEW_LISTENER to proxy
syscalls.

So, let's fix this ugliness by adding another flag, TSYNC_ESRCH, which
tells the kernel to just return -ESRCH on a TSYNC error. This way,
TSYNC and NEW_LISTENER (and any subsequent seccomp() commands that want to
return positive values) no longer conflict with each other.

Suggested-by: Matthew Denton <mpdenton@google.com>
Signed-off-by: Tycho Andersen <tycho@tycho.ws>
Link: https://lore.kernel.org/r/20200304180517.23867-1-tycho@tycho.ws
Signed-off-by: Kees Cook <keescook@chromium.org>
include/linux/seccomp.h
include/uapi/linux/seccomp.h
kernel/seccomp.c
tools/testing/selftests/seccomp/seccomp_bpf.c

index 03583b6d1416108e5a98e69f81f0b746e361f4ee..4192369b84181d9e6a5a8dc90cc29aea17228cb5 100644 (file)
@@ -7,7 +7,8 @@
 #define SECCOMP_FILTER_FLAG_MASK       (SECCOMP_FILTER_FLAG_TSYNC | \
                                         SECCOMP_FILTER_FLAG_LOG | \
                                         SECCOMP_FILTER_FLAG_SPEC_ALLOW | \
-                                        SECCOMP_FILTER_FLAG_NEW_LISTENER)
+                                        SECCOMP_FILTER_FLAG_NEW_LISTENER | \
+                                        SECCOMP_FILTER_FLAG_TSYNC_ESRCH)
 
 #ifdef CONFIG_SECCOMP
 
index be84d87f1f46988935a6efdc83334f284627006b..c1735455bc536b057ae90e359d31237ec6ab91e0 100644 (file)
@@ -22,6 +22,7 @@
 #define SECCOMP_FILTER_FLAG_LOG                        (1UL << 1)
 #define SECCOMP_FILTER_FLAG_SPEC_ALLOW         (1UL << 2)
 #define SECCOMP_FILTER_FLAG_NEW_LISTENER       (1UL << 3)
+#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH                (1UL << 4)
 
 /*
  * All BPF programs must return a 32-bit value.
index b6ea3dcb57bfefa47233ada64d74f822b6666ab6..29022c1bbe180b9b661be386d54973f48203375d 100644 (file)
@@ -528,8 +528,12 @@ static long seccomp_attach_filter(unsigned int flags,
                int ret;
 
                ret = seccomp_can_sync_threads();
-               if (ret)
-                       return ret;
+               if (ret) {
+                       if (flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH)
+                               return -ESRCH;
+                       else
+                               return ret;
+               }
        }
 
        /* Set log flag, if present. */
@@ -1288,10 +1292,12 @@ static long seccomp_set_mode_filter(unsigned int flags,
         * In the successful case, NEW_LISTENER returns the new listener fd.
         * But in the failure case, TSYNC returns the thread that died. If you
         * combine these two flags, there's no way to tell whether something
-        * succeeded or failed. So, let's disallow this combination.
+        * succeeded or failed. So, let's disallow this combination if the user
+        * has not explicitly requested no errors from TSYNC.
         */
        if ((flags & SECCOMP_FILTER_FLAG_TSYNC) &&
-           (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER))
+           (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) &&
+           ((flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH) == 0))
                return -EINVAL;
 
        /* Prepare the new filter before holding any locks. */
index ee1b727ede045dfd920bf319d3729efd217ef137..a9ad3bd8b2ad09ae3a9852eb6e54b9bc4ccbc2ed 100644 (file)
@@ -212,6 +212,10 @@ struct seccomp_notif_sizes {
 #define SECCOMP_USER_NOTIF_FLAG_CONTINUE 0x00000001
 #endif
 
+#ifndef SECCOMP_FILTER_FLAG_TSYNC_ESRCH
+#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
+#endif
+
 #ifndef seccomp
 int seccomp(unsigned int op, unsigned int flags, void *args)
 {
@@ -2187,7 +2191,8 @@ TEST(detect_seccomp_filter_flags)
        unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
                                 SECCOMP_FILTER_FLAG_LOG,
                                 SECCOMP_FILTER_FLAG_SPEC_ALLOW,
-                                SECCOMP_FILTER_FLAG_NEW_LISTENER };
+                                SECCOMP_FILTER_FLAG_NEW_LISTENER,
+                                SECCOMP_FILTER_FLAG_TSYNC_ESRCH };
        unsigned int exclusive[] = {
                                SECCOMP_FILTER_FLAG_TSYNC,
                                SECCOMP_FILTER_FLAG_NEW_LISTENER };
@@ -2645,6 +2650,55 @@ TEST_F(TSYNC, two_siblings_with_one_divergence)
        EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
 }
 
+TEST_F(TSYNC, two_siblings_with_one_divergence_no_tid_in_err)
+{
+       long ret, flags;
+       void *status;
+
+       ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+               TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+       }
+
+       ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
+       ASSERT_NE(ENOSYS, errno) {
+               TH_LOG("Kernel does not support seccomp syscall!");
+       }
+       ASSERT_EQ(0, ret) {
+               TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
+       }
+       self->sibling[0].diverge = 1;
+       tsync_start_sibling(&self->sibling[0]);
+       tsync_start_sibling(&self->sibling[1]);
+
+       while (self->sibling_count < TSYNC_SIBLINGS) {
+               sem_wait(&self->started);
+               self->sibling_count++;
+       }
+
+       flags = SECCOMP_FILTER_FLAG_TSYNC | \
+               SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
+       ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, &self->apply_prog);
+       ASSERT_EQ(ESRCH, errno) {
+               TH_LOG("Did not return ESRCH for diverged sibling.");
+       }
+       ASSERT_EQ(-1, ret) {
+               TH_LOG("Did not fail on diverged sibling.");
+       }
+
+       /* Wake the threads */
+       pthread_mutex_lock(&self->mutex);
+       ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+               TH_LOG("cond broadcast non-zero");
+       }
+       pthread_mutex_unlock(&self->mutex);
+
+       /* Ensure they are both unkilled. */
+       PTHREAD_JOIN(self->sibling[0].tid, &status);
+       EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+       PTHREAD_JOIN(self->sibling[1].tid, &status);
+       EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+}
+
 TEST_F(TSYNC, two_siblings_not_under_filter)
 {
        long ret, sib;
@@ -3196,6 +3250,24 @@ TEST(user_notification_basic)
        EXPECT_EQ(0, WEXITSTATUS(status));
 }
 
+TEST(user_notification_with_tsync)
+{
+       int ret;
+       unsigned int flags;
+
+       /* these were exclusive */
+       flags = SECCOMP_FILTER_FLAG_NEW_LISTENER |
+               SECCOMP_FILTER_FLAG_TSYNC;
+       ASSERT_EQ(-1, user_trap_syscall(__NR_getppid, flags));
+       ASSERT_EQ(EINVAL, errno);
+
+       /* but now they're not */
+       flags |= SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
+       ret = user_trap_syscall(__NR_getppid, flags);
+       close(ret);
+       ASSERT_LE(0, ret);
+}
+
 TEST(user_notification_kill_in_middle)
 {
        pid_t pid;