kernel/seccomp.c

   1 /*
   2  * linux/kernel/seccomp.c
   3  *
   4  * Copyright 2004-2005  Andrea Arcangeli <andrea@cpushare.com>
   5  *
   6  * Copyright (C) 2012 Google, Inc.
   7  * Will Drewry <wad@chromium.org>
   8  *
   9  * This defines a simple but solid secure-computing facility.
  10  *
  11  * Mode 1 uses a fixed list of allowed system calls.
  12  * Mode 2 allows user-defined system call filters in the form
  13  *        of Berkeley Packet Filters/Linux Socket Filters.
  14  */
  15
  16 #include <linux/atomic.h>
  17 #include <linux/audit.h>
  18 #include <linux/compat.h>
  19 #include <linux/sched.h>
  20 #include <linux/seccomp.h>
  21 #include <linux/slab.h>
  22 #include <linux/syscalls.h>
  23
  24 /* #define SECCOMP_DEBUG 1 */
  25
  26 #ifdef CONFIG_SECCOMP_FILTER
  27 #include <asm/syscall.h>
  28 #include <linux/filter.h>
  29 #include <linux/pid.h>
  30 #include <linux/ptrace.h>
  31 #include <linux/security.h>
  32 #include <linux/tracehook.h>
  33 #include <linux/uaccess.h>
  34
  35 /**
  36  * struct seccomp_filter - container for seccomp BPF programs
  37  *
  38  * @usage: reference count to manage the object lifetime.
  39  *         get/put helpers should be used when accessing an instance
  40  *         outside of a lifetime-guarded section.  In general, this
  41  *         is only needed for handling filters shared across tasks.
  42  * @prev: points to a previously installed, or inherited, filter
  43  * @len: the number of instructions in the program
  44  * @insnsi: the BPF program instructions to evaluate
  45  *
  46  * seccomp_filter objects are organized in a tree linked via the @prev
  47  * pointer.  For any task, it appears to be a singly-linked list starting
  48  * with current->seccomp.filter, the most recently attached or inherited filter.
  49  * However, multiple filters may share a @prev node, by way of fork(), which
  50  * results in a unidirectional tree existing in memory.  This is similar to
  51  * how namespaces work.
  52  *
  53  * seccomp_filter objects should never be modified after being attached
  54  * to a task_struct (other than @usage).
  55  */
  56 struct seccomp_filter {
  57         atomic_t usage;
  58         struct seccomp_filter *prev;
  59         struct bpf_prog *prog;
  60 };
  61
  /*
   * Cap on the number of instructions along any single path through the
   * filter tree: as many struct sock_filter entries as fit in 256KB.
   */
  #define MAX_INSNS_PER_PATH ((256 * 1024) / sizeof(struct sock_filter))
  64
  65 /*
  66  * Endianness is explicitly ignored and left for BPF program authors to manage
  67  * as per the specific architecture.
  68  */
  69 static void populate_seccomp_data(struct seccomp_data *sd)
  70 {
  71         struct task_struct *task = current;
  72         struct pt_regs *regs = task_pt_regs(task);
  73         unsigned long args[6];
  74
  75         sd->nr = syscall_get_nr(task, regs);
  76         sd->arch = syscall_get_arch();
  77         syscall_get_arguments(task, regs, 0, 6, args);
  78         sd->args[0] = args[0];
  79         sd->args[1] = args[1];
  80         sd->args[2] = args[2];
  81         sd->args[3] = args[3];
  82         sd->args[4] = args[4];
  83         sd->args[5] = args[5];
  84         sd->instruction_pointer = KSTK_EIP(task);
  85 }
  86
  /**
   *      seccomp_check_filter - verify seccomp filter code
   *      @filter: filter to verify
   *      @flen: length of filter
   *
   * Takes a filter that was already checked by bpf_check_classic and
   * adapts it for seccomp use, rewriting instructions in place:
   *
   *  - BPF_LD|BPF_W|BPF_ABS becomes BPF_LDX|BPF_W|BPF_ABS, and its offset
   *    must be 32-bit aligned and inside struct seccomp_data;
   *  - BPF_LD/BPF_LDX of BPF_LEN become immediate loads of
   *    sizeof(struct seccomp_data);
   *  - every other instruction must be on the explicit allow list below.
   *
   * Returns 0 if the rule set is legal or -EINVAL if not.
   */
  static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
  {
          struct sock_filter *end = filter + flen;
          struct sock_filter *insn;

          for (insn = filter; insn < end; insn++) {
                  switch (insn->code) {
                  case BPF_LD | BPF_W | BPF_ABS:
                          /* The opcode is rewritten before the offset is checked. */
                          insn->code = BPF_LDX | BPF_W | BPF_ABS;
                          /* 32-bit aligned and not out of bounds. */
                          if (insn->k >= sizeof(struct seccomp_data) || (insn->k & 3))
                                  return -EINVAL;
                          break;
                  case BPF_LD | BPF_W | BPF_LEN:
                          insn->code = BPF_LD | BPF_IMM;
                          insn->k = sizeof(struct seccomp_data);
                          break;
                  case BPF_LDX | BPF_W | BPF_LEN:
                          insn->code = BPF_LDX | BPF_IMM;
                          insn->k = sizeof(struct seccomp_data);
                          break;
                  /* Explicitly include allowed calls. */
                  case BPF_RET | BPF_K:
                  case BPF_RET | BPF_A:
                  case BPF_ALU | BPF_ADD | BPF_K:
                  case BPF_ALU | BPF_ADD | BPF_X:
                  case BPF_ALU | BPF_SUB | BPF_K:
                  case BPF_ALU | BPF_SUB | BPF_X:
                  case BPF_ALU | BPF_MUL | BPF_K:
                  case BPF_ALU | BPF_MUL | BPF_X:
                  case BPF_ALU | BPF_DIV | BPF_K:
                  case BPF_ALU | BPF_DIV | BPF_X:
                  case BPF_ALU | BPF_AND | BPF_K:
                  case BPF_ALU | BPF_AND | BPF_X:
                  case BPF_ALU | BPF_OR | BPF_K:
                  case BPF_ALU | BPF_OR | BPF_X:
                  case BPF_ALU | BPF_XOR | BPF_K:
                  case BPF_ALU | BPF_XOR | BPF_X:
                  case BPF_ALU | BPF_LSH | BPF_K:
                  case BPF_ALU | BPF_LSH | BPF_X:
                  case BPF_ALU | BPF_RSH | BPF_K:
                  case BPF_ALU | BPF_RSH | BPF_X:
                  case BPF_ALU | BPF_NEG:
                  case BPF_LD | BPF_IMM:
                  case BPF_LDX | BPF_IMM:
                  case BPF_MISC | BPF_TAX:
                  case BPF_MISC | BPF_TXA:
                  case BPF_LD | BPF_MEM:
                  case BPF_LDX | BPF_MEM:
                  case BPF_ST:
                  case BPF_STX:
                  case BPF_JMP | BPF_JA:
                  case BPF_JMP | BPF_JEQ | BPF_K:
                  case BPF_JMP | BPF_JEQ | BPF_X:
                  case BPF_JMP | BPF_JGE | BPF_K:
                  case BPF_JMP | BPF_JGE | BPF_X:
                  case BPF_JMP | BPF_JGT | BPF_K:
                  case BPF_JMP | BPF_JGT | BPF_X:
                  case BPF_JMP | BPF_JSET | BPF_K:
                  case BPF_JMP | BPF_JSET | BPF_X:
                          break;
                  default:
                          /* Anything not listed above is rejected. */
                          return -EINVAL;
                  }
          }
          return 0;
  }
 168
 169 /**
 170  * seccomp_run_filters - evaluates all seccomp filters against @syscall
 171  * @syscall: number of the current system call
 172  *
 173  * Returns valid seccomp BPF response codes.
 174  */
 175 static u32 seccomp_run_filters(int syscall)
 176 {
 177         struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter);
 178         struct seccomp_data sd;
 179         u32 ret = SECCOMP_RET_ALLOW;
 180
 181         /* Ensure unexpected behavior doesn't result in failing open. */
 182         if (unlikely(WARN_ON(f == NULL)))
 183                 return SECCOMP_RET_KILL;
 184
 185         /* Make sure cross-thread synced filter points somewhere sane. */
 186         smp_read_barrier_depends();
 187
 188         populate_seccomp_data(&sd);
 189
 190         /*
 191          * All filters in the list are evaluated and the lowest BPF return
 192          * value always takes priority (ignoring the DATA).
 193          */
 194         for (; f; f = f->prev) {
 195                 u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)&sd);
 196
 197                 if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
 198                         ret = cur_ret;
 199         }
 200         return ret;
 201 }
 202 #endif /* CONFIG_SECCOMP_FILTER */
 203
 204 static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
 205 {
 206         assert_spin_locked(&current->sighand->siglock);
 207
 208         if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
 209                 return false;
 210
 211         return true;
 212 }
 213
 214 static inline void seccomp_assign_mode(struct task_struct *task,
 215                                        unsigned long seccomp_mode)
 216 {
 217         assert_spin_locked(&task->sighand->siglock);
 218
 219         task->seccomp.mode = seccomp_mode;
 220         /*
 221          * Make sure TIF_SECCOMP cannot be set before the mode (and
 222          * filter) is set.
 223          */
 224         smp_mb__before_atomic();
 225         set_tsk_thread_flag(task, TIF_SECCOMP);
 226 }
 227
 228 #ifdef CONFIG_SECCOMP_FILTER
 229 /* Returns 1 if the parent is an ancestor of the child. */
 230 static int is_ancestor(struct seccomp_filter *parent,
 231                        struct seccomp_filter *child)
 232 {
 233         /* NULL is the root ancestor. */
 234         if (parent == NULL)
 235                 return 1;
 236         for (; child; child = child->prev)
 237                 if (child == parent)
 238                         return 1;
 239         return 0;
 240 }
 241
 242 /**
 243  * seccomp_can_sync_threads: checks if all threads can be synchronized
 244  *
 245  * Expects sighand and cred_guard_mutex locks to be held.
 246  *
 247  * Returns 0 on success, -ve on error, or the pid of a thread which was
 248  * either not in the correct seccomp mode or it did not have an ancestral
 249  * seccomp filter.
 250  */
 251 static inline pid_t seccomp_can_sync_threads(void)
 252 {
 253         struct task_struct *thread, *caller;
 254
 255         BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
 256         assert_spin_locked(&current->sighand->siglock);
 257
 258         /* Validate all threads being eligible for synchronization. */
 259         caller = current;
 260         for_each_thread(caller, thread) {
 261                 pid_t failed;
 262
 263                 /* Skip current, since it is initiating the sync. */
 264                 if (thread == caller)
 265                         continue;
 266
 267                 if (thread->seccomp.mode == SECCOMP_MODE_DISABLED ||
 268                     (thread->seccomp.mode == SECCOMP_MODE_FILTER &&
 269                      is_ancestor(thread->seccomp.filter,
 270                                  caller->seccomp.filter)))
 271                         continue;
 272
 273                 /* Return the first thread that cannot be synchronized. */
 274                 failed = task_pid_vnr(thread);
 275                 /* If the pid cannot be resolved, then return -ESRCH */
 276                 if (unlikely(WARN_ON(failed == 0)))
 277                         failed = -ESRCH;
 278                 return failed;
 279         }
 280
 281         return 0;
 282 }
 283
 284 /**
 285  * seccomp_sync_threads: sets all threads to use current's filter
 286  *
 287  * Expects sighand and cred_guard_mutex locks to be held, and for
 288  * seccomp_can_sync_threads() to have returned success already
 289  * without dropping the locks.
 290  *
 291  */
 292 static inline void seccomp_sync_threads(void)
 293 {
 294         struct task_struct *thread, *caller;
 295
 296         BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
 297         assert_spin_locked(&current->sighand->siglock);
 298
 299         /* Synchronize all threads. */
 300         caller = current;
 301         for_each_thread(caller, thread) {
 302                 /* Skip current, since it needs no changes. */
 303                 if (thread == caller)
 304                         continue;
 305
 306                 /* Get a task reference for the new leaf node. */
 307                 get_seccomp_filter(caller);
 308                 /*
 309                  * Drop the task reference to the shared ancestor since
 310                  * current's path will hold a reference.  (This also
 311                  * allows a put before the assignment.)
 312                  */
 313                 put_seccomp_filter(thread);
 314                 smp_store_release(&thread->seccomp.filter,
 315                                   caller->seccomp.filter);
 316                 /*
 317                  * Opt the other thread into seccomp if needed.
 318                  * As threads are considered to be trust-realm
 319                  * equivalent (see ptrace_may_access), it is safe to
 320                  * allow one thread to transition the other.
 321                  */
 322                 if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) {
 323                         /*
 324                          * Don't let an unprivileged task work around
 325                          * the no_new_privs restriction by creating
 326                          * a thread that sets it up, enters seccomp,
 327                          * then dies.
 328                          */
 329                         if (task_no_new_privs(caller))
 330                                 task_set_no_new_privs(thread);
 331
 332                         seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
 333                 }
 334         }
 335 }
 336
 337 /**
 338  * seccomp_prepare_filter: Prepares a seccomp filter for use.
 339  * @fprog: BPF program to install
 340  *
 341  * Returns filter on success or an ERR_PTR on failure.
 342  */
 343 static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
 344 {
 345         struct seccomp_filter *filter;
 346         unsigned long fp_size;
 347         struct sock_filter *fp;
 348         int new_len;
 349         long ret;
 350
 351         if (fprog->len == 0 || fprog->len > BPF_MAXINSNS)
 352                 return ERR_PTR(-EINVAL);
 353         BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter));
 354         fp_size = fprog->len * sizeof(struct sock_filter);
 355
 356         /*
 357          * Installing a seccomp filter requires that the task has
 358          * CAP_SYS_ADMIN in its namespace or be running with no_new_privs.
 359          * This avoids scenarios where unprivileged tasks can affect the
 360          * behavior of privileged children.
 361          */
 362         if (!task_no_new_privs(current) &&
 363             security_capable_noaudit(current_cred(), current_user_ns(),
 364                                      CAP_SYS_ADMIN) != 0)
 365                 return ERR_PTR(-EACCES);
 366
 367         fp = kzalloc(fp_size, GFP_KERNEL|__GFP_NOWARN);
 368         if (!fp)
 369                 return ERR_PTR(-ENOMEM);
 370
 371         /* Copy the instructions from fprog. */
 372         ret = -EFAULT;
 373         if (copy_from_user(fp, fprog->filter, fp_size))
 374                 goto free_prog;
 375
 376         /* Check and rewrite the fprog via the skb checker */
 377         ret = bpf_check_classic(fp, fprog->len);
 378         if (ret)
 379                 goto free_prog;
 380
 381         /* Check and rewrite the fprog for seccomp use */
 382         ret = seccomp_check_filter(fp, fprog->len);
 383         if (ret)
 384                 goto free_prog;
 385
 386         /* Convert 'sock_filter' insns to 'bpf_insn' insns */
 387         ret = bpf_convert_filter(fp, fprog->len, NULL, &new_len);
 388         if (ret)
 389                 goto free_prog;
 390
 391         /* Allocate a new seccomp_filter */
 392         ret = -ENOMEM;
 393         filter = kzalloc(sizeof(struct seccomp_filter),
 394                          GFP_KERNEL|__GFP_NOWARN);
 395         if (!filter)
 396                 goto free_prog;
 397
 398         filter->prog = bpf_prog_alloc(bpf_prog_size(new_len), __GFP_NOWARN);
 399         if (!filter->prog)
 400                 goto free_filter;
 401
 402         ret = bpf_convert_filter(fp, fprog->len, filter->prog->insnsi, &new_len);
 403         if (ret)
 404                 goto free_filter_prog;
 405
 406         kfree(fp);
 407         atomic_set(&filter->usage, 1);
 408         filter->prog->len = new_len;
 409
 410         bpf_prog_select_runtime(filter->prog);
 411
 412         return filter;
 413
 414 free_filter_prog:
 415         __bpf_prog_free(filter->prog);
 416 free_filter:
 417         kfree(filter);
 418 free_prog:
 419         kfree(fp);
 420         return ERR_PTR(ret);
 421 }
 422
 423 /**
 424  * seccomp_prepare_user_filter - prepares a user-supplied sock_fprog
 425  * @user_filter: pointer to the user data containing a sock_fprog.
 426  *
 427  * Returns 0 on success and non-zero otherwise.
 428  */
 429 static struct seccomp_filter *
 430 seccomp_prepare_user_filter(const char __user *user_filter)
 431 {
 432         struct sock_fprog fprog;
 433         struct seccomp_filter *filter = ERR_PTR(-EFAULT);
 434
 435 #ifdef CONFIG_COMPAT
 436         if (is_compat_task()) {
 437                 struct compat_sock_fprog fprog32;
 438                 if (copy_from_user(&fprog32, user_filter, sizeof(fprog32)))
 439                         goto out;
 440                 fprog.len = fprog32.len;
 441                 fprog.filter = compat_ptr(fprog32.filter);
 442         } else /* falls through to the if below. */
 443 #endif
 444         if (copy_from_user(&fprog, user_filter, sizeof(fprog)))
 445                 goto out;
 446         filter = seccomp_prepare_filter(&fprog);
 447 out:
 448         return filter;
 449 }
 450
 451 /**
 452  * seccomp_attach_filter: validate and attach filter
 453  * @flags:  flags to change filter behavior
 454  * @filter: seccomp filter to add to the current process
 455  *
 456  * Caller must be holding current->sighand->siglock lock.
 457  *
 458  * Returns 0 on success, -ve on error.
 459  */
 460 static long seccomp_attach_filter(unsigned int flags,
 461                                   struct seccomp_filter *filter)
 462 {
 463         unsigned long total_insns;
 464         struct seccomp_filter *walker;
 465
 466         assert_spin_locked(&current->sighand->siglock);
 467
 468         /* Validate resulting filter length. */
 469         total_insns = filter->prog->len;
 470         for (walker = current->seccomp.filter; walker; walker = walker->prev)
 471                 total_insns += walker->prog->len + 4;  /* 4 instr penalty */
 472         if (total_insns > MAX_INSNS_PER_PATH)
 473                 return -ENOMEM;
 474
 475         /* If thread sync has been requested, check that it is possible. */
 476         if (flags & SECCOMP_FILTER_FLAG_TSYNC) {
 477                 int ret;
 478
 479                 ret = seccomp_can_sync_threads();
 480                 if (ret)
 481                         return ret;
 482         }
 483
 484         /*
 485          * If there is an existing filter, make it the prev and don't drop its
 486          * task reference.
 487          */
 488         filter->prev = current->seccomp.filter;
 489         current->seccomp.filter = filter;
 490
 491         /* Now that the new filter is in place, synchronize to all threads. */
 492         if (flags & SECCOMP_FILTER_FLAG_TSYNC)
 493                 seccomp_sync_threads();
 494
 495         return 0;
 496 }
 497
 498 /* get_seccomp_filter - increments the reference count of the filter on @tsk */
 499 void get_seccomp_filter(struct task_struct *tsk)
 500 {
 501         struct seccomp_filter *orig = tsk->seccomp.filter;
 502         if (!orig)
 503                 return;
 504         /* Reference count is bounded by the number of total processes. */
 505         atomic_inc(&orig->usage);
 506 }
 507
 508 static inline void seccomp_filter_free(struct seccomp_filter *filter)
 509 {
 510         if (filter) {
 511                 bpf_prog_free(filter->prog);
 512                 kfree(filter);
 513         }
 514 }
 515
 516 /* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */
 517 void put_seccomp_filter(struct task_struct *tsk)
 518 {
 519         struct seccomp_filter *orig = tsk->seccomp.filter;
 520         /* Clean up single-reference branches iteratively. */
 521         while (orig && atomic_dec_and_test(&orig->usage)) {
 522                 struct seccomp_filter *freeme = orig;
 523                 orig = orig->prev;
 524                 seccomp_filter_free(freeme);
 525         }
 526 }
 527
 528 /**
 529  * seccomp_send_sigsys - signals the task to allow in-process syscall emulation
 530  * @syscall: syscall number to send to userland
 531  * @reason: filter-supplied reason code to send to userland (via si_errno)
 532  *
 533  * Forces a SIGSYS with a code of SYS_SECCOMP and related sigsys info.
 534  */
 535 static void seccomp_send_sigsys(int syscall, int reason)
 536 {
 537         struct siginfo info;
 538         memset(&info, 0, sizeof(info));
 539         info.si_signo = SIGSYS;
 540         info.si_code = SYS_SECCOMP;
 541         info.si_call_addr = (void __user *)KSTK_EIP(current);
 542         info.si_errno = reason;
 543         info.si_arch = syscall_get_arch();
 544         info.si_syscall = syscall;
 545         force_sig_info(SIGSYS, &info, current);
 546 }
 547 #endif  /* CONFIG_SECCOMP_FILTER */
 548
 549 /*
 550  * Secure computing mode 1 allows only read/write/exit/sigreturn.
 551  * To be fully secure this must be combined with rlimit
 552  * to limit the stack allocations too.
 553  */
 554 static int mode1_syscalls[] = {
 555         __NR_seccomp_read, __NR_seccomp_write, __NR_seccomp_exit, __NR_seccomp_sigreturn,
 556         0, /* null terminated */
 557 };
 558
 559 #ifdef CONFIG_COMPAT
 560 static int mode1_syscalls_32[] = {
 561         __NR_seccomp_read_32, __NR_seccomp_write_32, __NR_seccomp_exit_32, __NR_seccomp_sigreturn_32,
 562         0, /* null terminated */
 563 };
 564 #endif
 565
 566 int __secure_computing(int this_syscall)
 567 {
 568         int exit_sig = 0;
 569         int *syscall;
 570         u32 ret;
 571
 572         /*
 573          * Make sure that any changes to mode from another thread have
 574          * been seen after TIF_SECCOMP was seen.
 575          */
 576         rmb();
 577
 578         switch (current->seccomp.mode) {
 579         case SECCOMP_MODE_STRICT:
 580                 syscall = mode1_syscalls;
 581 #ifdef CONFIG_COMPAT
 582                 if (is_compat_task())
 583                         syscall = mode1_syscalls_32;
 584 #endif
 585                 do {
 586                         if (*syscall == this_syscall)
 587                                 return 0;
 588                 } while (*++syscall);
 589                 exit_sig = SIGKILL;
 590                 ret = SECCOMP_RET_KILL;
 591                 break;
 592 #ifdef CONFIG_SECCOMP_FILTER
 593         case SECCOMP_MODE_FILTER: {
 594                 int data;
 595                 struct pt_regs *regs = task_pt_regs(current);
 596                 ret = seccomp_run_filters(this_syscall);
 597                 data = ret & SECCOMP_RET_DATA;
 598                 ret &= SECCOMP_RET_ACTION;
 599                 switch (ret) {
 600                 case SECCOMP_RET_ERRNO:
 601                         /* Set the low-order 16-bits as a errno. */
 602                         syscall_set_return_value(current, regs,
 603                                                  -data, 0);
 604                         goto skip;
 605                 case SECCOMP_RET_TRAP:
 606                         /* Show the handler the original registers. */
 607                         syscall_rollback(current, regs);
 608                         /* Let the filter pass back 16 bits of data. */
 609                         seccomp_send_sigsys(this_syscall, data);
 610                         goto skip;
 611                 case SECCOMP_RET_TRACE:
 612                         /* Skip these calls if there is no tracer. */
 613                         if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
 614                                 syscall_set_return_value(current, regs,
 615                                                          -ENOSYS, 0);
 616                                 goto skip;
 617                         }
 618                         /* Allow the BPF to provide the event message */
 619                         ptrace_event(PTRACE_EVENT_SECCOMP, data);
 620                         /*
 621                          * The delivery of a fatal signal during event
 622                          * notification may silently skip tracer notification.
 623                          * Terminating the task now avoids executing a system
 624                          * call that may not be intended.
 625                          */
 626                         if (fatal_signal_pending(current))
 627                                 break;
 628                         if (syscall_get_nr(current, regs) < 0)
 629                                 goto skip;  /* Explicit request to skip. */
 630
 631                         return 0;
 632                 case SECCOMP_RET_ALLOW:
 633                         return 0;
 634                 case SECCOMP_RET_KILL:
 635                 default:
 636                         break;
 637                 }
 638                 exit_sig = SIGSYS;
 639                 break;
 640         }
 641 #endif
 642         default:
 643                 BUG();
 644         }
 645
 646 #ifdef SECCOMP_DEBUG
 647         dump_stack();
 648 #endif
 649         audit_seccomp(this_syscall, exit_sig, ret);
 650         do_exit(exit_sig);
 651 #ifdef CONFIG_SECCOMP_FILTER
 652 skip:
 653         audit_seccomp(this_syscall, exit_sig, ret);
 654 #endif
 655         return -1;
 656 }
 657
 658 long prctl_get_seccomp(void)
 659 {
 660         return current->seccomp.mode;
 661 }
 662
 663 /**
 664  * seccomp_set_mode_strict: internal function for setting strict seccomp
 665  *
 666  * Once current->seccomp.mode is non-zero, it may not be changed.
 667  *
 668  * Returns 0 on success or -EINVAL on failure.
 669  */
 670 static long seccomp_set_mode_strict(void)
 671 {
 672         const unsigned long seccomp_mode = SECCOMP_MODE_STRICT;
 673         long ret = -EINVAL;
 674
 675         spin_lock_irq(&current->sighand->siglock);
 676
 677         if (!seccomp_may_assign_mode(seccomp_mode))
 678                 goto out;
 679
 680 #ifdef TIF_NOTSC
 681         disable_TSC();
 682 #endif
 683         seccomp_assign_mode(current, seccomp_mode);
 684         ret = 0;
 685
 686 out:
 687         spin_unlock_irq(&current->sighand->siglock);
 688
 689         return ret;
 690 }
 691
 692 #ifdef CONFIG_SECCOMP_FILTER
 693 /**
 694  * seccomp_set_mode_filter: internal function for setting seccomp filter
 695  * @flags:  flags to change filter behavior
 696  * @filter: struct sock_fprog containing filter
 697  *
 698  * This function may be called repeatedly to install additional filters.
 699  * Every filter successfully installed will be evaluated (in reverse order)
 700  * for each system call the task makes.
 701  *
 702  * Once current->seccomp.mode is non-zero, it may not be changed.
 703  *
 704  * Returns 0 on success or -EINVAL on failure.
 705  */
 706 static long seccomp_set_mode_filter(unsigned int flags,
 707                                     const char __user *filter)
 708 {
 709         const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
 710         struct seccomp_filter *prepared = NULL;
 711         long ret = -EINVAL;
 712
 713         /* Validate flags. */
 714         if (flags & ~SECCOMP_FILTER_FLAG_MASK)
 715                 return -EINVAL;
 716
 717         /* Prepare the new filter before holding any locks. */
 718         prepared = seccomp_prepare_user_filter(filter);
 719         if (IS_ERR(prepared))
 720                 return PTR_ERR(prepared);
 721
 722         /*
 723          * Make sure we cannot change seccomp or nnp state via TSYNC
 724          * while another thread is in the middle of calling exec.
 725          */
 726         if (flags & SECCOMP_FILTER_FLAG_TSYNC &&
 727             mutex_lock_killable(&current->signal->cred_guard_mutex))
 728                 goto out_free;
 729
 730         spin_lock_irq(&current->sighand->siglock);
 731
 732         if (!seccomp_may_assign_mode(seccomp_mode))
 733                 goto out;
 734
 735         ret = seccomp_attach_filter(flags, prepared);
 736         if (ret)
 737                 goto out;
 738         /* Do not free the successfully attached filter. */
 739         prepared = NULL;
 740
 741         seccomp_assign_mode(current, seccomp_mode);
 742 out:
 743         spin_unlock_irq(&current->sighand->siglock);
 744         if (flags & SECCOMP_FILTER_FLAG_TSYNC)
 745                 mutex_unlock(&current->signal->cred_guard_mutex);
 746 out_free:
 747         seccomp_filter_free(prepared);
 748         return ret;
 749 }
 750 #else
 751 static inline long seccomp_set_mode_filter(unsigned int flags,
 752                                            const char __user *filter)
 753 {
 754         return -EINVAL;
 755 }
 756 #endif
 757
 758 /* Common entry point for both prctl and syscall. */
 759 static long do_seccomp(unsigned int op, unsigned int flags,
 760                        const char __user *uargs)
 761 {
 762         switch (op) {
 763         case SECCOMP_SET_MODE_STRICT:
 764                 if (flags != 0 || uargs != NULL)
 765                         return -EINVAL;
 766                 return seccomp_set_mode_strict();
 767         case SECCOMP_SET_MODE_FILTER:
 768                 return seccomp_set_mode_filter(flags, uargs);
 769         default:
 770                 return -EINVAL;
 771         }
 772 }
 773
 774 SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags,
 775                          const char __user *, uargs)
 776 {
 777         return do_seccomp(op, flags, uargs);
 778 }
 779
 780 /**
 781  * prctl_set_seccomp: configures current->seccomp.mode
 782  * @seccomp_mode: requested mode to use
 783  * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
 784  *
 785  * Returns 0 on success or -EINVAL on failure.
 786  */
 787 long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
 788 {
 789         unsigned int op;
 790         char __user *uargs;
 791
 792         switch (seccomp_mode) {
 793         case SECCOMP_MODE_STRICT:
 794                 op = SECCOMP_SET_MODE_STRICT;
 795                 /*
 796                  * Setting strict mode through prctl always ignored filter,
 797                  * so make sure it is always NULL here to pass the internal
 798                  * check in do_seccomp().
 799                  */
 800                 uargs = NULL;
 801                 break;
 802         case SECCOMP_MODE_FILTER:
 803                 op = SECCOMP_SET_MODE_FILTER;
 804                 uargs = filter;
 805                 break;
 806         default:
 807                 return -EINVAL;
 808         }
 809
 810         /* prctl interface doesn't have flags, so they are always zero. */
 811         return do_seccomp(op, 0, uargs);
 812 }