git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blobdiff - kernel/events/core.c
perf/core: Fix use-after-free in perf_release()
[mirror_ubuntu-zesty-kernel.git] / kernel / events / core.c
index 110b38a58493ee4ba4c19763d2678dae8815e1af..dc7ae610af94fbc3b8af4fa08dbd6124d2533a4b 100644 (file)
@@ -389,8 +389,13 @@ static struct srcu_struct pmus_srcu;
  *   0 - disallow raw tracepoint access for unpriv
  *   1 - disallow cpu events for unpriv
  *   2 - disallow kernel profiling for unpriv
+ *   3 - disallow all unpriv perf event use
  */
-int sysctl_perf_event_paranoid __read_mostly = 2;
+#ifdef CONFIG_SECURITY_PERF_EVENTS_RESTRICT
+int sysctl_perf_event_paranoid __read_mostly = 3;
+#else
+int sysctl_perf_event_paranoid __read_mostly = 1;
+#endif
 
 /* Minimum for 512 kiB + 1 user control page */
 int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */
@@ -1469,7 +1474,6 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
 static void
 list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 {
-
        lockdep_assert_held(&ctx->lock);
 
        WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
@@ -1624,6 +1628,8 @@ static void perf_group_attach(struct perf_event *event)
 {
        struct perf_event *group_leader = event->group_leader, *pos;
 
+       lockdep_assert_held(&event->ctx->lock);
+
        /*
         * We can have double attach due to group movement in perf_event_open.
         */
@@ -1697,6 +1703,8 @@ static void perf_group_detach(struct perf_event *event)
        struct perf_event *sibling, *tmp;
        struct list_head *list = NULL;
 
+       lockdep_assert_held(&event->ctx->lock);
+
        /*
         * We can have double detach due to exit/hot-unplug + close.
         */
@@ -1895,9 +1903,29 @@ __perf_remove_from_context(struct perf_event *event,
  */
 static void perf_remove_from_context(struct perf_event *event, unsigned long flags)
 {
-       lockdep_assert_held(&event->ctx->mutex);
+       struct perf_event_context *ctx = event->ctx;
+
+       lockdep_assert_held(&ctx->mutex);
 
        event_function_call(event, __perf_remove_from_context, (void *)flags);
+
+       /*
+        * The above event_function_call() can NO-OP when it hits
+        * TASK_TOMBSTONE. In that case we must already have been detached
+        * from the context (by perf_event_exit_event()) but the grouping
+        * might still be intact.
+        */
+       WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
+       if ((flags & DETACH_GROUP) &&
+           (event->attach_state & PERF_ATTACH_GROUP)) {
+               /*
+                * Since in that case we cannot possibly be scheduled, simply
+                * detach now.
+                */
+               raw_spin_lock_irq(&ctx->lock);
+               perf_group_detach(event);
+               raw_spin_unlock_irq(&ctx->lock);
+       }
 }
 
 /*
@@ -3464,14 +3492,15 @@ struct perf_read_data {
        int ret;
 };
 
-static int find_cpu_to_read(struct perf_event *event, int local_cpu)
+static int __perf_event_read_cpu(struct perf_event *event, int event_cpu)
 {
-       int event_cpu = event->oncpu;
        u16 local_pkg, event_pkg;
 
        if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {
-               event_pkg =  topology_physical_package_id(event_cpu);
-               local_pkg =  topology_physical_package_id(local_cpu);
+               int local_cpu = smp_processor_id();
+
+               event_pkg = topology_physical_package_id(event_cpu);
+               local_pkg = topology_physical_package_id(local_cpu);
 
                if (event_pkg == local_pkg)
                        return local_cpu;
@@ -3601,7 +3630,7 @@ u64 perf_event_read_local(struct perf_event *event)
 
 static int perf_event_read(struct perf_event *event, bool group)
 {
-       int ret = 0, cpu_to_read, local_cpu;
+       int event_cpu, ret = 0;
 
        /*
         * If event is enabled and currently active on a CPU, update the
@@ -3614,21 +3643,25 @@ static int perf_event_read(struct perf_event *event, bool group)
                        .ret = 0,
                };
 
-               local_cpu = get_cpu();
-               cpu_to_read = find_cpu_to_read(event, local_cpu);
-               put_cpu();
+               event_cpu = READ_ONCE(event->oncpu);
+               if ((unsigned)event_cpu >= nr_cpu_ids)
+                       return 0;
+
+               preempt_disable();
+               event_cpu = __perf_event_read_cpu(event, event_cpu);
 
                /*
                 * Purposely ignore the smp_call_function_single() return
                 * value.
                 *
-                * If event->oncpu isn't a valid CPU it means the event got
+                * If event_cpu isn't a valid CPU it means the event got
                 * scheduled out and that will have updated the event count.
                 *
                 * Therefore, either way, we'll have an up-to-date event count
                 * after this.
                 */
-               (void)smp_call_function_single(cpu_to_read, __perf_event_read, &data, 1);
+               (void)smp_call_function_single(event_cpu, __perf_event_read, &data, 1);
+               preempt_enable();
                ret = data.ret;
        } else if (event->state == PERF_EVENT_STATE_INACTIVE) {
                struct perf_event_context *ctx = event->ctx;
@@ -6609,6 +6642,27 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
        char *buf = NULL;
        char *name;
 
+       if (vma->vm_flags & VM_READ)
+               prot |= PROT_READ;
+       if (vma->vm_flags & VM_WRITE)
+               prot |= PROT_WRITE;
+       if (vma->vm_flags & VM_EXEC)
+               prot |= PROT_EXEC;
+
+       if (vma->vm_flags & VM_MAYSHARE)
+               flags = MAP_SHARED;
+       else
+               flags = MAP_PRIVATE;
+
+       if (vma->vm_flags & VM_DENYWRITE)
+               flags |= MAP_DENYWRITE;
+       if (vma->vm_flags & VM_MAYEXEC)
+               flags |= MAP_EXECUTABLE;
+       if (vma->vm_flags & VM_LOCKED)
+               flags |= MAP_LOCKED;
+       if (vma->vm_flags & VM_HUGETLB)
+               flags |= MAP_HUGETLB;
+
        if (file) {
                struct inode *inode;
                dev_t dev;
@@ -6635,27 +6689,6 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
                maj = MAJOR(dev);
                min = MINOR(dev);
 
-               if (vma->vm_flags & VM_READ)
-                       prot |= PROT_READ;
-               if (vma->vm_flags & VM_WRITE)
-                       prot |= PROT_WRITE;
-               if (vma->vm_flags & VM_EXEC)
-                       prot |= PROT_EXEC;
-
-               if (vma->vm_flags & VM_MAYSHARE)
-                       flags = MAP_SHARED;
-               else
-                       flags = MAP_PRIVATE;
-
-               if (vma->vm_flags & VM_DENYWRITE)
-                       flags |= MAP_DENYWRITE;
-               if (vma->vm_flags & VM_MAYEXEC)
-                       flags |= MAP_EXECUTABLE;
-               if (vma->vm_flags & VM_LOCKED)
-                       flags |= MAP_LOCKED;
-               if (vma->vm_flags & VM_HUGETLB)
-                       flags |= MAP_HUGETLB;
-
                goto got_name;
        } else {
                if (vma->vm_ops && vma->vm_ops->name) {
@@ -9605,6 +9638,9 @@ SYSCALL_DEFINE5(perf_event_open,
        if (flags & ~PERF_FLAG_ALL)
                return -EINVAL;
 
+       if (perf_paranoid_any() && !capable(CAP_SYS_ADMIN))
+               return -EACCES;
+
        err = perf_copy_attr(attr_uptr, &attr);
        if (err)
                return err;
@@ -10346,6 +10382,17 @@ void perf_event_free_task(struct task_struct *task)
                        continue;
 
                mutex_lock(&ctx->mutex);
+               raw_spin_lock_irq(&ctx->lock);
+               /*
+                * Destroy the task <-> ctx relation and mark the context dead.
+                *
+                * This is important because even though the task hasn't been
+                * exposed yet the context has been (through child_list).
+                */
+               RCU_INIT_POINTER(task->perf_event_ctxp[ctxn], NULL);
+               WRITE_ONCE(ctx->task, TASK_TOMBSTONE);
+               put_task_struct(task); /* cannot be last */
+               raw_spin_unlock_irq(&ctx->lock);
 again:
                list_for_each_entry_safe(event, tmp, &ctx->pinned_groups,
                                group_entry)