perf/core: Fix use-after-free in perf_release()
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e5aaa806702de888b63a82bdcb72766f7a317563..dc7ae610af94fbc3b8af4fa08dbd6124d2533a4b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -389,8 +389,13 @@ static struct srcu_struct pmus_srcu;
  *   0 - disallow raw tracepoint access for unpriv
  *   1 - disallow cpu events for unpriv
  *   2 - disallow kernel profiling for unpriv
+ *   3 - disallow all unpriv perf event use
  */
-int sysctl_perf_event_paranoid __read_mostly = 2;
+#ifdef CONFIG_SECURITY_PERF_EVENTS_RESTRICT
+int sysctl_perf_event_paranoid __read_mostly = 3;
+#else
+int sysctl_perf_event_paranoid __read_mostly = 1;
+#endif
 
 /* Minimum for 512 kiB + 1 user control page */
 int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */
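
The hunk above introduces a fourth paranoia level: with CONFIG_SECURITY_PERF_EVENTS_RESTRICT the default becomes 3, which denies all perf_event_open() use to unprivileged tasks (enforced by the perf_paranoid_any() check added to the syscall further down). A minimal sketch of that helper, assuming it follows the existing perf_paranoid_*() pattern in include/linux/perf_event.h; the exact definition in this tree may differ:

	/*
	 * Sketch only: level 3 means "deny any unprivileged perf use",
	 * so the helper presumably tests for a paranoia level above 2.
	 */
	static inline bool perf_paranoid_any(void)
	{
		return sysctl_perf_event_paranoid > 2;
	}
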
@@ -3487,14 +3492,15 @@ struct perf_read_data {
        int ret;
 };
 
-static int find_cpu_to_read(struct perf_event *event, int local_cpu)
+static int __perf_event_read_cpu(struct perf_event *event, int event_cpu)
 {
-       int event_cpu = event->oncpu;
        u16 local_pkg, event_pkg;
 
        if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {
-               event_pkg =  topology_physical_package_id(event_cpu);
-               local_pkg =  topology_physical_package_id(local_cpu);
+               int local_cpu = smp_processor_id();
+
+               event_pkg = topology_physical_package_id(event_cpu);
+               local_pkg = topology_physical_package_id(local_cpu);
 
                if (event_pkg == local_pkg)
                        return local_cpu;
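
This hunk renames find_cpu_to_read() to __perf_event_read_cpu() and stops reading event->oncpu inside the helper: the caller now passes in a CPU number it has already snapshotted and validated, and the local CPU is sampled with smp_processor_id() under the caller's preempt_disable(). The resulting helper, reconstructed from the hunk; the tail that falls outside the displayed context (the final return) is assumed:

	static int __perf_event_read_cpu(struct perf_event *event, int event_cpu)
	{
		u16 local_pkg, event_pkg;

		if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {
			int local_cpu = smp_processor_id();

			event_pkg = topology_physical_package_id(event_cpu);
			local_pkg = topology_physical_package_id(local_cpu);

			/*
			 * A package-scoped read can be satisfied from any CPU
			 * in the event's package; prefer the local one so the
			 * cross-call below stays on this package.
			 */
			if (event_pkg == local_pkg)
				return local_cpu;
		}

		/* Assumed tail: fall back to the event's own CPU. */
		return event_cpu;
	}
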
@@ -3624,7 +3630,7 @@ u64 perf_event_read_local(struct perf_event *event)
 
 static int perf_event_read(struct perf_event *event, bool group)
 {
-       int ret = 0, cpu_to_read, local_cpu;
+       int event_cpu, ret = 0;
 
        /*
         * If event is enabled and currently active on a CPU, update the
@@ -3637,21 +3643,25 @@ static int perf_event_read(struct perf_event *event, bool group)
                        .ret = 0,
                };
 
-               local_cpu = get_cpu();
-               cpu_to_read = find_cpu_to_read(event, local_cpu);
-               put_cpu();
+               event_cpu = READ_ONCE(event->oncpu);
+               if ((unsigned)event_cpu >= nr_cpu_ids)
+                       return 0;
+
+               preempt_disable();
+               event_cpu = __perf_event_read_cpu(event, event_cpu);
 
                /*
                 * Purposely ignore the smp_call_function_single() return
                 * value.
                 *
-                * If event->oncpu isn't a valid CPU it means the event got
+                * If event_cpu isn't a valid CPU it means the event got
                 * scheduled out and that will have updated the event count.
                 *
                 * Therefore, either way, we'll have an up-to-date event count
                 * after this.
                 */
-               (void)smp_call_function_single(cpu_to_read, __perf_event_read, &data, 1);
+               (void)smp_call_function_single(event_cpu, __perf_event_read, &data, 1);
+               preempt_enable();
                ret = data.ret;
        } else if (event->state == PERF_EVENT_STATE_INACTIVE) {
                struct perf_event_context *ctx = event->ctx;
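
perf_event_read() now snapshots event->oncpu once with READ_ONCE() and rejects stale values before any topology lookup: the cast to unsigned folds a negative oncpu (set to -1 when the event is scheduled out) into "too large", and preempt_disable() keeps the chosen CPU meaningful across both the package comparison and the smp_call_function_single(). The validation idiom, written as a stand-alone helper purely for illustration (no such helper exists in the tree):

	/*
	 * Hypothetical helper, illustrative only: one unsigned compare
	 * rejects both a stale oncpu of -1 and any out-of-range value.
	 */
	static inline bool perf_cpu_valid(int cpu)
	{
		return (unsigned int)cpu < nr_cpu_ids;
	}
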
@@ -9628,6 +9638,9 @@ SYSCALL_DEFINE5(perf_event_open,
        if (flags & ~PERF_FLAG_ALL)
                return -EINVAL;
 
+       if (perf_paranoid_any() && !capable(CAP_SYS_ADMIN))
+               return -EACCES;
+
        err = perf_copy_attr(attr_uptr, &attr);
        if (err)
                return err;
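
With the check added above, an unprivileged perf_event_open() fails with -EACCES before perf_copy_attr() even runs whenever the paranoia level is 3 and the caller lacks CAP_SYS_ADMIN. An illustrative userspace probe of the observable effect (not part of the kernel change):

	#include <linux/perf_event.h>
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <string.h>
	#include <errno.h>
	#include <stdio.h>

	int main(void)
	{
		struct perf_event_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_SOFTWARE;
		attr.config = PERF_COUNT_SW_CPU_CLOCK;

		/* With kernel.perf_event_paranoid = 3 and no CAP_SYS_ADMIN,
		 * this prints "Permission denied" (EACCES). */
		if (syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0) < 0)
			printf("perf_event_open: %s\n", strerror(errno));
		return 0;
	}
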
@@ -10369,6 +10382,17 @@ void perf_event_free_task(struct task_struct *task)
                        continue;
 
                mutex_lock(&ctx->mutex);
+               raw_spin_lock_irq(&ctx->lock);
+               /*
+                * Destroy the task <-> ctx relation and mark the context dead.
+                *
+                * This is important because even though the task hasn't been
+                * exposed yet the context has been (through child_list).
+                */
+               RCU_INIT_POINTER(task->perf_event_ctxp[ctxn], NULL);
+               WRITE_ONCE(ctx->task, TASK_TOMBSTONE);
+               put_task_struct(task); /* cannot be last */
+               raw_spin_unlock_irq(&ctx->lock);
 again:
                list_for_each_entry_safe(event, tmp, &ctx->pinned_groups,
                                group_entry)
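
This last hunk is the use-after-free fix named in the commit subject: perf_event_free_task() now severs the task <-> ctx link under ctx->lock, poisoning ctx->task with TASK_TOMBSTONE and dropping the task reference the context held, so a racing perf_release() on a child event can no longer follow ctx->task into a task_struct that is being freed. A sketch of the convention the poisoning relies on, assuming the usual TASK_TOMBSTONE poison pointer defined near the top of kernel/events/core.c:

	/*
	 * Illustrative pattern only: paths that re-read ctx->task must
	 * treat the tombstone as "context is being torn down" and back off
	 * instead of dereferencing the (possibly freed) task.
	 */
	struct task_struct *task = READ_ONCE(ctx->task);

	if (task == TASK_TOMBSTONE)
		return;	/* context already dead; do not touch the task */
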