Merge branch 'work.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4df5b695bf0db1c035a22b914d87ad190d1a0f42..d0d9bfb47d2e32473cf0397f402d04e9ab1218cf 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1231,6 +1231,10 @@ static void put_ctx(struct perf_event_context *ctx)
  *           perf_event_context::lock
  *         perf_event::mmap_mutex
  *         mmap_sem
+ *
+ *    cpu_hotplug_lock
+ *      pmus_lock
+ *       cpuctx->mutex / perf_event_context::mutex
  */
 static struct perf_event_context *
 perf_event_ctx_lock_nested(struct perf_event *event, int nesting)
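
The added comment documents a second lock-ordering chain: cpu_hotplug_lock nests outside pmus_lock, which nests outside cpuctx->mutex / perf_event_context::mutex. A minimal userspace analogy of why every path must honour one documented order (pthread mutexes standing in for the kernel locks; all names hypothetical):

/* Hypothetical analogy: three pthread mutexes standing in for
 * cpu_hotplug_lock -> pmus_lock -> ctx->mutex. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t hotplug = PTHREAD_MUTEX_INITIALIZER; /* cpu_hotplug_lock */
static pthread_mutex_t pmus    = PTHREAD_MUTEX_INITIALIZER; /* pmus_lock        */
static pthread_mutex_t ctx     = PTHREAD_MUTEX_INITIALIZER; /* ctx->mutex       */

/* Correct: acquire in the documented order, release in reverse. */
static void update_everything(void)
{
        pthread_mutex_lock(&hotplug);
        pthread_mutex_lock(&pmus);
        pthread_mutex_lock(&ctx);
        puts("holding hotplug -> pmus -> ctx");
        pthread_mutex_unlock(&ctx);
        pthread_mutex_unlock(&pmus);
        pthread_mutex_unlock(&hotplug);
}

/* A thread that took ctx first and then wanted hotplug would invert
 * the documented order and could deadlock against update_everything(). */
int main(void)
{
        update_everything();
        return 0;
}
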
@@ -4196,6 +4200,7 @@ int perf_event_release_kernel(struct perf_event *event)
 {
        struct perf_event_context *ctx = event->ctx;
        struct perf_event *child, *tmp;
+       LIST_HEAD(free_list);
 
        /*
         * If we got here through err_file: fput(event_file); we will not have
@@ -4268,8 +4273,7 @@ again:
                                               struct perf_event, child_list);
                if (tmp == child) {
                        perf_remove_from_context(child, DETACH_GROUP);
-                       list_del(&child->child_list);
-                       free_event(child);
+                       list_move(&child->child_list, &free_list);
                        /*
                         * This matches the refcount bump in inherit_event();
                         * this can't be the last reference.
@@ -4284,6 +4288,11 @@ again:
        }
        mutex_unlock(&event->child_mutex);
 
+       list_for_each_entry_safe(child, tmp, &free_list, child_list) {
+               list_del(&child->child_list);
+               free_event(child);
+       }
+
 no_ctx:
        put_event(event); /* Must be the 'last' reference */
        return 0;
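
With this change, free_event() no longer runs while the loop holds event->child_mutex: list_move() parks each child on a local free_list, and the children are freed in a second pass after the mutex is dropped. A minimal userspace sketch of the same collect-then-free pattern (names hypothetical, plain pthreads standing in for the kernel locking):

#include <pthread.h>
#include <stdlib.h>

struct child {
        struct child *next;
};

static pthread_mutex_t child_mutex = PTHREAD_MUTEX_INITIALIZER;
static struct child *children;          /* protected by child_mutex */

static void release_children(void)
{
        struct child *free_list, *tmp;

        /* Detach everything while holding the lock... */
        pthread_mutex_lock(&child_mutex);
        free_list = children;
        children = NULL;
        pthread_mutex_unlock(&child_mutex);

        /* ...then run the teardown without it, so nothing heavy
         * (here just free()) ever executes under child_mutex. */
        while (free_list) {
                tmp = free_list->next;
                free(free_list);
                free_list = tmp;
        }
}

int main(void)
{
        for (int i = 0; i < 3; i++) {
                struct child *c = calloc(1, sizeof(*c));

                if (!c)
                        return 1;
                c->next = children;
                children = c;
        }
        release_children();
        return 0;
}
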
@@ -4511,11 +4520,11 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
        return ret;
 }
 
-static unsigned int perf_poll(struct file *file, poll_table *wait)
+static __poll_t perf_poll(struct file *file, poll_table *wait)
 {
        struct perf_event *event = file->private_data;
        struct ring_buffer *rb;
-       unsigned int events = POLLHUP;
+       __poll_t events = POLLHUP;
 
        poll_wait(file, &event->waitq, wait);
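
The mask type changes from unsigned int to __poll_t, the sparse-annotated type for poll bitmasks, so mixing poll bits with ordinary integers can be flagged at build time; behaviour is unchanged. On the userspace side the mask is still the ordinary poll(2) bitmask; a small illustrative consumer (hypothetical, not tied to perf):

#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        struct pollfd pfd = { .fd = STDIN_FILENO, .events = POLLIN };

        if (poll(&pfd, 1, 1000) <= 0)           /* 1s timeout */
                return 0;

        if (pfd.revents & POLLHUP)              /* the bit perf_poll() starts from */
                printf("fd %d: hung up\n", pfd.fd);
        if (pfd.revents & POLLIN)
                printf("fd %d: readable\n", pfd.fd);
        return 0;
}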
 
@@ -4904,6 +4913,7 @@ void perf_event_update_userpage(struct perf_event *event)
 unlock:
        rcu_read_unlock();
 }
+EXPORT_SYMBOL_GPL(perf_event_update_userpage);
 
 static int perf_mmap_fault(struct vm_fault *vmf)
 {
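
perf_event_update_userpage() becomes callable from GPL modules, e.g. so a PMU driver can refresh the mmap'd struct perf_event_mmap_page after reprogramming a counter. For reference, a hedged sketch of the matching userspace read side, which samples the page's seqlock-style lock field and retries on change; it assumes perf_event_open(2) is permitted for the calling user:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        struct perf_event_attr attr;
        struct perf_event_mmap_page *pc;
        uint64_t enabled;
        uint32_t seq;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size   = sizeof(attr);
        attr.type   = PERF_TYPE_SOFTWARE;
        attr.config = PERF_COUNT_SW_CPU_CLOCK;

        fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
        if (fd < 0) {
                perror("perf_event_open");
                return 1;
        }

        /* One page is enough to see the control page the kernel updates. */
        pc = mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED, fd, 0);
        if (pc == MAP_FAILED) {
                perror("mmap");
                return 1;
        }

        /* Seqlock-style read: retry if the kernel updated the page meanwhile. */
        do {
                seq = pc->lock;
                __sync_synchronize();
                enabled = pc->time_enabled;
                __sync_synchronize();
        } while (pc->lock != seq);

        printf("time_enabled = %llu ns\n", (unsigned long long)enabled);
        close(fd);
        return 0;
}
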
@@ -5815,19 +5825,11 @@ void perf_output_sample(struct perf_output_handle *handle,
                perf_output_read(handle, event);
 
        if (sample_type & PERF_SAMPLE_CALLCHAIN) {
-               if (data->callchain) {
-                       int size = 1;
-
-                       if (data->callchain)
-                               size += data->callchain->nr;
-
-                       size *= sizeof(u64);
+               int size = 1;
 
-                       __output_copy(handle, data->callchain, size);
-               } else {
-                       u64 nr = 0;
-                       perf_output_put(handle, nr);
-               }
+               size += data->callchain->nr;
+               size *= sizeof(u64);
+               __output_copy(handle, data->callchain, size);
        }
 
        if (sample_type & PERF_SAMPLE_RAW) {
@@ -5980,6 +5982,26 @@ static u64 perf_virt_to_phys(u64 virt)
        return phys_addr;
 }
 
+static struct perf_callchain_entry __empty_callchain = { .nr = 0, };
+
+static struct perf_callchain_entry *
+perf_callchain(struct perf_event *event, struct pt_regs *regs)
+{
+       bool kernel = !event->attr.exclude_callchain_kernel;
+       bool user   = !event->attr.exclude_callchain_user;
+       /* Disallow cross-task user callchains. */
+       bool crosstask = event->ctx->task && event->ctx->task != current;
+       const u32 max_stack = event->attr.sample_max_stack;
+       struct perf_callchain_entry *callchain;
+
+       if (!kernel && !user)
+               return &__empty_callchain;
+
+       callchain = get_perf_callchain(regs, 0, kernel, user,
+                                      max_stack, crosstask, true);
+       return callchain ?: &__empty_callchain;
+}
+
 void perf_prepare_sample(struct perf_event_header *header,
                         struct perf_sample_data *data,
                         struct perf_event *event,
@@ -6002,9 +6024,7 @@ void perf_prepare_sample(struct perf_event_header *header,
                int size = 1;
 
                data->callchain = perf_callchain(event, regs);
-
-               if (data->callchain)
-                       size += data->callchain->nr;
+               size += data->callchain->nr;
 
                header->size += size * sizeof(u64);
        }
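
In both output paths the callchain's on-wire size is (1 + nr) * sizeof(u64): one u64 for the count followed by nr instruction pointers, so the empty callchain still contributes a single u64 of zero. A toy check of that arithmetic:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* PERF_SAMPLE_CALLCHAIN payload: a u64 count, then nr u64 entries. */
static size_t callchain_bytes(uint64_t nr)
{
        return (1 + nr) * sizeof(uint64_t);
}

int main(void)
{
        assert(callchain_bytes(0) == 8);        /* __empty_callchain: just nr */
        assert(callchain_bytes(3) == 32);       /* nr plus three IPs */
        return 0;
}
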
@@ -8516,6 +8536,29 @@ fail_clear_files:
        return ret;
 }
 
+static int
+perf_tracepoint_set_filter(struct perf_event *event, char *filter_str)
+{
+       struct perf_event_context *ctx = event->ctx;
+       int ret;
+
+       /*
+        * Beware, here be dragons!!
+        *
+        * The tracepoint code will deadlock against ctx->mutex, but it does not
+        * actually need that lock. So temporarily drop ctx->mutex; as per
+        * perf_event_ctx_lock() we already hold a reference on ctx.
+        *
+        * This can result in the event getting moved to a different ctx, but that
+        * does not affect the tracepoint state.
+        */
+       mutex_unlock(&ctx->mutex);
+       ret = ftrace_profile_set_filter(event, event->attr.config, filter_str);
+       mutex_lock(&ctx->mutex);
+
+       return ret;
+}
+
 static int perf_event_set_filter(struct perf_event *event, void __user *arg)
 {
        char *filter_str;
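
The new helper drops ctx->mutex around ftrace_profile_set_filter() because the tracepoint side takes its own locks in the opposite order; the reference taken by perf_event_ctx_lock() is what keeps ctx alive across the window. A minimal userspace analogy of the unlock/call/relock shape (names hypothetical):

#include <pthread.h>

struct fctx {
        pthread_mutex_t mutex;
        int refcount;           /* pinned by the caller, so fctx stays alive */
};

static int foreign_locking_work(void)  /* stand-in for the ftrace call */
{
        return 0;
}

static int set_filter(struct fctx *ctx)
{
        int ret;

        /* Caller holds ctx->mutex and a reference on ctx. Drop the mutex
         * around the call that must not run under it; the reference keeps
         * ctx from disappearing in the window. */
        pthread_mutex_unlock(&ctx->mutex);
        ret = foreign_locking_work();
        pthread_mutex_lock(&ctx->mutex);

        return ret;
}

int main(void)
{
        struct fctx ctx = { .mutex = PTHREAD_MUTEX_INITIALIZER, .refcount = 1 };

        pthread_mutex_lock(&ctx.mutex);
        set_filter(&ctx);
        pthread_mutex_unlock(&ctx.mutex);
        return 0;
}
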
@@ -8532,8 +8575,7 @@ static int perf_event_set_filter(struct perf_event *event, void __user *arg)
 
        if (IS_ENABLED(CONFIG_EVENT_TRACING) &&
            event->attr.type == PERF_TYPE_TRACEPOINT)
-               ret = ftrace_profile_set_filter(event, event->attr.config,
-                                               filter_str);
+               ret = perf_tracepoint_set_filter(event, filter_str);
        else if (has_addr_filter(event))
                ret = perf_event_set_addr_filter(event, filter_str);
 
@@ -9168,7 +9210,13 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
        if (!try_module_get(pmu->module))
                return -ENODEV;
 
-       if (event->group_leader != event) {
+       /*
+        * A number of pmu->event_init() methods iterate the sibling_list to,
+        * for example, validate whether the group fits on the PMU. Therefore,
+        * if this is a sibling event, acquire the ctx->mutex to protect
+        * the sibling_list.
+        */
+       if (event->group_leader != event && pmu->task_ctx_nr != perf_sw_context) {
                /*
                 * This ctx->mutex can nest when we're called through
                 * inheritance. See the perf_event_ctx_lock_nested() comment.
@@ -10703,6 +10751,19 @@ inherit_event(struct perf_event *parent_event,
        if (IS_ERR(child_event))
                return child_event;
 
+
+       if ((child_event->attach_state & PERF_ATTACH_TASK_DATA) &&
+           !child_ctx->task_ctx_data) {
+               struct pmu *pmu = child_event->pmu;
+
+               child_ctx->task_ctx_data = kzalloc(pmu->task_ctx_size,
+                                                  GFP_KERNEL);
+               if (!child_ctx->task_ctx_data) {
+                       free_event(child_event);
+                       return NULL;
+               }
+       }
+
        /*
         * is_orphaned_event() and list_add_tail(&parent_event->child_list)
         * must be under the same lock in order to serialize against
@@ -10713,6 +10774,7 @@ inherit_event(struct perf_event *parent_event,
        if (is_orphaned_event(parent_event) ||
            !atomic_long_inc_not_zero(&parent_event->refcount)) {
                mutex_unlock(&parent_event->child_mutex);
+               /* task_ctx_data is freed with child_ctx */
                free_event(child_event);
                return NULL;
        }
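
task_ctx_data is now allocated lazily, the first time an inherited child event carries PERF_ATTACH_TASK_DATA; on allocation failure only the freshly created event is unwound, while any task_ctx_data that did get attached is owned by, and later freed with, child_ctx. A hedged userspace sketch of that lazy-init-with-unwind shape (names and sizes hypothetical):

#include <stdlib.h>

struct context {
        void *task_ctx_data;    /* owned by the context, freed with it */
};

struct event {
        int needs_task_data;
};

static struct event *inherit_one(struct context *ctx, int needs_data)
{
        struct event *ev = calloc(1, sizeof(*ev));

        if (!ev)
                return NULL;
        ev->needs_task_data = needs_data;

        if (ev->needs_task_data && !ctx->task_ctx_data) {
                ctx->task_ctx_data = calloc(1, 64);     /* hypothetical size */
                if (!ctx->task_ctx_data) {
                        free(ev);       /* unwind only what this call made */
                        return NULL;
                }
        }
        return ev;      /* ctx->task_ctx_data outlives the event */
}

int main(void)
{
        struct context ctx = { 0 };
        struct event *ev = inherit_one(&ctx, 1);

        free(ev);
        free(ctx.task_ctx_data);        /* "freed with child_ctx" */
        return 0;
}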