/*
 * trace event based perf event profiling/tracing
 *
 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
 * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
 */

#include <linux/module.h>
#include <linux/kprobes.h>
#include "trace.h"

static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];

/*
 * Force it to be aligned to unsigned long to avoid misaligned access
 * surprises
 */
typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
        perf_trace_t;

/* Count the events in use (per event id, not per instance) */
static int total_ref_count;

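/*
 * Decide whether a perf event may see raw tracepoint data: counting-only
 * events are always fine, CAP_ANY events attached to a task are fine,
 * anything else needs CAP_SYS_ADMIN when the tracepoint paranoia setting
 * restricts raw data to root.
 */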
static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
                                 struct perf_event *p_event)
{
        /* No tracing, just counting, so no obvious leak */
        if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
                return 0;

        /* Some events are ok to be traced by non-root users... */
        if (p_event->attach_state == PERF_ATTACH_TASK) {
                if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY)
                        return 0;
        }

        /*
         * ...otherwise raw tracepoint data can be a severe data leak,
         * only allow root to have these.
         */
        if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
                return -EPERM;

        return 0;
}

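/*
 * Register one more perf user of the event. The first user allocates the
 * per-cpu hlist of attached perf events; the first user system wide also
 * allocates the per-context scratch buffers. The event class is then asked
 * to install its perf callback.
 */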
static int perf_trace_event_reg(struct ftrace_event_call *tp_event,
                                struct perf_event *p_event)
{
        struct hlist_head __percpu *list;
        int ret = -ENOMEM;
        int cpu;

        p_event->tp_event = tp_event;
        if (tp_event->perf_refcount++ > 0)
                return 0;

        list = alloc_percpu(struct hlist_head);
        if (!list)
                goto fail;

        for_each_possible_cpu(cpu)
                INIT_HLIST_HEAD(per_cpu_ptr(list, cpu));

        tp_event->perf_events = list;

        if (!total_ref_count) {
                char __percpu *buf;
                int i;

                for (i = 0; i < PERF_NR_CONTEXTS; i++) {
                        buf = (char __percpu *)alloc_percpu(perf_trace_t);
                        if (!buf)
                                goto fail;

                        perf_trace_buf[i] = buf;
                }
        }

        ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER, NULL);
        if (ret)
                goto fail;

        total_ref_count++;
        return 0;

fail:
        if (!total_ref_count) {
                int i;

                for (i = 0; i < PERF_NR_CONTEXTS; i++) {
                        free_percpu(perf_trace_buf[i]);
                        perf_trace_buf[i] = NULL;
                }
        }

        if (!--tp_event->perf_refcount) {
                free_percpu(tp_event->perf_events);
                tp_event->perf_events = NULL;
        }

        return ret;
}

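/*
 * Drop one perf user. The last one removes the event class callback, waits
 * for in-flight probes with tracepoint_synchronize_unregister(), then frees
 * the per-cpu hlist (and the shared buffers once no event is left at all).
 * The module reference taken in perf_trace_init() is always dropped.
 */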
static void perf_trace_event_unreg(struct perf_event *p_event)
{
        struct ftrace_event_call *tp_event = p_event->tp_event;
        int i;

        if (--tp_event->perf_refcount > 0)
                goto out;

        tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER, NULL);

        /*
         * Ensure our callback won't be called anymore. The buffers
         * will be freed after that.
         */
        tracepoint_synchronize_unregister();

        free_percpu(tp_event->perf_events);
        tp_event->perf_events = NULL;

        if (!--total_ref_count) {
                for (i = 0; i < PERF_NR_CONTEXTS; i++) {
                        free_percpu(perf_trace_buf[i]);
                        perf_trace_buf[i] = NULL;
                }
        }
out:
        module_put(tp_event->mod);
}

static int perf_trace_event_open(struct perf_event *p_event)
{
        struct ftrace_event_call *tp_event = p_event->tp_event;
        return tp_event->class->reg(tp_event, TRACE_REG_PERF_OPEN, p_event);
}

static void perf_trace_event_close(struct perf_event *p_event)
{
        struct ftrace_event_call *tp_event = p_event->tp_event;
        tp_event->class->reg(tp_event, TRACE_REG_PERF_CLOSE, p_event);
}

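/*
 * Full setup of one perf event on a trace event: permission check,
 * registration and the per-event open hook, undone on failure.
 */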
static int perf_trace_event_init(struct ftrace_event_call *tp_event,
                                 struct perf_event *p_event)
{
        int ret;

        ret = perf_trace_event_perm(tp_event, p_event);
        if (ret)
                return ret;

        ret = perf_trace_event_reg(tp_event, p_event);
        if (ret)
                return ret;

        ret = perf_trace_event_open(p_event);
        if (ret) {
                perf_trace_event_unreg(p_event);
                return ret;
        }

        return 0;
}

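/*
 * Entry point used by the perf core: find the trace event whose id matches
 * attr.config under event_mutex, pin its module and set the event up.
 */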
int perf_trace_init(struct perf_event *p_event)
{
        struct ftrace_event_call *tp_event;
        int event_id = p_event->attr.config;
        int ret = -EINVAL;

        mutex_lock(&event_mutex);
        list_for_each_entry(tp_event, &ftrace_events, list) {
                if (tp_event->event.type == event_id &&
                    tp_event->class && tp_event->class->reg &&
                    try_module_get(tp_event->mod)) {
                        ret = perf_trace_event_init(tp_event, p_event);
                        if (ret)
                                module_put(tp_event->mod);
                        break;
                }
        }
        mutex_unlock(&event_mutex);

        return ret;
}

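/* Undo perf_trace_init(): close and unregister the event under event_mutex. */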
void perf_trace_destroy(struct perf_event *p_event)
{
        mutex_lock(&event_mutex);
        perf_trace_event_close(p_event);
        perf_trace_event_unreg(p_event);
        mutex_unlock(&event_mutex);
}

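/*
 * pmu ->add() callback: hook the event into this CPU's hlist so the
 * tracepoint probe will see it, honouring PERF_EF_START.
 */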
int perf_trace_add(struct perf_event *p_event, int flags)
{
        struct ftrace_event_call *tp_event = p_event->tp_event;
        struct hlist_head __percpu *pcpu_list;
        struct hlist_head *list;

        pcpu_list = tp_event->perf_events;
        if (WARN_ON_ONCE(!pcpu_list))
                return -EINVAL;

        if (!(flags & PERF_EF_START))
                p_event->hw.state = PERF_HES_STOPPED;

        list = this_cpu_ptr(pcpu_list);
        hlist_add_head_rcu(&p_event->hlist_entry, list);

        return tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event);
}

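/* pmu ->del() callback: unhook the event from this CPU's hlist. */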
void perf_trace_del(struct perf_event *p_event, int flags)
{
        struct ftrace_event_call *tp_event = p_event->tp_event;
        hlist_del_rcu(&p_event->hlist_entry);
        tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event);
}

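/*
 * Reserve the per-cpu scratch buffer for the current recursion context and
 * fill in the common trace entry header. Returns NULL when no recursion
 * context is available; otherwise *rctxp holds the context the caller must
 * release once the record has been submitted.
 */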
__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
                                       struct pt_regs *regs, int *rctxp)
{
        struct trace_entry *entry;
        unsigned long flags;
        char *raw_data;
        int pc;

        BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));

        pc = preempt_count();

        *rctxp = perf_swevent_get_recursion_context();
        if (*rctxp < 0)
                return NULL;

        raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]);

        /* zero the dead bytes from align to not leak stack to user */
        memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));

        entry = (struct trace_entry *)raw_data;
        local_save_flags(flags);
        tracing_generic_entry_update(entry, flags, pc);
        entry->type = type;

        return raw_data;
}
EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);