/**
 * @file cpu_buffer.c
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon <levon@movementarian.org>
 * @author Barry Kasindorf <barry.kasindorf@amd.com>
 *
 * Each CPU has a local buffer that stores PC value/event
 * pairs. We also log context switches when we notice them.
 * Eventually each CPU's buffer is processed into the global
 * event buffer by sync_buffer().
 *
 * We use a local buffer for two reasons: an NMI or similar
 * interrupt cannot synchronise, and high sampling rates
 * would lead to catastrophic global synchronisation if
 * a global buffer was used.
 */
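
/*
 * The buffer is a single-writer, single-reader ring: samples are
 * written at head_pos from interrupt context on the local CPU and
 * drained from tail_pos by sync_buffer(), which runs off a per-CPU
 * workqueue (see wq_sync_buffer() below).
 */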

#include <linux/sched.h>
#include <linux/oprofile.h>
#include <linux/vmalloc.h>
#include <linux/errno.h>

#include "event_buffer.h"
#include "cpu_buffer.h"
#include "buffer_sync.h"
#include "oprof.h"

DEFINE_PER_CPU(struct oprofile_cpu_buffer, cpu_buffer);

static void wq_sync_buffer(struct work_struct *work);

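/* Flush period for the workqueue: HZ / 10 jiffies, i.e. each CPU's
 * buffer is drained into the global event buffer roughly ten times
 * per second. */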
#define DEFAULT_TIMER_EXPIRE (HZ / 10)
static int work_enabled;

void free_cpu_buffers(void)
{
	int i;

	for_each_possible_cpu(i) {
		vfree(per_cpu(cpu_buffer, i).buffer);
		per_cpu(cpu_buffer, i).buffer = NULL;
	}
}

unsigned long oprofile_get_cpu_buffer_size(void)
{
	return fs_cpu_buffer_size;
}

void oprofile_cpu_buffer_inc_smpl_lost(void)
{
	struct oprofile_cpu_buffer *cpu_buf
		= &__get_cpu_var(cpu_buffer);

	cpu_buf->sample_lost_overflow++;
}

int alloc_cpu_buffers(void)
{
	int i;

	unsigned long buffer_size = fs_cpu_buffer_size;

	for_each_possible_cpu(i) {
		struct oprofile_cpu_buffer *b = &per_cpu(cpu_buffer, i);

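		/*
		 * Allocate the buffer from memory local to this CPU's
		 * node so that sampling writes stay NUMA-local.
		 */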
		b->buffer = vmalloc_node(sizeof(struct op_sample) * buffer_size,
			cpu_to_node(i));
		if (!b->buffer)
			goto fail;

		b->last_task = NULL;
		b->last_is_kernel = -1;
		b->tracing = 0;
		b->buffer_size = buffer_size;
		b->tail_pos = 0;
		b->head_pos = 0;
		b->sample_received = 0;
		b->sample_lost_overflow = 0;
		b->backtrace_aborted = 0;
		b->sample_invalid_eip = 0;
		b->cpu = i;
		INIT_DELAYED_WORK(&b->work, wq_sync_buffer);
	}
	return 0;

fail:
	free_cpu_buffers();
	return -ENOMEM;
}

void start_cpu_work(void)
{
	int i;

	work_enabled = 1;

	for_each_online_cpu(i) {
		struct oprofile_cpu_buffer *b = &per_cpu(cpu_buffer, i);

		/*
		 * Spread the work by 1 jiffy per cpu so they don't all
		 * fire at once.
		 */
		schedule_delayed_work_on(i, &b->work, DEFAULT_TIMER_EXPIRE + i);
	}
}

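/*
 * work_enabled is cleared before cancelling so that a wq_sync_buffer()
 * instance that is already running will not re-arm itself.
 */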
void end_cpu_work(void)
{
	int i;

	work_enabled = 0;

	for_each_online_cpu(i) {
		struct oprofile_cpu_buffer *b = &per_cpu(cpu_buffer, i);

		cancel_delayed_work(&b->work);
	}

	flush_scheduled_work();
}

/* Resets the cpu buffer to a sane state. */
void cpu_buffer_reset(struct oprofile_cpu_buffer *cpu_buf)
{
	/* reset these to invalid values; the next sample
	 * collected will populate the buffer with proper
	 * values to initialize the buffer
	 */
	cpu_buf->last_is_kernel = -1;
	cpu_buf->last_task = NULL;
}

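/*
 * One slot is deliberately kept empty so that head_pos == tail_pos
 * unambiguously means "empty"; this is why both branches below
 * subtract 1 from the free-slot count.
 */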
/* compute number of available slots in cpu_buffer queue */
static unsigned long nr_available_slots(struct oprofile_cpu_buffer const *b)
{
	unsigned long head = b->head_pos;
	unsigned long tail = b->tail_pos;

	if (tail > head)
		return (tail - head) - 1;

	return tail + (b->buffer_size - head) - 1;
}

static void increment_head(struct oprofile_cpu_buffer *b)
{
	unsigned long new_head = b->head_pos + 1;

	/* Ensure anything written to the slot before we
	 * increment is visible */
	wmb();

	if (new_head < b->buffer_size)
		b->head_pos = new_head;
	else
		b->head_pos = 0;
}

static inline void
add_sample(struct oprofile_cpu_buffer *cpu_buf,
	   unsigned long pc, unsigned long event)
{
	struct op_sample *entry = &cpu_buf->buffer[cpu_buf->head_pos];
	entry->eip = pc;
	entry->event = event;
	increment_head(cpu_buf);
}

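/*
 * Escape codes are stored in-band: an entry whose eip field is
 * ESCAPE_CODE carries metadata (kernel/user switch, task switch,
 * trace begin) in its event field instead of a sample.
 */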
static inline void
add_code(struct oprofile_cpu_buffer *buffer, unsigned long value)
{
	add_sample(buffer, ESCAPE_CODE, value);
}

/* This must be safe from any context. It's safe writing here
 * because of the head/tail separation of the writer and reader
 * of the CPU buffer.
 *
 * is_kernel is needed because on some architectures you cannot
 * tell if you are in kernel or user space simply by looking at
 * pc. We tag this in the buffer by generating kernel enter/exit
 * events whenever is_kernel changes.
 */
static int log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
		      int is_kernel, unsigned long event)
{
	struct task_struct *task;

	cpu_buf->sample_received++;

	if (pc == ESCAPE_CODE) {
		cpu_buf->sample_invalid_eip++;
		return 0;
	}

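	/*
	 * Worst case we need three free slots: one kernel/user switch
	 * code, one task switch code, and the sample itself.
	 */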
	if (nr_available_slots(cpu_buf) < 3) {
		cpu_buf->sample_lost_overflow++;
		return 0;
	}

	is_kernel = !!is_kernel;

	task = current;

	/* notice a switch from user->kernel or vice versa */
	if (cpu_buf->last_is_kernel != is_kernel) {
		cpu_buf->last_is_kernel = is_kernel;
		add_code(cpu_buf, is_kernel);
	}

	/* notice a task switch */
	if (cpu_buf->last_task != task) {
		cpu_buf->last_task = task;
		add_code(cpu_buf, (unsigned long)task);
	}

	add_sample(cpu_buf, pc, event);
	return 1;
}

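/*
 * Four slots are reserved here: one for the CPU_TRACE_BEGIN code plus
 * the worst-case three that the following log_sample() call may use.
 */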
static int oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf)
{
	if (nr_available_slots(cpu_buf) < 4) {
		cpu_buf->sample_lost_overflow++;
		return 0;
	}

	add_code(cpu_buf, CPU_TRACE_BEGIN);
	cpu_buf->tracing = 1;
	return 1;
}

static void oprofile_end_trace(struct oprofile_cpu_buffer *cpu_buf)
{
	cpu_buf->tracing = 0;
}

void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
			     unsigned long event, int is_kernel)
{
	struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);

	if (!backtrace_depth) {
		log_sample(cpu_buf, pc, is_kernel, event);
		return;
	}

	if (!oprofile_begin_trace(cpu_buf))
		return;

	/* if log_sample() fails we can't backtrace since we lost the source
	 * of this event */
	if (log_sample(cpu_buf, pc, is_kernel, event))
		oprofile_ops.backtrace(regs, backtrace_depth);
	oprofile_end_trace(cpu_buf);
}

void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
{
	int is_kernel = !user_mode(regs);
	unsigned long pc = profile_pc(regs);

	oprofile_add_ext_sample(pc, regs, event, is_kernel);
}

#ifdef CONFIG_OPROFILE_IBS

#define MAX_IBS_SAMPLE_SIZE 14

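/*
 * An IBS record is written as an ESCAPE_CODE entry carrying ibs_code,
 * followed by three op_sample pairs of raw IBS register words (six
 * pairs for IBS_OP_BEGIN), then an optional backtrace.
 */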
void oprofile_add_ibs_sample(struct pt_regs *const regs,
			     unsigned int *const ibs_sample, int ibs_code)
{
	int is_kernel = !user_mode(regs);
	struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
	struct task_struct *task;

	cpu_buf->sample_received++;

	if (nr_available_slots(cpu_buf) < MAX_IBS_SAMPLE_SIZE) {
		/* we can't backtrace since we lost the source of this event */
		cpu_buf->sample_lost_overflow++;
		return;
	}

	/* notice a switch from user->kernel or vice versa */
	if (cpu_buf->last_is_kernel != is_kernel) {
		cpu_buf->last_is_kernel = is_kernel;
		add_code(cpu_buf, is_kernel);
	}

	/* notice a task switch */
	if (!is_kernel) {
		task = current;
		if (cpu_buf->last_task != task) {
			cpu_buf->last_task = task;
			add_code(cpu_buf, (unsigned long)task);
		}
	}

	add_code(cpu_buf, ibs_code);
	add_sample(cpu_buf, ibs_sample[0], ibs_sample[1]);
	add_sample(cpu_buf, ibs_sample[2], ibs_sample[3]);
	add_sample(cpu_buf, ibs_sample[4], ibs_sample[5]);

	if (ibs_code == IBS_OP_BEGIN) {
		add_sample(cpu_buf, ibs_sample[6], ibs_sample[7]);
		add_sample(cpu_buf, ibs_sample[8], ibs_sample[9]);
		add_sample(cpu_buf, ibs_sample[10], ibs_sample[11]);
	}

	if (backtrace_depth)
		oprofile_ops.backtrace(regs, backtrace_depth);
}

#endif

void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
{
	struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
	log_sample(cpu_buf, pc, is_kernel, event);
}

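/*
 * Backtrace entries arrive here one PC at a time while cpu_buf->tracing
 * is set; each is stored as a plain sample with an event value of 0.
 */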
void oprofile_add_trace(unsigned long pc)
{
	struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);

	if (!cpu_buf->tracing)
		return;

	if (nr_available_slots(cpu_buf) < 1) {
		cpu_buf->tracing = 0;
		cpu_buf->sample_lost_overflow++;
		return;
	}

	/* a broken frame can give an eip with the same value as an escape
	 * code, abort the trace if we get it */
	if (pc == ESCAPE_CODE) {
		cpu_buf->tracing = 0;
		cpu_buf->backtrace_aborted++;
		return;
	}

	add_sample(cpu_buf, pc, 0);
}

/*
 * This serves to avoid cpu buffer overflow, and makes sure
 * the task mortuary progresses.
 *
 * By using schedule_delayed_work_on and then schedule_delayed_work
 * we guarantee this will stay on the correct cpu.
 */
static void wq_sync_buffer(struct work_struct *work)
{
	struct oprofile_cpu_buffer *b =
		container_of(work, struct oprofile_cpu_buffer, work.work);
	if (b->cpu != smp_processor_id()) {
		printk(KERN_DEBUG "WQ on CPU%d, prefer CPU%d\n",
		       smp_processor_id(), b->cpu);

		if (!cpu_online(b->cpu)) {
			cancel_delayed_work(&b->work);
			return;
		}
	}
	sync_buffer(b->cpu);

	/* don't re-add the work if we're shutting down */
	if (work_enabled)
		schedule_delayed_work(&b->work, DEFAULT_TIMER_EXPIRE);
}