]>
Commit | Line | Data |
---|---|---|
eee3af4a MM |
1 | /* |
2 | * Debug Store support | |
3 | * | |
4 | * This provides a low-level interface to the hardware's Debug Store | |
93fa7636 | 5 | * feature that is used for branch trace store (BTS) and |
eee3af4a MM |
6 | * precise-event based sampling (PEBS). |
7 | * | |
93fa7636 | 8 | * It manages: |
c2724775 | 9 | * - DS and BTS hardware configuration |
6abb11ae | 10 | * - buffer overflow handling (to be done) |
93fa7636 | 11 | * - buffer access |
eee3af4a | 12 | * |
c2724775 MM |
13 | * It does not do: |
14 | * - security checking (is the caller allowed to trace the task) | |
15 | * - buffer allocation (memory accounting) | |
eee3af4a | 16 | * |
eee3af4a | 17 | * |
93fa7636 MM |
18 | * Copyright (C) 2007-2008 Intel Corporation. |
19 | * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008 | |
eee3af4a MM |
20 | */ |
21 | ||
93fa7636 | 22 | |
eee3af4a MM |
23 | #include <asm/ds.h> |
24 | ||
25 | #include <linux/errno.h> | |
26 | #include <linux/string.h> | |
27 | #include <linux/slab.h> | |
93fa7636 | 28 | #include <linux/sched.h> |
3c933904 | 29 | #include <linux/mm.h> |
ca0002a1 | 30 | #include <linux/kernel.h> |
93fa7636 MM |
31 | |
32 | ||
33 | /* | |
34 | * The configuration for a particular DS hardware implementation. | |
35 | */ | |
36 | struct ds_configuration { | |
c2724775 MM |
37 | /* the name of the configuration */ |
38 | const char *name; | |
39 | /* the size of one pointer-typed field in the DS structure and | |
40 | in the BTS and PEBS buffers in bytes; | |
41 | this covers the first 8 DS fields related to buffer management. */ | |
93fa7636 MM |
42 | unsigned char sizeof_field; |
43 | /* the size of a BTS/PEBS record in bytes */ | |
44 | unsigned char sizeof_rec[2]; | |
c2724775 MM |
45 | /* a series of bit-masks to control various features indexed |
46 | * by enum ds_feature */ | |
47 | unsigned long ctl[dsf_ctl_max]; | |
93fa7636 | 48 | }; |
c2724775 MM |
49 | static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array); |
50 | ||
51 | #define ds_cfg per_cpu(ds_cfg_array, smp_processor_id()) | |
52 | ||
53 | #define MAX_SIZEOF_DS (12 * 8) /* maximal size of a DS configuration */ | |
54 | #define MAX_SIZEOF_BTS (3 * 8) /* maximal size of a BTS record */ | |
55 | #define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */ | |
56 | ||
57 | #define BTS_CONTROL \ | |
58 | (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\ | |
59 | ds_cfg.ctl[dsf_bts_overflow]) | |
60 | ||
eee3af4a | 61 | |
ca0002a1 MM |
62 | /* |
63 | * A BTS or PEBS tracer. | |
64 | * | |
65 | * This holds the configuration of the tracer and serves as a handle | |
66 | * to identify tracers. | |
67 | */ | |
68 | struct ds_tracer { | |
69 | /* the DS context (partially) owned by this tracer */ | |
70 | struct ds_context *context; | |
71 | /* the buffer provided on ds_request() and its size in bytes */ | |
72 | void *buffer; | |
73 | size_t size; | |
ca0002a1 MM |
74 | }; |
75 | ||
76 | struct bts_tracer { | |
77 | /* the common DS part */ | |
78 | struct ds_tracer ds; | |
c2724775 MM |
79 | /* the trace including the DS configuration */ |
80 | struct bts_trace trace; | |
ca0002a1 MM |
81 | /* buffer overflow notification function */ |
82 | bts_ovfl_callback_t ovfl; | |
83 | }; | |
84 | ||
85 | struct pebs_tracer { | |
86 | /* the common DS part */ | |
87 | struct ds_tracer ds; | |
c2724775 MM |
88 | /* the trace including the DS configuration */ |
89 | struct pebs_trace trace; | |
ca0002a1 MM |
90 | /* buffer overflow notification function */ |
91 | pebs_ovfl_callback_t ovfl; | |
92 | }; | |
eee3af4a MM |
93 | |
94 | /* | |
95 | * Debug Store (DS) save area configuration (see Intel64 and IA32 | |
96 | * Architectures Software Developer's Manual, section 18.5) | |
97 | * | |
98 | * The DS configuration consists of the following fields; different | |
99 | * architectures vary in the size of those fields. |
100 | * - double-word aligned base linear address of the BTS buffer | |
101 | * - write pointer into the BTS buffer | |
102 | * - end linear address of the BTS buffer (one byte beyond the end of | |
103 | * the buffer) | |
104 | * - interrupt pointer into BTS buffer | |
105 | * (interrupt occurs when write pointer passes interrupt pointer) | |
106 | * - double-word aligned base linear address of the PEBS buffer | |
107 | * - write pointer into the PEBS buffer | |
108 | * - end linear address of the PEBS buffer (one byte beyond the end of | |
109 | * the buffer) | |
110 | * - interrupt pointer into PEBS buffer | |
111 | * (interrupt occurs when write pointer passes interrupt pointer) | |
112 | * - value to which counter is reset following counter overflow | |
113 | * | |
93fa7636 MM |
114 | * Later architectures use 64bit pointers throughout, whereas earlier |
115 | * architectures use 32bit pointers in 32bit mode. | |
eee3af4a | 116 | * |
eee3af4a | 117 | * |
93fa7636 MM |
118 | * We compute the base address for the first 8 fields based on: |
119 | * - the field size stored in the DS configuration | |
120 | * - the relative field position | |
121 | * - an offset giving the start of the respective region | |
eee3af4a | 122 | * |
93fa7636 MM |
123 | * This offset is further used to index various arrays holding |
124 | * information for BTS and PEBS at the respective index. | |
eee3af4a | 125 | * |
93fa7636 MM |
126 | * On later 32bit processors, we only access the lower 32bit of the |
127 | * 64bit pointer fields. The upper halves will be zeroed out. | |
eee3af4a MM |
128 | */ |
129 | ||
93fa7636 MM |
/* position of a buffer management field within its BTS or PEBS region */
enum ds_field {
	ds_buffer_base		= 0,
	ds_index		= 1,
	ds_absolute_maximum	= 2,
	ds_interrupt_threshold	= 3,
};
eee3af4a | 136 | |
93fa7636 MM |
/* selects the BTS or the PEBS region; also used as an array index */
enum ds_qualifier {
	ds_bts	= 0,
	ds_pebs	= 1
};
141 | ||
93fa7636 MM |
142 | static inline unsigned long ds_get(const unsigned char *base, |
143 | enum ds_qualifier qual, enum ds_field field) | |
144 | { | |
145 | base += (ds_cfg.sizeof_field * (field + (4 * qual))); | |
146 | return *(unsigned long *)base; | |
147 | } | |
148 | ||
149 | static inline void ds_set(unsigned char *base, enum ds_qualifier qual, | |
150 | enum ds_field field, unsigned long value) | |
151 | { | |
152 | base += (ds_cfg.sizeof_field * (field + (4 * qual))); | |
153 | (*(unsigned long *)base) = value; | |
154 | } | |
155 | ||
156 | ||
/*
 * ds_lock is only taken while BTS or PEBS resources are allocated or
 * released; nothing else is protected by it.
 */
static DEFINE_SPINLOCK(ds_lock);
eee3af4a | 161 | |
eee3af4a MM |
162 | |
163 | /* | |
93fa7636 MM |
164 | * We either support (system-wide) per-cpu or per-thread allocation. |
165 | * We distinguish the two based on the task_struct pointer, where a | |
166 | * NULL pointer indicates per-cpu allocation for the current cpu. | |
167 | * | |
168 | * Allocations are use-counted. As soon as resources are allocated, | |
169 | * further allocations must be of the same type (per-cpu or | |
170 | * per-thread). We model this by counting allocations (i.e. the number | |
171 | * of tracers of a certain type) for one type negatively: | |
172 | * =0 no tracers | |
173 | * >0 number of per-thread tracers | |
174 | * <0 number of per-cpu tracers | |
175 | * | |
93fa7636 MM |
176 | * The tracers counter essentially gives the number of ds contexts for a certain |
177 | * type of allocation. | |
eee3af4a | 178 | */ |
c2724775 | 179 | static atomic_t tracers = ATOMIC_INIT(0); |
93fa7636 MM |
180 | |
181 | static inline void get_tracer(struct task_struct *task) | |
eee3af4a | 182 | { |
c2724775 MM |
183 | if (task) |
184 | atomic_inc(&tracers); | |
185 | else | |
186 | atomic_dec(&tracers); | |
eee3af4a | 187 | } |
93fa7636 MM |
188 | |
189 | static inline void put_tracer(struct task_struct *task) | |
eee3af4a | 190 | { |
c2724775 MM |
191 | if (task) |
192 | atomic_dec(&tracers); | |
193 | else | |
194 | atomic_inc(&tracers); | |
eee3af4a | 195 | } |
93fa7636 MM |
196 | |
197 | static inline int check_tracer(struct task_struct *task) | |
eee3af4a | 198 | { |
c2724775 MM |
199 | return task ? |
200 | (atomic_read(&tracers) >= 0) : | |
201 | (atomic_read(&tracers) <= 0); | |
eee3af4a | 202 | } |
93fa7636 MM |
203 | |
204 | ||
205 | /* | |
206 | * The DS context is either attached to a thread or to a cpu: | |
207 | * - in the former case, the thread_struct contains a pointer to the | |
208 | * attached context. | |
209 | * - in the latter case, we use a static array of per-cpu context | |
210 | * pointers. | |
211 | * | |
212 | * Contexts are use-counted. They are allocated on first access and | |
213 | * deallocated when the last user puts the context. | |
93fa7636 | 214 | */ |
c2724775 MM |
215 | struct ds_context { |
216 | /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ | |
217 | unsigned char ds[MAX_SIZEOF_DS]; | |
218 | /* the owner of the BTS and PEBS configuration, respectively */ | |
219 | struct bts_tracer *bts_master; | |
220 | struct pebs_tracer *pebs_master; | |
221 | /* use count */ | |
222 | unsigned long count; | |
223 | /* a pointer to the context location inside the thread_struct | |
224 | * or the per_cpu context array */ | |
225 | struct ds_context **this; | |
226 | /* a pointer to the task owning this context, or NULL, if the | |
227 | * context is owned by a cpu */ | |
228 | struct task_struct *task; | |
229 | }; | |
230 | ||
231 | static DEFINE_PER_CPU(struct ds_context *, system_context_array); | |
93fa7636 | 232 | |
c2724775 | 233 | #define system_context per_cpu(system_context_array, smp_processor_id()) |
93fa7636 | 234 | |
ffc2238a | 235 | static struct ds_context *ds_get_context(struct task_struct *task) |
eee3af4a | 236 | { |
93fa7636 | 237 | struct ds_context **p_context = |
c2724775 | 238 | (task ? &task->thread.ds_ctx : &system_context); |
93fa7636 | 239 | struct ds_context *context = *p_context; |
de90add3 | 240 | unsigned long irq; |
93fa7636 MM |
241 | |
242 | if (!context) { | |
243 | context = kzalloc(sizeof(*context), GFP_KERNEL); | |
de90add3 | 244 | if (!context) |
573da422 | 245 | return NULL; |
93fa7636 | 246 | |
de90add3 MM |
247 | spin_lock_irqsave(&ds_lock, irq); |
248 | ||
10db4ef7 | 249 | if (*p_context) { |
10db4ef7 | 250 | kfree(context); |
10db4ef7 | 251 | |
de90add3 MM |
252 | context = *p_context; |
253 | } else { | |
254 | *p_context = context; | |
93fa7636 | 255 | |
de90add3 MM |
256 | context->this = p_context; |
257 | context->task = task; | |
93fa7636 | 258 | |
de90add3 MM |
259 | if (task) |
260 | set_tsk_thread_flag(task, TIF_DS_AREA_MSR); | |
93fa7636 | 261 | |
de90add3 MM |
262 | if (!task || (task == current)) |
263 | wrmsrl(MSR_IA32_DS_AREA, | |
264 | (unsigned long)context->ds); | |
265 | } | |
c2724775 MM |
266 | |
267 | context->count++; | |
268 | ||
269 | spin_unlock_irqrestore(&ds_lock, irq); | |
270 | } else { | |
271 | spin_lock_irqsave(&ds_lock, irq); | |
272 | ||
273 | context = *p_context; | |
274 | if (context) | |
275 | context->count++; | |
276 | ||
de90add3 | 277 | spin_unlock_irqrestore(&ds_lock, irq); |
93fa7636 | 278 | |
c2724775 MM |
279 | if (!context) |
280 | context = ds_get_context(task); | |
281 | } | |
93fa7636 MM |
282 | |
283 | return context; | |
eee3af4a | 284 | } |
93fa7636 | 285 | |
93fa7636 | 286 | static inline void ds_put_context(struct ds_context *context) |
eee3af4a | 287 | { |
de90add3 MM |
288 | unsigned long irq; |
289 | ||
93fa7636 MM |
290 | if (!context) |
291 | return; | |
292 | ||
de90add3 | 293 | spin_lock_irqsave(&ds_lock, irq); |
93fa7636 | 294 | |
c2724775 MM |
295 | if (--context->count) { |
296 | spin_unlock_irqrestore(&ds_lock, irq); | |
297 | return; | |
298 | } | |
93fa7636 | 299 | |
573da422 | 300 | *(context->this) = NULL; |
93fa7636 MM |
301 | |
302 | if (context->task) | |
303 | clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); | |
304 | ||
305 | if (!context->task || (context->task == current)) | |
306 | wrmsrl(MSR_IA32_DS_AREA, 0); | |
307 | ||
de90add3 | 308 | spin_unlock_irqrestore(&ds_lock, irq); |
c2724775 MM |
309 | |
310 | kfree(context); | |
eee3af4a | 311 | } |
93fa7636 MM |
312 | |
313 | ||
314 | /* | |
c2724775 | 315 | * Call the tracer's callback on a buffer overflow. |
93fa7636 | 316 | * |
93fa7636 MM |
317 | * context: the ds context |
318 | * qual: the buffer type | |
319 | */ | |
ca0002a1 MM |
320 | static void ds_overflow(struct ds_context *context, enum ds_qualifier qual) |
321 | { | |
322 | switch (qual) { | |
c2724775 MM |
323 | case ds_bts: |
324 | if (context->bts_master && | |
325 | context->bts_master->ovfl) | |
326 | context->bts_master->ovfl(context->bts_master); | |
327 | break; | |
328 | case ds_pebs: | |
329 | if (context->pebs_master && | |
330 | context->pebs_master->ovfl) | |
331 | context->pebs_master->ovfl(context->pebs_master); | |
ca0002a1 | 332 | break; |
ca0002a1 | 333 | } |
c2724775 MM |
334 | } |
335 | ||
336 | ||
337 | /* | |
338 | * Write raw data into the BTS or PEBS buffer. | |
339 | * | |
340 | * The remainder of any partially written record is zeroed out. | |
341 | * | |
342 | * context: the DS context | |
343 | * qual: the buffer type | |
344 | * record: the data to write | |
345 | * size: the size of the data | |
346 | */ | |
347 | static int ds_write(struct ds_context *context, enum ds_qualifier qual, | |
348 | const void *record, size_t size) | |
349 | { | |
350 | int bytes_written = 0; | |
351 | ||
352 | if (!record) | |
353 | return -EINVAL; | |
354 | ||
355 | while (size) { | |
356 | unsigned long base, index, end, write_end, int_th; | |
357 | unsigned long write_size, adj_write_size; | |
358 | ||
359 | /* | |
360 | * write as much as possible without producing an | |
361 | * overflow interrupt. | |
362 | * | |
363 | * interrupt_threshold must either be | |
364 | * - bigger than absolute_maximum or | |
365 | * - point to a record between buffer_base and absolute_maximum | |
366 | * | |
367 | * index points to a valid record. | |
368 | */ | |
369 | base = ds_get(context->ds, qual, ds_buffer_base); | |
370 | index = ds_get(context->ds, qual, ds_index); | |
371 | end = ds_get(context->ds, qual, ds_absolute_maximum); | |
372 | int_th = ds_get(context->ds, qual, ds_interrupt_threshold); | |
373 | ||
374 | write_end = min(end, int_th); | |
375 | ||
376 | /* if we are already beyond the interrupt threshold, | |
377 | * we fill the entire buffer */ | |
378 | if (write_end <= index) | |
379 | write_end = end; | |
380 | ||
381 | if (write_end <= index) | |
382 | break; | |
383 | ||
384 | write_size = min((unsigned long) size, write_end - index); | |
385 | memcpy((void *)index, record, write_size); | |
386 | ||
387 | record = (const char *)record + write_size; | |
388 | size -= write_size; | |
389 | bytes_written += write_size; | |
390 | ||
391 | adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; | |
392 | adj_write_size *= ds_cfg.sizeof_rec[qual]; | |
393 | ||
394 | /* zero out trailing bytes */ | |
395 | memset((char *)index + write_size, 0, | |
396 | adj_write_size - write_size); | |
397 | index += adj_write_size; | |
398 | ||
399 | if (index >= end) | |
400 | index = base; | |
401 | ds_set(context->ds, qual, ds_index, index); | |
402 | ||
403 | if (index >= int_th) | |
404 | ds_overflow(context, qual); | |
405 | } | |
406 | ||
407 | return bytes_written; | |
408 | } | |
409 | ||
410 | ||
411 | /* | |
412 | * Branch Trace Store (BTS) uses the following format. Different | |
413 | * architectures vary in the size of those fields. | |
414 | * - source linear address | |
415 | * - destination linear address | |
416 | * - flags | |
417 | * | |
418 | * Later architectures use 64bit pointers throughout, whereas earlier | |
419 | * architectures use 32bit pointers in 32bit mode. | |
420 | * | |
421 | * We compute the address of each of the three record fields based on: |
422 | * - the field size stored in the DS configuration | |
423 | * - the relative field position | |
424 | * | |
425 | * In order to store additional information in the BTS buffer, we use | |
426 | * a special source address to indicate that the record requires | |
427 | * special interpretation. | |
428 | * | |
429 | * Netburst indicated via a bit in the flags field whether the branch | |
430 | * was predicted; this is ignored. | |
431 | * | |
432 | * We use two levels of abstraction: | |
433 | * - the raw data level defined here | |
434 | * - an arch-independent level defined in ds.h | |
435 | */ | |
436 | ||
437 | enum bts_field { | |
438 | bts_from, | |
439 | bts_to, | |
440 | bts_flags, | |
441 | ||
442 | bts_qual = bts_from, | |
443 | bts_jiffies = bts_to, | |
444 | bts_pid = bts_flags, | |
445 | ||
446 | bts_qual_mask = (bts_qual_max - 1), | |
447 | bts_escape = ((unsigned long)-1 & ~bts_qual_mask) | |
448 | }; | |
449 | ||
450 | static inline unsigned long bts_get(const char *base, enum bts_field field) | |
451 | { | |
452 | base += (ds_cfg.sizeof_field * field); | |
453 | return *(unsigned long *)base; | |
454 | } | |
455 | ||
456 | static inline void bts_set(char *base, enum bts_field field, unsigned long val) | |
457 | { | |
458 | base += (ds_cfg.sizeof_field * field);; | |
459 | (*(unsigned long *)base) = val; | |
460 | } | |
461 | ||
462 | ||
463 | /* | |
464 | * The raw BTS data is architecture dependent. | |
465 | * | |
466 | * For higher-level users, we give an arch-independent view. | |
467 | * - ds.h defines struct bts_struct | |
468 | * - bts_read translates one raw bts record into a bts_struct | |
469 | * - bts_write translates one bts_struct into the raw format and | |
470 | * writes it into the top of the parameter tracer's buffer. | |
471 | * | |
472 | * return: bytes read/written on success; -Eerrno, otherwise | |
473 | */ | |
474 | static int bts_read(struct bts_tracer *tracer, const void *at, | |
475 | struct bts_struct *out) | |
476 | { | |
477 | if (!tracer) | |
478 | return -EINVAL; | |
479 | ||
480 | if (at < tracer->trace.ds.begin) | |
481 | return -EINVAL; | |
482 | ||
483 | if (tracer->trace.ds.end < (at + tracer->trace.ds.size)) | |
484 | return -EINVAL; | |
485 | ||
486 | memset(out, 0, sizeof(*out)); | |
487 | if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) { | |
488 | out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask); | |
489 | out->variant.timestamp.jiffies = bts_get(at, bts_jiffies); | |
490 | out->variant.timestamp.pid = bts_get(at, bts_pid); | |
491 | } else { | |
492 | out->qualifier = bts_branch; | |
493 | out->variant.lbr.from = bts_get(at, bts_from); | |
494 | out->variant.lbr.to = bts_get(at, bts_to); | |
495 | } | |
496 | ||
497 | return ds_cfg.sizeof_rec[ds_bts]; | |
498 | } | |
499 | ||
500 | static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in) | |
501 | { | |
502 | unsigned char raw[MAX_SIZEOF_BTS]; | |
503 | ||
504 | if (!tracer) | |
505 | return -EINVAL; | |
506 | ||
507 | if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts]) | |
508 | return -EOVERFLOW; | |
509 | ||
510 | switch (in->qualifier) { | |
511 | case bts_invalid: | |
512 | bts_set(raw, bts_from, 0); | |
513 | bts_set(raw, bts_to, 0); | |
514 | bts_set(raw, bts_flags, 0); | |
515 | break; | |
516 | case bts_branch: | |
517 | bts_set(raw, bts_from, in->variant.lbr.from); | |
518 | bts_set(raw, bts_to, in->variant.lbr.to); | |
519 | bts_set(raw, bts_flags, 0); | |
520 | break; | |
521 | case bts_task_arrives: | |
522 | case bts_task_departs: | |
523 | bts_set(raw, bts_qual, (bts_escape | in->qualifier)); | |
524 | bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies); | |
525 | bts_set(raw, bts_pid, in->variant.timestamp.pid); | |
ca0002a1 | 526 | break; |
c2724775 MM |
527 | default: |
528 | return -EINVAL; | |
ca0002a1 | 529 | } |
c2724775 MM |
530 | |
531 | return ds_write(tracer->ds.context, ds_bts, raw, | |
532 | ds_cfg.sizeof_rec[ds_bts]); | |
eee3af4a | 533 | } |
93fa7636 MM |
534 | |
535 | ||
c2724775 MM |
536 | static void ds_write_config(struct ds_context *context, |
537 | struct ds_trace *cfg, enum ds_qualifier qual) | |
538 | { | |
539 | unsigned char *ds = context->ds; | |
540 | ||
541 | ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin); | |
542 | ds_set(ds, qual, ds_index, (unsigned long)cfg->top); | |
543 | ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end); | |
544 | ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith); | |
545 | } | |
546 | ||
547 | static void ds_read_config(struct ds_context *context, | |
548 | struct ds_trace *cfg, enum ds_qualifier qual) | |
eee3af4a | 549 | { |
c2724775 MM |
550 | unsigned char *ds = context->ds; |
551 | ||
552 | cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base); | |
553 | cfg->top = (void *)ds_get(ds, qual, ds_index); | |
554 | cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum); | |
555 | cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold); | |
556 | } | |
557 | ||
558 | static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, | |
559 | void *base, size_t size, size_t ith, | |
560 | unsigned int flags) { | |
93fa7636 | 561 | unsigned long buffer, adj; |
ca0002a1 MM |
562 | |
563 | /* adjust the buffer address and size to meet alignment | |
564 | * constraints: | |
565 | * - buffer is double-word aligned | |
566 | * - size is multiple of record size | |
567 | * | |
568 | * We checked the size at the very beginning; we have enough | |
569 | * space to do the adjustment. | |
570 | */ | |
571 | buffer = (unsigned long)base; | |
572 | ||
573 | adj = ALIGN(buffer, DS_ALIGNMENT) - buffer; | |
574 | buffer += adj; | |
575 | size -= adj; | |
576 | ||
c2724775 MM |
577 | trace->n = size / ds_cfg.sizeof_rec[qual]; |
578 | trace->size = ds_cfg.sizeof_rec[qual]; | |
ca0002a1 | 579 | |
c2724775 | 580 | size = (trace->n * trace->size); |
ca0002a1 | 581 | |
c2724775 MM |
582 | trace->begin = (void *)buffer; |
583 | trace->top = trace->begin; | |
584 | trace->end = (void *)(buffer + size); | |
ca0002a1 MM |
585 | /* The value for 'no threshold' is -1, which will set the |
586 | * threshold outside of the buffer, just like we want it. | |
587 | */ | |
c2724775 MM |
588 | trace->ith = (void *)(buffer + size - ith); |
589 | ||
590 | trace->flags = flags; | |
ca0002a1 MM |
591 | } |
592 | ||
c2724775 MM |
593 | |
594 | static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, | |
595 | enum ds_qualifier qual, struct task_struct *task, | |
596 | void *base, size_t size, size_t th, unsigned int flags) | |
ca0002a1 MM |
597 | { |
598 | struct ds_context *context; | |
ca0002a1 | 599 | int error; |
93fa7636 | 600 | |
6abb11ae MM |
601 | error = -EINVAL; |
602 | if (!base) | |
603 | goto out; | |
604 | ||
93fa7636 | 605 | /* we require some space to do alignment adjustments below */ |
ca0002a1 MM |
606 | error = -EINVAL; |
607 | if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual])) | |
608 | goto out; | |
93fa7636 | 609 | |
ca0002a1 MM |
610 | if (th != (size_t)-1) { |
611 | th *= ds_cfg.sizeof_rec[qual]; | |
612 | ||
613 | error = -EINVAL; | |
614 | if (size <= th) | |
615 | goto out; | |
616 | } | |
617 | ||
ca0002a1 MM |
618 | tracer->buffer = base; |
619 | tracer->size = size; | |
93fa7636 | 620 | |
ca0002a1 MM |
621 | error = -ENOMEM; |
622 | context = ds_get_context(task); | |
93fa7636 | 623 | if (!context) |
ca0002a1 MM |
624 | goto out; |
625 | tracer->context = context; | |
626 | ||
c2724775 | 627 | ds_init_ds_trace(trace, qual, base, size, th, flags); |
de90add3 | 628 | |
c2724775 | 629 | error = 0; |
ca0002a1 | 630 | out: |
93fa7636 | 631 | return error; |
eee3af4a | 632 | } |
93fa7636 | 633 | |
ca0002a1 MM |
634 | struct bts_tracer *ds_request_bts(struct task_struct *task, |
635 | void *base, size_t size, | |
c2724775 MM |
636 | bts_ovfl_callback_t ovfl, size_t th, |
637 | unsigned int flags) | |
eee3af4a | 638 | { |
ca0002a1 | 639 | struct bts_tracer *tracer; |
c2724775 | 640 | unsigned long irq; |
ca0002a1 | 641 | int error; |
93fa7636 | 642 | |
c2724775 MM |
643 | error = -EOPNOTSUPP; |
644 | if (!ds_cfg.ctl[dsf_bts]) | |
645 | goto out; | |
646 | ||
ca0002a1 MM |
647 | /* buffer overflow notification is not yet implemented */ |
648 | error = -EOPNOTSUPP; | |
649 | if (ovfl) | |
650 | goto out; | |
651 | ||
652 | error = -ENOMEM; | |
653 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); | |
654 | if (!tracer) | |
655 | goto out; | |
656 | tracer->ovfl = ovfl; | |
657 | ||
c2724775 MM |
658 | error = ds_request(&tracer->ds, &tracer->trace.ds, |
659 | ds_bts, task, base, size, th, flags); | |
ca0002a1 MM |
660 | if (error < 0) |
661 | goto out_tracer; | |
662 | ||
c2724775 MM |
663 | |
664 | spin_lock_irqsave(&ds_lock, irq); | |
665 | ||
666 | error = -EPERM; | |
667 | if (!check_tracer(task)) | |
668 | goto out_unlock; | |
669 | get_tracer(task); | |
670 | ||
671 | error = -EPERM; | |
672 | if (tracer->ds.context->bts_master) | |
673 | goto out_put_tracer; | |
674 | tracer->ds.context->bts_master = tracer; | |
675 | ||
676 | spin_unlock_irqrestore(&ds_lock, irq); | |
677 | ||
678 | ||
679 | tracer->trace.read = bts_read; | |
680 | tracer->trace.write = bts_write; | |
681 | ||
682 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); | |
683 | ds_resume_bts(tracer); | |
684 | ||
ca0002a1 MM |
685 | return tracer; |
686 | ||
c2724775 MM |
687 | out_put_tracer: |
688 | put_tracer(task); | |
689 | out_unlock: | |
690 | spin_unlock_irqrestore(&ds_lock, irq); | |
691 | ds_put_context(tracer->ds.context); | |
ca0002a1 | 692 | out_tracer: |
6abb11ae | 693 | kfree(tracer); |
ca0002a1 MM |
694 | out: |
695 | return ERR_PTR(error); | |
eee3af4a | 696 | } |
93fa7636 | 697 | |
ca0002a1 MM |
698 | struct pebs_tracer *ds_request_pebs(struct task_struct *task, |
699 | void *base, size_t size, | |
c2724775 MM |
700 | pebs_ovfl_callback_t ovfl, size_t th, |
701 | unsigned int flags) | |
eee3af4a | 702 | { |
ca0002a1 | 703 | struct pebs_tracer *tracer; |
c2724775 | 704 | unsigned long irq; |
93fa7636 MM |
705 | int error; |
706 | ||
ca0002a1 MM |
707 | /* buffer overflow notification is not yet implemented */ |
708 | error = -EOPNOTSUPP; | |
709 | if (ovfl) | |
93fa7636 MM |
710 | goto out; |
711 | ||
ca0002a1 MM |
712 | error = -ENOMEM; |
713 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); | |
714 | if (!tracer) | |
715 | goto out; | |
716 | tracer->ovfl = ovfl; | |
93fa7636 | 717 | |
c2724775 MM |
718 | error = ds_request(&tracer->ds, &tracer->trace.ds, |
719 | ds_pebs, task, base, size, th, flags); | |
ca0002a1 MM |
720 | if (error < 0) |
721 | goto out_tracer; | |
93fa7636 | 722 | |
c2724775 MM |
723 | spin_lock_irqsave(&ds_lock, irq); |
724 | ||
725 | error = -EPERM; | |
726 | if (!check_tracer(task)) | |
727 | goto out_unlock; | |
728 | get_tracer(task); | |
729 | ||
730 | error = -EPERM; | |
731 | if (tracer->ds.context->pebs_master) | |
732 | goto out_put_tracer; | |
733 | tracer->ds.context->pebs_master = tracer; | |
734 | ||
735 | spin_unlock_irqrestore(&ds_lock, irq); | |
736 | ||
737 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); | |
738 | ds_resume_pebs(tracer); | |
739 | ||
ca0002a1 MM |
740 | return tracer; |
741 | ||
c2724775 MM |
742 | out_put_tracer: |
743 | put_tracer(task); | |
744 | out_unlock: | |
745 | spin_unlock_irqrestore(&ds_lock, irq); | |
746 | ds_put_context(tracer->ds.context); | |
ca0002a1 | 747 | out_tracer: |
6abb11ae | 748 | kfree(tracer); |
93fa7636 | 749 | out: |
ca0002a1 MM |
750 | return ERR_PTR(error); |
751 | } | |
752 | ||
c2724775 | 753 | void ds_release_bts(struct bts_tracer *tracer) |
eee3af4a | 754 | { |
ca0002a1 | 755 | if (!tracer) |
c2724775 | 756 | return; |
ca0002a1 | 757 | |
c2724775 | 758 | ds_suspend_bts(tracer); |
ca0002a1 | 759 | |
c2724775 MM |
760 | WARN_ON_ONCE(tracer->ds.context->bts_master != tracer); |
761 | tracer->ds.context->bts_master = NULL; | |
93fa7636 | 762 | |
c2724775 MM |
763 | put_tracer(tracer->ds.context->task); |
764 | ds_put_context(tracer->ds.context); | |
ca0002a1 | 765 | |
ca0002a1 | 766 | kfree(tracer); |
eee3af4a | 767 | } |
93fa7636 | 768 | |
c2724775 | 769 | void ds_suspend_bts(struct bts_tracer *tracer) |
eee3af4a | 770 | { |
c2724775 | 771 | struct task_struct *task; |
ca0002a1 | 772 | |
ca0002a1 | 773 | if (!tracer) |
c2724775 | 774 | return; |
ca0002a1 | 775 | |
c2724775 | 776 | task = tracer->ds.context->task; |
ca0002a1 | 777 | |
c2724775 MM |
778 | if (!task || (task == current)) |
779 | update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL); | |
ca0002a1 | 780 | |
c2724775 MM |
781 | if (task) { |
782 | task->thread.debugctlmsr &= ~BTS_CONTROL; | |
eee3af4a | 783 | |
c2724775 MM |
784 | if (!task->thread.debugctlmsr) |
785 | clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR); | |
786 | } | |
93fa7636 | 787 | } |
eee3af4a | 788 | |
c2724775 | 789 | void ds_resume_bts(struct bts_tracer *tracer) |
93fa7636 | 790 | { |
c2724775 MM |
791 | struct task_struct *task; |
792 | unsigned long control; | |
eee3af4a | 793 | |
ca0002a1 | 794 | if (!tracer) |
c2724775 | 795 | return; |
eee3af4a | 796 | |
c2724775 | 797 | task = tracer->ds.context->task; |
ca0002a1 | 798 | |
c2724775 MM |
799 | control = ds_cfg.ctl[dsf_bts]; |
800 | if (!(tracer->trace.ds.flags & BTS_KERNEL)) | |
801 | control |= ds_cfg.ctl[dsf_bts_kernel]; | |
802 | if (!(tracer->trace.ds.flags & BTS_USER)) | |
803 | control |= ds_cfg.ctl[dsf_bts_user]; | |
eee3af4a | 804 | |
c2724775 MM |
805 | if (task) { |
806 | task->thread.debugctlmsr |= control; | |
807 | set_tsk_thread_flag(task, TIF_DEBUGCTLMSR); | |
808 | } | |
ca0002a1 | 809 | |
c2724775 MM |
810 | if (!task || (task == current)) |
811 | update_debugctlmsr(get_debugctlmsr() | control); | |
eee3af4a MM |
812 | } |
813 | ||
c2724775 | 814 | void ds_release_pebs(struct pebs_tracer *tracer) |
eee3af4a | 815 | { |
ca0002a1 | 816 | if (!tracer) |
c2724775 | 817 | return; |
93fa7636 | 818 | |
c2724775 | 819 | ds_suspend_pebs(tracer); |
93fa7636 | 820 | |
c2724775 MM |
821 | WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer); |
822 | tracer->ds.context->pebs_master = NULL; | |
eee3af4a | 823 | |
c2724775 MM |
824 | put_tracer(tracer->ds.context->task); |
825 | ds_put_context(tracer->ds.context); | |
eee3af4a | 826 | |
c2724775 | 827 | kfree(tracer); |
a95d67f8 MM |
828 | } |
829 | ||
/* Suspend PEBS tracing; currently a no-op. */
void ds_suspend_pebs(struct pebs_tracer *tracer)
{
}
eee3af4a | 834 | |
/* Resume PEBS tracing; currently a no-op. */
void ds_resume_pebs(struct pebs_tracer *tracer)
{
}
839 | ||
c2724775 | 840 | const struct bts_trace *ds_read_bts(struct bts_tracer *tracer) |
eee3af4a | 841 | { |
ca0002a1 | 842 | if (!tracer) |
c2724775 | 843 | return NULL; |
ca0002a1 | 844 | |
c2724775 MM |
845 | ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts); |
846 | return &tracer->trace; | |
93fa7636 | 847 | } |
eee3af4a | 848 | |
c2724775 | 849 | const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer) |
93fa7636 | 850 | { |
ca0002a1 | 851 | if (!tracer) |
c2724775 | 852 | return NULL; |
ca0002a1 | 853 | |
c2724775 MM |
854 | ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); |
855 | tracer->trace.reset_value = | |
856 | *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)); | |
ca0002a1 | 857 | |
c2724775 | 858 | return &tracer->trace; |
93fa7636 | 859 | } |
eee3af4a | 860 | |
c2724775 | 861 | int ds_reset_bts(struct bts_tracer *tracer) |
93fa7636 | 862 | { |
ca0002a1 MM |
863 | if (!tracer) |
864 | return -EINVAL; | |
865 | ||
c2724775 | 866 | tracer->trace.ds.top = tracer->trace.ds.begin; |
ca0002a1 | 867 | |
c2724775 MM |
868 | ds_set(tracer->ds.context->ds, ds_bts, ds_index, |
869 | (unsigned long)tracer->trace.ds.top); | |
ca0002a1 MM |
870 | |
871 | return 0; | |
93fa7636 | 872 | } |
eee3af4a | 873 | |
c2724775 | 874 | int ds_reset_pebs(struct pebs_tracer *tracer) |
93fa7636 | 875 | { |
ca0002a1 MM |
876 | if (!tracer) |
877 | return -EINVAL; | |
eee3af4a | 878 | |
c2724775 | 879 | tracer->trace.ds.top = tracer->trace.ds.begin; |
eee3af4a | 880 | |
c2724775 MM |
881 | ds_set(tracer->ds.context->ds, ds_bts, ds_index, |
882 | (unsigned long)tracer->trace.ds.top); | |
93fa7636 | 883 | |
ca0002a1 | 884 | return 0; |
eee3af4a MM |
885 | } |
886 | ||
ca0002a1 | 887 | int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value) |
eee3af4a | 888 | { |
ca0002a1 MM |
889 | if (!tracer) |
890 | return -EINVAL; | |
eee3af4a | 891 | |
ca0002a1 | 892 | *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value; |
93fa7636 | 893 | |
ca0002a1 | 894 | return 0; |
93fa7636 MM |
895 | } |
896 | ||
c2724775 MM |
897 | static const struct ds_configuration ds_cfg_netburst = { |
898 | .name = "netburst", | |
899 | .ctl[dsf_bts] = (1 << 2) | (1 << 3), | |
900 | .ctl[dsf_bts_kernel] = (1 << 5), | |
901 | .ctl[dsf_bts_user] = (1 << 6), | |
902 | ||
903 | .sizeof_field = sizeof(long), | |
904 | .sizeof_rec[ds_bts] = sizeof(long) * 3, | |
c4858ffc | 905 | #ifdef __i386__ |
c2724775 | 906 | .sizeof_rec[ds_pebs] = sizeof(long) * 10, |
c4858ffc | 907 | #else |
c2724775 | 908 | .sizeof_rec[ds_pebs] = sizeof(long) * 18, |
c4858ffc | 909 | #endif |
eee3af4a | 910 | }; |
c2724775 MM |
911 | static const struct ds_configuration ds_cfg_pentium_m = { |
912 | .name = "pentium m", | |
913 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), | |
914 | ||
915 | .sizeof_field = sizeof(long), | |
916 | .sizeof_rec[ds_bts] = sizeof(long) * 3, | |
c4858ffc | 917 | #ifdef __i386__ |
c2724775 | 918 | .sizeof_rec[ds_pebs] = sizeof(long) * 10, |
c4858ffc | 919 | #else |
c2724775 | 920 | .sizeof_rec[ds_pebs] = sizeof(long) * 18, |
c4858ffc | 921 | #endif |
eee3af4a | 922 | }; |
c2724775 MM |
923 | static const struct ds_configuration ds_cfg_core2 = { |
924 | .name = "core 2", | |
925 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), | |
926 | .ctl[dsf_bts_kernel] = (1 << 9), | |
927 | .ctl[dsf_bts_user] = (1 << 10), | |
928 | ||
929 | .sizeof_field = 8, | |
930 | .sizeof_rec[ds_bts] = 8 * 3, | |
931 | .sizeof_rec[ds_pebs] = 8 * 18, | |
932 | }; | |
eee3af4a | 933 | |
c2724775 | 934 | static void |
eee3af4a MM |
935 | ds_configure(const struct ds_configuration *cfg) |
936 | { | |
c2724775 | 937 | memset(&ds_cfg, 0, sizeof(ds_cfg)); |
eee3af4a | 938 | ds_cfg = *cfg; |
ca0002a1 | 939 | |
c2724775 MM |
940 | printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name); |
941 | ||
942 | if (!cpu_has_bts) { | |
943 | ds_cfg.ctl[dsf_bts] = 0; | |
944 | printk(KERN_INFO "[ds] bts not available\n"); | |
945 | } | |
946 | if (!cpu_has_pebs) | |
947 | printk(KERN_INFO "[ds] pebs not available\n"); | |
ca0002a1 | 948 | |
c2724775 | 949 | WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field)); |
eee3af4a MM |
950 | } |
951 | ||
952 | void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |
953 | { | |
954 | switch (c->x86) { | |
955 | case 0x6: | |
956 | switch (c->x86_model) { | |
f4166c54 MM |
957 | case 0 ... 0xC: |
958 | /* sorry, don't know about them */ | |
959 | break; | |
eee3af4a MM |
960 | case 0xD: |
961 | case 0xE: /* Pentium M */ | |
c2724775 | 962 | ds_configure(&ds_cfg_pentium_m); |
eee3af4a | 963 | break; |
f4166c54 | 964 | default: /* Core2, Atom, ... */ |
c2724775 | 965 | ds_configure(&ds_cfg_core2); |
eee3af4a | 966 | break; |
eee3af4a MM |
967 | } |
968 | break; | |
969 | case 0xF: | |
970 | switch (c->x86_model) { | |
eee3af4a MM |
971 | case 0x0: |
972 | case 0x1: | |
973 | case 0x2: /* Netburst */ | |
c2724775 | 974 | ds_configure(&ds_cfg_netburst); |
eee3af4a | 975 | break; |
eee3af4a MM |
976 | default: |
977 | /* sorry, don't know about them */ | |
978 | break; | |
979 | } | |
980 | break; | |
981 | default: | |
982 | /* sorry, don't know about them */ | |
983 | break; | |
984 | } | |
985 | } | |
93fa7636 | 986 | |
c2724775 MM |
987 | /* |
988 | * Change the DS configuration from tracing prev to tracing next. | |
989 | */ | |
990 | void ds_switch_to(struct task_struct *prev, struct task_struct *next) | |
93fa7636 | 991 | { |
c2724775 MM |
992 | struct ds_context *prev_ctx = prev->thread.ds_ctx; |
993 | struct ds_context *next_ctx = next->thread.ds_ctx; | |
994 | ||
995 | if (prev_ctx) { | |
996 | update_debugctlmsr(0); | |
997 | ||
998 | if (prev_ctx->bts_master && | |
999 | (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) { | |
1000 | struct bts_struct ts = { | |
1001 | .qualifier = bts_task_departs, | |
1002 | .variant.timestamp.jiffies = jiffies_64, | |
1003 | .variant.timestamp.pid = prev->pid | |
1004 | }; | |
1005 | bts_write(prev_ctx->bts_master, &ts); | |
1006 | } | |
1007 | } | |
1008 | ||
1009 | if (next_ctx) { | |
1010 | if (next_ctx->bts_master && | |
1011 | (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) { | |
1012 | struct bts_struct ts = { | |
1013 | .qualifier = bts_task_arrives, | |
1014 | .variant.timestamp.jiffies = jiffies_64, | |
1015 | .variant.timestamp.pid = next->pid | |
1016 | }; | |
1017 | bts_write(next_ctx->bts_master, &ts); | |
1018 | } | |
1019 | ||
1020 | wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds); | |
ca0002a1 | 1021 | } |
c2724775 MM |
1022 | |
1023 | update_debugctlmsr(next->thread.debugctlmsr); | |
93fa7636 | 1024 | } |