]>
Commit | Line | Data |
---|---|---|
eee3af4a MM |
1 | /* |
2 | * Debug Store support | |
3 | * | |
4 | * This provides a low-level interface to the hardware's Debug Store | |
93fa7636 | 5 | * feature that is used for branch trace store (BTS) and |
eee3af4a MM |
6 | * precise-event based sampling (PEBS). |
7 | * | |
93fa7636 | 8 | * It manages: |
c2724775 | 9 | * - DS and BTS hardware configuration |
6abb11ae | 10 | * - buffer overflow handling (to be done) |
93fa7636 | 11 | * - buffer access |
eee3af4a | 12 | * |
c2724775 MM |
13 | * It does not do: |
14 | * - security checking (is the caller allowed to trace the task) | |
15 | * - buffer allocation (memory accounting) | |
eee3af4a | 16 | * |
eee3af4a | 17 | * |
ba2607fe MM |
18 | * Copyright (C) 2007-2009 Intel Corporation. |
19 | * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009 | |
eee3af4a MM |
20 | */ |
21 | ||
93fa7636 | 22 | |
eee3af4a MM |
23 | #include <asm/ds.h> |
24 | ||
25 | #include <linux/errno.h> | |
26 | #include <linux/string.h> | |
27 | #include <linux/slab.h> | |
93fa7636 | 28 | #include <linux/sched.h> |
3c933904 | 29 | #include <linux/mm.h> |
ca0002a1 | 30 | #include <linux/kernel.h> |
93fa7636 MM |
31 | |
32 | ||
33 | /* | |
34 | * The configuration for a particular DS hardware implementation. | |
35 | */ | |
36 | struct ds_configuration { | |
c2724775 MM |
37 | /* the name of the configuration */ |
38 | const char *name; | |
39 | /* the size of one pointer-typed field in the DS structure and | |
40 | in the BTS and PEBS buffers in bytes; | |
41 | this covers the first 8 DS fields related to buffer management. */ | |
93fa7636 MM |
42 | unsigned char sizeof_field; |
43 | /* the size of a BTS/PEBS record in bytes */ | |
44 | unsigned char sizeof_rec[2]; | |
c2724775 MM |
45 | /* a series of bit-masks to control various features indexed |
46 | * by enum ds_feature */ | |
47 | unsigned long ctl[dsf_ctl_max]; | |
93fa7636 | 48 | }; |
c2724775 MM |
49 | static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array); |
50 | ||
51 | #define ds_cfg per_cpu(ds_cfg_array, smp_processor_id()) | |
52 | ||
53 | #define MAX_SIZEOF_DS (12 * 8) /* maximal size of a DS configuration */ | |
54 | #define MAX_SIZEOF_BTS (3 * 8) /* maximal size of a BTS record */ | |
55 | #define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */ | |
56 | ||
57 | #define BTS_CONTROL \ | |
58 | (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\ | |
59 | ds_cfg.ctl[dsf_bts_overflow]) | |
60 | ||
eee3af4a | 61 | |
ca0002a1 MM |
62 | /* |
63 | * A BTS or PEBS tracer. | |
64 | * | |
65 | * This holds the configuration of the tracer and serves as a handle | |
66 | * to identify tracers. | |
67 | */ | |
68 | struct ds_tracer { | |
69 | /* the DS context (partially) owned by this tracer */ | |
70 | struct ds_context *context; | |
71 | /* the buffer provided on ds_request() and its size in bytes */ | |
72 | void *buffer; | |
73 | size_t size; | |
ca0002a1 MM |
74 | }; |
75 | ||
76 | struct bts_tracer { | |
77 | /* the common DS part */ | |
78 | struct ds_tracer ds; | |
c2724775 MM |
79 | /* the trace including the DS configuration */ |
80 | struct bts_trace trace; | |
ca0002a1 MM |
81 | /* buffer overflow notification function */ |
82 | bts_ovfl_callback_t ovfl; | |
83 | }; | |
84 | ||
85 | struct pebs_tracer { | |
86 | /* the common DS part */ | |
87 | struct ds_tracer ds; | |
c2724775 MM |
88 | /* the trace including the DS configuration */ |
89 | struct pebs_trace trace; | |
ca0002a1 MM |
90 | /* buffer overflow notification function */ |
91 | pebs_ovfl_callback_t ovfl; | |
92 | }; | |
eee3af4a MM |
93 | |
94 | /* | |
95 | * Debug Store (DS) save area configuration (see Intel64 and IA32 | |
96 | * Architectures Software Developer's Manual, section 18.5) | |
97 | * | |
98 | * The DS configuration consists of the following fields; different | |
99 | * architectures vary in the size of those fields. |
100 | * - double-word aligned base linear address of the BTS buffer | |
101 | * - write pointer into the BTS buffer | |
102 | * - end linear address of the BTS buffer (one byte beyond the end of | |
103 | * the buffer) | |
104 | * - interrupt pointer into BTS buffer | |
105 | * (interrupt occurs when write pointer passes interrupt pointer) | |
106 | * - double-word aligned base linear address of the PEBS buffer | |
107 | * - write pointer into the PEBS buffer | |
108 | * - end linear address of the PEBS buffer (one byte beyond the end of | |
109 | * the buffer) | |
110 | * - interrupt pointer into PEBS buffer | |
111 | * (interrupt occurs when write pointer passes interrupt pointer) | |
112 | * - value to which counter is reset following counter overflow | |
113 | * | |
93fa7636 MM |
114 | * Later architectures use 64bit pointers throughout, whereas earlier |
115 | * architectures use 32bit pointers in 32bit mode. | |
eee3af4a | 116 | * |
eee3af4a | 117 | * |
93fa7636 MM |
118 | * We compute the base address for the first 8 fields based on: |
119 | * - the field size stored in the DS configuration | |
120 | * - the relative field position | |
121 | * - an offset giving the start of the respective region | |
eee3af4a | 122 | * |
93fa7636 MM |
123 | * This offset is further used to index various arrays holding |
124 | * information for BTS and PEBS at the respective index. | |
eee3af4a | 125 | * |
93fa7636 MM |
126 | * On later 32bit processors, we only access the lower 32bit of the |
127 | * 64bit pointer fields. The upper halves will be zeroed out. | |
eee3af4a MM |
128 | */ |
129 | ||
93fa7636 MM |
/* Position of a buffer-management field within its BTS or PEBS group. */
enum ds_field {
	ds_buffer_base = 0,		/* start of the buffer */
	ds_index,			/* current write pointer */
	ds_absolute_maximum,		/* one byte beyond the end of the buffer */
	ds_interrupt_threshold,		/* interrupt pointer into the buffer */
};
eee3af4a | 136 | |
93fa7636 MM |
/* Selects the BTS or the PEBS group of fields, records and settings. */
enum ds_qualifier {
	ds_bts = 0,
	ds_pebs
};
141 | ||
93fa7636 MM |
142 | static inline unsigned long ds_get(const unsigned char *base, |
143 | enum ds_qualifier qual, enum ds_field field) | |
144 | { | |
145 | base += (ds_cfg.sizeof_field * (field + (4 * qual))); | |
146 | return *(unsigned long *)base; | |
147 | } | |
148 | ||
149 | static inline void ds_set(unsigned char *base, enum ds_qualifier qual, | |
150 | enum ds_field field, unsigned long value) | |
151 | { | |
152 | base += (ds_cfg.sizeof_field * (field + (4 * qual))); | |
153 | (*(unsigned long *)base) = value; | |
154 | } | |
155 | ||
156 | ||
/*
 * Locking is done only for allocating BTS or PEBS resources, i.e. for
 * DS context management and for tracer accounting; accesses to the
 * trace buffers themselves are not serialized by this lock.
 */
static DEFINE_SPINLOCK(ds_lock);
eee3af4a | 161 | |
eee3af4a MM |
162 | |
163 | /* | |
93fa7636 MM |
164 | * We either support (system-wide) per-cpu or per-thread allocation. |
165 | * We distinguish the two based on the task_struct pointer, where a | |
166 | * NULL pointer indicates per-cpu allocation for the current cpu. | |
167 | * | |
168 | * Allocations are use-counted. As soon as resources are allocated, | |
169 | * further allocations must be of the same type (per-cpu or | |
170 | * per-thread). We model this by counting allocations (i.e. the number | |
171 | * of tracers of a certain type) for one type negatively: | |
172 | * =0 no tracers | |
173 | * >0 number of per-thread tracers | |
174 | * <0 number of per-cpu tracers | |
175 | * | |
93fa7636 MM |
176 | * Tracers essentially gives the number of ds contexts for a certain |
177 | * type of allocation. | |
eee3af4a | 178 | */ |
c2724775 | 179 | static atomic_t tracers = ATOMIC_INIT(0); |
93fa7636 MM |
180 | |
181 | static inline void get_tracer(struct task_struct *task) | |
eee3af4a | 182 | { |
c2724775 MM |
183 | if (task) |
184 | atomic_inc(&tracers); | |
185 | else | |
186 | atomic_dec(&tracers); | |
eee3af4a | 187 | } |
93fa7636 MM |
188 | |
189 | static inline void put_tracer(struct task_struct *task) | |
eee3af4a | 190 | { |
c2724775 MM |
191 | if (task) |
192 | atomic_dec(&tracers); | |
193 | else | |
194 | atomic_inc(&tracers); | |
eee3af4a | 195 | } |
93fa7636 MM |
196 | |
197 | static inline int check_tracer(struct task_struct *task) | |
eee3af4a | 198 | { |
c2724775 MM |
199 | return task ? |
200 | (atomic_read(&tracers) >= 0) : | |
201 | (atomic_read(&tracers) <= 0); | |
eee3af4a | 202 | } |
93fa7636 MM |
203 | |
204 | ||
205 | /* | |
206 | * The DS context is either attached to a thread or to a cpu: | |
207 | * - in the former case, the thread_struct contains a pointer to the | |
208 | * attached context. | |
209 | * - in the latter case, we use a static array of per-cpu context | |
210 | * pointers. | |
211 | * | |
212 | * Contexts are use-counted. They are allocated on first access and | |
213 | * deallocated when the last user puts the context. | |
93fa7636 | 214 | */ |
c2724775 MM |
215 | struct ds_context { |
216 | /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ | |
217 | unsigned char ds[MAX_SIZEOF_DS]; | |
218 | /* the owner of the BTS and PEBS configuration, respectively */ | |
219 | struct bts_tracer *bts_master; | |
220 | struct pebs_tracer *pebs_master; | |
221 | /* use count */ | |
222 | unsigned long count; | |
223 | /* a pointer to the context location inside the thread_struct | |
224 | * or the per_cpu context array */ | |
225 | struct ds_context **this; | |
226 | /* a pointer to the task owning this context, or NULL, if the | |
227 | * context is owned by a cpu */ | |
228 | struct task_struct *task; | |
229 | }; | |
230 | ||
231 | static DEFINE_PER_CPU(struct ds_context *, system_context_array); | |
93fa7636 | 232 | |
c2724775 | 233 | #define system_context per_cpu(system_context_array, smp_processor_id()) |
93fa7636 | 234 | |
cc1dc6d0 MM |
235 | |
236 | static inline struct ds_context *ds_get_context(struct task_struct *task) | |
eee3af4a | 237 | { |
93fa7636 | 238 | struct ds_context **p_context = |
c2724775 | 239 | (task ? &task->thread.ds_ctx : &system_context); |
cc1dc6d0 MM |
240 | struct ds_context *context = NULL; |
241 | struct ds_context *new_context = NULL; | |
de90add3 | 242 | unsigned long irq; |
93fa7636 | 243 | |
cc1dc6d0 MM |
244 | /* Chances are small that we already have a context. */ |
245 | new_context = kzalloc(sizeof(*new_context), GFP_KERNEL); | |
246 | if (!new_context) | |
247 | return NULL; | |
de90add3 | 248 | |
cc1dc6d0 | 249 | spin_lock_irqsave(&ds_lock, irq); |
93fa7636 | 250 | |
cc1dc6d0 MM |
251 | context = *p_context; |
252 | if (!context) { | |
253 | context = new_context; | |
93fa7636 | 254 | |
cc1dc6d0 MM |
255 | context->this = p_context; |
256 | context->task = task; | |
257 | context->count = 0; | |
93fa7636 | 258 | |
cc1dc6d0 MM |
259 | if (task) |
260 | set_tsk_thread_flag(task, TIF_DS_AREA_MSR); | |
c2724775 | 261 | |
cc1dc6d0 MM |
262 | if (!task || (task == current)) |
263 | wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds); | |
c2724775 | 264 | |
cc1dc6d0 MM |
265 | *p_context = context; |
266 | } | |
c2724775 | 267 | |
cc1dc6d0 | 268 | context->count++; |
c2724775 | 269 | |
cc1dc6d0 | 270 | spin_unlock_irqrestore(&ds_lock, irq); |
93fa7636 | 271 | |
cc1dc6d0 MM |
272 | if (context != new_context) |
273 | kfree(new_context); | |
93fa7636 MM |
274 | |
275 | return context; | |
eee3af4a | 276 | } |
93fa7636 | 277 | |
93fa7636 | 278 | static inline void ds_put_context(struct ds_context *context) |
eee3af4a | 279 | { |
de90add3 MM |
280 | unsigned long irq; |
281 | ||
93fa7636 MM |
282 | if (!context) |
283 | return; | |
284 | ||
de90add3 | 285 | spin_lock_irqsave(&ds_lock, irq); |
93fa7636 | 286 | |
c2724775 MM |
287 | if (--context->count) { |
288 | spin_unlock_irqrestore(&ds_lock, irq); | |
289 | return; | |
290 | } | |
93fa7636 | 291 | |
573da422 | 292 | *(context->this) = NULL; |
93fa7636 MM |
293 | |
294 | if (context->task) | |
295 | clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); | |
296 | ||
297 | if (!context->task || (context->task == current)) | |
298 | wrmsrl(MSR_IA32_DS_AREA, 0); | |
299 | ||
de90add3 | 300 | spin_unlock_irqrestore(&ds_lock, irq); |
c2724775 MM |
301 | |
302 | kfree(context); | |
eee3af4a | 303 | } |
93fa7636 MM |
304 | |
305 | ||
306 | /* | |
c2724775 | 307 | * Call the tracer's callback on a buffer overflow. |
93fa7636 | 308 | * |
93fa7636 MM |
309 | * context: the ds context |
310 | * qual: the buffer type | |
311 | */ | |
ca0002a1 MM |
312 | static void ds_overflow(struct ds_context *context, enum ds_qualifier qual) |
313 | { | |
314 | switch (qual) { | |
c2724775 MM |
315 | case ds_bts: |
316 | if (context->bts_master && | |
317 | context->bts_master->ovfl) | |
318 | context->bts_master->ovfl(context->bts_master); | |
319 | break; | |
320 | case ds_pebs: | |
321 | if (context->pebs_master && | |
322 | context->pebs_master->ovfl) | |
323 | context->pebs_master->ovfl(context->pebs_master); | |
ca0002a1 | 324 | break; |
ca0002a1 | 325 | } |
c2724775 MM |
326 | } |
327 | ||
328 | ||
329 | /* | |
330 | * Write raw data into the BTS or PEBS buffer. | |
331 | * | |
332 | * The remainder of any partially written record is zeroed out. | |
333 | * | |
334 | * context: the DS context | |
335 | * qual: the buffer type | |
336 | * record: the data to write | |
337 | * size: the size of the data | |
338 | */ | |
339 | static int ds_write(struct ds_context *context, enum ds_qualifier qual, | |
340 | const void *record, size_t size) | |
341 | { | |
342 | int bytes_written = 0; | |
343 | ||
344 | if (!record) | |
345 | return -EINVAL; | |
346 | ||
347 | while (size) { | |
348 | unsigned long base, index, end, write_end, int_th; | |
349 | unsigned long write_size, adj_write_size; | |
350 | ||
351 | /* | |
352 | * write as much as possible without producing an | |
353 | * overflow interrupt. | |
354 | * | |
355 | * interrupt_threshold must either be | |
356 | * - bigger than absolute_maximum or | |
357 | * - point to a record between buffer_base and absolute_maximum | |
358 | * | |
359 | * index points to a valid record. | |
360 | */ | |
361 | base = ds_get(context->ds, qual, ds_buffer_base); | |
362 | index = ds_get(context->ds, qual, ds_index); | |
363 | end = ds_get(context->ds, qual, ds_absolute_maximum); | |
364 | int_th = ds_get(context->ds, qual, ds_interrupt_threshold); | |
365 | ||
366 | write_end = min(end, int_th); | |
367 | ||
368 | /* if we are already beyond the interrupt threshold, | |
369 | * we fill the entire buffer */ | |
370 | if (write_end <= index) | |
371 | write_end = end; | |
372 | ||
373 | if (write_end <= index) | |
374 | break; | |
375 | ||
376 | write_size = min((unsigned long) size, write_end - index); | |
377 | memcpy((void *)index, record, write_size); | |
378 | ||
379 | record = (const char *)record + write_size; | |
380 | size -= write_size; | |
381 | bytes_written += write_size; | |
382 | ||
383 | adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; | |
384 | adj_write_size *= ds_cfg.sizeof_rec[qual]; | |
385 | ||
386 | /* zero out trailing bytes */ | |
387 | memset((char *)index + write_size, 0, | |
388 | adj_write_size - write_size); | |
389 | index += adj_write_size; | |
390 | ||
391 | if (index >= end) | |
392 | index = base; | |
393 | ds_set(context->ds, qual, ds_index, index); | |
394 | ||
395 | if (index >= int_th) | |
396 | ds_overflow(context, qual); | |
397 | } | |
398 | ||
399 | return bytes_written; | |
400 | } | |
401 | ||
402 | ||
403 | /* | |
404 | * Branch Trace Store (BTS) uses the following format. Different | |
405 | * architectures vary in the size of those fields. | |
406 | * - source linear address | |
407 | * - destination linear address | |
408 | * - flags | |
409 | * | |
410 | * Later architectures use 64bit pointers throughout, whereas earlier | |
411 | * architectures use 32bit pointers in 32bit mode. | |
412 | * | |
413 | * We compute the address of a field within a record based on: |
414 | * - the field size stored in the DS configuration | |
415 | * - the relative field position | |
416 | * | |
417 | * In order to store additional information in the BTS buffer, we use | |
418 | * a special source address to indicate that the record requires | |
419 | * special interpretation. | |
420 | * | |
421 | * Netburst indicated via a bit in the flags field whether the branch | |
422 | * was predicted; this is ignored. | |
423 | * | |
424 | * We use two levels of abstraction: | |
425 | * - the raw data level defined here | |
426 | * - an arch-independent level defined in ds.h | |
427 | */ | |
428 | ||
429 | enum bts_field { | |
430 | bts_from, | |
431 | bts_to, | |
432 | bts_flags, | |
433 | ||
434 | bts_qual = bts_from, | |
435 | bts_jiffies = bts_to, | |
436 | bts_pid = bts_flags, | |
437 | ||
438 | bts_qual_mask = (bts_qual_max - 1), | |
439 | bts_escape = ((unsigned long)-1 & ~bts_qual_mask) | |
440 | }; | |
441 | ||
442 | static inline unsigned long bts_get(const char *base, enum bts_field field) | |
443 | { | |
444 | base += (ds_cfg.sizeof_field * field); | |
445 | return *(unsigned long *)base; | |
446 | } | |
447 | ||
448 | static inline void bts_set(char *base, enum bts_field field, unsigned long val) | |
449 | { | |
450 | base += (ds_cfg.sizeof_field * field);; | |
451 | (*(unsigned long *)base) = val; | |
452 | } | |
453 | ||
454 | ||
455 | /* | |
456 | * The raw BTS data is architecture dependent. | |
457 | * | |
458 | * For higher-level users, we give an arch-independent view. | |
459 | * - ds.h defines struct bts_struct | |
460 | * - bts_read translates one raw bts record into a bts_struct | |
461 | * - bts_write translates one bts_struct into the raw format and | |
462 | * writes it into the top of the parameter tracer's buffer. | |
463 | * | |
464 | * return: bytes read/written on success; -Eerrno, otherwise | |
465 | */ | |
466 | static int bts_read(struct bts_tracer *tracer, const void *at, | |
467 | struct bts_struct *out) | |
468 | { | |
469 | if (!tracer) | |
470 | return -EINVAL; | |
471 | ||
472 | if (at < tracer->trace.ds.begin) | |
473 | return -EINVAL; | |
474 | ||
475 | if (tracer->trace.ds.end < (at + tracer->trace.ds.size)) | |
476 | return -EINVAL; | |
477 | ||
478 | memset(out, 0, sizeof(*out)); | |
479 | if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) { | |
480 | out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask); | |
481 | out->variant.timestamp.jiffies = bts_get(at, bts_jiffies); | |
482 | out->variant.timestamp.pid = bts_get(at, bts_pid); | |
483 | } else { | |
484 | out->qualifier = bts_branch; | |
485 | out->variant.lbr.from = bts_get(at, bts_from); | |
486 | out->variant.lbr.to = bts_get(at, bts_to); | |
d072c25f MM |
487 | |
488 | if (!out->variant.lbr.from && !out->variant.lbr.to) | |
489 | out->qualifier = bts_invalid; | |
c2724775 MM |
490 | } |
491 | ||
492 | return ds_cfg.sizeof_rec[ds_bts]; | |
493 | } | |
494 | ||
495 | static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in) | |
496 | { | |
497 | unsigned char raw[MAX_SIZEOF_BTS]; | |
498 | ||
499 | if (!tracer) | |
500 | return -EINVAL; | |
501 | ||
502 | if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts]) | |
503 | return -EOVERFLOW; | |
504 | ||
505 | switch (in->qualifier) { | |
506 | case bts_invalid: | |
507 | bts_set(raw, bts_from, 0); | |
508 | bts_set(raw, bts_to, 0); | |
509 | bts_set(raw, bts_flags, 0); | |
510 | break; | |
511 | case bts_branch: | |
512 | bts_set(raw, bts_from, in->variant.lbr.from); | |
513 | bts_set(raw, bts_to, in->variant.lbr.to); | |
514 | bts_set(raw, bts_flags, 0); | |
515 | break; | |
516 | case bts_task_arrives: | |
517 | case bts_task_departs: | |
518 | bts_set(raw, bts_qual, (bts_escape | in->qualifier)); | |
519 | bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies); | |
520 | bts_set(raw, bts_pid, in->variant.timestamp.pid); | |
ca0002a1 | 521 | break; |
c2724775 MM |
522 | default: |
523 | return -EINVAL; | |
ca0002a1 | 524 | } |
c2724775 MM |
525 | |
526 | return ds_write(tracer->ds.context, ds_bts, raw, | |
527 | ds_cfg.sizeof_rec[ds_bts]); | |
eee3af4a | 528 | } |
93fa7636 MM |
529 | |
530 | ||
c2724775 MM |
531 | static void ds_write_config(struct ds_context *context, |
532 | struct ds_trace *cfg, enum ds_qualifier qual) | |
533 | { | |
534 | unsigned char *ds = context->ds; | |
535 | ||
536 | ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin); | |
537 | ds_set(ds, qual, ds_index, (unsigned long)cfg->top); | |
538 | ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end); | |
539 | ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith); | |
540 | } | |
541 | ||
542 | static void ds_read_config(struct ds_context *context, | |
543 | struct ds_trace *cfg, enum ds_qualifier qual) | |
eee3af4a | 544 | { |
c2724775 MM |
545 | unsigned char *ds = context->ds; |
546 | ||
547 | cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base); | |
548 | cfg->top = (void *)ds_get(ds, qual, ds_index); | |
549 | cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum); | |
550 | cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold); | |
551 | } | |
552 | ||
553 | static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, | |
554 | void *base, size_t size, size_t ith, | |
555 | unsigned int flags) { | |
93fa7636 | 556 | unsigned long buffer, adj; |
ca0002a1 MM |
557 | |
558 | /* adjust the buffer address and size to meet alignment | |
559 | * constraints: | |
560 | * - buffer is double-word aligned | |
561 | * - size is multiple of record size | |
562 | * | |
563 | * We checked the size at the very beginning; we have enough | |
564 | * space to do the adjustment. | |
565 | */ | |
566 | buffer = (unsigned long)base; | |
567 | ||
568 | adj = ALIGN(buffer, DS_ALIGNMENT) - buffer; | |
569 | buffer += adj; | |
570 | size -= adj; | |
571 | ||
c2724775 MM |
572 | trace->n = size / ds_cfg.sizeof_rec[qual]; |
573 | trace->size = ds_cfg.sizeof_rec[qual]; | |
ca0002a1 | 574 | |
c2724775 | 575 | size = (trace->n * trace->size); |
ca0002a1 | 576 | |
c2724775 MM |
577 | trace->begin = (void *)buffer; |
578 | trace->top = trace->begin; | |
579 | trace->end = (void *)(buffer + size); | |
ca0002a1 MM |
580 | /* The value for 'no threshold' is -1, which will set the |
581 | * threshold outside of the buffer, just like we want it. | |
582 | */ | |
c2724775 MM |
583 | trace->ith = (void *)(buffer + size - ith); |
584 | ||
585 | trace->flags = flags; | |
ca0002a1 MM |
586 | } |
587 | ||
c2724775 MM |
588 | |
589 | static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, | |
590 | enum ds_qualifier qual, struct task_struct *task, | |
591 | void *base, size_t size, size_t th, unsigned int flags) | |
ca0002a1 MM |
592 | { |
593 | struct ds_context *context; | |
ca0002a1 | 594 | int error; |
93fa7636 | 595 | |
6abb11ae MM |
596 | error = -EINVAL; |
597 | if (!base) | |
598 | goto out; | |
599 | ||
93fa7636 | 600 | /* we require some space to do alignment adjustments below */ |
ca0002a1 MM |
601 | error = -EINVAL; |
602 | if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual])) | |
603 | goto out; | |
93fa7636 | 604 | |
ca0002a1 MM |
605 | if (th != (size_t)-1) { |
606 | th *= ds_cfg.sizeof_rec[qual]; | |
607 | ||
608 | error = -EINVAL; | |
609 | if (size <= th) | |
610 | goto out; | |
611 | } | |
612 | ||
ca0002a1 MM |
613 | tracer->buffer = base; |
614 | tracer->size = size; | |
93fa7636 | 615 | |
ca0002a1 MM |
616 | error = -ENOMEM; |
617 | context = ds_get_context(task); | |
93fa7636 | 618 | if (!context) |
ca0002a1 MM |
619 | goto out; |
620 | tracer->context = context; | |
621 | ||
c2724775 | 622 | ds_init_ds_trace(trace, qual, base, size, th, flags); |
de90add3 | 623 | |
c2724775 | 624 | error = 0; |
ca0002a1 | 625 | out: |
93fa7636 | 626 | return error; |
eee3af4a | 627 | } |
93fa7636 | 628 | |
ca0002a1 MM |
629 | struct bts_tracer *ds_request_bts(struct task_struct *task, |
630 | void *base, size_t size, | |
c2724775 MM |
631 | bts_ovfl_callback_t ovfl, size_t th, |
632 | unsigned int flags) | |
eee3af4a | 633 | { |
ca0002a1 | 634 | struct bts_tracer *tracer; |
c2724775 | 635 | unsigned long irq; |
ca0002a1 | 636 | int error; |
93fa7636 | 637 | |
c2724775 MM |
638 | error = -EOPNOTSUPP; |
639 | if (!ds_cfg.ctl[dsf_bts]) | |
640 | goto out; | |
641 | ||
ca0002a1 MM |
642 | /* buffer overflow notification is not yet implemented */ |
643 | error = -EOPNOTSUPP; | |
644 | if (ovfl) | |
645 | goto out; | |
646 | ||
647 | error = -ENOMEM; | |
648 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); | |
649 | if (!tracer) | |
650 | goto out; | |
651 | tracer->ovfl = ovfl; | |
652 | ||
c2724775 MM |
653 | error = ds_request(&tracer->ds, &tracer->trace.ds, |
654 | ds_bts, task, base, size, th, flags); | |
ca0002a1 MM |
655 | if (error < 0) |
656 | goto out_tracer; | |
657 | ||
c2724775 MM |
658 | |
659 | spin_lock_irqsave(&ds_lock, irq); | |
660 | ||
661 | error = -EPERM; | |
662 | if (!check_tracer(task)) | |
663 | goto out_unlock; | |
664 | get_tracer(task); | |
665 | ||
666 | error = -EPERM; | |
667 | if (tracer->ds.context->bts_master) | |
668 | goto out_put_tracer; | |
669 | tracer->ds.context->bts_master = tracer; | |
670 | ||
671 | spin_unlock_irqrestore(&ds_lock, irq); | |
672 | ||
673 | ||
674 | tracer->trace.read = bts_read; | |
675 | tracer->trace.write = bts_write; | |
676 | ||
677 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); | |
678 | ds_resume_bts(tracer); | |
679 | ||
ca0002a1 MM |
680 | return tracer; |
681 | ||
c2724775 MM |
682 | out_put_tracer: |
683 | put_tracer(task); | |
684 | out_unlock: | |
685 | spin_unlock_irqrestore(&ds_lock, irq); | |
686 | ds_put_context(tracer->ds.context); | |
ca0002a1 | 687 | out_tracer: |
6abb11ae | 688 | kfree(tracer); |
ca0002a1 MM |
689 | out: |
690 | return ERR_PTR(error); | |
eee3af4a | 691 | } |
93fa7636 | 692 | |
ca0002a1 MM |
693 | struct pebs_tracer *ds_request_pebs(struct task_struct *task, |
694 | void *base, size_t size, | |
c2724775 MM |
695 | pebs_ovfl_callback_t ovfl, size_t th, |
696 | unsigned int flags) | |
eee3af4a | 697 | { |
ca0002a1 | 698 | struct pebs_tracer *tracer; |
c2724775 | 699 | unsigned long irq; |
93fa7636 MM |
700 | int error; |
701 | ||
ca0002a1 MM |
702 | /* buffer overflow notification is not yet implemented */ |
703 | error = -EOPNOTSUPP; | |
704 | if (ovfl) | |
93fa7636 MM |
705 | goto out; |
706 | ||
ca0002a1 MM |
707 | error = -ENOMEM; |
708 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); | |
709 | if (!tracer) | |
710 | goto out; | |
711 | tracer->ovfl = ovfl; | |
93fa7636 | 712 | |
c2724775 MM |
713 | error = ds_request(&tracer->ds, &tracer->trace.ds, |
714 | ds_pebs, task, base, size, th, flags); | |
ca0002a1 MM |
715 | if (error < 0) |
716 | goto out_tracer; | |
93fa7636 | 717 | |
c2724775 MM |
718 | spin_lock_irqsave(&ds_lock, irq); |
719 | ||
720 | error = -EPERM; | |
721 | if (!check_tracer(task)) | |
722 | goto out_unlock; | |
723 | get_tracer(task); | |
724 | ||
725 | error = -EPERM; | |
726 | if (tracer->ds.context->pebs_master) | |
727 | goto out_put_tracer; | |
728 | tracer->ds.context->pebs_master = tracer; | |
729 | ||
730 | spin_unlock_irqrestore(&ds_lock, irq); | |
731 | ||
73bf1b62 | 732 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); |
c2724775 MM |
733 | ds_resume_pebs(tracer); |
734 | ||
ca0002a1 MM |
735 | return tracer; |
736 | ||
c2724775 MM |
737 | out_put_tracer: |
738 | put_tracer(task); | |
739 | out_unlock: | |
740 | spin_unlock_irqrestore(&ds_lock, irq); | |
741 | ds_put_context(tracer->ds.context); | |
ca0002a1 | 742 | out_tracer: |
6abb11ae | 743 | kfree(tracer); |
93fa7636 | 744 | out: |
ca0002a1 MM |
745 | return ERR_PTR(error); |
746 | } | |
747 | ||
c2724775 | 748 | void ds_release_bts(struct bts_tracer *tracer) |
eee3af4a | 749 | { |
ca0002a1 | 750 | if (!tracer) |
c2724775 | 751 | return; |
ca0002a1 | 752 | |
c2724775 | 753 | ds_suspend_bts(tracer); |
ca0002a1 | 754 | |
c2724775 MM |
755 | WARN_ON_ONCE(tracer->ds.context->bts_master != tracer); |
756 | tracer->ds.context->bts_master = NULL; | |
93fa7636 | 757 | |
c2724775 MM |
758 | put_tracer(tracer->ds.context->task); |
759 | ds_put_context(tracer->ds.context); | |
ca0002a1 | 760 | |
ca0002a1 | 761 | kfree(tracer); |
eee3af4a | 762 | } |
93fa7636 | 763 | |
c2724775 | 764 | void ds_suspend_bts(struct bts_tracer *tracer) |
eee3af4a | 765 | { |
c2724775 | 766 | struct task_struct *task; |
ca0002a1 | 767 | |
ca0002a1 | 768 | if (!tracer) |
c2724775 | 769 | return; |
ca0002a1 | 770 | |
c2724775 | 771 | task = tracer->ds.context->task; |
ca0002a1 | 772 | |
c2724775 MM |
773 | if (!task || (task == current)) |
774 | update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL); | |
ca0002a1 | 775 | |
c2724775 MM |
776 | if (task) { |
777 | task->thread.debugctlmsr &= ~BTS_CONTROL; | |
eee3af4a | 778 | |
c2724775 MM |
779 | if (!task->thread.debugctlmsr) |
780 | clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR); | |
781 | } | |
93fa7636 | 782 | } |
eee3af4a | 783 | |
c2724775 | 784 | void ds_resume_bts(struct bts_tracer *tracer) |
93fa7636 | 785 | { |
c2724775 MM |
786 | struct task_struct *task; |
787 | unsigned long control; | |
eee3af4a | 788 | |
ca0002a1 | 789 | if (!tracer) |
c2724775 | 790 | return; |
eee3af4a | 791 | |
c2724775 | 792 | task = tracer->ds.context->task; |
ca0002a1 | 793 | |
c2724775 MM |
794 | control = ds_cfg.ctl[dsf_bts]; |
795 | if (!(tracer->trace.ds.flags & BTS_KERNEL)) | |
796 | control |= ds_cfg.ctl[dsf_bts_kernel]; | |
797 | if (!(tracer->trace.ds.flags & BTS_USER)) | |
798 | control |= ds_cfg.ctl[dsf_bts_user]; | |
eee3af4a | 799 | |
c2724775 MM |
800 | if (task) { |
801 | task->thread.debugctlmsr |= control; | |
802 | set_tsk_thread_flag(task, TIF_DEBUGCTLMSR); | |
803 | } | |
ca0002a1 | 804 | |
c2724775 MM |
805 | if (!task || (task == current)) |
806 | update_debugctlmsr(get_debugctlmsr() | control); | |
eee3af4a MM |
807 | } |
808 | ||
c2724775 | 809 | void ds_release_pebs(struct pebs_tracer *tracer) |
eee3af4a | 810 | { |
ca0002a1 | 811 | if (!tracer) |
c2724775 | 812 | return; |
93fa7636 | 813 | |
c2724775 | 814 | ds_suspend_pebs(tracer); |
93fa7636 | 815 | |
c2724775 MM |
816 | WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer); |
817 | tracer->ds.context->pebs_master = NULL; | |
eee3af4a | 818 | |
c2724775 MM |
819 | put_tracer(tracer->ds.context->task); |
820 | ds_put_context(tracer->ds.context); | |
eee3af4a | 821 | |
c2724775 | 822 | kfree(tracer); |
a95d67f8 MM |
823 | } |
824 | ||
/* Suspending PEBS is currently a no-op. */
void ds_suspend_pebs(struct pebs_tracer *tracer)
{
}
eee3af4a | 829 | |
/* Resuming PEBS is currently a no-op. */
void ds_resume_pebs(struct pebs_tracer *tracer)
{
}
834 | ||
c2724775 | 835 | const struct bts_trace *ds_read_bts(struct bts_tracer *tracer) |
eee3af4a | 836 | { |
ca0002a1 | 837 | if (!tracer) |
c2724775 | 838 | return NULL; |
ca0002a1 | 839 | |
c2724775 MM |
840 | ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts); |
841 | return &tracer->trace; | |
93fa7636 | 842 | } |
eee3af4a | 843 | |
c2724775 | 844 | const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer) |
93fa7636 | 845 | { |
ca0002a1 | 846 | if (!tracer) |
c2724775 | 847 | return NULL; |
ca0002a1 | 848 | |
c2724775 MM |
849 | ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); |
850 | tracer->trace.reset_value = | |
851 | *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)); | |
ca0002a1 | 852 | |
c2724775 | 853 | return &tracer->trace; |
93fa7636 | 854 | } |
eee3af4a | 855 | |
c2724775 | 856 | int ds_reset_bts(struct bts_tracer *tracer) |
93fa7636 | 857 | { |
ca0002a1 MM |
858 | if (!tracer) |
859 | return -EINVAL; | |
860 | ||
c2724775 | 861 | tracer->trace.ds.top = tracer->trace.ds.begin; |
ca0002a1 | 862 | |
c2724775 MM |
863 | ds_set(tracer->ds.context->ds, ds_bts, ds_index, |
864 | (unsigned long)tracer->trace.ds.top); | |
ca0002a1 MM |
865 | |
866 | return 0; | |
93fa7636 | 867 | } |
eee3af4a | 868 | |
c2724775 | 869 | int ds_reset_pebs(struct pebs_tracer *tracer) |
93fa7636 | 870 | { |
ca0002a1 MM |
871 | if (!tracer) |
872 | return -EINVAL; | |
eee3af4a | 873 | |
c2724775 | 874 | tracer->trace.ds.top = tracer->trace.ds.begin; |
eee3af4a | 875 | |
c2724775 MM |
876 | ds_set(tracer->ds.context->ds, ds_bts, ds_index, |
877 | (unsigned long)tracer->trace.ds.top); | |
93fa7636 | 878 | |
ca0002a1 | 879 | return 0; |
eee3af4a MM |
880 | } |
881 | ||
ca0002a1 | 882 | int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value) |
eee3af4a | 883 | { |
ca0002a1 MM |
884 | if (!tracer) |
885 | return -EINVAL; | |
eee3af4a | 886 | |
ca0002a1 | 887 | *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value; |
93fa7636 | 888 | |
ca0002a1 | 889 | return 0; |
93fa7636 MM |
890 | } |
891 | ||
c2724775 | 892 | static const struct ds_configuration ds_cfg_netburst = { |
ba2607fe | 893 | .name = "Netburst", |
c2724775 MM |
894 | .ctl[dsf_bts] = (1 << 2) | (1 << 3), |
895 | .ctl[dsf_bts_kernel] = (1 << 5), | |
896 | .ctl[dsf_bts_user] = (1 << 6), | |
897 | ||
898 | .sizeof_field = sizeof(long), | |
899 | .sizeof_rec[ds_bts] = sizeof(long) * 3, | |
c4858ffc | 900 | #ifdef __i386__ |
c2724775 | 901 | .sizeof_rec[ds_pebs] = sizeof(long) * 10, |
c4858ffc | 902 | #else |
c2724775 | 903 | .sizeof_rec[ds_pebs] = sizeof(long) * 18, |
c4858ffc | 904 | #endif |
eee3af4a | 905 | }; |
c2724775 | 906 | static const struct ds_configuration ds_cfg_pentium_m = { |
ba2607fe | 907 | .name = "Pentium M", |
c2724775 MM |
908 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), |
909 | ||
910 | .sizeof_field = sizeof(long), | |
911 | .sizeof_rec[ds_bts] = sizeof(long) * 3, | |
c4858ffc | 912 | #ifdef __i386__ |
c2724775 | 913 | .sizeof_rec[ds_pebs] = sizeof(long) * 10, |
c4858ffc | 914 | #else |
c2724775 | 915 | .sizeof_rec[ds_pebs] = sizeof(long) * 18, |
c4858ffc | 916 | #endif |
eee3af4a | 917 | }; |
ba2607fe MM |
918 | static const struct ds_configuration ds_cfg_core2_atom = { |
919 | .name = "Core 2/Atom", | |
c2724775 MM |
920 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), |
921 | .ctl[dsf_bts_kernel] = (1 << 9), | |
922 | .ctl[dsf_bts_user] = (1 << 10), | |
923 | ||
924 | .sizeof_field = 8, | |
925 | .sizeof_rec[ds_bts] = 8 * 3, | |
926 | .sizeof_rec[ds_pebs] = 8 * 18, | |
927 | }; | |
eee3af4a | 928 | |
c2724775 | 929 | static void |
eee3af4a MM |
930 | ds_configure(const struct ds_configuration *cfg) |
931 | { | |
c2724775 | 932 | memset(&ds_cfg, 0, sizeof(ds_cfg)); |
eee3af4a | 933 | ds_cfg = *cfg; |
ca0002a1 | 934 | |
c2724775 MM |
935 | printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name); |
936 | ||
937 | if (!cpu_has_bts) { | |
938 | ds_cfg.ctl[dsf_bts] = 0; | |
939 | printk(KERN_INFO "[ds] bts not available\n"); | |
940 | } | |
941 | if (!cpu_has_pebs) | |
942 | printk(KERN_INFO "[ds] pebs not available\n"); | |
ca0002a1 | 943 | |
c2724775 | 944 | WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field)); |
eee3af4a MM |
945 | } |
946 | ||
947 | void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |
948 | { | |
949 | switch (c->x86) { | |
950 | case 0x6: | |
951 | switch (c->x86_model) { | |
ba2607fe MM |
952 | case 0x9: |
953 | case 0xd: /* Pentium M */ | |
c2724775 | 954 | ds_configure(&ds_cfg_pentium_m); |
eee3af4a | 955 | break; |
ba2607fe MM |
956 | case 0xf: |
957 | case 0x17: /* Core2 */ | |
958 | case 0x1c: /* Atom */ | |
959 | ds_configure(&ds_cfg_core2_atom); | |
960 | break; | |
961 | case 0x1a: /* i7 */ | |
962 | default: | |
963 | /* sorry, don't know about them */ | |
eee3af4a | 964 | break; |
eee3af4a MM |
965 | } |
966 | break; | |
ba2607fe | 967 | case 0xf: |
eee3af4a | 968 | switch (c->x86_model) { |
eee3af4a MM |
969 | case 0x0: |
970 | case 0x1: | |
971 | case 0x2: /* Netburst */ | |
c2724775 | 972 | ds_configure(&ds_cfg_netburst); |
eee3af4a | 973 | break; |
eee3af4a MM |
974 | default: |
975 | /* sorry, don't know about them */ | |
976 | break; | |
977 | } | |
978 | break; | |
979 | default: | |
980 | /* sorry, don't know about them */ | |
981 | break; | |
982 | } | |
983 | } | |
93fa7636 | 984 | |
c2724775 MM |
985 | /* |
986 | * Change the DS configuration from tracing prev to tracing next. | |
987 | */ | |
988 | void ds_switch_to(struct task_struct *prev, struct task_struct *next) | |
93fa7636 | 989 | { |
c2724775 MM |
990 | struct ds_context *prev_ctx = prev->thread.ds_ctx; |
991 | struct ds_context *next_ctx = next->thread.ds_ctx; | |
992 | ||
993 | if (prev_ctx) { | |
994 | update_debugctlmsr(0); | |
995 | ||
996 | if (prev_ctx->bts_master && | |
997 | (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) { | |
998 | struct bts_struct ts = { | |
999 | .qualifier = bts_task_departs, | |
1000 | .variant.timestamp.jiffies = jiffies_64, | |
1001 | .variant.timestamp.pid = prev->pid | |
1002 | }; | |
1003 | bts_write(prev_ctx->bts_master, &ts); | |
1004 | } | |
1005 | } | |
1006 | ||
1007 | if (next_ctx) { | |
1008 | if (next_ctx->bts_master && | |
1009 | (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) { | |
1010 | struct bts_struct ts = { | |
1011 | .qualifier = bts_task_arrives, | |
1012 | .variant.timestamp.jiffies = jiffies_64, | |
1013 | .variant.timestamp.pid = next->pid | |
1014 | }; | |
1015 | bts_write(next_ctx->bts_master, &ts); | |
1016 | } | |
1017 | ||
1018 | wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds); | |
ca0002a1 | 1019 | } |
c2724775 MM |
1020 | |
1021 | update_debugctlmsr(next->thread.debugctlmsr); | |
93fa7636 | 1022 | } |
bf53de90 MM |
1023 | |
1024 | void ds_copy_thread(struct task_struct *tsk, struct task_struct *father) | |
1025 | { | |
1026 | clear_tsk_thread_flag(tsk, TIF_DS_AREA_MSR); | |
1027 | tsk->thread.ds_ctx = NULL; | |
1028 | } | |
1029 | ||
/*
 * DS hook for an exiting task @tsk.
 *
 * Currently a no-op: the body is empty, so @tsk is not inspected and
 * nothing is changed.
 */
void ds_exit_thread(struct task_struct *tsk)
{
}