]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - arch/x86/kernel/ds.c
Merge branch 'x86-cleanups-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[mirror_ubuntu-bionic-kernel.git] / arch / x86 / kernel / ds.c
CommitLineData
eee3af4a
MM
1/*
2 * Debug Store support
3 *
4 * This provides a low-level interface to the hardware's Debug Store
93fa7636 5 * feature that is used for branch trace store (BTS) and
eee3af4a
MM
6 * precise-event based sampling (PEBS).
7 *
93fa7636 8 * It manages:
c2724775 9 * - DS and BTS hardware configuration
6abb11ae 10 * - buffer overflow handling (to be done)
93fa7636 11 * - buffer access
eee3af4a 12 *
c2724775
MM
13 * It does not do:
14 * - security checking (is the caller allowed to trace the task)
15 * - buffer allocation (memory accounting)
eee3af4a 16 *
eee3af4a 17 *
ba2607fe
MM
18 * Copyright (C) 2007-2009 Intel Corporation.
19 * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
eee3af4a
MM
20 */
21
e9a22d1f 22#include <linux/kernel.h>
eee3af4a 23#include <linux/string.h>
e9a22d1f 24#include <linux/errno.h>
93fa7636 25#include <linux/sched.h>
e9a22d1f 26#include <linux/slab.h>
3c933904 27#include <linux/mm.h>
15879d04 28#include <linux/trace_clock.h>
e9a22d1f
IM
29
30#include <asm/ds.h>
93fa7636 31
8a327f6d 32#include "ds_selftest.h"
93fa7636
MM
33
34/*
e9a22d1f 35 * The configuration for a particular DS hardware implementation:
93fa7636
MM
36 */
37struct ds_configuration {
e9a22d1f
IM
38 /* The name of the configuration: */
39 const char *name;
40
41 /* The size of pointer-typed fields in DS, BTS, and PEBS: */
42 unsigned char sizeof_ptr_field;
43
44 /* The size of a BTS/PEBS record in bytes: */
45 unsigned char sizeof_rec[2];
46
017bc617
MM
47 /* The number of pebs counter reset values in the DS structure. */
48 unsigned char nr_counter_reset;
49
e9a22d1f
IM
50 /* Control bit-masks indexed by enum ds_feature: */
51 unsigned long ctl[dsf_ctl_max];
93fa7636 52};
ee811517 53static struct ds_configuration ds_cfg __read_mostly;
c2724775 54
c2724775 55
/* Upper bound on the size of a DS configuration, in bytes. */
#define MAX_SIZEOF_DS		0x80

/* Upper bound on the size of a single BTS record, in bytes. */
#define MAX_SIZEOF_BTS		(3 * 8)

/* Alignment of BTS and PEBS buffers. */
#define DS_ALIGNMENT		(1 << 3)

/* Number of buffer pointer fields in DS. */
#define NUM_DS_PTR_FIELDS	8

/* Size of one PEBS reset value in DS. */
#define PEBS_RESET_FIELD_SIZE	8

/*
 * Union of all BTS control bits of the current configuration; used to
 * mask debugctlmsr values.
 */
#define BTS_CONTROL				\
	( ds_cfg.ctl[dsf_bts]		|	\
	  ds_cfg.ctl[dsf_bts_kernel]	|	\
	  ds_cfg.ctl[dsf_bts_user]	|	\
	  ds_cfg.ctl[dsf_bts_overflow] )
eee3af4a 77
ca0002a1
MM
/*
 * State common to BTS and PEBS tracers.
 *
 * It records the tracer's configuration and doubles as the handle by
 * which a tracer is identified.
 */
struct ds_tracer {
	/* DS context this tracer (partially) owns. */
	struct ds_context	*context;

	/* Buffer handed to ds_request() ... */
	void			*buffer;
	/* ... and its size in bytes. */
	size_t			size;
};
91
92struct bts_tracer {
e9a22d1f
IM
93 /* The common DS part: */
94 struct ds_tracer ds;
95
96 /* The trace including the DS configuration: */
97 struct bts_trace trace;
98
99 /* Buffer overflow notification function: */
100 bts_ovfl_callback_t ovfl;
cac94f97
MM
101
102 /* Active flags affecting trace collection. */
103 unsigned int flags;
ca0002a1
MM
104};
105
106struct pebs_tracer {
e9a22d1f
IM
107 /* The common DS part: */
108 struct ds_tracer ds;
109
110 /* The trace including the DS configuration: */
111 struct pebs_trace trace;
112
113 /* Buffer overflow notification function: */
114 pebs_ovfl_callback_t ovfl;
ca0002a1 115};
eee3af4a
MM
116
117/*
118 * Debug Store (DS) save area configuration (see Intel64 and IA32
119 * Architectures Software Developer's Manual, section 18.5)
120 *
121 * The DS configuration consists of the following fields; different
122 * architectures vary in the size of those fields.
e9a22d1f 123 *
eee3af4a
MM
124 * - double-word aligned base linear address of the BTS buffer
125 * - write pointer into the BTS buffer
126 * - end linear address of the BTS buffer (one byte beyond the end of
127 * the buffer)
128 * - interrupt pointer into BTS buffer
129 * (interrupt occurs when write pointer passes interrupt pointer)
130 * - double-word aligned base linear address of the PEBS buffer
131 * - write pointer into the PEBS buffer
132 * - end linear address of the PEBS buffer (one byte beyond the end of
133 * the buffer)
134 * - interrupt pointer into PEBS buffer
135 * (interrupt occurs when write pointer passes interrupt pointer)
136 * - value to which counter is reset following counter overflow
137 *
93fa7636
MM
138 * Later architectures use 64bit pointers throughout, whereas earlier
139 * architectures use 32bit pointers in 32bit mode.
eee3af4a 140 *
eee3af4a 141 *
93fa7636
MM
142 * We compute the base address for the first 8 fields based on:
143 * - the field size stored in the DS configuration
144 * - the relative field position
145 * - an offset giving the start of the respective region
eee3af4a 146 *
93fa7636
MM
147 * This offset is further used to index various arrays holding
148 * information for BTS and PEBS at the respective index.
eee3af4a 149 *
93fa7636
MM
150 * On later 32bit processors, we only access the lower 32bit of the
151 * 64bit pointer fields. The upper halves will be zeroed out.
eee3af4a
MM
152 */
153
93fa7636
MM
/* Position of a pointer field within the BTS or PEBS part of DS. */
enum ds_field {
	ds_buffer_base		= 0,
	ds_index		= 1,
	ds_absolute_maximum	= 2,
	ds_interrupt_threshold	= 3,
};
eee3af4a 160
/* Selects the BTS or the PEBS part of DS; also indexes sizeof_rec[]. */
enum ds_qualifier {
	ds_bts	= 0,
	ds_pebs	= 1
};
165
e9a22d1f
IM
166static inline unsigned long
167ds_get(const unsigned char *base, enum ds_qualifier qual, enum ds_field field)
93fa7636 168{
bc44fb5f 169 base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
93fa7636
MM
170 return *(unsigned long *)base;
171}
172
e9a22d1f
IM
173static inline void
174ds_set(unsigned char *base, enum ds_qualifier qual, enum ds_field field,
175 unsigned long value)
93fa7636 176{
bc44fb5f 177 base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
93fa7636
MM
178 (*(unsigned long *)base) = value;
179}
180
181
/*
 * This lock only guards the allocation of BTS or PEBS resources.
 */
static DEFINE_SPINLOCK(ds_lock);
eee3af4a 186
eee3af4a 187/*
93fa7636
MM
188 * We either support (system-wide) per-cpu or per-thread allocation.
189 * We distinguish the two based on the task_struct pointer, where a
190 * NULL pointer indicates per-cpu allocation for the current cpu.
191 *
192 * Allocations are use-counted. As soon as resources are allocated,
193 * further allocations must be of the same type (per-cpu or
194 * per-thread). We model this by counting allocations (i.e. the number
195 * of tracers of a certain type) for one type negatively:
196 * =0 no tracers
197 * >0 number of per-thread tracers
198 * <0 number of per-cpu tracers
199 *
93fa7636
MM
200 * Tracers essentially gives the number of ds contexts for a certain
201 * type of allocation.
eee3af4a 202 */
c2724775 203static atomic_t tracers = ATOMIC_INIT(0);
93fa7636 204
38f80112 205static inline int get_tracer(struct task_struct *task)
eee3af4a 206{
38f80112
MM
207 int error;
208
209 spin_lock_irq(&ds_lock);
210
211 if (task) {
212 error = -EPERM;
213 if (atomic_read(&tracers) < 0)
214 goto out;
c2724775 215 atomic_inc(&tracers);
38f80112
MM
216 } else {
217 error = -EPERM;
218 if (atomic_read(&tracers) > 0)
219 goto out;
c2724775 220 atomic_dec(&tracers);
38f80112
MM
221 }
222
223 error = 0;
224out:
225 spin_unlock_irq(&ds_lock);
226 return error;
eee3af4a 227}
93fa7636
MM
228
229static inline void put_tracer(struct task_struct *task)
eee3af4a 230{
c2724775
MM
231 if (task)
232 atomic_dec(&tracers);
233 else
234 atomic_inc(&tracers);
eee3af4a 235}
93fa7636 236
93fa7636
MM
237/*
238 * The DS context is either attached to a thread or to a cpu:
239 * - in the former case, the thread_struct contains a pointer to the
240 * attached context.
241 * - in the latter case, we use a static array of per-cpu context
242 * pointers.
243 *
244 * Contexts are use-counted. They are allocated on first access and
245 * deallocated when the last user puts the context.
93fa7636 246 */
c2724775 247struct ds_context {
e9a22d1f
IM
248 /* The DS configuration; goes into MSR_IA32_DS_AREA: */
249 unsigned char ds[MAX_SIZEOF_DS];
250
251 /* The owner of the BTS and PEBS configuration, respectively: */
252 struct bts_tracer *bts_master;
253 struct pebs_tracer *pebs_master;
254
255 /* Use count: */
de79f54f 256 unsigned long count;
e9a22d1f
IM
257
258 /* Pointer to the context pointer field: */
259 struct ds_context **this;
260
de79f54f 261 /* The traced task; NULL for cpu tracing: */
e9a22d1f 262 struct task_struct *task;
c2724775 263
de79f54f
MM
264 /* The traced cpu; only valid if task is NULL: */
265 int cpu;
266};
93fa7636 267
0fe1e009 268static DEFINE_PER_CPU(struct ds_context *, cpu_ds_context);
93fa7636 269
cc1dc6d0 270
de79f54f 271static struct ds_context *ds_get_context(struct task_struct *task, int cpu)
eee3af4a 272{
93fa7636 273 struct ds_context **p_context =
0fe1e009 274 (task ? &task->thread.ds_ctx : &per_cpu(cpu_ds_context, cpu));
cc1dc6d0
MM
275 struct ds_context *context = NULL;
276 struct ds_context *new_context = NULL;
93fa7636 277
de79f54f
MM
278 /* Chances are small that we already have a context. */
279 new_context = kzalloc(sizeof(*new_context), GFP_KERNEL);
cc1dc6d0
MM
280 if (!new_context)
281 return NULL;
de90add3 282
de79f54f 283 spin_lock_irq(&ds_lock);
93fa7636 284
cc1dc6d0 285 context = *p_context;
de79f54f 286 if (likely(!context)) {
cc1dc6d0 287 context = new_context;
93fa7636 288
cc1dc6d0
MM
289 context->this = p_context;
290 context->task = task;
de79f54f 291 context->cpu = cpu;
cc1dc6d0 292 context->count = 0;
93fa7636 293
cc1dc6d0
MM
294 *p_context = context;
295 }
c2724775 296
cc1dc6d0 297 context->count++;
c2724775 298
de79f54f 299 spin_unlock_irq(&ds_lock);
93fa7636 300
cc1dc6d0
MM
301 if (context != new_context)
302 kfree(new_context);
93fa7636
MM
303
304 return context;
eee3af4a 305}
93fa7636 306
de79f54f 307static void ds_put_context(struct ds_context *context)
eee3af4a 308{
8d99b3ac 309 struct task_struct *task;
de90add3
MM
310 unsigned long irq;
311
93fa7636
MM
312 if (!context)
313 return;
314
de90add3 315 spin_lock_irqsave(&ds_lock, irq);
93fa7636 316
c2724775
MM
317 if (--context->count) {
318 spin_unlock_irqrestore(&ds_lock, irq);
319 return;
320 }
93fa7636 321
573da422 322 *(context->this) = NULL;
93fa7636 323
8d99b3ac
MM
324 task = context->task;
325
326 if (task)
327 clear_tsk_thread_flag(task, TIF_DS_AREA_MSR);
93fa7636 328
de79f54f
MM
329 /*
330 * We leave the (now dangling) pointer to the DS configuration in
331 * the DS_AREA msr. This is as good or as bad as replacing it with
332 * NULL - the hardware would crash if we enabled tracing.
333 *
334 * This saves us some problems with having to write an msr on a
335 * different cpu while preventing others from doing the same for the
336 * next context for that same cpu.
337 */
93fa7636 338
de90add3 339 spin_unlock_irqrestore(&ds_lock, irq);
c2724775 340
8d99b3ac
MM
341 /* The context might still be in use for context switching. */
342 if (task && (task != current))
343 wait_task_context_switch(task);
344
c2724775 345 kfree(context);
eee3af4a 346}
93fa7636 347
de79f54f
MM
348static void ds_install_ds_area(struct ds_context *context)
349{
350 unsigned long ds;
351
352 ds = (unsigned long)context->ds;
353
354 /*
355 * There is a race between the bts master and the pebs master.
356 *
357 * The thread/cpu access is synchronized via get/put_cpu() for
358 * task tracing and via wrmsr_on_cpu for cpu tracing.
359 *
360 * If bts and pebs are collected for the same task or same cpu,
361 * the same confiuration is written twice.
362 */
363 if (context->task) {
364 get_cpu();
365 if (context->task == current)
366 wrmsrl(MSR_IA32_DS_AREA, ds);
367 set_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
368 put_cpu();
369 } else
370 wrmsr_on_cpu(context->cpu, MSR_IA32_DS_AREA,
371 (u32)((u64)ds), (u32)((u64)ds >> 32));
372}
93fa7636
MM
373
374/*
c2724775 375 * Call the tracer's callback on a buffer overflow.
93fa7636 376 *
93fa7636
MM
377 * context: the ds context
378 * qual: the buffer type
379 */
ca0002a1
MM
380static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
381{
382 switch (qual) {
c2724775
MM
383 case ds_bts:
384 if (context->bts_master &&
385 context->bts_master->ovfl)
386 context->bts_master->ovfl(context->bts_master);
387 break;
388 case ds_pebs:
389 if (context->pebs_master &&
390 context->pebs_master->ovfl)
391 context->pebs_master->ovfl(context->pebs_master);
ca0002a1 392 break;
ca0002a1 393 }
c2724775
MM
394}
395
396
397/*
398 * Write raw data into the BTS or PEBS buffer.
399 *
400 * The remainder of any partially written record is zeroed out.
401 *
402 * context: the DS context
e9a22d1f
IM
403 * qual: the buffer type
404 * record: the data to write
405 * size: the size of the data
c2724775
MM
406 */
407static int ds_write(struct ds_context *context, enum ds_qualifier qual,
408 const void *record, size_t size)
409{
410 int bytes_written = 0;
411
412 if (!record)
413 return -EINVAL;
414
415 while (size) {
416 unsigned long base, index, end, write_end, int_th;
417 unsigned long write_size, adj_write_size;
418
419 /*
b8e47195 420 * Write as much as possible without producing an
c2724775
MM
421 * overflow interrupt.
422 *
b8e47195 423 * Interrupt_threshold must either be
c2724775
MM
424 * - bigger than absolute_maximum or
425 * - point to a record between buffer_base and absolute_maximum
426 *
b8e47195 427 * Index points to a valid record.
c2724775
MM
428 */
429 base = ds_get(context->ds, qual, ds_buffer_base);
430 index = ds_get(context->ds, qual, ds_index);
431 end = ds_get(context->ds, qual, ds_absolute_maximum);
432 int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
433
434 write_end = min(end, int_th);
435
b8e47195
MM
436 /*
437 * If we are already beyond the interrupt threshold,
438 * we fill the entire buffer.
439 */
c2724775
MM
440 if (write_end <= index)
441 write_end = end;
442
443 if (write_end <= index)
444 break;
445
446 write_size = min((unsigned long) size, write_end - index);
447 memcpy((void *)index, record, write_size);
448
449 record = (const char *)record + write_size;
450 size -= write_size;
451 bytes_written += write_size;
452
453 adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
454 adj_write_size *= ds_cfg.sizeof_rec[qual];
455
b8e47195 456 /* Zero out trailing bytes. */
c2724775
MM
457 memset((char *)index + write_size, 0,
458 adj_write_size - write_size);
459 index += adj_write_size;
460
461 if (index >= end)
462 index = base;
463 ds_set(context->ds, qual, ds_index, index);
464
465 if (index >= int_th)
466 ds_overflow(context, qual);
467 }
468
469 return bytes_written;
470}
471
472
473/*
474 * Branch Trace Store (BTS) uses the following format. Different
475 * architectures vary in the size of those fields.
476 * - source linear address
477 * - destination linear address
478 * - flags
479 *
480 * Later architectures use 64bit pointers throughout, whereas earlier
481 * architectures use 32bit pointers in 32bit mode.
482 *
bc44fb5f 483 * We compute the base address for the fields based on:
c2724775
MM
484 * - the field size stored in the DS configuration
485 * - the relative field position
486 *
487 * In order to store additional information in the BTS buffer, we use
488 * a special source address to indicate that the record requires
489 * special interpretation.
490 *
491 * Netburst indicated via a bit in the flags field whether the branch
492 * was predicted; this is ignored.
493 *
494 * We use two levels of abstraction:
495 * - the raw data level defined here
496 * - an arch-independent level defined in ds.h
497 */
498
499enum bts_field {
500 bts_from,
501 bts_to,
502 bts_flags,
503
e9a22d1f 504 bts_qual = bts_from,
15879d04 505 bts_clock = bts_to,
e9a22d1f 506 bts_pid = bts_flags,
c2724775 507
e9a22d1f
IM
508 bts_qual_mask = (bts_qual_max - 1),
509 bts_escape = ((unsigned long)-1 & ~bts_qual_mask)
c2724775
MM
510};
511
feaa0457 512static inline unsigned long bts_get(const char *base, unsigned long field)
c2724775 513{
bc44fb5f 514 base += (ds_cfg.sizeof_ptr_field * field);
c2724775
MM
515 return *(unsigned long *)base;
516}
517
feaa0457 518static inline void bts_set(char *base, unsigned long field, unsigned long val)
c2724775 519{
feaa0457 520 base += (ds_cfg.sizeof_ptr_field * field);
c2724775
MM
521 (*(unsigned long *)base) = val;
522}
523
524
525/*
526 * The raw BTS data is architecture dependent.
527 *
528 * For higher-level users, we give an arch-independent view.
529 * - ds.h defines struct bts_struct
530 * - bts_read translates one raw bts record into a bts_struct
531 * - bts_write translates one bts_struct into the raw format and
532 * writes it into the top of the parameter tracer's buffer.
533 *
534 * return: bytes read/written on success; -Eerrno, otherwise
535 */
e9a22d1f
IM
536static int
537bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out)
c2724775
MM
538{
539 if (!tracer)
540 return -EINVAL;
541
542 if (at < tracer->trace.ds.begin)
543 return -EINVAL;
544
545 if (tracer->trace.ds.end < (at + tracer->trace.ds.size))
546 return -EINVAL;
547
548 memset(out, 0, sizeof(*out));
549 if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) {
550 out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask);
15879d04
MM
551 out->variant.event.clock = bts_get(at, bts_clock);
552 out->variant.event.pid = bts_get(at, bts_pid);
c2724775
MM
553 } else {
554 out->qualifier = bts_branch;
555 out->variant.lbr.from = bts_get(at, bts_from);
556 out->variant.lbr.to = bts_get(at, bts_to);
d072c25f
MM
557
558 if (!out->variant.lbr.from && !out->variant.lbr.to)
559 out->qualifier = bts_invalid;
c2724775
MM
560 }
561
562 return ds_cfg.sizeof_rec[ds_bts];
563}
564
565static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in)
566{
567 unsigned char raw[MAX_SIZEOF_BTS];
568
569 if (!tracer)
570 return -EINVAL;
571
572 if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts])
573 return -EOVERFLOW;
574
575 switch (in->qualifier) {
576 case bts_invalid:
577 bts_set(raw, bts_from, 0);
578 bts_set(raw, bts_to, 0);
579 bts_set(raw, bts_flags, 0);
580 break;
581 case bts_branch:
582 bts_set(raw, bts_from, in->variant.lbr.from);
583 bts_set(raw, bts_to, in->variant.lbr.to);
584 bts_set(raw, bts_flags, 0);
585 break;
586 case bts_task_arrives:
587 case bts_task_departs:
588 bts_set(raw, bts_qual, (bts_escape | in->qualifier));
15879d04
MM
589 bts_set(raw, bts_clock, in->variant.event.clock);
590 bts_set(raw, bts_pid, in->variant.event.pid);
ca0002a1 591 break;
c2724775
MM
592 default:
593 return -EINVAL;
ca0002a1 594 }
c2724775
MM
595
596 return ds_write(tracer->ds.context, ds_bts, raw,
597 ds_cfg.sizeof_rec[ds_bts]);
eee3af4a 598}
93fa7636
MM
599
600
c2724775
MM
601static void ds_write_config(struct ds_context *context,
602 struct ds_trace *cfg, enum ds_qualifier qual)
603{
604 unsigned char *ds = context->ds;
605
606 ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin);
607 ds_set(ds, qual, ds_index, (unsigned long)cfg->top);
608 ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end);
609 ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith);
610}
611
612static void ds_read_config(struct ds_context *context,
613 struct ds_trace *cfg, enum ds_qualifier qual)
eee3af4a 614{
c2724775
MM
615 unsigned char *ds = context->ds;
616
617 cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base);
618 cfg->top = (void *)ds_get(ds, qual, ds_index);
619 cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum);
620 cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold);
621}
622
623static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
624 void *base, size_t size, size_t ith,
625 unsigned int flags) {
93fa7636 626 unsigned long buffer, adj;
ca0002a1 627
b8e47195
MM
628 /*
629 * Adjust the buffer address and size to meet alignment
ca0002a1
MM
630 * constraints:
631 * - buffer is double-word aligned
632 * - size is multiple of record size
633 *
634 * We checked the size at the very beginning; we have enough
635 * space to do the adjustment.
636 */
637 buffer = (unsigned long)base;
638
639 adj = ALIGN(buffer, DS_ALIGNMENT) - buffer;
640 buffer += adj;
641 size -= adj;
642
c2724775
MM
643 trace->n = size / ds_cfg.sizeof_rec[qual];
644 trace->size = ds_cfg.sizeof_rec[qual];
ca0002a1 645
c2724775 646 size = (trace->n * trace->size);
ca0002a1 647
c2724775
MM
648 trace->begin = (void *)buffer;
649 trace->top = trace->begin;
650 trace->end = (void *)(buffer + size);
b8e47195
MM
651 /*
652 * The value for 'no threshold' is -1, which will set the
ca0002a1
MM
653 * threshold outside of the buffer, just like we want it.
654 */
de79f54f 655 ith *= ds_cfg.sizeof_rec[qual];
c2724775
MM
656 trace->ith = (void *)(buffer + size - ith);
657
658 trace->flags = flags;
ca0002a1
MM
659}
660
c2724775
MM
661
662static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
663 enum ds_qualifier qual, struct task_struct *task,
de79f54f 664 int cpu, void *base, size_t size, size_t th)
ca0002a1
MM
665{
666 struct ds_context *context;
ca0002a1 667 int error;
150f5164 668 size_t req_size;
93fa7636 669
bc44fb5f
MM
670 error = -EOPNOTSUPP;
671 if (!ds_cfg.sizeof_rec[qual])
672 goto out;
673
6abb11ae
MM
674 error = -EINVAL;
675 if (!base)
676 goto out;
677
150f5164
MM
678 req_size = ds_cfg.sizeof_rec[qual];
679 /* We might need space for alignment adjustments. */
680 if (!IS_ALIGNED((unsigned long)base, DS_ALIGNMENT))
681 req_size += DS_ALIGNMENT;
682
ca0002a1 683 error = -EINVAL;
150f5164 684 if (size < req_size)
ca0002a1 685 goto out;
93fa7636 686
ca0002a1
MM
687 if (th != (size_t)-1) {
688 th *= ds_cfg.sizeof_rec[qual];
689
690 error = -EINVAL;
691 if (size <= th)
692 goto out;
693 }
694
ca0002a1
MM
695 tracer->buffer = base;
696 tracer->size = size;
93fa7636 697
ca0002a1 698 error = -ENOMEM;
de79f54f 699 context = ds_get_context(task, cpu);
93fa7636 700 if (!context)
ca0002a1
MM
701 goto out;
702 tracer->context = context;
703
de79f54f
MM
704 /*
705 * Defer any tracer-specific initialization work for the context until
706 * context ownership has been clarified.
707 */
de90add3 708
c2724775 709 error = 0;
ca0002a1 710 out:
93fa7636 711 return error;
eee3af4a 712}
93fa7636 713
de79f54f
MM
714static struct bts_tracer *ds_request_bts(struct task_struct *task, int cpu,
715 void *base, size_t size,
716 bts_ovfl_callback_t ovfl, size_t th,
717 unsigned int flags)
eee3af4a 718{
ca0002a1
MM
719 struct bts_tracer *tracer;
720 int error;
93fa7636 721
b8e47195 722 /* Buffer overflow notification is not yet implemented. */
ca0002a1
MM
723 error = -EOPNOTSUPP;
724 if (ovfl)
725 goto out;
726
38f80112
MM
727 error = get_tracer(task);
728 if (error < 0)
729 goto out;
730
ca0002a1 731 error = -ENOMEM;
de79f54f 732 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
ca0002a1 733 if (!tracer)
38f80112 734 goto out_put_tracer;
ca0002a1
MM
735 tracer->ovfl = ovfl;
736
de79f54f 737 /* Do some more error checking and acquire a tracing context. */
c2724775 738 error = ds_request(&tracer->ds, &tracer->trace.ds,
de79f54f 739 ds_bts, task, cpu, base, size, th);
ca0002a1
MM
740 if (error < 0)
741 goto out_tracer;
742
de79f54f
MM
743 /* Claim the bts part of the tracing context we acquired above. */
744 spin_lock_irq(&ds_lock);
c2724775 745
c2724775
MM
746 error = -EPERM;
747 if (tracer->ds.context->bts_master)
38f80112 748 goto out_unlock;
c2724775
MM
749 tracer->ds.context->bts_master = tracer;
750
de79f54f 751 spin_unlock_irq(&ds_lock);
c2724775 752
de79f54f
MM
753 /*
754 * Now that we own the bts part of the context, let's complete the
755 * initialization for that part.
756 */
757 ds_init_ds_trace(&tracer->trace.ds, ds_bts, base, size, th, flags);
758 ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
759 ds_install_ds_area(tracer->ds.context);
c2724775
MM
760
761 tracer->trace.read = bts_read;
762 tracer->trace.write = bts_write;
763
de79f54f 764 /* Start tracing. */
c2724775
MM
765 ds_resume_bts(tracer);
766
ca0002a1
MM
767 return tracer;
768
c2724775 769 out_unlock:
de79f54f 770 spin_unlock_irq(&ds_lock);
c2724775 771 ds_put_context(tracer->ds.context);
ca0002a1 772 out_tracer:
6abb11ae 773 kfree(tracer);
38f80112
MM
774 out_put_tracer:
775 put_tracer(task);
ca0002a1
MM
776 out:
777 return ERR_PTR(error);
eee3af4a 778}
93fa7636 779
de79f54f
MM
780struct bts_tracer *ds_request_bts_task(struct task_struct *task,
781 void *base, size_t size,
782 bts_ovfl_callback_t ovfl,
783 size_t th, unsigned int flags)
784{
785 return ds_request_bts(task, 0, base, size, ovfl, th, flags);
786}
787
788struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
789 bts_ovfl_callback_t ovfl,
790 size_t th, unsigned int flags)
791{
792 return ds_request_bts(NULL, cpu, base, size, ovfl, th, flags);
793}
794
795static struct pebs_tracer *ds_request_pebs(struct task_struct *task, int cpu,
796 void *base, size_t size,
797 pebs_ovfl_callback_t ovfl, size_t th,
798 unsigned int flags)
eee3af4a 799{
ca0002a1 800 struct pebs_tracer *tracer;
93fa7636
MM
801 int error;
802
b8e47195 803 /* Buffer overflow notification is not yet implemented. */
ca0002a1
MM
804 error = -EOPNOTSUPP;
805 if (ovfl)
93fa7636
MM
806 goto out;
807
38f80112
MM
808 error = get_tracer(task);
809 if (error < 0)
810 goto out;
811
ca0002a1 812 error = -ENOMEM;
de79f54f 813 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
ca0002a1 814 if (!tracer)
38f80112 815 goto out_put_tracer;
ca0002a1 816 tracer->ovfl = ovfl;
93fa7636 817
de79f54f 818 /* Do some more error checking and acquire a tracing context. */
c2724775 819 error = ds_request(&tracer->ds, &tracer->trace.ds,
de79f54f 820 ds_pebs, task, cpu, base, size, th);
ca0002a1
MM
821 if (error < 0)
822 goto out_tracer;
93fa7636 823
de79f54f
MM
824 /* Claim the pebs part of the tracing context we acquired above. */
825 spin_lock_irq(&ds_lock);
c2724775 826
c2724775
MM
827 error = -EPERM;
828 if (tracer->ds.context->pebs_master)
38f80112 829 goto out_unlock;
c2724775
MM
830 tracer->ds.context->pebs_master = tracer;
831
de79f54f 832 spin_unlock_irq(&ds_lock);
c2724775 833
de79f54f
MM
834 /*
835 * Now that we own the pebs part of the context, let's complete the
836 * initialization for that part.
837 */
838 ds_init_ds_trace(&tracer->trace.ds, ds_pebs, base, size, th, flags);
73bf1b62 839 ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
de79f54f
MM
840 ds_install_ds_area(tracer->ds.context);
841
842 /* Start tracing. */
c2724775
MM
843 ds_resume_pebs(tracer);
844
ca0002a1
MM
845 return tracer;
846
c2724775 847 out_unlock:
de79f54f 848 spin_unlock_irq(&ds_lock);
c2724775 849 ds_put_context(tracer->ds.context);
ca0002a1 850 out_tracer:
6abb11ae 851 kfree(tracer);
38f80112
MM
852 out_put_tracer:
853 put_tracer(task);
93fa7636 854 out:
ca0002a1
MM
855 return ERR_PTR(error);
856}
857
de79f54f
MM
858struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
859 void *base, size_t size,
860 pebs_ovfl_callback_t ovfl,
861 size_t th, unsigned int flags)
eee3af4a 862{
de79f54f
MM
863 return ds_request_pebs(task, 0, base, size, ovfl, th, flags);
864}
8d99b3ac 865
de79f54f
MM
866struct pebs_tracer *ds_request_pebs_cpu(int cpu, void *base, size_t size,
867 pebs_ovfl_callback_t ovfl,
868 size_t th, unsigned int flags)
869{
870 return ds_request_pebs(NULL, cpu, base, size, ovfl, th, flags);
871}
ca0002a1 872
de79f54f
MM
873static void ds_free_bts(struct bts_tracer *tracer)
874{
875 struct task_struct *task;
8d99b3ac 876
de79f54f 877 task = tracer->ds.context->task;
ca0002a1 878
c2724775
MM
879 WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
880 tracer->ds.context->bts_master = NULL;
93fa7636 881
8d99b3ac
MM
882 /* Make sure tracing stopped and the tracer is not in use. */
883 if (task && (task != current))
884 wait_task_context_switch(task);
885
c2724775 886 ds_put_context(tracer->ds.context);
38f80112 887 put_tracer(task);
ca0002a1 888
ca0002a1 889 kfree(tracer);
eee3af4a 890}
93fa7636 891
de79f54f
MM
/*
 * Stop BTS tracing and free @tracer. May sleep. A NULL @tracer is
 * ignored.
 */
void ds_release_bts(struct bts_tracer *tracer)
{
	might_sleep();

	if (tracer) {
		ds_suspend_bts(tracer);
		ds_free_bts(tracer);
	}
}
902
903int ds_release_bts_noirq(struct bts_tracer *tracer)
904{
905 struct task_struct *task;
906 unsigned long irq;
907 int error;
908
909 if (!tracer)
910 return 0;
911
912 task = tracer->ds.context->task;
913
914 local_irq_save(irq);
915
916 error = -EPERM;
917 if (!task &&
918 (tracer->ds.context->cpu != smp_processor_id()))
919 goto out;
920
921 error = -EPERM;
922 if (task && (task != current))
923 goto out;
924
925 ds_suspend_bts_noirq(tracer);
926 ds_free_bts(tracer);
927
928 error = 0;
929 out:
930 local_irq_restore(irq);
931 return error;
932}
933
934static void update_task_debugctlmsr(struct task_struct *task,
935 unsigned long debugctlmsr)
936{
937 task->thread.debugctlmsr = debugctlmsr;
938
939 get_cpu();
940 if (task == current)
941 update_debugctlmsr(debugctlmsr);
de79f54f
MM
942 put_cpu();
943}
944
c2724775 945void ds_suspend_bts(struct bts_tracer *tracer)
eee3af4a 946{
c2724775 947 struct task_struct *task;
de79f54f
MM
948 unsigned long debugctlmsr;
949 int cpu;
ca0002a1 950
ca0002a1 951 if (!tracer)
c2724775 952 return;
ca0002a1 953
cac94f97
MM
954 tracer->flags = 0;
955
c2724775 956 task = tracer->ds.context->task;
de79f54f 957 cpu = tracer->ds.context->cpu;
ca0002a1 958
de79f54f 959 WARN_ON(!task && irqs_disabled());
ca0002a1 960
de79f54f
MM
961 debugctlmsr = (task ?
962 task->thread.debugctlmsr :
963 get_debugctlmsr_on_cpu(cpu));
964 debugctlmsr &= ~BTS_CONTROL;
eee3af4a 965
de79f54f
MM
966 if (task)
967 update_task_debugctlmsr(task, debugctlmsr);
968 else
969 update_debugctlmsr_on_cpu(cpu, debugctlmsr);
93fa7636 970}
eee3af4a 971
de79f54f 972int ds_suspend_bts_noirq(struct bts_tracer *tracer)
93fa7636 973{
c2724775 974 struct task_struct *task;
de79f54f
MM
975 unsigned long debugctlmsr, irq;
976 int cpu, error = 0;
eee3af4a 977
ca0002a1 978 if (!tracer)
de79f54f 979 return 0;
eee3af4a 980
de79f54f 981 tracer->flags = 0;
cac94f97 982
c2724775 983 task = tracer->ds.context->task;
de79f54f
MM
984 cpu = tracer->ds.context->cpu;
985
986 local_irq_save(irq);
987
988 error = -EPERM;
989 if (!task && (cpu != smp_processor_id()))
990 goto out;
991
992 debugctlmsr = (task ?
993 task->thread.debugctlmsr :
994 get_debugctlmsr());
995 debugctlmsr &= ~BTS_CONTROL;
996
997 if (task)
998 update_task_debugctlmsr(task, debugctlmsr);
999 else
1000 update_debugctlmsr(debugctlmsr);
1001
1002 error = 0;
1003 out:
1004 local_irq_restore(irq);
1005 return error;
1006}
1007
1008static unsigned long ds_bts_control(struct bts_tracer *tracer)
1009{
1010 unsigned long control;
ca0002a1 1011
c2724775
MM
1012 control = ds_cfg.ctl[dsf_bts];
1013 if (!(tracer->trace.ds.flags & BTS_KERNEL))
1014 control |= ds_cfg.ctl[dsf_bts_kernel];
1015 if (!(tracer->trace.ds.flags & BTS_USER))
1016 control |= ds_cfg.ctl[dsf_bts_user];
eee3af4a 1017
de79f54f 1018 return control;
eee3af4a
MM
1019}
1020
de79f54f 1021void ds_resume_bts(struct bts_tracer *tracer)
eee3af4a 1022{
38f80112 1023 struct task_struct *task;
de79f54f
MM
1024 unsigned long debugctlmsr;
1025 int cpu;
38f80112 1026
ca0002a1 1027 if (!tracer)
c2724775 1028 return;
93fa7636 1029
de79f54f
MM
1030 tracer->flags = tracer->trace.ds.flags;
1031
38f80112 1032 task = tracer->ds.context->task;
de79f54f 1033 cpu = tracer->ds.context->cpu;
38f80112 1034
de79f54f
MM
1035 WARN_ON(!task && irqs_disabled());
1036
1037 debugctlmsr = (task ?
1038 task->thread.debugctlmsr :
1039 get_debugctlmsr_on_cpu(cpu));
1040 debugctlmsr |= ds_bts_control(tracer);
1041
1042 if (task)
1043 update_task_debugctlmsr(task, debugctlmsr);
1044 else
1045 update_debugctlmsr_on_cpu(cpu, debugctlmsr);
1046}
1047
1048int ds_resume_bts_noirq(struct bts_tracer *tracer)
1049{
1050 struct task_struct *task;
1051 unsigned long debugctlmsr, irq;
1052 int cpu, error = 0;
1053
1054 if (!tracer)
1055 return 0;
1056
1057 tracer->flags = tracer->trace.ds.flags;
1058
1059 task = tracer->ds.context->task;
1060 cpu = tracer->ds.context->cpu;
1061
1062 local_irq_save(irq);
1063
1064 error = -EPERM;
1065 if (!task && (cpu != smp_processor_id()))
1066 goto out;
1067
1068 debugctlmsr = (task ?
1069 task->thread.debugctlmsr :
1070 get_debugctlmsr());
1071 debugctlmsr |= ds_bts_control(tracer);
1072
1073 if (task)
1074 update_task_debugctlmsr(task, debugctlmsr);
1075 else
1076 update_debugctlmsr(debugctlmsr);
1077
1078 error = 0;
1079 out:
1080 local_irq_restore(irq);
1081 return error;
1082}
1083
1084static void ds_free_pebs(struct pebs_tracer *tracer)
1085{
1086 struct task_struct *task;
1087
1088 task = tracer->ds.context->task;
93fa7636 1089
c2724775
MM
1090 WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
1091 tracer->ds.context->pebs_master = NULL;
eee3af4a 1092
c2724775 1093 ds_put_context(tracer->ds.context);
38f80112 1094 put_tracer(task);
eee3af4a 1095
c2724775 1096 kfree(tracer);
a95d67f8
MM
1097}
1098
de79f54f
MM
/*
 * Suspend and free a PEBS tracer.
 *
 * The might_sleep() annotation is evaluated before the NULL check, so it
 * applies even when @tracer is NULL; a NULL @tracer is otherwise ignored.
 */
void ds_release_pebs(struct pebs_tracer *tracer)
{
	might_sleep();

	if (tracer) {
		ds_suspend_pebs(tracer);
		ds_free_pebs(tracer);
	}
}
1109
1110int ds_release_pebs_noirq(struct pebs_tracer *tracer)
1111{
1112 struct task_struct *task;
1113 unsigned long irq;
1114 int error;
1115
1116 if (!tracer)
1117 return 0;
1118
1119 task = tracer->ds.context->task;
1120
1121 local_irq_save(irq);
1122
1123 error = -EPERM;
1124 if (!task &&
1125 (tracer->ds.context->cpu != smp_processor_id()))
1126 goto out;
1127
1128 error = -EPERM;
1129 if (task && (task != current))
1130 goto out;
1131
1132 ds_suspend_pebs_noirq(tracer);
1133 ds_free_pebs(tracer);
1134
1135 error = 0;
1136 out:
1137 local_irq_restore(irq);
1138 return error;
1139}
1140
/*
 * Suspend PEBS tracing for @tracer.
 *
 * Currently a no-op: the body is empty.
 */
void ds_suspend_pebs(struct pebs_tracer *tracer)
{
}
eee3af4a 1145
de79f54f
MM
/*
 * Suspend PEBS tracing for @tracer (interrupts-off variant).
 *
 * Currently a no-op that always reports success (0).
 */
int ds_suspend_pebs_noirq(struct pebs_tracer *tracer)
{
	return 0;
}
1150
/*
 * Resume PEBS tracing for @tracer.
 *
 * Currently a no-op: the body is empty.
 */
void ds_resume_pebs(struct pebs_tracer *tracer)
{
}
1155
de79f54f
MM
/*
 * Resume PEBS tracing for @tracer (interrupts-off variant).
 *
 * Currently a no-op that always reports success (0).
 */
int ds_resume_pebs_noirq(struct pebs_tracer *tracer)
{
	return 0;
}
1160
c2724775 1161const struct bts_trace *ds_read_bts(struct bts_tracer *tracer)
eee3af4a 1162{
ca0002a1 1163 if (!tracer)
c2724775 1164 return NULL;
ca0002a1 1165
c2724775
MM
1166 ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
1167 return &tracer->trace;
93fa7636 1168}
eee3af4a 1169
c2724775 1170const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer)
93fa7636 1171{
ca0002a1 1172 if (!tracer)
c2724775 1173 return NULL;
ca0002a1 1174
c2724775 1175 ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
017bc617
MM
1176
1177 tracer->trace.counters = ds_cfg.nr_counter_reset;
1178 memcpy(tracer->trace.counter_reset,
1179 tracer->ds.context->ds +
1180 (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field),
1181 ds_cfg.nr_counter_reset * PEBS_RESET_FIELD_SIZE);
ca0002a1 1182
c2724775 1183 return &tracer->trace;
93fa7636 1184}
eee3af4a 1185
c2724775 1186int ds_reset_bts(struct bts_tracer *tracer)
93fa7636 1187{
ca0002a1
MM
1188 if (!tracer)
1189 return -EINVAL;
1190
c2724775 1191 tracer->trace.ds.top = tracer->trace.ds.begin;
ca0002a1 1192
c2724775
MM
1193 ds_set(tracer->ds.context->ds, ds_bts, ds_index,
1194 (unsigned long)tracer->trace.ds.top);
ca0002a1
MM
1195
1196 return 0;
93fa7636 1197}
eee3af4a 1198
c2724775 1199int ds_reset_pebs(struct pebs_tracer *tracer)
93fa7636 1200{
ca0002a1
MM
1201 if (!tracer)
1202 return -EINVAL;
eee3af4a 1203
c2724775 1204 tracer->trace.ds.top = tracer->trace.ds.begin;
eee3af4a 1205
608780a9 1206 ds_set(tracer->ds.context->ds, ds_pebs, ds_index,
c2724775 1207 (unsigned long)tracer->trace.ds.top);
93fa7636 1208
ca0002a1 1209 return 0;
eee3af4a
MM
1210}
1211
017bc617
MM
1212int ds_set_pebs_reset(struct pebs_tracer *tracer,
1213 unsigned int counter, u64 value)
eee3af4a 1214{
ca0002a1
MM
1215 if (!tracer)
1216 return -EINVAL;
eee3af4a 1217
017bc617
MM
1218 if (ds_cfg.nr_counter_reset < counter)
1219 return -EINVAL;
1220
bc44fb5f 1221 *(u64 *)(tracer->ds.context->ds +
017bc617
MM
1222 (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field) +
1223 (counter * PEBS_RESET_FIELD_SIZE)) = value;
93fa7636 1224
ca0002a1 1225 return 0;
93fa7636
MM
1226}
1227
c2724775 1228static const struct ds_configuration ds_cfg_netburst = {
ba2607fe 1229 .name = "Netburst",
c2724775
MM
1230 .ctl[dsf_bts] = (1 << 2) | (1 << 3),
1231 .ctl[dsf_bts_kernel] = (1 << 5),
1232 .ctl[dsf_bts_user] = (1 << 6),
017bc617 1233 .nr_counter_reset = 1,
eee3af4a 1234};
c2724775 1235static const struct ds_configuration ds_cfg_pentium_m = {
ba2607fe 1236 .name = "Pentium M",
c2724775 1237 .ctl[dsf_bts] = (1 << 6) | (1 << 7),
017bc617 1238 .nr_counter_reset = 1,
eee3af4a 1239};
ba2607fe
MM
1240static const struct ds_configuration ds_cfg_core2_atom = {
1241 .name = "Core 2/Atom",
c2724775
MM
1242 .ctl[dsf_bts] = (1 << 6) | (1 << 7),
1243 .ctl[dsf_bts_kernel] = (1 << 9),
1244 .ctl[dsf_bts_user] = (1 << 10),
017bc617
MM
1245 .nr_counter_reset = 1,
1246};
1247static const struct ds_configuration ds_cfg_core_i7 = {
1248 .name = "Core i7",
1249 .ctl[dsf_bts] = (1 << 6) | (1 << 7),
1250 .ctl[dsf_bts_kernel] = (1 << 9),
1251 .ctl[dsf_bts_user] = (1 << 10),
1252 .nr_counter_reset = 4,
c2724775 1253};
eee3af4a 1254
c2724775 1255static void
bc44fb5f
MM
1256ds_configure(const struct ds_configuration *cfg,
1257 struct cpuinfo_x86 *cpu)
eee3af4a 1258{
bc44fb5f
MM
1259 unsigned long nr_pebs_fields = 0;
1260
1261 printk(KERN_INFO "[ds] using %s configuration\n", cfg->name);
1262
1263#ifdef __i386__
1264 nr_pebs_fields = 10;
1265#else
1266 nr_pebs_fields = 18;
1267#endif
1268
017bc617
MM
1269 /*
1270 * Starting with version 2, architectural performance
1271 * monitoring supports a format specifier.
1272 */
1273 if ((cpuid_eax(0xa) & 0xff) > 1) {
1274 unsigned long perf_capabilities, format;
1275
1276 rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_capabilities);
1277
1278 format = (perf_capabilities >> 8) & 0xf;
1279
1280 switch (format) {
1281 case 0:
1282 nr_pebs_fields = 18;
1283 break;
1284 case 1:
1285 nr_pebs_fields = 22;
1286 break;
1287 default:
1288 printk(KERN_INFO
1289 "[ds] unknown PEBS format: %lu\n", format);
1290 nr_pebs_fields = 0;
1291 break;
1292 }
1293 }
1294
c2724775 1295 memset(&ds_cfg, 0, sizeof(ds_cfg));
eee3af4a 1296 ds_cfg = *cfg;
ca0002a1 1297
bc44fb5f
MM
1298 ds_cfg.sizeof_ptr_field =
1299 (cpu_has(cpu, X86_FEATURE_DTES64) ? 8 : 4);
c2724775 1300
bc44fb5f
MM
1301 ds_cfg.sizeof_rec[ds_bts] = ds_cfg.sizeof_ptr_field * 3;
1302 ds_cfg.sizeof_rec[ds_pebs] = ds_cfg.sizeof_ptr_field * nr_pebs_fields;
1303
1304 if (!cpu_has(cpu, X86_FEATURE_BTS)) {
1305 ds_cfg.sizeof_rec[ds_bts] = 0;
c2724775
MM
1306 printk(KERN_INFO "[ds] bts not available\n");
1307 }
bc44fb5f
MM
1308 if (!cpu_has(cpu, X86_FEATURE_PEBS)) {
1309 ds_cfg.sizeof_rec[ds_pebs] = 0;
c2724775 1310 printk(KERN_INFO "[ds] pebs not available\n");
bc44fb5f
MM
1311 }
1312
1313 printk(KERN_INFO "[ds] sizes: address: %u bit, ",
1314 8 * ds_cfg.sizeof_ptr_field);
1315 printk("bts/pebs record: %u/%u bytes\n",
1316 ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]);
ca0002a1 1317
017bc617 1318 WARN_ON_ONCE(MAX_PEBS_COUNTERS < ds_cfg.nr_counter_reset);
eee3af4a
MM
1319}
1320
1321void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
1322{
ee811517
MM
1323 /* Only configure the first cpu. Others are identical. */
1324 if (ds_cfg.name)
1325 return;
1326
eee3af4a
MM
1327 switch (c->x86) {
1328 case 0x6:
1329 switch (c->x86_model) {
ba2607fe
MM
1330 case 0x9:
1331 case 0xd: /* Pentium M */
bc44fb5f 1332 ds_configure(&ds_cfg_pentium_m, c);
eee3af4a 1333 break;
ba2607fe
MM
1334 case 0xf:
1335 case 0x17: /* Core2 */
1336 case 0x1c: /* Atom */
bc44fb5f 1337 ds_configure(&ds_cfg_core2_atom, c);
ba2607fe 1338 break;
b8e47195 1339 case 0x1a: /* Core i7 */
017bc617
MM
1340 ds_configure(&ds_cfg_core_i7, c);
1341 break;
ba2607fe 1342 default:
b8e47195 1343 /* Sorry, don't know about them. */
eee3af4a 1344 break;
eee3af4a
MM
1345 }
1346 break;
ba2607fe 1347 case 0xf:
eee3af4a 1348 switch (c->x86_model) {
eee3af4a
MM
1349 case 0x0:
1350 case 0x1:
1351 case 0x2: /* Netburst */
bc44fb5f 1352 ds_configure(&ds_cfg_netburst, c);
eee3af4a 1353 break;
eee3af4a 1354 default:
b8e47195 1355 /* Sorry, don't know about them. */
eee3af4a
MM
1356 break;
1357 }
1358 break;
1359 default:
b8e47195 1360 /* Sorry, don't know about them. */
eee3af4a
MM
1361 break;
1362 }
1363}
93fa7636 1364
cac94f97
MM
1365static inline void ds_take_timestamp(struct ds_context *context,
1366 enum bts_qualifier qualifier,
1367 struct task_struct *task)
1368{
1369 struct bts_tracer *tracer = context->bts_master;
1370 struct bts_struct ts;
1371
1372 /* Prevent compilers from reading the tracer pointer twice. */
1373 barrier();
1374
1375 if (!tracer || !(tracer->flags & BTS_TIMESTAMPS))
1376 return;
1377
1378 memset(&ts, 0, sizeof(ts));
15879d04
MM
1379 ts.qualifier = qualifier;
1380 ts.variant.event.clock = trace_clock_global();
1381 ts.variant.event.pid = task->pid;
cac94f97
MM
1382
1383 bts_write(tracer, &ts);
1384}
1385
c2724775
MM
1386/*
1387 * Change the DS configuration from tracing prev to tracing next.
1388 */
1389void ds_switch_to(struct task_struct *prev, struct task_struct *next)
93fa7636 1390{
cac94f97
MM
1391 struct ds_context *prev_ctx = prev->thread.ds_ctx;
1392 struct ds_context *next_ctx = next->thread.ds_ctx;
1393 unsigned long debugctlmsr = next->thread.debugctlmsr;
1394
1395 /* Make sure all data is read before we start. */
1396 barrier();
c2724775
MM
1397
1398 if (prev_ctx) {
1399 update_debugctlmsr(0);
1400
cac94f97 1401 ds_take_timestamp(prev_ctx, bts_task_departs, prev);
c2724775
MM
1402 }
1403
1404 if (next_ctx) {
cac94f97 1405 ds_take_timestamp(next_ctx, bts_task_arrives, next);
c2724775
MM
1406
1407 wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds);
ca0002a1 1408 }
c2724775 1409
cac94f97 1410 update_debugctlmsr(debugctlmsr);
93fa7636 1411}
bf53de90 1412
de79f54f
MM
1413static __init int ds_selftest(void)
1414{
1415 if (ds_cfg.sizeof_rec[ds_bts]) {
1416 int error;
1417
1418 error = ds_selftest_bts();
1419 if (error) {
1420 WARN(1, "[ds] selftest failed. disabling bts.\n");
1421 ds_cfg.sizeof_rec[ds_bts] = 0;
1422 }
1423 }
1424
1425 if (ds_cfg.sizeof_rec[ds_pebs]) {
1426 int error;
1427
1428 error = ds_selftest_pebs();
1429 if (error) {
1430 WARN(1, "[ds] selftest failed. disabling pebs.\n");
1431 ds_cfg.sizeof_rec[ds_pebs] = 0;
1432 }
1433 }
1434
1435 return 0;
1436}
1437device_initcall(ds_selftest);