]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - arch/x86/kernel/ds.c
Merge branches 'tracing/fastboot', 'tracing/ftrace', 'tracing/function-graph-tracer...
[mirror_ubuntu-bionic-kernel.git] / arch / x86 / kernel / ds.c
CommitLineData
eee3af4a
MM
1/*
2 * Debug Store support
3 *
4 * This provides a low-level interface to the hardware's Debug Store
93fa7636 5 * feature that is used for branch trace store (BTS) and
eee3af4a
MM
6 * precise-event based sampling (PEBS).
7 *
93fa7636 8 * It manages:
c2724775 9 * - DS and BTS hardware configuration
6abb11ae 10 * - buffer overflow handling (to be done)
93fa7636 11 * - buffer access
eee3af4a 12 *
c2724775
MM
13 * It does not do:
14 * - security checking (is the caller allowed to trace the task)
15 * - buffer allocation (memory accounting)
eee3af4a 16 *
eee3af4a 17 *
93fa7636
MM
18 * Copyright (C) 2007-2008 Intel Corporation.
19 * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008
eee3af4a
MM
20 */
21
93fa7636 22
eee3af4a
MM
23#include <asm/ds.h>
24
25#include <linux/errno.h>
26#include <linux/string.h>
27#include <linux/slab.h>
93fa7636 28#include <linux/sched.h>
3c933904 29#include <linux/mm.h>
ca0002a1 30#include <linux/kernel.h>
93fa7636
MM
31
32
33/*
34 * The configuration for a particular DS hardware implementation.
35 */
36struct ds_configuration {
c2724775
MM
37 /* the name of the configuration */
38 const char *name;
39 /* the size of one pointer-typed field in the DS structure and
40 in the BTS and PEBS buffers in bytes;
41 this covers the first 8 DS fields related to buffer management. */
93fa7636
MM
42 unsigned char sizeof_field;
43 /* the size of a BTS/PEBS record in bytes */
44 unsigned char sizeof_rec[2];
c2724775
MM
45 /* a series of bit-masks to control various features indexed
46 * by enum ds_feature */
47 unsigned long ctl[dsf_ctl_max];
93fa7636 48};
c2724775
MM
49static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);
50
51#define ds_cfg per_cpu(ds_cfg_array, smp_processor_id())
52
53#define MAX_SIZEOF_DS (12 * 8) /* maximal size of a DS configuration */
54#define MAX_SIZEOF_BTS (3 * 8) /* maximal size of a BTS record */
55#define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */
56
57#define BTS_CONTROL \
58 (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\
59 ds_cfg.ctl[dsf_bts_overflow])
60
eee3af4a 61
ca0002a1
MM
62/*
63 * A BTS or PEBS tracer.
64 *
65 * This holds the configuration of the tracer and serves as a handle
66 * to identify tracers.
67 */
struct ds_tracer {
	/* the DS context this tracer (partially) owns */
	struct ds_context *context;
	/* the buffer handed in on ds_request() ... */
	void *buffer;
	/* ... and the size of that buffer in bytes */
	size_t size;
};
75
76struct bts_tracer {
77 /* the common DS part */
78 struct ds_tracer ds;
c2724775
MM
79 /* the trace including the DS configuration */
80 struct bts_trace trace;
ca0002a1
MM
81 /* buffer overflow notification function */
82 bts_ovfl_callback_t ovfl;
83};
84
85struct pebs_tracer {
86 /* the common DS part */
87 struct ds_tracer ds;
c2724775
MM
88 /* the trace including the DS configuration */
89 struct pebs_trace trace;
ca0002a1
MM
90 /* buffer overflow notification function */
91 pebs_ovfl_callback_t ovfl;
92};
eee3af4a
MM
93
94/*
95 * Debug Store (DS) save area configuration (see Intel64 and IA32
96 * Architectures Software Developer's Manual, section 18.5)
97 *
98 * The DS configuration consists of the following fields; different
99 * architectures vary in the size of those fields.
100 * - double-word aligned base linear address of the BTS buffer
101 * - write pointer into the BTS buffer
102 * - end linear address of the BTS buffer (one byte beyond the end of
103 * the buffer)
104 * - interrupt pointer into BTS buffer
105 * (interrupt occurs when write pointer passes interrupt pointer)
106 * - double-word aligned base linear address of the PEBS buffer
107 * - write pointer into the PEBS buffer
108 * - end linear address of the PEBS buffer (one byte beyond the end of
109 * the buffer)
110 * - interrupt pointer into PEBS buffer
111 * (interrupt occurs when write pointer passes interrupt pointer)
112 * - value to which counter is reset following counter overflow
113 *
93fa7636
MM
114 * Later architectures use 64bit pointers throughout, whereas earlier
115 * architectures use 32bit pointers in 32bit mode.
eee3af4a 116 *
eee3af4a 117 *
93fa7636
MM
118 * We compute the base address for the first 8 fields based on:
119 * - the field size stored in the DS configuration
120 * - the relative field position
121 * - an offset giving the start of the respective region
eee3af4a 122 *
93fa7636
MM
123 * This offset is further used to index various arrays holding
124 * information for BTS and PEBS at the respective index.
eee3af4a 125 *
93fa7636
MM
126 * On later 32bit processors, we only access the lower 32bit of the
127 * 64bit pointer fields. The upper halves will be zeroed out.
eee3af4a
MM
128 */
129
93fa7636
MM
/*
 * Position of a buffer management field relative to the start of its
 * (BTS or PEBS) group of fields in the DS structure.
 */
enum ds_field {
	ds_buffer_base		= 0,
	ds_index		= 1,
	ds_absolute_maximum	= 2,
	ds_interrupt_threshold	= 3,
};
eee3af4a 136
93fa7636
MM
/*
 * Selects the BTS or the PEBS part of the DS structure; also used to
 * index per-buffer-type arrays.
 */
enum ds_qualifier {
	ds_bts	= 0,
	ds_pebs	= 1
};
141
93fa7636
MM
142static inline unsigned long ds_get(const unsigned char *base,
143 enum ds_qualifier qual, enum ds_field field)
144{
145 base += (ds_cfg.sizeof_field * (field + (4 * qual)));
146 return *(unsigned long *)base;
147}
148
149static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
150 enum ds_field field, unsigned long value)
151{
152 base += (ds_cfg.sizeof_field * (field + (4 * qual)));
153 (*(unsigned long *)base) = value;
154}
155
156
/*
 * The lock is only taken while BTS or PEBS resources are allocated
 * or released.
 */
static DEFINE_SPINLOCK(ds_lock);
eee3af4a 161
eee3af4a
MM
162
163/*
93fa7636
MM
164 * We either support (system-wide) per-cpu or per-thread allocation.
165 * We distinguish the two based on the task_struct pointer, where a
166 * NULL pointer indicates per-cpu allocation for the current cpu.
167 *
168 * Allocations are use-counted. As soon as resources are allocated,
169 * further allocations must be of the same type (per-cpu or
170 * per-thread). We model this by counting allocations (i.e. the number
171 * of tracers of a certain type) for one type negatively:
172 * =0 no tracers
173 * >0 number of per-thread tracers
174 * <0 number of per-cpu tracers
175 *
93fa7636
MM
176 * Tracers essentially gives the number of ds contexts for a certain
177 * type of allocation.
eee3af4a 178 */
c2724775 179static atomic_t tracers = ATOMIC_INIT(0);
93fa7636
MM
180
181static inline void get_tracer(struct task_struct *task)
eee3af4a 182{
c2724775
MM
183 if (task)
184 atomic_inc(&tracers);
185 else
186 atomic_dec(&tracers);
eee3af4a 187}
93fa7636
MM
188
189static inline void put_tracer(struct task_struct *task)
eee3af4a 190{
c2724775
MM
191 if (task)
192 atomic_dec(&tracers);
193 else
194 atomic_inc(&tracers);
eee3af4a 195}
93fa7636
MM
196
197static inline int check_tracer(struct task_struct *task)
eee3af4a 198{
c2724775
MM
199 return task ?
200 (atomic_read(&tracers) >= 0) :
201 (atomic_read(&tracers) <= 0);
eee3af4a 202}
93fa7636
MM
203
204
205/*
206 * The DS context is either attached to a thread or to a cpu:
207 * - in the former case, the thread_struct contains a pointer to the
208 * attached context.
209 * - in the latter case, we use a static array of per-cpu context
210 * pointers.
211 *
212 * Contexts are use-counted. They are allocated on first access and
213 * deallocated when the last user puts the context.
93fa7636 214 */
c2724775
MM
215struct ds_context {
216 /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
217 unsigned char ds[MAX_SIZEOF_DS];
218 /* the owner of the BTS and PEBS configuration, respectively */
219 struct bts_tracer *bts_master;
220 struct pebs_tracer *pebs_master;
221 /* use count */
222 unsigned long count;
223 /* a pointer to the context location inside the thread_struct
224 * or the per_cpu context array */
225 struct ds_context **this;
226 /* a pointer to the task owning this context, or NULL, if the
227 * context is owned by a cpu */
228 struct task_struct *task;
229};
230
231static DEFINE_PER_CPU(struct ds_context *, system_context_array);
93fa7636 232
c2724775 233#define system_context per_cpu(system_context_array, smp_processor_id())
93fa7636 234
ffc2238a 235static struct ds_context *ds_get_context(struct task_struct *task)
eee3af4a 236{
93fa7636 237 struct ds_context **p_context =
c2724775 238 (task ? &task->thread.ds_ctx : &system_context);
93fa7636 239 struct ds_context *context = *p_context;
de90add3 240 unsigned long irq;
93fa7636
MM
241
242 if (!context) {
243 context = kzalloc(sizeof(*context), GFP_KERNEL);
de90add3 244 if (!context)
573da422 245 return NULL;
93fa7636 246
de90add3
MM
247 spin_lock_irqsave(&ds_lock, irq);
248
10db4ef7 249 if (*p_context) {
10db4ef7 250 kfree(context);
10db4ef7 251
de90add3
MM
252 context = *p_context;
253 } else {
254 *p_context = context;
93fa7636 255
de90add3
MM
256 context->this = p_context;
257 context->task = task;
93fa7636 258
de90add3
MM
259 if (task)
260 set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
93fa7636 261
de90add3
MM
262 if (!task || (task == current))
263 wrmsrl(MSR_IA32_DS_AREA,
264 (unsigned long)context->ds);
265 }
c2724775
MM
266
267 context->count++;
268
269 spin_unlock_irqrestore(&ds_lock, irq);
270 } else {
271 spin_lock_irqsave(&ds_lock, irq);
272
273 context = *p_context;
274 if (context)
275 context->count++;
276
de90add3 277 spin_unlock_irqrestore(&ds_lock, irq);
93fa7636 278
c2724775
MM
279 if (!context)
280 context = ds_get_context(task);
281 }
93fa7636
MM
282
283 return context;
eee3af4a 284}
93fa7636 285
93fa7636 286static inline void ds_put_context(struct ds_context *context)
eee3af4a 287{
de90add3
MM
288 unsigned long irq;
289
93fa7636
MM
290 if (!context)
291 return;
292
de90add3 293 spin_lock_irqsave(&ds_lock, irq);
93fa7636 294
c2724775
MM
295 if (--context->count) {
296 spin_unlock_irqrestore(&ds_lock, irq);
297 return;
298 }
93fa7636 299
573da422 300 *(context->this) = NULL;
93fa7636
MM
301
302 if (context->task)
303 clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
304
305 if (!context->task || (context->task == current))
306 wrmsrl(MSR_IA32_DS_AREA, 0);
307
de90add3 308 spin_unlock_irqrestore(&ds_lock, irq);
c2724775
MM
309
310 kfree(context);
eee3af4a 311}
93fa7636
MM
312
313
314/*
c2724775 315 * Call the tracer's callback on a buffer overflow.
93fa7636 316 *
93fa7636
MM
317 * context: the ds context
318 * qual: the buffer type
319 */
ca0002a1
MM
320static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
321{
322 switch (qual) {
c2724775
MM
323 case ds_bts:
324 if (context->bts_master &&
325 context->bts_master->ovfl)
326 context->bts_master->ovfl(context->bts_master);
327 break;
328 case ds_pebs:
329 if (context->pebs_master &&
330 context->pebs_master->ovfl)
331 context->pebs_master->ovfl(context->pebs_master);
ca0002a1 332 break;
ca0002a1 333 }
c2724775
MM
334}
335
336
337/*
338 * Write raw data into the BTS or PEBS buffer.
339 *
340 * The remainder of any partially written record is zeroed out.
341 *
342 * context: the DS context
343 * qual: the buffer type
344 * record: the data to write
345 * size: the size of the data
346 */
347static int ds_write(struct ds_context *context, enum ds_qualifier qual,
348 const void *record, size_t size)
349{
350 int bytes_written = 0;
351
352 if (!record)
353 return -EINVAL;
354
355 while (size) {
356 unsigned long base, index, end, write_end, int_th;
357 unsigned long write_size, adj_write_size;
358
359 /*
360 * write as much as possible without producing an
361 * overflow interrupt.
362 *
363 * interrupt_threshold must either be
364 * - bigger than absolute_maximum or
365 * - point to a record between buffer_base and absolute_maximum
366 *
367 * index points to a valid record.
368 */
369 base = ds_get(context->ds, qual, ds_buffer_base);
370 index = ds_get(context->ds, qual, ds_index);
371 end = ds_get(context->ds, qual, ds_absolute_maximum);
372 int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
373
374 write_end = min(end, int_th);
375
376 /* if we are already beyond the interrupt threshold,
377 * we fill the entire buffer */
378 if (write_end <= index)
379 write_end = end;
380
381 if (write_end <= index)
382 break;
383
384 write_size = min((unsigned long) size, write_end - index);
385 memcpy((void *)index, record, write_size);
386
387 record = (const char *)record + write_size;
388 size -= write_size;
389 bytes_written += write_size;
390
391 adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
392 adj_write_size *= ds_cfg.sizeof_rec[qual];
393
394 /* zero out trailing bytes */
395 memset((char *)index + write_size, 0,
396 adj_write_size - write_size);
397 index += adj_write_size;
398
399 if (index >= end)
400 index = base;
401 ds_set(context->ds, qual, ds_index, index);
402
403 if (index >= int_th)
404 ds_overflow(context, qual);
405 }
406
407 return bytes_written;
408}
409
410
411/*
412 * Branch Trace Store (BTS) uses the following format. Different
413 * architectures vary in the size of those fields.
414 * - source linear address
415 * - destination linear address
416 * - flags
417 *
418 * Later architectures use 64bit pointers throughout, whereas earlier
419 * architectures use 32bit pointers in 32bit mode.
420 *
421 * We compute the base address for the first 8 fields based on:
422 * - the field size stored in the DS configuration
423 * - the relative field position
424 *
425 * In order to store additional information in the BTS buffer, we use
426 * a special source address to indicate that the record requires
427 * special interpretation.
428 *
429 * Netburst indicated via a bit in the flags field whether the branch
430 * was predicted; this is ignored.
431 *
432 * We use two levels of abstraction:
433 * - the raw data level defined here
434 * - an arch-independent level defined in ds.h
435 */
436
437enum bts_field {
438 bts_from,
439 bts_to,
440 bts_flags,
441
442 bts_qual = bts_from,
443 bts_jiffies = bts_to,
444 bts_pid = bts_flags,
445
446 bts_qual_mask = (bts_qual_max - 1),
447 bts_escape = ((unsigned long)-1 & ~bts_qual_mask)
448};
449
450static inline unsigned long bts_get(const char *base, enum bts_field field)
451{
452 base += (ds_cfg.sizeof_field * field);
453 return *(unsigned long *)base;
454}
455
456static inline void bts_set(char *base, enum bts_field field, unsigned long val)
457{
458 base += (ds_cfg.sizeof_field * field);;
459 (*(unsigned long *)base) = val;
460}
461
462
463/*
464 * The raw BTS data is architecture dependent.
465 *
466 * For higher-level users, we give an arch-independent view.
467 * - ds.h defines struct bts_struct
468 * - bts_read translates one raw bts record into a bts_struct
469 * - bts_write translates one bts_struct into the raw format and
470 * writes it into the top of the parameter tracer's buffer.
471 *
472 * return: bytes read/written on success; -Eerrno, otherwise
473 */
474static int bts_read(struct bts_tracer *tracer, const void *at,
475 struct bts_struct *out)
476{
477 if (!tracer)
478 return -EINVAL;
479
480 if (at < tracer->trace.ds.begin)
481 return -EINVAL;
482
483 if (tracer->trace.ds.end < (at + tracer->trace.ds.size))
484 return -EINVAL;
485
486 memset(out, 0, sizeof(*out));
487 if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) {
488 out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask);
489 out->variant.timestamp.jiffies = bts_get(at, bts_jiffies);
490 out->variant.timestamp.pid = bts_get(at, bts_pid);
491 } else {
492 out->qualifier = bts_branch;
493 out->variant.lbr.from = bts_get(at, bts_from);
494 out->variant.lbr.to = bts_get(at, bts_to);
495 }
496
497 return ds_cfg.sizeof_rec[ds_bts];
498}
499
500static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in)
501{
502 unsigned char raw[MAX_SIZEOF_BTS];
503
504 if (!tracer)
505 return -EINVAL;
506
507 if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts])
508 return -EOVERFLOW;
509
510 switch (in->qualifier) {
511 case bts_invalid:
512 bts_set(raw, bts_from, 0);
513 bts_set(raw, bts_to, 0);
514 bts_set(raw, bts_flags, 0);
515 break;
516 case bts_branch:
517 bts_set(raw, bts_from, in->variant.lbr.from);
518 bts_set(raw, bts_to, in->variant.lbr.to);
519 bts_set(raw, bts_flags, 0);
520 break;
521 case bts_task_arrives:
522 case bts_task_departs:
523 bts_set(raw, bts_qual, (bts_escape | in->qualifier));
524 bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies);
525 bts_set(raw, bts_pid, in->variant.timestamp.pid);
ca0002a1 526 break;
c2724775
MM
527 default:
528 return -EINVAL;
ca0002a1 529 }
c2724775
MM
530
531 return ds_write(tracer->ds.context, ds_bts, raw,
532 ds_cfg.sizeof_rec[ds_bts]);
eee3af4a 533}
93fa7636
MM
534
535
c2724775
MM
536static void ds_write_config(struct ds_context *context,
537 struct ds_trace *cfg, enum ds_qualifier qual)
538{
539 unsigned char *ds = context->ds;
540
541 ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin);
542 ds_set(ds, qual, ds_index, (unsigned long)cfg->top);
543 ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end);
544 ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith);
545}
546
547static void ds_read_config(struct ds_context *context,
548 struct ds_trace *cfg, enum ds_qualifier qual)
eee3af4a 549{
c2724775
MM
550 unsigned char *ds = context->ds;
551
552 cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base);
553 cfg->top = (void *)ds_get(ds, qual, ds_index);
554 cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum);
555 cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold);
556}
557
558static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
559 void *base, size_t size, size_t ith,
560 unsigned int flags) {
93fa7636 561 unsigned long buffer, adj;
ca0002a1
MM
562
563 /* adjust the buffer address and size to meet alignment
564 * constraints:
565 * - buffer is double-word aligned
566 * - size is multiple of record size
567 *
568 * We checked the size at the very beginning; we have enough
569 * space to do the adjustment.
570 */
571 buffer = (unsigned long)base;
572
573 adj = ALIGN(buffer, DS_ALIGNMENT) - buffer;
574 buffer += adj;
575 size -= adj;
576
c2724775
MM
577 trace->n = size / ds_cfg.sizeof_rec[qual];
578 trace->size = ds_cfg.sizeof_rec[qual];
ca0002a1 579
c2724775 580 size = (trace->n * trace->size);
ca0002a1 581
c2724775
MM
582 trace->begin = (void *)buffer;
583 trace->top = trace->begin;
584 trace->end = (void *)(buffer + size);
ca0002a1
MM
585 /* The value for 'no threshold' is -1, which will set the
586 * threshold outside of the buffer, just like we want it.
587 */
c2724775
MM
588 trace->ith = (void *)(buffer + size - ith);
589
590 trace->flags = flags;
ca0002a1
MM
591}
592
c2724775
MM
593
594static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
595 enum ds_qualifier qual, struct task_struct *task,
596 void *base, size_t size, size_t th, unsigned int flags)
ca0002a1
MM
597{
598 struct ds_context *context;
ca0002a1 599 int error;
93fa7636 600
6abb11ae
MM
601 error = -EINVAL;
602 if (!base)
603 goto out;
604
93fa7636 605 /* we require some space to do alignment adjustments below */
ca0002a1
MM
606 error = -EINVAL;
607 if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
608 goto out;
93fa7636 609
ca0002a1
MM
610 if (th != (size_t)-1) {
611 th *= ds_cfg.sizeof_rec[qual];
612
613 error = -EINVAL;
614 if (size <= th)
615 goto out;
616 }
617
ca0002a1
MM
618 tracer->buffer = base;
619 tracer->size = size;
93fa7636 620
ca0002a1
MM
621 error = -ENOMEM;
622 context = ds_get_context(task);
93fa7636 623 if (!context)
ca0002a1
MM
624 goto out;
625 tracer->context = context;
626
c2724775 627 ds_init_ds_trace(trace, qual, base, size, th, flags);
de90add3 628
c2724775 629 error = 0;
ca0002a1 630 out:
93fa7636 631 return error;
eee3af4a 632}
93fa7636 633
ca0002a1
MM
634struct bts_tracer *ds_request_bts(struct task_struct *task,
635 void *base, size_t size,
c2724775
MM
636 bts_ovfl_callback_t ovfl, size_t th,
637 unsigned int flags)
eee3af4a 638{
ca0002a1 639 struct bts_tracer *tracer;
c2724775 640 unsigned long irq;
ca0002a1 641 int error;
93fa7636 642
c2724775
MM
643 error = -EOPNOTSUPP;
644 if (!ds_cfg.ctl[dsf_bts])
645 goto out;
646
ca0002a1
MM
647 /* buffer overflow notification is not yet implemented */
648 error = -EOPNOTSUPP;
649 if (ovfl)
650 goto out;
651
652 error = -ENOMEM;
653 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
654 if (!tracer)
655 goto out;
656 tracer->ovfl = ovfl;
657
c2724775
MM
658 error = ds_request(&tracer->ds, &tracer->trace.ds,
659 ds_bts, task, base, size, th, flags);
ca0002a1
MM
660 if (error < 0)
661 goto out_tracer;
662
c2724775
MM
663
664 spin_lock_irqsave(&ds_lock, irq);
665
666 error = -EPERM;
667 if (!check_tracer(task))
668 goto out_unlock;
669 get_tracer(task);
670
671 error = -EPERM;
672 if (tracer->ds.context->bts_master)
673 goto out_put_tracer;
674 tracer->ds.context->bts_master = tracer;
675
676 spin_unlock_irqrestore(&ds_lock, irq);
677
678
679 tracer->trace.read = bts_read;
680 tracer->trace.write = bts_write;
681
682 ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
683 ds_resume_bts(tracer);
684
ca0002a1
MM
685 return tracer;
686
c2724775
MM
687 out_put_tracer:
688 put_tracer(task);
689 out_unlock:
690 spin_unlock_irqrestore(&ds_lock, irq);
691 ds_put_context(tracer->ds.context);
ca0002a1 692 out_tracer:
6abb11ae 693 kfree(tracer);
ca0002a1
MM
694 out:
695 return ERR_PTR(error);
eee3af4a 696}
93fa7636 697
ca0002a1
MM
698struct pebs_tracer *ds_request_pebs(struct task_struct *task,
699 void *base, size_t size,
c2724775
MM
700 pebs_ovfl_callback_t ovfl, size_t th,
701 unsigned int flags)
eee3af4a 702{
ca0002a1 703 struct pebs_tracer *tracer;
c2724775 704 unsigned long irq;
93fa7636
MM
705 int error;
706
ca0002a1
MM
707 /* buffer overflow notification is not yet implemented */
708 error = -EOPNOTSUPP;
709 if (ovfl)
93fa7636
MM
710 goto out;
711
ca0002a1
MM
712 error = -ENOMEM;
713 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
714 if (!tracer)
715 goto out;
716 tracer->ovfl = ovfl;
93fa7636 717
c2724775
MM
718 error = ds_request(&tracer->ds, &tracer->trace.ds,
719 ds_pebs, task, base, size, th, flags);
ca0002a1
MM
720 if (error < 0)
721 goto out_tracer;
93fa7636 722
c2724775
MM
723 spin_lock_irqsave(&ds_lock, irq);
724
725 error = -EPERM;
726 if (!check_tracer(task))
727 goto out_unlock;
728 get_tracer(task);
729
730 error = -EPERM;
731 if (tracer->ds.context->pebs_master)
732 goto out_put_tracer;
733 tracer->ds.context->pebs_master = tracer;
734
735 spin_unlock_irqrestore(&ds_lock, irq);
736
737 ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
738 ds_resume_pebs(tracer);
739
ca0002a1
MM
740 return tracer;
741
c2724775
MM
742 out_put_tracer:
743 put_tracer(task);
744 out_unlock:
745 spin_unlock_irqrestore(&ds_lock, irq);
746 ds_put_context(tracer->ds.context);
ca0002a1 747 out_tracer:
6abb11ae 748 kfree(tracer);
93fa7636 749 out:
ca0002a1
MM
750 return ERR_PTR(error);
751}
752
c2724775 753void ds_release_bts(struct bts_tracer *tracer)
eee3af4a 754{
ca0002a1 755 if (!tracer)
c2724775 756 return;
ca0002a1 757
c2724775 758 ds_suspend_bts(tracer);
ca0002a1 759
c2724775
MM
760 WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
761 tracer->ds.context->bts_master = NULL;
93fa7636 762
c2724775
MM
763 put_tracer(tracer->ds.context->task);
764 ds_put_context(tracer->ds.context);
ca0002a1 765
ca0002a1 766 kfree(tracer);
eee3af4a 767}
93fa7636 768
c2724775 769void ds_suspend_bts(struct bts_tracer *tracer)
eee3af4a 770{
c2724775 771 struct task_struct *task;
ca0002a1 772
ca0002a1 773 if (!tracer)
c2724775 774 return;
ca0002a1 775
c2724775 776 task = tracer->ds.context->task;
ca0002a1 777
c2724775
MM
778 if (!task || (task == current))
779 update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL);
ca0002a1 780
c2724775
MM
781 if (task) {
782 task->thread.debugctlmsr &= ~BTS_CONTROL;
eee3af4a 783
c2724775
MM
784 if (!task->thread.debugctlmsr)
785 clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
786 }
93fa7636 787}
eee3af4a 788
c2724775 789void ds_resume_bts(struct bts_tracer *tracer)
93fa7636 790{
c2724775
MM
791 struct task_struct *task;
792 unsigned long control;
eee3af4a 793
ca0002a1 794 if (!tracer)
c2724775 795 return;
eee3af4a 796
c2724775 797 task = tracer->ds.context->task;
ca0002a1 798
c2724775
MM
799 control = ds_cfg.ctl[dsf_bts];
800 if (!(tracer->trace.ds.flags & BTS_KERNEL))
801 control |= ds_cfg.ctl[dsf_bts_kernel];
802 if (!(tracer->trace.ds.flags & BTS_USER))
803 control |= ds_cfg.ctl[dsf_bts_user];
eee3af4a 804
c2724775
MM
805 if (task) {
806 task->thread.debugctlmsr |= control;
807 set_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
808 }
ca0002a1 809
c2724775
MM
810 if (!task || (task == current))
811 update_debugctlmsr(get_debugctlmsr() | control);
eee3af4a
MM
812}
813
c2724775 814void ds_release_pebs(struct pebs_tracer *tracer)
eee3af4a 815{
ca0002a1 816 if (!tracer)
c2724775 817 return;
93fa7636 818
c2724775 819 ds_suspend_pebs(tracer);
93fa7636 820
c2724775
MM
821 WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
822 tracer->ds.context->pebs_master = NULL;
eee3af4a 823
c2724775
MM
824 put_tracer(tracer->ds.context->task);
825 ds_put_context(tracer->ds.context);
eee3af4a 826
c2724775 827 kfree(tracer);
a95d67f8
MM
828}
829
/*
 * Suspend PEBS tracing.
 *
 * The function body is empty; calling it has no effect.
 */
void ds_suspend_pebs(struct pebs_tracer *tracer)
{
}
eee3af4a 834
/*
 * Resume PEBS tracing.
 *
 * The function body is empty; calling it has no effect.
 */
void ds_resume_pebs(struct pebs_tracer *tracer)
{
}
839
c2724775 840const struct bts_trace *ds_read_bts(struct bts_tracer *tracer)
eee3af4a 841{
ca0002a1 842 if (!tracer)
c2724775 843 return NULL;
ca0002a1 844
c2724775
MM
845 ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
846 return &tracer->trace;
93fa7636 847}
eee3af4a 848
c2724775 849const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer)
93fa7636 850{
ca0002a1 851 if (!tracer)
c2724775 852 return NULL;
ca0002a1 853
c2724775
MM
854 ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
855 tracer->trace.reset_value =
856 *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8));
ca0002a1 857
c2724775 858 return &tracer->trace;
93fa7636 859}
eee3af4a 860
c2724775 861int ds_reset_bts(struct bts_tracer *tracer)
93fa7636 862{
ca0002a1
MM
863 if (!tracer)
864 return -EINVAL;
865
c2724775 866 tracer->trace.ds.top = tracer->trace.ds.begin;
ca0002a1 867
c2724775
MM
868 ds_set(tracer->ds.context->ds, ds_bts, ds_index,
869 (unsigned long)tracer->trace.ds.top);
ca0002a1
MM
870
871 return 0;
93fa7636 872}
eee3af4a 873
c2724775 874int ds_reset_pebs(struct pebs_tracer *tracer)
93fa7636 875{
ca0002a1
MM
876 if (!tracer)
877 return -EINVAL;
eee3af4a 878
c2724775 879 tracer->trace.ds.top = tracer->trace.ds.begin;
eee3af4a 880
c2724775
MM
881 ds_set(tracer->ds.context->ds, ds_bts, ds_index,
882 (unsigned long)tracer->trace.ds.top);
93fa7636 883
ca0002a1 884 return 0;
eee3af4a
MM
885}
886
ca0002a1 887int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
eee3af4a 888{
ca0002a1
MM
889 if (!tracer)
890 return -EINVAL;
eee3af4a 891
ca0002a1 892 *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value;
93fa7636 893
ca0002a1 894 return 0;
93fa7636
MM
895}
896
c2724775
MM
897static const struct ds_configuration ds_cfg_netburst = {
898 .name = "netburst",
899 .ctl[dsf_bts] = (1 << 2) | (1 << 3),
900 .ctl[dsf_bts_kernel] = (1 << 5),
901 .ctl[dsf_bts_user] = (1 << 6),
902
903 .sizeof_field = sizeof(long),
904 .sizeof_rec[ds_bts] = sizeof(long) * 3,
c4858ffc 905#ifdef __i386__
c2724775 906 .sizeof_rec[ds_pebs] = sizeof(long) * 10,
c4858ffc 907#else
c2724775 908 .sizeof_rec[ds_pebs] = sizeof(long) * 18,
c4858ffc 909#endif
eee3af4a 910};
c2724775
MM
911static const struct ds_configuration ds_cfg_pentium_m = {
912 .name = "pentium m",
913 .ctl[dsf_bts] = (1 << 6) | (1 << 7),
914
915 .sizeof_field = sizeof(long),
916 .sizeof_rec[ds_bts] = sizeof(long) * 3,
c4858ffc 917#ifdef __i386__
c2724775 918 .sizeof_rec[ds_pebs] = sizeof(long) * 10,
c4858ffc 919#else
c2724775 920 .sizeof_rec[ds_pebs] = sizeof(long) * 18,
c4858ffc 921#endif
eee3af4a 922};
c2724775
MM
923static const struct ds_configuration ds_cfg_core2 = {
924 .name = "core 2",
925 .ctl[dsf_bts] = (1 << 6) | (1 << 7),
926 .ctl[dsf_bts_kernel] = (1 << 9),
927 .ctl[dsf_bts_user] = (1 << 10),
928
929 .sizeof_field = 8,
930 .sizeof_rec[ds_bts] = 8 * 3,
931 .sizeof_rec[ds_pebs] = 8 * 18,
932};
eee3af4a 933
c2724775 934static void
eee3af4a
MM
935ds_configure(const struct ds_configuration *cfg)
936{
c2724775 937 memset(&ds_cfg, 0, sizeof(ds_cfg));
eee3af4a 938 ds_cfg = *cfg;
ca0002a1 939
c2724775
MM
940 printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name);
941
942 if (!cpu_has_bts) {
943 ds_cfg.ctl[dsf_bts] = 0;
944 printk(KERN_INFO "[ds] bts not available\n");
945 }
946 if (!cpu_has_pebs)
947 printk(KERN_INFO "[ds] pebs not available\n");
ca0002a1 948
c2724775 949 WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field));
eee3af4a
MM
950}
951
952void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
953{
954 switch (c->x86) {
955 case 0x6:
956 switch (c->x86_model) {
f4166c54
MM
957 case 0 ... 0xC:
958 /* sorry, don't know about them */
959 break;
eee3af4a
MM
960 case 0xD:
961 case 0xE: /* Pentium M */
c2724775 962 ds_configure(&ds_cfg_pentium_m);
eee3af4a 963 break;
f4166c54 964 default: /* Core2, Atom, ... */
c2724775 965 ds_configure(&ds_cfg_core2);
eee3af4a 966 break;
eee3af4a
MM
967 }
968 break;
969 case 0xF:
970 switch (c->x86_model) {
eee3af4a
MM
971 case 0x0:
972 case 0x1:
973 case 0x2: /* Netburst */
c2724775 974 ds_configure(&ds_cfg_netburst);
eee3af4a 975 break;
eee3af4a
MM
976 default:
977 /* sorry, don't know about them */
978 break;
979 }
980 break;
981 default:
982 /* sorry, don't know about them */
983 break;
984 }
985}
93fa7636 986
c2724775
MM
987/*
988 * Change the DS configuration from tracing prev to tracing next.
989 */
990void ds_switch_to(struct task_struct *prev, struct task_struct *next)
93fa7636 991{
c2724775
MM
992 struct ds_context *prev_ctx = prev->thread.ds_ctx;
993 struct ds_context *next_ctx = next->thread.ds_ctx;
994
995 if (prev_ctx) {
996 update_debugctlmsr(0);
997
998 if (prev_ctx->bts_master &&
999 (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
1000 struct bts_struct ts = {
1001 .qualifier = bts_task_departs,
1002 .variant.timestamp.jiffies = jiffies_64,
1003 .variant.timestamp.pid = prev->pid
1004 };
1005 bts_write(prev_ctx->bts_master, &ts);
1006 }
1007 }
1008
1009 if (next_ctx) {
1010 if (next_ctx->bts_master &&
1011 (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
1012 struct bts_struct ts = {
1013 .qualifier = bts_task_arrives,
1014 .variant.timestamp.jiffies = jiffies_64,
1015 .variant.timestamp.pid = next->pid
1016 };
1017 bts_write(next_ctx->bts_master, &ts);
1018 }
1019
1020 wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds);
ca0002a1 1021 }
c2724775
MM
1022
1023 update_debugctlmsr(next->thread.debugctlmsr);
93fa7636 1024}