]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blob - arch/x86/kernel/ds.c
x86, bts: provide in-kernel branch-trace interface
[mirror_ubuntu-bionic-kernel.git] / arch / x86 / kernel / ds.c
1 /*
2 * Debug Store support
3 *
4 * This provides a low-level interface to the hardware's Debug Store
5 * feature that is used for branch trace store (BTS) and
6 * precise-event based sampling (PEBS).
7 *
8 * It manages:
9 * - DS and BTS hardware configuration
10 * - buffer overflow handling (to be done)
11 * - buffer access
12 *
13 * It does not do:
14 * - security checking (is the caller allowed to trace the task)
15 * - buffer allocation (memory accounting)
16 *
17 *
18 * Copyright (C) 2007-2008 Intel Corporation.
19 * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008
20 */
21
22
23 #include <asm/ds.h>
24
25 #include <linux/errno.h>
26 #include <linux/string.h>
27 #include <linux/slab.h>
28 #include <linux/sched.h>
29 #include <linux/mm.h>
30 #include <linux/kernel.h>
31
32
33 /*
34 * The configuration for a particular DS hardware implementation.
35 */
36 struct ds_configuration {
37 /* the name of the configuration */
38 const char *name;
39 /* the size of one pointer-typed field in the DS structure and
40 in the BTS and PEBS buffers in bytes;
41 this covers the first 8 DS fields related to buffer management. */
42 unsigned char sizeof_field;
43 /* the size of a BTS/PEBS record in bytes */
44 unsigned char sizeof_rec[2];
45 /* a series of bit-masks to control various features indexed
46 * by enum ds_feature */
47 unsigned long ctl[dsf_ctl_max];
48 };
49 static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);
50
51 #define ds_cfg per_cpu(ds_cfg_array, smp_processor_id())
52
53 #define MAX_SIZEOF_DS (12 * 8) /* maximal size of a DS configuration */
54 #define MAX_SIZEOF_BTS (3 * 8) /* maximal size of a BTS record */
55 #define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */
56
57 #define BTS_CONTROL \
58 (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\
59 ds_cfg.ctl[dsf_bts_overflow])
60
61
62 /*
63 * A BTS or PEBS tracer.
64 *
65 * This holds the configuration of the tracer and serves as a handle
66 * to identify tracers.
67 */
68 struct ds_tracer {
69 /* the DS context (partially) owned by this tracer */
70 struct ds_context *context;
71 /* the buffer provided on ds_request() and its size in bytes */
72 void *buffer;
73 size_t size;
74 };
75
76 struct bts_tracer {
77 /* the common DS part */
78 struct ds_tracer ds;
79 /* the trace including the DS configuration */
80 struct bts_trace trace;
81 /* buffer overflow notification function */
82 bts_ovfl_callback_t ovfl;
83 };
84
85 struct pebs_tracer {
86 /* the common DS part */
87 struct ds_tracer ds;
88 /* the trace including the DS configuration */
89 struct pebs_trace trace;
90 /* buffer overflow notification function */
91 pebs_ovfl_callback_t ovfl;
92 };
93
94 /*
95 * Debug Store (DS) save area configuration (see Intel64 and IA32
96 * Architectures Software Developer's Manual, section 18.5)
97 *
98 * The DS configuration consists of the following fields; different
99 * architectures vary in the size of those fields.
100 * - double-word aligned base linear address of the BTS buffer
101 * - write pointer into the BTS buffer
102 * - end linear address of the BTS buffer (one byte beyond the end of
103 * the buffer)
104 * - interrupt pointer into BTS buffer
105 * (interrupt occurs when write pointer passes interrupt pointer)
106 * - double-word aligned base linear address of the PEBS buffer
107 * - write pointer into the PEBS buffer
108 * - end linear address of the PEBS buffer (one byte beyond the end of
109 * the buffer)
110 * - interrupt pointer into PEBS buffer
111 * (interrupt occurs when write pointer passes interrupt pointer)
112 * - value to which counter is reset following counter overflow
113 *
114 * Later architectures use 64bit pointers throughout, whereas earlier
115 * architectures use 32bit pointers in 32bit mode.
116 *
117 *
118 * We compute the base address for the first 8 fields based on:
119 * - the field size stored in the DS configuration
120 * - the relative field position
121 * - an offset giving the start of the respective region
122 *
123 * This offset is further used to index various arrays holding
124 * information for BTS and PEBS at the respective index.
125 *
126 * On later 32bit processors, we only access the lower 32bit of the
127 * 64bit pointer fields. The upper halves will be zeroed out.
128 */
129
/*
 * Position of a buffer management field relative to the start of its
 * (BTS or PEBS) group inside the DS area.
 */
enum ds_field {
	ds_buffer_base		= 0,
	ds_index		= 1,
	ds_absolute_maximum	= 2,
	ds_interrupt_threshold	= 3,
};
136
/*
 * Selects the BTS or the PEBS group of DS fields; also used as index
 * into per-buffer-type arrays such as ds_configuration.sizeof_rec.
 */
enum ds_qualifier {
	ds_bts	= 0,
	ds_pebs	= 1
};
141
142 static inline unsigned long ds_get(const unsigned char *base,
143 enum ds_qualifier qual, enum ds_field field)
144 {
145 base += (ds_cfg.sizeof_field * (field + (4 * qual)));
146 return *(unsigned long *)base;
147 }
148
149 static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
150 enum ds_field field, unsigned long value)
151 {
152 base += (ds_cfg.sizeof_field * (field + (4 * qual)));
153 (*(unsigned long *)base) = value;
154 }
155
156
/*
 * This lock only protects the allocation of BTS or PEBS resources.
 */
static DEFINE_SPINLOCK(ds_lock);
161
162
163 /*
164 * We either support (system-wide) per-cpu or per-thread allocation.
165 * We distinguish the two based on the task_struct pointer, where a
166 * NULL pointer indicates per-cpu allocation for the current cpu.
167 *
168 * Allocations are use-counted. As soon as resources are allocated,
169 * further allocations must be of the same type (per-cpu or
170 * per-thread). We model this by counting allocations (i.e. the number
171 * of tracers of a certain type) for one type negatively:
172 * =0 no tracers
173 * >0 number of per-thread tracers
174 * <0 number of per-cpu tracers
175 *
176 * Tracers essentially gives the number of ds contexts for a certain
177 * type of allocation.
178 */
179 static atomic_t tracers = ATOMIC_INIT(0);
180
181 static inline void get_tracer(struct task_struct *task)
182 {
183 if (task)
184 atomic_inc(&tracers);
185 else
186 atomic_dec(&tracers);
187 }
188
189 static inline void put_tracer(struct task_struct *task)
190 {
191 if (task)
192 atomic_dec(&tracers);
193 else
194 atomic_inc(&tracers);
195 }
196
197 static inline int check_tracer(struct task_struct *task)
198 {
199 return task ?
200 (atomic_read(&tracers) >= 0) :
201 (atomic_read(&tracers) <= 0);
202 }
203
204
205 /*
206 * The DS context is either attached to a thread or to a cpu:
207 * - in the former case, the thread_struct contains a pointer to the
208 * attached context.
209 * - in the latter case, we use a static array of per-cpu context
210 * pointers.
211 *
212 * Contexts are use-counted. They are allocated on first access and
213 * deallocated when the last user puts the context.
214 */
215 struct ds_context {
216 /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
217 unsigned char ds[MAX_SIZEOF_DS];
218 /* the owner of the BTS and PEBS configuration, respectively */
219 struct bts_tracer *bts_master;
220 struct pebs_tracer *pebs_master;
221 /* use count */
222 unsigned long count;
223 /* a pointer to the context location inside the thread_struct
224 * or the per_cpu context array */
225 struct ds_context **this;
226 /* a pointer to the task owning this context, or NULL, if the
227 * context is owned by a cpu */
228 struct task_struct *task;
229 };
230
231 static DEFINE_PER_CPU(struct ds_context *, system_context_array);
232
233 #define system_context per_cpu(system_context_array, smp_processor_id())
234
235 static inline struct ds_context *ds_get_context(struct task_struct *task)
236 {
237 struct ds_context **p_context =
238 (task ? &task->thread.ds_ctx : &system_context);
239 struct ds_context *context = *p_context;
240 unsigned long irq;
241
242 if (!context) {
243 context = kzalloc(sizeof(*context), GFP_KERNEL);
244 if (!context)
245 return NULL;
246
247 spin_lock_irqsave(&ds_lock, irq);
248
249 if (*p_context) {
250 kfree(context);
251
252 context = *p_context;
253 } else {
254 *p_context = context;
255
256 context->this = p_context;
257 context->task = task;
258
259 if (task)
260 set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
261
262 if (!task || (task == current))
263 wrmsrl(MSR_IA32_DS_AREA,
264 (unsigned long)context->ds);
265 }
266
267 context->count++;
268
269 spin_unlock_irqrestore(&ds_lock, irq);
270 } else {
271 spin_lock_irqsave(&ds_lock, irq);
272
273 context = *p_context;
274 if (context)
275 context->count++;
276
277 spin_unlock_irqrestore(&ds_lock, irq);
278
279 if (!context)
280 context = ds_get_context(task);
281 }
282
283 return context;
284 }
285
286 static inline void ds_put_context(struct ds_context *context)
287 {
288 unsigned long irq;
289
290 if (!context)
291 return;
292
293 spin_lock_irqsave(&ds_lock, irq);
294
295 if (--context->count) {
296 spin_unlock_irqrestore(&ds_lock, irq);
297 return;
298 }
299
300 *(context->this) = NULL;
301
302 if (context->task)
303 clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
304
305 if (!context->task || (context->task == current))
306 wrmsrl(MSR_IA32_DS_AREA, 0);
307
308 spin_unlock_irqrestore(&ds_lock, irq);
309
310 kfree(context);
311 }
312
313
314 /*
315 * Call the tracer's callback on a buffer overflow.
316 *
317 * context: the ds context
318 * qual: the buffer type
319 */
320 static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
321 {
322 switch (qual) {
323 case ds_bts:
324 if (context->bts_master &&
325 context->bts_master->ovfl)
326 context->bts_master->ovfl(context->bts_master);
327 break;
328 case ds_pebs:
329 if (context->pebs_master &&
330 context->pebs_master->ovfl)
331 context->pebs_master->ovfl(context->pebs_master);
332 break;
333 }
334 }
335
336
337 /*
338 * Write raw data into the BTS or PEBS buffer.
339 *
340 * The remainder of any partially written record is zeroed out.
341 *
342 * context: the DS context
343 * qual: the buffer type
344 * record: the data to write
345 * size: the size of the data
346 */
347 static int ds_write(struct ds_context *context, enum ds_qualifier qual,
348 const void *record, size_t size)
349 {
350 int bytes_written = 0;
351
352 if (!record)
353 return -EINVAL;
354
355 while (size) {
356 unsigned long base, index, end, write_end, int_th;
357 unsigned long write_size, adj_write_size;
358
359 /*
360 * write as much as possible without producing an
361 * overflow interrupt.
362 *
363 * interrupt_threshold must either be
364 * - bigger than absolute_maximum or
365 * - point to a record between buffer_base and absolute_maximum
366 *
367 * index points to a valid record.
368 */
369 base = ds_get(context->ds, qual, ds_buffer_base);
370 index = ds_get(context->ds, qual, ds_index);
371 end = ds_get(context->ds, qual, ds_absolute_maximum);
372 int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
373
374 write_end = min(end, int_th);
375
376 /* if we are already beyond the interrupt threshold,
377 * we fill the entire buffer */
378 if (write_end <= index)
379 write_end = end;
380
381 if (write_end <= index)
382 break;
383
384 write_size = min((unsigned long) size, write_end - index);
385 memcpy((void *)index, record, write_size);
386
387 record = (const char *)record + write_size;
388 size -= write_size;
389 bytes_written += write_size;
390
391 adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
392 adj_write_size *= ds_cfg.sizeof_rec[qual];
393
394 /* zero out trailing bytes */
395 memset((char *)index + write_size, 0,
396 adj_write_size - write_size);
397 index += adj_write_size;
398
399 if (index >= end)
400 index = base;
401 ds_set(context->ds, qual, ds_index, index);
402
403 if (index >= int_th)
404 ds_overflow(context, qual);
405 }
406
407 return bytes_written;
408 }
409
410
411 /*
412 * Branch Trace Store (BTS) uses the following format. Different
413 * architectures vary in the size of those fields.
414 * - source linear address
415 * - destination linear address
416 * - flags
417 *
418 * Later architectures use 64bit pointers throughout, whereas earlier
419 * architectures use 32bit pointers in 32bit mode.
420 *
421 * We compute the base address for the first 8 fields based on:
422 * - the field size stored in the DS configuration
423 * - the relative field position
424 *
425 * In order to store additional information in the BTS buffer, we use
426 * a special source address to indicate that the record requires
427 * special interpretation.
428 *
429 * Netburst indicated via a bit in the flags field whether the branch
430 * was predicted; this is ignored.
431 *
432 * We use two levels of abstraction:
433 * - the raw data level defined here
434 * - an arch-independent level defined in ds.h
435 */
436
437 enum bts_field {
438 bts_from,
439 bts_to,
440 bts_flags,
441
442 bts_qual = bts_from,
443 bts_jiffies = bts_to,
444 bts_pid = bts_flags,
445
446 bts_qual_mask = (bts_qual_max - 1),
447 bts_escape = ((unsigned long)-1 & ~bts_qual_mask)
448 };
449
450 static inline unsigned long bts_get(const char *base, enum bts_field field)
451 {
452 base += (ds_cfg.sizeof_field * field);
453 return *(unsigned long *)base;
454 }
455
456 static inline void bts_set(char *base, enum bts_field field, unsigned long val)
457 {
458 base += (ds_cfg.sizeof_field * field);;
459 (*(unsigned long *)base) = val;
460 }
461
462
463 /*
464 * The raw BTS data is architecture dependent.
465 *
466 * For higher-level users, we give an arch-independent view.
467 * - ds.h defines struct bts_struct
468 * - bts_read translates one raw bts record into a bts_struct
469 * - bts_write translates one bts_struct into the raw format and
470 * writes it into the top of the parameter tracer's buffer.
471 *
472 * return: bytes read/written on success; -Eerrno, otherwise
473 */
474 static int bts_read(struct bts_tracer *tracer, const void *at,
475 struct bts_struct *out)
476 {
477 if (!tracer)
478 return -EINVAL;
479
480 if (at < tracer->trace.ds.begin)
481 return -EINVAL;
482
483 if (tracer->trace.ds.end < (at + tracer->trace.ds.size))
484 return -EINVAL;
485
486 memset(out, 0, sizeof(*out));
487 if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) {
488 out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask);
489 out->variant.timestamp.jiffies = bts_get(at, bts_jiffies);
490 out->variant.timestamp.pid = bts_get(at, bts_pid);
491 } else {
492 out->qualifier = bts_branch;
493 out->variant.lbr.from = bts_get(at, bts_from);
494 out->variant.lbr.to = bts_get(at, bts_to);
495 }
496
497 return ds_cfg.sizeof_rec[ds_bts];
498 }
499
500 static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in)
501 {
502 unsigned char raw[MAX_SIZEOF_BTS];
503
504 if (!tracer)
505 return -EINVAL;
506
507 if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts])
508 return -EOVERFLOW;
509
510 switch (in->qualifier) {
511 case bts_invalid:
512 bts_set(raw, bts_from, 0);
513 bts_set(raw, bts_to, 0);
514 bts_set(raw, bts_flags, 0);
515 break;
516 case bts_branch:
517 bts_set(raw, bts_from, in->variant.lbr.from);
518 bts_set(raw, bts_to, in->variant.lbr.to);
519 bts_set(raw, bts_flags, 0);
520 break;
521 case bts_task_arrives:
522 case bts_task_departs:
523 bts_set(raw, bts_qual, (bts_escape | in->qualifier));
524 bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies);
525 bts_set(raw, bts_pid, in->variant.timestamp.pid);
526 break;
527 default:
528 return -EINVAL;
529 }
530
531 return ds_write(tracer->ds.context, ds_bts, raw,
532 ds_cfg.sizeof_rec[ds_bts]);
533 }
534
535
536 static void ds_write_config(struct ds_context *context,
537 struct ds_trace *cfg, enum ds_qualifier qual)
538 {
539 unsigned char *ds = context->ds;
540
541 ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin);
542 ds_set(ds, qual, ds_index, (unsigned long)cfg->top);
543 ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end);
544 ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith);
545 }
546
547 static void ds_read_config(struct ds_context *context,
548 struct ds_trace *cfg, enum ds_qualifier qual)
549 {
550 unsigned char *ds = context->ds;
551
552 cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base);
553 cfg->top = (void *)ds_get(ds, qual, ds_index);
554 cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum);
555 cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold);
556 }
557
558 static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
559 void *base, size_t size, size_t ith,
560 unsigned int flags) {
561 unsigned long buffer, adj;
562
563 /* adjust the buffer address and size to meet alignment
564 * constraints:
565 * - buffer is double-word aligned
566 * - size is multiple of record size
567 *
568 * We checked the size at the very beginning; we have enough
569 * space to do the adjustment.
570 */
571 buffer = (unsigned long)base;
572
573 adj = ALIGN(buffer, DS_ALIGNMENT) - buffer;
574 buffer += adj;
575 size -= adj;
576
577 trace->n = size / ds_cfg.sizeof_rec[qual];
578 trace->size = ds_cfg.sizeof_rec[qual];
579
580 size = (trace->n * trace->size);
581
582 trace->begin = (void *)buffer;
583 trace->top = trace->begin;
584 trace->end = (void *)(buffer + size);
585 /* The value for 'no threshold' is -1, which will set the
586 * threshold outside of the buffer, just like we want it.
587 */
588 trace->ith = (void *)(buffer + size - ith);
589
590 trace->flags = flags;
591 }
592
593
594 static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
595 enum ds_qualifier qual, struct task_struct *task,
596 void *base, size_t size, size_t th, unsigned int flags)
597 {
598 struct ds_context *context;
599 int error;
600
601 error = -EINVAL;
602 if (!base)
603 goto out;
604
605 /* we require some space to do alignment adjustments below */
606 error = -EINVAL;
607 if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
608 goto out;
609
610 if (th != (size_t)-1) {
611 th *= ds_cfg.sizeof_rec[qual];
612
613 error = -EINVAL;
614 if (size <= th)
615 goto out;
616 }
617
618 tracer->buffer = base;
619 tracer->size = size;
620
621 error = -ENOMEM;
622 context = ds_get_context(task);
623 if (!context)
624 goto out;
625 tracer->context = context;
626
627 ds_init_ds_trace(trace, qual, base, size, th, flags);
628
629 error = 0;
630 out:
631 return error;
632 }
633
634 struct bts_tracer *ds_request_bts(struct task_struct *task,
635 void *base, size_t size,
636 bts_ovfl_callback_t ovfl, size_t th,
637 unsigned int flags)
638 {
639 struct bts_tracer *tracer;
640 unsigned long irq;
641 int error;
642
643 error = -EOPNOTSUPP;
644 if (!ds_cfg.ctl[dsf_bts])
645 goto out;
646
647 /* buffer overflow notification is not yet implemented */
648 error = -EOPNOTSUPP;
649 if (ovfl)
650 goto out;
651
652 error = -ENOMEM;
653 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
654 if (!tracer)
655 goto out;
656 tracer->ovfl = ovfl;
657
658 error = ds_request(&tracer->ds, &tracer->trace.ds,
659 ds_bts, task, base, size, th, flags);
660 if (error < 0)
661 goto out_tracer;
662
663
664 spin_lock_irqsave(&ds_lock, irq);
665
666 error = -EPERM;
667 if (!check_tracer(task))
668 goto out_unlock;
669 get_tracer(task);
670
671 error = -EPERM;
672 if (tracer->ds.context->bts_master)
673 goto out_put_tracer;
674 tracer->ds.context->bts_master = tracer;
675
676 spin_unlock_irqrestore(&ds_lock, irq);
677
678
679 tracer->trace.read = bts_read;
680 tracer->trace.write = bts_write;
681
682 ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
683 ds_resume_bts(tracer);
684
685 return tracer;
686
687 out_put_tracer:
688 put_tracer(task);
689 out_unlock:
690 spin_unlock_irqrestore(&ds_lock, irq);
691 ds_put_context(tracer->ds.context);
692 out_tracer:
693 kfree(tracer);
694 out:
695 return ERR_PTR(error);
696 }
697
698 struct pebs_tracer *ds_request_pebs(struct task_struct *task,
699 void *base, size_t size,
700 pebs_ovfl_callback_t ovfl, size_t th,
701 unsigned int flags)
702 {
703 struct pebs_tracer *tracer;
704 unsigned long irq;
705 int error;
706
707 /* buffer overflow notification is not yet implemented */
708 error = -EOPNOTSUPP;
709 if (ovfl)
710 goto out;
711
712 error = -ENOMEM;
713 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
714 if (!tracer)
715 goto out;
716 tracer->ovfl = ovfl;
717
718 error = ds_request(&tracer->ds, &tracer->trace.ds,
719 ds_pebs, task, base, size, th, flags);
720 if (error < 0)
721 goto out_tracer;
722
723 spin_lock_irqsave(&ds_lock, irq);
724
725 error = -EPERM;
726 if (!check_tracer(task))
727 goto out_unlock;
728 get_tracer(task);
729
730 error = -EPERM;
731 if (tracer->ds.context->pebs_master)
732 goto out_put_tracer;
733 tracer->ds.context->pebs_master = tracer;
734
735 spin_unlock_irqrestore(&ds_lock, irq);
736
737 ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
738 ds_resume_pebs(tracer);
739
740 return tracer;
741
742 out_put_tracer:
743 put_tracer(task);
744 out_unlock:
745 spin_unlock_irqrestore(&ds_lock, irq);
746 ds_put_context(tracer->ds.context);
747 out_tracer:
748 kfree(tracer);
749 out:
750 return ERR_PTR(error);
751 }
752
753 void ds_release_bts(struct bts_tracer *tracer)
754 {
755 if (!tracer)
756 return;
757
758 ds_suspend_bts(tracer);
759
760 WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
761 tracer->ds.context->bts_master = NULL;
762
763 put_tracer(tracer->ds.context->task);
764 ds_put_context(tracer->ds.context);
765
766 kfree(tracer);
767 }
768
769 void ds_suspend_bts(struct bts_tracer *tracer)
770 {
771 struct task_struct *task;
772
773 if (!tracer)
774 return;
775
776 task = tracer->ds.context->task;
777
778 if (!task || (task == current))
779 update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL);
780
781 if (task) {
782 task->thread.debugctlmsr &= ~BTS_CONTROL;
783
784 if (!task->thread.debugctlmsr)
785 clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
786 }
787 }
788
789 void ds_resume_bts(struct bts_tracer *tracer)
790 {
791 struct task_struct *task;
792 unsigned long control;
793
794 if (!tracer)
795 return;
796
797 task = tracer->ds.context->task;
798
799 control = ds_cfg.ctl[dsf_bts];
800 if (!(tracer->trace.ds.flags & BTS_KERNEL))
801 control |= ds_cfg.ctl[dsf_bts_kernel];
802 if (!(tracer->trace.ds.flags & BTS_USER))
803 control |= ds_cfg.ctl[dsf_bts_user];
804
805 if (task) {
806 task->thread.debugctlmsr |= control;
807 set_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
808 }
809
810 if (!task || (task == current))
811 update_debugctlmsr(get_debugctlmsr() | control);
812 }
813
814 void ds_release_pebs(struct pebs_tracer *tracer)
815 {
816 if (!tracer)
817 return;
818
819 ds_suspend_pebs(tracer);
820
821 WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
822 tracer->ds.context->pebs_master = NULL;
823
824 put_tracer(tracer->ds.context->task);
825 ds_put_context(tracer->ds.context);
826
827 kfree(tracer);
828 }
829
/*
 * Suspend PEBS tracing.
 *
 * This is a placeholder; it does nothing.
 */
void ds_suspend_pebs(struct pebs_tracer *tracer)
{
}
834
/*
 * Resume PEBS tracing.
 *
 * This is a placeholder; it does nothing.
 */
void ds_resume_pebs(struct pebs_tracer *tracer)
{
}
839
840 const struct bts_trace *ds_read_bts(struct bts_tracer *tracer)
841 {
842 if (!tracer)
843 return NULL;
844
845 ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
846 return &tracer->trace;
847 }
848
849 const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer)
850 {
851 if (!tracer)
852 return NULL;
853
854 ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
855 tracer->trace.reset_value =
856 *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8));
857
858 return &tracer->trace;
859 }
860
861 int ds_reset_bts(struct bts_tracer *tracer)
862 {
863 if (!tracer)
864 return -EINVAL;
865
866 tracer->trace.ds.top = tracer->trace.ds.begin;
867
868 ds_set(tracer->ds.context->ds, ds_bts, ds_index,
869 (unsigned long)tracer->trace.ds.top);
870
871 return 0;
872 }
873
874 int ds_reset_pebs(struct pebs_tracer *tracer)
875 {
876 if (!tracer)
877 return -EINVAL;
878
879 tracer->trace.ds.top = tracer->trace.ds.begin;
880
881 ds_set(tracer->ds.context->ds, ds_bts, ds_index,
882 (unsigned long)tracer->trace.ds.top);
883
884 return 0;
885 }
886
887 int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
888 {
889 if (!tracer)
890 return -EINVAL;
891
892 *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value;
893
894 return 0;
895 }
896
897 static const struct ds_configuration ds_cfg_netburst = {
898 .name = "netburst",
899 .ctl[dsf_bts] = (1 << 2) | (1 << 3),
900 .ctl[dsf_bts_kernel] = (1 << 5),
901 .ctl[dsf_bts_user] = (1 << 6),
902
903 .sizeof_field = sizeof(long),
904 .sizeof_rec[ds_bts] = sizeof(long) * 3,
905 #ifdef __i386__
906 .sizeof_rec[ds_pebs] = sizeof(long) * 10,
907 #else
908 .sizeof_rec[ds_pebs] = sizeof(long) * 18,
909 #endif
910 };
911 static const struct ds_configuration ds_cfg_pentium_m = {
912 .name = "pentium m",
913 .ctl[dsf_bts] = (1 << 6) | (1 << 7),
914
915 .sizeof_field = sizeof(long),
916 .sizeof_rec[ds_bts] = sizeof(long) * 3,
917 #ifdef __i386__
918 .sizeof_rec[ds_pebs] = sizeof(long) * 10,
919 #else
920 .sizeof_rec[ds_pebs] = sizeof(long) * 18,
921 #endif
922 };
923 static const struct ds_configuration ds_cfg_core2 = {
924 .name = "core 2",
925 .ctl[dsf_bts] = (1 << 6) | (1 << 7),
926 .ctl[dsf_bts_kernel] = (1 << 9),
927 .ctl[dsf_bts_user] = (1 << 10),
928
929 .sizeof_field = 8,
930 .sizeof_rec[ds_bts] = 8 * 3,
931 .sizeof_rec[ds_pebs] = 8 * 18,
932 };
933
934 static void
935 ds_configure(const struct ds_configuration *cfg)
936 {
937 memset(&ds_cfg, 0, sizeof(ds_cfg));
938 ds_cfg = *cfg;
939
940 printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name);
941
942 if (!cpu_has_bts) {
943 ds_cfg.ctl[dsf_bts] = 0;
944 printk(KERN_INFO "[ds] bts not available\n");
945 }
946 if (!cpu_has_pebs)
947 printk(KERN_INFO "[ds] pebs not available\n");
948
949 WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field));
950 }
951
952 void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
953 {
954 switch (c->x86) {
955 case 0x6:
956 switch (c->x86_model) {
957 case 0 ... 0xC:
958 /* sorry, don't know about them */
959 break;
960 case 0xD:
961 case 0xE: /* Pentium M */
962 ds_configure(&ds_cfg_pentium_m);
963 break;
964 default: /* Core2, Atom, ... */
965 ds_configure(&ds_cfg_core2);
966 break;
967 }
968 break;
969 case 0xF:
970 switch (c->x86_model) {
971 case 0x0:
972 case 0x1:
973 case 0x2: /* Netburst */
974 ds_configure(&ds_cfg_netburst);
975 break;
976 default:
977 /* sorry, don't know about them */
978 break;
979 }
980 break;
981 default:
982 /* sorry, don't know about them */
983 break;
984 }
985 }
986
987 /*
988 * Change the DS configuration from tracing prev to tracing next.
989 */
990 void ds_switch_to(struct task_struct *prev, struct task_struct *next)
991 {
992 struct ds_context *prev_ctx = prev->thread.ds_ctx;
993 struct ds_context *next_ctx = next->thread.ds_ctx;
994
995 if (prev_ctx) {
996 update_debugctlmsr(0);
997
998 if (prev_ctx->bts_master &&
999 (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
1000 struct bts_struct ts = {
1001 .qualifier = bts_task_departs,
1002 .variant.timestamp.jiffies = jiffies_64,
1003 .variant.timestamp.pid = prev->pid
1004 };
1005 bts_write(prev_ctx->bts_master, &ts);
1006 }
1007 }
1008
1009 if (next_ctx) {
1010 if (next_ctx->bts_master &&
1011 (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
1012 struct bts_struct ts = {
1013 .qualifier = bts_task_arrives,
1014 .variant.timestamp.jiffies = jiffies_64,
1015 .variant.timestamp.pid = next->pid
1016 };
1017 bts_write(next_ctx->bts_master, &ts);
1018 }
1019
1020 wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds);
1021 }
1022
1023 update_debugctlmsr(next->thread.debugctlmsr);
1024 }