arch/x86/kernel/cpu/perf_event_intel_lbr.c

#include <linux/perf_event.h>
#include <linux/types.h>

#include <asm/perf_event.h>
#include <asm/msr.h>

#include "perf_event.h"

enum {
        LBR_FORMAT_32           = 0x00,
        LBR_FORMAT_LIP          = 0x01,
        LBR_FORMAT_EIP          = 0x02,
        LBR_FORMAT_EIP_FLAGS    = 0x03,
};

/*
 * We only support LBR implementations that have FREEZE_LBRS_ON_PMI,
 * otherwise it becomes nearly impossible to get a reliable stack.
 */

static void __intel_pmu_lbr_enable(void)
{
        u64 debugctl;

        rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
        debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
        wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}

static void __intel_pmu_lbr_disable(void)
{
        u64 debugctl;

        rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
        debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
        wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}

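/*
 * In the 32-bit LBR format each entry is a single MSR that packs both the
 * branch source and destination, so only the lbr_from array needs clearing;
 * the 64-bit formats keep separate FROM and TO MSR arrays.
 */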
static void intel_pmu_lbr_reset_32(void)
{
        int i;

        for (i = 0; i < x86_pmu.lbr_nr; i++)
                wrmsrl(x86_pmu.lbr_from + i, 0);
}

static void intel_pmu_lbr_reset_64(void)
{
        int i;

        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                wrmsrl(x86_pmu.lbr_from + i, 0);
                wrmsrl(x86_pmu.lbr_to + i, 0);
        }
}

void intel_pmu_lbr_reset(void)
{
        if (!x86_pmu.lbr_nr)
                return;

        if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
                intel_pmu_lbr_reset_32();
        else
                intel_pmu_lbr_reset_64();
}

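/*
 * cpuc->lbr_users counts the events currently using the LBR on this CPU.
 * The DEBUGCTL bits themselves are written from the *_all() helpers below,
 * and from intel_pmu_lbr_disable() when the last user goes away.
 */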
void intel_pmu_lbr_enable(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

        if (!x86_pmu.lbr_nr)
                return;

        /*
         * Reset the LBR stack if we changed task context to
         * avoid data leaks.
         */

        if (event->ctx->task && cpuc->lbr_context != event->ctx) {
                intel_pmu_lbr_reset();
                cpuc->lbr_context = event->ctx;
        }

        cpuc->lbr_users++;
}

void intel_pmu_lbr_disable(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

        if (!x86_pmu.lbr_nr)
                return;

        cpuc->lbr_users--;
        WARN_ON_ONCE(cpuc->lbr_users < 0);

        if (cpuc->enabled && !cpuc->lbr_users)
                __intel_pmu_lbr_disable();
}

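/*
 * Only touch DEBUGCTL if at least one event on this CPU is using the LBR.
 */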
void intel_pmu_lbr_enable_all(void)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

        if (cpuc->lbr_users)
                __intel_pmu_lbr_enable();
}

void intel_pmu_lbr_disable_all(void)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

        if (cpuc->lbr_users)
                __intel_pmu_lbr_disable();
}

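/*
 * The TOS MSR holds the index of the most recently written LBR entry; the
 * read routines below walk backwards from it through the circular stack.
 */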
static inline u64 intel_pmu_lbr_tos(void)
{
        u64 tos;

        rdmsrl(x86_pmu.lbr_tos, tos);

        return tos;
}

static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
{
        unsigned long mask = x86_pmu.lbr_nr - 1;
        u64 tos = intel_pmu_lbr_tos();
        int i;

        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                unsigned long lbr_idx = (tos - i) & mask;
                union {
                        struct {
                                u32 from;
                                u32 to;
                        };
                        u64 lbr;
                } msr_lastbranch;

                rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);

                cpuc->lbr_entries[i].from      = msr_lastbranch.from;
                cpuc->lbr_entries[i].to        = msr_lastbranch.to;
                cpuc->lbr_entries[i].mispred   = 0;
                cpuc->lbr_entries[i].predicted = 0;
                cpuc->lbr_entries[i].reserved  = 0;
        }
        cpuc->lbr_stack.nr = i;
}

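/*
 * On LBR_FORMAT_EIP_FLAGS hardware, bit 63 of the LBR_FROM MSR flags a
 * mispredicted branch; intel_pmu_lbr_read_64() strips it with a
 * sign-extending shift to recover the branch source address.
 */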
#define LBR_FROM_FLAG_MISPRED  (1ULL << 63)

/*
 * Due to the lack of segmentation in Linux, the effective address (offset)
 * is the same as the linear address, allowing us to merge the LIP and EIP
 * LBR formats.
 */
static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
        unsigned long mask = x86_pmu.lbr_nr - 1;
        int lbr_format = x86_pmu.intel_cap.lbr_format;
        u64 tos = intel_pmu_lbr_tos();
        int i;

        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                unsigned long lbr_idx = (tos - i) & mask;
                u64 from, to, mis = 0, pred = 0;

                rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
                rdmsrl(x86_pmu.lbr_to + lbr_idx, to);

                if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
                        mis = !!(from & LBR_FROM_FLAG_MISPRED);
                        pred = !mis;
                        from = (u64)((((s64)from) << 1) >> 1);
                }

                cpuc->lbr_entries[i].from      = from;
                cpuc->lbr_entries[i].to        = to;
                cpuc->lbr_entries[i].mispred   = mis;
                cpuc->lbr_entries[i].predicted = pred;
                cpuc->lbr_entries[i].reserved  = 0;
        }
        cpuc->lbr_stack.nr = i;
}

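/*
 * Snapshot the hardware LBR stack into cpuc->lbr_stack, dispatching on the
 * LBR format reported in x86_pmu.intel_cap.lbr_format.
 */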
void intel_pmu_lbr_read(void)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

        if (!cpuc->lbr_users)
                return;

        if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
                intel_pmu_lbr_read_32(cpuc);
        else
                intel_pmu_lbr_read_64(cpuc);
}

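/*
 * The init routines below record, per CPU model, how many LBR entries the
 * hardware has and which MSRs hold the top-of-stack pointer and the
 * FROM/TO address arrays.
 */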
void intel_pmu_lbr_init_core(void)
{
        x86_pmu.lbr_nr   = 4;
        x86_pmu.lbr_tos  = MSR_LBR_TOS;
        x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
        x86_pmu.lbr_to   = MSR_LBR_CORE_TO;
}

void intel_pmu_lbr_init_nhm(void)
{
        x86_pmu.lbr_nr   = 16;
        x86_pmu.lbr_tos  = MSR_LBR_TOS;
        x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
        x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
}

void intel_pmu_lbr_init_atom(void)
{
        x86_pmu.lbr_nr   = 8;
        x86_pmu.lbr_tos  = MSR_LBR_TOS;
        x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
        x86_pmu.lbr_to   = MSR_LBR_CORE_TO;
}