]>
Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
e717bf4e VW |
2 | /* Driver for Intel Xeon Phi "Knights Corner" PMU */ |
3 | ||
4 | #include <linux/perf_event.h> | |
5 | #include <linux/types.h> | |
6 | ||
e4074b30 VW |
7 | #include <asm/hardirq.h> |
8 | ||
27f6d22b | 9 | #include "../perf_event.h" |
e717bf4e VW |
10 | |
11 | static const u64 knc_perfmon_event_map[] = | |
12 | { | |
13 | [PERF_COUNT_HW_CPU_CYCLES] = 0x002a, | |
14 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x0016, | |
15 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0028, | |
16 | [PERF_COUNT_HW_CACHE_MISSES] = 0x0029, | |
17 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0012, | |
18 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x002b, | |
19 | }; | |
20 | ||
c391c788 | 21 | static const u64 __initconst knc_hw_cache_event_ids |
e717bf4e VW |
22 | [PERF_COUNT_HW_CACHE_MAX] |
23 | [PERF_COUNT_HW_CACHE_OP_MAX] | |
24 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | |
25 | { | |
26 | [ C(L1D) ] = { | |
27 | [ C(OP_READ) ] = { | |
28 | /* On Xeon Phi event "0" is a valid DATA_READ */ | |
29 | /* (L1 Data Cache Reads) Instruction. */ | |
30 | /* We code this as ARCH_PERFMON_EVENTSEL_INT as this */ | |
31 | /* bit will always be set in x86_pmu_hw_config(). */ | |
32 | [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT, | |
33 | /* DATA_READ */ | |
34 | [ C(RESULT_MISS) ] = 0x0003, /* DATA_READ_MISS */ | |
35 | }, | |
36 | [ C(OP_WRITE) ] = { | |
37 | [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */ | |
38 | [ C(RESULT_MISS) ] = 0x0004, /* DATA_WRITE_MISS */ | |
39 | }, | |
40 | [ C(OP_PREFETCH) ] = { | |
41 | [ C(RESULT_ACCESS) ] = 0x0011, /* L1_DATA_PF1 */ | |
42 | [ C(RESULT_MISS) ] = 0x001c, /* L1_DATA_PF1_MISS */ | |
43 | }, | |
44 | }, | |
45 | [ C(L1I ) ] = { | |
46 | [ C(OP_READ) ] = { | |
47 | [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */ | |
48 | [ C(RESULT_MISS) ] = 0x000e, /* CODE_CACHE_MISS */ | |
49 | }, | |
50 | [ C(OP_WRITE) ] = { | |
51 | [ C(RESULT_ACCESS) ] = -1, | |
52 | [ C(RESULT_MISS) ] = -1, | |
53 | }, | |
54 | [ C(OP_PREFETCH) ] = { | |
55 | [ C(RESULT_ACCESS) ] = 0x0, | |
56 | [ C(RESULT_MISS) ] = 0x0, | |
57 | }, | |
58 | }, | |
59 | [ C(LL ) ] = { | |
60 | [ C(OP_READ) ] = { | |
61 | [ C(RESULT_ACCESS) ] = 0, | |
62 | [ C(RESULT_MISS) ] = 0x10cb, /* L2_READ_MISS */ | |
63 | }, | |
64 | [ C(OP_WRITE) ] = { | |
65 | [ C(RESULT_ACCESS) ] = 0x10cc, /* L2_WRITE_HIT */ | |
66 | [ C(RESULT_MISS) ] = 0, | |
67 | }, | |
68 | [ C(OP_PREFETCH) ] = { | |
69 | [ C(RESULT_ACCESS) ] = 0x10fc, /* L2_DATA_PF2 */ | |
70 | [ C(RESULT_MISS) ] = 0x10fe, /* L2_DATA_PF2_MISS */ | |
71 | }, | |
72 | }, | |
73 | [ C(DTLB) ] = { | |
74 | [ C(OP_READ) ] = { | |
75 | [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT, | |
76 | /* DATA_READ */ | |
77 | /* see note on L1 OP_READ */ | |
78 | [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */ | |
79 | }, | |
80 | [ C(OP_WRITE) ] = { | |
81 | [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */ | |
82 | [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */ | |
83 | }, | |
84 | [ C(OP_PREFETCH) ] = { | |
85 | [ C(RESULT_ACCESS) ] = 0x0, | |
86 | [ C(RESULT_MISS) ] = 0x0, | |
87 | }, | |
88 | }, | |
89 | [ C(ITLB) ] = { | |
90 | [ C(OP_READ) ] = { | |
91 | [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */ | |
92 | [ C(RESULT_MISS) ] = 0x000d, /* CODE_PAGE_WALK */ | |
93 | }, | |
94 | [ C(OP_WRITE) ] = { | |
95 | [ C(RESULT_ACCESS) ] = -1, | |
96 | [ C(RESULT_MISS) ] = -1, | |
97 | }, | |
98 | [ C(OP_PREFETCH) ] = { | |
99 | [ C(RESULT_ACCESS) ] = -1, | |
100 | [ C(RESULT_MISS) ] = -1, | |
101 | }, | |
102 | }, | |
103 | [ C(BPU ) ] = { | |
104 | [ C(OP_READ) ] = { | |
105 | [ C(RESULT_ACCESS) ] = 0x0012, /* BRANCHES */ | |
106 | [ C(RESULT_MISS) ] = 0x002b, /* BRANCHES_MISPREDICTED */ | |
107 | }, | |
108 | [ C(OP_WRITE) ] = { | |
109 | [ C(RESULT_ACCESS) ] = -1, | |
110 | [ C(RESULT_MISS) ] = -1, | |
111 | }, | |
112 | [ C(OP_PREFETCH) ] = { | |
113 | [ C(RESULT_ACCESS) ] = -1, | |
114 | [ C(RESULT_MISS) ] = -1, | |
115 | }, | |
116 | }, | |
117 | }; | |
118 | ||
119 | ||
120 | static u64 knc_pmu_event_map(int hw_event) | |
121 | { | |
122 | return knc_perfmon_event_map[hw_event]; | |
123 | } | |
124 | ||
125 | static struct event_constraint knc_event_constraints[] = | |
126 | { | |
127 | INTEL_EVENT_CONSTRAINT(0xc3, 0x1), /* HWP_L2HIT */ | |
128 | INTEL_EVENT_CONSTRAINT(0xc4, 0x1), /* HWP_L2MISS */ | |
129 | INTEL_EVENT_CONSTRAINT(0xc8, 0x1), /* L2_READ_HIT_E */ | |
130 | INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* L2_READ_HIT_M */ | |
131 | INTEL_EVENT_CONSTRAINT(0xca, 0x1), /* L2_READ_HIT_S */ | |
132 | INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* L2_READ_MISS */ | |
133 | INTEL_EVENT_CONSTRAINT(0xcc, 0x1), /* L2_WRITE_HIT */ | |
134 | INTEL_EVENT_CONSTRAINT(0xce, 0x1), /* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */ | |
135 | INTEL_EVENT_CONSTRAINT(0xcf, 0x1), /* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */ | |
136 | INTEL_EVENT_CONSTRAINT(0xd7, 0x1), /* L2_VICTIM_REQ_WITH_DATA */ | |
137 | INTEL_EVENT_CONSTRAINT(0xe3, 0x1), /* SNP_HITM_BUNIT */ | |
138 | INTEL_EVENT_CONSTRAINT(0xe6, 0x1), /* SNP_HIT_L2 */ | |
139 | INTEL_EVENT_CONSTRAINT(0xe7, 0x1), /* SNP_HITM_L2 */ | |
140 | INTEL_EVENT_CONSTRAINT(0xf1, 0x1), /* L2_DATA_READ_MISS_CACHE_FILL */ | |
141 | INTEL_EVENT_CONSTRAINT(0xf2, 0x1), /* L2_DATA_WRITE_MISS_CACHE_FILL */ | |
142 | INTEL_EVENT_CONSTRAINT(0xf6, 0x1), /* L2_DATA_READ_MISS_MEM_FILL */ | |
143 | INTEL_EVENT_CONSTRAINT(0xf7, 0x1), /* L2_DATA_WRITE_MISS_MEM_FILL */ | |
144 | INTEL_EVENT_CONSTRAINT(0xfc, 0x1), /* L2_DATA_PF2 */ | |
145 | INTEL_EVENT_CONSTRAINT(0xfd, 0x1), /* L2_DATA_PF2_DROP */ | |
146 | INTEL_EVENT_CONSTRAINT(0xfe, 0x1), /* L2_DATA_PF2_MISS */ | |
147 | INTEL_EVENT_CONSTRAINT(0xff, 0x1), /* L2_DATA_HIT_INFLIGHT_PF2 */ | |
148 | EVENT_CONSTRAINT_END | |
149 | }; | |
150 | ||
/* KNC global performance-monitoring MSRs used by this driver. */
#define MSR_KNC_IA32_PERF_GLOBAL_STATUS		0x2d
#define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL	0x2e
#define MSR_KNC_IA32_PERF_GLOBAL_CTRL		0x2f

/* Bits set/cleared in MSR_KNC_IA32_PERF_GLOBAL_CTRL by enable/disable_all. */
#define KNC_ENABLE_COUNTER0			0x1
#define KNC_ENABLE_COUNTER1			0x2
157 | ||
158 | static void knc_pmu_disable_all(void) | |
159 | { | |
160 | u64 val; | |
161 | ||
162 | rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); | |
163 | val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1); | |
164 | wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); | |
165 | } | |
166 | ||
167 | static void knc_pmu_enable_all(int added) | |
168 | { | |
169 | u64 val; | |
170 | ||
171 | rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); | |
172 | val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1); | |
173 | wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); | |
174 | } | |
175 | ||
176 | static inline void | |
177 | knc_pmu_disable_event(struct perf_event *event) | |
178 | { | |
e717bf4e VW |
179 | struct hw_perf_event *hwc = &event->hw; |
180 | u64 val; | |
181 | ||
182 | val = hwc->config; | |
7d011962 | 183 | val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; |
e717bf4e VW |
184 | |
185 | (void)wrmsrl_safe(hwc->config_base + hwc->idx, val); | |
186 | } | |
187 | ||
188 | static void knc_pmu_enable_event(struct perf_event *event) | |
189 | { | |
e717bf4e VW |
190 | struct hw_perf_event *hwc = &event->hw; |
191 | u64 val; | |
192 | ||
193 | val = hwc->config; | |
7d011962 | 194 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; |
e717bf4e VW |
195 | |
196 | (void)wrmsrl_safe(hwc->config_base + hwc->idx, val); | |
197 | } | |
198 | ||
e4074b30 VW |
199 | static inline u64 knc_pmu_get_status(void) |
200 | { | |
201 | u64 status; | |
202 | ||
203 | rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_STATUS, status); | |
204 | ||
205 | return status; | |
206 | } | |
207 | ||
208 | static inline void knc_pmu_ack_status(u64 ack) | |
209 | { | |
210 | wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL, ack); | |
211 | } | |
212 | ||
213 | static int knc_pmu_handle_irq(struct pt_regs *regs) | |
214 | { | |
215 | struct perf_sample_data data; | |
216 | struct cpu_hw_events *cpuc; | |
217 | int handled = 0; | |
218 | int bit, loops; | |
219 | u64 status; | |
220 | ||
89cbc767 | 221 | cpuc = this_cpu_ptr(&cpu_hw_events); |
e4074b30 VW |
222 | |
223 | knc_pmu_disable_all(); | |
224 | ||
225 | status = knc_pmu_get_status(); | |
226 | if (!status) { | |
227 | knc_pmu_enable_all(0); | |
228 | return handled; | |
229 | } | |
230 | ||
231 | loops = 0; | |
232 | again: | |
233 | knc_pmu_ack_status(status); | |
234 | if (++loops > 100) { | |
235 | WARN_ONCE(1, "perf: irq loop stuck!\n"); | |
236 | perf_event_print_debug(); | |
237 | goto done; | |
238 | } | |
239 | ||
240 | inc_irq_stat(apic_perf_irqs); | |
241 | ||
242 | for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { | |
243 | struct perf_event *event = cpuc->events[bit]; | |
244 | ||
245 | handled++; | |
246 | ||
247 | if (!test_bit(bit, cpuc->active_mask)) | |
248 | continue; | |
249 | ||
250 | if (!intel_pmu_save_and_restart(event)) | |
251 | continue; | |
252 | ||
253 | perf_sample_data_init(&data, 0, event->hw.last_period); | |
254 | ||
255 | if (perf_event_overflow(event, &data, regs)) | |
256 | x86_pmu_stop(event, 0); | |
257 | } | |
258 | ||
259 | /* | |
260 | * Repeat if there is more work to be done: | |
261 | */ | |
262 | status = knc_pmu_get_status(); | |
263 | if (status) | |
264 | goto again; | |
265 | ||
266 | done: | |
c3d266c8 KL |
267 | /* Only restore PMU state when it's active. See x86_pmu_disable(). */ |
268 | if (cpuc->enabled) | |
269 | knc_pmu_enable_all(0); | |
e4074b30 VW |
270 | |
271 | return handled; | |
272 | } | |
273 | ||
274 | ||
e717bf4e VW |
275 | PMU_FORMAT_ATTR(event, "config:0-7" ); |
276 | PMU_FORMAT_ATTR(umask, "config:8-15" ); | |
277 | PMU_FORMAT_ATTR(edge, "config:18" ); | |
278 | PMU_FORMAT_ATTR(inv, "config:23" ); | |
279 | PMU_FORMAT_ATTR(cmask, "config:24-31" ); | |
280 | ||
281 | static struct attribute *intel_knc_formats_attr[] = { | |
282 | &format_attr_event.attr, | |
283 | &format_attr_umask.attr, | |
284 | &format_attr_edge.attr, | |
285 | &format_attr_inv.attr, | |
286 | &format_attr_cmask.attr, | |
287 | NULL, | |
288 | }; | |
289 | ||
c391c788 | 290 | static const struct x86_pmu knc_pmu __initconst = { |
e717bf4e | 291 | .name = "knc", |
e4074b30 | 292 | .handle_irq = knc_pmu_handle_irq, |
e717bf4e VW |
293 | .disable_all = knc_pmu_disable_all, |
294 | .enable_all = knc_pmu_enable_all, | |
295 | .enable = knc_pmu_enable_event, | |
296 | .disable = knc_pmu_disable_event, | |
297 | .hw_config = x86_pmu_hw_config, | |
298 | .schedule_events = x86_schedule_events, | |
299 | .eventsel = MSR_KNC_EVNTSEL0, | |
300 | .perfctr = MSR_KNC_PERFCTR0, | |
301 | .event_map = knc_pmu_event_map, | |
302 | .max_events = ARRAY_SIZE(knc_perfmon_event_map), | |
303 | .apic = 1, | |
ae5ba47a | 304 | .max_period = (1ULL << 39) - 1, |
e717bf4e VW |
305 | .version = 0, |
306 | .num_counters = 2, | |
ae5ba47a VW |
307 | .cntval_bits = 40, |
308 | .cntval_mask = (1ULL << 40) - 1, | |
e717bf4e VW |
309 | .get_event_constraints = x86_get_event_constraints, |
310 | .event_constraints = knc_event_constraints, | |
311 | .format_attrs = intel_knc_formats_attr, | |
312 | }; | |
313 | ||
314 | __init int knc_pmu_init(void) | |
315 | { | |
316 | x86_pmu = knc_pmu; | |
317 | ||
318 | memcpy(hw_cache_event_ids, knc_hw_cache_event_ids, | |
319 | sizeof(hw_cache_event_ids)); | |
320 | ||
321 | return 0; | |
322 | } |