]>
Commit | Line | Data |
---|---|---|
e717bf4e VW |
1 | /* Driver for Intel Xeon Phi "Knights Corner" PMU */ |
2 | ||
3 | #include <linux/perf_event.h> | |
4 | #include <linux/types.h> | |
5 | ||
e4074b30 VW |
6 | #include <asm/hardirq.h> |
7 | ||
e717bf4e VW |
8 | #include "perf_event.h" |
9 | ||
10 | static const u64 knc_perfmon_event_map[] = | |
11 | { | |
12 | [PERF_COUNT_HW_CPU_CYCLES] = 0x002a, | |
13 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x0016, | |
14 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0028, | |
15 | [PERF_COUNT_HW_CACHE_MISSES] = 0x0029, | |
16 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0012, | |
17 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x002b, | |
18 | }; | |
19 | ||
c391c788 | 20 | static const u64 __initconst knc_hw_cache_event_ids |
e717bf4e VW |
21 | [PERF_COUNT_HW_CACHE_MAX] |
22 | [PERF_COUNT_HW_CACHE_OP_MAX] | |
23 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | |
24 | { | |
25 | [ C(L1D) ] = { | |
26 | [ C(OP_READ) ] = { | |
27 | /* On Xeon Phi event "0" is a valid DATA_READ */ | |
28 | /* (L1 Data Cache Reads) Instruction. */ | |
29 | /* We code this as ARCH_PERFMON_EVENTSEL_INT as this */ | |
30 | /* bit will always be set in x86_pmu_hw_config(). */ | |
31 | [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT, | |
32 | /* DATA_READ */ | |
33 | [ C(RESULT_MISS) ] = 0x0003, /* DATA_READ_MISS */ | |
34 | }, | |
35 | [ C(OP_WRITE) ] = { | |
36 | [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */ | |
37 | [ C(RESULT_MISS) ] = 0x0004, /* DATA_WRITE_MISS */ | |
38 | }, | |
39 | [ C(OP_PREFETCH) ] = { | |
40 | [ C(RESULT_ACCESS) ] = 0x0011, /* L1_DATA_PF1 */ | |
41 | [ C(RESULT_MISS) ] = 0x001c, /* L1_DATA_PF1_MISS */ | |
42 | }, | |
43 | }, | |
44 | [ C(L1I ) ] = { | |
45 | [ C(OP_READ) ] = { | |
46 | [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */ | |
47 | [ C(RESULT_MISS) ] = 0x000e, /* CODE_CACHE_MISS */ | |
48 | }, | |
49 | [ C(OP_WRITE) ] = { | |
50 | [ C(RESULT_ACCESS) ] = -1, | |
51 | [ C(RESULT_MISS) ] = -1, | |
52 | }, | |
53 | [ C(OP_PREFETCH) ] = { | |
54 | [ C(RESULT_ACCESS) ] = 0x0, | |
55 | [ C(RESULT_MISS) ] = 0x0, | |
56 | }, | |
57 | }, | |
58 | [ C(LL ) ] = { | |
59 | [ C(OP_READ) ] = { | |
60 | [ C(RESULT_ACCESS) ] = 0, | |
61 | [ C(RESULT_MISS) ] = 0x10cb, /* L2_READ_MISS */ | |
62 | }, | |
63 | [ C(OP_WRITE) ] = { | |
64 | [ C(RESULT_ACCESS) ] = 0x10cc, /* L2_WRITE_HIT */ | |
65 | [ C(RESULT_MISS) ] = 0, | |
66 | }, | |
67 | [ C(OP_PREFETCH) ] = { | |
68 | [ C(RESULT_ACCESS) ] = 0x10fc, /* L2_DATA_PF2 */ | |
69 | [ C(RESULT_MISS) ] = 0x10fe, /* L2_DATA_PF2_MISS */ | |
70 | }, | |
71 | }, | |
72 | [ C(DTLB) ] = { | |
73 | [ C(OP_READ) ] = { | |
74 | [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT, | |
75 | /* DATA_READ */ | |
76 | /* see note on L1 OP_READ */ | |
77 | [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */ | |
78 | }, | |
79 | [ C(OP_WRITE) ] = { | |
80 | [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */ | |
81 | [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */ | |
82 | }, | |
83 | [ C(OP_PREFETCH) ] = { | |
84 | [ C(RESULT_ACCESS) ] = 0x0, | |
85 | [ C(RESULT_MISS) ] = 0x0, | |
86 | }, | |
87 | }, | |
88 | [ C(ITLB) ] = { | |
89 | [ C(OP_READ) ] = { | |
90 | [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */ | |
91 | [ C(RESULT_MISS) ] = 0x000d, /* CODE_PAGE_WALK */ | |
92 | }, | |
93 | [ C(OP_WRITE) ] = { | |
94 | [ C(RESULT_ACCESS) ] = -1, | |
95 | [ C(RESULT_MISS) ] = -1, | |
96 | }, | |
97 | [ C(OP_PREFETCH) ] = { | |
98 | [ C(RESULT_ACCESS) ] = -1, | |
99 | [ C(RESULT_MISS) ] = -1, | |
100 | }, | |
101 | }, | |
102 | [ C(BPU ) ] = { | |
103 | [ C(OP_READ) ] = { | |
104 | [ C(RESULT_ACCESS) ] = 0x0012, /* BRANCHES */ | |
105 | [ C(RESULT_MISS) ] = 0x002b, /* BRANCHES_MISPREDICTED */ | |
106 | }, | |
107 | [ C(OP_WRITE) ] = { | |
108 | [ C(RESULT_ACCESS) ] = -1, | |
109 | [ C(RESULT_MISS) ] = -1, | |
110 | }, | |
111 | [ C(OP_PREFETCH) ] = { | |
112 | [ C(RESULT_ACCESS) ] = -1, | |
113 | [ C(RESULT_MISS) ] = -1, | |
114 | }, | |
115 | }, | |
116 | }; | |
117 | ||
118 | ||
119 | static u64 knc_pmu_event_map(int hw_event) | |
120 | { | |
121 | return knc_perfmon_event_map[hw_event]; | |
122 | } | |
123 | ||
124 | static struct event_constraint knc_event_constraints[] = | |
125 | { | |
126 | INTEL_EVENT_CONSTRAINT(0xc3, 0x1), /* HWP_L2HIT */ | |
127 | INTEL_EVENT_CONSTRAINT(0xc4, 0x1), /* HWP_L2MISS */ | |
128 | INTEL_EVENT_CONSTRAINT(0xc8, 0x1), /* L2_READ_HIT_E */ | |
129 | INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* L2_READ_HIT_M */ | |
130 | INTEL_EVENT_CONSTRAINT(0xca, 0x1), /* L2_READ_HIT_S */ | |
131 | INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* L2_READ_MISS */ | |
132 | INTEL_EVENT_CONSTRAINT(0xcc, 0x1), /* L2_WRITE_HIT */ | |
133 | INTEL_EVENT_CONSTRAINT(0xce, 0x1), /* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */ | |
134 | INTEL_EVENT_CONSTRAINT(0xcf, 0x1), /* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */ | |
135 | INTEL_EVENT_CONSTRAINT(0xd7, 0x1), /* L2_VICTIM_REQ_WITH_DATA */ | |
136 | INTEL_EVENT_CONSTRAINT(0xe3, 0x1), /* SNP_HITM_BUNIT */ | |
137 | INTEL_EVENT_CONSTRAINT(0xe6, 0x1), /* SNP_HIT_L2 */ | |
138 | INTEL_EVENT_CONSTRAINT(0xe7, 0x1), /* SNP_HITM_L2 */ | |
139 | INTEL_EVENT_CONSTRAINT(0xf1, 0x1), /* L2_DATA_READ_MISS_CACHE_FILL */ | |
140 | INTEL_EVENT_CONSTRAINT(0xf2, 0x1), /* L2_DATA_WRITE_MISS_CACHE_FILL */ | |
141 | INTEL_EVENT_CONSTRAINT(0xf6, 0x1), /* L2_DATA_READ_MISS_MEM_FILL */ | |
142 | INTEL_EVENT_CONSTRAINT(0xf7, 0x1), /* L2_DATA_WRITE_MISS_MEM_FILL */ | |
143 | INTEL_EVENT_CONSTRAINT(0xfc, 0x1), /* L2_DATA_PF2 */ | |
144 | INTEL_EVENT_CONSTRAINT(0xfd, 0x1), /* L2_DATA_PF2_DROP */ | |
145 | INTEL_EVENT_CONSTRAINT(0xfe, 0x1), /* L2_DATA_PF2_MISS */ | |
146 | INTEL_EVENT_CONSTRAINT(0xff, 0x1), /* L2_DATA_HIT_INFLIGHT_PF2 */ | |
147 | EVENT_CONSTRAINT_END | |
148 | }; | |
149 | ||
/* MSR numbers of the global status, overflow-control and control registers. */
#define MSR_KNC_IA32_PERF_GLOBAL_STATUS		0x0000002d
#define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL	0x0000002e
#define MSR_KNC_IA32_PERF_GLOBAL_CTRL		0x0000002f

/* Bits set/cleared in MSR_KNC_IA32_PERF_GLOBAL_CTRL to gate the two counters. */
#define KNC_ENABLE_COUNTER0			0x00000001
#define KNC_ENABLE_COUNTER1			0x00000002
156 | ||
157 | static void knc_pmu_disable_all(void) | |
158 | { | |
159 | u64 val; | |
160 | ||
161 | rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); | |
162 | val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1); | |
163 | wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); | |
164 | } | |
165 | ||
166 | static void knc_pmu_enable_all(int added) | |
167 | { | |
168 | u64 val; | |
169 | ||
170 | rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); | |
171 | val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1); | |
172 | wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); | |
173 | } | |
174 | ||
175 | static inline void | |
176 | knc_pmu_disable_event(struct perf_event *event) | |
177 | { | |
e717bf4e VW |
178 | struct hw_perf_event *hwc = &event->hw; |
179 | u64 val; | |
180 | ||
181 | val = hwc->config; | |
7d011962 | 182 | val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; |
e717bf4e VW |
183 | |
184 | (void)wrmsrl_safe(hwc->config_base + hwc->idx, val); | |
185 | } | |
186 | ||
187 | static void knc_pmu_enable_event(struct perf_event *event) | |
188 | { | |
e717bf4e VW |
189 | struct hw_perf_event *hwc = &event->hw; |
190 | u64 val; | |
191 | ||
192 | val = hwc->config; | |
7d011962 | 193 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; |
e717bf4e VW |
194 | |
195 | (void)wrmsrl_safe(hwc->config_base + hwc->idx, val); | |
196 | } | |
197 | ||
e4074b30 VW |
198 | static inline u64 knc_pmu_get_status(void) |
199 | { | |
200 | u64 status; | |
201 | ||
202 | rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_STATUS, status); | |
203 | ||
204 | return status; | |
205 | } | |
206 | ||
207 | static inline void knc_pmu_ack_status(u64 ack) | |
208 | { | |
209 | wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL, ack); | |
210 | } | |
211 | ||
212 | static int knc_pmu_handle_irq(struct pt_regs *regs) | |
213 | { | |
214 | struct perf_sample_data data; | |
215 | struct cpu_hw_events *cpuc; | |
216 | int handled = 0; | |
217 | int bit, loops; | |
218 | u64 status; | |
219 | ||
89cbc767 | 220 | cpuc = this_cpu_ptr(&cpu_hw_events); |
e4074b30 VW |
221 | |
222 | knc_pmu_disable_all(); | |
223 | ||
224 | status = knc_pmu_get_status(); | |
225 | if (!status) { | |
226 | knc_pmu_enable_all(0); | |
227 | return handled; | |
228 | } | |
229 | ||
230 | loops = 0; | |
231 | again: | |
232 | knc_pmu_ack_status(status); | |
233 | if (++loops > 100) { | |
234 | WARN_ONCE(1, "perf: irq loop stuck!\n"); | |
235 | perf_event_print_debug(); | |
236 | goto done; | |
237 | } | |
238 | ||
239 | inc_irq_stat(apic_perf_irqs); | |
240 | ||
241 | for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { | |
242 | struct perf_event *event = cpuc->events[bit]; | |
243 | ||
244 | handled++; | |
245 | ||
246 | if (!test_bit(bit, cpuc->active_mask)) | |
247 | continue; | |
248 | ||
249 | if (!intel_pmu_save_and_restart(event)) | |
250 | continue; | |
251 | ||
252 | perf_sample_data_init(&data, 0, event->hw.last_period); | |
253 | ||
254 | if (perf_event_overflow(event, &data, regs)) | |
255 | x86_pmu_stop(event, 0); | |
256 | } | |
257 | ||
258 | /* | |
259 | * Repeat if there is more work to be done: | |
260 | */ | |
261 | status = knc_pmu_get_status(); | |
262 | if (status) | |
263 | goto again; | |
264 | ||
265 | done: | |
266 | knc_pmu_enable_all(0); | |
267 | ||
268 | return handled; | |
269 | } | |
270 | ||
271 | ||
e717bf4e VW |
272 | PMU_FORMAT_ATTR(event, "config:0-7" ); |
273 | PMU_FORMAT_ATTR(umask, "config:8-15" ); | |
274 | PMU_FORMAT_ATTR(edge, "config:18" ); | |
275 | PMU_FORMAT_ATTR(inv, "config:23" ); | |
276 | PMU_FORMAT_ATTR(cmask, "config:24-31" ); | |
277 | ||
278 | static struct attribute *intel_knc_formats_attr[] = { | |
279 | &format_attr_event.attr, | |
280 | &format_attr_umask.attr, | |
281 | &format_attr_edge.attr, | |
282 | &format_attr_inv.attr, | |
283 | &format_attr_cmask.attr, | |
284 | NULL, | |
285 | }; | |
286 | ||
c391c788 | 287 | static const struct x86_pmu knc_pmu __initconst = { |
e717bf4e | 288 | .name = "knc", |
e4074b30 | 289 | .handle_irq = knc_pmu_handle_irq, |
e717bf4e VW |
290 | .disable_all = knc_pmu_disable_all, |
291 | .enable_all = knc_pmu_enable_all, | |
292 | .enable = knc_pmu_enable_event, | |
293 | .disable = knc_pmu_disable_event, | |
294 | .hw_config = x86_pmu_hw_config, | |
295 | .schedule_events = x86_schedule_events, | |
296 | .eventsel = MSR_KNC_EVNTSEL0, | |
297 | .perfctr = MSR_KNC_PERFCTR0, | |
298 | .event_map = knc_pmu_event_map, | |
299 | .max_events = ARRAY_SIZE(knc_perfmon_event_map), | |
300 | .apic = 1, | |
ae5ba47a | 301 | .max_period = (1ULL << 39) - 1, |
e717bf4e VW |
302 | .version = 0, |
303 | .num_counters = 2, | |
ae5ba47a VW |
304 | .cntval_bits = 40, |
305 | .cntval_mask = (1ULL << 40) - 1, | |
e717bf4e VW |
306 | .get_event_constraints = x86_get_event_constraints, |
307 | .event_constraints = knc_event_constraints, | |
308 | .format_attrs = intel_knc_formats_attr, | |
309 | }; | |
310 | ||
311 | __init int knc_pmu_init(void) | |
312 | { | |
313 | x86_pmu = knc_pmu; | |
314 | ||
315 | memcpy(hw_cache_event_ids, knc_hw_cache_event_ids, | |
316 | sizeof(hw_cache_event_ids)); | |
317 | ||
318 | return 0; | |
319 | } |