]>
Commit | Line | Data |
---|---|---|
a072738e | 1 | /* |
0d2eb44f | 2 | * Netburst Performance Events (P4, old Xeon) |
a072738e CG |
3 | * |
4 | * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org> | |
5 | * Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com> | |
6 | * | |
7 | * For licencing details see kernel-base/COPYING | |
8 | */ | |
9 | ||
de0428a7 | 10 | #include <linux/perf_event.h> |
a072738e CG |
11 | |
12 | #include <asm/perf_event_p4.h> | |
de0428a7 KW |
13 | #include <asm/hardirq.h> |
14 | #include <asm/apic.h> | |
15 | ||
16 | #include "perf_event.h" | |
a072738e | 17 | |
d814f301 | 18 | #define P4_CNTR_LIMIT 3 |
a072738e CG |
19 | /* |
20 | * array indices: 0,1 - HT threads, used with HT enabled cpu | |
21 | */ | |
d814f301 CG |
22 | struct p4_event_bind { |
23 | unsigned int opcode; /* Event code and ESCR selector */ | |
24 | unsigned int escr_msr[2]; /* ESCR MSR for this event */ | |
c9cf4a01 CG |
25 | unsigned int escr_emask; /* valid ESCR EventMask bits */ |
26 | unsigned int shared; /* event is shared across threads */ | |
1ff3d7d7 | 27 | char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on absence */ |
a072738e CG |
28 | }; |
29 | ||
39ef13a4 | 30 | struct p4_pebs_bind { |
d814f301 CG |
31 | unsigned int metric_pebs; |
32 | unsigned int metric_vert; | |
a072738e CG |
33 | }; |
34 | ||
39ef13a4 CG |
35 | /* it sets P4_PEBS_ENABLE_UOP_TAG as well */ |
36 | #define P4_GEN_PEBS_BIND(name, pebs, vert) \ | |
37 | [P4_PEBS_METRIC__##name] = { \ | |
38 | .metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG, \ | |
39 | .metric_vert = vert, \ | |
d814f301 CG |
40 | } |
41 | ||
39ef13a4 CG |
42 | /* |
43 | * note we have P4_PEBS_ENABLE_UOP_TAG always set here | |
44 | * | |
45 | * it's needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of | |
46 | * event configuration to find out which values are to be | |
47 | * written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT | |
48 | * registers | |
49 | */ | |
50 | static struct p4_pebs_bind p4_pebs_bind_map[] = { | |
51 | P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired, 0x0000001, 0x0000001), | |
52 | P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired, 0x0000002, 0x0000001), | |
53 | P4_GEN_PEBS_BIND(dtlb_load_miss_retired, 0x0000004, 0x0000001), | |
54 | P4_GEN_PEBS_BIND(dtlb_store_miss_retired, 0x0000004, 0x0000002), | |
55 | P4_GEN_PEBS_BIND(dtlb_all_miss_retired, 0x0000004, 0x0000003), | |
56 | P4_GEN_PEBS_BIND(tagged_mispred_branch, 0x0018000, 0x0000010), | |
57 | P4_GEN_PEBS_BIND(mob_load_replay_retired, 0x0000200, 0x0000001), | |
58 | P4_GEN_PEBS_BIND(split_load_retired, 0x0000400, 0x0000001), | |
59 | P4_GEN_PEBS_BIND(split_store_retired, 0x0000400, 0x0000002), | |
d814f301 CG |
60 | }; |
61 | ||
62 | /* | |
63 | * Note that we don't use CCCR1 here, there is an | |
64 | * exception for P4_BSQ_ALLOCATION but we just have | |
65 | * no workaround | |
66 | * | |
67 | * consider this binding as resources which particular | |
68 | * event may borrow, it doesn't contain EventMask, | |
69 | * Tags and friends -- they are left to a caller | |
70 | */ | |
71 | static struct p4_event_bind p4_event_bind_map[] = { | |
72 | [P4_EVENT_TC_DELIVER_MODE] = { | |
73 | .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE), | |
74 | .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, | |
c9cf4a01 CG |
75 | .escr_emask = |
76 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD) | | |
77 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DB) | | |
78 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DI) | | |
79 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BD) | | |
80 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BB) | | |
81 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BI) | | |
82 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, ID), | |
83 | .shared = 1, | |
d814f301 CG |
84 | .cntr = { {4, 5, -1}, {6, 7, -1} }, |
85 | }, | |
86 | [P4_EVENT_BPU_FETCH_REQUEST] = { | |
87 | .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST), | |
88 | .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 }, | |
c9cf4a01 CG |
89 | .escr_emask = |
90 | P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST, TCMISS), | |
d814f301 CG |
91 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
92 | }, | |
93 | [P4_EVENT_ITLB_REFERENCE] = { | |
94 | .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE), | |
95 | .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, | |
c9cf4a01 CG |
96 | .escr_emask = |
97 | P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT) | | |
98 | P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, MISS) | | |
99 | P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT_UK), | |
d814f301 CG |
100 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
101 | }, | |
102 | [P4_EVENT_MEMORY_CANCEL] = { | |
103 | .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL), | |
104 | .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, | |
c9cf4a01 CG |
105 | .escr_emask = |
106 | P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL) | | |
107 | P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, 64K_CONF), | |
d814f301 CG |
108 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
109 | }, | |
110 | [P4_EVENT_MEMORY_COMPLETE] = { | |
111 | .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE), | |
112 | .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, | |
c9cf4a01 CG |
113 | .escr_emask = |
114 | P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, LSC) | | |
115 | P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, SSC), | |
d814f301 CG |
116 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
117 | }, | |
118 | [P4_EVENT_LOAD_PORT_REPLAY] = { | |
119 | .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY), | |
120 | .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, | |
c9cf4a01 CG |
121 | .escr_emask = |
122 | P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD), | |
d814f301 CG |
123 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
124 | }, | |
125 | [P4_EVENT_STORE_PORT_REPLAY] = { | |
126 | .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY), | |
127 | .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, | |
c9cf4a01 CG |
128 | .escr_emask = |
129 | P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST), | |
d814f301 CG |
130 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
131 | }, | |
132 | [P4_EVENT_MOB_LOAD_REPLAY] = { | |
133 | .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY), | |
134 | .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 }, | |
c9cf4a01 CG |
135 | .escr_emask = |
136 | P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STA) | | |
137 | P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STD) | | |
138 | P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA) | | |
139 | P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR), | |
d814f301 CG |
140 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
141 | }, | |
142 | [P4_EVENT_PAGE_WALK_TYPE] = { | |
143 | .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE), | |
144 | .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 }, | |
c9cf4a01 CG |
145 | .escr_emask = |
146 | P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, DTMISS) | | |
147 | P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, ITMISS), | |
148 | .shared = 1, | |
d814f301 CG |
149 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
150 | }, | |
151 | [P4_EVENT_BSQ_CACHE_REFERENCE] = { | |
152 | .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE), | |
153 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, | |
c9cf4a01 CG |
154 | .escr_emask = |
155 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | | |
156 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | | |
157 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) | | |
158 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) | | |
159 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) | | |
160 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM) | | |
161 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | | |
162 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | | |
163 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS), | |
d814f301 CG |
164 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
165 | }, | |
166 | [P4_EVENT_IOQ_ALLOCATION] = { | |
167 | .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION), | |
168 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | |
c9cf4a01 CG |
169 | .escr_emask = |
170 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, DEFAULT) | | |
171 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_READ) | | |
172 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE) | | |
173 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_UC) | | |
174 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WC) | | |
175 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WT) | | |
176 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WP) | | |
177 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WB) | | |
178 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OWN) | | |
179 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OTHER) | | |
180 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, PREFETCH), | |
d814f301 CG |
181 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
182 | }, | |
183 | [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */ | |
184 | .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES), | |
185 | .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 }, | |
c9cf4a01 CG |
186 | .escr_emask = |
187 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT) | | |
188 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ) | | |
189 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE) | | |
190 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC) | | |
191 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC) | | |
192 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT) | | |
193 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP) | | |
194 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB) | | |
195 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN) | | |
196 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER) | | |
197 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH), | |
d814f301 CG |
198 | .cntr = { {2, -1, -1}, {3, -1, -1} }, |
199 | }, | |
200 | [P4_EVENT_FSB_DATA_ACTIVITY] = { | |
201 | .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY), | |
202 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | |
c9cf4a01 CG |
203 | .escr_emask = |
204 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) | | |
205 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN) | | |
206 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER) | | |
207 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV) | | |
208 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN) | | |
209 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER), | |
210 | .shared = 1, | |
d814f301 CG |
211 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
212 | }, | |
213 | [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */ | |
214 | .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION), | |
215 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 }, | |
c9cf4a01 CG |
216 | .escr_emask = |
217 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0) | | |
218 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1) | | |
219 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0) | | |
220 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1) | | |
221 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE) | | |
222 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE) | | |
223 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE) | | |
224 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE) | | |
225 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE) | | |
226 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE) | | |
227 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0) | | |
228 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1) | | |
229 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2), | |
d814f301 CG |
230 | .cntr = { {0, -1, -1}, {1, -1, -1} }, |
231 | }, | |
232 | [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */ | |
233 | .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES), | |
234 | .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 }, | |
c9cf4a01 CG |
235 | .escr_emask = |
236 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0) | | |
237 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1) | | |
238 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0) | | |
239 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1) | | |
240 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE) | | |
241 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE) | | |
242 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE) | | |
243 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE) | | |
244 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE) | | |
245 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE) | | |
246 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0) | | |
247 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1) | | |
248 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2), | |
d814f301 CG |
249 | .cntr = { {2, -1, -1}, {3, -1, -1} }, |
250 | }, | |
251 | [P4_EVENT_SSE_INPUT_ASSIST] = { | |
252 | .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST), | |
253 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | |
c9cf4a01 CG |
254 | .escr_emask = |
255 | P4_ESCR_EMASK_BIT(P4_EVENT_SSE_INPUT_ASSIST, ALL), | |
256 | .shared = 1, | |
d814f301 CG |
257 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
258 | }, | |
259 | [P4_EVENT_PACKED_SP_UOP] = { | |
260 | .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP), | |
261 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | |
c9cf4a01 CG |
262 | .escr_emask = |
263 | P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_SP_UOP, ALL), | |
264 | .shared = 1, | |
d814f301 CG |
265 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
266 | }, | |
267 | [P4_EVENT_PACKED_DP_UOP] = { | |
268 | .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP), | |
269 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | |
c9cf4a01 CG |
270 | .escr_emask = |
271 | P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_DP_UOP, ALL), | |
272 | .shared = 1, | |
d814f301 CG |
273 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
274 | }, | |
275 | [P4_EVENT_SCALAR_SP_UOP] = { | |
276 | .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP), | |
277 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | |
c9cf4a01 CG |
278 | .escr_emask = |
279 | P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_SP_UOP, ALL), | |
280 | .shared = 1, | |
d814f301 CG |
281 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
282 | }, | |
283 | [P4_EVENT_SCALAR_DP_UOP] = { | |
284 | .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP), | |
285 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | |
c9cf4a01 CG |
286 | .escr_emask = |
287 | P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_DP_UOP, ALL), | |
288 | .shared = 1, | |
d814f301 CG |
289 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
290 | }, | |
291 | [P4_EVENT_64BIT_MMX_UOP] = { | |
292 | .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP), | |
293 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | |
c9cf4a01 CG |
294 | .escr_emask = |
295 | P4_ESCR_EMASK_BIT(P4_EVENT_64BIT_MMX_UOP, ALL), | |
296 | .shared = 1, | |
d814f301 CG |
297 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
298 | }, | |
299 | [P4_EVENT_128BIT_MMX_UOP] = { | |
300 | .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP), | |
301 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | |
c9cf4a01 CG |
302 | .escr_emask = |
303 | P4_ESCR_EMASK_BIT(P4_EVENT_128BIT_MMX_UOP, ALL), | |
304 | .shared = 1, | |
d814f301 CG |
305 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
306 | }, | |
307 | [P4_EVENT_X87_FP_UOP] = { | |
308 | .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP), | |
309 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | |
c9cf4a01 CG |
310 | .escr_emask = |
311 | P4_ESCR_EMASK_BIT(P4_EVENT_X87_FP_UOP, ALL), | |
312 | .shared = 1, | |
d814f301 CG |
313 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
314 | }, | |
315 | [P4_EVENT_TC_MISC] = { | |
316 | .opcode = P4_OPCODE(P4_EVENT_TC_MISC), | |
317 | .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, | |
c9cf4a01 CG |
318 | .escr_emask = |
319 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_MISC, FLUSH), | |
d814f301 CG |
320 | .cntr = { {4, 5, -1}, {6, 7, -1} }, |
321 | }, | |
322 | [P4_EVENT_GLOBAL_POWER_EVENTS] = { | |
323 | .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS), | |
324 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | |
c9cf4a01 CG |
325 | .escr_emask = |
326 | P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING), | |
d814f301 CG |
327 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
328 | }, | |
329 | [P4_EVENT_TC_MS_XFER] = { | |
330 | .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER), | |
331 | .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, | |
c9cf4a01 CG |
332 | .escr_emask = |
333 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_MS_XFER, CISC), | |
d814f301 CG |
334 | .cntr = { {4, 5, -1}, {6, 7, -1} }, |
335 | }, | |
336 | [P4_EVENT_UOP_QUEUE_WRITES] = { | |
337 | .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES), | |
338 | .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, | |
c9cf4a01 CG |
339 | .escr_emask = |
340 | P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD) | | |
341 | P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER) | | |
342 | P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM), | |
d814f301 CG |
343 | .cntr = { {4, 5, -1}, {6, 7, -1} }, |
344 | }, | |
345 | [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = { | |
346 | .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE), | |
347 | .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 }, | |
c9cf4a01 CG |
348 | .escr_emask = |
349 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL) | | |
350 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL) | | |
351 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN) | | |
352 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT), | |
d814f301 CG |
353 | .cntr = { {4, 5, -1}, {6, 7, -1} }, |
354 | }, | |
355 | [P4_EVENT_RETIRED_BRANCH_TYPE] = { | |
356 | .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE), | |
357 | .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 }, | |
c9cf4a01 CG |
358 | .escr_emask = |
359 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) | | |
360 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) | | |
361 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) | | |
362 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT), | |
d814f301 CG |
363 | .cntr = { {4, 5, -1}, {6, 7, -1} }, |
364 | }, | |
365 | [P4_EVENT_RESOURCE_STALL] = { | |
366 | .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL), | |
367 | .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 }, | |
c9cf4a01 CG |
368 | .escr_emask = |
369 | P4_ESCR_EMASK_BIT(P4_EVENT_RESOURCE_STALL, SBFULL), | |
d814f301 CG |
370 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
371 | }, | |
372 | [P4_EVENT_WC_BUFFER] = { | |
373 | .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER), | |
374 | .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, | |
c9cf4a01 CG |
375 | .escr_emask = |
376 | P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_EVICTS) | | |
377 | P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS), | |
378 | .shared = 1, | |
d814f301 CG |
379 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
380 | }, | |
381 | [P4_EVENT_B2B_CYCLES] = { | |
382 | .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES), | |
383 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | |
c9cf4a01 | 384 | .escr_emask = 0, |
d814f301 CG |
385 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
386 | }, | |
387 | [P4_EVENT_BNR] = { | |
388 | .opcode = P4_OPCODE(P4_EVENT_BNR), | |
389 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | |
c9cf4a01 | 390 | .escr_emask = 0, |
d814f301 CG |
391 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
392 | }, | |
393 | [P4_EVENT_SNOOP] = { | |
394 | .opcode = P4_OPCODE(P4_EVENT_SNOOP), | |
395 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | |
c9cf4a01 | 396 | .escr_emask = 0, |
d814f301 CG |
397 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
398 | }, | |
399 | [P4_EVENT_RESPONSE] = { | |
400 | .opcode = P4_OPCODE(P4_EVENT_RESPONSE), | |
401 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | |
c9cf4a01 | 402 | .escr_emask = 0, |
d814f301 CG |
403 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
404 | }, | |
405 | [P4_EVENT_FRONT_END_EVENT] = { | |
406 | .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT), | |
407 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | |
c9cf4a01 CG |
408 | .escr_emask = |
409 | P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, NBOGUS) | | |
410 | P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, BOGUS), | |
d814f301 CG |
411 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
412 | }, | |
413 | [P4_EVENT_EXECUTION_EVENT] = { | |
414 | .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT), | |
415 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | |
c9cf4a01 CG |
416 | .escr_emask = |
417 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0) | | |
418 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1) | | |
419 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2) | | |
420 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3) | | |
421 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) | | |
422 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) | | |
423 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) | | |
424 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3), | |
d814f301 CG |
425 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
426 | }, | |
427 | [P4_EVENT_REPLAY_EVENT] = { | |
428 | .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT), | |
429 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | |
c9cf4a01 CG |
430 | .escr_emask = |
431 | P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS) | | |
432 | P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, BOGUS), | |
d814f301 CG |
433 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
434 | }, | |
435 | [P4_EVENT_INSTR_RETIRED] = { | |
436 | .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED), | |
437 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | |
c9cf4a01 CG |
438 | .escr_emask = |
439 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) | | |
440 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSTAG) | | |
441 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG) | | |
442 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSTAG), | |
d814f301 CG |
443 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
444 | }, | |
445 | [P4_EVENT_UOPS_RETIRED] = { | |
446 | .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED), | |
447 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | |
c9cf4a01 CG |
448 | .escr_emask = |
449 | P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, NBOGUS) | | |
450 | P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, BOGUS), | |
d814f301 CG |
451 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
452 | }, | |
453 | [P4_EVENT_UOP_TYPE] = { | |
454 | .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE), | |
455 | .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, | |
c9cf4a01 CG |
456 | .escr_emask = |
457 | P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGLOADS) | | |
458 | P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGSTORES), | |
d814f301 CG |
459 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
460 | }, | |
461 | [P4_EVENT_BRANCH_RETIRED] = { | |
462 | .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED), | |
463 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | |
c9cf4a01 CG |
464 | .escr_emask = |
465 | P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNP) | | |
466 | P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNM) | | |
467 | P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTP) | | |
468 | P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTM), | |
d814f301 CG |
469 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
470 | }, | |
471 | [P4_EVENT_MISPRED_BRANCH_RETIRED] = { | |
472 | .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), | |
473 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | |
c9cf4a01 | 474 | .escr_emask = |
1d321881 | 475 | P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS), |
d814f301 CG |
476 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
477 | }, | |
478 | [P4_EVENT_X87_ASSIST] = { | |
479 | .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST), | |
480 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | |
c9cf4a01 CG |
481 | .escr_emask = |
482 | P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSU) | | |
483 | P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSO) | | |
484 | P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAO) | | |
485 | P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAU) | | |
486 | P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, PREA), | |
d814f301 CG |
487 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
488 | }, | |
489 | [P4_EVENT_MACHINE_CLEAR] = { | |
490 | .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR), | |
491 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | |
c9cf4a01 CG |
492 | .escr_emask = |
493 | P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, CLEAR) | | |
494 | P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, MOCLEAR) | | |
495 | P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, SMCLEAR), | |
d814f301 CG |
496 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
497 | }, | |
498 | [P4_EVENT_INSTR_COMPLETED] = { | |
499 | .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED), | |
500 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | |
c9cf4a01 CG |
501 | .escr_emask = |
502 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, NBOGUS) | | |
503 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, BOGUS), | |
d814f301 CG |
504 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
505 | }, | |
506 | }; | |
a072738e | 507 | |
39ef13a4 | 508 | #define P4_GEN_CACHE_EVENT(event, bit, metric) \ |
d814f301 CG |
509 | p4_config_pack_escr(P4_ESCR_EVENT(event) | \ |
510 | P4_ESCR_EMASK_BIT(event, bit)) | \ | |
39ef13a4 | 511 | p4_config_pack_cccr(metric | \ |
d814f301 | 512 | P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event)))) |
cb7d6b50 | 513 | |
caaa8be3 | 514 | static __initconst const u64 p4_hw_cache_event_ids |
cb7d6b50 LM |
515 | [PERF_COUNT_HW_CACHE_MAX] |
516 | [PERF_COUNT_HW_CACHE_OP_MAX] | |
517 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | |
518 | { | |
519 | [ C(L1D ) ] = { | |
520 | [ C(OP_READ) ] = { | |
521 | [ C(RESULT_ACCESS) ] = 0x0, | |
d814f301 | 522 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, |
39ef13a4 | 523 | P4_PEBS_METRIC__1stl_cache_load_miss_retired), |
cb7d6b50 LM |
524 | }, |
525 | }, | |
526 | [ C(LL ) ] = { | |
527 | [ C(OP_READ) ] = { | |
528 | [ C(RESULT_ACCESS) ] = 0x0, | |
d814f301 | 529 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, |
39ef13a4 | 530 | P4_PEBS_METRIC__2ndl_cache_load_miss_retired), |
cb7d6b50 | 531 | }, |
d814f301 | 532 | }, |
cb7d6b50 LM |
533 | [ C(DTLB) ] = { |
534 | [ C(OP_READ) ] = { | |
535 | [ C(RESULT_ACCESS) ] = 0x0, | |
d814f301 | 536 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, |
39ef13a4 | 537 | P4_PEBS_METRIC__dtlb_load_miss_retired), |
cb7d6b50 LM |
538 | }, |
539 | [ C(OP_WRITE) ] = { | |
540 | [ C(RESULT_ACCESS) ] = 0x0, | |
d814f301 | 541 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, |
39ef13a4 | 542 | P4_PEBS_METRIC__dtlb_store_miss_retired), |
cb7d6b50 LM |
543 | }, |
544 | }, | |
545 | [ C(ITLB) ] = { | |
546 | [ C(OP_READ) ] = { | |
d814f301 | 547 | [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT, |
39ef13a4 | 548 | P4_PEBS_METRIC__none), |
d814f301 | 549 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS, |
39ef13a4 | 550 | P4_PEBS_METRIC__none), |
cb7d6b50 LM |
551 | }, |
552 | [ C(OP_WRITE) ] = { | |
553 | [ C(RESULT_ACCESS) ] = -1, | |
554 | [ C(RESULT_MISS) ] = -1, | |
555 | }, | |
556 | [ C(OP_PREFETCH) ] = { | |
557 | [ C(RESULT_ACCESS) ] = -1, | |
558 | [ C(RESULT_MISS) ] = -1, | |
559 | }, | |
560 | }, | |
89d6c0b5 PZ |
561 | [ C(NODE) ] = { |
562 | [ C(OP_READ) ] = { | |
563 | [ C(RESULT_ACCESS) ] = -1, | |
564 | [ C(RESULT_MISS) ] = -1, | |
565 | }, | |
566 | [ C(OP_WRITE) ] = { | |
567 | [ C(RESULT_ACCESS) ] = -1, | |
568 | [ C(RESULT_MISS) ] = -1, | |
569 | }, | |
570 | [ C(OP_PREFETCH) ] = { | |
571 | [ C(RESULT_ACCESS) ] = -1, | |
572 | [ C(RESULT_MISS) ] = -1, | |
573 | }, | |
574 | }, | |
cb7d6b50 LM |
575 | }; |
576 | ||
f9129870 | 577 | /* |
f53173e4 CG |
578 | * Because of Netburst being quite restricted in how many |
579 | * identical events may run simultaneously, we introduce event aliases, | |
580 | * i.e. different events which have the same functionality but | |
581 | * utilize non-intersecting resources (ESCR/CCCR/counter registers). | |
f9129870 | 582 | * |
f53173e4 CG |
583 | * This allows us to relax restrictions a bit and run two or more
584 | * identical events together. | |
f9129870 | 585 | * |
f53173e4 CG |
586 | * Never set any custom internal bits such as P4_CONFIG_HT, |
587 | * P4_CONFIG_ALIASABLE or bits for P4_PEBS_METRIC, they are | |
588 | * either up to date automatically or not applicable at all. | |
f9129870 CG |
589 | */ |
590 | struct p4_event_alias { | |
f53173e4 CG |
591 | u64 original; |
592 | u64 alternative; | |
f9129870 CG |
593 | } p4_event_aliases[] = { |
594 | { | |
595 | /* | |
f53173e4 CG |
596 | * Non-halted cycles can be substituted with non-sleeping cycles (see |
597 | * Intel SDM Vol3b for details). We need this alias to be able | |
598 | * to run nmi-watchdog and 'perf top' (or any other user space tool | |
599 | * which is interested in running PERF_COUNT_HW_CPU_CYCLES) | |
600 | * simultaneously. | |
f9129870 | 601 | */ |
f53173e4 | 602 | .original = |
f9129870 CG |
603 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) | |
604 | P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)), | |
f53173e4 | 605 | .alternative = |
f9129870 CG |
606 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_EXECUTION_EVENT) | |
607 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0)| | |
608 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1)| | |
609 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2)| | |
610 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3)| | |
611 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) | | |
612 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) | | |
613 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) | | |
614 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3))| | |
615 | p4_config_pack_cccr(P4_CCCR_THRESHOLD(15) | P4_CCCR_COMPLEMENT | | |
616 | P4_CCCR_COMPARE), | |
617 | }, | |
618 | }; | |
619 | ||
620 | static u64 p4_get_alias_event(u64 config) | |
621 | { | |
622 | u64 config_match; | |
623 | int i; | |
624 | ||
625 | /* | |
f53173e4 CG |
626 | * Only an event with the special mark is allowed;
627 | * we need to be sure it didn't come in as a malformed | |
628 | * RAW event. | |
f9129870 CG |
629 | */ |
630 | if (!(config & P4_CONFIG_ALIASABLE)) | |
631 | return 0; | |
632 | ||
633 | config_match = config & P4_CONFIG_EVENT_ALIAS_MASK; | |
634 | ||
f9129870 | 635 | for (i = 0; i < ARRAY_SIZE(p4_event_aliases); i++) { |
f53173e4 CG |
636 | if (config_match == p4_event_aliases[i].original) { |
637 | config_match = p4_event_aliases[i].alternative; | |
f9129870 | 638 | break; |
f53173e4 CG |
639 | } else if (config_match == p4_event_aliases[i].alternative) { |
640 | config_match = p4_event_aliases[i].original; | |
f9129870 CG |
641 | break; |
642 | } | |
643 | } | |
644 | ||
645 | if (i >= ARRAY_SIZE(p4_event_aliases)) | |
646 | return 0; | |
647 | ||
f53173e4 | 648 | return config_match | (config & P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS); |
f9129870 CG |
649 | } |
650 | ||
d814f301 CG |
651 | static u64 p4_general_events[PERF_COUNT_HW_MAX] = { |
652 | /* non-halted CPU clocks */ | |
653 | [PERF_COUNT_HW_CPU_CYCLES] = | |
654 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) | | |
f9129870 CG |
655 | P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)) | |
656 | P4_CONFIG_ALIASABLE, | |
d814f301 CG |
657 | |
658 | /* | |
659 | * retired instructions | |
660 | * for the sake of simplicity we don't use the FSB tagging | |
661 | */ | |
662 | [PERF_COUNT_HW_INSTRUCTIONS] = | |
663 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_INSTR_RETIRED) | | |
664 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) | | |
665 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)), | |
666 | ||
667 | /* cache hits */ | |
668 | [PERF_COUNT_HW_CACHE_REFERENCES] = | |
669 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) | | |
670 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | | |
671 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | | |
672 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) | | |
673 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) | | |
674 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) | | |
675 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)), | |
676 | ||
677 | /* cache misses */ | |
678 | [PERF_COUNT_HW_CACHE_MISSES] = | |
679 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) | | |
680 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | | |
681 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | | |
682 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS)), | |
683 | ||
684 | /* branch instructions retired */ | |
685 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = | |
686 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_RETIRED_BRANCH_TYPE) | | |
687 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) | | |
688 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) | | |
689 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) | | |
690 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT)), | |
691 | ||
692 | /* mispredicted branches retired */ | |
693 | [PERF_COUNT_HW_BRANCH_MISSES] = | |
694 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_MISPRED_BRANCH_RETIRED) | | |
695 | P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS)), | |
696 | ||
697 | /* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN): */ | |
698 | [PERF_COUNT_HW_BUS_CYCLES] = | |
699 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_FSB_DATA_ACTIVITY) | | |
700 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) | | |
701 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN)) | | |
702 | p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE), | |
a072738e CG |
703 | }; |
704 | ||
d814f301 CG |
705 | static struct p4_event_bind *p4_config_get_bind(u64 config) |
706 | { | |
707 | unsigned int evnt = p4_config_unpack_event(config); | |
708 | struct p4_event_bind *bind = NULL; | |
709 | ||
710 | if (evnt < ARRAY_SIZE(p4_event_bind_map)) | |
711 | bind = &p4_event_bind_map[evnt]; | |
712 | ||
713 | return bind; | |
714 | } | |
715 | ||
a072738e CG |
716 | static u64 p4_pmu_event_map(int hw_event) |
717 | { | |
d814f301 CG |
718 | struct p4_event_bind *bind; |
719 | unsigned int esel; | |
a072738e CG |
720 | u64 config; |
721 | ||
d814f301 CG |
722 | config = p4_general_events[hw_event]; |
723 | bind = p4_config_get_bind(config); | |
724 | esel = P4_OPCODE_ESEL(bind->opcode); | |
725 | config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel)); | |
a072738e | 726 | |
a072738e CG |
727 | return config; |
728 | } | |
729 | ||
c9cf4a01 CG |
730 | /* check cpu model specifics */ |
731 | static bool p4_event_match_cpu_model(unsigned int event_idx) | |
732 | { | |
733 | /* INSTR_COMPLETED event only exist for model 3, 4, 6 (Prescott) */ | |
734 | if (event_idx == P4_EVENT_INSTR_COMPLETED) { | |
735 | if (boot_cpu_data.x86_model != 3 && | |
736 | boot_cpu_data.x86_model != 4 && | |
737 | boot_cpu_data.x86_model != 6) | |
738 | return false; | |
739 | } | |
740 | ||
741 | /* | |
742 | * For info | |
743 | * - IQ_ESCR0, IQ_ESCR1 only for models 1 and 2 | |
744 | */ | |
745 | ||
746 | return true; | |
747 | } | |
748 | ||
39ef13a4 CG |
749 | static int p4_validate_raw_event(struct perf_event *event) |
750 | { | |
c9cf4a01 | 751 | unsigned int v, emask; |
39ef13a4 | 752 | |
c9cf4a01 | 753 | /* User data may have out-of-bound event index */ |
39ef13a4 | 754 | v = p4_config_unpack_event(event->attr.config); |
c9cf4a01 CG |
755 | if (v >= ARRAY_SIZE(p4_event_bind_map)) |
756 | return -EINVAL; | |
757 | ||
758 | /* It may be unsupported: */ | |
759 | if (!p4_event_match_cpu_model(v)) | |
39ef13a4 | 760 | return -EINVAL; |
c9cf4a01 CG |
761 | |
762 | /* | |
763 | * NOTE: P4_CCCR_THREAD_ANY has not the same meaning as | |
764 | * in Architectural Performance Monitoring, it means not | |
765 | * on _which_ logical cpu to count but rather _when_, ie it | |
766 | * depends on logical cpu state -- count event if one cpu active, | |
767 | * none, both or any, so we just allow user to pass any value | |
768 | * desired. | |
769 | * | |
770 | * In turn we always set Tx_OS/Tx_USR bits bound to logical | |
771 | * cpu without their propagation to another cpu | |
772 | */ | |
773 | ||
774 | /* | |
0d2eb44f | 775 | * if an event is shared across the logical threads |
c9cf4a01 CG |
776 | * the user needs special permissions to be able to use it |
777 | */ | |
d038b12c | 778 | if (p4_ht_active() && p4_event_bind_map[v].shared) { |
c9cf4a01 CG |
779 | if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) |
780 | return -EACCES; | |
39ef13a4 CG |
781 | } |
782 | ||
c9cf4a01 CG |
783 | /* ESCR EventMask bits may be invalid */ |
784 | emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK; | |
785 | if (emask & ~p4_event_bind_map[v].escr_emask) | |
786 | return -EINVAL; | |
787 | ||
39ef13a4 | 788 | /* |
c9cf4a01 | 789 | * it may have some invalid PEBS bits |
39ef13a4 | 790 | */ |
c9cf4a01 | 791 | if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) |
39ef13a4 | 792 | return -EINVAL; |
c9cf4a01 | 793 | |
39ef13a4 | 794 | v = p4_config_unpack_metric(event->attr.config); |
c9cf4a01 | 795 | if (v >= ARRAY_SIZE(p4_pebs_bind_map)) |
39ef13a4 | 796 | return -EINVAL; |
39ef13a4 CG |
797 | |
798 | return 0; | |
799 | } | |
800 | ||
b4cdc5c2 | 801 | static int p4_hw_config(struct perf_event *event) |
a072738e | 802 | { |
137351e0 CG |
803 | int cpu = get_cpu(); |
804 | int rc = 0; | |
d814f301 | 805 | u32 escr, cccr; |
a072738e CG |
806 | |
807 | /* | |
808 | * the reason we use cpu that early is that: if we get scheduled | |
809 | * first time on the same cpu -- we will not need swap thread | |
810 | * specific flags in config (and will save some cpu cycles) | |
811 | */ | |
812 | ||
d814f301 | 813 | cccr = p4_default_cccr_conf(cpu); |
b4cdc5c2 PZ |
814 | escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel, |
815 | event->attr.exclude_user); | |
816 | event->hw.config = p4_config_pack_escr(escr) | | |
817 | p4_config_pack_cccr(cccr); | |
a072738e | 818 | |
cb7d6b50 | 819 | if (p4_ht_active() && p4_ht_thread(cpu)) |
b4cdc5c2 PZ |
820 | event->hw.config = p4_set_ht_bit(event->hw.config); |
821 | ||
de902d96 | 822 | if (event->attr.type == PERF_TYPE_RAW) { |
d038b12c SE |
823 | struct p4_event_bind *bind; |
824 | unsigned int esel; | |
c9cf4a01 CG |
825 | /* |
826 | * Clear bits we reserve to be managed by kernel itself | |
827 | * and never allowed from a user space | |
828 | */ | |
829 | event->attr.config &= P4_CONFIG_MASK; | |
830 | ||
39ef13a4 CG |
831 | rc = p4_validate_raw_event(event); |
832 | if (rc) | |
c7993165 | 833 | goto out; |
c7993165 | 834 | |
de902d96 | 835 | /* |
39ef13a4 CG |
836 | * Note that for RAW events we allow user to use P4_CCCR_RESERVED |
837 | * bits since we keep additional info here (for cache events and etc) | |
de902d96 | 838 | */ |
c9cf4a01 | 839 | event->hw.config |= event->attr.config; |
d038b12c SE |
840 | bind = p4_config_get_bind(event->attr.config); |
841 | if (!bind) { | |
842 | rc = -EINVAL; | |
843 | goto out; | |
844 | } | |
845 | esel = P4_OPCODE_ESEL(bind->opcode); | |
846 | event->hw.config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel)); | |
de902d96 | 847 | } |
cb7d6b50 | 848 | |
137351e0 | 849 | rc = x86_setup_perfctr(event); |
c7993165 | 850 | out: |
137351e0 | 851 | put_cpu(); |
137351e0 | 852 | return rc; |
a072738e CG |
853 | } |
854 | ||
0db1a7bc | 855 | static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) |
a072738e | 856 | { |
047a3772 | 857 | u64 v; |
a072738e | 858 | |
047a3772 | 859 | /* an official way for overflow indication */ |
73d6e522 | 860 | rdmsrl(hwc->config_base, v); |
047a3772 | 861 | if (v & P4_CCCR_OVF) { |
73d6e522 | 862 | wrmsrl(hwc->config_base, v & ~P4_CCCR_OVF); |
047a3772 | 863 | return 1; |
a072738e | 864 | } |
0db1a7bc | 865 | |
7d44ec19 CG |
866 | /* |
867 | * In some circumstances the overflow might issue an NMI but did | |
868 | * not set P4_CCCR_OVF bit. Because a counter holds a negative value | |
869 | * we simply check for high bit being set, if it's cleared it means | |
870 | * the counter has reached zero value and continued counting before | |
871 | * real NMI signal was received: | |
872 | */ | |
242214f9 | 873 | rdmsrl(hwc->event_base, v); |
7d44ec19 | 874 | if (!(v & ARCH_P4_UNFLAGGED_BIT)) |
047a3772 CG |
875 | return 1; |
876 | ||
877 | return 0; | |
a072738e CG |
878 | } |
879 | ||
39ef13a4 CG |
/*
 * Placeholder for turning the PEBS metrics off; intentionally does
 * nothing at the moment (see FIXME below).
 */
static void p4_pmu_disable_pebs(void)
{
	/*
	 * FIXME
	 *
	 * Two threads are still allowed to set up the same cache
	 * events, so the metrics can't simply be cleared until we know
	 * that nobody depends on them any more; some kind of counter
	 * of "ReplayEvent" users is needed.
	 *
	 * RAW events make it more complex: if the user (for some
	 * reason) passes a cache event metric together with an improper
	 * event opcode, that is fine from the hardware point of view
	 * but complete nonsense as to the "meaning" of such an action.
	 *
	 * So for the moment the metrics are left turned on forever --
	 * this is OK for now but needs to be revisited!
	 *
	 * (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, 0);
	 * (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, 0);
	 */
}
902 | ||
a072738e CG |
903 | static inline void p4_pmu_disable_event(struct perf_event *event) |
904 | { | |
905 | struct hw_perf_event *hwc = &event->hw; | |
906 | ||
907 | /* | |
908 | * If event gets disabled while counter is in overflowed | |
909 | * state we need to clear P4_CCCR_OVF, otherwise interrupt get | |
910 | * asserted again and again | |
911 | */ | |
715c85b1 | 912 | (void)wrmsrl_safe(hwc->config_base, |
5ac2b5c2 | 913 | p4_config_unpack_cccr(hwc->config) & ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED); |
a072738e CG |
914 | } |
915 | ||
916 | static void p4_pmu_disable_all(void) | |
917 | { | |
918 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | |
919 | int idx; | |
920 | ||
948b1bb8 | 921 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
a072738e CG |
922 | struct perf_event *event = cpuc->events[idx]; |
923 | if (!test_bit(idx, cpuc->active_mask)) | |
924 | continue; | |
925 | p4_pmu_disable_event(event); | |
926 | } | |
39ef13a4 CG |
927 | |
928 | p4_pmu_disable_pebs(); | |
929 | } | |
930 | ||
931 | /* configuration must be valid */ | |
932 | static void p4_pmu_enable_pebs(u64 config) | |
933 | { | |
934 | struct p4_pebs_bind *bind; | |
935 | unsigned int idx; | |
936 | ||
937 | BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK); | |
938 | ||
939 | idx = p4_config_unpack_metric(config); | |
940 | if (idx == P4_PEBS_METRIC__none) | |
941 | return; | |
942 | ||
943 | bind = &p4_pebs_bind_map[idx]; | |
944 | ||
715c85b1 PA |
945 | (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs); |
946 | (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert); | |
a072738e CG |
947 | } |
948 | ||
949 | static void p4_pmu_enable_event(struct perf_event *event) | |
950 | { | |
951 | struct hw_perf_event *hwc = &event->hw; | |
952 | int thread = p4_ht_config_thread(hwc->config); | |
953 | u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config)); | |
d814f301 | 954 | unsigned int idx = p4_config_unpack_event(hwc->config); |
d814f301 | 955 | struct p4_event_bind *bind; |
d814f301 | 956 | u64 escr_addr, cccr; |
a072738e | 957 | |
d814f301 | 958 | bind = &p4_event_bind_map[idx]; |
5ac2b5c2 | 959 | escr_addr = bind->escr_msr[thread]; |
a072738e CG |
960 | |
961 | /* | |
962 | * - we dont support cascaded counters yet | |
963 | * - and counter 1 is broken (erratum) | |
964 | */ | |
965 | WARN_ON_ONCE(p4_is_event_cascaded(hwc->config)); | |
966 | WARN_ON_ONCE(hwc->idx == 1); | |
967 | ||
d814f301 CG |
968 | /* we need a real Event value */ |
969 | escr_conf &= ~P4_ESCR_EVENT_MASK; | |
970 | escr_conf |= P4_ESCR_EVENT(P4_OPCODE_EVNT(bind->opcode)); | |
971 | ||
972 | cccr = p4_config_unpack_cccr(hwc->config); | |
973 | ||
974 | /* | |
39ef13a4 CG |
975 | * it could be Cache event so we need to write metrics |
976 | * into additional MSRs | |
d814f301 | 977 | */ |
39ef13a4 | 978 | p4_pmu_enable_pebs(hwc->config); |
d814f301 | 979 | |
715c85b1 PA |
980 | (void)wrmsrl_safe(escr_addr, escr_conf); |
981 | (void)wrmsrl_safe(hwc->config_base, | |
d814f301 | 982 | (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE); |
a072738e CG |
983 | } |
984 | ||
11164cd4 | 985 | static void p4_pmu_enable_all(int added) |
a072738e CG |
986 | { |
987 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | |
988 | int idx; | |
989 | ||
948b1bb8 | 990 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
a072738e CG |
991 | struct perf_event *event = cpuc->events[idx]; |
992 | if (!test_bit(idx, cpuc->active_mask)) | |
993 | continue; | |
994 | p4_pmu_enable_event(event); | |
995 | } | |
996 | } | |
997 | ||
998 | static int p4_pmu_handle_irq(struct pt_regs *regs) | |
999 | { | |
1000 | struct perf_sample_data data; | |
1001 | struct cpu_hw_events *cpuc; | |
1002 | struct perf_event *event; | |
1003 | struct hw_perf_event *hwc; | |
1004 | int idx, handled = 0; | |
1005 | u64 val; | |
1006 | ||
a072738e CG |
1007 | cpuc = &__get_cpu_var(cpu_hw_events); |
1008 | ||
948b1bb8 | 1009 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
1c250d70 | 1010 | int overflow; |
a072738e | 1011 | |
03e22198 CG |
1012 | if (!test_bit(idx, cpuc->active_mask)) { |
1013 | /* catch in-flight IRQs */ | |
1014 | if (__test_and_clear_bit(idx, cpuc->running)) | |
1015 | handled++; | |
a072738e | 1016 | continue; |
03e22198 | 1017 | } |
a072738e CG |
1018 | |
1019 | event = cpuc->events[idx]; | |
1020 | hwc = &event->hw; | |
1021 | ||
1022 | WARN_ON_ONCE(hwc->idx != idx); | |
1023 | ||
0db1a7bc | 1024 | /* it might be unflagged overflow */ |
1c250d70 | 1025 | overflow = p4_pmu_clear_cccr_ovf(hwc); |
a072738e CG |
1026 | |
1027 | val = x86_perf_event_update(event); | |
1c250d70 | 1028 | if (!overflow && (val & (1ULL << (x86_pmu.cntval_bits - 1)))) |
a072738e CG |
1029 | continue; |
1030 | ||
1c250d70 CG |
1031 | handled += overflow; |
1032 | ||
0db1a7bc | 1033 | /* event overflow for sure */ |
fd0d000b | 1034 | perf_sample_data_init(&data, 0, hwc->last_period); |
a072738e CG |
1035 | |
1036 | if (!x86_perf_event_set_period(event)) | |
1037 | continue; | |
fd0d000b RR |
1038 | |
1039 | ||
a8b0ca17 | 1040 | if (perf_event_overflow(event, &data, regs)) |
1ea5a6af | 1041 | x86_pmu_stop(event, 0); |
a072738e CG |
1042 | } |
1043 | ||
2bce5dac | 1044 | if (handled) |
a072738e | 1045 | inc_irq_stat(apic_perf_irqs); |
2bce5dac DZ |
1046 | |
1047 | /* | |
1048 | * When dealing with the unmasking of the LVTPC on P4 perf hw, it has | |
1049 | * been observed that the OVF bit flag has to be cleared first _before_ | |
1050 | * the LVTPC can be unmasked. | |
1051 | * | |
1052 | * The reason is the NMI line will continue to be asserted while the OVF | |
1053 | * bit is set. This causes a second NMI to generate if the LVTPC is | |
1054 | * unmasked before the OVF bit is cleared, leading to unknown NMI | |
1055 | * messages. | |
1056 | */ | |
1057 | apic_write(APIC_LVTPC, APIC_DM_NMI); | |
a072738e | 1058 | |
de725dec | 1059 | return handled; |
a072738e CG |
1060 | } |
1061 | ||
1062 | /* | |
1063 | * swap thread specific fields according to a thread | |
1064 | * we are going to run on | |
1065 | */ | |
1066 | static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu) | |
1067 | { | |
1068 | u32 escr, cccr; | |
1069 | ||
1070 | /* | |
1071 | * we either lucky and continue on same cpu or no HT support | |
1072 | */ | |
1073 | if (!p4_should_swap_ts(hwc->config, cpu)) | |
1074 | return; | |
1075 | ||
1076 | /* | |
1077 | * the event is migrated from an another logical | |
1078 | * cpu, so we need to swap thread specific flags | |
1079 | */ | |
1080 | ||
1081 | escr = p4_config_unpack_escr(hwc->config); | |
1082 | cccr = p4_config_unpack_cccr(hwc->config); | |
1083 | ||
1084 | if (p4_ht_thread(cpu)) { | |
1085 | cccr &= ~P4_CCCR_OVF_PMI_T0; | |
1086 | cccr |= P4_CCCR_OVF_PMI_T1; | |
d814f301 CG |
1087 | if (escr & P4_ESCR_T0_OS) { |
1088 | escr &= ~P4_ESCR_T0_OS; | |
1089 | escr |= P4_ESCR_T1_OS; | |
a072738e | 1090 | } |
d814f301 CG |
1091 | if (escr & P4_ESCR_T0_USR) { |
1092 | escr &= ~P4_ESCR_T0_USR; | |
1093 | escr |= P4_ESCR_T1_USR; | |
a072738e CG |
1094 | } |
1095 | hwc->config = p4_config_pack_escr(escr); | |
1096 | hwc->config |= p4_config_pack_cccr(cccr); | |
1097 | hwc->config |= P4_CONFIG_HT; | |
1098 | } else { | |
1099 | cccr &= ~P4_CCCR_OVF_PMI_T1; | |
1100 | cccr |= P4_CCCR_OVF_PMI_T0; | |
d814f301 CG |
1101 | if (escr & P4_ESCR_T1_OS) { |
1102 | escr &= ~P4_ESCR_T1_OS; | |
1103 | escr |= P4_ESCR_T0_OS; | |
a072738e | 1104 | } |
d814f301 CG |
1105 | if (escr & P4_ESCR_T1_USR) { |
1106 | escr &= ~P4_ESCR_T1_USR; | |
1107 | escr |= P4_ESCR_T0_USR; | |
a072738e CG |
1108 | } |
1109 | hwc->config = p4_config_pack_escr(escr); | |
1110 | hwc->config |= p4_config_pack_cccr(cccr); | |
1111 | hwc->config &= ~P4_CONFIG_HT; | |
1112 | } | |
1113 | } | |
1114 | ||
72001990 CG |
1115 | /* |
1116 | * ESCR address hashing is tricky, ESCRs are not sequential | |
623aab89 | 1117 | * in memory but all starts from MSR_P4_BSU_ESCR0 (0x03a0) and |
72001990 CG |
1118 | * the metric between any ESCRs is laid in range [0xa0,0xe1] |
1119 | * | |
1120 | * so we make ~70% filled hashtable | |
1121 | */ | |
1122 | ||
1123 | #define P4_ESCR_MSR_BASE 0x000003a0 | |
1124 | #define P4_ESCR_MSR_MAX 0x000003e1 | |
1125 | #define P4_ESCR_MSR_TABLE_SIZE (P4_ESCR_MSR_MAX - P4_ESCR_MSR_BASE + 1) | |
1126 | #define P4_ESCR_MSR_IDX(msr) (msr - P4_ESCR_MSR_BASE) | |
1127 | #define P4_ESCR_MSR_TABLE_ENTRY(msr) [P4_ESCR_MSR_IDX(msr)] = msr | |
1128 | ||
1129 | static const unsigned int p4_escr_table[P4_ESCR_MSR_TABLE_SIZE] = { | |
1130 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR0), | |
1131 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR1), | |
1132 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR0), | |
1133 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR1), | |
1134 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR0), | |
1135 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR1), | |
1136 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR0), | |
1137 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR1), | |
1138 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR2), | |
1139 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR3), | |
1140 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR4), | |
1141 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR5), | |
1142 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR0), | |
1143 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR1), | |
1144 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR0), | |
1145 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR1), | |
1146 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR0), | |
1147 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR1), | |
1148 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR0), | |
1149 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR1), | |
1150 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR0), | |
1151 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR1), | |
1152 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR0), | |
1153 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR1), | |
1154 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR0), | |
1155 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR1), | |
1156 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR0), | |
1157 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR1), | |
1158 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR0), | |
1159 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR1), | |
1160 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR0), | |
1161 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR1), | |
1162 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR0), | |
1163 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR1), | |
1164 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR0), | |
1165 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR1), | |
1166 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR0), | |
1167 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR1), | |
1168 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR0), | |
1169 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR1), | |
1170 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR0), | |
1171 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR1), | |
1172 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR0), | |
1173 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR1), | |
1174 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR0), | |
1175 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR1), | |
a072738e CG |
1176 | }; |
1177 | ||
1178 | static int p4_get_escr_idx(unsigned int addr) | |
1179 | { | |
72001990 | 1180 | unsigned int idx = P4_ESCR_MSR_IDX(addr); |
a072738e | 1181 | |
623aab89 CG |
1182 | if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE || |
1183 | !p4_escr_table[idx] || | |
1184 | p4_escr_table[idx] != addr)) { | |
72001990 CG |
1185 | WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr); |
1186 | return -1; | |
a072738e CG |
1187 | } |
1188 | ||
72001990 | 1189 | return idx; |
a072738e CG |
1190 | } |
1191 | ||
d814f301 CG |
1192 | static int p4_next_cntr(int thread, unsigned long *used_mask, |
1193 | struct p4_event_bind *bind) | |
1194 | { | |
1ff3d7d7 | 1195 | int i, j; |
d814f301 CG |
1196 | |
1197 | for (i = 0; i < P4_CNTR_LIMIT; i++) { | |
1ff3d7d7 CG |
1198 | j = bind->cntr[thread][i]; |
1199 | if (j != -1 && !test_bit(j, used_mask)) | |
d814f301 CG |
1200 | return j; |
1201 | } | |
1202 | ||
1203 | return -1; | |
1204 | } | |
1205 | ||
a072738e CG |
1206 | static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) |
1207 | { | |
1208 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | |
72001990 | 1209 | unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)]; |
9d36dfcf | 1210 | int cpu = smp_processor_id(); |
d814f301 CG |
1211 | struct hw_perf_event *hwc; |
1212 | struct p4_event_bind *bind; | |
1213 | unsigned int i, thread, num; | |
1214 | int cntr_idx, escr_idx; | |
f9129870 CG |
1215 | u64 config_alias; |
1216 | int pass; | |
a072738e CG |
1217 | |
1218 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); | |
72001990 | 1219 | bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE); |
a072738e | 1220 | |
a072738e | 1221 | for (i = 0, num = n; i < n; i++, num--) { |
d814f301 | 1222 | |
a072738e | 1223 | hwc = &cpuc->event_list[i]->hw; |
a072738e | 1224 | thread = p4_ht_thread(cpu); |
f9129870 CG |
1225 | pass = 0; |
1226 | ||
1227 | again: | |
1228 | /* | |
f53173e4 CG |
1229 | * It's possible to hit a circular lock |
1230 | * between original and alternative events | |
1231 | * if both are scheduled already. | |
f9129870 CG |
1232 | */ |
1233 | if (pass > 2) | |
1234 | goto done; | |
1235 | ||
d814f301 CG |
1236 | bind = p4_config_get_bind(hwc->config); |
1237 | escr_idx = p4_get_escr_idx(bind->escr_msr[thread]); | |
72001990 CG |
1238 | if (unlikely(escr_idx == -1)) |
1239 | goto done; | |
a072738e | 1240 | |
a072738e | 1241 | if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) { |
d814f301 | 1242 | cntr_idx = hwc->idx; |
a072738e CG |
1243 | if (assign) |
1244 | assign[i] = hwc->idx; | |
a072738e CG |
1245 | goto reserve; |
1246 | } | |
1247 | ||
d814f301 | 1248 | cntr_idx = p4_next_cntr(thread, used_mask, bind); |
f9129870 CG |
1249 | if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) { |
1250 | /* | |
f53173e4 | 1251 | * Check whether an event alias is still available. |
f9129870 CG |
1252 | */ |
1253 | config_alias = p4_get_alias_event(hwc->config); | |
1254 | if (!config_alias) | |
1255 | goto done; | |
1256 | hwc->config = config_alias; | |
1257 | pass++; | |
1258 | goto again; | |
1259 | } | |
a072738e | 1260 | |
a072738e | 1261 | p4_pmu_swap_config_ts(hwc, cpu); |
d814f301 CG |
1262 | if (assign) |
1263 | assign[i] = cntr_idx; | |
a072738e | 1264 | reserve: |
d814f301 | 1265 | set_bit(cntr_idx, used_mask); |
a072738e CG |
1266 | set_bit(escr_idx, escr_mask); |
1267 | } | |
1268 | ||
1269 | done: | |
aa2bc1ad | 1270 | return num ? -EINVAL : 0; |
a072738e CG |
1271 | } |
1272 | ||
7b8e6da4 PZ |
/*
 * Format attributes describing how attr.config is split up:
 * bits 0-31 carry the CCCR part, bits 32-62 the ESCR part and
 * bit 63 the HT flag.
 */
PMU_FORMAT_ATTR(cccr, "config:0-31" );
PMU_FORMAT_ATTR(escr, "config:32-62");
PMU_FORMAT_ATTR(ht,   "config:63"   );

/* NULL terminated list, referenced from p4_pmu.format_attrs */
static struct attribute *intel_p4_formats_attr[] = {
	&format_attr_cccr.attr,
	&format_attr_escr.attr,
	&format_attr_ht.attr,
	NULL,
};
1283 | ||
/*
 * PMU description for Netburst; p4_pmu_init() copies it into the
 * global x86_pmu.
 */
static __initconst const struct x86_pmu p4_pmu = {
	.name			= "Netburst P4/Xeon",
	.handle_irq		= p4_pmu_handle_irq,
	.disable_all		= p4_pmu_disable_all,
	.enable_all		= p4_pmu_enable_all,
	.enable			= p4_pmu_enable_event,
	.disable		= p4_pmu_disable_event,
	.eventsel		= MSR_P4_BPU_CCCR0,
	.perfctr		= MSR_P4_BPU_PERFCTR0,
	.event_map		= p4_pmu_event_map,
	.max_events		= ARRAY_SIZE(p4_general_events),
	.get_event_constraints	= x86_get_event_constraints,
	/*
	 * If HT is disabled we may need to use all
	 * ARCH_P4_MAX_CCCR counters simultaneously,
	 * though leave it restricted at the moment assuming
	 * HT is on
	 */
	.num_counters		= ARCH_P4_MAX_CCCR,
	.apic			= 1,
	.cntval_bits		= ARCH_P4_CNTRVAL_BITS,
	.cntval_mask		= ARCH_P4_CNTRVAL_MASK,
	/* largest period that still fits below the counter's top bit */
	.max_period		= (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1,
	.hw_config		= p4_hw_config,
	.schedule_events	= p4_pmu_schedule_events,
	/*
	 * This handles erratum N15 in intel doc 249199-029,
	 * the counter may not be updated correctly on write
	 * so we need a second write operation to do the trick
	 * (the official workaround didn't work)
	 *
	 * the former idea is taken from OProfile code
	 */
	.perfctr_second_write	= 1,

	.format_attrs		= intel_p4_formats_attr,
};
de0428a7 | 1322 | __init int p4_pmu_init(void) |
a072738e CG |
1323 | { |
1324 | unsigned int low, high; | |
1325 | ||
fa7b6947 | 1326 | /* If we get stripped -- indexing fails */ |
15c7ad51 | 1327 | BUILD_BUG_ON(ARCH_P4_MAX_CCCR > INTEL_PMC_MAX_GENERIC); |
a072738e CG |
1328 | |
1329 | rdmsr(MSR_IA32_MISC_ENABLE, low, high); | |
1330 | if (!(low & (1 << 7))) { | |
1331 | pr_cont("unsupported Netburst CPU model %d ", | |
1332 | boot_cpu_data.x86_model); | |
1333 | return -ENODEV; | |
1334 | } | |
1335 | ||
cb7d6b50 | 1336 | memcpy(hw_cache_event_ids, p4_hw_cache_event_ids, |
d814f301 | 1337 | sizeof(hw_cache_event_ids)); |
cb7d6b50 | 1338 | |
a072738e CG |
1339 | pr_cont("Netburst events, "); |
1340 | ||
1341 | x86_pmu = p4_pmu; | |
1342 | ||
1343 | return 0; | |
1344 | } |