]>
Commit | Line | Data |
---|---|---|
f87027b9 JO |
1 | #include <stdio.h> |
2 | #include "evsel.h" | |
3 | #include "stat.h" | |
4 | #include "color.h" | |
fb4605ba | 5 | #include "pmu.h" |
37932c18 AK |
6 | #include "rblist.h" |
7 | #include "evlist.h" | |
8 | #include "expr.h" | |
f87027b9 JO |
9 | |
/*
 * Counters are classified by the exclusion context they were opened
 * with (user/kernel/hv/host/idle); shadow stats are kept separately
 * per context so e.g. a kernel-only cycles count is only compared
 * against kernel-only instruction counts.
 */
enum {
	CTX_BIT_USER	= 1 << 0,
	CTX_BIT_KERNEL	= 1 << 1,
	CTX_BIT_HV	= 1 << 2,
	CTX_BIT_HOST	= 1 << 3,
	CTX_BIT_IDLE	= 1 << 4,
	CTX_BIT_MAX	= 1 << 5,	/* one past the highest bit: array size */
};

#define NUM_CTX CTX_BIT_MAX
/*
 * AGGR_GLOBAL: Use CPU 0
 * AGGR_SOCKET: Use first CPU of socket
 * AGGR_CORE: Use first CPU of core
 * AGGR_NONE: Use matching CPU
 * AGGR_THREAD: Not supported?
 */
f87027b9 JO |
28 | static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; |
29 | static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS]; | |
30 | static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS]; | |
31 | static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS]; | |
32 | static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS]; | |
33 | static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS]; | |
34 | static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS]; | |
35 | static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS]; | |
36 | static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS]; | |
37 | static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; | |
38 | static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; | |
39 | static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS]; | |
40 | static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS]; | |
41 | static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS]; | |
239bd47f AK |
42 | static struct stats runtime_topdown_total_slots[NUM_CTX][MAX_NR_CPUS]; |
43 | static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS]; | |
44 | static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS]; | |
45 | static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS]; | |
46 | static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS]; | |
daefd0bc KL |
47 | static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS]; |
48 | static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS]; | |
37932c18 | 49 | static struct rblist runtime_saved_values; |
fb4605ba | 50 | static bool have_frontend_stalled; |
f87027b9 JO |
51 | |
52 | struct stats walltime_nsecs_stats; | |
53 | ||
37932c18 AK |
54 | struct saved_value { |
55 | struct rb_node rb_node; | |
56 | struct perf_evsel *evsel; | |
57 | int cpu; | |
58 | int ctx; | |
59 | struct stats stats; | |
60 | }; | |
61 | ||
62 | static int saved_value_cmp(struct rb_node *rb_node, const void *entry) | |
63 | { | |
64 | struct saved_value *a = container_of(rb_node, | |
65 | struct saved_value, | |
66 | rb_node); | |
67 | const struct saved_value *b = entry; | |
68 | ||
69 | if (a->ctx != b->ctx) | |
70 | return a->ctx - b->ctx; | |
71 | if (a->cpu != b->cpu) | |
72 | return a->cpu - b->cpu; | |
73 | return a->evsel - b->evsel; | |
74 | } | |
75 | ||
76 | static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused, | |
77 | const void *entry) | |
78 | { | |
79 | struct saved_value *nd = malloc(sizeof(struct saved_value)); | |
80 | ||
81 | if (!nd) | |
82 | return NULL; | |
83 | memcpy(nd, entry, sizeof(struct saved_value)); | |
84 | return &nd->rb_node; | |
85 | } | |
86 | ||
87 | static struct saved_value *saved_value_lookup(struct perf_evsel *evsel, | |
88 | int cpu, int ctx, | |
89 | bool create) | |
90 | { | |
91 | struct rb_node *nd; | |
92 | struct saved_value dm = { | |
93 | .cpu = cpu, | |
94 | .ctx = ctx, | |
95 | .evsel = evsel, | |
96 | }; | |
97 | nd = rblist__find(&runtime_saved_values, &dm); | |
98 | if (nd) | |
99 | return container_of(nd, struct saved_value, rb_node); | |
100 | if (create) { | |
101 | rblist__add_node(&runtime_saved_values, &dm); | |
102 | nd = rblist__find(&runtime_saved_values, &dm); | |
103 | if (nd) | |
104 | return container_of(nd, struct saved_value, rb_node); | |
105 | } | |
106 | return NULL; | |
107 | } | |
108 | ||
fb4605ba AK |
109 | void perf_stat__init_shadow_stats(void) |
110 | { | |
111 | have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend"); | |
37932c18 AK |
112 | rblist__init(&runtime_saved_values); |
113 | runtime_saved_values.node_cmp = saved_value_cmp; | |
114 | runtime_saved_values.node_new = saved_value_new; | |
115 | /* No delete for now */ | |
fb4605ba AK |
116 | } |
117 | ||
f87027b9 JO |
118 | static int evsel_context(struct perf_evsel *evsel) |
119 | { | |
120 | int ctx = 0; | |
121 | ||
122 | if (evsel->attr.exclude_kernel) | |
123 | ctx |= CTX_BIT_KERNEL; | |
124 | if (evsel->attr.exclude_user) | |
125 | ctx |= CTX_BIT_USER; | |
126 | if (evsel->attr.exclude_hv) | |
127 | ctx |= CTX_BIT_HV; | |
128 | if (evsel->attr.exclude_host) | |
129 | ctx |= CTX_BIT_HOST; | |
130 | if (evsel->attr.exclude_idle) | |
131 | ctx |= CTX_BIT_IDLE; | |
132 | ||
133 | return ctx; | |
134 | } | |
135 | ||
136 | void perf_stat__reset_shadow_stats(void) | |
137 | { | |
37932c18 AK |
138 | struct rb_node *pos, *next; |
139 | ||
f87027b9 JO |
140 | memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats)); |
141 | memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats)); | |
142 | memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats)); | |
143 | memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats)); | |
144 | memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats)); | |
145 | memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats)); | |
146 | memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats)); | |
147 | memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats)); | |
148 | memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats)); | |
149 | memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats)); | |
150 | memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats)); | |
151 | memset(runtime_cycles_in_tx_stats, 0, | |
152 | sizeof(runtime_cycles_in_tx_stats)); | |
153 | memset(runtime_transaction_stats, 0, | |
154 | sizeof(runtime_transaction_stats)); | |
155 | memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats)); | |
156 | memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); | |
239bd47f AK |
157 | memset(runtime_topdown_total_slots, 0, sizeof(runtime_topdown_total_slots)); |
158 | memset(runtime_topdown_slots_retired, 0, sizeof(runtime_topdown_slots_retired)); | |
159 | memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued)); | |
160 | memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles)); | |
161 | memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles)); | |
daefd0bc KL |
162 | memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats)); |
163 | memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats)); | |
37932c18 AK |
164 | |
165 | next = rb_first(&runtime_saved_values.entries); | |
166 | while (next) { | |
167 | pos = next; | |
168 | next = rb_next(pos); | |
169 | memset(&container_of(pos, struct saved_value, rb_node)->stats, | |
170 | 0, | |
171 | sizeof(struct stats)); | |
172 | } | |
f87027b9 JO |
173 | } |
174 | ||
175 | /* | |
176 | * Update various tracking values we maintain to print | |
177 | * more semantic information such as miss/hit ratios, | |
178 | * instruction rates, etc: | |
179 | */ | |
180 | void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, | |
181 | int cpu) | |
182 | { | |
183 | int ctx = evsel_context(counter); | |
184 | ||
daf4f478 NK |
185 | if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) || |
186 | perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK)) | |
f87027b9 JO |
187 | update_stats(&runtime_nsecs_stats[cpu], count[0]); |
188 | else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) | |
189 | update_stats(&runtime_cycles_stats[ctx][cpu], count[0]); | |
190 | else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) | |
54976285 | 191 | update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count[0]); |
f87027b9 JO |
192 | else if (perf_stat_evsel__is(counter, TRANSACTION_START)) |
193 | update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); | |
194 | else if (perf_stat_evsel__is(counter, ELISION_START)) | |
195 | update_stats(&runtime_elision_stats[ctx][cpu], count[0]); | |
239bd47f AK |
196 | else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS)) |
197 | update_stats(&runtime_topdown_total_slots[ctx][cpu], count[0]); | |
198 | else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED)) | |
199 | update_stats(&runtime_topdown_slots_issued[ctx][cpu], count[0]); | |
200 | else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED)) | |
201 | update_stats(&runtime_topdown_slots_retired[ctx][cpu], count[0]); | |
202 | else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES)) | |
203 | update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu],count[0]); | |
204 | else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) | |
205 | update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count[0]); | |
f87027b9 JO |
206 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) |
207 | update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]); | |
208 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) | |
209 | update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]); | |
210 | else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) | |
211 | update_stats(&runtime_branches_stats[ctx][cpu], count[0]); | |
212 | else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) | |
213 | update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]); | |
214 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) | |
215 | update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]); | |
216 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) | |
217 | update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]); | |
218 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) | |
219 | update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]); | |
220 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) | |
221 | update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]); | |
222 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) | |
223 | update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]); | |
daefd0bc KL |
224 | else if (perf_stat_evsel__is(counter, SMI_NUM)) |
225 | update_stats(&runtime_smi_num_stats[ctx][cpu], count[0]); | |
226 | else if (perf_stat_evsel__is(counter, APERF)) | |
227 | update_stats(&runtime_aperf_stats[ctx][cpu], count[0]); | |
37932c18 AK |
228 | |
229 | if (counter->collect_stat) { | |
230 | struct saved_value *v = saved_value_lookup(counter, cpu, ctx, | |
231 | true); | |
232 | update_stats(&v->stats, count[0]); | |
233 | } | |
f87027b9 JO |
234 | } |
235 | ||
/* Threshold classes used for get_ratio_color() */
enum grc_type {
	GRC_STALLED_CYCLES_FE,	/* frontend stall ratio */
	GRC_STALLED_CYCLES_BE,	/* backend stall ratio */
	GRC_CACHE_MISSES,	/* generic cache/branch miss ratio */
	GRC_MAX_NR
};
244 | static const char *get_ratio_color(enum grc_type type, double ratio) | |
245 | { | |
246 | static const double grc_table[GRC_MAX_NR][3] = { | |
247 | [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 }, | |
248 | [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 }, | |
249 | [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 }, | |
250 | }; | |
251 | const char *color = PERF_COLOR_NORMAL; | |
252 | ||
253 | if (ratio > grc_table[type][0]) | |
254 | color = PERF_COLOR_RED; | |
255 | else if (ratio > grc_table[type][1]) | |
256 | color = PERF_COLOR_MAGENTA; | |
257 | else if (ratio > grc_table[type][2]) | |
258 | color = PERF_COLOR_YELLOW; | |
259 | ||
260 | return color; | |
261 | } | |
262 | ||
37932c18 AK |
263 | static struct perf_evsel *perf_stat__find_event(struct perf_evlist *evsel_list, |
264 | const char *name) | |
265 | { | |
266 | struct perf_evsel *c2; | |
267 | ||
268 | evlist__for_each_entry (evsel_list, c2) { | |
269 | if (!strcasecmp(c2->name, name)) | |
270 | return c2; | |
271 | } | |
272 | return NULL; | |
273 | } | |
274 | ||
275 | /* Mark MetricExpr target events and link events using them to them. */ | |
276 | void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list) | |
277 | { | |
278 | struct perf_evsel *counter, *leader, **metric_events, *oc; | |
279 | bool found; | |
280 | const char **metric_names; | |
281 | int i; | |
282 | int num_metric_names; | |
283 | ||
284 | evlist__for_each_entry(evsel_list, counter) { | |
285 | bool invalid = false; | |
286 | ||
287 | leader = counter->leader; | |
288 | if (!counter->metric_expr) | |
289 | continue; | |
290 | metric_events = counter->metric_events; | |
291 | if (!metric_events) { | |
292 | if (expr__find_other(counter->metric_expr, counter->name, | |
293 | &metric_names, &num_metric_names) < 0) | |
294 | continue; | |
295 | ||
296 | metric_events = calloc(sizeof(struct perf_evsel *), | |
297 | num_metric_names + 1); | |
298 | if (!metric_events) | |
299 | return; | |
300 | counter->metric_events = metric_events; | |
301 | } | |
302 | ||
303 | for (i = 0; i < num_metric_names; i++) { | |
304 | found = false; | |
305 | if (leader) { | |
306 | /* Search in group */ | |
307 | for_each_group_member (oc, leader) { | |
308 | if (!strcasecmp(oc->name, metric_names[i])) { | |
309 | found = true; | |
310 | break; | |
311 | } | |
312 | } | |
313 | } | |
314 | if (!found) { | |
315 | /* Search ignoring groups */ | |
316 | oc = perf_stat__find_event(evsel_list, metric_names[i]); | |
317 | } | |
318 | if (!oc) { | |
319 | /* Deduping one is good enough to handle duplicated PMUs. */ | |
320 | static char *printed; | |
321 | ||
322 | /* | |
323 | * Adding events automatically would be difficult, because | |
324 | * it would risk creating groups that are not schedulable. | |
325 | * perf stat doesn't understand all the scheduling constraints | |
326 | * of events. So we ask the user instead to add the missing | |
327 | * events. | |
328 | */ | |
329 | if (!printed || strcasecmp(printed, metric_names[i])) { | |
330 | fprintf(stderr, | |
331 | "Add %s event to groups to get metric expression for %s\n", | |
332 | metric_names[i], | |
333 | counter->name); | |
334 | printed = strdup(metric_names[i]); | |
335 | } | |
336 | invalid = true; | |
337 | continue; | |
338 | } | |
339 | metric_events[i] = oc; | |
340 | oc->collect_stat = true; | |
341 | } | |
342 | metric_events[i] = NULL; | |
343 | free(metric_names); | |
344 | if (invalid) { | |
345 | free(metric_events); | |
346 | counter->metric_events = NULL; | |
347 | counter->metric_expr = NULL; | |
348 | } | |
349 | } | |
350 | } | |
351 | ||
140aeadc | 352 | static void print_stalled_cycles_frontend(int cpu, |
b8f8eb84 | 353 | struct perf_evsel *evsel, double avg, |
140aeadc | 354 | struct perf_stat_output_ctx *out) |
f87027b9 JO |
355 | { |
356 | double total, ratio = 0.0; | |
357 | const char *color; | |
358 | int ctx = evsel_context(evsel); | |
359 | ||
360 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | |
361 | ||
362 | if (total) | |
363 | ratio = avg / total * 100.0; | |
364 | ||
365 | color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio); | |
366 | ||
140aeadc AK |
367 | if (ratio) |
368 | out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle", | |
369 | ratio); | |
370 | else | |
371 | out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0); | |
f87027b9 JO |
372 | } |
373 | ||
140aeadc | 374 | static void print_stalled_cycles_backend(int cpu, |
b8f8eb84 | 375 | struct perf_evsel *evsel, double avg, |
140aeadc | 376 | struct perf_stat_output_ctx *out) |
f87027b9 JO |
377 | { |
378 | double total, ratio = 0.0; | |
379 | const char *color; | |
380 | int ctx = evsel_context(evsel); | |
381 | ||
382 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | |
383 | ||
384 | if (total) | |
385 | ratio = avg / total * 100.0; | |
386 | ||
387 | color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); | |
388 | ||
b0404be8 | 389 | out->print_metric(out->ctx, color, "%7.2f%%", "backend cycles idle", ratio); |
f87027b9 JO |
390 | } |
391 | ||
140aeadc | 392 | static void print_branch_misses(int cpu, |
b8f8eb84 | 393 | struct perf_evsel *evsel, |
140aeadc AK |
394 | double avg, |
395 | struct perf_stat_output_ctx *out) | |
f87027b9 JO |
396 | { |
397 | double total, ratio = 0.0; | |
398 | const char *color; | |
399 | int ctx = evsel_context(evsel); | |
400 | ||
401 | total = avg_stats(&runtime_branches_stats[ctx][cpu]); | |
402 | ||
403 | if (total) | |
404 | ratio = avg / total * 100.0; | |
405 | ||
406 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | |
407 | ||
140aeadc | 408 | out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio); |
f87027b9 JO |
409 | } |
410 | ||
140aeadc | 411 | static void print_l1_dcache_misses(int cpu, |
b8f8eb84 | 412 | struct perf_evsel *evsel, |
140aeadc AK |
413 | double avg, |
414 | struct perf_stat_output_ctx *out) | |
f87027b9 JO |
415 | { |
416 | double total, ratio = 0.0; | |
417 | const char *color; | |
418 | int ctx = evsel_context(evsel); | |
419 | ||
420 | total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]); | |
421 | ||
422 | if (total) | |
423 | ratio = avg / total * 100.0; | |
424 | ||
425 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | |
426 | ||
140aeadc | 427 | out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio); |
f87027b9 JO |
428 | } |
429 | ||
140aeadc | 430 | static void print_l1_icache_misses(int cpu, |
b8f8eb84 | 431 | struct perf_evsel *evsel, |
140aeadc AK |
432 | double avg, |
433 | struct perf_stat_output_ctx *out) | |
f87027b9 JO |
434 | { |
435 | double total, ratio = 0.0; | |
436 | const char *color; | |
437 | int ctx = evsel_context(evsel); | |
438 | ||
439 | total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]); | |
440 | ||
441 | if (total) | |
442 | ratio = avg / total * 100.0; | |
443 | ||
444 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | |
140aeadc | 445 | out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio); |
f87027b9 JO |
446 | } |
447 | ||
140aeadc | 448 | static void print_dtlb_cache_misses(int cpu, |
b8f8eb84 | 449 | struct perf_evsel *evsel, |
140aeadc AK |
450 | double avg, |
451 | struct perf_stat_output_ctx *out) | |
f87027b9 JO |
452 | { |
453 | double total, ratio = 0.0; | |
454 | const char *color; | |
455 | int ctx = evsel_context(evsel); | |
456 | ||
457 | total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]); | |
458 | ||
459 | if (total) | |
460 | ratio = avg / total * 100.0; | |
461 | ||
462 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | |
140aeadc | 463 | out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio); |
f87027b9 JO |
464 | } |
465 | ||
140aeadc | 466 | static void print_itlb_cache_misses(int cpu, |
b8f8eb84 | 467 | struct perf_evsel *evsel, |
140aeadc AK |
468 | double avg, |
469 | struct perf_stat_output_ctx *out) | |
f87027b9 JO |
470 | { |
471 | double total, ratio = 0.0; | |
472 | const char *color; | |
473 | int ctx = evsel_context(evsel); | |
474 | ||
475 | total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]); | |
476 | ||
477 | if (total) | |
478 | ratio = avg / total * 100.0; | |
479 | ||
480 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | |
140aeadc | 481 | out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio); |
f87027b9 JO |
482 | } |
483 | ||
140aeadc | 484 | static void print_ll_cache_misses(int cpu, |
b8f8eb84 | 485 | struct perf_evsel *evsel, |
140aeadc AK |
486 | double avg, |
487 | struct perf_stat_output_ctx *out) | |
f87027b9 JO |
488 | { |
489 | double total, ratio = 0.0; | |
490 | const char *color; | |
491 | int ctx = evsel_context(evsel); | |
492 | ||
493 | total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]); | |
494 | ||
495 | if (total) | |
496 | ratio = avg / total * 100.0; | |
497 | ||
498 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | |
140aeadc | 499 | out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio); |
f87027b9 JO |
500 | } |
501 | ||
/*
 * High level "TopDown" CPU core pipe line bottleneck break down.
 *
 * Basic concept following
 * Yasin, A Top Down Method for Performance analysis and Counter architecture
 * ISPASS14
 *
 * The CPU pipeline is divided into 4 areas that can be bottlenecks:
 *
 * Frontend -> Backend -> Retiring
 * BadSpeculation in addition means out of order execution that is thrown away
 * (for example branch mispredictions)
 * Frontend is instruction decoding.
 * Backend is execution, like computation and accessing data in memory
 * Retiring is good execution that is not directly bottlenecked
 *
 * The formulas are computed in slots.
 * A slot is an entry in the pipeline each for the pipeline width
 * (for example a 4-wide pipeline has 4 slots for each cycle)
 *
 * Formulas:
 * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
 *			TotalSlots
 * Retiring = SlotsRetired / TotalSlots
 * FrontendBound = FetchBubbles / TotalSlots
 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
 *
 * The kernel provides the mapping to the low level CPU events and any scaling
 * needed for the CPU pipeline width, for example:
 *
 * TotalSlots = Cycles * 4
 *
 * The scaling factor is communicated in the sysfs unit.
 *
 * In some cases the CPU may not be able to measure all the formulas due to
 * missing events. In this case multiple formulas are combined, as possible.
 *
 * Full TopDown supports more levels to sub-divide each area: for example
 * BackendBound into computing bound and memory bound. For now we only
 * support Level 1 TopDown.
 */
/*
 * Clamp tiny negative values (measurement noise from the combined
 * TopDown formulas) to zero; anything below -2% is passed through
 * unchanged so real anomalies remain visible.
 */
static double sanitize_val(double x)
{
	return (x < 0 && x >= -0.02) ? 0.0 : x;
}
551 | static double td_total_slots(int ctx, int cpu) | |
552 | { | |
553 | return avg_stats(&runtime_topdown_total_slots[ctx][cpu]); | |
554 | } | |
555 | ||
556 | static double td_bad_spec(int ctx, int cpu) | |
557 | { | |
558 | double bad_spec = 0; | |
559 | double total_slots; | |
560 | double total; | |
561 | ||
562 | total = avg_stats(&runtime_topdown_slots_issued[ctx][cpu]) - | |
563 | avg_stats(&runtime_topdown_slots_retired[ctx][cpu]) + | |
564 | avg_stats(&runtime_topdown_recovery_bubbles[ctx][cpu]); | |
565 | total_slots = td_total_slots(ctx, cpu); | |
566 | if (total_slots) | |
567 | bad_spec = total / total_slots; | |
568 | return sanitize_val(bad_spec); | |
569 | } | |
570 | ||
571 | static double td_retiring(int ctx, int cpu) | |
572 | { | |
573 | double retiring = 0; | |
574 | double total_slots = td_total_slots(ctx, cpu); | |
575 | double ret_slots = avg_stats(&runtime_topdown_slots_retired[ctx][cpu]); | |
576 | ||
577 | if (total_slots) | |
578 | retiring = ret_slots / total_slots; | |
579 | return retiring; | |
580 | } | |
581 | ||
582 | static double td_fe_bound(int ctx, int cpu) | |
583 | { | |
584 | double fe_bound = 0; | |
585 | double total_slots = td_total_slots(ctx, cpu); | |
586 | double fetch_bub = avg_stats(&runtime_topdown_fetch_bubbles[ctx][cpu]); | |
587 | ||
588 | if (total_slots) | |
589 | fe_bound = fetch_bub / total_slots; | |
590 | return fe_bound; | |
591 | } | |
592 | ||
/*
 * BackendBound is what remains after the other three areas:
 * 1.0 - FrontendBound - BadSpeculation - Retiring.  Returns 0 when
 * nothing was measured at all (sum of the others is exactly 0).
 */
static double td_be_bound(int ctx, int cpu)
{
	double sum = td_fe_bound(ctx, cpu) + td_bad_spec(ctx, cpu) +
		     td_retiring(ctx, cpu);

	if (sum == 0)
		return 0;
	return sanitize_val(1.0 - sum);
}
daefd0bc KL |
603 | static void print_smi_cost(int cpu, struct perf_evsel *evsel, |
604 | struct perf_stat_output_ctx *out) | |
605 | { | |
606 | double smi_num, aperf, cycles, cost = 0.0; | |
607 | int ctx = evsel_context(evsel); | |
608 | const char *color = NULL; | |
609 | ||
610 | smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]); | |
611 | aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]); | |
612 | cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]); | |
613 | ||
614 | if ((cycles == 0) || (aperf == 0)) | |
615 | return; | |
616 | ||
617 | if (smi_num) | |
618 | cost = (aperf - cycles) / aperf * 100.00; | |
619 | ||
620 | if (cost > 10) | |
621 | color = PERF_COLOR_RED; | |
622 | out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost); | |
623 | out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num); | |
624 | } | |
625 | ||
140aeadc AK |
626 | void perf_stat__print_shadow_stats(struct perf_evsel *evsel, |
627 | double avg, int cpu, | |
628 | struct perf_stat_output_ctx *out) | |
f87027b9 | 629 | { |
140aeadc AK |
630 | void *ctxp = out->ctx; |
631 | print_metric_t print_metric = out->print_metric; | |
f87027b9 | 632 | double total, ratio = 0.0, total2; |
239bd47f | 633 | const char *color = NULL; |
f87027b9 JO |
634 | int ctx = evsel_context(evsel); |
635 | ||
636 | if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { | |
637 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | |
638 | if (total) { | |
639 | ratio = avg / total; | |
140aeadc AK |
640 | print_metric(ctxp, NULL, "%7.2f ", |
641 | "insn per cycle", ratio); | |
f87027b9 | 642 | } else { |
140aeadc | 643 | print_metric(ctxp, NULL, NULL, "insn per cycle", 0); |
f87027b9 JO |
644 | } |
645 | total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]); | |
646 | total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu])); | |
647 | ||
648 | if (total && avg) { | |
92a61f64 | 649 | out->new_line(ctxp); |
f87027b9 | 650 | ratio = total / avg; |
140aeadc AK |
651 | print_metric(ctxp, NULL, "%7.2f ", |
652 | "stalled cycles per insn", | |
653 | ratio); | |
fb4605ba | 654 | } else if (have_frontend_stalled) { |
140aeadc AK |
655 | print_metric(ctxp, NULL, NULL, |
656 | "stalled cycles per insn", 0); | |
f87027b9 | 657 | } |
140aeadc AK |
658 | } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { |
659 | if (runtime_branches_stats[ctx][cpu].n != 0) | |
660 | print_branch_misses(cpu, evsel, avg, out); | |
661 | else | |
662 | print_metric(ctxp, NULL, NULL, "of all branches", 0); | |
f87027b9 JO |
663 | } else if ( |
664 | evsel->attr.type == PERF_TYPE_HW_CACHE && | |
665 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | | |
666 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | |
140aeadc AK |
667 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { |
668 | if (runtime_l1_dcache_stats[ctx][cpu].n != 0) | |
669 | print_l1_dcache_misses(cpu, evsel, avg, out); | |
670 | else | |
671 | print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0); | |
f87027b9 JO |
672 | } else if ( |
673 | evsel->attr.type == PERF_TYPE_HW_CACHE && | |
674 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | | |
675 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | |
140aeadc AK |
676 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { |
677 | if (runtime_l1_icache_stats[ctx][cpu].n != 0) | |
678 | print_l1_icache_misses(cpu, evsel, avg, out); | |
679 | else | |
680 | print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0); | |
f87027b9 JO |
681 | } else if ( |
682 | evsel->attr.type == PERF_TYPE_HW_CACHE && | |
683 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | | |
684 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | |
140aeadc AK |
685 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { |
686 | if (runtime_dtlb_cache_stats[ctx][cpu].n != 0) | |
687 | print_dtlb_cache_misses(cpu, evsel, avg, out); | |
688 | else | |
689 | print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0); | |
f87027b9 JO |
690 | } else if ( |
691 | evsel->attr.type == PERF_TYPE_HW_CACHE && | |
692 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | | |
693 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | |
140aeadc AK |
694 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { |
695 | if (runtime_itlb_cache_stats[ctx][cpu].n != 0) | |
696 | print_itlb_cache_misses(cpu, evsel, avg, out); | |
697 | else | |
698 | print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0); | |
f87027b9 JO |
699 | } else if ( |
700 | evsel->attr.type == PERF_TYPE_HW_CACHE && | |
701 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | | |
702 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | |
140aeadc AK |
703 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { |
704 | if (runtime_ll_cache_stats[ctx][cpu].n != 0) | |
705 | print_ll_cache_misses(cpu, evsel, avg, out); | |
706 | else | |
707 | print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0); | |
708 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { | |
f87027b9 JO |
709 | total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]); |
710 | ||
711 | if (total) | |
712 | ratio = avg * 100 / total; | |
713 | ||
140aeadc AK |
714 | if (runtime_cacherefs_stats[ctx][cpu].n != 0) |
715 | print_metric(ctxp, NULL, "%8.3f %%", | |
716 | "of all cache refs", ratio); | |
717 | else | |
718 | print_metric(ctxp, NULL, NULL, "of all cache refs", 0); | |
f87027b9 | 719 | } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { |
140aeadc | 720 | print_stalled_cycles_frontend(cpu, evsel, avg, out); |
f87027b9 | 721 | } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { |
140aeadc | 722 | print_stalled_cycles_backend(cpu, evsel, avg, out); |
f87027b9 JO |
723 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { |
724 | total = avg_stats(&runtime_nsecs_stats[cpu]); | |
725 | ||
726 | if (total) { | |
727 | ratio = avg / total; | |
140aeadc | 728 | print_metric(ctxp, NULL, "%8.3f", "GHz", ratio); |
f87027b9 | 729 | } else { |
140aeadc | 730 | print_metric(ctxp, NULL, NULL, "Ghz", 0); |
f87027b9 JO |
731 | } |
732 | } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { | |
733 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | |
734 | if (total) | |
140aeadc AK |
735 | print_metric(ctxp, NULL, |
736 | "%7.2f%%", "transactional cycles", | |
737 | 100.0 * (avg / total)); | |
738 | else | |
739 | print_metric(ctxp, NULL, NULL, "transactional cycles", | |
740 | 0); | |
f87027b9 JO |
741 | } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { |
742 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | |
743 | total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); | |
744 | if (total2 < avg) | |
745 | total2 = avg; | |
746 | if (total) | |
140aeadc | 747 | print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles", |
f87027b9 | 748 | 100.0 * ((total2-avg) / total)); |
140aeadc AK |
749 | else |
750 | print_metric(ctxp, NULL, NULL, "aborted cycles", 0); | |
751 | } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { | |
f87027b9 JO |
752 | total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); |
753 | ||
54976285 | 754 | if (avg) |
f87027b9 JO |
755 | ratio = total / avg; |
756 | ||
140aeadc AK |
757 | if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0) |
758 | print_metric(ctxp, NULL, "%8.0f", | |
759 | "cycles / transaction", ratio); | |
760 | else | |
761 | print_metric(ctxp, NULL, NULL, "cycles / transaction", | |
762 | 0); | |
763 | } else if (perf_stat_evsel__is(evsel, ELISION_START)) { | |
f87027b9 JO |
764 | total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); |
765 | ||
54976285 | 766 | if (avg) |
f87027b9 JO |
767 | ratio = total / avg; |
768 | ||
140aeadc | 769 | print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio); |
daf4f478 NK |
770 | } else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK) || |
771 | perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK)) { | |
4579ecc8 | 772 | if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0) |
140aeadc AK |
773 | print_metric(ctxp, NULL, "%8.3f", "CPUs utilized", |
774 | avg / ratio); | |
4579ecc8 | 775 | else |
140aeadc | 776 | print_metric(ctxp, NULL, NULL, "CPUs utilized", 0); |
239bd47f AK |
777 | } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) { |
778 | double fe_bound = td_fe_bound(ctx, cpu); | |
779 | ||
780 | if (fe_bound > 0.2) | |
781 | color = PERF_COLOR_RED; | |
782 | print_metric(ctxp, color, "%8.1f%%", "frontend bound", | |
783 | fe_bound * 100.); | |
784 | } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) { | |
785 | double retiring = td_retiring(ctx, cpu); | |
786 | ||
787 | if (retiring > 0.7) | |
788 | color = PERF_COLOR_GREEN; | |
789 | print_metric(ctxp, color, "%8.1f%%", "retiring", | |
790 | retiring * 100.); | |
791 | } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) { | |
792 | double bad_spec = td_bad_spec(ctx, cpu); | |
793 | ||
794 | if (bad_spec > 0.1) | |
795 | color = PERF_COLOR_RED; | |
796 | print_metric(ctxp, color, "%8.1f%%", "bad speculation", | |
797 | bad_spec * 100.); | |
798 | } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) { | |
799 | double be_bound = td_be_bound(ctx, cpu); | |
800 | const char *name = "backend bound"; | |
801 | static int have_recovery_bubbles = -1; | |
802 | ||
803 | /* In case the CPU does not support topdown-recovery-bubbles */ | |
804 | if (have_recovery_bubbles < 0) | |
805 | have_recovery_bubbles = pmu_have_event("cpu", | |
806 | "topdown-recovery-bubbles"); | |
807 | if (!have_recovery_bubbles) | |
808 | name = "backend bound/bad spec"; | |
809 | ||
810 | if (be_bound > 0.2) | |
811 | color = PERF_COLOR_RED; | |
812 | if (td_total_slots(ctx, cpu) > 0) | |
813 | print_metric(ctxp, color, "%8.1f%%", name, | |
814 | be_bound * 100.); | |
815 | else | |
816 | print_metric(ctxp, NULL, NULL, name, 0); | |
37932c18 AK |
817 | } else if (evsel->metric_expr) { |
818 | struct parse_ctx pctx; | |
819 | int i; | |
820 | ||
821 | expr__ctx_init(&pctx); | |
822 | expr__add_id(&pctx, evsel->name, avg); | |
823 | for (i = 0; evsel->metric_events[i]; i++) { | |
824 | struct saved_value *v; | |
825 | ||
826 | v = saved_value_lookup(evsel->metric_events[i], cpu, ctx, false); | |
827 | if (!v) | |
828 | break; | |
829 | expr__add_id(&pctx, evsel->metric_events[i]->name, | |
830 | avg_stats(&v->stats)); | |
831 | } | |
832 | if (!evsel->metric_events[i]) { | |
833 | const char *p = evsel->metric_expr; | |
834 | ||
835 | if (expr__parse(&ratio, &pctx, &p) == 0) | |
836 | print_metric(ctxp, NULL, "%8.1f", | |
96284814 AK |
837 | evsel->metric_name ? |
838 | evsel->metric_name : | |
839 | out->force_header ? evsel->name : "", | |
37932c18 AK |
840 | ratio); |
841 | else | |
842 | print_metric(ctxp, NULL, NULL, "", 0); | |
843 | } else | |
844 | print_metric(ctxp, NULL, NULL, "", 0); | |
f87027b9 JO |
845 | } else if (runtime_nsecs_stats[cpu].n != 0) { |
846 | char unit = 'M'; | |
140aeadc | 847 | char unit_buf[10]; |
f87027b9 JO |
848 | |
849 | total = avg_stats(&runtime_nsecs_stats[cpu]); | |
850 | ||
851 | if (total) | |
852 | ratio = 1000.0 * avg / total; | |
853 | if (ratio < 0.001) { | |
854 | ratio *= 1000; | |
855 | unit = 'K'; | |
856 | } | |
140aeadc AK |
857 | snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); |
858 | print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio); | |
daefd0bc KL |
859 | } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { |
860 | print_smi_cost(cpu, evsel, out); | |
f87027b9 | 861 | } else { |
140aeadc | 862 | print_metric(ctxp, NULL, NULL, NULL, 0); |
f87027b9 JO |
863 | } |
864 | } |