11 CTX_BIT_USER
= 1 << 0,
12 CTX_BIT_KERNEL
= 1 << 1,
14 CTX_BIT_HOST
= 1 << 3,
15 CTX_BIT_IDLE
= 1 << 4,
19 #define NUM_CTX CTX_BIT_MAX
22 * AGGR_GLOBAL: Use CPU 0
23 * AGGR_SOCKET: Use first CPU of socket
24 * AGGR_CORE: Use first CPU of core
25 * AGGR_NONE: Use matching CPU
26 * AGGR_THREAD: Not supported?
28 static struct stats runtime_nsecs_stats
[MAX_NR_CPUS
];
29 static struct stats runtime_cycles_stats
[NUM_CTX
][MAX_NR_CPUS
];
30 static struct stats runtime_stalled_cycles_front_stats
[NUM_CTX
][MAX_NR_CPUS
];
31 static struct stats runtime_stalled_cycles_back_stats
[NUM_CTX
][MAX_NR_CPUS
];
32 static struct stats runtime_branches_stats
[NUM_CTX
][MAX_NR_CPUS
];
33 static struct stats runtime_cacherefs_stats
[NUM_CTX
][MAX_NR_CPUS
];
34 static struct stats runtime_l1_dcache_stats
[NUM_CTX
][MAX_NR_CPUS
];
35 static struct stats runtime_l1_icache_stats
[NUM_CTX
][MAX_NR_CPUS
];
36 static struct stats runtime_ll_cache_stats
[NUM_CTX
][MAX_NR_CPUS
];
37 static struct stats runtime_itlb_cache_stats
[NUM_CTX
][MAX_NR_CPUS
];
38 static struct stats runtime_dtlb_cache_stats
[NUM_CTX
][MAX_NR_CPUS
];
39 static struct stats runtime_cycles_in_tx_stats
[NUM_CTX
][MAX_NR_CPUS
];
40 static struct stats runtime_transaction_stats
[NUM_CTX
][MAX_NR_CPUS
];
41 static struct stats runtime_elision_stats
[NUM_CTX
][MAX_NR_CPUS
];
42 static struct stats runtime_topdown_total_slots
[NUM_CTX
][MAX_NR_CPUS
];
43 static struct stats runtime_topdown_slots_issued
[NUM_CTX
][MAX_NR_CPUS
];
44 static struct stats runtime_topdown_slots_retired
[NUM_CTX
][MAX_NR_CPUS
];
45 static struct stats runtime_topdown_fetch_bubbles
[NUM_CTX
][MAX_NR_CPUS
];
46 static struct stats runtime_topdown_recovery_bubbles
[NUM_CTX
][MAX_NR_CPUS
];
47 static struct stats runtime_smi_num_stats
[NUM_CTX
][MAX_NR_CPUS
];
48 static struct stats runtime_aperf_stats
[NUM_CTX
][MAX_NR_CPUS
];
49 static struct rblist runtime_saved_values
;
50 static bool have_frontend_stalled
;
52 struct stats walltime_nsecs_stats
;
55 struct rb_node rb_node
;
56 struct perf_evsel
*evsel
;
62 static int saved_value_cmp(struct rb_node
*rb_node
, const void *entry
)
64 struct saved_value
*a
= container_of(rb_node
,
67 const struct saved_value
*b
= entry
;
70 return a
->ctx
- b
->ctx
;
72 return a
->cpu
- b
->cpu
;
73 if (a
->evsel
== b
->evsel
)
75 if ((char *)a
->evsel
< (char *)b
->evsel
)
80 static struct rb_node
*saved_value_new(struct rblist
*rblist __maybe_unused
,
83 struct saved_value
*nd
= malloc(sizeof(struct saved_value
));
87 memcpy(nd
, entry
, sizeof(struct saved_value
));
91 static struct saved_value
*saved_value_lookup(struct perf_evsel
*evsel
,
96 struct saved_value dm
= {
101 nd
= rblist__find(&runtime_saved_values
, &dm
);
103 return container_of(nd
, struct saved_value
, rb_node
);
105 rblist__add_node(&runtime_saved_values
, &dm
);
106 nd
= rblist__find(&runtime_saved_values
, &dm
);
108 return container_of(nd
, struct saved_value
, rb_node
);
113 void perf_stat__init_shadow_stats(void)
115 have_frontend_stalled
= pmu_have_event("cpu", "stalled-cycles-frontend");
116 rblist__init(&runtime_saved_values
);
117 runtime_saved_values
.node_cmp
= saved_value_cmp
;
118 runtime_saved_values
.node_new
= saved_value_new
;
119 /* No delete for now */
122 static int evsel_context(struct perf_evsel
*evsel
)
126 if (evsel
->attr
.exclude_kernel
)
127 ctx
|= CTX_BIT_KERNEL
;
128 if (evsel
->attr
.exclude_user
)
130 if (evsel
->attr
.exclude_hv
)
132 if (evsel
->attr
.exclude_host
)
134 if (evsel
->attr
.exclude_idle
)
140 void perf_stat__reset_shadow_stats(void)
142 struct rb_node
*pos
, *next
;
144 memset(runtime_nsecs_stats
, 0, sizeof(runtime_nsecs_stats
));
145 memset(runtime_cycles_stats
, 0, sizeof(runtime_cycles_stats
));
146 memset(runtime_stalled_cycles_front_stats
, 0, sizeof(runtime_stalled_cycles_front_stats
));
147 memset(runtime_stalled_cycles_back_stats
, 0, sizeof(runtime_stalled_cycles_back_stats
));
148 memset(runtime_branches_stats
, 0, sizeof(runtime_branches_stats
));
149 memset(runtime_cacherefs_stats
, 0, sizeof(runtime_cacherefs_stats
));
150 memset(runtime_l1_dcache_stats
, 0, sizeof(runtime_l1_dcache_stats
));
151 memset(runtime_l1_icache_stats
, 0, sizeof(runtime_l1_icache_stats
));
152 memset(runtime_ll_cache_stats
, 0, sizeof(runtime_ll_cache_stats
));
153 memset(runtime_itlb_cache_stats
, 0, sizeof(runtime_itlb_cache_stats
));
154 memset(runtime_dtlb_cache_stats
, 0, sizeof(runtime_dtlb_cache_stats
));
155 memset(runtime_cycles_in_tx_stats
, 0,
156 sizeof(runtime_cycles_in_tx_stats
));
157 memset(runtime_transaction_stats
, 0,
158 sizeof(runtime_transaction_stats
));
159 memset(runtime_elision_stats
, 0, sizeof(runtime_elision_stats
));
160 memset(&walltime_nsecs_stats
, 0, sizeof(walltime_nsecs_stats
));
161 memset(runtime_topdown_total_slots
, 0, sizeof(runtime_topdown_total_slots
));
162 memset(runtime_topdown_slots_retired
, 0, sizeof(runtime_topdown_slots_retired
));
163 memset(runtime_topdown_slots_issued
, 0, sizeof(runtime_topdown_slots_issued
));
164 memset(runtime_topdown_fetch_bubbles
, 0, sizeof(runtime_topdown_fetch_bubbles
));
165 memset(runtime_topdown_recovery_bubbles
, 0, sizeof(runtime_topdown_recovery_bubbles
));
166 memset(runtime_smi_num_stats
, 0, sizeof(runtime_smi_num_stats
));
167 memset(runtime_aperf_stats
, 0, sizeof(runtime_aperf_stats
));
169 next
= rb_first(&runtime_saved_values
.entries
);
173 memset(&container_of(pos
, struct saved_value
, rb_node
)->stats
,
175 sizeof(struct stats
));
180 * Update various tracking values we maintain to print
181 * more semantic information such as miss/hit ratios,
182 * instruction rates, etc:
184 void perf_stat__update_shadow_stats(struct perf_evsel
*counter
, u64
*count
,
187 int ctx
= evsel_context(counter
);
189 if (perf_evsel__match(counter
, SOFTWARE
, SW_TASK_CLOCK
) ||
190 perf_evsel__match(counter
, SOFTWARE
, SW_CPU_CLOCK
))
191 update_stats(&runtime_nsecs_stats
[cpu
], count
[0]);
192 else if (perf_evsel__match(counter
, HARDWARE
, HW_CPU_CYCLES
))
193 update_stats(&runtime_cycles_stats
[ctx
][cpu
], count
[0]);
194 else if (perf_stat_evsel__is(counter
, CYCLES_IN_TX
))
195 update_stats(&runtime_cycles_in_tx_stats
[ctx
][cpu
], count
[0]);
196 else if (perf_stat_evsel__is(counter
, TRANSACTION_START
))
197 update_stats(&runtime_transaction_stats
[ctx
][cpu
], count
[0]);
198 else if (perf_stat_evsel__is(counter
, ELISION_START
))
199 update_stats(&runtime_elision_stats
[ctx
][cpu
], count
[0]);
200 else if (perf_stat_evsel__is(counter
, TOPDOWN_TOTAL_SLOTS
))
201 update_stats(&runtime_topdown_total_slots
[ctx
][cpu
], count
[0]);
202 else if (perf_stat_evsel__is(counter
, TOPDOWN_SLOTS_ISSUED
))
203 update_stats(&runtime_topdown_slots_issued
[ctx
][cpu
], count
[0]);
204 else if (perf_stat_evsel__is(counter
, TOPDOWN_SLOTS_RETIRED
))
205 update_stats(&runtime_topdown_slots_retired
[ctx
][cpu
], count
[0]);
206 else if (perf_stat_evsel__is(counter
, TOPDOWN_FETCH_BUBBLES
))
207 update_stats(&runtime_topdown_fetch_bubbles
[ctx
][cpu
],count
[0]);
208 else if (perf_stat_evsel__is(counter
, TOPDOWN_RECOVERY_BUBBLES
))
209 update_stats(&runtime_topdown_recovery_bubbles
[ctx
][cpu
], count
[0]);
210 else if (perf_evsel__match(counter
, HARDWARE
, HW_STALLED_CYCLES_FRONTEND
))
211 update_stats(&runtime_stalled_cycles_front_stats
[ctx
][cpu
], count
[0]);
212 else if (perf_evsel__match(counter
, HARDWARE
, HW_STALLED_CYCLES_BACKEND
))
213 update_stats(&runtime_stalled_cycles_back_stats
[ctx
][cpu
], count
[0]);
214 else if (perf_evsel__match(counter
, HARDWARE
, HW_BRANCH_INSTRUCTIONS
))
215 update_stats(&runtime_branches_stats
[ctx
][cpu
], count
[0]);
216 else if (perf_evsel__match(counter
, HARDWARE
, HW_CACHE_REFERENCES
))
217 update_stats(&runtime_cacherefs_stats
[ctx
][cpu
], count
[0]);
218 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_L1D
))
219 update_stats(&runtime_l1_dcache_stats
[ctx
][cpu
], count
[0]);
220 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_L1I
))
221 update_stats(&runtime_ll_cache_stats
[ctx
][cpu
], count
[0]);
222 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_LL
))
223 update_stats(&runtime_ll_cache_stats
[ctx
][cpu
], count
[0]);
224 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_DTLB
))
225 update_stats(&runtime_dtlb_cache_stats
[ctx
][cpu
], count
[0]);
226 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_ITLB
))
227 update_stats(&runtime_itlb_cache_stats
[ctx
][cpu
], count
[0]);
228 else if (perf_stat_evsel__is(counter
, SMI_NUM
))
229 update_stats(&runtime_smi_num_stats
[ctx
][cpu
], count
[0]);
230 else if (perf_stat_evsel__is(counter
, APERF
))
231 update_stats(&runtime_aperf_stats
[ctx
][cpu
], count
[0]);
233 if (counter
->collect_stat
) {
234 struct saved_value
*v
= saved_value_lookup(counter
, cpu
, ctx
,
236 update_stats(&v
->stats
, count
[0]);
240 /* used for get_ratio_color() */
242 GRC_STALLED_CYCLES_FE
,
243 GRC_STALLED_CYCLES_BE
,
248 static const char *get_ratio_color(enum grc_type type
, double ratio
)
250 static const double grc_table
[GRC_MAX_NR
][3] = {
251 [GRC_STALLED_CYCLES_FE
] = { 50.0, 30.0, 10.0 },
252 [GRC_STALLED_CYCLES_BE
] = { 75.0, 50.0, 20.0 },
253 [GRC_CACHE_MISSES
] = { 20.0, 10.0, 5.0 },
255 const char *color
= PERF_COLOR_NORMAL
;
257 if (ratio
> grc_table
[type
][0])
258 color
= PERF_COLOR_RED
;
259 else if (ratio
> grc_table
[type
][1])
260 color
= PERF_COLOR_MAGENTA
;
261 else if (ratio
> grc_table
[type
][2])
262 color
= PERF_COLOR_YELLOW
;
267 static struct perf_evsel
*perf_stat__find_event(struct perf_evlist
*evsel_list
,
270 struct perf_evsel
*c2
;
272 evlist__for_each_entry (evsel_list
, c2
) {
273 if (!strcasecmp(c2
->name
, name
))
279 /* Mark MetricExpr target events and link events using them to them. */
280 void perf_stat__collect_metric_expr(struct perf_evlist
*evsel_list
)
282 struct perf_evsel
*counter
, *leader
, **metric_events
, *oc
;
284 const char **metric_names
;
286 int num_metric_names
;
288 evlist__for_each_entry(evsel_list
, counter
) {
289 bool invalid
= false;
291 leader
= counter
->leader
;
292 if (!counter
->metric_expr
)
294 metric_events
= counter
->metric_events
;
295 if (!metric_events
) {
296 if (expr__find_other(counter
->metric_expr
, counter
->name
,
297 &metric_names
, &num_metric_names
) < 0)
300 metric_events
= calloc(sizeof(struct perf_evsel
*),
301 num_metric_names
+ 1);
304 counter
->metric_events
= metric_events
;
307 for (i
= 0; i
< num_metric_names
; i
++) {
310 /* Search in group */
311 for_each_group_member (oc
, leader
) {
312 if (!strcasecmp(oc
->name
, metric_names
[i
])) {
319 /* Search ignoring groups */
320 oc
= perf_stat__find_event(evsel_list
, metric_names
[i
]);
323 /* Deduping one is good enough to handle duplicated PMUs. */
324 static char *printed
;
327 * Adding events automatically would be difficult, because
328 * it would risk creating groups that are not schedulable.
329 * perf stat doesn't understand all the scheduling constraints
330 * of events. So we ask the user instead to add the missing
333 if (!printed
|| strcasecmp(printed
, metric_names
[i
])) {
335 "Add %s event to groups to get metric expression for %s\n",
338 printed
= strdup(metric_names
[i
]);
343 metric_events
[i
] = oc
;
344 oc
->collect_stat
= true;
346 metric_events
[i
] = NULL
;
350 counter
->metric_events
= NULL
;
351 counter
->metric_expr
= NULL
;
356 static void print_stalled_cycles_frontend(int cpu
,
357 struct perf_evsel
*evsel
, double avg
,
358 struct perf_stat_output_ctx
*out
)
360 double total
, ratio
= 0.0;
362 int ctx
= evsel_context(evsel
);
364 total
= avg_stats(&runtime_cycles_stats
[ctx
][cpu
]);
367 ratio
= avg
/ total
* 100.0;
369 color
= get_ratio_color(GRC_STALLED_CYCLES_FE
, ratio
);
372 out
->print_metric(out
->ctx
, color
, "%7.2f%%", "frontend cycles idle",
375 out
->print_metric(out
->ctx
, NULL
, NULL
, "frontend cycles idle", 0);
378 static void print_stalled_cycles_backend(int cpu
,
379 struct perf_evsel
*evsel
, double avg
,
380 struct perf_stat_output_ctx
*out
)
382 double total
, ratio
= 0.0;
384 int ctx
= evsel_context(evsel
);
386 total
= avg_stats(&runtime_cycles_stats
[ctx
][cpu
]);
389 ratio
= avg
/ total
* 100.0;
391 color
= get_ratio_color(GRC_STALLED_CYCLES_BE
, ratio
);
393 out
->print_metric(out
->ctx
, color
, "%7.2f%%", "backend cycles idle", ratio
);
396 static void print_branch_misses(int cpu
,
397 struct perf_evsel
*evsel
,
399 struct perf_stat_output_ctx
*out
)
401 double total
, ratio
= 0.0;
403 int ctx
= evsel_context(evsel
);
405 total
= avg_stats(&runtime_branches_stats
[ctx
][cpu
]);
408 ratio
= avg
/ total
* 100.0;
410 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
412 out
->print_metric(out
->ctx
, color
, "%7.2f%%", "of all branches", ratio
);
415 static void print_l1_dcache_misses(int cpu
,
416 struct perf_evsel
*evsel
,
418 struct perf_stat_output_ctx
*out
)
420 double total
, ratio
= 0.0;
422 int ctx
= evsel_context(evsel
);
424 total
= avg_stats(&runtime_l1_dcache_stats
[ctx
][cpu
]);
427 ratio
= avg
/ total
* 100.0;
429 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
431 out
->print_metric(out
->ctx
, color
, "%7.2f%%", "of all L1-dcache hits", ratio
);
434 static void print_l1_icache_misses(int cpu
,
435 struct perf_evsel
*evsel
,
437 struct perf_stat_output_ctx
*out
)
439 double total
, ratio
= 0.0;
441 int ctx
= evsel_context(evsel
);
443 total
= avg_stats(&runtime_l1_icache_stats
[ctx
][cpu
]);
446 ratio
= avg
/ total
* 100.0;
448 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
449 out
->print_metric(out
->ctx
, color
, "%7.2f%%", "of all L1-icache hits", ratio
);
452 static void print_dtlb_cache_misses(int cpu
,
453 struct perf_evsel
*evsel
,
455 struct perf_stat_output_ctx
*out
)
457 double total
, ratio
= 0.0;
459 int ctx
= evsel_context(evsel
);
461 total
= avg_stats(&runtime_dtlb_cache_stats
[ctx
][cpu
]);
464 ratio
= avg
/ total
* 100.0;
466 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
467 out
->print_metric(out
->ctx
, color
, "%7.2f%%", "of all dTLB cache hits", ratio
);
470 static void print_itlb_cache_misses(int cpu
,
471 struct perf_evsel
*evsel
,
473 struct perf_stat_output_ctx
*out
)
475 double total
, ratio
= 0.0;
477 int ctx
= evsel_context(evsel
);
479 total
= avg_stats(&runtime_itlb_cache_stats
[ctx
][cpu
]);
482 ratio
= avg
/ total
* 100.0;
484 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
485 out
->print_metric(out
->ctx
, color
, "%7.2f%%", "of all iTLB cache hits", ratio
);
488 static void print_ll_cache_misses(int cpu
,
489 struct perf_evsel
*evsel
,
491 struct perf_stat_output_ctx
*out
)
493 double total
, ratio
= 0.0;
495 int ctx
= evsel_context(evsel
);
497 total
= avg_stats(&runtime_ll_cache_stats
[ctx
][cpu
]);
500 ratio
= avg
/ total
* 100.0;
502 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
503 out
->print_metric(out
->ctx
, color
, "%7.2f%%", "of all LL-cache hits", ratio
);
507 * High level "TopDown" CPU core pipeline bottleneck breakdown.
509 * Basic concept following
510 * Yasin, A Top Down Method for Performance analysis and Counter architecture
513 * The CPU pipeline is divided into 4 areas that can be bottlenecks:
515 * Frontend -> Backend -> Retiring
516 * BadSpeculation in addition means out of order execution that is thrown away
517 * (for example branch mispredictions)
518 * Frontend is instruction decoding.
519 * Backend is execution, like computation and accessing data in memory
520 * Retiring is good execution that is not directly bottlenecked
522 * The formulas are computed in slots.
523 * A slot is one entry in the pipeline; each cycle provides one slot per unit of pipeline width
524 * (for example a 4-wide pipeline has 4 slots for each cycle)
527 * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
529 * Retiring = SlotsRetired / TotalSlots
530 * FrontendBound = FetchBubbles / TotalSlots
531 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
533 * The kernel provides the mapping to the low level CPU events and any scaling
534 * needed for the CPU pipeline width, for example:
536 * TotalSlots = Cycles * 4
538 * The scaling factor is communicated in the sysfs unit.
540 * In some cases the CPU may not be able to measure all the formulas due to
541 * missing events. In this case multiple formulas are combined, as possible.
543 * Full TopDown supports more levels to sub-divide each area: for example
544 * BackendBound into computing bound and memory bound. For now we only
545 * support Level 1 TopDown.
548 static double sanitize_val(double x
)
550 if (x
< 0 && x
>= -0.02)
555 static double td_total_slots(int ctx
, int cpu
)
557 return avg_stats(&runtime_topdown_total_slots
[ctx
][cpu
]);
560 static double td_bad_spec(int ctx
, int cpu
)
566 total
= avg_stats(&runtime_topdown_slots_issued
[ctx
][cpu
]) -
567 avg_stats(&runtime_topdown_slots_retired
[ctx
][cpu
]) +
568 avg_stats(&runtime_topdown_recovery_bubbles
[ctx
][cpu
]);
569 total_slots
= td_total_slots(ctx
, cpu
);
571 bad_spec
= total
/ total_slots
;
572 return sanitize_val(bad_spec
);
575 static double td_retiring(int ctx
, int cpu
)
578 double total_slots
= td_total_slots(ctx
, cpu
);
579 double ret_slots
= avg_stats(&runtime_topdown_slots_retired
[ctx
][cpu
]);
582 retiring
= ret_slots
/ total_slots
;
586 static double td_fe_bound(int ctx
, int cpu
)
589 double total_slots
= td_total_slots(ctx
, cpu
);
590 double fetch_bub
= avg_stats(&runtime_topdown_fetch_bubbles
[ctx
][cpu
]);
593 fe_bound
= fetch_bub
/ total_slots
;
597 static double td_be_bound(int ctx
, int cpu
)
599 double sum
= (td_fe_bound(ctx
, cpu
) +
600 td_bad_spec(ctx
, cpu
) +
601 td_retiring(ctx
, cpu
));
604 return sanitize_val(1.0 - sum
);
607 static void print_smi_cost(int cpu
, struct perf_evsel
*evsel
,
608 struct perf_stat_output_ctx
*out
)
610 double smi_num
, aperf
, cycles
, cost
= 0.0;
611 int ctx
= evsel_context(evsel
);
612 const char *color
= NULL
;
614 smi_num
= avg_stats(&runtime_smi_num_stats
[ctx
][cpu
]);
615 aperf
= avg_stats(&runtime_aperf_stats
[ctx
][cpu
]);
616 cycles
= avg_stats(&runtime_cycles_stats
[ctx
][cpu
]);
618 if ((cycles
== 0) || (aperf
== 0))
622 cost
= (aperf
- cycles
) / aperf
* 100.00;
625 color
= PERF_COLOR_RED
;
626 out
->print_metric(out
->ctx
, color
, "%8.1f%%", "SMI cycles%", cost
);
627 out
->print_metric(out
->ctx
, NULL
, "%4.0f", "SMI#", smi_num
);
630 void perf_stat__print_shadow_stats(struct perf_evsel
*evsel
,
632 struct perf_stat_output_ctx
*out
)
634 void *ctxp
= out
->ctx
;
635 print_metric_t print_metric
= out
->print_metric
;
636 double total
, ratio
= 0.0, total2
;
637 const char *color
= NULL
;
638 int ctx
= evsel_context(evsel
);
640 if (perf_evsel__match(evsel
, HARDWARE
, HW_INSTRUCTIONS
)) {
641 total
= avg_stats(&runtime_cycles_stats
[ctx
][cpu
]);
644 print_metric(ctxp
, NULL
, "%7.2f ",
645 "insn per cycle", ratio
);
647 print_metric(ctxp
, NULL
, NULL
, "insn per cycle", 0);
649 total
= avg_stats(&runtime_stalled_cycles_front_stats
[ctx
][cpu
]);
650 total
= max(total
, avg_stats(&runtime_stalled_cycles_back_stats
[ctx
][cpu
]));
655 print_metric(ctxp
, NULL
, "%7.2f ",
656 "stalled cycles per insn",
658 } else if (have_frontend_stalled
) {
659 print_metric(ctxp
, NULL
, NULL
,
660 "stalled cycles per insn", 0);
662 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_BRANCH_MISSES
)) {
663 if (runtime_branches_stats
[ctx
][cpu
].n
!= 0)
664 print_branch_misses(cpu
, evsel
, avg
, out
);
666 print_metric(ctxp
, NULL
, NULL
, "of all branches", 0);
668 evsel
->attr
.type
== PERF_TYPE_HW_CACHE
&&
669 evsel
->attr
.config
== ( PERF_COUNT_HW_CACHE_L1D
|
670 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
671 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16))) {
672 if (runtime_l1_dcache_stats
[ctx
][cpu
].n
!= 0)
673 print_l1_dcache_misses(cpu
, evsel
, avg
, out
);
675 print_metric(ctxp
, NULL
, NULL
, "of all L1-dcache hits", 0);
677 evsel
->attr
.type
== PERF_TYPE_HW_CACHE
&&
678 evsel
->attr
.config
== ( PERF_COUNT_HW_CACHE_L1I
|
679 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
680 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16))) {
681 if (runtime_l1_icache_stats
[ctx
][cpu
].n
!= 0)
682 print_l1_icache_misses(cpu
, evsel
, avg
, out
);
684 print_metric(ctxp
, NULL
, NULL
, "of all L1-icache hits", 0);
686 evsel
->attr
.type
== PERF_TYPE_HW_CACHE
&&
687 evsel
->attr
.config
== ( PERF_COUNT_HW_CACHE_DTLB
|
688 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
689 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16))) {
690 if (runtime_dtlb_cache_stats
[ctx
][cpu
].n
!= 0)
691 print_dtlb_cache_misses(cpu
, evsel
, avg
, out
);
693 print_metric(ctxp
, NULL
, NULL
, "of all dTLB cache hits", 0);
695 evsel
->attr
.type
== PERF_TYPE_HW_CACHE
&&
696 evsel
->attr
.config
== ( PERF_COUNT_HW_CACHE_ITLB
|
697 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
698 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16))) {
699 if (runtime_itlb_cache_stats
[ctx
][cpu
].n
!= 0)
700 print_itlb_cache_misses(cpu
, evsel
, avg
, out
);
702 print_metric(ctxp
, NULL
, NULL
, "of all iTLB cache hits", 0);
704 evsel
->attr
.type
== PERF_TYPE_HW_CACHE
&&
705 evsel
->attr
.config
== ( PERF_COUNT_HW_CACHE_LL
|
706 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
707 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16))) {
708 if (runtime_ll_cache_stats
[ctx
][cpu
].n
!= 0)
709 print_ll_cache_misses(cpu
, evsel
, avg
, out
);
711 print_metric(ctxp
, NULL
, NULL
, "of all LL-cache hits", 0);
712 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_CACHE_MISSES
)) {
713 total
= avg_stats(&runtime_cacherefs_stats
[ctx
][cpu
]);
716 ratio
= avg
* 100 / total
;
718 if (runtime_cacherefs_stats
[ctx
][cpu
].n
!= 0)
719 print_metric(ctxp
, NULL
, "%8.3f %%",
720 "of all cache refs", ratio
);
722 print_metric(ctxp
, NULL
, NULL
, "of all cache refs", 0);
723 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_STALLED_CYCLES_FRONTEND
)) {
724 print_stalled_cycles_frontend(cpu
, evsel
, avg
, out
);
725 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_STALLED_CYCLES_BACKEND
)) {
726 print_stalled_cycles_backend(cpu
, evsel
, avg
, out
);
727 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_CPU_CYCLES
)) {
728 total
= avg_stats(&runtime_nsecs_stats
[cpu
]);
732 print_metric(ctxp
, NULL
, "%8.3f", "GHz", ratio
);
734 print_metric(ctxp
, NULL
, NULL
, "Ghz", 0);
736 } else if (perf_stat_evsel__is(evsel
, CYCLES_IN_TX
)) {
737 total
= avg_stats(&runtime_cycles_stats
[ctx
][cpu
]);
739 print_metric(ctxp
, NULL
,
740 "%7.2f%%", "transactional cycles",
741 100.0 * (avg
/ total
));
743 print_metric(ctxp
, NULL
, NULL
, "transactional cycles",
745 } else if (perf_stat_evsel__is(evsel
, CYCLES_IN_TX_CP
)) {
746 total
= avg_stats(&runtime_cycles_stats
[ctx
][cpu
]);
747 total2
= avg_stats(&runtime_cycles_in_tx_stats
[ctx
][cpu
]);
751 print_metric(ctxp
, NULL
, "%7.2f%%", "aborted cycles",
752 100.0 * ((total2
-avg
) / total
));
754 print_metric(ctxp
, NULL
, NULL
, "aborted cycles", 0);
755 } else if (perf_stat_evsel__is(evsel
, TRANSACTION_START
)) {
756 total
= avg_stats(&runtime_cycles_in_tx_stats
[ctx
][cpu
]);
761 if (runtime_cycles_in_tx_stats
[ctx
][cpu
].n
!= 0)
762 print_metric(ctxp
, NULL
, "%8.0f",
763 "cycles / transaction", ratio
);
765 print_metric(ctxp
, NULL
, NULL
, "cycles / transaction",
767 } else if (perf_stat_evsel__is(evsel
, ELISION_START
)) {
768 total
= avg_stats(&runtime_cycles_in_tx_stats
[ctx
][cpu
]);
773 print_metric(ctxp
, NULL
, "%8.0f", "cycles / elision", ratio
);
774 } else if (perf_evsel__match(evsel
, SOFTWARE
, SW_TASK_CLOCK
) ||
775 perf_evsel__match(evsel
, SOFTWARE
, SW_CPU_CLOCK
)) {
776 if ((ratio
= avg_stats(&walltime_nsecs_stats
)) != 0)
777 print_metric(ctxp
, NULL
, "%8.3f", "CPUs utilized",
780 print_metric(ctxp
, NULL
, NULL
, "CPUs utilized", 0);
781 } else if (perf_stat_evsel__is(evsel
, TOPDOWN_FETCH_BUBBLES
)) {
782 double fe_bound
= td_fe_bound(ctx
, cpu
);
785 color
= PERF_COLOR_RED
;
786 print_metric(ctxp
, color
, "%8.1f%%", "frontend bound",
788 } else if (perf_stat_evsel__is(evsel
, TOPDOWN_SLOTS_RETIRED
)) {
789 double retiring
= td_retiring(ctx
, cpu
);
792 color
= PERF_COLOR_GREEN
;
793 print_metric(ctxp
, color
, "%8.1f%%", "retiring",
795 } else if (perf_stat_evsel__is(evsel
, TOPDOWN_RECOVERY_BUBBLES
)) {
796 double bad_spec
= td_bad_spec(ctx
, cpu
);
799 color
= PERF_COLOR_RED
;
800 print_metric(ctxp
, color
, "%8.1f%%", "bad speculation",
802 } else if (perf_stat_evsel__is(evsel
, TOPDOWN_SLOTS_ISSUED
)) {
803 double be_bound
= td_be_bound(ctx
, cpu
);
804 const char *name
= "backend bound";
805 static int have_recovery_bubbles
= -1;
807 /* In case the CPU does not support topdown-recovery-bubbles */
808 if (have_recovery_bubbles
< 0)
809 have_recovery_bubbles
= pmu_have_event("cpu",
810 "topdown-recovery-bubbles");
811 if (!have_recovery_bubbles
)
812 name
= "backend bound/bad spec";
815 color
= PERF_COLOR_RED
;
816 if (td_total_slots(ctx
, cpu
) > 0)
817 print_metric(ctxp
, color
, "%8.1f%%", name
,
820 print_metric(ctxp
, NULL
, NULL
, name
, 0);
821 } else if (evsel
->metric_expr
) {
822 struct parse_ctx pctx
;
825 expr__ctx_init(&pctx
);
826 expr__add_id(&pctx
, evsel
->name
, avg
);
827 for (i
= 0; evsel
->metric_events
[i
]; i
++) {
828 struct saved_value
*v
;
830 v
= saved_value_lookup(evsel
->metric_events
[i
], cpu
, ctx
, false);
833 expr__add_id(&pctx
, evsel
->metric_events
[i
]->name
,
834 avg_stats(&v
->stats
));
836 if (!evsel
->metric_events
[i
]) {
837 const char *p
= evsel
->metric_expr
;
839 if (expr__parse(&ratio
, &pctx
, &p
) == 0)
840 print_metric(ctxp
, NULL
, "%8.1f",
843 out
->force_header
? evsel
->name
: "",
846 print_metric(ctxp
, NULL
, NULL
, "", 0);
848 print_metric(ctxp
, NULL
, NULL
, "", 0);
849 } else if (runtime_nsecs_stats
[cpu
].n
!= 0) {
853 total
= avg_stats(&runtime_nsecs_stats
[cpu
]);
856 ratio
= 1000.0 * avg
/ total
;
861 snprintf(unit_buf
, sizeof(unit_buf
), "%c/sec", unit
);
862 print_metric(ctxp
, NULL
, "%8.3f", unit_buf
, ratio
);
863 } else if (perf_stat_evsel__is(evsel
, SMI_NUM
)) {
864 print_smi_cost(cpu
, evsel
, out
);
866 print_metric(ctxp
, NULL
, NULL
, NULL
, 0);