]>
Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
a43783ae | 2 | #include <errno.h> |
fd20e811 | 3 | #include <inttypes.h> |
0007ecea | 4 | #include <math.h> |
0007ecea | 5 | #include "stat.h" |
24e34f68 | 6 | #include "evlist.h" |
e2f56da1 | 7 | #include "evsel.h" |
24e34f68 | 8 | #include "thread_map.h" |
7f7c536f | 9 | #include <linux/zalloc.h> |
0007ecea XG |
10 | |
11 | void update_stats(struct stats *stats, u64 val) | |
12 | { | |
13 | double delta; | |
14 | ||
15 | stats->n++; | |
16 | delta = val - stats->mean; | |
17 | stats->mean += delta / stats->n; | |
18 | stats->M2 += delta*(val - stats->mean); | |
ffe4f3c0 DA |
19 | |
20 | if (val > stats->max) | |
21 | stats->max = val; | |
22 | ||
23 | if (val < stats->min) | |
24 | stats->min = val; | |
0007ecea XG |
25 | } |
26 | ||
27 | double avg_stats(struct stats *stats) | |
28 | { | |
29 | return stats->mean; | |
30 | } | |
31 | ||
32 | /* | |
33 | * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance | |
34 | * | |
35 | * (\Sum n_i^2) - ((\Sum n_i)^2)/n | |
36 | * s^2 = ------------------------------- | |
37 | * n - 1 | |
38 | * | |
39 | * http://en.wikipedia.org/wiki/Stddev | |
40 | * | |
41 | * The std dev of the mean is related to the std dev by: | |
42 | * | |
43 | * s | |
44 | * s_mean = ------- | |
45 | * sqrt(n) | |
46 | * | |
47 | */ | |
48 | double stddev_stats(struct stats *stats) | |
49 | { | |
50 | double variance, variance_mean; | |
51 | ||
45528f7c | 52 | if (stats->n < 2) |
0007ecea XG |
53 | return 0.0; |
54 | ||
55 | variance = stats->M2 / (stats->n - 1); | |
56 | variance_mean = variance / stats->n; | |
57 | ||
58 | return sqrt(variance_mean); | |
59 | } | |
60 | ||
/*
 * Express @stddev as a percentage of @avg.
 *
 * Returns 0.0 when @avg is zero so that no division by zero takes place.
 */
double rel_stddev_stats(double stddev, double avg)
{
	if (!avg)
		return 0.0;

	return 100.0 * stddev / avg;
}
e2f56da1 | 70 | |
32dcd021 | 71 | bool __perf_evsel_stat__is(struct evsel *evsel, |
e2f56da1 JO |
72 | enum perf_stat_evsel_id id) |
73 | { | |
e669e833 | 74 | struct perf_stat_evsel *ps = evsel->stats; |
e2f56da1 JO |
75 | |
76 | return ps->id == id; | |
77 | } | |
78 | ||
79 | #define ID(id, name) [PERF_STAT_EVSEL_ID__##id] = #name | |
80 | static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = { | |
4c358d5c JO |
81 | ID(NONE, x), |
82 | ID(CYCLES_IN_TX, cpu/cycles-t/), | |
83 | ID(TRANSACTION_START, cpu/tx-start/), | |
84 | ID(ELISION_START, cpu/el-start/), | |
85 | ID(CYCLES_IN_TX_CP, cpu/cycles-ct/), | |
239bd47f AK |
86 | ID(TOPDOWN_TOTAL_SLOTS, topdown-total-slots), |
87 | ID(TOPDOWN_SLOTS_ISSUED, topdown-slots-issued), | |
88 | ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired), | |
89 | ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles), | |
90 | ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles), | |
daefd0bc KL |
91 | ID(SMI_NUM, msr/smi/), |
92 | ID(APERF, msr/aperf/), | |
e2f56da1 JO |
93 | }; |
94 | #undef ID | |
95 | ||
32dcd021 | 96 | static void perf_stat_evsel_id_init(struct evsel *evsel) |
e2f56da1 | 97 | { |
e669e833 | 98 | struct perf_stat_evsel *ps = evsel->stats; |
e2f56da1 JO |
99 | int i; |
100 | ||
101 | /* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */ | |
102 | ||
103 | for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) { | |
104 | if (!strcmp(perf_evsel__name(evsel), id_str[i])) { | |
105 | ps->id = i; | |
106 | break; | |
107 | } | |
108 | } | |
109 | } | |
a9a3a4d9 | 110 | |
32dcd021 | 111 | static void perf_evsel__reset_stat_priv(struct evsel *evsel) |
9689edfa JO |
112 | { |
113 | int i; | |
e669e833 | 114 | struct perf_stat_evsel *ps = evsel->stats; |
9689edfa JO |
115 | |
116 | for (i = 0; i < 3; i++) | |
117 | init_stats(&ps->res_stats[i]); | |
118 | ||
119 | perf_stat_evsel_id_init(evsel); | |
120 | } | |
121 | ||
32dcd021 | 122 | static int perf_evsel__alloc_stat_priv(struct evsel *evsel) |
9689edfa | 123 | { |
e669e833 ACM |
124 | evsel->stats = zalloc(sizeof(struct perf_stat_evsel)); |
125 | if (evsel->stats == NULL) | |
9689edfa JO |
126 | return -ENOMEM; |
127 | perf_evsel__reset_stat_priv(evsel); | |
128 | return 0; | |
129 | } | |
130 | ||
32dcd021 | 131 | static void perf_evsel__free_stat_priv(struct evsel *evsel) |
9689edfa | 132 | { |
e669e833 | 133 | struct perf_stat_evsel *ps = evsel->stats; |
f7794d52 JO |
134 | |
135 | if (ps) | |
d8f9da24 | 136 | zfree(&ps->group_data); |
e669e833 | 137 | zfree(&evsel->stats); |
9689edfa | 138 | } |
a939512d | 139 | |
32dcd021 | 140 | static int perf_evsel__alloc_prev_raw_counts(struct evsel *evsel, |
86a2cf31 | 141 | int ncpus, int nthreads) |
a939512d JO |
142 | { |
143 | struct perf_counts *counts; | |
144 | ||
145 | counts = perf_counts__new(ncpus, nthreads); | |
146 | if (counts) | |
147 | evsel->prev_raw_counts = counts; | |
148 | ||
149 | return counts ? 0 : -ENOMEM; | |
150 | } | |
151 | ||
32dcd021 | 152 | static void perf_evsel__free_prev_raw_counts(struct evsel *evsel) |
a939512d JO |
153 | { |
154 | perf_counts__delete(evsel->prev_raw_counts); | |
155 | evsel->prev_raw_counts = NULL; | |
156 | } | |
24e34f68 | 157 | |
32dcd021 | 158 | static int perf_evsel__alloc_stats(struct evsel *evsel, bool alloc_raw) |
a7d0a102 JO |
159 | { |
160 | int ncpus = perf_evsel__nr_cpus(evsel); | |
161 | int nthreads = thread_map__nr(evsel->threads); | |
162 | ||
163 | if (perf_evsel__alloc_stat_priv(evsel) < 0 || | |
164 | perf_evsel__alloc_counts(evsel, ncpus, nthreads) < 0 || | |
165 | (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0)) | |
166 | return -ENOMEM; | |
167 | ||
168 | return 0; | |
169 | } | |
170 | ||
63503dba | 171 | int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw) |
24e34f68 | 172 | { |
32dcd021 | 173 | struct evsel *evsel; |
24e34f68 | 174 | |
e5cadb93 | 175 | evlist__for_each_entry(evlist, evsel) { |
a7d0a102 | 176 | if (perf_evsel__alloc_stats(evsel, alloc_raw)) |
24e34f68 JO |
177 | goto out_free; |
178 | } | |
179 | ||
180 | return 0; | |
181 | ||
182 | out_free: | |
183 | perf_evlist__free_stats(evlist); | |
184 | return -1; | |
185 | } | |
186 | ||
63503dba | 187 | void perf_evlist__free_stats(struct evlist *evlist) |
24e34f68 | 188 | { |
32dcd021 | 189 | struct evsel *evsel; |
24e34f68 | 190 | |
e5cadb93 | 191 | evlist__for_each_entry(evlist, evsel) { |
24e34f68 JO |
192 | perf_evsel__free_stat_priv(evsel); |
193 | perf_evsel__free_counts(evsel); | |
194 | perf_evsel__free_prev_raw_counts(evsel); | |
195 | } | |
196 | } | |
197 | ||
63503dba | 198 | void perf_evlist__reset_stats(struct evlist *evlist) |
24e34f68 | 199 | { |
32dcd021 | 200 | struct evsel *evsel; |
24e34f68 | 201 | |
e5cadb93 | 202 | evlist__for_each_entry(evlist, evsel) { |
24e34f68 JO |
203 | perf_evsel__reset_stat_priv(evsel); |
204 | perf_evsel__reset_counts(evsel); | |
205 | } | |
206 | } | |
f80010eb | 207 | |
32dcd021 | 208 | static void zero_per_pkg(struct evsel *counter) |
f80010eb JO |
209 | { |
210 | if (counter->per_pkg_mask) | |
211 | memset(counter->per_pkg_mask, 0, MAX_NR_CPUS); | |
212 | } | |
213 | ||
32dcd021 | 214 | static int check_per_pkg(struct evsel *counter, |
02d8dabc | 215 | struct perf_counts_values *vals, int cpu, bool *skip) |
f80010eb JO |
216 | { |
217 | unsigned long *mask = counter->per_pkg_mask; | |
b49aca3e | 218 | struct perf_cpu_map *cpus = evsel__cpus(counter); |
f80010eb JO |
219 | int s; |
220 | ||
221 | *skip = false; | |
222 | ||
223 | if (!counter->per_pkg) | |
224 | return 0; | |
225 | ||
226 | if (cpu_map__empty(cpus)) | |
227 | return 0; | |
228 | ||
229 | if (!mask) { | |
230 | mask = zalloc(MAX_NR_CPUS); | |
231 | if (!mask) | |
232 | return -ENOMEM; | |
233 | ||
234 | counter->per_pkg_mask = mask; | |
235 | } | |
236 | ||
02d8dabc SE |
237 | /* |
238 | * we do not consider an event that has not run as a good | |
239 | * instance to mark a package as used (skip=1). Otherwise | |
240 | * we may run into a situation where the first CPU in a package | |
241 | * is not running anything, yet the second is, and this function | |
242 | * would mark the package as used after the first CPU and would | |
243 | * not read the values from the second CPU. | |
244 | */ | |
245 | if (!(vals->run && vals->ena)) | |
246 | return 0; | |
247 | ||
1fe7a300 | 248 | s = cpu_map__get_socket(cpus, cpu, NULL); |
f80010eb JO |
249 | if (s < 0) |
250 | return -1; | |
251 | ||
252 | *skip = test_and_set_bit(s, mask) == 1; | |
253 | return 0; | |
254 | } | |
255 | ||
256 | static int | |
32dcd021 | 257 | process_counter_values(struct perf_stat_config *config, struct evsel *evsel, |
f80010eb JO |
258 | int cpu, int thread, |
259 | struct perf_counts_values *count) | |
260 | { | |
261 | struct perf_counts_values *aggr = &evsel->counts->aggr; | |
262 | static struct perf_counts_values zero; | |
263 | bool skip = false; | |
264 | ||
02d8dabc | 265 | if (check_per_pkg(evsel, count, cpu, &skip)) { |
f80010eb JO |
266 | pr_err("failed to read per-pkg counter\n"); |
267 | return -1; | |
268 | } | |
269 | ||
270 | if (skip) | |
271 | count = &zero; | |
272 | ||
273 | switch (config->aggr_mode) { | |
274 | case AGGR_THREAD: | |
275 | case AGGR_CORE: | |
db5742b6 | 276 | case AGGR_DIE: |
f80010eb JO |
277 | case AGGR_SOCKET: |
278 | case AGGR_NONE: | |
279 | if (!evsel->snapshot) | |
280 | perf_evsel__compute_deltas(evsel, cpu, thread, count); | |
281 | perf_counts_values__scale(count, config->scale, NULL); | |
4fc4d8df JY |
282 | if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) { |
283 | perf_stat__update_shadow_stats(evsel, count->val, | |
284 | cpu, &rt_stat); | |
285 | } | |
286 | ||
14e72a21 JY |
287 | if (config->aggr_mode == AGGR_THREAD) { |
288 | if (config->stats) | |
289 | perf_stat__update_shadow_stats(evsel, | |
290 | count->val, 0, &config->stats[thread]); | |
291 | else | |
292 | perf_stat__update_shadow_stats(evsel, | |
293 | count->val, 0, &rt_stat); | |
294 | } | |
f80010eb JO |
295 | break; |
296 | case AGGR_GLOBAL: | |
297 | aggr->val += count->val; | |
75998bb2 AK |
298 | aggr->ena += count->ena; |
299 | aggr->run += count->run; | |
208df99e | 300 | case AGGR_UNSET: |
f80010eb JO |
301 | default: |
302 | break; | |
303 | } | |
304 | ||
305 | return 0; | |
306 | } | |
307 | ||
308 | static int process_counter_maps(struct perf_stat_config *config, | |
32dcd021 | 309 | struct evsel *counter) |
f80010eb JO |
310 | { |
311 | int nthreads = thread_map__nr(counter->threads); | |
312 | int ncpus = perf_evsel__nr_cpus(counter); | |
313 | int cpu, thread; | |
314 | ||
315 | if (counter->system_wide) | |
316 | nthreads = 1; | |
317 | ||
318 | for (thread = 0; thread < nthreads; thread++) { | |
319 | for (cpu = 0; cpu < ncpus; cpu++) { | |
320 | if (process_counter_values(config, counter, cpu, thread, | |
321 | perf_counts(counter->counts, cpu, thread))) | |
322 | return -1; | |
323 | } | |
324 | } | |
325 | ||
326 | return 0; | |
327 | } | |
328 | ||
329 | int perf_stat_process_counter(struct perf_stat_config *config, | |
32dcd021 | 330 | struct evsel *counter) |
f80010eb JO |
331 | { |
332 | struct perf_counts_values *aggr = &counter->counts->aggr; | |
e669e833 | 333 | struct perf_stat_evsel *ps = counter->stats; |
f80010eb JO |
334 | u64 *count = counter->counts->aggr.values; |
335 | int i, ret; | |
336 | ||
337 | aggr->val = aggr->ena = aggr->run = 0; | |
f80010eb | 338 | |
51fd2df1 JO |
339 | /* |
340 | * We calculate counter's data every interval, | |
341 | * and the display code shows ps->res_stats | |
342 | * avg value. We need to zero the stats for | |
343 | * interval mode, otherwise overall avg running | |
344 | * averages will be shown for each interval. | |
345 | */ | |
346 | if (config->interval) | |
347 | init_stats(ps->res_stats); | |
348 | ||
f80010eb JO |
349 | if (counter->per_pkg) |
350 | zero_per_pkg(counter); | |
351 | ||
352 | ret = process_counter_maps(config, counter); | |
353 | if (ret) | |
354 | return ret; | |
355 | ||
356 | if (config->aggr_mode != AGGR_GLOBAL) | |
357 | return 0; | |
358 | ||
359 | if (!counter->snapshot) | |
360 | perf_evsel__compute_deltas(counter, -1, -1, aggr); | |
361 | perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled); | |
362 | ||
363 | for (i = 0; i < 3; i++) | |
364 | update_stats(&ps->res_stats[i], count[i]); | |
365 | ||
bb963e16 | 366 | if (verbose > 0) { |
f80010eb JO |
367 | fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", |
368 | perf_evsel__name(counter), count[0], count[1], count[2]); | |
369 | } | |
370 | ||
371 | /* | |
372 | * Save the full runtime - to allow normalization during printout: | |
373 | */ | |
1fcd0394 | 374 | perf_stat__update_shadow_stats(counter, *count, 0, &rt_stat); |
f80010eb JO |
375 | |
376 | return 0; | |
377 | } | |
0ea0e355 | 378 | |
89f1688a JO |
379 | int perf_event__process_stat_event(struct perf_session *session, |
380 | union perf_event *event) | |
0ea0e355 JO |
381 | { |
382 | struct perf_counts_values count; | |
383 | struct stat_event *st = &event->stat; | |
32dcd021 | 384 | struct evsel *counter; |
0ea0e355 JO |
385 | |
386 | count.val = st->val; | |
387 | count.ena = st->ena; | |
388 | count.run = st->run; | |
389 | ||
390 | counter = perf_evlist__id2evsel(session->evlist, st->id); | |
391 | if (!counter) { | |
392 | pr_err("Failed to resolve counter for stat event.\n"); | |
393 | return -EINVAL; | |
394 | } | |
395 | ||
396 | *perf_counts(counter->counts, st->cpu, st->thread) = count; | |
397 | counter->supported = true; | |
398 | return 0; | |
399 | } | |
e08a4564 JO |
400 | |
401 | size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp) | |
402 | { | |
403 | struct stat_event *st = (struct stat_event *) event; | |
404 | size_t ret; | |
405 | ||
406 | ret = fprintf(fp, "\n... id %" PRIu64 ", cpu %d, thread %d\n", | |
407 | st->id, st->cpu, st->thread); | |
408 | ret += fprintf(fp, "... value %" PRIu64 ", enabled %" PRIu64 ", running %" PRIu64 "\n", | |
409 | st->val, st->ena, st->run); | |
410 | ||
411 | return ret; | |
412 | } | |
413 | ||
414 | size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp) | |
415 | { | |
416 | struct stat_round_event *rd = (struct stat_round_event *)event; | |
417 | size_t ret; | |
418 | ||
419 | ret = fprintf(fp, "\n... time %" PRIu64 ", type %s\n", rd->time, | |
420 | rd->type == PERF_STAT_ROUND_TYPE__FINAL ? "FINAL" : "INTERVAL"); | |
421 | ||
422 | return ret; | |
423 | } | |
424 | ||
425 | size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp) | |
426 | { | |
427 | struct perf_stat_config sc; | |
428 | size_t ret; | |
429 | ||
430 | perf_event__read_stat_config(&sc, &event->stat_config); | |
431 | ||
432 | ret = fprintf(fp, "\n"); | |
433 | ret += fprintf(fp, "... aggr_mode %d\n", sc.aggr_mode); | |
434 | ret += fprintf(fp, "... scale %d\n", sc.scale); | |
435 | ret += fprintf(fp, "... interval %u\n", sc.interval); | |
436 | ||
437 | return ret; | |
438 | } | |
d09cefd2 | 439 | |
32dcd021 | 440 | int create_perf_stat_counter(struct evsel *evsel, |
d09cefd2 JO |
441 | struct perf_stat_config *config, |
442 | struct target *target) | |
443 | { | |
1fc632ce | 444 | struct perf_event_attr *attr = &evsel->core.attr; |
32dcd021 | 445 | struct evsel *leader = evsel->leader; |
d09cefd2 | 446 | |
75998bb2 AK |
447 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | |
448 | PERF_FORMAT_TOTAL_TIME_RUNNING; | |
d09cefd2 JO |
449 | |
450 | /* | |
451 | * The event is part of non trivial group, let's enable | |
452 | * the group read (for leader) and ID retrieval for all | |
453 | * members. | |
454 | */ | |
455 | if (leader->nr_members > 1) | |
456 | attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP; | |
457 | ||
458 | attr->inherit = !config->no_inherit; | |
459 | ||
460 | /* | |
461 | * Some events get initialized with sample_(period/type) set, | |
462 | * like tracepoints. Clear it up for counting. | |
463 | */ | |
464 | attr->sample_period = 0; | |
465 | ||
466 | if (config->identifier) | |
467 | attr->sample_type = PERF_SAMPLE_IDENTIFIER; | |
468 | ||
469 | /* | |
470 | * Disabling all counters initially, they will be enabled | |
471 | * either manually by us or by kernel via enable_on_exec | |
472 | * set later. | |
473 | */ | |
474 | if (perf_evsel__is_group_leader(evsel)) { | |
475 | attr->disabled = 1; | |
476 | ||
477 | /* | |
478 | * In case of initial_delay we enable tracee | |
479 | * events manually. | |
480 | */ | |
481 | if (target__none(target) && !config->initial_delay) | |
482 | attr->enable_on_exec = 1; | |
483 | } | |
484 | ||
485 | if (target__has_cpu(target) && !target__has_per_thread(target)) | |
b49aca3e | 486 | return perf_evsel__open_per_cpu(evsel, evsel__cpus(evsel)); |
d09cefd2 JO |
487 | |
488 | return perf_evsel__open_per_thread(evsel, evsel->threads); | |
489 | } | |
0a4e64d3 JO |
490 | |
491 | int perf_stat_synthesize_config(struct perf_stat_config *config, | |
492 | struct perf_tool *tool, | |
63503dba | 493 | struct evlist *evlist, |
0a4e64d3 JO |
494 | perf_event__handler_t process, |
495 | bool attrs) | |
496 | { | |
497 | int err; | |
498 | ||
499 | if (attrs) { | |
500 | err = perf_event__synthesize_attrs(tool, evlist, process); | |
501 | if (err < 0) { | |
502 | pr_err("Couldn't synthesize attrs.\n"); | |
503 | return err; | |
504 | } | |
505 | } | |
506 | ||
507 | err = perf_event__synthesize_extra_attr(tool, evlist, process, | |
508 | attrs); | |
509 | ||
510 | err = perf_event__synthesize_thread_map2(tool, evlist->threads, | |
511 | process, NULL); | |
512 | if (err < 0) { | |
513 | pr_err("Couldn't synthesize thread map.\n"); | |
514 | return err; | |
515 | } | |
516 | ||
517 | err = perf_event__synthesize_cpu_map(tool, evlist->cpus, | |
518 | process, NULL); | |
519 | if (err < 0) { | |
520 | pr_err("Couldn't synthesize thread map.\n"); | |
521 | return err; | |
522 | } | |
523 | ||
524 | err = perf_event__synthesize_stat_config(tool, config, process, NULL); | |
525 | if (err < 0) { | |
526 | pr_err("Couldn't synthesize config.\n"); | |
527 | return err; | |
528 | } | |
529 | ||
530 | return 0; | |
531 | } |