/*
 * qsp.c - QEMU Synchronization Profiler
 *
 * Copyright (C) 2018, Emilio G. Cota <cota@braap.org>
 *
 * License: GNU GPL, version 2 or later.
 *   See the COPYING file in the top-level directory.
 *
 * QSP profiles the time spent in synchronization primitives, which can
 * help diagnose performance problems, e.g. scalability issues when
 * contention is high.
 *
 * The primitives currently supported are mutexes, recursive mutexes and
 * condition variables. Note that not all related functions are intercepted;
 * instead we profile only those functions that can have a performance impact,
 * either due to blocking (e.g. cond_wait, mutex_lock) or cache line
 * contention (e.g. mutex_lock, mutex_trylock).
 *
 * QSP's design focuses on speed and scalability. This is achieved
 * by having threads do their profiling entirely on thread-local data.
 * The appropriate thread-local data is found via a QHT, i.e. a concurrent hash
 * table. To aggregate data in order to generate a report, we iterate over
 * all entries in the hash table. Depending on the number of threads and
 * synchronization objects this might be expensive, but note that it is
 * very rarely called -- reports are generated only when requested by users.
 *
 * Reports are generated as a table where each row represents a call site. A
 * call site is the triplet formed by the __FILE__ and __LINE__ of the caller
 * as well as the address of the "object" (i.e. mutex, rec. mutex or condvar)
 * being operated on. Optionally, call sites that operate on different objects
 * of the same type can be coalesced, which can be particularly useful when
 * profiling dynamically-allocated objects.
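 *
 * For illustration, a report might look like this (hypothetical numbers):
 *
 *  Type              Object Call site                 Wait Time (s)        Count Average (us)
 *  -------------------------------------------------------------------------------------------
 *  BQL mutex 0x55a1b7a2d5e0 accel/tcg/cpu-exec.c:432        0.54321       123456         4.40
 *  condvar   [           2] util/thread-pool.c:90           0.01234         5678         2.17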
 *
 * Alternative designs considered:
 *
 * - Use an off-the-shelf profiler such as mutrace. This is not a viable option
 *   for us because QEMU has __malloc_hook set (by one of the libraries it
 *   uses); leaving this hook unset is required to avoid deadlock in mutrace.
 *
 * - Use a glib HT for each thread, protecting each HT with its own lock.
 *   This isn't simpler than the current design, and is 10% slower in the
 *   atomic_add-bench microbenchmark (-m option).
 *
 * - For reports, just use a binary tree as we aggregate data, instead of having
 *   an intermediate hash table. This would simplify the code only slightly, but
 *   would perform badly if there were many threads and objects to track.
 *
 * - Wrap operations on qsp entries with RCU read-side critical sections, so
 *   that qsp_reset() can delete entries. Unfortunately, the overhead of calling
 *   rcu_read_lock/unlock slows down atomic_add-bench -m by 24%. Having
 *   a snapshot that is updated on qsp_reset() avoids this overhead.
 *
 * Related Work:
 * - Lennart Poettering's mutrace: http://0pointer.de/blog/projects/mutrace.html
 * - Lozi, David, Thomas, Lawall and Muller. "Remote Core Locking: Migrating
 *   Critical-Section Execution to Improve the Performance of Multithreaded
 *   Applications", USENIX ATC'12.
 */
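
/*
 * Minimal usage sketch (illustrative caller code, not part of this file;
 * the prototypes are exported through QEMU's thread/qsp headers):
 *
 *     qsp_enable();
 *     ... run the workload to be profiled ...
 *     qsp_report(stderr, fprintf, 20, QSP_SORT_BY_TOTAL_WAIT_TIME, false);
 *     qsp_reset();    // subsequent reports only show later activity
 *     qsp_disable();
 */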
#include "qemu/osdep.h"
#include "qemu/thread.h"
#include "qemu/timer.h"
#include "qemu/qht.h"
#include "qemu/rcu.h"
#include "exec/tb-hash-xx.h"

enum QSPType {
    QSP_MUTEX,
    QSP_BQL_MUTEX,
    QSP_REC_MUTEX,
    QSP_CONDVAR,
};

struct QSPCallSite {
    const void *obj;
    const char *file; /* i.e. __FILE__; shortened later */
    int line;
    enum QSPType type;
};
typedef struct QSPCallSite QSPCallSite;

struct QSPEntry {
    void *thread_ptr;
    const QSPCallSite *callsite;
    uint64_t n_acqs;
    uint64_t ns;
    unsigned int n_objs; /* count of coalesced objs; only used for reporting */
#ifndef CONFIG_ATOMIC64
    /*
     * If we cannot update the counts atomically, then use a seqlock.
     * We don't need an associated lock because the updates are thread-local.
     */
    QemuSeqLock sequence;
#endif
};
typedef struct QSPEntry QSPEntry;

struct QSPSnapshot {
    struct rcu_head rcu;
    struct qht ht;
};
typedef struct QSPSnapshot QSPSnapshot;

/* initial sizing for hash tables */
#define QSP_INITIAL_SIZE 64

/* If this file is moved, QSP_REL_PATH should be updated accordingly */
#define QSP_REL_PATH "util/qsp.c"

/* length of qemu's path prefix; call sites are printed relative to it */
static size_t qsp_qemu_path_len;

/* the address of qsp_thread gives us a unique 'thread ID' */
static __thread int qsp_thread;

/*
 * Call sites are the same for all threads, so we track them in a separate hash
 * table to save memory.
 */
static struct qht qsp_callsite_ht;

static struct qht qsp_ht;
static QSPSnapshot *qsp_snapshot;
static bool qsp_initialized, qsp_initializing;

static const char * const qsp_typenames[] = {
    [QSP_MUTEX]     = "mutex",
    [QSP_BQL_MUTEX] = "BQL mutex",
    [QSP_REC_MUTEX] = "rec_mutex",
    [QSP_CONDVAR]   = "condvar",
};

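/*
 * Dispatch pointers for the intercepted primitives. Callers (e.g. the
 * qemu_mutex_lock() macro in qemu/thread.h) invoke the primitives indirectly
 * through these, which is what lets qsp_enable() and qsp_disable() below
 * switch profiling on and off atomically at run time.
 */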
QemuMutexLockFunc qemu_bql_mutex_lock_func = qemu_mutex_lock_impl;
QemuMutexLockFunc qemu_mutex_lock_func = qemu_mutex_lock_impl;
QemuMutexTrylockFunc qemu_mutex_trylock_func = qemu_mutex_trylock_impl;
QemuRecMutexLockFunc qemu_rec_mutex_lock_func = qemu_rec_mutex_lock_impl;
QemuRecMutexTrylockFunc qemu_rec_mutex_trylock_func =
    qemu_rec_mutex_trylock_impl;
QemuCondWaitFunc qemu_cond_wait_func = qemu_cond_wait_impl;

/*
 * It pays off to _not_ hash callsite->file; hashing a string is slow, and
 * without it we still get a pretty unique hash.
 */
static inline
uint32_t do_qsp_callsite_hash(const QSPCallSite *callsite, uint64_t a)
{
    uint64_t b = (uint64_t)(uintptr_t)callsite->obj;
    uint32_t e = callsite->line;
    uint32_t f = callsite->type;

    return tb_hash_func7(a, b, e, f, 0);
}

static inline
uint32_t qsp_callsite_hash(const QSPCallSite *callsite)
{
    return do_qsp_callsite_hash(callsite, 0);
}

static inline uint32_t do_qsp_entry_hash(const QSPEntry *entry, uint64_t a)
{
    return do_qsp_callsite_hash(entry->callsite, a);
}

static uint32_t qsp_entry_hash(const QSPEntry *entry)
{
    return do_qsp_entry_hash(entry, (uint64_t)(uintptr_t)entry->thread_ptr);
}

static uint32_t qsp_entry_no_thread_hash(const QSPEntry *entry)
{
    return do_qsp_entry_hash(entry, 0);
}

/* without the object's address, hash the file name to get a decent hash */
static uint32_t qsp_entry_no_thread_obj_hash(const QSPEntry *entry)
{
    const QSPCallSite *callsite = entry->callsite;
    uint64_t a = g_str_hash(callsite->file);
    uint64_t b = callsite->line;
    uint32_t e = callsite->type;

    return tb_hash_func7(a, b, e, 0, 0);
}

static bool qsp_callsite_cmp(const void *ap, const void *bp)
{
    const QSPCallSite *a = ap;
    const QSPCallSite *b = bp;

    return a == b ||
        (a->obj == b->obj &&
         a->line == b->line &&
         a->type == b->type &&
         (a->file == b->file || !strcmp(a->file, b->file)));
}

static bool qsp_callsite_no_obj_cmp(const void *ap, const void *bp)
{
    const QSPCallSite *a = ap;
    const QSPCallSite *b = bp;

    return a == b ||
        (a->line == b->line &&
         a->type == b->type &&
         (a->file == b->file || !strcmp(a->file, b->file)));
}

static bool qsp_entry_no_thread_cmp(const void *ap, const void *bp)
{
    const QSPEntry *a = ap;
    const QSPEntry *b = bp;

    return qsp_callsite_cmp(a->callsite, b->callsite);
}

static bool qsp_entry_no_thread_obj_cmp(const void *ap, const void *bp)
{
    const QSPEntry *a = ap;
    const QSPEntry *b = bp;

    return qsp_callsite_no_obj_cmp(a->callsite, b->callsite);
}

static bool qsp_entry_cmp(const void *ap, const void *bp)
{
    const QSPEntry *a = ap;
    const QSPEntry *b = bp;

    return a->thread_ptr == b->thread_ptr &&
        qsp_callsite_cmp(a->callsite, b->callsite);
}

/*
 * Normally we'd call this from a constructor function, but we want it to work
 * via libutil as well.
 */
static void qsp_do_init(void)
{
    /* make sure this file's path in the tree is up to date with QSP_REL_PATH */
    g_assert(strstr(__FILE__, QSP_REL_PATH));
    qsp_qemu_path_len = strlen(__FILE__) - strlen(QSP_REL_PATH);

    qht_init(&qsp_ht, qsp_entry_cmp, QSP_INITIAL_SIZE,
             QHT_MODE_AUTO_RESIZE | QHT_MODE_RAW_MUTEXES);
    qht_init(&qsp_callsite_ht, qsp_callsite_cmp, QSP_INITIAL_SIZE,
             QHT_MODE_AUTO_RESIZE | QHT_MODE_RAW_MUTEXES);
}

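/*
 * Double-checked init: the caller that wins the cmpxchg on qsp_initializing
 * runs qsp_do_init(); concurrent callers spin until qsp_initialized is set.
 * This lets every exported entry point call qsp_init() unconditionally.
 */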
static __attribute__((noinline)) void qsp_init__slowpath(void)
{
    if (atomic_cmpxchg(&qsp_initializing, false, true) == false) {
        qsp_do_init();
        atomic_set(&qsp_initialized, true);
    } else {
        while (!atomic_read(&qsp_initialized)) {
            cpu_relax();
        }
    }
}

/* qsp_init() must be called from _all_ exported functions */
static inline void qsp_init(void)
{
    if (likely(atomic_read(&qsp_initialized))) {
        return;
    }
    qsp_init__slowpath();
}

static QSPCallSite *qsp_callsite_find(const QSPCallSite *orig)
{
    QSPCallSite *callsite;
    uint32_t hash;

    hash = qsp_callsite_hash(orig);
    callsite = qht_lookup(&qsp_callsite_ht, orig, hash);
    if (callsite == NULL) {
        void *existing = NULL;

        callsite = g_new(QSPCallSite, 1);
        memcpy(callsite, orig, sizeof(*callsite));
        qht_insert(&qsp_callsite_ht, callsite, hash, &existing);
        if (unlikely(existing)) {
            g_free(callsite);
            callsite = existing;
        }
    }
    return callsite;
}

static QSPEntry *
qsp_entry_create(struct qht *ht, const QSPEntry *entry, uint32_t hash)
{
    QSPEntry *e;
    void *existing = NULL;

    e = g_new0(QSPEntry, 1);
    e->thread_ptr = entry->thread_ptr;
    e->callsite = qsp_callsite_find(entry->callsite);

    qht_insert(ht, e, hash, &existing);
    if (unlikely(existing)) {
        g_free(e);
        e = existing;
    }
    return e;
}

static QSPEntry *
qsp_entry_find(struct qht *ht, const QSPEntry *entry, uint32_t hash)
{
    QSPEntry *e;

    e = qht_lookup(ht, entry, hash);
    if (e == NULL) {
        e = qsp_entry_create(ht, entry, hash);
    }
    return e;
}

/*
 * Note: Entries are never removed, so callers do not have to be in an RCU
 * read-side critical section.
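 *
 * For example, the first time a thread profiles a given (object, file, line)
 * triplet, the lookup misses and a fresh thread-local entry is inserted;
 * later calls from the same call site in the same thread find that entry
 * directly.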
 */
static QSPEntry *qsp_entry_get(const void *obj, const char *file, int line,
                               enum QSPType type)
{
    QSPCallSite callsite = {
        .obj = obj,
        .file = file,
        .line = line,
        .type = type,
    };
    QSPEntry orig;
    uint32_t hash;

    qsp_init();

    orig.thread_ptr = &qsp_thread;
    orig.callsite = &callsite;

    hash = qsp_entry_hash(&orig);
    return qsp_entry_find(&qsp_ht, &orig, hash);
}

/*
 * @from is in the global hash table; read it atomically if the host
 * supports it, otherwise use the seqlock.
 */
static void qsp_entry_aggregate(QSPEntry *to, const QSPEntry *from)
{
#ifdef CONFIG_ATOMIC64
    to->ns += atomic_read__nocheck(&from->ns);
    to->n_acqs += atomic_read__nocheck(&from->n_acqs);
#else
    unsigned int version;
    uint64_t ns, n_acqs;

    do {
        version = seqlock_read_begin(&from->sequence);
        ns = atomic_read__nocheck(&from->ns);
        n_acqs = atomic_read__nocheck(&from->n_acqs);
    } while (seqlock_read_retry(&from->sequence, version));

    to->ns += ns;
    to->n_acqs += n_acqs;
#endif
}

/*
 * @e is in the global hash table; it is only written to by the current thread,
 * so we write to it atomically (as in "write once") to prevent torn reads.
 * If the host doesn't support u64 atomics, use the seqlock.
 */
static inline void do_qsp_entry_record(QSPEntry *e, int64_t delta, bool acq)
{
#ifndef CONFIG_ATOMIC64
    seqlock_write_begin(&e->sequence);
#endif
    atomic_set__nocheck(&e->ns, e->ns + delta);
    if (acq) {
        atomic_set__nocheck(&e->n_acqs, e->n_acqs + 1);
    }
#ifndef CONFIG_ATOMIC64
    seqlock_write_end(&e->sequence);
#endif
}

static inline void qsp_entry_record(QSPEntry *e, int64_t delta)
{
    do_qsp_entry_record(e, delta, true);
}

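/*
 * The generators below produce the profiled wrappers: each one times the
 * underlying implementation with get_clock() and records the delta against
 * the caller's call site. QSP_GEN_VOID is for void-returning lock functions;
 * QSP_GEN_RET1 is for trylock-style functions, which count an acquisition
 * only on success (!err).
 */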
#define QSP_GEN_VOID(type_, qsp_t_, func_, impl_)                       \
    static void func_(type_ *obj, const char *file, int line)           \
    {                                                                   \
        QSPEntry *e;                                                    \
        int64_t t0, t1;                                                 \
                                                                        \
        t0 = get_clock();                                               \
        impl_(obj, file, line);                                         \
        t1 = get_clock();                                               \
                                                                        \
        e = qsp_entry_get(obj, file, line, qsp_t_);                     \
        qsp_entry_record(e, t1 - t0);                                   \
    }

#define QSP_GEN_RET1(type_, qsp_t_, func_, impl_)                       \
    static int func_(type_ *obj, const char *file, int line)            \
    {                                                                   \
        QSPEntry *e;                                                    \
        int64_t t0, t1;                                                 \
        int err;                                                        \
                                                                        \
        t0 = get_clock();                                               \
        err = impl_(obj, file, line);                                   \
        t1 = get_clock();                                               \
                                                                        \
        e = qsp_entry_get(obj, file, line, qsp_t_);                     \
        do_qsp_entry_record(e, t1 - t0, !err);                          \
        return err;                                                     \
    }

QSP_GEN_VOID(QemuMutex, QSP_BQL_MUTEX, qsp_bql_mutex_lock, qemu_mutex_lock_impl)
QSP_GEN_VOID(QemuMutex, QSP_MUTEX, qsp_mutex_lock, qemu_mutex_lock_impl)
QSP_GEN_RET1(QemuMutex, QSP_MUTEX, qsp_mutex_trylock, qemu_mutex_trylock_impl)

QSP_GEN_VOID(QemuRecMutex, QSP_REC_MUTEX, qsp_rec_mutex_lock,
             qemu_rec_mutex_lock_impl)
QSP_GEN_RET1(QemuRecMutex, QSP_REC_MUTEX, qsp_rec_mutex_trylock,
             qemu_rec_mutex_trylock_impl)

#undef QSP_GEN_RET1
#undef QSP_GEN_VOID

static void
qsp_cond_wait(QemuCond *cond, QemuMutex *mutex, const char *file, int line)
{
    QSPEntry *e;
    int64_t t0, t1;

    t0 = get_clock();
    qemu_cond_wait_impl(cond, mutex, file, line);
    t1 = get_clock();

    e = qsp_entry_get(cond, file, line, QSP_CONDVAR);
    qsp_entry_record(e, t1 - t0);
}

bool qsp_is_enabled(void)
{
    return atomic_read(&qemu_mutex_lock_func) == qsp_mutex_lock;
}

void qsp_enable(void)
{
    atomic_set(&qemu_mutex_lock_func, qsp_mutex_lock);
    atomic_set(&qemu_mutex_trylock_func, qsp_mutex_trylock);
    atomic_set(&qemu_bql_mutex_lock_func, qsp_bql_mutex_lock);
    atomic_set(&qemu_rec_mutex_lock_func, qsp_rec_mutex_lock);
    atomic_set(&qemu_rec_mutex_trylock_func, qsp_rec_mutex_trylock);
    atomic_set(&qemu_cond_wait_func, qsp_cond_wait);
}

void qsp_disable(void)
{
    atomic_set(&qemu_mutex_lock_func, qemu_mutex_lock_impl);
    atomic_set(&qemu_mutex_trylock_func, qemu_mutex_trylock_impl);
    atomic_set(&qemu_bql_mutex_lock_func, qemu_mutex_lock_impl);
    atomic_set(&qemu_rec_mutex_lock_func, qemu_rec_mutex_lock_impl);
    atomic_set(&qemu_rec_mutex_trylock_func, qemu_rec_mutex_trylock_impl);
    atomic_set(&qemu_cond_wait_func, qemu_cond_wait_impl);
}

static gint qsp_tree_cmp(gconstpointer ap, gconstpointer bp, gpointer up)
{
    const QSPEntry *a = ap;
    const QSPEntry *b = bp;
    enum QSPSortBy sort_by = *(enum QSPSortBy *)up;
    const QSPCallSite *ca;
    const QSPCallSite *cb;

    switch (sort_by) {
    case QSP_SORT_BY_TOTAL_WAIT_TIME:
        if (a->ns > b->ns) {
            return -1;
        } else if (a->ns < b->ns) {
            return 1;
        }
        break;
    case QSP_SORT_BY_AVG_WAIT_TIME:
    {
        double avg_a = a->n_acqs ? a->ns / a->n_acqs : 0;
        double avg_b = b->n_acqs ? b->ns / b->n_acqs : 0;

        if (avg_a > avg_b) {
            return -1;
        } else if (avg_a < avg_b) {
            return 1;
        }
        break;
    }
    default:
        g_assert_not_reached();
    }

    ca = a->callsite;
    cb = b->callsite;
    /* Break the tie with the object's address */
    if (ca->obj < cb->obj) {
        return -1;
    } else if (ca->obj > cb->obj) {
        return 1;
    } else {
        int cmp;

        /* same obj. Break the tie with the callsite's file */
        cmp = strcmp(ca->file, cb->file);
        if (cmp) {
            return cmp;
        }
        /* same callsite file. Break the tie with the callsite's line */
        g_assert(ca->line != cb->line || ca->type != cb->type);
        if (ca->line < cb->line) {
            return -1;
        } else if (ca->line > cb->line) {
            return 1;
        } else {
            /* break the tie with the callsite's type */
            return cb->type - ca->type;
        }
    }
}

static void qsp_sort(struct qht *ht, void *p, uint32_t h, void *userp)
{
    QSPEntry *e = p;
    GTree *tree = userp;

    g_tree_insert(tree, e, NULL);
}

static void qsp_aggregate(struct qht *global_ht, void *p, uint32_t h, void *up)
{
    struct qht *ht = up;
    const QSPEntry *e = p;
    QSPEntry *agg;
    uint32_t hash;

    hash = qsp_entry_no_thread_hash(e);
    agg = qsp_entry_find(ht, e, hash);
    qsp_entry_aggregate(agg, e);
}

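/*
 * Subtract the snapshot's counts from the live aggregate so that reports only
 * cover activity since the last qsp_reset(). For example (hypothetical
 * numbers): if the snapshot recorded n_acqs == 100 for a call site and the
 * aggregate now reads 140, the report shows 40.
 */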
static void qsp_iter_diff(struct qht *orig, void *p, uint32_t hash, void *htp)
{
    struct qht *ht = htp;
    QSPEntry *old = p;
    QSPEntry *new;

    new = qht_lookup(ht, old, hash);
    /* entries are never deleted, so we must have this one */
    g_assert(new != NULL);
    /* our reading of the stats happened after the snapshot was taken */
    g_assert(new->n_acqs >= old->n_acqs);
    g_assert(new->ns >= old->ns);

    new->n_acqs -= old->n_acqs;
    new->ns -= old->ns;

    /* No point in reporting an empty entry */
    if (new->n_acqs == 0 && new->ns == 0) {
        bool removed = qht_remove(ht, new, hash);

        g_assert(removed);
        g_free(new);
    }
}

static void qsp_diff(struct qht *orig, struct qht *new)
{
    qht_iter(orig, qsp_iter_diff, new);
}

static void
qsp_iter_callsite_coalesce(struct qht *orig, void *p, uint32_t h, void *htp)
{
    struct qht *ht = htp;
    QSPEntry *old = p;
    QSPEntry *e;
    uint32_t hash;

    hash = qsp_entry_no_thread_obj_hash(old);
    e = qht_lookup(ht, old, hash);
    if (e == NULL) {
        e = qsp_entry_create(ht, old, hash);
        e->n_objs = 1;
    } else if (e->callsite->obj != old->callsite->obj) {
        e->n_objs++;
    }
    e->ns += old->ns;
    e->n_acqs += old->n_acqs;
}

static void qsp_ht_delete(struct qht *ht, void *p, uint32_t h, void *htp)
{
    g_free(p);
}

static void qsp_mktree(GTree *tree, bool callsite_coalesce)
{
    QSPSnapshot *snap;
    struct qht ht, coalesce_ht;
    struct qht *htp;

    /*
     * First, see if there's a prior snapshot, so that we read the global hash
     * table _after_ the snapshot has been created, which guarantees that
     * the entries we'll read will be a superset of the snapshot's entries.
     *
     * We must remain in an RCU read-side critical section until we're done
     * with the snapshot.
     */
    rcu_read_lock();
    snap = atomic_rcu_read(&qsp_snapshot);

    /* Aggregate all results from the global hash table into a local one */
    qht_init(&ht, qsp_entry_no_thread_cmp, QSP_INITIAL_SIZE,
             QHT_MODE_AUTO_RESIZE | QHT_MODE_RAW_MUTEXES);
    qht_iter(&qsp_ht, qsp_aggregate, &ht);

    /* compute the difference wrt the snapshot, if any */
    if (snap) {
        qsp_diff(&snap->ht, &ht);
    }
    /* done with the snapshot; RCU can reclaim it */
    rcu_read_unlock();

    htp = &ht;
    if (callsite_coalesce) {
        qht_init(&coalesce_ht, qsp_entry_no_thread_obj_cmp, QSP_INITIAL_SIZE,
                 QHT_MODE_AUTO_RESIZE | QHT_MODE_RAW_MUTEXES);
        qht_iter(&ht, qsp_iter_callsite_coalesce, &coalesce_ht);

        /* free the previous hash table, and point htp to coalesce_ht */
        qht_iter(&ht, qsp_ht_delete, NULL);
        qht_destroy(&ht);
        htp = &coalesce_ht;
    }

    /* sort the hash table elements by using a tree */
    qht_iter(htp, qsp_sort, tree);

    /* free the hash table, but keep the elements (those are in the tree now) */
    qht_destroy(htp);
}

/* the caller must free the returned string with g_free() */
static char *qsp_at(const QSPCallSite *callsite)
{
    GString *s = g_string_new(NULL);
    const char *shortened;

    /* remove the absolute path to qemu */
    if (unlikely(strlen(callsite->file) < qsp_qemu_path_len)) {
        shortened = callsite->file;
    } else {
        shortened = callsite->file + qsp_qemu_path_len;
    }
    g_string_append_printf(s, "%s:%u", shortened, callsite->line);
    return g_string_free(s, FALSE);
}

struct QSPReportEntry {
    const void *obj;
    char *callsite_at;
    const char *typename;
    double time_s;
    double ns_avg;
    uint64_t n_acqs;
    unsigned int n_objs;
};
typedef struct QSPReportEntry QSPReportEntry;

struct QSPReport {
    QSPReportEntry *entries;
    size_t n_entries;
    size_t max_n_entries;
};
typedef struct QSPReport QSPReport;

static gboolean qsp_tree_report(gpointer key, gpointer value, gpointer udata)
{
    const QSPEntry *e = key;
    QSPReport *report = udata;
    QSPReportEntry *entry;

    if (report->n_entries == report->max_n_entries) {
        return TRUE;
    }
    entry = &report->entries[report->n_entries];
    report->n_entries++;

    entry->obj = e->callsite->obj;
    entry->n_objs = e->n_objs;
    entry->callsite_at = qsp_at(e->callsite);
    entry->typename = qsp_typenames[e->callsite->type];
    entry->time_s = e->ns * 1e-9;
    entry->n_acqs = e->n_acqs;
    entry->ns_avg = e->n_acqs ? e->ns / e->n_acqs : 0;
    return FALSE;
}

static void
pr_report(const QSPReport *rep, FILE *f, fprintf_function pr)
{
    char *dashes;
    size_t max_len = 0;
    int callsite_len = 0;
    int callsite_rspace;
    int n_dashes;
    size_t i;

    /* find out the maximum length of all 'callsite' fields */
    for (i = 0; i < rep->n_entries; i++) {
        const QSPReportEntry *e = &rep->entries[i];
        size_t len = strlen(e->callsite_at);

        if (len > max_len) {
            max_len = len;
        }
    }

    callsite_len = MAX(max_len, strlen("Call site"));
    /* white space to leave to the right of "Call site" */
    callsite_rspace = callsite_len - strlen("Call site");

    pr(f, "Type              Object Call site%*s Wait Time (s) "
       "       Count Average (us)\n", callsite_rspace, "");

    /* build a horizontal rule with dashes */
    n_dashes = 79 + callsite_rspace;
    dashes = g_malloc(n_dashes + 1);
    memset(dashes, '-', n_dashes);
    dashes[n_dashes] = '\0';
    pr(f, "%s\n", dashes);

    for (i = 0; i < rep->n_entries; i++) {
        const QSPReportEntry *e = &rep->entries[i];
        GString *s = g_string_new(NULL);

        g_string_append_printf(s, "%-9s ", e->typename);
        if (e->n_objs > 1) {
            g_string_append_printf(s, "[%12u]", e->n_objs);
        } else {
            g_string_append_printf(s, "%14p", e->obj);
        }
        g_string_append_printf(s, " %s%*s %13.5f %12" PRIu64 " %12.2f\n",
                               e->callsite_at,
                               callsite_len - (int)strlen(e->callsite_at), "",
                               e->time_s, e->n_acqs, e->ns_avg * 1e-3);
        pr(f, "%s", s->str);
        g_string_free(s, TRUE);
    }

    pr(f, "%s\n", dashes);
    g_free(dashes);
}

static void report_destroy(QSPReport *rep)
{
    size_t i;

    for (i = 0; i < rep->n_entries; i++) {
        QSPReportEntry *e = &rep->entries[i];

        g_free(e->callsite_at);
    }
    g_free(rep->entries);
}

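/*
 * Print a report of at most @max entries, sorted by @sort_by. (At the time of
 * writing, the QEMU monitor exposes this via the HMP commands
 * "info sync-profile" and "sync-profile [on|off|reset]".)
 */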
void qsp_report(FILE *f, fprintf_function cpu_fprintf, size_t max,
                enum QSPSortBy sort_by, bool callsite_coalesce)
{
    GTree *tree = g_tree_new_full(qsp_tree_cmp, &sort_by, g_free, NULL);
    QSPReport rep;

    qsp_init();

    rep.entries = g_new0(QSPReportEntry, max);
    rep.n_entries = 0;
    rep.max_n_entries = max;

    qsp_mktree(tree, callsite_coalesce);
    g_tree_foreach(tree, qsp_tree_report, &rep);
    g_tree_destroy(tree);

    pr_report(&rep, f, cpu_fprintf);
    report_destroy(&rep);
}

static void qsp_snapshot_destroy(QSPSnapshot *snap)
{
    qht_iter(&snap->ht, qsp_ht_delete, NULL);
    qht_destroy(&snap->ht);
    g_free(snap);
}

void qsp_reset(void)
{
    QSPSnapshot *new = g_new(QSPSnapshot, 1);
    QSPSnapshot *old;

    qsp_init();

    qht_init(&new->ht, qsp_entry_cmp, QSP_INITIAL_SIZE,
             QHT_MODE_AUTO_RESIZE | QHT_MODE_RAW_MUTEXES);

    /* take a snapshot of the current state */
    qht_iter(&qsp_ht, qsp_aggregate, &new->ht);

    /* replace the previous snapshot, if any */
    old = atomic_xchg(&qsp_snapshot, new);
    if (old) {
        call_rcu(old, qsp_snapshot_destroy, rcu);
    }
}