* Critical-Section Execution to Improve the Performance of Multithreaded
* Applications", USENIX ATC'12.
*/
+
#include "qemu/osdep.h"
+#include "qemu/qemu-print.h"
#include "qemu/thread.h"
#include "qemu/timer.h"
#include "qemu/qht.h"
#include "qemu/rcu.h"
-#include "exec/tb-hash-xx.h"
+#include "qemu/xxhash.h"
enum QSPType {
QSP_MUTEX,
uint64_t n_acqs;
uint64_t ns;
unsigned int n_objs; /* count of coalesced objs; only used for reporting */
-#ifndef CONFIG_ATOMIC64
- /*
- * If we cannot update the counts atomically, then use a seqlock.
- * We don't need an associated lock because the updates are thread-local.
- */
- QemuSeqLock sequence;
-#endif
};
typedef struct QSPEntry QSPEntry;
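With the seqlock and its CONFIG_ATOMIC64 guard gone, the counters are plain u64 fields with a single writer each. A sketch of the resulting entry, assuming the thread_ptr and callsite fields implied by the surrounding code (they are not part of this hunk):

    #include <stdint.h>

    typedef struct QSPCallSite QSPCallSite;

    /* Sketch only; thread_ptr and callsite are assumed, not shown above. */
    struct QSPEntry {
        void *thread_ptr;            /* the recording (single-writer) thread */
        const QSPCallSite *callsite;
        uint64_t n_acqs;             /* updated via atomic_set_u64() */
        uint64_t ns;                 /* updated via atomic_set_u64() */
        unsigned int n_objs;         /* count of coalesced objs; reporting only */
    };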
* without it we still get a pretty unique hash.
*/
static inline
-uint32_t do_qsp_callsite_hash(const QSPCallSite *callsite, uint64_t a)
+uint32_t do_qsp_callsite_hash(const QSPCallSite *callsite, uint64_t ab)
{
- uint64_t b = (uint64_t)(uintptr_t)callsite->obj;
+ uint64_t cd = (uint64_t)(uintptr_t)callsite->obj;
uint32_t e = callsite->line;
uint32_t f = callsite->type;
- return tb_hash_func7(a, b, e, f, 0);
+ return qemu_xxhash6(ab, cd, e, f);
}
static inline
static uint32_t qsp_entry_no_thread_obj_hash(const QSPEntry *entry)
{
const QSPCallSite *callsite = entry->callsite;
- uint64_t a = g_str_hash(callsite->file);
- uint64_t b = callsite->line;
+ uint64_t ab = g_str_hash(callsite->file);
+ uint64_t cd = callsite->line;
uint32_t e = callsite->type;
- return tb_hash_func7(a, b, e, 0, 0);
+ return qemu_xxhash5(ab, cd, e);
}
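Both hash functions above move from tb_hash_func7() to the qemu_xxhash*() family, renaming the 64-bit inputs to ab/cd to match the new header's parameter names. A minimal sketch of the interface assumed from qemu/xxhash.h, where the narrower variants forward to the seven-argument form with zeroed tail arguments:

    #include <stdint.h>

    /* Assumed shape of the qemu/xxhash.h helpers used above. */
    uint32_t qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f,
                          uint32_t g);

    static inline uint32_t qemu_xxhash5(uint64_t ab, uint64_t cd, uint32_t e)
    {
        return qemu_xxhash7(ab, cd, e, 0, 0);
    }

    static inline uint32_t qemu_xxhash6(uint64_t ab, uint64_t cd, uint32_t e,
                                        uint32_t f)
    {
        return qemu_xxhash7(ab, cd, e, f, 0);
    }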
static bool qsp_callsite_cmp(const void *ap, const void *bp)
return qsp_entry_find(&qsp_ht, &orig, hash);
}
-/*
- * @from is in the global hash table; read it atomically if the host
- * supports it, otherwise use the seqlock.
- */
-static void qsp_entry_aggregate(QSPEntry *to, const QSPEntry *from)
-{
-#ifdef CONFIG_ATOMIC64
- to->ns += atomic_read__nocheck(&from->ns);
- to->n_acqs += atomic_read__nocheck(&from->n_acqs);
-#else
- unsigned int version;
- uint64_t ns, n_acqs;
-
- do {
- version = seqlock_read_begin(&from->sequence);
- ns = atomic_read__nocheck(&from->ns);
- n_acqs = atomic_read__nocheck(&from->n_acqs);
- } while (seqlock_read_retry(&from->sequence, version));
-
- to->ns += ns;
- to->n_acqs += n_acqs;
-#endif
-}
-
/*
* @e is in the global hash table; it is only written to by the current thread,
* so we write to it atomically (as in "write once") to prevent torn reads.
- * If the host doesn't support u64 atomics, use the seqlock.
*/
static inline void do_qsp_entry_record(QSPEntry *e, int64_t delta, bool acq)
{
-#ifndef CONFIG_ATOMIC64
- seqlock_write_begin(&e->sequence);
-#endif
- atomic_set__nocheck(&e->ns, e->ns + delta);
+ atomic_set_u64(&e->ns, e->ns + delta);
if (acq) {
- atomic_set__nocheck(&e->n_acqs, e->n_acqs + 1);
+ atomic_set_u64(&e->n_acqs, e->n_acqs + 1);
}
-#ifndef CONFIG_ATOMIC64
- seqlock_write_end(&e->sequence);
-#endif
}
static inline void qsp_entry_record(QSPEntry *e, int64_t delta)
}
}
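atomic_set_u64() and atomic_read_u64() subsume the old #ifndef CONFIG_ATOMIC64 seqlock dance: on hosts with native 64-bit atomics they can reduce to relaxed loads and stores, and the fallback path (out-of-line helpers in QEMU) only has to keep the accesses tear-free, since each entry has exactly one writer. A sketch of the fast path, assuming GCC/Clang atomic builtins underneath:

    #include <stdint.h>

    /* Sketch of the native-atomics case; relaxed ordering suffices because
     * every QSPEntry has a single writer and readers tolerate staleness. */
    static inline void sketch_atomic_set_u64(uint64_t *ptr, uint64_t val)
    {
        __atomic_store_n(ptr, val, __ATOMIC_RELAXED);
    }

    static inline uint64_t sketch_atomic_read_u64(uint64_t *ptr)
    {
        return __atomic_load_n(ptr, __ATOMIC_RELAXED);
    }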
-static void qsp_sort(struct qht *ht, void *p, uint32_t h, void *userp)
+static void qsp_sort(void *p, uint32_t h, void *userp)
{
QSPEntry *e = p;
GTree *tree = userp;
g_tree_insert(tree, e, NULL);
}
-static void qsp_aggregate(struct qht *global_ht, void *p, uint32_t h, void *up)
+static void qsp_aggregate(void *p, uint32_t h, void *up)
{
struct qht *ht = up;
const QSPEntry *e = p;
hash = qsp_entry_no_thread_hash(e);
agg = qsp_entry_find(ht, e, hash);
- qsp_entry_aggregate(agg, e);
+ /*
+ * The entry is in the global hash table; read from it atomically (as in
+ * "read once").
+ */
+ agg->ns += atomic_read_u64(&e->ns);
+ agg->n_acqs += atomic_read_u64(&e->n_acqs);
}
-static void qsp_iter_diff(struct qht *orig, void *p, uint32_t hash, void *htp)
+static void qsp_iter_diff(void *p, uint32_t hash, void *htp)
{
struct qht *ht = htp;
QSPEntry *old = p;
qht_iter(orig, qsp_iter_diff, new);
}
-static void
-qsp_iter_callsite_coalesce(struct qht *orig, void *p, uint32_t h, void *htp)
+static void qsp_iter_callsite_coalesce(void *p, uint32_t h, void *htp)
{
struct qht *ht = htp;
QSPEntry *old = p;
e->n_acqs += old->n_acqs;
}
-static void qsp_ht_delete(struct qht *ht, void *p, uint32_t h, void *htp)
+static void qsp_ht_delete(void *p, uint32_t h, void *htp)
{
g_free(p);
}
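The callback rewrites above (qsp_sort, qsp_aggregate, qsp_iter_diff, qsp_iter_callsite_coalesce, qsp_ht_delete) all follow from one interface change: the qht iterator no longer hands the hash table itself to the callback. None of these callbacks used that argument, and anything they do need, including a destination table, still arrives through the opaque user pointer. Assuming the iterator interface now reads roughly:

    #include <stdint.h>

    struct qht;

    /* Assumed post-change callback type: entry, its hash, user pointer. */
    typedef void (*qht_iter_func_t)(void *p, uint32_t h, void *up);

    void qht_iter(struct qht *ht, qht_iter_func_t func, void *userp);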
return FALSE;
}
-static void
-pr_report(const QSPReport *rep, FILE *f, fprintf_function pr)
+static void pr_report(const QSPReport *rep)
{
char *dashes;
size_t max_len = 0;
/* white space to leave to the right of "Call site" */
callsite_rspace = callsite_len - strlen("Call site");
- pr(f, "Type Object Call site%*s Wait Time (s) "
- " Count Average (us)\n", callsite_rspace, "");
+ qemu_printf("Type Object Call site%*s Wait Time (s) "
+ " Count Average (us)\n", callsite_rspace, "");
/* build a horizontal rule with dashes */
n_dashes = 79 + callsite_rspace;
dashes = g_malloc(n_dashes + 1);
memset(dashes, '-', n_dashes);
dashes[n_dashes] = '\0';
- pr(f, "%s\n", dashes);
+ qemu_printf("%s\n", dashes);
for (i = 0; i < rep->n_entries; i++) {
const QSPReportEntry *e = &rep->entries[i];
e->callsite_at,
callsite_len - (int)strlen(e->callsite_at), "",
e->time_s, e->n_acqs, e->ns_avg * 1e-3);
- pr(f, "%s", s->str);
+ qemu_printf("%s", s->str);
g_string_free(s, TRUE);
}
- pr(f, "%s\n", dashes);
+ qemu_printf("%s\n", dashes);
g_free(dashes);
}
g_free(rep->entries);
}
-void qsp_report(FILE *f, fprintf_function cpu_fprintf, size_t max,
- enum QSPSortBy sort_by, bool callsite_coalesce)
+void qsp_report(size_t max, enum QSPSortBy sort_by,
+ bool callsite_coalesce)
{
GTree *tree = g_tree_new_full(qsp_tree_cmp, &sort_by, g_free, NULL);
QSPReport rep;
g_tree_foreach(tree, qsp_tree_report, &rep);
g_tree_destroy(tree);
- pr_report(&rep, f, cpu_fprintf);
+ pr_report(&rep);
report_destroy(&rep);
}
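Dropping the (FILE *, fprintf_function) pair from pr_report() and qsp_report() in favor of qemu_printf() moves the where-does-output-go decision into the printing helper, so callers no longer thread a stream and a printer through the report path. A hypothetical call after this change, with the sort-by enumerator assumed from qemu/qsp.h:

    /* Hypothetical caller: report up to 10 call sites, coalesced,
     * sorted by total wait time (enumerator name is an assumption). */
    qsp_report(10, QSP_SORT_BY_TOTAL_WAIT_TIME, true);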