]>
Commit | Line | Data |
---|---|---|
515864a0 EC |
1 | /* |
2 | * Copyright (C) 2016, Emilio G. Cota <cota@braap.org> | |
3 | * | |
4 | * License: GNU GPL, version 2 or later. | |
5 | * See the COPYING file in the top-level directory. | |
6 | */ | |
7 | #include "qemu/osdep.h" | |
515864a0 EC |
8 | #include "qemu/processor.h" |
9 | #include "qemu/atomic.h" | |
10 | #include "qemu/qht.h" | |
11 | #include "qemu/rcu.h" | |
fe656e31 | 12 | #include "qemu/xxhash.h" |
515864a0 EC |
13 | |
14 | struct thread_stats { /* per-thread operation counters; summed across threads by add_stats() */ | |
15 | size_t rd; /* lookups in the read path that found the key */ | |
16 | size_t not_rd; /* lookups in the read path that found nothing */ | |
17 | size_t in; /* insertions that succeeded */ | |
18 | size_t not_in; /* insert turns where the key was already present or qht_insert() failed */ | |
19 | size_t rm; /* removals that succeeded */ | |
20 | size_t not_rm; /* remove turns where the key was absent or qht_remove() failed */ | |
21 | size_t rz; /* qht_resize() calls that returned true */ | |
22 | size_t not_rz; /* qht_resize() calls that returned false */ | |
23 | }; | |
24 | ||
25 | struct thread_info { /* per-thread state; one element per worker, allocated by th_create_n() */ | |
26 | void (*func)(struct thread_info *); /* operation run once per loop iteration by thread_func() */ | |
27 | struct thread_stats stats; /* counters updated only by the owning thread */ | |
d11f8249 RH |
28 | /* |
29 | * Seed is in the range [1..UINT64_MAX], because the RNG requires | |
30 | * a non-zero seed. To use, subtract 1 and compare against the | |
31 | * threshold with </>=. This lets threshold = 0 never match (0% hit), | |
32 | * and threshold = UINT64_MAX always match (100% hit). | |
33 | */ | |
34 | uint64_t seed; | |
515864a0 EC |
35 | bool write_op; /* writes alternate between insertions and removals */ |
36 | bool resize_down; /* true: next resize targets resize_min; false: resize_max */ | |
37 | } QEMU_ALIGNED(64); /* avoid false sharing among threads */ | |
38 | ||
39 | static struct qht ht; /* the hash table under test */ | |
40 | static QemuThread *rw_threads; /* handles of the lookup/update threads */ | |
41 | ||
42 | #define DEFAULT_RANGE (4096) | |
43 | #define DEFAULT_QHT_N_ELEMS DEFAULT_RANGE | |
44 | ||
45 | static unsigned int duration = 1; /* -d: run time, in seconds */ | |
46 | static unsigned int n_rw_threads = 1; /* -n */ | |
47 | static unsigned long lookup_range = DEFAULT_RANGE; /* -l: used as a mask (range - 1) over keys[] */ | |
48 | static unsigned long update_range = DEFAULT_RANGE; /* -r: used as a mask (range - 1) over keys[] */ | |
49 | static size_t init_range = DEFAULT_RANGE; /* -K: key range used when populating the table */ | |
50 | static size_t init_size = DEFAULT_RANGE; /* -k: number of keys inserted before the test starts */ | |
51 | static size_t n_ready_threads; /* incremented atomically by each worker once it is ready */ | |
52 | static long populate_offset; /* -o: value of the first key */ | |
53 | static long *keys; /* all keys, allocated once in htable_init() */ | |
54 | ||
55 | static size_t resize_min; /* resize target when resizing down */ | |
56 | static size_t resize_max; /* resize target when resizing up */ | |
57 | static struct thread_info *rz_info; | |
58 | static unsigned long resize_delay = 1000; /* -D: microseconds slept after each do_rz() iteration */ | |
59 | static double resize_rate; /* 0.0 to 1.0 */ | |
60 | static unsigned int n_rz_threads = 1; /* -N; forced to 0 in htable_init() when resize_rate is 0 */ | |
61 | static QemuThread *rz_threads; | |
bd224fce | 62 | static bool precompute_hash; |
515864a0 EC |
63 | |
64 | static double update_rate; /* 0.0 to 1.0 */ | |
65 | static uint64_t update_threshold; /* update_rate scaled to 64 bits by do_threshold() */ | |
66 | static uint64_t resize_threshold; /* resize_rate scaled to 64 bits by do_threshold() */ | |
67 | ||
68 | static size_t qht_n_elems = DEFAULT_QHT_N_ELEMS; /* -s: size hint passed to qht_init() */ | |
69 | static int qht_mode; /* mode flags passed to qht_init(); -R sets QHT_MODE_AUTO_RESIZE */ | |
70 | ||
71 | static bool test_start; /* set by run_test() to release the waiting workers */ | |
72 | static bool test_stop; /* set by run_test() to make the workers leave their loop */ | |
73 | ||
74 | static struct thread_info *rw_info; | |
75 | ||
76 | static const char commands_string[] = /* option summary printed by usage_complete() */ | |
77 | " -d = duration, in seconds\n" | |
78 | " -n = number of threads\n" | |
79 | "\n" | |
80 | " -o = offset at which keys start\n" | |
e132fde2 | 81 | " -p = precompute hashes\n" |
515864a0 EC |
82 | "\n" |
83 | " -g = set -s,-k,-K,-l,-r to the same value\n" | |
84 | " -s = initial size hint\n" | |
85 | " -k = initial number of keys\n" | |
86 | " -K = initial range of keys (will be rounded up to pow2)\n" | |
87 | " -l = lookup range of keys (will be rounded up to pow2)\n" | |
88 | " -r = update range of keys (will be rounded up to pow2)\n" | |
89 | "\n" | |
90 | " -u = update rate (0.0 to 100.0), 50/50 split of insertions/removals\n" | |
91 | "\n" | |
92 | " -R = enable auto-resize\n" | |
93 | " -S = resize rate (0.0 to 100.0)\n" | |
94 | " -D = delay (in us) between potential resizes\n" | |
95 | " -N = number of resize threads"; | |
96 | ||
97 | static void usage_complete(int argc, char *argv[]) | |
98 | { | |
99 | fprintf(stderr, "Usage: %s [options]\n", argv[0]); | |
100 | fprintf(stderr, "options:\n%s\n", commands_string); | |
101 | exit(-1); | |
102 | } | |
103 | ||
/*
 * Key comparison callback passed to qht_init(): both arguments point to
 * 'long' keys, which are equal when they hold the same value.
 */
static bool is_equal(const void *ap, const void *bp)
{
    const long lhs = *(const long *)ap;
    const long rhs = *(const long *)bp;

    return lhs == rhs;
}
111 | ||
/* Hash a key value with qemu_xxhash2(). */
static uint32_t h(unsigned long v)
{
    const uint32_t digest = qemu_xxhash2(v);

    return digest;
}
116 | ||
bd224fce EC |
/* Identity "hash": the key value itself, narrowed to 32 bits. */
static uint32_t hval(unsigned long v)
{
    return (uint32_t)v;
}
121 | ||
122 | static uint32_t (*hfunc)(unsigned long v) = h; /* hash applied to a key before each table operation; -p switches it to hval */ | |
123 | ||
515864a0 EC |
/*
 * xorshift64* step, from https://en.wikipedia.org/wiki/Xorshift
 *
 * Takes the current generator state and returns the next value.  Chosen
 * over rand_r() because it is faster and yields a wider range (RAND_MAX is
 * only guaranteed to be >= INT_MAX).  A zero input yields zero.
 */
static uint64_t xorshift64star(uint64_t x)
{
    const uint64_t multiplier = UINT64_C(2685821657736338717);
    uint64_t state = x;

    state = state ^ (state >> 12); /* a */
    state = state ^ (state << 25); /* b */
    state = state ^ (state >> 27); /* c */

    return state * multiplier;
}
136 | ||
137 | static void do_rz(struct thread_info *info) | |
138 | { | |
139 | struct thread_stats *stats = &info->stats; | |
d11f8249 | 140 | uint64_t r = info->seed - 1; |
515864a0 | 141 | |
d11f8249 | 142 | if (r < resize_threshold) { |
515864a0 EC |
143 | size_t size = info->resize_down ? resize_min : resize_max; |
144 | bool resized; | |
145 | ||
146 | resized = qht_resize(&ht, size); | |
147 | info->resize_down = !info->resize_down; | |
148 | ||
149 | if (resized) { | |
150 | stats->rz++; | |
151 | } else { | |
152 | stats->not_rz++; | |
153 | } | |
154 | } | |
155 | g_usleep(resize_delay); | |
156 | } | |
157 | ||
158 | static void do_rw(struct thread_info *info) | |
159 | { | |
160 | struct thread_stats *stats = &info->stats; | |
d11f8249 | 161 | uint64_t r = info->seed - 1; |
515864a0 EC |
162 | uint32_t hash; |
163 | long *p; | |
164 | ||
d11f8249 | 165 | if (r >= update_threshold) { |
515864a0 EC |
166 | bool read; |
167 | ||
d11f8249 | 168 | p = &keys[r & (lookup_range - 1)]; |
bd224fce | 169 | hash = hfunc(*p); |
61b8cef1 | 170 | read = qht_lookup(&ht, p, hash); |
515864a0 EC |
171 | if (read) { |
172 | stats->rd++; | |
173 | } else { | |
174 | stats->not_rd++; | |
175 | } | |
176 | } else { | |
d11f8249 | 177 | p = &keys[r & (update_range - 1)]; |
bd224fce | 178 | hash = hfunc(*p); |
515864a0 EC |
179 | if (info->write_op) { |
180 | bool written = false; | |
181 | ||
61b8cef1 | 182 | if (qht_lookup(&ht, p, hash) == NULL) { |
32359d52 | 183 | written = qht_insert(&ht, p, hash, NULL); |
515864a0 EC |
184 | } |
185 | if (written) { | |
186 | stats->in++; | |
187 | } else { | |
188 | stats->not_in++; | |
189 | } | |
190 | } else { | |
191 | bool removed = false; | |
192 | ||
61b8cef1 | 193 | if (qht_lookup(&ht, p, hash)) { |
515864a0 EC |
194 | removed = qht_remove(&ht, p, hash); |
195 | } | |
196 | if (removed) { | |
197 | stats->rm++; | |
198 | } else { | |
199 | stats->not_rm++; | |
200 | } | |
201 | } | |
202 | info->write_op = !info->write_op; | |
203 | } | |
204 | } | |
205 | ||
206 | static void *thread_func(void *p) | |
207 | { | |
208 | struct thread_info *info = p; | |
209 | ||
210 | rcu_register_thread(); | |
211 | ||
d73415a3 SH |
212 | qatomic_inc(&n_ready_threads); |
213 | while (!qatomic_read(&test_start)) { | |
515864a0 EC |
214 | cpu_relax(); |
215 | } | |
216 | ||
217 | rcu_read_lock(); | |
d73415a3 | 218 | while (!qatomic_read(&test_stop)) { |
d11f8249 | 219 | info->seed = xorshift64star(info->seed); |
515864a0 EC |
220 | info->func(info); |
221 | } | |
222 | rcu_read_unlock(); | |
223 | ||
224 | rcu_unregister_thread(); | |
225 | return NULL; | |
226 | } | |
227 | ||
228 | /* sets everything except info->func */ | |
229 | static void prepare_thread_info(struct thread_info *info, int i) | |
230 | { | |
231 | /* seed for the RNG; each thread should have a different one */ | |
d11f8249 | 232 | info->seed = (i + 1) ^ time(NULL); |
515864a0 EC |
233 | /* the first update will be a write */ |
234 | info->write_op = true; | |
235 | /* the first resize will be down */ | |
236 | info->resize_down = true; | |
237 | ||
238 | memset(&info->stats, 0, sizeof(info->stats)); | |
239 | } | |
240 | ||
241 | static void | |
242 | th_create_n(QemuThread **threads, struct thread_info **infos, const char *name, | |
243 | void (*func)(struct thread_info *), int offset, int n) | |
244 | { | |
245 | struct thread_info *info; | |
246 | QemuThread *th; | |
247 | int i; | |
248 | ||
249 | th = g_malloc(sizeof(*th) * n); | |
250 | *threads = th; | |
251 | ||
252 | info = qemu_memalign(64, sizeof(*info) * n); | |
253 | *infos = info; | |
254 | ||
255 | for (i = 0; i < n; i++) { | |
256 | prepare_thread_info(&info[i], offset + i); | |
257 | info[i].func = func; | |
258 | qemu_thread_create(&th[i], name, thread_func, &info[i], | |
259 | QEMU_THREAD_JOINABLE); | |
260 | } | |
261 | } | |
262 | ||
263 | static void create_threads(void) | |
264 | { | |
265 | th_create_n(&rw_threads, &rw_info, "rw", do_rw, 0, n_rw_threads); | |
266 | th_create_n(&rz_threads, &rz_info, "rz", do_rz, n_rw_threads, n_rz_threads); | |
267 | } | |
268 | ||
269 | static void pr_params(void) | |
270 | { | |
271 | printf("Parameters:\n"); | |
272 | printf(" duration: %d s\n", duration); | |
273 | printf(" # of threads: %u\n", n_rw_threads); | |
274 | printf(" initial # of keys: %zu\n", init_size); | |
275 | printf(" initial size hint: %zu\n", qht_n_elems); | |
276 | printf(" auto-resize: %s\n", | |
277 | qht_mode & QHT_MODE_AUTO_RESIZE ? "on" : "off"); | |
278 | if (resize_rate) { | |
279 | printf(" resize_rate: %f%%\n", resize_rate * 100.0); | |
280 | printf(" resize range: %zu-%zu\n", resize_min, resize_max); | |
281 | printf(" # resize threads %u\n", n_rz_threads); | |
282 | } | |
283 | printf(" update rate: %f%%\n", update_rate * 100.0); | |
284 | printf(" offset: %ld\n", populate_offset); | |
285 | printf(" initial key range: %zu\n", init_range); | |
286 | printf(" lookup range: %lu\n", lookup_range); | |
287 | printf(" update range: %lu\n", update_range); | |
288 | } | |
289 | ||
/*
 * Convert a probability into a 64-bit threshold.
 *
 * @rate:      probability, nominally in [0.0, 1.0]
 * @threshold: receives the scaled value
 *
 * A uniformly distributed 64-bit draw r then hits with probability 'rate'
 * when tested as r < *threshold.
 *
 * The rate is scaled by 2**64, with a special case for 1.0 (which would
 * not fit).  The remaining inputs map to values between 0 and
 * 0xfffffffffffff800.  Scaling by UINT64_MAX is not possible because that
 * value is not representable as an IEEE double, and scaling by the next
 * smaller double, nextafter(0x1p64, 0), would leave a gap between the
 * final two results that is twice as large as any other.
 *
 * Out-of-range inputs are clamped: converting a negative, NaN or too
 * large double to uint64_t is undefined behaviour, so rates that are not
 * greater than zero (including NaN) give 0 and rates of 1.0 or more give
 * UINT64_MAX.
 */
static void do_threshold(double rate, uint64_t *threshold)
{
    if (!(rate > 0.0)) {
        /* zero, negative or NaN: never hit */
        *threshold = 0;
    } else if (rate >= 1.0) {
        /* always hit */
        *threshold = UINT64_MAX;
    } else {
        /* 0 < rate < 1, so the product is below 2**64 and fits */
        *threshold = rate * 0x1p64;
    }
}
313 | ||
314 | static void htable_init(void) | |
315 | { | |
316 | unsigned long n = MAX(init_range, update_range); | |
317 | uint64_t r = time(NULL); | |
318 | size_t retries = 0; | |
319 | size_t i; | |
320 | ||
321 | /* avoid allocating memory later by allocating all the keys now */ | |
322 | keys = g_malloc(sizeof(*keys) * n); | |
323 | for (i = 0; i < n; i++) { | |
bd224fce EC |
324 | long val = populate_offset + i; |
325 | ||
326 | keys[i] = precompute_hash ? h(val) : hval(val); | |
515864a0 EC |
327 | } |
328 | ||
329 | /* some sanity checks */ | |
330 | g_assert_cmpuint(lookup_range, <=, n); | |
331 | ||
332 | /* compute thresholds */ | |
333 | do_threshold(update_rate, &update_threshold); | |
334 | do_threshold(resize_rate, &resize_threshold); | |
335 | ||
336 | if (resize_rate) { | |
337 | resize_min = n / 2; | |
338 | resize_max = n; | |
339 | assert(resize_min < resize_max); | |
340 | } else { | |
341 | n_rz_threads = 0; | |
342 | } | |
343 | ||
344 | /* initialize the hash table */ | |
61b8cef1 | 345 | qht_init(&ht, is_equal, qht_n_elems, qht_mode); |
515864a0 EC |
346 | assert(init_size <= init_range); |
347 | ||
348 | pr_params(); | |
349 | ||
350 | fprintf(stderr, "Initialization: populating %zu items...", init_size); | |
351 | for (i = 0; i < init_size; i++) { | |
352 | for (;;) { | |
353 | uint32_t hash; | |
354 | long *p; | |
355 | ||
356 | r = xorshift64star(r); | |
357 | p = &keys[r & (init_range - 1)]; | |
bd224fce | 358 | hash = hfunc(*p); |
32359d52 | 359 | if (qht_insert(&ht, p, hash, NULL)) { |
515864a0 EC |
360 | break; |
361 | } | |
362 | retries++; | |
363 | } | |
364 | } | |
365 | fprintf(stderr, " populated after %zu retries\n", retries); | |
366 | } | |
367 | ||
368 | static void add_stats(struct thread_stats *s, struct thread_info *info, int n) | |
369 | { | |
370 | int i; | |
371 | ||
372 | for (i = 0; i < n; i++) { | |
373 | struct thread_stats *stats = &info[i].stats; | |
374 | ||
375 | s->rd += stats->rd; | |
376 | s->not_rd += stats->not_rd; | |
377 | ||
378 | s->in += stats->in; | |
379 | s->not_in += stats->not_in; | |
380 | ||
381 | s->rm += stats->rm; | |
382 | s->not_rm += stats->not_rm; | |
383 | ||
384 | s->rz += stats->rz; | |
385 | s->not_rz += stats->not_rz; | |
386 | } | |
387 | } | |
388 | ||
389 | static void pr_stats(void) | |
390 | { | |
391 | struct thread_stats s = {}; | |
392 | double tx; | |
393 | ||
394 | add_stats(&s, rw_info, n_rw_threads); | |
395 | add_stats(&s, rz_info, n_rz_threads); | |
396 | ||
397 | printf("Results:\n"); | |
398 | ||
399 | if (resize_rate) { | |
400 | printf(" Resizes: %zu (%.2f%% of %zu)\n", | |
401 | s.rz, (double)s.rz / (s.rz + s.not_rz) * 100, s.rz + s.not_rz); | |
402 | } | |
403 | ||
404 | printf(" Read: %.2f M (%.2f%% of %.2fM)\n", | |
405 | (double)s.rd / 1e6, | |
406 | (double)s.rd / (s.rd + s.not_rd) * 100, | |
407 | (double)(s.rd + s.not_rd) / 1e6); | |
408 | printf(" Inserted: %.2f M (%.2f%% of %.2fM)\n", | |
409 | (double)s.in / 1e6, | |
410 | (double)s.in / (s.in + s.not_in) * 100, | |
411 | (double)(s.in + s.not_in) / 1e6); | |
412 | printf(" Removed: %.2f M (%.2f%% of %.2fM)\n", | |
413 | (double)s.rm / 1e6, | |
414 | (double)s.rm / (s.rm + s.not_rm) * 100, | |
415 | (double)(s.rm + s.not_rm) / 1e6); | |
416 | ||
417 | tx = (s.rd + s.not_rd + s.in + s.not_in + s.rm + s.not_rm) / 1e6 / duration; | |
418 | printf(" Throughput: %.2f MT/s\n", tx); | |
419 | printf(" Throughput/thread: %.2f MT/s/thread\n", tx / n_rw_threads); | |
420 | } | |
421 | ||
422 | static void run_test(void) | |
423 | { | |
515864a0 EC |
424 | int i; |
425 | ||
d73415a3 | 426 | while (qatomic_read(&n_ready_threads) != n_rw_threads + n_rz_threads) { |
515864a0 EC |
427 | cpu_relax(); |
428 | } | |
eb4f8e10 | 429 | |
d73415a3 | 430 | qatomic_set(&test_start, true); |
eb4f8e10 | 431 | g_usleep(duration * G_USEC_PER_SEC); |
d73415a3 | 432 | qatomic_set(&test_stop, true); |
515864a0 EC |
433 | |
434 | for (i = 0; i < n_rw_threads; i++) { | |
435 | qemu_thread_join(&rw_threads[i]); | |
436 | } | |
437 | for (i = 0; i < n_rz_threads; i++) { | |
438 | qemu_thread_join(&rz_threads[i]); | |
439 | } | |
440 | } | |
441 | ||
442 | static void parse_args(int argc, char *argv[]) | |
443 | { | |
444 | int c; | |
445 | ||
446 | for (;;) { | |
bd224fce | 447 | c = getopt(argc, argv, "d:D:g:k:K:l:hn:N:o:pr:Rs:S:u:"); |
515864a0 EC |
448 | if (c < 0) { |
449 | break; | |
450 | } | |
451 | switch (c) { | |
452 | case 'd': | |
453 | duration = atoi(optarg); | |
454 | break; | |
455 | case 'D': | |
456 | resize_delay = atol(optarg); | |
457 | break; | |
458 | case 'g': | |
459 | init_range = pow2ceil(atol(optarg)); | |
460 | lookup_range = pow2ceil(atol(optarg)); | |
461 | update_range = pow2ceil(atol(optarg)); | |
462 | qht_n_elems = atol(optarg); | |
463 | init_size = atol(optarg); | |
464 | break; | |
465 | case 'h': | |
466 | usage_complete(argc, argv); | |
467 | exit(0); | |
468 | case 'k': | |
469 | init_size = atol(optarg); | |
470 | break; | |
471 | case 'K': | |
472 | init_range = pow2ceil(atol(optarg)); | |
473 | break; | |
474 | case 'l': | |
475 | lookup_range = pow2ceil(atol(optarg)); | |
476 | break; | |
477 | case 'n': | |
478 | n_rw_threads = atoi(optarg); | |
479 | break; | |
480 | case 'N': | |
481 | n_rz_threads = atoi(optarg); | |
482 | break; | |
483 | case 'o': | |
484 | populate_offset = atol(optarg); | |
485 | break; | |
bd224fce EC |
486 | case 'p': |
487 | precompute_hash = true; | |
488 | hfunc = hval; | |
489 | break; | |
515864a0 EC |
490 | case 'r': |
491 | update_range = pow2ceil(atol(optarg)); | |
492 | break; | |
493 | case 'R': | |
494 | qht_mode |= QHT_MODE_AUTO_RESIZE; | |
495 | break; | |
496 | case 's': | |
497 | qht_n_elems = atol(optarg); | |
498 | break; | |
499 | case 'S': | |
500 | resize_rate = atof(optarg) / 100.0; | |
501 | if (resize_rate > 1.0) { | |
502 | resize_rate = 1.0; | |
503 | } | |
504 | break; | |
505 | case 'u': | |
506 | update_rate = atof(optarg) / 100.0; | |
507 | if (update_rate > 1.0) { | |
508 | update_rate = 1.0; | |
509 | } | |
510 | break; | |
511 | } | |
512 | } | |
513 | } | |
514 | ||
/*
 * Benchmark driver: read the options, build and populate the hash table,
 * start the worker threads, run the timed test and report the results.
 */
int main(int argc, char *argv[])
{
    /* configuration */
    parse_args(argc, argv);

    /* setup */
    htable_init();
    create_threads();

    /* measurement and report */
    run_test();
    pr_stats();

    return 0;
}