// SPDX-License-Identifier: GPL-2.0
/*
 * A memslot-related performance benchmark.
 *
 * Copyright (C) 2021 Oracle and/or its affiliates.
 *
 * Basic guest setup / host vCPU thread code lifted from set_memory_region_test.
 */
#include <pthread.h>
#include <sched.h>
#include <semaphore.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <time.h>
#include <unistd.h>

#include <linux/compiler.h>

#include <test_util.h>
#include <kvm_util.h>
#include <processor.h>

#define VCPU_ID 0

#define MEM_SIZE ((512U << 20) + 4096)
#define MEM_SIZE_PAGES (MEM_SIZE / 4096)
#define MEM_GPA 0x10000000UL
#define MEM_AUX_GPA MEM_GPA
#define MEM_SYNC_GPA MEM_AUX_GPA
#define MEM_TEST_GPA (MEM_AUX_GPA + 4096)
#define MEM_TEST_SIZE (MEM_SIZE - 4096)
static_assert(MEM_SIZE % 4096 == 0, "invalid mem size");
static_assert(MEM_TEST_SIZE % 4096 == 0, "invalid mem test size");
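
/*
 * Guest physical memory layout: the first 4 KiB page at MEM_GPA (aliased
 * as MEM_AUX_GPA and MEM_SYNC_GPA) holds the host/guest sync structure,
 * while the test area proper starts one page later at MEM_TEST_GPA and
 * spans the remaining MEM_TEST_SIZE bytes.
 */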

/*
 * 32 MiB is the maximum size that still gets well over 100 iterations on
 * 509 slots.
 * Considering that each slot needs at least one page, up to 8194 slots
 * in use can then be tested (although with slightly limited resolution).
 */
#define MEM_SIZE_MAP ((32U << 20) + 4096)
#define MEM_SIZE_MAP_PAGES (MEM_SIZE_MAP / 4096)
#define MEM_TEST_MAP_SIZE (MEM_SIZE_MAP - 4096)
#define MEM_TEST_MAP_SIZE_PAGES (MEM_TEST_MAP_SIZE / 4096)
static_assert(MEM_SIZE_MAP % 4096 == 0, "invalid map test region size");
static_assert(MEM_TEST_MAP_SIZE % 4096 == 0, "invalid map test region size");
static_assert(MEM_TEST_MAP_SIZE_PAGES % 2 == 0, "invalid map test region size");
static_assert(MEM_TEST_MAP_SIZE_PAGES > 2, "invalid map test region size");

/*
 * 128 MiB is the minimum size that fills 32k slots with at least one page
 * each while still getting 100+ iterations in such a test.
 */
#define MEM_TEST_UNMAP_SIZE (128U << 20)
#define MEM_TEST_UNMAP_SIZE_PAGES (MEM_TEST_UNMAP_SIZE / 4096)
/* 2 MiB chunk size like a typical huge page */
#define MEM_TEST_UNMAP_CHUNK_PAGES (2U << (20 - 12))
static_assert(MEM_TEST_UNMAP_SIZE <= MEM_TEST_SIZE,
              "invalid unmap test region size");
static_assert(MEM_TEST_UNMAP_SIZE % 4096 == 0,
              "invalid unmap test region size");
static_assert(MEM_TEST_UNMAP_SIZE_PAGES %
              (2 * MEM_TEST_UNMAP_CHUNK_PAGES) == 0,
              "invalid unmap test region size");

/*
 * For the move active test the middle of the test area is placed on
 * a memslot boundary: half lies in the memslot being moved, half in
 * other memslot(s).
 *
 * When running this test with 32k memslots (32764, really), each memslot
 * contains 4 pages.
 * The last one additionally contains the remaining 21 pages of memory,
 * for a total of 25 pages.
 * Hence, the maximum size here is 50 pages.
 */
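
/*
 * A worked example, assuming KVM_CAP_NR_MEMSLOTS == 32764 as above:
 * MEM_SIZE is 512 MiB + 4 KiB = 131073 pages.  One slot is taken by the
 * default memslot, so the 32763 test slots get 131073 / 32763 = 4 pages
 * each, with 131073 - 32763 * 4 = 21 pages left over for the last slot
 * (4 + 21 = 25 pages).  Half of the 50-page area then lies in that last,
 * moved memslot and half in the slot(s) preceding it.
 */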
#define MEM_TEST_MOVE_SIZE_PAGES (50)
#define MEM_TEST_MOVE_SIZE (MEM_TEST_MOVE_SIZE_PAGES * 4096)
#define MEM_TEST_MOVE_GPA_DEST (MEM_GPA + MEM_SIZE)
static_assert(MEM_TEST_MOVE_SIZE <= MEM_TEST_SIZE,
              "invalid move test region size");

#define MEM_TEST_VAL_1 0x1122334455667788
#define MEM_TEST_VAL_2 0x99AABBCCDDEEFF00

struct vm_data {
        struct kvm_vm *vm;
        pthread_t vcpu_thread;
        uint32_t nslots;
        uint64_t npages;
        uint64_t pages_per_slot;
        void **hva_slots;
        bool mmio_ok;
        uint64_t mmio_gpa_min;
        uint64_t mmio_gpa_max;
};

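/*
 * The sync structure lives in the first page of guest memory (at
 * MEM_SYNC_GPA), so it is visible to both sides: the guest addresses it
 * directly while the host reaches it through vm_gpa2hva().  All
 * host/guest signalling goes through these atomic flags.
 */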
struct sync_area {
        atomic_bool start_flag;
        atomic_bool exit_flag;
        atomic_bool sync_flag;
        void *move_area_ptr;
};

/*
 * Technically, we also need the atomic bool to be address-free, which
 * is recommended, but not strictly required, by C11 for lockless
 * implementations.
 * However, in practice both GCC and Clang fulfill this requirement on
 * all KVM-supported platforms.
 */
static_assert(ATOMIC_BOOL_LOCK_FREE == 2, "atomic bool is not lockless");

static sem_t vcpu_ready;

static bool map_unmap_verify;

static bool verbose;
#define pr_info_v(...)                          \
        do {                                    \
                if (verbose)                    \
                        pr_info(__VA_ARGS__);   \
        } while (0)

static void *vcpu_worker(void *data)
{
        struct vm_data *vm = data;
        struct kvm_run *run;
        struct ucall uc;
        uint64_t cmd;

        run = vcpu_state(vm->vm, VCPU_ID);
        while (1) {
                vcpu_run(vm->vm, VCPU_ID);

                if (run->exit_reason == KVM_EXIT_IO) {
                        cmd = get_ucall(vm->vm, VCPU_ID, &uc);
                        if (cmd != UCALL_SYNC)
                                break;

                        sem_post(&vcpu_ready);
                        continue;
                }

                if (run->exit_reason != KVM_EXIT_MMIO)
                        break;

                TEST_ASSERT(vm->mmio_ok, "Unexpected mmio exit");
                TEST_ASSERT(run->mmio.is_write, "Unexpected mmio read");
                TEST_ASSERT(run->mmio.len == 8,
                            "Unexpected exit mmio size = %u", run->mmio.len);
                TEST_ASSERT(run->mmio.phys_addr >= vm->mmio_gpa_min &&
                            run->mmio.phys_addr <= vm->mmio_gpa_max,
                            "Unexpected exit mmio address = 0x%llx",
                            run->mmio.phys_addr);
        }

        if (run->exit_reason == KVM_EXIT_IO && cmd == UCALL_ABORT)
                TEST_FAIL("%s at %s:%ld, val = %lu", (const char *)uc.args[0],
                          __FILE__, uc.args[1], uc.args[2]);

        return NULL;
}

static void wait_for_vcpu(void)
{
        struct timespec ts;

        TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts),
                    "clock_gettime() failed: %d\n", errno);

        ts.tv_sec += 2;
        TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts),
                    "sem_timedwait() failed: %d\n", errno);
}

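/*
 * Translate a guest physical address in the test region into a host
 * virtual address, relying on the fact that every test memslot except
 * the last one holds exactly pages_per_slot pages (the last one also
 * gets the division remainder).  If rempages is non-NULL it receives the
 * number of pages left in the containing memslot starting at gpa, which
 * must then be page-aligned.
 */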
static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages)
{
        uint64_t gpage, pgoffs;
        uint32_t slot, slotoffs;
        void *base;

        TEST_ASSERT(gpa >= MEM_GPA, "Too low gpa to translate");
        TEST_ASSERT(gpa < MEM_GPA + data->npages * 4096,
                    "Too high gpa to translate");
        gpa -= MEM_GPA;

        gpage = gpa / 4096;
        pgoffs = gpa % 4096;
        slot = min(gpage / data->pages_per_slot, (uint64_t)data->nslots - 1);
        slotoffs = gpage - (slot * data->pages_per_slot);

        if (rempages) {
                uint64_t slotpages;

                if (slot == data->nslots - 1)
                        slotpages = data->npages - slot * data->pages_per_slot;
                else
                        slotpages = data->pages_per_slot;

                TEST_ASSERT(!pgoffs,
                            "Asking for remaining pages in slot but gpa not page aligned");
                *rempages = slotpages - slotoffs;
        }

        base = data->hva_slots[slot];
        return (uint8_t *)base + slotoffs * 4096 + pgoffs;
}

static uint64_t vm_slot2gpa(struct vm_data *data, uint32_t slot)
{
        TEST_ASSERT(slot < data->nslots, "Too high slot number");

        return MEM_GPA + slot * data->pages_per_slot * 4096;
}

static struct vm_data *alloc_vm(void)
{
        struct vm_data *data;

        data = malloc(sizeof(*data));
        TEST_ASSERT(data, "malloc(vmdata) failed");

        data->vm = NULL;
        data->hva_slots = NULL;

        return data;
}

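/*
 * Create the test VM and divide its mempages of test memory evenly among
 * the available memslots (slot 0 is the default slot created by
 * vm_create_default(), so the test slots are numbered from 1, with the
 * division remainder going to the last one).  Returns false with a
 * suggested slot cap in *maxslots when there is not at least one page
 * per test slot.
 */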
static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
                       void *guest_code, uint64_t mempages,
                       struct timespec *slot_runtime)
{
        uint32_t max_mem_slots;
        uint64_t rempages;
        uint64_t guest_addr;
        uint32_t slot;
        struct timespec tstart;
        struct sync_area *sync;

        max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
        TEST_ASSERT(max_mem_slots > 1,
                    "KVM_CAP_NR_MEMSLOTS should be greater than 1");
        TEST_ASSERT(nslots > 1 || nslots == -1,
                    "Slot count cap should be greater than 1");
        if (nslots != -1)
                max_mem_slots = min(max_mem_slots, (uint32_t)nslots);
        pr_info_v("Allowed number of memory slots: %"PRIu32"\n", max_mem_slots);

        TEST_ASSERT(mempages > 1,
                    "Can't test without any memory");

        data->npages = mempages;
        data->nslots = max_mem_slots - 1;
        data->pages_per_slot = mempages / data->nslots;
        if (!data->pages_per_slot) {
                *maxslots = mempages + 1;
                return false;
        }

        rempages = mempages % data->nslots;
        data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots);
        TEST_ASSERT(data->hva_slots, "malloc() fail");

        data->vm = vm_create_default(VCPU_ID, mempages, guest_code);

        pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n",
                  max_mem_slots - 1, data->pages_per_slot, rempages);

        clock_gettime(CLOCK_MONOTONIC, &tstart);
        for (slot = 1, guest_addr = MEM_GPA; slot < max_mem_slots; slot++) {
                uint64_t npages;

                npages = data->pages_per_slot;
                if (slot == max_mem_slots - 1)
                        npages += rempages;

                vm_userspace_mem_region_add(data->vm, VM_MEM_SRC_ANONYMOUS,
                                            guest_addr, slot, npages,
                                            0);
                guest_addr += npages * 4096;
        }
        *slot_runtime = timespec_elapsed(tstart);

        for (slot = 0, guest_addr = MEM_GPA; slot < max_mem_slots - 1; slot++) {
                uint64_t npages;
                uint64_t gpa;

                npages = data->pages_per_slot;
                if (slot == max_mem_slots - 2)
                        npages += rempages;

                gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr,
                                         slot + 1);
                TEST_ASSERT(gpa == guest_addr,
                            "vm_phy_pages_alloc() failed\n");

                data->hva_slots[slot] = addr_gpa2hva(data->vm, guest_addr);
                memset(data->hva_slots[slot], 0, npages * 4096);

                guest_addr += npages * 4096;
        }

        virt_map(data->vm, MEM_GPA, MEM_GPA, mempages, 0);

        sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
        atomic_init(&sync->start_flag, false);
        atomic_init(&sync->exit_flag, false);
        atomic_init(&sync->sync_flag, false);

        data->mmio_ok = false;

        return true;
}

static void launch_vm(struct vm_data *data)
{
        pr_info_v("Launching the test VM\n");

        pthread_create(&data->vcpu_thread, NULL, vcpu_worker, data);

        /* Ensure the guest thread is spun up. */
        wait_for_vcpu();
}

static void free_vm(struct vm_data *data)
{
        kvm_vm_free(data->vm);
        free(data->hva_slots);
        free(data);
}

static void wait_guest_exit(struct vm_data *data)
{
        pthread_join(data->vcpu_thread, NULL);
}

static void let_guest_run(struct sync_area *sync)
{
        atomic_store_explicit(&sync->start_flag, true, memory_order_release);
}

static void guest_spin_until_start(void)
{
        struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;

        while (!atomic_load_explicit(&sync->start_flag, memory_order_acquire))
                ;
}

static void make_guest_exit(struct sync_area *sync)
{
        atomic_store_explicit(&sync->exit_flag, true, memory_order_release);
}

static bool _guest_should_exit(void)
{
        struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;

        return atomic_load_explicit(&sync->exit_flag, memory_order_acquire);
}

#define guest_should_exit() unlikely(_guest_should_exit())

/*
 * noinline so we can easily see how much time the host spends waiting
 * for the guest.
 * For the same reason use alarm() instead of polling clock_gettime()
 * to implement a wait timeout.
 */
static noinline void host_perform_sync(struct sync_area *sync)
{
        alarm(2);

        atomic_store_explicit(&sync->sync_flag, true, memory_order_release);
        while (atomic_load_explicit(&sync->sync_flag, memory_order_acquire))
                ;

        alarm(0);
}

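/*
 * The guest-side counterpart of host_perform_sync(): the host raises
 * sync_flag and spins until the guest atomically claims it back to false,
 * so each completed exchange hands off exactly one test phase between the
 * two sides.  The weak compare-exchange may fail spuriously, hence the
 * retry loop, which also polls for the exit request.
 */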
static bool guest_perform_sync(void)
{
        struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
        bool expected;

        do {
                if (guest_should_exit())
                        return false;

                expected = true;
        } while (!atomic_compare_exchange_weak_explicit(&sync->sync_flag,
                                                        &expected, false,
                                                        memory_order_acq_rel,
                                                        memory_order_relaxed));

        return true;
}

static void guest_code_test_memslot_move(void)
{
        struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
        uintptr_t base = (typeof(base))READ_ONCE(sync->move_area_ptr);

        GUEST_SYNC(0);

        guest_spin_until_start();

        while (!guest_should_exit()) {
                uintptr_t ptr;

                for (ptr = base; ptr < base + MEM_TEST_MOVE_SIZE;
                     ptr += 4096)
                        *(uint64_t *)ptr = MEM_TEST_VAL_1;

                /*
                 * No host sync here since the MMIO exits are so expensive
                 * that the host would spend most of its time waiting for
                 * the guest and so instead of measuring memslot move
                 * performance we would measure the performance and
                 * likelihood of MMIO exits.
                 */
        }

        GUEST_DONE();
}

static void guest_code_test_memslot_map(void)
{
        struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;

        GUEST_SYNC(0);

        guest_spin_until_start();

        while (1) {
                uintptr_t ptr;

                for (ptr = MEM_TEST_GPA;
                     ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2; ptr += 4096)
                        *(uint64_t *)ptr = MEM_TEST_VAL_1;

                if (!guest_perform_sync())
                        break;

                for (ptr = MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2;
                     ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE; ptr += 4096)
                        *(uint64_t *)ptr = MEM_TEST_VAL_2;

                if (!guest_perform_sync())
                        break;
        }

        GUEST_DONE();
}

static void guest_code_test_memslot_unmap(void)
{
        struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;

        GUEST_SYNC(0);

        guest_spin_until_start();

        while (1) {
                uintptr_t ptr = MEM_TEST_GPA;

                /*
                 * We can afford to access (map) just a small number of pages
                 * per host sync as otherwise the host will spend
                 * a significant amount of its time waiting for the guest
                 * (instead of doing unmap operations), which would
                 * effectively turn this into a map performance test.
                 *
                 * Just access a single page to be on the safe side.
                 */
                *(uint64_t *)ptr = MEM_TEST_VAL_1;

                if (!guest_perform_sync())
                        break;

                ptr += MEM_TEST_UNMAP_SIZE / 2;
                *(uint64_t *)ptr = MEM_TEST_VAL_2;

                if (!guest_perform_sync())
                        break;
        }

        GUEST_DONE();
}

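/*
 * Note that the guest writes MEM_TEST_VAL_1 at offset 0 of every test
 * page while the host-side loop (test_memslot_rw_loop()) writes
 * MEM_TEST_VAL_2 at offset 4096 / 2, so the two directions of this
 * read/write test use distinct words within each page and do not
 * overwrite each other.
 */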
static void guest_code_test_memslot_rw(void)
{
        GUEST_SYNC(0);

        guest_spin_until_start();

        while (1) {
                uintptr_t ptr;

                for (ptr = MEM_TEST_GPA;
                     ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += 4096)
                        *(uint64_t *)ptr = MEM_TEST_VAL_1;

                if (!guest_perform_sync())
                        break;

                for (ptr = MEM_TEST_GPA + 4096 / 2;
                     ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += 4096) {
                        uint64_t val = *(uint64_t *)ptr;

                        GUEST_ASSERT_1(val == MEM_TEST_VAL_2, val);
                        *(uint64_t *)ptr = 0;
                }

                if (!guest_perform_sync())
                        break;
        }

        GUEST_DONE();
}

static bool test_memslot_move_prepare(struct vm_data *data,
                                      struct sync_area *sync,
                                      uint64_t *maxslots, bool isactive)
{
        uint64_t movesrcgpa, movetestgpa;

        movesrcgpa = vm_slot2gpa(data, data->nslots - 1);

        if (isactive) {
                uint64_t lastpages;

                vm_gpa2hva(data, movesrcgpa, &lastpages);
                if (lastpages < MEM_TEST_MOVE_SIZE_PAGES / 2) {
                        *maxslots = 0;
                        return false;
                }
        }

        movetestgpa = movesrcgpa - (MEM_TEST_MOVE_SIZE / (isactive ? 2 : 1));
        sync->move_area_ptr = (void *)movetestgpa;

        if (isactive) {
                data->mmio_ok = true;
                data->mmio_gpa_min = movesrcgpa;
                data->mmio_gpa_max = movesrcgpa + MEM_TEST_MOVE_SIZE / 2 - 1;
        }

        return true;
}

static bool test_memslot_move_prepare_active(struct vm_data *data,
                                             struct sync_area *sync,
                                             uint64_t *maxslots)
{
        return test_memslot_move_prepare(data, sync, maxslots, true);
}

static bool test_memslot_move_prepare_inactive(struct vm_data *data,
                                               struct sync_area *sync,
                                               uint64_t *maxslots)
{
        return test_memslot_move_prepare(data, sync, maxslots, false);
}

static void test_memslot_move_loop(struct vm_data *data, struct sync_area *sync)
{
        uint64_t movesrcgpa;

        movesrcgpa = vm_slot2gpa(data, data->nslots - 1);
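        /*
         * Test slot i is KVM memslot i + 1, since slot 0 is the default
         * memslot created by vm_create_default(); hence the "- 1 + 1"
         * below addresses the last (highest) test slot.
         */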
        vm_mem_region_move(data->vm, data->nslots - 1 + 1,
                           MEM_TEST_MOVE_GPA_DEST);
        vm_mem_region_move(data->vm, data->nslots - 1 + 1, movesrcgpa);
}

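/*
 * "Unmap" the requested page range from the host side via
 * madvise(MADV_DONTNEED), dropping the backing pages so that subsequent
 * guest accesses have to be faulted in (and mapped by KVM) again.
 */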
static void test_memslot_do_unmap(struct vm_data *data,
                                  uint64_t offsp, uint64_t count)
{
        uint64_t gpa, ctr;

        for (gpa = MEM_TEST_GPA + offsp * 4096, ctr = 0; ctr < count; ) {
                uint64_t npages;
                void *hva;
                int ret;

                hva = vm_gpa2hva(data, gpa, &npages);
                TEST_ASSERT(npages, "Empty memory slot at gptr 0x%"PRIx64, gpa);
                npages = min(npages, count - ctr);
                ret = madvise(hva, npages * 4096, MADV_DONTNEED);
                TEST_ASSERT(!ret,
                            "madvise(%p, MADV_DONTNEED) on VM memory should not fail for gptr 0x%"PRIx64,
                            hva, gpa);
                ctr += npages;
                gpa += npages * 4096;
        }
        TEST_ASSERT(ctr == count,
                    "madvise(MADV_DONTNEED) should exactly cover all of the requested area");
}

static void test_memslot_map_unmap_check(struct vm_data *data,
                                         uint64_t offsp, uint64_t valexp)
{
        uint64_t gpa;
        uint64_t *val;

        if (!map_unmap_verify)
                return;

        gpa = MEM_TEST_GPA + offsp * 4096;
        val = (typeof(val))vm_gpa2hva(data, gpa, NULL);
        TEST_ASSERT(*val == valexp,
                    "Guest written values should read back correctly before unmap (%"PRIu64" vs %"PRIu64" @ %"PRIx64")",
                    *val, valexp, gpa);
        *val = 0;
}

static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync)
{
        /*
         * Unmap the second half of the test area while guest writes to (maps)
         * the first half.
         */
        test_memslot_do_unmap(data, MEM_TEST_MAP_SIZE_PAGES / 2,
                              MEM_TEST_MAP_SIZE_PAGES / 2);

        /*
         * Wait for the guest to finish writing the first half of the test
         * area, verify the written value on the first and the last page of
         * this area and then unmap it.
         * Meanwhile, the guest is writing to (mapping) the second half of
         * the test area.
         */
        host_perform_sync(sync);
        test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
        test_memslot_map_unmap_check(data,
                                     MEM_TEST_MAP_SIZE_PAGES / 2 - 1,
                                     MEM_TEST_VAL_1);
        test_memslot_do_unmap(data, 0, MEM_TEST_MAP_SIZE_PAGES / 2);

        /*
         * Wait for the guest to finish writing the second half of the test
         * area and verify the written value on the first and the last page
         * of this area.
         * The area will be unmapped at the beginning of the next loop
         * iteration.
         * Meanwhile, the guest is writing to (mapping) the first half of
         * the test area.
         */
        host_perform_sync(sync);
        test_memslot_map_unmap_check(data, MEM_TEST_MAP_SIZE_PAGES / 2,
                                     MEM_TEST_VAL_2);
        test_memslot_map_unmap_check(data, MEM_TEST_MAP_SIZE_PAGES - 1,
                                     MEM_TEST_VAL_2);
}

static void test_memslot_unmap_loop_common(struct vm_data *data,
                                           struct sync_area *sync,
                                           uint64_t chunk)
{
        uint64_t ctr;

        /*
         * Wait for the guest to finish mapping page(s) in the first half
         * of the test area, verify the written value and then perform unmap
         * of this area.
         * Meanwhile, the guest is writing to (mapping) page(s) in the second
         * half of the test area.
         */
        host_perform_sync(sync);
        test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
        for (ctr = 0; ctr < MEM_TEST_UNMAP_SIZE_PAGES / 2; ctr += chunk)
                test_memslot_do_unmap(data, ctr, chunk);

        /* Likewise, but for the opposite host / guest areas */
        host_perform_sync(sync);
        test_memslot_map_unmap_check(data, MEM_TEST_UNMAP_SIZE_PAGES / 2,
                                     MEM_TEST_VAL_2);
        for (ctr = MEM_TEST_UNMAP_SIZE_PAGES / 2;
             ctr < MEM_TEST_UNMAP_SIZE_PAGES; ctr += chunk)
                test_memslot_do_unmap(data, ctr, chunk);
}

static void test_memslot_unmap_loop(struct vm_data *data,
                                    struct sync_area *sync)
{
        test_memslot_unmap_loop_common(data, sync, 1);
}

static void test_memslot_unmap_loop_chunked(struct vm_data *data,
                                            struct sync_area *sync)
{
        test_memslot_unmap_loop_common(data, sync, MEM_TEST_UNMAP_CHUNK_PAGES);
}

static void test_memslot_rw_loop(struct vm_data *data, struct sync_area *sync)
{
        uint64_t gptr;

        for (gptr = MEM_TEST_GPA + 4096 / 2;
             gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += 4096)
                *(uint64_t *)vm_gpa2hva(data, gptr, NULL) = MEM_TEST_VAL_2;

        host_perform_sync(sync);

        for (gptr = MEM_TEST_GPA;
             gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += 4096) {
                uint64_t *vptr = (typeof(vptr))vm_gpa2hva(data, gptr, NULL);
                uint64_t val = *vptr;

                TEST_ASSERT(val == MEM_TEST_VAL_1,
                            "Guest written values should read back correctly (is %"PRIu64" @ %"PRIx64")",
                            val, gptr);
                *vptr = 0;
        }

        host_perform_sync(sync);
}

struct test_data {
        const char *name;
        uint64_t mem_size;
        void (*guest_code)(void);
        bool (*prepare)(struct vm_data *data, struct sync_area *sync,
                        uint64_t *maxslots);
        void (*loop)(struct vm_data *data, struct sync_area *sync);
};

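/*
 * Run a single test: create the VM (a zero mem_size in the test
 * description selects the default MEM_SIZE_PAGES area via the "?:" GNU
 * extension below), run the optional test-specific prepare step, launch
 * the vCPU thread and then keep calling the test's loop callback until
 * maxtime seconds of guest runtime have elapsed, counting the completed
 * iterations in *nloops.
 */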
static bool test_execute(int nslots, uint64_t *maxslots,
                         unsigned int maxtime,
                         const struct test_data *tdata,
                         uint64_t *nloops,
                         struct timespec *slot_runtime,
                         struct timespec *guest_runtime)
{
        uint64_t mem_size = tdata->mem_size ? : MEM_SIZE_PAGES;
        struct vm_data *data;
        struct sync_area *sync;
        struct timespec tstart;
        bool ret = true;

        data = alloc_vm();
        if (!prepare_vm(data, nslots, maxslots, tdata->guest_code,
                        mem_size, slot_runtime)) {
                ret = false;
                goto exit_free;
        }

        sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);

        if (tdata->prepare &&
            !tdata->prepare(data, sync, maxslots)) {
                ret = false;
                goto exit_free;
        }

        launch_vm(data);

        clock_gettime(CLOCK_MONOTONIC, &tstart);
        let_guest_run(sync);

        while (1) {
                *guest_runtime = timespec_elapsed(tstart);
                if (guest_runtime->tv_sec >= maxtime)
                        break;

                tdata->loop(data, sync);

                (*nloops)++;
        }

        make_guest_exit(sync);
        wait_guest_exit(data);

exit_free:
        free_vm(data);

        return ret;
}

static const struct test_data tests[] = {
        {
                .name = "map",
                .mem_size = MEM_SIZE_MAP_PAGES,
                .guest_code = guest_code_test_memslot_map,
                .loop = test_memslot_map_loop,
        },
        {
                .name = "unmap",
                .mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1,
                .guest_code = guest_code_test_memslot_unmap,
                .loop = test_memslot_unmap_loop,
        },
        {
                .name = "unmap chunked",
                .mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1,
                .guest_code = guest_code_test_memslot_unmap,
                .loop = test_memslot_unmap_loop_chunked,
        },
        {
                .name = "move active area",
                .guest_code = guest_code_test_memslot_move,
                .prepare = test_memslot_move_prepare_active,
                .loop = test_memslot_move_loop,
        },
        {
                .name = "move inactive area",
                .guest_code = guest_code_test_memslot_move,
                .prepare = test_memslot_move_prepare_inactive,
                .loop = test_memslot_move_loop,
        },
        {
                .name = "RW",
                .guest_code = guest_code_test_memslot_rw,
                .loop = test_memslot_rw_loop
        },
};

#define NTESTS ARRAY_SIZE(tests)

struct test_args {
        int tfirst;
        int tlast;
        int nslots;
        int seconds;
        int runs;
};

static void help(char *name, struct test_args *targs)
{
        int ctr;

        pr_info("usage: %s [-h] [-v] [-d] [-s slots] [-f first_test] [-e last_test] [-l test_length] [-r run_count]\n",
                name);
        pr_info(" -h: print this help screen.\n");
        pr_info(" -v: enable verbose mode (not for benchmarking).\n");
        pr_info(" -d: enable extra debug checks.\n");
        pr_info(" -s: specify memslot count cap (-1 means no cap; currently: %i)\n",
                targs->nslots);
        pr_info(" -f: specify the first test to run (currently: %i; max %zu)\n",
                targs->tfirst, NTESTS - 1);
        pr_info(" -e: specify the last test to run (currently: %i; max %zu)\n",
                targs->tlast, NTESTS - 1);
        pr_info(" -l: specify the test length in seconds (currently: %i)\n",
                targs->seconds);
        pr_info(" -r: specify the number of runs per test (currently: %i)\n",
                targs->runs);

        pr_info("\nAvailable tests:\n");
        for (ctr = 0; ctr < NTESTS; ctr++)
                pr_info("%d: %s\n", ctr, tests[ctr].name);
}

static bool parse_args(int argc, char *argv[],
                       struct test_args *targs)
{
        int opt;

        while ((opt = getopt(argc, argv, "hvds:f:e:l:r:")) != -1) {
                switch (opt) {
                case 'h':
                default:
                        help(argv[0], targs);
                        return false;
                case 'v':
                        verbose = true;
                        break;
                case 'd':
                        map_unmap_verify = true;
                        break;
                case 's':
                        targs->nslots = atoi(optarg);
                        if (targs->nslots <= 0 && targs->nslots != -1) {
                                pr_info("Slot count cap has to be positive or -1 for no cap\n");
                                return false;
                        }
                        break;
                case 'f':
                        targs->tfirst = atoi(optarg);
                        if (targs->tfirst < 0) {
                                pr_info("First test to run has to be non-negative\n");
                                return false;
                        }
                        break;
                case 'e':
                        targs->tlast = atoi(optarg);
                        if (targs->tlast < 0 || targs->tlast >= NTESTS) {
                                pr_info("Last test to run has to be non-negative and less than %zu\n",
                                        NTESTS);
                                return false;
                        }
                        break;
                case 'l':
                        targs->seconds = atoi(optarg);
                        if (targs->seconds < 0) {
                                pr_info("Test length in seconds has to be non-negative\n");
                                return false;
                        }
                        break;
                case 'r':
                        targs->runs = atoi(optarg);
                        if (targs->runs <= 0) {
                                pr_info("Runs per test has to be positive\n");
                                return false;
                        }
                        break;
                }
        }

        if (optind < argc) {
                help(argv[0], targs);
                return false;
        }

        if (targs->tfirst > targs->tlast) {
                pr_info("First test to run cannot be greater than the last test to run\n");
                return false;
        }

        return true;
}

struct test_result {
        struct timespec slot_runtime, guest_runtime, iter_runtime;
        int64_t slottimens, runtimens;
        uint64_t nloops;
};

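/*
 * Execute one run of a test, report its timings and update the
 * caller-provided "best slot setup time" and "best iteration runtime"
 * results if this run beat them.
 */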
static bool test_loop(const struct test_data *data,
                      const struct test_args *targs,
                      struct test_result *rbestslottime,
                      struct test_result *rbestruntime)
{
        uint64_t maxslots;
        struct test_result result;

        result.nloops = 0;
        if (!test_execute(targs->nslots, &maxslots, targs->seconds, data,
                          &result.nloops,
                          &result.slot_runtime, &result.guest_runtime)) {
                if (maxslots)
                        pr_info("Memslot count too high for this test, decrease the cap (max is %"PRIu64")\n",
                                maxslots);
                else
                        pr_info("Memslot count may be too high for this test, try adjusting the cap\n");

                return false;
        }

        pr_info("Test took %ld.%.9lds for slot setup + %ld.%.9lds all iterations\n",
                result.slot_runtime.tv_sec, result.slot_runtime.tv_nsec,
                result.guest_runtime.tv_sec, result.guest_runtime.tv_nsec);
        if (!result.nloops) {
                pr_info("No full loops done - too short test time or system too loaded?\n");
                return true;
        }

        result.iter_runtime = timespec_div(result.guest_runtime,
                                           result.nloops);
        pr_info("Done %"PRIu64" iterations, avg %ld.%.9lds each\n",
                result.nloops,
                result.iter_runtime.tv_sec,
                result.iter_runtime.tv_nsec);
        result.slottimens = timespec_to_ns(result.slot_runtime);
        result.runtimens = timespec_to_ns(result.iter_runtime);

        /*
         * Only rank the slot setup time for tests using the whole test memory
         * area so they are comparable
         */
        if (!data->mem_size &&
            (!rbestslottime->slottimens ||
             result.slottimens < rbestslottime->slottimens))
                *rbestslottime = result;
        if (!rbestruntime->runtimens ||
            result.runtimens < rbestruntime->runtimens)
                *rbestruntime = result;

        return true;
}

int main(int argc, char *argv[])
{
        struct test_args targs = {
                .tfirst = 0,
                .tlast = NTESTS - 1,
                .nslots = -1,
                .seconds = 5,
                .runs = 20,
        };
        struct test_result rbestslottime;
        int tctr;

        /* Tell stdout not to buffer its content */
        setbuf(stdout, NULL);

        if (!parse_args(argc, argv, &targs))
                return -1;

        rbestslottime.slottimens = 0;
        for (tctr = targs.tfirst; tctr <= targs.tlast; tctr++) {
                const struct test_data *data = &tests[tctr];
                unsigned int runctr;
                struct test_result rbestruntime;

                if (tctr > targs.tfirst)
                        pr_info("\n");

                pr_info("Testing %s performance with %i runs, %d seconds each\n",
                        data->name, targs.runs, targs.seconds);

                rbestruntime.runtimens = 0;
                for (runctr = 0; runctr < targs.runs; runctr++)
                        if (!test_loop(data, &targs,
                                       &rbestslottime, &rbestruntime))
                                break;

                if (rbestruntime.runtimens)
                        pr_info("Best runtime result was %ld.%.9lds per iteration (with %"PRIu64" iterations)\n",
                                rbestruntime.iter_runtime.tv_sec,
                                rbestruntime.iter_runtime.tv_nsec,
                                rbestruntime.nloops);
        }

        if (rbestslottime.slottimens)
                pr_info("Best slot setup time for the whole test area was %ld.%.9lds\n",
                        rbestslottime.slot_runtime.tv_sec,
                        rbestslottime.slot_runtime.tv_nsec);

        return 0;
}