// SPDX-License-Identifier: GPL-2.0
/*
 * A memslot-related performance benchmark.
 *
 * Copyright (C) 2021 Oracle and/or its affiliates.
 *
 * Basic guest setup / host vCPU thread code lifted from set_memory_region_test.
 */
#include <pthread.h>
#include <sched.h>
#include <semaphore.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <time.h>
#include <unistd.h>

#include <linux/compiler.h>

#include <test_util.h>
#include <kvm_util.h>
#include <processor.h>

#define VCPU_ID 0
#define MEM_SIZE		((512U << 20) + 4096)
#define MEM_SIZE_PAGES		(MEM_SIZE / 4096)
#define MEM_GPA			0x10000000UL
#define MEM_AUX_GPA		MEM_GPA
#define MEM_SYNC_GPA		MEM_AUX_GPA
#define MEM_TEST_GPA		(MEM_AUX_GPA + 4096)
#define MEM_TEST_SIZE		(MEM_SIZE - 4096)
static_assert(MEM_SIZE % 4096 == 0, "invalid mem size");
static_assert(MEM_TEST_SIZE % 4096 == 0, "invalid mem test size");

/*
 * 32 MiB is the max size that gets well over 100 iterations on 509 slots.
 * Considering that each slot needs at least one page, up to 8194 slots
 * in use can then be tested (although with slightly limited resolution).
 */
#define MEM_SIZE_MAP		((32U << 20) + 4096)
#define MEM_SIZE_MAP_PAGES	(MEM_SIZE_MAP / 4096)
#define MEM_TEST_MAP_SIZE	(MEM_SIZE_MAP - 4096)
#define MEM_TEST_MAP_SIZE_PAGES (MEM_TEST_MAP_SIZE / 4096)
static_assert(MEM_SIZE_MAP % 4096 == 0, "invalid map test region size");
static_assert(MEM_TEST_MAP_SIZE % 4096 == 0, "invalid map test region size");
static_assert(MEM_TEST_MAP_SIZE_PAGES % 2 == 0, "invalid map test region size");
static_assert(MEM_TEST_MAP_SIZE_PAGES > 2, "invalid map test region size");

/*
 * 128 MiB is the min size that fills 32k slots with at least one page in
 * each while at the same time getting 100+ iterations in such a test.
 */
#define MEM_TEST_UNMAP_SIZE		(128U << 20)
#define MEM_TEST_UNMAP_SIZE_PAGES	(MEM_TEST_UNMAP_SIZE / 4096)
/* 2 MiB chunk size like a typical huge page */
#define MEM_TEST_UNMAP_CHUNK_PAGES	(2U << (20 - 12))
static_assert(MEM_TEST_UNMAP_SIZE <= MEM_TEST_SIZE,
	      "invalid unmap test region size");
static_assert(MEM_TEST_UNMAP_SIZE % 4096 == 0,
	      "invalid unmap test region size");
static_assert(MEM_TEST_UNMAP_SIZE_PAGES %
	      (2 * MEM_TEST_UNMAP_CHUNK_PAGES) == 0,
	      "invalid unmap test region size");

/*
 * For the move active test the middle of the test area is placed on
 * a memslot boundary: half lies in the memslot being moved, half in
 * other memslot(s).
 *
 * When running this test with 32k memslots (32764, really) each memslot
 * contains 4 pages.
 * The last one additionally contains the remaining 21 pages of memory,
 * for the total size of 25 pages.
 * Hence, the maximum size here is 50 pages.
 */
#define MEM_TEST_MOVE_SIZE_PAGES	(50)
#define MEM_TEST_MOVE_SIZE		(MEM_TEST_MOVE_SIZE_PAGES * 4096)
#define MEM_TEST_MOVE_GPA_DEST		(MEM_GPA + MEM_SIZE)
static_assert(MEM_TEST_MOVE_SIZE <= MEM_TEST_SIZE,
	      "invalid move test region size");

#define MEM_TEST_VAL_1 0x1122334455667788
#define MEM_TEST_VAL_2 0x99AABBCCDDEEFF00

struct vm_data {
	struct kvm_vm *vm;
	pthread_t vcpu_thread;
	uint32_t nslots;
	uint64_t npages;
	uint64_t pages_per_slot;
	void **hva_slots;
	bool mmio_ok;
	uint64_t mmio_gpa_min;
	uint64_t mmio_gpa_max;
};

struct sync_area {
	atomic_bool start_flag;
	atomic_bool exit_flag;
	atomic_bool sync_flag;
	void *move_area_ptr;
};

/*
 * Technically, we also need the atomic bool to be address-free, which
 * is recommended, but not strictly required, by C11 for lockless
 * implementations.
 * However, in practice both GCC and Clang fulfill this requirement on
 * all KVM-supported platforms.
 */
static_assert(ATOMIC_BOOL_LOCK_FREE == 2, "atomic bool is not lockless");

static sem_t vcpu_ready;

static bool map_unmap_verify;

static bool verbose;
#define pr_info_v(...)				\
	do {					\
		if (verbose)			\
			pr_info(__VA_ARGS__);	\
	} while (0)

static void *vcpu_worker(void *data)
{
	struct vm_data *vm = data;
	struct kvm_run *run;
	struct ucall uc;
	uint64_t cmd;

	run = vcpu_state(vm->vm, VCPU_ID);
	while (1) {
		vcpu_run(vm->vm, VCPU_ID);

		if (run->exit_reason == KVM_EXIT_IO) {
			cmd = get_ucall(vm->vm, VCPU_ID, &uc);
			if (cmd != UCALL_SYNC)
				break;

			sem_post(&vcpu_ready);
			continue;
		}

		if (run->exit_reason != KVM_EXIT_MMIO)
			break;

		TEST_ASSERT(vm->mmio_ok, "Unexpected mmio exit");
		TEST_ASSERT(run->mmio.is_write, "Unexpected mmio read");
		TEST_ASSERT(run->mmio.len == 8,
			    "Unexpected exit mmio size = %u", run->mmio.len);
		TEST_ASSERT(run->mmio.phys_addr >= vm->mmio_gpa_min &&
			    run->mmio.phys_addr <= vm->mmio_gpa_max,
			    "Unexpected exit mmio address = 0x%llx",
			    run->mmio.phys_addr);
	}

	if (run->exit_reason == KVM_EXIT_IO && cmd == UCALL_ABORT)
		TEST_FAIL("%s at %s:%ld, val = %lu", (const char *)uc.args[0],
			  __FILE__, uc.args[1], uc.args[2]);

	return NULL;
}

static void wait_for_vcpu(void)
{
	struct timespec ts;

	TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts),
		    "clock_gettime() failed: %d\n", errno);

	ts.tv_sec += 2;
	TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts),
		    "sem_timedwait() failed: %d\n", errno);
}
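
/*
 * Note that sem_timedwait() takes an absolute CLOCK_REALTIME deadline,
 * which is why the timeout above is computed from
 * clock_gettime(CLOCK_REALTIME) rather than CLOCK_MONOTONIC.
 */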

static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages)
{
	uint64_t gpage, pgoffs;
	uint32_t slot, slotoffs;
	void *base;

	TEST_ASSERT(gpa >= MEM_GPA, "Too low gpa to translate");
	TEST_ASSERT(gpa < MEM_GPA + data->npages * 4096,
		    "Too high gpa to translate");
	gpa -= MEM_GPA;

	gpage = gpa / 4096;
	pgoffs = gpa % 4096;
	slot = min(gpage / data->pages_per_slot, (uint64_t)data->nslots - 1);
	slotoffs = gpage - (slot * data->pages_per_slot);

	if (rempages) {
		uint64_t slotpages;

		if (slot == data->nslots - 1)
			slotpages = data->npages - slot * data->pages_per_slot;
		else
			slotpages = data->pages_per_slot;

		TEST_ASSERT(!pgoffs,
			    "Asking for remaining pages in slot but gpa not page aligned");
		*rempages = slotpages - slotoffs;
	}

	base = data->hva_slots[slot];
	return (uint8_t *)base + slotoffs * 4096 + pgoffs;
}
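
/*
 * A quick worked example of the translation above (numbers chosen for
 * illustration only): with pages_per_slot = 4 and nslots = 8, a gpa of
 * MEM_GPA + 0x6000 gives gpage = 6 and pgoffs = 0, so slot =
 * min(6 / 4, 7) = 1 and slotoffs = 6 - 1 * 4 = 2 -- the third page of
 * the second slot.  The min() clamp exists because the last slot
 * absorbs the remainder pages and therefore covers more gpages than
 * the others.
 */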

static uint64_t vm_slot2gpa(struct vm_data *data, uint32_t slot)
{
	TEST_ASSERT(slot < data->nslots, "Too high slot number");

	return MEM_GPA + slot * data->pages_per_slot * 4096;
}

static struct vm_data *alloc_vm(void)
{
	struct vm_data *data;

	data = malloc(sizeof(*data));
	TEST_ASSERT(data, "malloc(vmdata) failed");

	data->vm = NULL;
	data->hva_slots = NULL;

	return data;
}

static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
		       void *guest_code, uint64_t mempages,
		       struct timespec *slot_runtime)
{
	uint32_t max_mem_slots;
	uint64_t rempages;
	uint64_t guest_addr;
	uint32_t slot;
	struct timespec tstart;
	struct sync_area *sync;

	max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
	TEST_ASSERT(max_mem_slots > 1,
		    "KVM_CAP_NR_MEMSLOTS should be greater than 1");
	TEST_ASSERT(nslots > 1 || nslots == -1,
		    "Slot count cap should be greater than 1");
	if (nslots != -1)
		max_mem_slots = min(max_mem_slots, (uint32_t)nslots);
	pr_info_v("Allowed number of memory slots: %"PRIu32"\n", max_mem_slots);

	TEST_ASSERT(mempages > 1,
		    "Can't test without any memory");

	data->npages = mempages;
	data->nslots = max_mem_slots - 1;
	data->pages_per_slot = mempages / data->nslots;
	if (!data->pages_per_slot) {
		*maxslots = mempages + 1;
		return false;
	}

	rempages = mempages % data->nslots;
	data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots);
	TEST_ASSERT(data->hva_slots, "malloc() fail");

	data->vm = vm_create_default(VCPU_ID, mempages, guest_code);

	pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n",
		  max_mem_slots - 1, data->pages_per_slot, rempages);

	clock_gettime(CLOCK_MONOTONIC, &tstart);
	for (slot = 1, guest_addr = MEM_GPA; slot < max_mem_slots; slot++) {
		uint64_t npages;

		npages = data->pages_per_slot;
		if (slot == max_mem_slots - 1)
			npages += rempages;

		vm_userspace_mem_region_add(data->vm, VM_MEM_SRC_ANONYMOUS,
					    guest_addr, slot, npages,
					    0);
		guest_addr += npages * 4096;
	}
	*slot_runtime = timespec_elapsed(tstart);

	for (slot = 0, guest_addr = MEM_GPA; slot < max_mem_slots - 1; slot++) {
		uint64_t npages;
		uint64_t gpa;

		npages = data->pages_per_slot;
		if (slot == max_mem_slots - 2)
			npages += rempages;

		gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr,
					 slot + 1);
		TEST_ASSERT(gpa == guest_addr,
			    "vm_phy_pages_alloc() failed\n");

		data->hva_slots[slot] = addr_gpa2hva(data->vm, guest_addr);
		memset(data->hva_slots[slot], 0, npages * 4096);

		guest_addr += npages * 4096;
	}

	virt_map(data->vm, MEM_GPA, MEM_GPA, mempages, 0);

	sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
	atomic_init(&sync->start_flag, false);
	atomic_init(&sync->exit_flag, false);
	atomic_init(&sync->sync_flag, false);

	data->mmio_ok = false;

	return true;
}

static void launch_vm(struct vm_data *data)
{
	pr_info_v("Launching the test VM\n");

	pthread_create(&data->vcpu_thread, NULL, vcpu_worker, data);

	/* Ensure the guest thread is spun up. */
	wait_for_vcpu();
}

static void free_vm(struct vm_data *data)
{
	kvm_vm_free(data->vm);
	free(data->hva_slots);
	free(data);
}

static void wait_guest_exit(struct vm_data *data)
{
	pthread_join(data->vcpu_thread, NULL);
}

static void let_guest_run(struct sync_area *sync)
{
	atomic_store_explicit(&sync->start_flag, true, memory_order_release);
}

static void guest_spin_until_start(void)
{
	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;

	while (!atomic_load_explicit(&sync->start_flag, memory_order_acquire))
		;
}

static void make_guest_exit(struct sync_area *sync)
{
	atomic_store_explicit(&sync->exit_flag, true, memory_order_release);
}

static bool _guest_should_exit(void)
{
	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;

	return atomic_load_explicit(&sync->exit_flag, memory_order_acquire);
}

#define guest_should_exit() unlikely(_guest_should_exit())

/*
 * noinline so we can easily see how much time the host spends waiting
 * for the guest.
 * For the same reason use alarm() instead of polling clock_gettime()
 * to implement a wait timeout.
 */
static noinline void host_perform_sync(struct sync_area *sync)
{
	alarm(2);

	atomic_store_explicit(&sync->sync_flag, true, memory_order_release);
	while (atomic_load_explicit(&sync->sync_flag, memory_order_acquire))
		;

	alarm(0);
}

static bool guest_perform_sync(void)
{
	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
	bool expected;

	do {
		if (guest_should_exit())
			return false;

		expected = true;
	} while (!atomic_compare_exchange_weak_explicit(&sync->sync_flag,
							&expected, false,
							memory_order_acq_rel,
							memory_order_relaxed));

	return true;
}
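
/*
 * How the two sync halves pair up (a sketch of the protocol, in program
 * order on each side):
 *
 *	host				guest
 *	sync_flag = true (release)	CAS sync_flag true -> false (acq_rel)
 *	spin while sync_flag (acquire)	return true on CAS success
 *
 * The guest's acq_rel CAS both observes the host's release store and
 * publishes the guest's preceding memory writes; the host's acquire
 * reload of the cleared flag then makes those writes visible host-side
 * before host_perform_sync() returns.
 */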

static void guest_code_test_memslot_move(void)
{
	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
	uintptr_t base = (typeof(base))READ_ONCE(sync->move_area_ptr);

	GUEST_SYNC(0);

	guest_spin_until_start();

	while (!guest_should_exit()) {
		uintptr_t ptr;

		for (ptr = base; ptr < base + MEM_TEST_MOVE_SIZE;
		     ptr += 4096)
			*(uint64_t *)ptr = MEM_TEST_VAL_1;

		/*
		 * No host sync here since the MMIO exits are so expensive
		 * that the host would spend most of its time waiting for
		 * the guest and so instead of measuring memslot move
		 * performance we would measure the performance and
		 * likelihood of MMIO exits.
		 */
	}

	GUEST_DONE();
}

static void guest_code_test_memslot_map(void)
{
	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;

	GUEST_SYNC(0);

	guest_spin_until_start();

	while (1) {
		uintptr_t ptr;

		for (ptr = MEM_TEST_GPA;
		     ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2; ptr += 4096)
			*(uint64_t *)ptr = MEM_TEST_VAL_1;

		if (!guest_perform_sync())
			break;

		for (ptr = MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2;
		     ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE; ptr += 4096)
			*(uint64_t *)ptr = MEM_TEST_VAL_2;

		if (!guest_perform_sync())
			break;
	}

	GUEST_DONE();
}

static void guest_code_test_memslot_unmap(void)
{
	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;

	GUEST_SYNC(0);

	guest_spin_until_start();

	while (1) {
		uintptr_t ptr = MEM_TEST_GPA;

		/*
		 * We can afford to access (map) just a small number of pages
		 * per host sync as otherwise the host will spend
		 * a significant amount of its time waiting for the guest
		 * (instead of doing unmap operations), so this would
		 * effectively turn this test into a map performance test.
		 *
		 * Just access a single page to be on the safe side.
		 */
		*(uint64_t *)ptr = MEM_TEST_VAL_1;

		if (!guest_perform_sync())
			break;

		ptr += MEM_TEST_UNMAP_SIZE / 2;
		*(uint64_t *)ptr = MEM_TEST_VAL_2;

		if (!guest_perform_sync())
			break;
	}

	GUEST_DONE();
}

static void guest_code_test_memslot_rw(void)
{
	GUEST_SYNC(0);

	guest_spin_until_start();

	while (1) {
		uintptr_t ptr;

		for (ptr = MEM_TEST_GPA;
		     ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += 4096)
			*(uint64_t *)ptr = MEM_TEST_VAL_1;

		if (!guest_perform_sync())
			break;

		for (ptr = MEM_TEST_GPA + 4096 / 2;
		     ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += 4096) {
			uint64_t val = *(uint64_t *)ptr;

			GUEST_ASSERT_1(val == MEM_TEST_VAL_2, val);
			*(uint64_t *)ptr = 0;
		}

		if (!guest_perform_sync())
			break;
	}

	GUEST_DONE();
}

static bool test_memslot_move_prepare(struct vm_data *data,
				      struct sync_area *sync,
				      uint64_t *maxslots, bool isactive)
{
	uint64_t movesrcgpa, movetestgpa;

	movesrcgpa = vm_slot2gpa(data, data->nslots - 1);

	if (isactive) {
		uint64_t lastpages;

		vm_gpa2hva(data, movesrcgpa, &lastpages);
		if (lastpages < MEM_TEST_MOVE_SIZE_PAGES / 2) {
			*maxslots = 0;
			return false;
		}
	}

	movetestgpa = movesrcgpa - (MEM_TEST_MOVE_SIZE / (isactive ? 2 : 1));
	sync->move_area_ptr = (void *)movetestgpa;

	if (isactive) {
		data->mmio_ok = true;
		data->mmio_gpa_min = movesrcgpa;
		data->mmio_gpa_max = movesrcgpa + MEM_TEST_MOVE_SIZE / 2 - 1;
	}

	return true;
}

static bool test_memslot_move_prepare_active(struct vm_data *data,
					     struct sync_area *sync,
					     uint64_t *maxslots)
{
	return test_memslot_move_prepare(data, sync, maxslots, true);
}

static bool test_memslot_move_prepare_inactive(struct vm_data *data,
					       struct sync_area *sync,
					       uint64_t *maxslots)
{
	return test_memslot_move_prepare(data, sync, maxslots, false);
}

static void test_memslot_move_loop(struct vm_data *data, struct sync_area *sync)
{
	uint64_t movesrcgpa;

	movesrcgpa = vm_slot2gpa(data, data->nslots - 1);
	vm_mem_region_move(data->vm, data->nslots - 1 + 1,
			   MEM_TEST_MOVE_GPA_DEST);
	vm_mem_region_move(data->vm, data->nslots - 1 + 1, movesrcgpa);
}

static void test_memslot_do_unmap(struct vm_data *data,
				  uint64_t offsp, uint64_t count)
{
	uint64_t gpa, ctr;

	for (gpa = MEM_TEST_GPA + offsp * 4096, ctr = 0; ctr < count; ) {
		uint64_t npages;
		void *hva;
		int ret;

		hva = vm_gpa2hva(data, gpa, &npages);
		TEST_ASSERT(npages, "Empty memory slot at gptr 0x%"PRIx64, gpa);
		npages = min(npages, count - ctr);
		ret = madvise(hva, npages * 4096, MADV_DONTNEED);
		TEST_ASSERT(!ret,
			    "madvise(%p, MADV_DONTNEED) on VM memory should not fail for gptr 0x%"PRIx64,
			    hva, gpa);
		ctr += npages;
		gpa += npages * 4096;
	}
	TEST_ASSERT(ctr == count,
		    "madvise(MADV_DONTNEED) should exactly cover all of the requested area");
}

static void test_memslot_map_unmap_check(struct vm_data *data,
					 uint64_t offsp, uint64_t valexp)
{
	uint64_t gpa;
	uint64_t *val;

	if (!map_unmap_verify)
		return;

	gpa = MEM_TEST_GPA + offsp * 4096;
	val = (typeof(val))vm_gpa2hva(data, gpa, NULL);
	TEST_ASSERT(*val == valexp,
		    "Guest written values should read back correctly before unmap (%"PRIu64" vs %"PRIu64" @ %"PRIx64")",
		    *val, valexp, gpa);
	*val = 0;
}

static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync)
{
	/*
	 * Unmap the second half of the test area while guest writes to (maps)
	 * the first half.
	 */
	test_memslot_do_unmap(data, MEM_TEST_MAP_SIZE_PAGES / 2,
			      MEM_TEST_MAP_SIZE_PAGES / 2);

	/*
	 * Wait for the guest to finish writing the first half of the test
	 * area, verify the written value on the first and the last page of
	 * this area and then unmap it.
	 * Meanwhile, the guest is writing to (mapping) the second half of
	 * the test area.
	 */
	host_perform_sync(sync);
	test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
	test_memslot_map_unmap_check(data,
				     MEM_TEST_MAP_SIZE_PAGES / 2 - 1,
				     MEM_TEST_VAL_1);
	test_memslot_do_unmap(data, 0, MEM_TEST_MAP_SIZE_PAGES / 2);

	/*
	 * Wait for the guest to finish writing the second half of the test
	 * area and verify the written value on the first and the last page
	 * of this area.
	 * The area will be unmapped at the beginning of the next loop
	 * iteration.
	 * Meanwhile, the guest is writing to (mapping) the first half of
	 * the test area.
	 */
	host_perform_sync(sync);
	test_memslot_map_unmap_check(data, MEM_TEST_MAP_SIZE_PAGES / 2,
				     MEM_TEST_VAL_2);
	test_memslot_map_unmap_check(data, MEM_TEST_MAP_SIZE_PAGES - 1,
				     MEM_TEST_VAL_2);
}

static void test_memslot_unmap_loop_common(struct vm_data *data,
					   struct sync_area *sync,
					   uint64_t chunk)
{
	uint64_t ctr;

	/*
	 * Wait for the guest to finish mapping page(s) in the first half
	 * of the test area, verify the written value and then perform unmap
	 * of this area.
	 * Meanwhile, the guest is writing to (mapping) page(s) in the second
	 * half of the test area.
	 */
	host_perform_sync(sync);
	test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
	for (ctr = 0; ctr < MEM_TEST_UNMAP_SIZE_PAGES / 2; ctr += chunk)
		test_memslot_do_unmap(data, ctr, chunk);

	/* Likewise, but for the opposite host / guest areas */
	host_perform_sync(sync);
	test_memslot_map_unmap_check(data, MEM_TEST_UNMAP_SIZE_PAGES / 2,
				     MEM_TEST_VAL_2);
	for (ctr = MEM_TEST_UNMAP_SIZE_PAGES / 2;
	     ctr < MEM_TEST_UNMAP_SIZE_PAGES; ctr += chunk)
		test_memslot_do_unmap(data, ctr, chunk);
}

static void test_memslot_unmap_loop(struct vm_data *data,
				    struct sync_area *sync)
{
	test_memslot_unmap_loop_common(data, sync, 1);
}

static void test_memslot_unmap_loop_chunked(struct vm_data *data,
					    struct sync_area *sync)
{
	test_memslot_unmap_loop_common(data, sync, MEM_TEST_UNMAP_CHUNK_PAGES);
}

static void test_memslot_rw_loop(struct vm_data *data, struct sync_area *sync)
{
	uint64_t gptr;

	for (gptr = MEM_TEST_GPA + 4096 / 2;
	     gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += 4096)
		*(uint64_t *)vm_gpa2hva(data, gptr, NULL) = MEM_TEST_VAL_2;

	host_perform_sync(sync);

	for (gptr = MEM_TEST_GPA;
	     gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += 4096) {
		uint64_t *vptr = (typeof(vptr))vm_gpa2hva(data, gptr, NULL);
		uint64_t val = *vptr;

		TEST_ASSERT(val == MEM_TEST_VAL_1,
			    "Guest written values should read back correctly (is %"PRIu64" @ %"PRIx64")",
			    val, gptr);
		*vptr = 0;
	}

	host_perform_sync(sync);
}

struct test_data {
	const char *name;
	uint64_t mem_size;
	void (*guest_code)(void);
	bool (*prepare)(struct vm_data *data, struct sync_area *sync,
			uint64_t *maxslots);
	void (*loop)(struct vm_data *data, struct sync_area *sync);
};

static bool test_execute(int nslots, uint64_t *maxslots,
			 unsigned int maxtime,
			 const struct test_data *tdata,
			 uint64_t *nloops,
			 struct timespec *slot_runtime,
			 struct timespec *guest_runtime)
{
	uint64_t mem_size = tdata->mem_size ? : MEM_SIZE_PAGES;
	struct vm_data *data;
	struct sync_area *sync;
	struct timespec tstart;
	bool ret = true;

	data = alloc_vm();
	if (!prepare_vm(data, nslots, maxslots, tdata->guest_code,
			mem_size, slot_runtime)) {
		ret = false;
		goto exit_free;
	}

	sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);

	if (tdata->prepare &&
	    !tdata->prepare(data, sync, maxslots)) {
		ret = false;
		goto exit_free;
	}

	launch_vm(data);

	clock_gettime(CLOCK_MONOTONIC, &tstart);
	let_guest_run(sync);

	while (1) {
		*guest_runtime = timespec_elapsed(tstart);
		if (guest_runtime->tv_sec >= maxtime)
			break;

		tdata->loop(data, sync);

		(*nloops)++;
	}

	make_guest_exit(sync);
	wait_guest_exit(data);

exit_free:
	free_vm(data);

	return ret;
}

static const struct test_data tests[] = {
	{
		.name = "map",
		.mem_size = MEM_SIZE_MAP_PAGES,
		.guest_code = guest_code_test_memslot_map,
		.loop = test_memslot_map_loop,
	},
	{
		.name = "unmap",
		.mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1,
		.guest_code = guest_code_test_memslot_unmap,
		.loop = test_memslot_unmap_loop,
	},
	{
		.name = "unmap chunked",
		.mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1,
		.guest_code = guest_code_test_memslot_unmap,
		.loop = test_memslot_unmap_loop_chunked,
	},
	{
		.name = "move active area",
		.guest_code = guest_code_test_memslot_move,
		.prepare = test_memslot_move_prepare_active,
		.loop = test_memslot_move_loop,
	},
	{
		.name = "move inactive area",
		.guest_code = guest_code_test_memslot_move,
		.prepare = test_memslot_move_prepare_inactive,
		.loop = test_memslot_move_loop,
	},
	{
		.name = "RW",
		.guest_code = guest_code_test_memslot_rw,
		.loop = test_memslot_rw_loop,
	},
};

#define NTESTS ARRAY_SIZE(tests)

struct test_args {
	int tfirst;
	int tlast;
	int nslots;
	int seconds;
	int runs;
};

static void help(char *name, struct test_args *targs)
{
	int ctr;

	pr_info("usage: %s [-h] [-v] [-d] [-s slots] [-f first_test] [-e last_test] [-l test_length] [-r run_count]\n",
		name);
	pr_info(" -h: print this help screen.\n");
	pr_info(" -v: enable verbose mode (not for benchmarking).\n");
	pr_info(" -d: enable extra debug checks.\n");
	pr_info(" -s: specify memslot count cap (-1 means no cap; currently: %i)\n",
		targs->nslots);
	pr_info(" -f: specify the first test to run (currently: %i; max %zu)\n",
		targs->tfirst, NTESTS - 1);
	pr_info(" -e: specify the last test to run (currently: %i; max %zu)\n",
		targs->tlast, NTESTS - 1);
	pr_info(" -l: specify the test length in seconds (currently: %i)\n",
		targs->seconds);
	pr_info(" -r: specify the number of runs per test (currently: %i)\n",
		targs->runs);

	pr_info("\nAvailable tests:\n");
	for (ctr = 0; ctr < NTESTS; ctr++)
		pr_info("%d: %s\n", ctr, tests[ctr].name);
}
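
/*
 * Example invocation (illustrative only -- the binary name follows the
 * source file name):
 *	./memslot_perf_test -v -s 8194 -f 0 -e 2 -l 10 -r 5
 * runs tests 0..2 with the memslot cap at 8194, 10 seconds and 5 runs
 * per test, with verbose output.
 */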

static bool parse_args(int argc, char *argv[],
		       struct test_args *targs)
{
	int opt;

	while ((opt = getopt(argc, argv, "hvds:f:e:l:r:")) != -1) {
		switch (opt) {
		case 'h':
		default:
			help(argv[0], targs);
			return false;
		case 'v':
			verbose = true;
			break;
		case 'd':
			map_unmap_verify = true;
			break;
		case 's':
			targs->nslots = atoi(optarg);
			if (targs->nslots <= 0 && targs->nslots != -1) {
				pr_info("Slot count cap has to be positive or -1 for no cap\n");
				return false;
			}
			break;
		case 'f':
			targs->tfirst = atoi(optarg);
			if (targs->tfirst < 0) {
				pr_info("First test to run has to be non-negative\n");
				return false;
			}
			break;
		case 'e':
			targs->tlast = atoi(optarg);
			if (targs->tlast < 0 || targs->tlast >= NTESTS) {
				pr_info("Last test to run has to be non-negative and less than %zu\n",
					NTESTS);
				return false;
			}
			break;
		case 'l':
			targs->seconds = atoi(optarg);
			if (targs->seconds < 0) {
				pr_info("Test length in seconds has to be non-negative\n");
				return false;
			}
			break;
		case 'r':
			targs->runs = atoi(optarg);
			if (targs->runs <= 0) {
				pr_info("Runs per test has to be positive\n");
				return false;
			}
			break;
		}
	}

	if (optind < argc) {
		help(argv[0], targs);
		return false;
	}

	if (targs->tfirst > targs->tlast) {
		pr_info("First test to run cannot be greater than the last test to run\n");
		return false;
	}

	return true;
}

struct test_result {
	struct timespec slot_runtime, guest_runtime, iter_runtime;
	int64_t slottimens, runtimens;
	uint64_t nloops;
};

static bool test_loop(const struct test_data *data,
		      const struct test_args *targs,
		      struct test_result *rbestslottime,
		      struct test_result *rbestruntime)
{
	uint64_t maxslots;
	struct test_result result;

	result.nloops = 0;
	if (!test_execute(targs->nslots, &maxslots, targs->seconds, data,
			  &result.nloops,
			  &result.slot_runtime, &result.guest_runtime)) {
		if (maxslots)
			pr_info("Memslot count too high for this test, decrease the cap (max is %"PRIu64")\n",
				maxslots);
		else
			pr_info("Memslot count may be too high for this test, try adjusting the cap\n");

		return false;
	}

	pr_info("Test took %ld.%.9lds for slot setup + %ld.%.9lds all iterations\n",
		result.slot_runtime.tv_sec, result.slot_runtime.tv_nsec,
		result.guest_runtime.tv_sec, result.guest_runtime.tv_nsec);
	if (!result.nloops) {
		pr_info("No full loops done - too short test time or system too loaded?\n");
		return true;
	}

	result.iter_runtime = timespec_div(result.guest_runtime,
					   result.nloops);
	pr_info("Done %"PRIu64" iterations, avg %ld.%.9lds each\n",
		result.nloops,
		result.iter_runtime.tv_sec,
		result.iter_runtime.tv_nsec);
	result.slottimens = timespec_to_ns(result.slot_runtime);
	result.runtimens = timespec_to_ns(result.iter_runtime);

	/*
	 * Only rank the slot setup time for tests using the whole test memory
	 * area so they are comparable.
	 */
	if (!data->mem_size &&
	    (!rbestslottime->slottimens ||
	     result.slottimens < rbestslottime->slottimens))
		*rbestslottime = result;
	if (!rbestruntime->runtimens ||
	    result.runtimens < rbestruntime->runtimens)
		*rbestruntime = result;

	return true;
}

int main(int argc, char *argv[])
{
	struct test_args targs = {
		.tfirst = 0,
		.tlast = NTESTS - 1,
		.nslots = -1,
		.seconds = 5,
		.runs = 1,
	};
	struct test_result rbestslottime;
	int tctr;

	/* Tell stdout not to buffer its content */
	setbuf(stdout, NULL);

	if (!parse_args(argc, argv, &targs))
		return -1;

	rbestslottime.slottimens = 0;
	for (tctr = targs.tfirst; tctr <= targs.tlast; tctr++) {
		const struct test_data *data = &tests[tctr];
		unsigned int runctr;
		struct test_result rbestruntime;

		if (tctr > targs.tfirst)
			pr_info("\n");

		pr_info("Testing %s performance with %i runs, %d seconds each\n",
			data->name, targs.runs, targs.seconds);

		rbestruntime.runtimens = 0;
		for (runctr = 0; runctr < targs.runs; runctr++)
			if (!test_loop(data, &targs,
				       &rbestslottime, &rbestruntime))
				break;

		if (rbestruntime.runtimens)
			pr_info("Best runtime result was %ld.%.9lds per iteration (with %"PRIu64" iterations)\n",
				rbestruntime.iter_runtime.tv_sec,
				rbestruntime.iter_runtime.tv_nsec,
				rbestruntime.nloops);
	}

	if (rbestslottime.slottimens)
		pr_info("Best slot setup time for the whole test area was %ld.%.9lds\n",
			rbestslottime.slot_runtime.tv_sec,
			rbestslottime.slot_runtime.tv_nsec);

	return 0;
}