// SPDX-License-Identifier: GPL-2.0-only
/*
 * tools/testing/selftests/kvm/lib/x86_64/processor.c
 *
 * Copyright (C) 2018, Google LLC.
 */

#include "test_util.h"
#include "kvm_util.h"
#include "../kvm_util_internal.h"
#include "processor.h"
#ifndef NUM_INTERRUPTS
#define NUM_INTERRUPTS 256
#endif

#define DEFAULT_CODE_SELECTOR 0x8
#define DEFAULT_DATA_SELECTOR 0x10
vm_vaddr_t exception_handlers;
/* Virtual translation table structure declarations */
struct pageUpperEntry {
	uint64_t present:1;
	uint64_t writable:1;
	uint64_t user:1;
	uint64_t write_through:1;
	uint64_t cache_disable:1;
	uint64_t accessed:1;
	uint64_t ignored_06:1;
	uint64_t page_size:1;
	uint64_t ignored_11_08:4;
	uint64_t pfn:40;
	uint64_t ignored_62_52:11;
	uint64_t execute_disable:1;
};
struct pageTableEntry {
	uint64_t present:1;
	uint64_t writable:1;
	uint64_t user:1;
	uint64_t write_through:1;
	uint64_t cache_disable:1;
	uint64_t accessed:1;
	uint64_t dirty:1;
	uint64_t reserved_07:1;
	uint64_t global:1;
	uint64_t ignored_11_09:3;
	uint64_t pfn:40;
	uint64_t ignored_62_52:11;
	uint64_t execute_disable:1;
};
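
/*
 * Illustrative sketch (not used by this file): the bitfields above mirror
 * the hardware layout of a 64-bit x86 PTE, so a raw entry value can be
 * decoded by copying it over the struct.  The raw value below is made up.
 *
 *	uint64_t raw = 0x8000000000123003ull; // XD | pfn 0x123 | W | P
 *	struct pageTableEntry pte;
 *
 *	memcpy(&pte, &raw, sizeof(pte));
 *	TEST_ASSERT(pte.present && pte.writable && pte.pfn == 0x123 &&
 *		    pte.execute_disable, "unexpected PTE decode");
 */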
void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent)
{
	fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
		"rcx: 0x%.16llx rdx: 0x%.16llx\n",
		indent, "",
		regs->rax, regs->rbx, regs->rcx, regs->rdx);
	fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx "
		"rsp: 0x%.16llx rbp: 0x%.16llx\n",
		indent, "",
		regs->rsi, regs->rdi, regs->rsp, regs->rbp);
	fprintf(stream, "%*sr8: 0x%.16llx r9: 0x%.16llx "
		"r10: 0x%.16llx r11: 0x%.16llx\n",
		indent, "",
		regs->r8, regs->r9, regs->r10, regs->r11);
	fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx "
		"r14: 0x%.16llx r15: 0x%.16llx\n",
		indent, "",
		regs->r12, regs->r13, regs->r14, regs->r15);
	fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n",
		indent, "",
		regs->rip, regs->rflags);
}
/*
 * Segment Dump
 *
 * Input Args:
 *   stream - Output FILE stream
 *   segment - KVM segment
 *   indent - Left margin indent amount
 *
 * Output Args: None
 *
 * Return: None
 *
 * Dumps the state of the KVM segment given by @segment, to the FILE stream
 * given by @stream.
 */
static void segment_dump(FILE *stream, struct kvm_segment *segment,
			 uint8_t indent)
{
	fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x "
		"selector: 0x%.4x type: 0x%.2x\n",
		indent, "", segment->base, segment->limit,
		segment->selector, segment->type);
	fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x "
		"db: 0x%.2x s: 0x%.2x l: 0x%.2x\n",
		indent, "", segment->present, segment->dpl,
		segment->db, segment->s, segment->l);
	fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x "
		"unusable: 0x%.2x padding: 0x%.2x\n",
		indent, "", segment->g, segment->avl,
		segment->unusable, segment->padding);
}
/*
 * dtable Dump
 *
 * Input Args:
 *   stream - Output FILE stream
 *   dtable - KVM dtable
 *   indent - Left margin indent amount
 *
 * Output Args: None
 *
 * Return: None
 *
 * Dumps the state of the KVM dtable given by @dtable, to the FILE stream
 * given by @stream.
 */
static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
			uint8_t indent)
{
	fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x "
		"padding: 0x%.4x 0x%.4x 0x%.4x\n",
		indent, "", dtable->base, dtable->limit,
		dtable->padding[0], dtable->padding[1], dtable->padding[2]);
}
void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent)
{
	unsigned int i;

	fprintf(stream, "%*scs:\n", indent, "");
	segment_dump(stream, &sregs->cs, indent + 2);
	fprintf(stream, "%*sds:\n", indent, "");
	segment_dump(stream, &sregs->ds, indent + 2);
	fprintf(stream, "%*ses:\n", indent, "");
	segment_dump(stream, &sregs->es, indent + 2);
	fprintf(stream, "%*sfs:\n", indent, "");
	segment_dump(stream, &sregs->fs, indent + 2);
	fprintf(stream, "%*sgs:\n", indent, "");
	segment_dump(stream, &sregs->gs, indent + 2);
	fprintf(stream, "%*sss:\n", indent, "");
	segment_dump(stream, &sregs->ss, indent + 2);
	fprintf(stream, "%*str:\n", indent, "");
	segment_dump(stream, &sregs->tr, indent + 2);
	fprintf(stream, "%*sldt:\n", indent, "");
	segment_dump(stream, &sregs->ldt, indent + 2);

	fprintf(stream, "%*sgdt:\n", indent, "");
	dtable_dump(stream, &sregs->gdt, indent + 2);
	fprintf(stream, "%*sidt:\n", indent, "");
	dtable_dump(stream, &sregs->idt, indent + 2);

	fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx "
		"cr3: 0x%.16llx cr4: 0x%.16llx\n",
		indent, "",
		sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4);
	fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx "
		"apic_base: 0x%.16llx\n",
		indent, "",
		sregs->cr8, sregs->efer, sregs->apic_base);

	fprintf(stream, "%*sinterrupt_bitmap:\n", indent, "");
	for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) {
		fprintf(stream, "%*s%.16llx\n", indent + 2, "",
			sregs->interrupt_bitmap[i]);
	}
}
177 void virt_pgd_alloc(struct kvm_vm
*vm
)
179 TEST_ASSERT(vm
->mode
== VM_MODE_PXXV48_4K
, "Attempt to use "
180 "unknown or unsupported guest mode, mode: 0x%x", vm
->mode
);
182 /* If needed, create page map l4 table. */
183 if (!vm
->pgd_created
) {
184 vm
->pgd
= vm_alloc_page_table(vm
);
185 vm
->pgd_created
= true;
static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr,
			  int level)
{
	uint64_t *page_table = addr_gpa2hva(vm, pt_pfn << vm->page_shift);
	int index = vaddr >> (vm->page_shift + level * 9) & 0x1ffu;

	return &page_table[index];
}
static struct pageUpperEntry *virt_create_upper_pte(struct kvm_vm *vm,
						    uint64_t pt_pfn,
						    uint64_t vaddr,
						    uint64_t paddr,
						    int level,
						    enum x86_page_size page_size)
{
	struct pageUpperEntry *pte = virt_get_pte(vm, pt_pfn, vaddr, level);

	if (!pte->present) {
		pte->writable = true;
		pte->present = true;
		pte->page_size = (level == page_size);
		if (pte->page_size)
			pte->pfn = paddr >> vm->page_shift;
		else
			pte->pfn = vm_alloc_page_table(vm) >> vm->page_shift;
	} else {
		/*
		 * Entry already present.  Assert that the caller doesn't want
		 * a hugepage at this level, and that there isn't a hugepage at
		 * this level.
		 */
		TEST_ASSERT(level != page_size,
			    "Cannot create hugepage at level: %u, vaddr: 0x%lx\n",
			    page_size, vaddr);
		TEST_ASSERT(!pte->page_size,
			    "Cannot create page table at level: %u, vaddr: 0x%lx\n",
			    level, vaddr);
	}
	return pte;
}
void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
		   enum x86_page_size page_size)
{
	const uint64_t pg_size = 1ull << ((page_size * 9) + 12);
	struct pageUpperEntry *pml4e, *pdpe, *pde;
	struct pageTableEntry *pte;

	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
		    "Unknown or unsupported guest mode, mode: 0x%x", vm->mode);

	TEST_ASSERT((vaddr % pg_size) == 0,
		    "Virtual address not aligned,\n"
		    "vaddr: 0x%lx page size: 0x%lx", vaddr, pg_size);
	TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (vaddr >> vm->page_shift)),
		    "Invalid virtual address, vaddr: 0x%lx", vaddr);
	TEST_ASSERT((paddr % pg_size) == 0,
		    "Physical address not aligned,\n"
		    "  paddr: 0x%lx page size: 0x%lx", paddr, pg_size);
	TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
		    "Physical address beyond maximum supported,\n"
		    "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
		    paddr, vm->max_gfn, vm->page_size);

	/*
	 * Allocate upper level page tables, if not already present.  Return
	 * early if a hugepage was created.
	 */
	pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift,
				      vaddr, paddr, 3, page_size);
	if (pml4e->page_size)
		return;

	pdpe = virt_create_upper_pte(vm, pml4e->pfn, vaddr, paddr, 2, page_size);
	if (pdpe->page_size)
		return;

	pde = virt_create_upper_pte(vm, pdpe->pfn, vaddr, paddr, 1, page_size);
	if (pde->page_size)
		return;

	/* Fill in page table entry. */
	pte = virt_get_pte(vm, pde->pfn, vaddr, 0);
	TEST_ASSERT(!pte->present,
		    "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr);
	pte->pfn = paddr >> vm->page_shift;
	pte->writable = true;
	pte->present = 1;
}
void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
{
	__virt_pg_map(vm, vaddr, paddr, X86_PAGE_SIZE_4K);
}
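
/*
 * Example usage (a sketch, not part of this file's callers): map one 4K
 * page of guest physical memory at a guest virtual address.  The vm handle
 * and the addresses are assumed to come from the caller's test setup; a 2M
 * mapping would instead use __virt_pg_map() with X86_PAGE_SIZE_2M.
 *
 *	vm_paddr_t paddr = vm_phy_page_alloc(vm, 0x10000, 0);
 *
 *	virt_pg_map(vm, 0x400000, paddr);
 */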
static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm,
						       int vcpuid,
						       uint64_t vaddr)
{
	uint16_t index[4];
	struct pageUpperEntry *pml4e, *pdpe, *pde;
	struct pageTableEntry *pte;
	struct kvm_cpuid_entry2 *entry;
	struct kvm_sregs sregs;
	int max_phy_addr;
	/* Set the bottom 52 bits. */
	uint64_t rsvd_mask = 0x000fffffffffffff;

	entry = kvm_get_supported_cpuid_index(0x80000008, 0);
	max_phy_addr = entry->eax & 0x000000ff;
	/* Clear the bottom bits of the reserved mask. */
	rsvd_mask = (rsvd_mask >> max_phy_addr) << max_phy_addr;

	/*
	 * SDM vol 3, fig 4-11 "Formats of CR3 and Paging-Structure Entries
	 * with 4-Level Paging and 5-Level Paging".
	 * If IA32_EFER.NXE = 0 and the P flag of a paging-structure entry is 1,
	 * the XD flag (bit 63) is reserved.
	 */
	vcpu_sregs_get(vm, vcpuid, &sregs);
	if ((sregs.efer & EFER_NX) == 0) {
		rsvd_mask |= (1ull << 63);
	}

	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
	TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
		(vaddr >> vm->page_shift)),
		"Invalid virtual address, vaddr: 0x%lx",
		vaddr);
	/*
	 * Based on the mode check above there are 48 bits in the vaddr, so
	 * shift 16 to sign extend the last bit (bit-47).
	 */
	TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16),
		"Canonical check failed.  The virtual address is invalid.");

	index[0] = (vaddr >> 12) & 0x1ffu;
	index[1] = (vaddr >> 21) & 0x1ffu;
	index[2] = (vaddr >> 30) & 0x1ffu;
	index[3] = (vaddr >> 39) & 0x1ffu;

	pml4e = addr_gpa2hva(vm, vm->pgd);
	TEST_ASSERT(pml4e[index[3]].present,
		"Expected pml4e to be present for gva: 0x%08lx", vaddr);
	TEST_ASSERT((*(uint64_t*)(&pml4e[index[3]]) &
		(rsvd_mask | (1ull << 7))) == 0,
		"Unexpected reserved bits set.");

	pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
	TEST_ASSERT(pdpe[index[2]].present,
		"Expected pdpe to be present for gva: 0x%08lx", vaddr);
	TEST_ASSERT(pdpe[index[2]].page_size == 0,
		"Expected pdpe to map a pde not a 1-GByte page.");
	TEST_ASSERT((*(uint64_t*)(&pdpe[index[2]]) & rsvd_mask) == 0,
		"Unexpected reserved bits set.");

	pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
	TEST_ASSERT(pde[index[1]].present,
		"Expected pde to be present for gva: 0x%08lx", vaddr);
	TEST_ASSERT(pde[index[1]].page_size == 0,
		"Expected pde to map a pte not a 2-MByte page.");
	TEST_ASSERT((*(uint64_t*)(&pde[index[1]]) & rsvd_mask) == 0,
		"Unexpected reserved bits set.");

	pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
	TEST_ASSERT(pte[index[0]].present,
		"Expected pte to be present for gva: 0x%08lx", vaddr);

	return &pte[index[0]];
}
uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr)
{
	struct pageTableEntry *pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);

	return *(uint64_t *)pte;
}
void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr,
			     uint64_t pte)
{
	struct pageTableEntry *new_pte = _vm_get_page_table_entry(vm, vcpuid,
								  vaddr);

	*(uint64_t *)new_pte = pte;
}
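
/*
 * Example usage (a sketch): tests typically pair the two helpers above for
 * a read-modify-write of a guest PTE, e.g. clearing the writable bit
 * (bit 1) to force a write-protection fault; vm and gva are assumed to be
 * the caller's.
 *
 *	uint64_t pte = vm_get_page_table_entry(vm, 0, gva);
 *
 *	vm_set_page_table_entry(vm, 0, gva, pte & ~(1ull << 1));
 */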
void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
	struct pageUpperEntry *pml4e, *pml4e_start;
	struct pageUpperEntry *pdpe, *pdpe_start;
	struct pageUpperEntry *pde, *pde_start;
	struct pageTableEntry *pte, *pte_start;

	if (!vm->pgd_created)
		return;

	fprintf(stream, "%*s                                          "
		"                no\n", indent, "");
	fprintf(stream, "%*s      index hvaddr         gpaddr         "
		"addr         w exec dirty\n",
		indent, "");
	pml4e_start = (struct pageUpperEntry *) addr_gpa2hva(vm, vm->pgd);
	for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
		pml4e = &pml4e_start[n1];
		if (!pml4e->present)
			continue;
		fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
			" %u\n",
			indent, "",
			pml4e - pml4e_start, pml4e,
			addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->pfn,
			pml4e->writable, pml4e->execute_disable);

		pdpe_start = addr_gpa2hva(vm, pml4e->pfn * vm->page_size);
		for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
			pdpe = &pdpe_start[n2];
			if (!pdpe->present)
				continue;
			fprintf(stream, "%*spdpe  0x%-3zx %p 0x%-12lx 0x%-10lx "
				"%u  %u\n",
				indent, "",
				pdpe - pdpe_start, pdpe,
				addr_hva2gpa(vm, pdpe),
				(uint64_t) pdpe->pfn, pdpe->writable,
				pdpe->execute_disable);

			pde_start = addr_gpa2hva(vm, pdpe->pfn * vm->page_size);
			for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
				pde = &pde_start[n3];
				if (!pde->present)
					continue;
				fprintf(stream, "%*spde   0x%-3zx %p "
					"0x%-12lx 0x%-10lx %u  %u\n",
					indent, "", pde - pde_start, pde,
					addr_hva2gpa(vm, pde),
					(uint64_t) pde->pfn, pde->writable,
					pde->execute_disable);

				pte_start = addr_gpa2hva(vm, pde->pfn * vm->page_size);
				for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
					pte = &pte_start[n4];
					if (!pte->present)
						continue;
					fprintf(stream, "%*spte   0x%-3zx %p "
						"0x%-12lx 0x%-10lx %u  %u "
						"    %u    0x%-10lx\n",
						indent, "",
						pte - pte_start, pte,
						addr_hva2gpa(vm, pte),
						(uint64_t) pte->pfn,
						pte->writable,
						pte->execute_disable,
						pte->dirty,
						((uint64_t) n1 << 27)
							| ((uint64_t) n2 << 18)
							| ((uint64_t) n3 << 9)
							| ((uint64_t) n4));
				}
			}
		}
	}
}
/*
 * Set Unusable Segment
 *
 * Input Args: None
 *
 * Output Args:
 *   segp - Pointer to segment register
 *
 * Return: None
 *
 * Sets the segment register pointed to by @segp to an unusable state.
 */
static void kvm_seg_set_unusable(struct kvm_segment *segp)
{
	memset(segp, 0, sizeof(*segp));
	segp->unusable = true;
}
static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
{
	void *gdt = addr_gva2hva(vm, vm->gdt);
	struct desc64 *desc = gdt + (segp->selector >> 3) * 8;

	desc->limit0 = segp->limit & 0xFFFF;
	desc->base0 = segp->base & 0xFFFF;
	desc->base1 = segp->base >> 16;
	desc->type = segp->type;
	desc->s = segp->s;
	desc->dpl = segp->dpl;
	desc->p = segp->present;
	desc->limit1 = segp->limit >> 16;
	desc->avl = segp->avl;
	desc->l = segp->l;
	desc->db = segp->db;
	desc->g = segp->g;
	desc->base2 = segp->base >> 24;
	if (!segp->s)
		desc->base3 = segp->base >> 32;
}
/*
 * Set Long Mode Flat Kernel Code Segment
 *
 * Input Args:
 *   vm - VM whose GDT is being filled, or NULL to only write segp
 *   selector - selector value
 *
 * Output Args:
 *   segp - Pointer to KVM segment
 *
 * Return: None
 *
 * Sets up the KVM segment pointed to by @segp, to be a code segment
 * with the selector value given by @selector.
 */
static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector,
	struct kvm_segment *segp)
{
	memset(segp, 0, sizeof(*segp));
	segp->selector = selector;
	segp->limit = 0xFFFFFFFFu;
	segp->s = 0x1; /* kTypeCodeData */
	segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed
					  * | kFlagCodeReadable
					  */
	segp->g = true;
	segp->l = true;
	segp->present = 1;
	if (vm)
		kvm_seg_fill_gdt_64bit(vm, segp);
}
/*
 * Set Long Mode Flat Kernel Data Segment
 *
 * Input Args:
 *   vm - VM whose GDT is being filled, or NULL to only write segp
 *   selector - selector value
 *
 * Output Args:
 *   segp - Pointer to KVM segment
 *
 * Return: None
 *
 * Sets up the KVM segment pointed to by @segp, to be a data segment
 * with the selector value given by @selector.
 */
static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
	struct kvm_segment *segp)
{
	memset(segp, 0, sizeof(*segp));
	segp->selector = selector;
	segp->limit = 0xFFFFFFFFu;
	segp->s = 0x1; /* kTypeCodeData */
	segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed
					  * | kFlagDataWritable
					  */
	segp->g = true;
	segp->present = true;
	if (vm)
		kvm_seg_fill_gdt_64bit(vm, segp);
}
vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
	uint16_t index[4];
	struct pageUpperEntry *pml4e, *pdpe, *pde;
	struct pageTableEntry *pte;

	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);

	index[0] = (gva >> 12) & 0x1ffu;
	index[1] = (gva >> 21) & 0x1ffu;
	index[2] = (gva >> 30) & 0x1ffu;
	index[3] = (gva >> 39) & 0x1ffu;

	if (!vm->pgd_created)
		goto unmapped_gva;

	pml4e = addr_gpa2hva(vm, vm->pgd);
	if (!pml4e[index[3]].present)
		goto unmapped_gva;

	pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
	if (!pdpe[index[2]].present)
		goto unmapped_gva;

	pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
	if (!pde[index[1]].present)
		goto unmapped_gva;

	pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
	if (!pte[index[0]].present)
		goto unmapped_gva;

	return (pte[index[0]].pfn * vm->page_size) + (gva & 0xfffu);

unmapped_gva:
	TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
	exit(EXIT_FAILURE);
}
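
/*
 * Example usage (a sketch): the typical pattern for getting a host pointer
 * to guest data is a two-step translation; addr_gva2hva() in kvm_util
 * wraps exactly this.  vm and gva are assumed to be the caller's.
 *
 *	vm_paddr_t gpa = addr_gva2gpa(vm, gva);
 *	void *hva = addr_gpa2hva(vm, gpa);
 */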
static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt)
{
	if (!vm->gdt)
		vm->gdt = vm_vaddr_alloc_page(vm);

	dt->base = vm->gdt;
	dt->limit = getpagesize();
}
static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
				int selector)
{
	if (!vm->tss)
		vm->tss = vm_vaddr_alloc_page(vm);

	memset(segp, 0, sizeof(*segp));
	segp->base = vm->tss;
	segp->limit = 0x67;
	segp->selector = selector;
	segp->type = 0xb;
	segp->present = 1;
	kvm_seg_fill_gdt_64bit(vm, segp);
}
static void vcpu_setup(struct kvm_vm *vm, int vcpuid)
{
	struct kvm_sregs sregs;

	/* Set mode specific system register values. */
	vcpu_sregs_get(vm, vcpuid, &sregs);

	sregs.idt.limit = 0;

	kvm_setup_gdt(vm, &sregs.gdt);

	switch (vm->mode) {
	case VM_MODE_PXXV48_4K:
		sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
		sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
		sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);

		kvm_seg_set_unusable(&sregs.ldt);
		kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs);
		kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds);
		kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es);
		kvm_setup_tss_64bit(vm, &sregs.tr, 0x18);
		break;

	default:
		TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
	}

	sregs.cr3 = vm->pgd;
	vcpu_sregs_set(vm, vcpuid, &sregs);
}
void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
{
	struct kvm_mp_state mp_state;
	struct kvm_regs regs;
	vm_vaddr_t stack_vaddr;
	stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
				     DEFAULT_GUEST_STACK_VADDR_MIN);

	/* Create VCPU */
	vm_vcpu_add(vm, vcpuid);
	vcpu_setup(vm, vcpuid);

	/* Setup guest general purpose registers */
	vcpu_regs_get(vm, vcpuid, &regs);
	regs.rflags = regs.rflags | 0x2;
	regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize());
	regs.rip = (unsigned long) guest_code;
	vcpu_regs_set(vm, vcpuid, &regs);

	/* Setup the MP state */
	mp_state.mp_state = 0;
	vcpu_set_mp_state(vm, vcpuid, &mp_state);

	/* Setup supported CPUIDs */
	vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
}
/*
 * Allocate an instance of struct kvm_cpuid2
 *
 * Input Args: None
 *
 * Output Args: None
 *
 * Return: A pointer to the allocated struct. The caller is responsible
 * for freeing this struct.
 *
 * Since kvm_cpuid2 uses a 0-length array to allow the size of the
 * array to be decided at allocation time, allocation is slightly
 * complicated. This function uses a reasonable default length for
 * the array and performs the appropriate allocation.
 */
static struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
{
	struct kvm_cpuid2 *cpuid;
	int nent = 100;
	size_t size;

	size = sizeof(*cpuid);
	size += nent * sizeof(struct kvm_cpuid_entry2);
	cpuid = malloc(size);
	if (cpuid == NULL) {
		perror("malloc");
		abort();
	}

	cpuid->nent = nent;

	return cpuid;
}
/*
 * KVM Supported CPUID Get
 *
 * Input Args: None
 *
 * Output Args: None
 *
 * Return: The supported KVM CPUID
 *
 * Get the guest CPUID supported by KVM.
 */
struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
{
	static struct kvm_cpuid2 *cpuid;
	int ret;
	int kvm_fd;

	if (cpuid)
		return cpuid;

	cpuid = allocate_kvm_cpuid2();
	kvm_fd = open_kvm_dev_path_or_exit();

	ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
	TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
		    ret, errno);

	close(kvm_fd);
	return cpuid;
}
/*
 * KVM Get MSR
 *
 * Input Args:
 *   msr_index - Index of MSR
 *
 * Output Args: None
 *
 * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced.
 *
 * Get value of MSR for VCPU.
 */
uint64_t kvm_get_feature_msr(uint64_t msr_index)
{
	struct {
		struct kvm_msrs header;
		struct kvm_msr_entry entry;
	} buffer = {};
	int r, kvm_fd;

	buffer.header.nmsrs = 1;
	buffer.entry.index = msr_index;
	kvm_fd = open_kvm_dev_path_or_exit();

	r = ioctl(kvm_fd, KVM_GET_MSRS, &buffer.header);
	TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
		    "  rc: %i errno: %i", r, errno);

	close(kvm_fd);
	return buffer.entry.data;
}
/*
 * VM VCPU CPUID Get
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU id
 *
 * Output Args: None
 *
 * Return: KVM CPUID (KVM_GET_CPUID2)
 *
 * Get the VCPU's CPUID.
 */
struct kvm_cpuid2 *vcpu_get_cpuid(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	struct kvm_cpuid2 *cpuid;
	int max_ent;
	int rc = -1;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	cpuid = allocate_kvm_cpuid2();
	max_ent = cpuid->nent;

	for (cpuid->nent = 1; cpuid->nent <= max_ent; cpuid->nent++) {
		rc = ioctl(vcpu->fd, KVM_GET_CPUID2, cpuid);
		if (!rc)
			break;

		TEST_ASSERT(rc == -1 && errno == E2BIG,
			    "KVM_GET_CPUID2 should either succeed or give E2BIG: %d %d",
			    rc, errno);
	}

	TEST_ASSERT(rc == 0, "KVM_GET_CPUID2 failed, rc: %i errno: %i",
		    rc, errno);

	return cpuid;
}
/*
 * Locate a cpuid entry.
 *
 * Input Args:
 *   function: The function of the cpuid entry to find.
 *   index: The index of the cpuid entry.
 *
 * Output Args: None
 *
 * Return: A pointer to the cpuid entry. Never returns NULL.
 */
struct kvm_cpuid_entry2 *
kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
{
	struct kvm_cpuid2 *cpuid;
	struct kvm_cpuid_entry2 *entry = NULL;
	int i;

	cpuid = kvm_get_supported_cpuid();
	for (i = 0; i < cpuid->nent; i++) {
		if (cpuid->entries[i].function == function &&
		    cpuid->entries[i].index == index) {
			entry = &cpuid->entries[i];
			break;
		}
	}

	TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).",
		    function, index);
	return entry;
}
/*
 * VM VCPU CPUID Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU id
 *   cpuid - The CPUID values to set.
 *
 * Output Args: None
 *
 * Return: void
 *
 * Set the VCPU's CPUID.
 */
void vcpu_set_cpuid(struct kvm_vm *vm,
		uint32_t vcpuid, struct kvm_cpuid2 *cpuid)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int rc;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid);
	TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i",
		    rc, errno);
}
/*
 * VCPU Get MSR
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   msr_index - Index of MSR
 *
 * Output Args: None
 *
 * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced.
 *
 * Get value of MSR for VCPU.
 */
uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	struct {
		struct kvm_msrs header;
		struct kvm_msr_entry entry;
	} buffer = {};
	int r;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
	buffer.header.nmsrs = 1;
	buffer.entry.index = msr_index;
	r = ioctl(vcpu->fd, KVM_GET_MSRS, &buffer.header);
	TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
		    "  rc: %i errno: %i", r, errno);

	return buffer.entry.data;
}
/*
 * _VCPU Set MSR
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   msr_index - Index of MSR
 *   msr_value - New value of MSR
 *
 * Output Args: None
 *
 * Return: The result of KVM_SET_MSRS.
 *
 * Sets the value of an MSR for the given VCPU.
 */
int _vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
		  uint64_t msr_value)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	struct {
		struct kvm_msrs header;
		struct kvm_msr_entry entry;
	} buffer = {};
	int r;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
	memset(&buffer, 0, sizeof(buffer));
	buffer.header.nmsrs = 1;
	buffer.entry.index = msr_index;
	buffer.entry.data = msr_value;
	r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header);
	return r;
}
/*
 * VCPU Set MSR
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   msr_index - Index of MSR
 *   msr_value - New value of MSR
 *
 * Output Args: None
 *
 * Return: On success, nothing. On failure a TEST_ASSERT is produced.
 *
 * Set value of MSR for VCPU.
 */
void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
	uint64_t msr_value)
{
	int r;

	r = _vcpu_set_msr(vm, vcpuid, msr_index, msr_value);
	TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n"
		    "  rc: %i errno: %i", r, errno);
}
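
/*
 * Example usage (a sketch): read-modify-write of a VCPU MSR; the MSR index
 * and the bit flipped here are illustrative only.
 *
 *	uint64_t val = vcpu_get_msr(vm, VCPU_ID, MSR_IA32_MISC_ENABLE);
 *
 *	vcpu_set_msr(vm, VCPU_ID, MSR_IA32_MISC_ENABLE, val | 1);
 */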
void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
{
	va_list ap;
	struct kvm_regs regs;

	TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
		    "  num: %u\n", num);

	va_start(ap, num);
	vcpu_regs_get(vm, vcpuid, &regs);

	if (num >= 1)
		regs.rdi = va_arg(ap, uint64_t);

	if (num >= 2)
		regs.rsi = va_arg(ap, uint64_t);

	if (num >= 3)
		regs.rdx = va_arg(ap, uint64_t);

	if (num >= 4)
		regs.rcx = va_arg(ap, uint64_t);

	if (num >= 5)
		regs.r8 = va_arg(ap, uint64_t);

	if (num >= 6)
		regs.r9 = va_arg(ap, uint64_t);

	vcpu_regs_set(vm, vcpuid, &regs);
	va_end(ap);
}
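
/*
 * Example usage (a sketch): the values land in rdi, rsi, ... per the
 * System V x86-64 calling convention, so they arrive as the guest
 * function's parameters; guest_code below is hypothetical.
 *
 *	// void guest_code(uint64_t arg0, uint64_t arg1);
 *	vcpu_args_set(vm, VCPU_ID, 2, 0xcafeull, 0xf00dull);
 */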
void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
{
	struct kvm_regs regs;
	struct kvm_sregs sregs;

	fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid);

	fprintf(stream, "%*sregs:\n", indent + 2, "");
	vcpu_regs_get(vm, vcpuid, &regs);
	regs_dump(stream, &regs, indent + 4);

	fprintf(stream, "%*ssregs:\n", indent + 2, "");
	vcpu_sregs_get(vm, vcpuid, &sregs);
	sregs_dump(stream, &sregs, indent + 4);
}
struct kvm_x86_state {
	struct kvm_vcpu_events events;
	struct kvm_mp_state mp_state;
	struct kvm_regs regs;
	struct kvm_xsave xsave;
	struct kvm_xcrs xcrs;
	struct kvm_sregs sregs;
	struct kvm_debugregs debugregs;
	union {
		struct kvm_nested_state nested;
		char nested_[16384];
	};
	struct kvm_msrs msrs;
};
static int kvm_get_num_msrs_fd(int kvm_fd)
{
	struct kvm_msr_list nmsrs;
	int r;

	nmsrs.nmsrs = 0;
	r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
	TEST_ASSERT(r == -1 && errno == E2BIG, "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i",
		    r);

	return nmsrs.nmsrs;
}
static int kvm_get_num_msrs(struct kvm_vm *vm)
{
	return kvm_get_num_msrs_fd(vm->kvm_fd);
}
struct kvm_msr_list *kvm_get_msr_index_list(void)
{
	struct kvm_msr_list *list;
	int nmsrs, r, kvm_fd;

	kvm_fd = open_kvm_dev_path_or_exit();

	nmsrs = kvm_get_num_msrs_fd(kvm_fd);
	list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
	list->nmsrs = nmsrs;
	r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
	close(kvm_fd);

	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
		    r);

	return list;
}
struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	struct kvm_msr_list *list;
	struct kvm_x86_state *state;
	int nmsrs, r, i;
	static int nested_size = -1;

	if (nested_size == -1) {
		nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE);
		TEST_ASSERT(nested_size <= sizeof(state->nested_),
			    "Nested state size too big, %i > %zi",
			    nested_size, sizeof(state->nested_));
	}

	/*
	 * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees
	 * guest state is consistent only after userspace re-enters the
	 * kernel with KVM_RUN.  Complete IO prior to migrating state
	 * to a new VM.
	 */
	vcpu_run_complete_io(vm, vcpuid);

	nmsrs = kvm_get_num_msrs(vm);
	list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
	list->nmsrs = nmsrs;
	r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
		    r);

	state = malloc(sizeof(*state) + nmsrs * sizeof(state->msrs.entries[0]));
	r = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, &state->events);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_GET_MP_STATE, &state->mp_state);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_GET_REGS, &state->regs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_GET_XSAVE, &state->xsave);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i",
		    r);

	if (kvm_check_cap(KVM_CAP_XCRS)) {
		r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs);
		TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XCRS, r: %i",
			    r);
	}

	r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i",
		    r);

	if (nested_size) {
		state->nested.size = sizeof(state->nested_);
		r = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, &state->nested);
		TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_NESTED_STATE, r: %i",
			    r);
		TEST_ASSERT(state->nested.size <= nested_size,
			    "Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
			    state->nested.size, nested_size);
	} else {
		state->nested.size = 0;
	}

	state->msrs.nmsrs = nmsrs;
	for (i = 0; i < nmsrs; i++)
		state->msrs.entries[i].index = list->indices[i];
	r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs);
	TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)",
		    r, r == nmsrs ? -1 : list->indices[r]);

	r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i",
		    r);

	free(list);
	return state;
}
void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int r;

	r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i",
		    r);

	if (kvm_check_cap(KVM_CAP_XCRS)) {
		r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs);
		TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i",
			    r);
	}

	r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs);
	TEST_ASSERT(r == state->msrs.nmsrs, "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)",
		    r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index);

	r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_SET_MP_STATE, &state->mp_state);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_SET_DEBUGREGS, &state->debugregs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i",
		    r);

	if (state->nested.size) {
		r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested);
		TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i",
			    r);
	}
}
bool is_intel_cpu(void)
{
	int eax, ebx, ecx, edx;
	const uint32_t *chunk;
	const int leaf = 0;

	__asm__ __volatile__(
		"cpuid"
		: /* output */ "=a"(eax), "=b"(ebx),
		  "=c"(ecx), "=d"(edx)
		: /* input */ "0"(leaf), "2"(0));

	chunk = (const uint32_t *)("GenuineIntel");
	return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
}
uint32_t kvm_get_cpuid_max_basic(void)
{
	return kvm_get_supported_cpuid_entry(0)->eax;
}

uint32_t kvm_get_cpuid_max_extended(void)
{
	return kvm_get_supported_cpuid_entry(0x80000000)->eax;
}
void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
{
	struct kvm_cpuid_entry2 *entry;
	bool pae;

	/* SDM 4.1.4 */
	if (kvm_get_cpuid_max_extended() < 0x80000008) {
		pae = kvm_get_supported_cpuid_entry(1)->edx & (1 << 6);
		*pa_bits = pae ? 36 : 32;
		*va_bits = 32;
	} else {
		entry = kvm_get_supported_cpuid_entry(0x80000008);
		*pa_bits = entry->eax & 0xff;
		*va_bits = (entry->eax >> 8) & 0xff;
	}
}
struct idt_entry {
	uint16_t offset0;
	uint16_t selector;
	uint16_t ist : 3;
	uint16_t : 5;
	uint16_t type : 4;
	uint16_t : 1;
	uint16_t dpl : 2;
	uint16_t p : 1;
	uint16_t offset1;
	uint32_t offset2; uint32_t reserved;
};

static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
			  int dpl, unsigned short selector)
{
	struct idt_entry *base =
		(struct idt_entry *)addr_gva2hva(vm, vm->idt);
	struct idt_entry *e = &base[vector];

	memset(e, 0, sizeof(*e));
	e->offset0 = addr;
	e->selector = selector;
	e->ist = 0;
	e->type = 14;
	e->dpl = dpl;
	e->p = 1;
	e->offset1 = addr >> 16;
	e->offset2 = addr >> 32;
}
void kvm_exit_unexpected_vector(uint32_t value)
{
	ucall(UCALL_UNHANDLED, 1, value);
}
void route_exception(struct ex_regs *regs)
{
	typedef void(*handler)(struct ex_regs *);
	handler *handlers = (handler *)exception_handlers;

	if (handlers && handlers[regs->vector]) {
		handlers[regs->vector](regs);
		return;
	}

	kvm_exit_unexpected_vector(regs->vector);
}
void vm_init_descriptor_tables(struct kvm_vm *vm)
{
	extern void *idt_handlers;
	int i;

	vm->idt = vm_vaddr_alloc_page(vm);
	vm->handlers = vm_vaddr_alloc_page(vm);
	/* Handlers have the same address in both address spaces.*/
	for (i = 0; i < NUM_INTERRUPTS; i++)
		set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0,
			DEFAULT_CODE_SELECTOR);
}
void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct kvm_sregs sregs;

	vcpu_sregs_get(vm, vcpuid, &sregs);
	sregs.idt.base = vm->idt;
	sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1;
	sregs.gdt.base = vm->gdt;
	sregs.gdt.limit = getpagesize() - 1;
	kvm_seg_set_kernel_data_64bit(NULL, DEFAULT_DATA_SELECTOR, &sregs.gs);
	vcpu_sregs_set(vm, vcpuid, &sregs);
	*(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
}
void vm_install_exception_handler(struct kvm_vm *vm, int vector,
			       void (*handler)(struct ex_regs *))
{
	vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers);

	handlers[vector] = (vm_vaddr_t)handler;
}
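
/*
 * Example usage (a sketch): install a guest #GP handler that skips the
 * faulting instruction; insn_len is a hypothetical length known to the
 * test.
 *
 *	static void guest_gp_handler(struct ex_regs *regs)
 *	{
 *		regs->rip += insn_len;
 *	}
 *
 *	vm_init_descriptor_tables(vm);
 *	vcpu_init_descriptor_tables(vm, VCPU_ID);
 *	vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
 */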
void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct ucall uc;

	if (get_ucall(vm, vcpuid, &uc) == UCALL_UNHANDLED) {
		uint64_t vector = uc.args[0];

		TEST_FAIL("Unexpected vectored event in guest (vector:0x%lx)",
			  vector);
	}
}
bool set_cpuid(struct kvm_cpuid2 *cpuid,
	       struct kvm_cpuid_entry2 *ent)
{
	int i;

	for (i = 0; i < cpuid->nent; i++) {
		struct kvm_cpuid_entry2 *cur = &cpuid->entries[i];

		if (cur->function != ent->function || cur->index != ent->index)
			continue;

		memcpy(cur, ent, sizeof(struct kvm_cpuid_entry2));
		return true;
	}

	return false;
}
uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
		       uint64_t a3)
{
	uint64_t r;

	asm volatile("vmcall"
		     : "=a"(r)
		     : "a"(nr), "b"(a0), "c"(a1), "d"(a2), "S"(a3));
	return r;
}
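
/*
 * Example usage (a sketch): issue KVM_HC_SEND_IPI from guest code; the
 * argument layout follows Documentation/virt/kvm/hypercalls.rst and the
 * operand names below are illustrative only.
 *
 *	uint64_t ret = kvm_hypercall(KVM_HC_SEND_IPI, ipi_bitmap_low,
 *				     ipi_bitmap_high, min_apic_id, icr);
 */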
struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void)
{
	static struct kvm_cpuid2 *cpuid;
	int ret;
	int kvm_fd;

	if (cpuid)
		return cpuid;

	cpuid = allocate_kvm_cpuid2();
	kvm_fd = open_kvm_dev_path_or_exit();

	ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
	TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_HV_CPUID failed %d %d\n",
		    ret, errno);

	close(kvm_fd);
	return cpuid;
}
void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid)
{
	static struct kvm_cpuid2 *cpuid_full;
	struct kvm_cpuid2 *cpuid_sys, *cpuid_hv;
	int i, nent = 0;

	if (!cpuid_full) {
		cpuid_sys = kvm_get_supported_cpuid();
		cpuid_hv = kvm_get_supported_hv_cpuid();

		cpuid_full = malloc(sizeof(*cpuid_full) +
				    (cpuid_sys->nent + cpuid_hv->nent) *
				    sizeof(struct kvm_cpuid_entry2));
		if (!cpuid_full) {
			perror("malloc");
			abort();
		}

		/* Need to skip KVM CPUID leaves 0x400000xx */
		for (i = 0; i < cpuid_sys->nent; i++) {
			if (cpuid_sys->entries[i].function >= 0x40000000 &&
			    cpuid_sys->entries[i].function < 0x40000100)
				continue;
			cpuid_full->entries[nent] = cpuid_sys->entries[i];
			nent++;
		}

		memcpy(&cpuid_full->entries[nent], cpuid_hv->entries,
		       cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2));
		cpuid_full->nent = nent + cpuid_hv->nent;
	}

	vcpu_set_cpuid(vm, vcpuid, cpuid_full);
}
struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid)
{
	static struct kvm_cpuid2 *cpuid;

	cpuid = allocate_kvm_cpuid2();

	vcpu_ioctl(vm, vcpuid, KVM_GET_SUPPORTED_HV_CPUID, cpuid);

	return cpuid;
}
#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541
#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163
#define X86EMUL_CPUID_VENDOR_AuthenticAMD_edx 0x69746e65
static inline unsigned x86_family(unsigned int eax)
{
	unsigned int x86;

	x86 = (eax >> 8) & 0xf;

	if (x86 == 0xf)
		x86 += (eax >> 20) & 0xff;

	return x86;
}
unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
{
	const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */
	unsigned long ht_gfn, max_gfn, max_pfn;
	uint32_t eax, ebx, ecx, edx, max_ext_leaf;

	max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1;

	/* Avoid reserved HyperTransport region on AMD processors.  */
	eax = ecx = 0;
	cpuid(&eax, &ebx, &ecx, &edx);
	if (ebx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx ||
	    ecx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx ||
	    edx != X86EMUL_CPUID_VENDOR_AuthenticAMD_edx)
		return max_gfn;

	/* On parts with <40 physical address bits, the area is fully hidden */
	if (vm->pa_bits < 40)
		return max_gfn;

	/* Before family 17h, the HyperTransport area is just below 1T.  */
	ht_gfn = (1 << 28) - num_ht_pages;
	eax = 1;
	ecx = 0;
	cpuid(&eax, &ebx, &ecx, &edx);
	if (x86_family(eax) < 0x17)
		goto done;

	/*
	 * Otherwise it's at the top of the physical address space, possibly
	 * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX.  Use
	 * the old conservative value if MAXPHYADDR is not enumerated.
	 */
	eax = 0x80000000;
	cpuid(&eax, &ebx, &ecx, &edx);
	max_ext_leaf = eax;
	if (max_ext_leaf < 0x80000008)
		goto done;

	eax = 0x80000008;
	cpuid(&eax, &ebx, &ecx, &edx);
	max_pfn = (1ULL << ((eax & 0xff) - vm->page_shift)) - 1;
	if (max_ext_leaf >= 0x8000001f) {
		eax = 0x8000001f;
		cpuid(&eax, &ebx, &ecx, &edx);
		max_pfn >>= (ebx >> 6) & 0x3f;
	}

	ht_gfn = max_pfn - num_ht_pages;
done:
	return min(max_gfn, ht_gfn - 1);
}