/*
 * QEMU Windows Hypervisor Platform accelerator (WHPX)
 *
 * Copyright Microsoft Corp. 2017
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */
11 #include "qemu/osdep.h"
13 #include "exec/address-spaces.h"
14 #include "exec/ioport.h"
15 #include "qemu-common.h"
16 #include "qemu/accel.h"
17 #include "sysemu/whpx.h"
18 #include "sysemu/cpus.h"
19 #include "sysemu/runstate.h"
20 #include "qemu/main-loop.h"
21 #include "hw/boards.h"
22 #include "hw/i386/ioapic.h"
23 #include "hw/i386/apic_internal.h"
24 #include "qemu/error-report.h"
25 #include "qapi/error.h"
26 #include "qapi/qapi-types-common.h"
27 #include "qapi/qapi-visit-common.h"
28 #include "migration/blocker.h"
31 #include "whpx-internal.h"
32 #include "whpx-accel-ops.h"
34 #include <WinHvPlatform.h>
35 #include <WinHvEmulation.h>
/*
 * APIC bus frequency constant (200,000,000).
 * NOTE(review): presumably expressed in Hz and used for the Hyper-V
 * partition's APIC timer; no use of it is visible here - confirm.
 */
#define HYPERV_APIC_BUS_FREQUENCY (200000000ULL)
39 static const WHV_REGISTER_NAME whpx_register_names
[] = {
41 /* X64 General purpose registers */
61 /* X64 Segment registers */
71 /* X64 Table registers */
75 /* X64 Control Registers */
82 /* X64 Debug Registers */
92 /* X64 Floating Point and Vector Registers */
109 WHvX64RegisterFpMmx0
,
110 WHvX64RegisterFpMmx1
,
111 WHvX64RegisterFpMmx2
,
112 WHvX64RegisterFpMmx3
,
113 WHvX64RegisterFpMmx4
,
114 WHvX64RegisterFpMmx5
,
115 WHvX64RegisterFpMmx6
,
116 WHvX64RegisterFpMmx7
,
117 WHvX64RegisterFpControlStatus
,
118 WHvX64RegisterXmmControlStatus
,
123 WHvX64RegisterKernelGsBase
,
125 WHvX64RegisterApicBase
,
126 /* WHvX64RegisterPat, */
127 WHvX64RegisterSysenterCs
,
128 WHvX64RegisterSysenterEip
,
129 WHvX64RegisterSysenterEsp
,
134 WHvX64RegisterSfmask
,
137 /* Interrupt / Event Registers */
139 * WHvRegisterPendingInterruption,
140 * WHvRegisterInterruptState,
141 * WHvRegisterPendingEvent0,
142 * WHvRegisterPendingEvent1
143 * WHvX64RegisterDeliverabilityNotifications,
147 struct whpx_register_set
{
148 WHV_REGISTER_VALUE values
[RTL_NUMBER_OF(whpx_register_names
)];
152 WHV_EMULATOR_HANDLE emulator
;
153 bool window_registered
;
155 bool ready_for_pic_interrupt
;
158 bool interruption_pending
;
160 /* Must be the last field as it may have a tail */
161 WHV_RUN_VP_EXIT_CONTEXT exit_ctx
;
164 static bool whpx_allowed
;
165 static bool whp_dispatch_initialized
;
166 static HMODULE hWinHvPlatform
, hWinHvEmulation
;
167 static uint32_t max_vcpu_index
;
168 struct whpx_state whpx_global
;
169 struct WHPDispatch whp_dispatch
;
176 static struct whpx_vcpu
*get_whpx_vcpu(CPUState
*cpu
)
178 return (struct whpx_vcpu
*)cpu
->hax_vcpu
;
181 static WHV_X64_SEGMENT_REGISTER
whpx_seg_q2h(const SegmentCache
*qs
, int v86
,
184 WHV_X64_SEGMENT_REGISTER hs
;
185 unsigned flags
= qs
->flags
;
188 hs
.Limit
= qs
->limit
;
189 hs
.Selector
= qs
->selector
;
195 hs
.DescriptorPrivilegeLevel
= 3;
196 hs
.NonSystemSegment
= 1;
199 hs
.Attributes
= (flags
>> DESC_TYPE_SHIFT
);
202 /* hs.Base &= 0xfffff; */
209 static SegmentCache
whpx_seg_h2q(const WHV_X64_SEGMENT_REGISTER
*hs
)
214 qs
.limit
= hs
->Limit
;
215 qs
.selector
= hs
->Selector
;
217 qs
.flags
= ((uint32_t)hs
->Attributes
) << DESC_TYPE_SHIFT
;
222 static int whpx_set_tsc(CPUState
*cpu
)
224 CPUX86State
*env
= cpu
->env_ptr
;
225 WHV_REGISTER_NAME tsc_reg
= WHvX64RegisterTsc
;
226 WHV_REGISTER_VALUE tsc_val
;
228 struct whpx_state
*whpx
= &whpx_global
;
231 * Suspend the partition prior to setting the TSC to reduce the variance
232 * in TSC across vCPUs. When the first vCPU runs post suspend, the
233 * partition is automatically resumed.
235 if (whp_dispatch
.WHvSuspendPartitionTime
) {
238 * Unable to suspend partition while setting TSC is not a fatal
239 * error. It just increases the likelihood of TSC variance between
240 * vCPUs and some guest OS are able to handle that just fine.
242 hr
= whp_dispatch
.WHvSuspendPartitionTime(whpx
->partition
);
244 warn_report("WHPX: Failed to suspend partition, hr=%08lx", hr
);
248 tsc_val
.Reg64
= env
->tsc
;
249 hr
= whp_dispatch
.WHvSetVirtualProcessorRegisters(
250 whpx
->partition
, cpu
->cpu_index
, &tsc_reg
, 1, &tsc_val
);
252 error_report("WHPX: Failed to set TSC, hr=%08lx", hr
);
/*
 * The CR8 register in the CPU is mapped to the TPR register of the APIC,
 * however, they use a slightly different encoding. Specifically:
 *
 *     APIC.TPR[bits 7:4] = CR8[bits 3:0]
 *
 * This mechanism is described in section 10.8.6.1 of Volume 3 of Intel 64
 * and IA-32 Architectures Software Developer's Manual.
 */
269 static uint64_t whpx_apic_tpr_to_cr8(uint64_t tpr
)
274 static void whpx_set_registers(CPUState
*cpu
, int level
)
276 struct whpx_state
*whpx
= &whpx_global
;
277 struct whpx_vcpu
*vcpu
= get_whpx_vcpu(cpu
);
278 CPUX86State
*env
= cpu
->env_ptr
;
279 X86CPU
*x86_cpu
= X86_CPU(cpu
);
280 struct whpx_register_set vcxt
;
287 assert(cpu_is_stopped(cpu
) || qemu_cpu_is_self(cpu
));
290 * Following MSRs have side effects on the guest or are too heavy for
291 * runtime. Limit them to full state update.
293 if (level
>= WHPX_SET_RESET_STATE
) {
297 memset(&vcxt
, 0, sizeof(struct whpx_register_set
));
299 v86
= (env
->eflags
& VM_MASK
);
300 r86
= !(env
->cr
[0] & CR0_PE_MASK
);
302 vcpu
->tpr
= whpx_apic_tpr_to_cr8(cpu_get_apic_tpr(x86_cpu
->apic_state
));
303 vcpu
->apic_base
= cpu_get_apic_base(x86_cpu
->apic_state
);
307 /* Indexes for first 16 registers match between HV and QEMU definitions */
309 for (idx
= 0; idx
< CPU_NB_REGS
; idx
+= 1) {
310 vcxt
.values
[idx
].Reg64
= (uint64_t)env
->regs
[idx
];
314 /* Same goes for RIP and RFLAGS */
315 assert(whpx_register_names
[idx
] == WHvX64RegisterRip
);
316 vcxt
.values
[idx
++].Reg64
= env
->eip
;
318 assert(whpx_register_names
[idx
] == WHvX64RegisterRflags
);
319 vcxt
.values
[idx
++].Reg64
= env
->eflags
;
321 /* Translate 6+4 segment registers. HV and QEMU order matches */
322 assert(idx
== WHvX64RegisterEs
);
323 for (i
= 0; i
< 6; i
+= 1, idx
+= 1) {
324 vcxt
.values
[idx
].Segment
= whpx_seg_q2h(&env
->segs
[i
], v86
, r86
);
327 assert(idx
== WHvX64RegisterLdtr
);
328 vcxt
.values
[idx
++].Segment
= whpx_seg_q2h(&env
->ldt
, 0, 0);
330 assert(idx
== WHvX64RegisterTr
);
331 vcxt
.values
[idx
++].Segment
= whpx_seg_q2h(&env
->tr
, 0, 0);
333 assert(idx
== WHvX64RegisterIdtr
);
334 vcxt
.values
[idx
].Table
.Base
= env
->idt
.base
;
335 vcxt
.values
[idx
].Table
.Limit
= env
->idt
.limit
;
338 assert(idx
== WHvX64RegisterGdtr
);
339 vcxt
.values
[idx
].Table
.Base
= env
->gdt
.base
;
340 vcxt
.values
[idx
].Table
.Limit
= env
->gdt
.limit
;
343 /* CR0, 2, 3, 4, 8 */
344 assert(whpx_register_names
[idx
] == WHvX64RegisterCr0
);
345 vcxt
.values
[idx
++].Reg64
= env
->cr
[0];
346 assert(whpx_register_names
[idx
] == WHvX64RegisterCr2
);
347 vcxt
.values
[idx
++].Reg64
= env
->cr
[2];
348 assert(whpx_register_names
[idx
] == WHvX64RegisterCr3
);
349 vcxt
.values
[idx
++].Reg64
= env
->cr
[3];
350 assert(whpx_register_names
[idx
] == WHvX64RegisterCr4
);
351 vcxt
.values
[idx
++].Reg64
= env
->cr
[4];
352 assert(whpx_register_names
[idx
] == WHvX64RegisterCr8
);
353 vcxt
.values
[idx
++].Reg64
= vcpu
->tpr
;
355 /* 8 Debug Registers - Skipped */
357 /* 16 XMM registers */
358 assert(whpx_register_names
[idx
] == WHvX64RegisterXmm0
);
360 for (i
= 0; i
< sizeof(env
->xmm_regs
) / sizeof(ZMMReg
); i
+= 1, idx
+= 1) {
361 vcxt
.values
[idx
].Reg128
.Low64
= env
->xmm_regs
[i
].ZMM_Q(0);
362 vcxt
.values
[idx
].Reg128
.High64
= env
->xmm_regs
[i
].ZMM_Q(1);
367 assert(whpx_register_names
[idx
] == WHvX64RegisterFpMmx0
);
368 for (i
= 0; i
< 8; i
+= 1, idx
+= 1) {
369 vcxt
.values
[idx
].Fp
.AsUINT128
.Low64
= env
->fpregs
[i
].mmx
.MMX_Q(0);
370 /* vcxt.values[idx].Fp.AsUINT128.High64 =
371 env->fpregs[i].mmx.MMX_Q(1);
375 /* FP control status register */
376 assert(whpx_register_names
[idx
] == WHvX64RegisterFpControlStatus
);
377 vcxt
.values
[idx
].FpControlStatus
.FpControl
= env
->fpuc
;
378 vcxt
.values
[idx
].FpControlStatus
.FpStatus
=
379 (env
->fpus
& ~0x3800) | (env
->fpstt
& 0x7) << 11;
380 vcxt
.values
[idx
].FpControlStatus
.FpTag
= 0;
381 for (i
= 0; i
< 8; ++i
) {
382 vcxt
.values
[idx
].FpControlStatus
.FpTag
|= (!env
->fptags
[i
]) << i
;
384 vcxt
.values
[idx
].FpControlStatus
.Reserved
= 0;
385 vcxt
.values
[idx
].FpControlStatus
.LastFpOp
= env
->fpop
;
386 vcxt
.values
[idx
].FpControlStatus
.LastFpRip
= env
->fpip
;
389 /* XMM control status register */
390 assert(whpx_register_names
[idx
] == WHvX64RegisterXmmControlStatus
);
391 vcxt
.values
[idx
].XmmControlStatus
.LastFpRdp
= 0;
392 vcxt
.values
[idx
].XmmControlStatus
.XmmStatusControl
= env
->mxcsr
;
393 vcxt
.values
[idx
].XmmControlStatus
.XmmStatusControlMask
= 0x0000ffff;
397 assert(whpx_register_names
[idx
] == WHvX64RegisterEfer
);
398 vcxt
.values
[idx
++].Reg64
= env
->efer
;
400 assert(whpx_register_names
[idx
] == WHvX64RegisterKernelGsBase
);
401 vcxt
.values
[idx
++].Reg64
= env
->kernelgsbase
;
404 assert(whpx_register_names
[idx
] == WHvX64RegisterApicBase
);
405 vcxt
.values
[idx
++].Reg64
= vcpu
->apic_base
;
407 /* WHvX64RegisterPat - Skipped */
409 assert(whpx_register_names
[idx
] == WHvX64RegisterSysenterCs
);
410 vcxt
.values
[idx
++].Reg64
= env
->sysenter_cs
;
411 assert(whpx_register_names
[idx
] == WHvX64RegisterSysenterEip
);
412 vcxt
.values
[idx
++].Reg64
= env
->sysenter_eip
;
413 assert(whpx_register_names
[idx
] == WHvX64RegisterSysenterEsp
);
414 vcxt
.values
[idx
++].Reg64
= env
->sysenter_esp
;
415 assert(whpx_register_names
[idx
] == WHvX64RegisterStar
);
416 vcxt
.values
[idx
++].Reg64
= env
->star
;
418 assert(whpx_register_names
[idx
] == WHvX64RegisterLstar
);
419 vcxt
.values
[idx
++].Reg64
= env
->lstar
;
420 assert(whpx_register_names
[idx
] == WHvX64RegisterCstar
);
421 vcxt
.values
[idx
++].Reg64
= env
->cstar
;
422 assert(whpx_register_names
[idx
] == WHvX64RegisterSfmask
);
423 vcxt
.values
[idx
++].Reg64
= env
->fmask
;
426 /* Interrupt / Event Registers - Skipped */
428 assert(idx
== RTL_NUMBER_OF(whpx_register_names
));
430 hr
= whp_dispatch
.WHvSetVirtualProcessorRegisters(
431 whpx
->partition
, cpu
->cpu_index
,
433 RTL_NUMBER_OF(whpx_register_names
),
437 error_report("WHPX: Failed to set virtual processor context, hr=%08lx",
444 static int whpx_get_tsc(CPUState
*cpu
)
446 CPUX86State
*env
= cpu
->env_ptr
;
447 WHV_REGISTER_NAME tsc_reg
= WHvX64RegisterTsc
;
448 WHV_REGISTER_VALUE tsc_val
;
450 struct whpx_state
*whpx
= &whpx_global
;
452 hr
= whp_dispatch
.WHvGetVirtualProcessorRegisters(
453 whpx
->partition
, cpu
->cpu_index
, &tsc_reg
, 1, &tsc_val
);
455 error_report("WHPX: Failed to get TSC, hr=%08lx", hr
);
459 env
->tsc
= tsc_val
.Reg64
;
463 static void whpx_get_registers(CPUState
*cpu
)
465 struct whpx_state
*whpx
= &whpx_global
;
466 struct whpx_vcpu
*vcpu
= get_whpx_vcpu(cpu
);
467 CPUX86State
*env
= cpu
->env_ptr
;
468 X86CPU
*x86_cpu
= X86_CPU(cpu
);
469 struct whpx_register_set vcxt
;
470 uint64_t tpr
, apic_base
;
476 assert(cpu_is_stopped(cpu
) || qemu_cpu_is_self(cpu
));
478 if (!env
->tsc_valid
) {
480 env
->tsc_valid
= !runstate_is_running();
483 hr
= whp_dispatch
.WHvGetVirtualProcessorRegisters(
484 whpx
->partition
, cpu
->cpu_index
,
486 RTL_NUMBER_OF(whpx_register_names
),
489 error_report("WHPX: Failed to get virtual processor context, hr=%08lx",
493 if (whpx_apic_in_platform()) {
495 * Fetch the TPR value from the emulated APIC. It may get overwritten
496 * below with the value from CR8 returned by
497 * WHvGetVirtualProcessorRegisters().
499 whpx_apic_get(x86_cpu
->apic_state
);
500 vcpu
->tpr
= whpx_apic_tpr_to_cr8(
501 cpu_get_apic_tpr(x86_cpu
->apic_state
));
506 /* Indexes for first 16 registers match between HV and QEMU definitions */
508 for (idx
= 0; idx
< CPU_NB_REGS
; idx
+= 1) {
509 env
->regs
[idx
] = vcxt
.values
[idx
].Reg64
;
513 /* Same goes for RIP and RFLAGS */
514 assert(whpx_register_names
[idx
] == WHvX64RegisterRip
);
515 env
->eip
= vcxt
.values
[idx
++].Reg64
;
516 assert(whpx_register_names
[idx
] == WHvX64RegisterRflags
);
517 env
->eflags
= vcxt
.values
[idx
++].Reg64
;
519 /* Translate 6+4 segment registers. HV and QEMU order matches */
520 assert(idx
== WHvX64RegisterEs
);
521 for (i
= 0; i
< 6; i
+= 1, idx
+= 1) {
522 env
->segs
[i
] = whpx_seg_h2q(&vcxt
.values
[idx
].Segment
);
525 assert(idx
== WHvX64RegisterLdtr
);
526 env
->ldt
= whpx_seg_h2q(&vcxt
.values
[idx
++].Segment
);
527 assert(idx
== WHvX64RegisterTr
);
528 env
->tr
= whpx_seg_h2q(&vcxt
.values
[idx
++].Segment
);
529 assert(idx
== WHvX64RegisterIdtr
);
530 env
->idt
.base
= vcxt
.values
[idx
].Table
.Base
;
531 env
->idt
.limit
= vcxt
.values
[idx
].Table
.Limit
;
533 assert(idx
== WHvX64RegisterGdtr
);
534 env
->gdt
.base
= vcxt
.values
[idx
].Table
.Base
;
535 env
->gdt
.limit
= vcxt
.values
[idx
].Table
.Limit
;
538 /* CR0, 2, 3, 4, 8 */
539 assert(whpx_register_names
[idx
] == WHvX64RegisterCr0
);
540 env
->cr
[0] = vcxt
.values
[idx
++].Reg64
;
541 assert(whpx_register_names
[idx
] == WHvX64RegisterCr2
);
542 env
->cr
[2] = vcxt
.values
[idx
++].Reg64
;
543 assert(whpx_register_names
[idx
] == WHvX64RegisterCr3
);
544 env
->cr
[3] = vcxt
.values
[idx
++].Reg64
;
545 assert(whpx_register_names
[idx
] == WHvX64RegisterCr4
);
546 env
->cr
[4] = vcxt
.values
[idx
++].Reg64
;
547 assert(whpx_register_names
[idx
] == WHvX64RegisterCr8
);
548 tpr
= vcxt
.values
[idx
++].Reg64
;
549 if (tpr
!= vcpu
->tpr
) {
551 cpu_set_apic_tpr(x86_cpu
->apic_state
, tpr
);
554 /* 8 Debug Registers - Skipped */
556 /* 16 XMM registers */
557 assert(whpx_register_names
[idx
] == WHvX64RegisterXmm0
);
559 for (i
= 0; i
< sizeof(env
->xmm_regs
) / sizeof(ZMMReg
); i
+= 1, idx
+= 1) {
560 env
->xmm_regs
[i
].ZMM_Q(0) = vcxt
.values
[idx
].Reg128
.Low64
;
561 env
->xmm_regs
[i
].ZMM_Q(1) = vcxt
.values
[idx
].Reg128
.High64
;
566 assert(whpx_register_names
[idx
] == WHvX64RegisterFpMmx0
);
567 for (i
= 0; i
< 8; i
+= 1, idx
+= 1) {
568 env
->fpregs
[i
].mmx
.MMX_Q(0) = vcxt
.values
[idx
].Fp
.AsUINT128
.Low64
;
569 /* env->fpregs[i].mmx.MMX_Q(1) =
570 vcxt.values[idx].Fp.AsUINT128.High64;
574 /* FP control status register */
575 assert(whpx_register_names
[idx
] == WHvX64RegisterFpControlStatus
);
576 env
->fpuc
= vcxt
.values
[idx
].FpControlStatus
.FpControl
;
577 env
->fpstt
= (vcxt
.values
[idx
].FpControlStatus
.FpStatus
>> 11) & 0x7;
578 env
->fpus
= vcxt
.values
[idx
].FpControlStatus
.FpStatus
& ~0x3800;
579 for (i
= 0; i
< 8; ++i
) {
580 env
->fptags
[i
] = !((vcxt
.values
[idx
].FpControlStatus
.FpTag
>> i
) & 1);
582 env
->fpop
= vcxt
.values
[idx
].FpControlStatus
.LastFpOp
;
583 env
->fpip
= vcxt
.values
[idx
].FpControlStatus
.LastFpRip
;
586 /* XMM control status register */
587 assert(whpx_register_names
[idx
] == WHvX64RegisterXmmControlStatus
);
588 env
->mxcsr
= vcxt
.values
[idx
].XmmControlStatus
.XmmStatusControl
;
592 assert(whpx_register_names
[idx
] == WHvX64RegisterEfer
);
593 env
->efer
= vcxt
.values
[idx
++].Reg64
;
595 assert(whpx_register_names
[idx
] == WHvX64RegisterKernelGsBase
);
596 env
->kernelgsbase
= vcxt
.values
[idx
++].Reg64
;
599 assert(whpx_register_names
[idx
] == WHvX64RegisterApicBase
);
600 apic_base
= vcxt
.values
[idx
++].Reg64
;
601 if (apic_base
!= vcpu
->apic_base
) {
602 vcpu
->apic_base
= apic_base
;
603 cpu_set_apic_base(x86_cpu
->apic_state
, vcpu
->apic_base
);
606 /* WHvX64RegisterPat - Skipped */
608 assert(whpx_register_names
[idx
] == WHvX64RegisterSysenterCs
);
609 env
->sysenter_cs
= vcxt
.values
[idx
++].Reg64
;
610 assert(whpx_register_names
[idx
] == WHvX64RegisterSysenterEip
);
611 env
->sysenter_eip
= vcxt
.values
[idx
++].Reg64
;
612 assert(whpx_register_names
[idx
] == WHvX64RegisterSysenterEsp
);
613 env
->sysenter_esp
= vcxt
.values
[idx
++].Reg64
;
614 assert(whpx_register_names
[idx
] == WHvX64RegisterStar
);
615 env
->star
= vcxt
.values
[idx
++].Reg64
;
617 assert(whpx_register_names
[idx
] == WHvX64RegisterLstar
);
618 env
->lstar
= vcxt
.values
[idx
++].Reg64
;
619 assert(whpx_register_names
[idx
] == WHvX64RegisterCstar
);
620 env
->cstar
= vcxt
.values
[idx
++].Reg64
;
621 assert(whpx_register_names
[idx
] == WHvX64RegisterSfmask
);
622 env
->fmask
= vcxt
.values
[idx
++].Reg64
;
625 /* Interrupt / Event Registers - Skipped */
627 assert(idx
== RTL_NUMBER_OF(whpx_register_names
));
629 if (whpx_apic_in_platform()) {
630 whpx_apic_get(x86_cpu
->apic_state
);
633 x86_update_hflags(env
);
638 static HRESULT CALLBACK
whpx_emu_ioport_callback(
640 WHV_EMULATOR_IO_ACCESS_INFO
*IoAccess
)
642 MemTxAttrs attrs
= { 0 };
643 address_space_rw(&address_space_io
, IoAccess
->Port
, attrs
,
644 &IoAccess
->Data
, IoAccess
->AccessSize
,
645 IoAccess
->Direction
);
649 static HRESULT CALLBACK
whpx_emu_mmio_callback(
651 WHV_EMULATOR_MEMORY_ACCESS_INFO
*ma
)
653 cpu_physical_memory_rw(ma
->GpaAddress
, ma
->Data
, ma
->AccessSize
,
658 static HRESULT CALLBACK
whpx_emu_getreg_callback(
660 const WHV_REGISTER_NAME
*RegisterNames
,
661 UINT32 RegisterCount
,
662 WHV_REGISTER_VALUE
*RegisterValues
)
665 struct whpx_state
*whpx
= &whpx_global
;
666 CPUState
*cpu
= (CPUState
*)ctx
;
668 hr
= whp_dispatch
.WHvGetVirtualProcessorRegisters(
669 whpx
->partition
, cpu
->cpu_index
,
670 RegisterNames
, RegisterCount
,
673 error_report("WHPX: Failed to get virtual processor registers,"
680 static HRESULT CALLBACK
whpx_emu_setreg_callback(
682 const WHV_REGISTER_NAME
*RegisterNames
,
683 UINT32 RegisterCount
,
684 const WHV_REGISTER_VALUE
*RegisterValues
)
687 struct whpx_state
*whpx
= &whpx_global
;
688 CPUState
*cpu
= (CPUState
*)ctx
;
690 hr
= whp_dispatch
.WHvSetVirtualProcessorRegisters(
691 whpx
->partition
, cpu
->cpu_index
,
692 RegisterNames
, RegisterCount
,
695 error_report("WHPX: Failed to set virtual processor registers,"
700 * The emulator just successfully wrote the register state. We clear the
701 * dirty state so we avoid the double write on resume of the VP.
703 cpu
->vcpu_dirty
= false;
708 static HRESULT CALLBACK
whpx_emu_translate_callback(
710 WHV_GUEST_VIRTUAL_ADDRESS Gva
,
711 WHV_TRANSLATE_GVA_FLAGS TranslateFlags
,
712 WHV_TRANSLATE_GVA_RESULT_CODE
*TranslationResult
,
713 WHV_GUEST_PHYSICAL_ADDRESS
*Gpa
)
716 struct whpx_state
*whpx
= &whpx_global
;
717 CPUState
*cpu
= (CPUState
*)ctx
;
718 WHV_TRANSLATE_GVA_RESULT res
;
720 hr
= whp_dispatch
.WHvTranslateGva(whpx
->partition
, cpu
->cpu_index
,
721 Gva
, TranslateFlags
, &res
, Gpa
);
723 error_report("WHPX: Failed to translate GVA, hr=%08lx", hr
);
725 *TranslationResult
= res
.ResultCode
;
731 static const WHV_EMULATOR_CALLBACKS whpx_emu_callbacks
= {
732 .Size
= sizeof(WHV_EMULATOR_CALLBACKS
),
733 .WHvEmulatorIoPortCallback
= whpx_emu_ioport_callback
,
734 .WHvEmulatorMemoryCallback
= whpx_emu_mmio_callback
,
735 .WHvEmulatorGetVirtualProcessorRegisters
= whpx_emu_getreg_callback
,
736 .WHvEmulatorSetVirtualProcessorRegisters
= whpx_emu_setreg_callback
,
737 .WHvEmulatorTranslateGvaPage
= whpx_emu_translate_callback
,
740 static int whpx_handle_mmio(CPUState
*cpu
, WHV_MEMORY_ACCESS_CONTEXT
*ctx
)
743 struct whpx_vcpu
*vcpu
= get_whpx_vcpu(cpu
);
744 WHV_EMULATOR_STATUS emu_status
;
746 hr
= whp_dispatch
.WHvEmulatorTryMmioEmulation(
748 &vcpu
->exit_ctx
.VpContext
, ctx
,
751 error_report("WHPX: Failed to parse MMIO access, hr=%08lx", hr
);
755 if (!emu_status
.EmulationSuccessful
) {
756 error_report("WHPX: Failed to emulate MMIO access with"
757 " EmulatorReturnStatus: %u", emu_status
.AsUINT32
);
764 static int whpx_handle_portio(CPUState
*cpu
,
765 WHV_X64_IO_PORT_ACCESS_CONTEXT
*ctx
)
768 struct whpx_vcpu
*vcpu
= get_whpx_vcpu(cpu
);
769 WHV_EMULATOR_STATUS emu_status
;
771 hr
= whp_dispatch
.WHvEmulatorTryIoEmulation(
773 &vcpu
->exit_ctx
.VpContext
, ctx
,
776 error_report("WHPX: Failed to parse PortIO access, hr=%08lx", hr
);
780 if (!emu_status
.EmulationSuccessful
) {
781 error_report("WHPX: Failed to emulate PortIO access with"
782 " EmulatorReturnStatus: %u", emu_status
.AsUINT32
);
789 static int whpx_handle_halt(CPUState
*cpu
)
791 CPUX86State
*env
= cpu
->env_ptr
;
794 qemu_mutex_lock_iothread();
795 if (!((cpu
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
796 (env
->eflags
& IF_MASK
)) &&
797 !(cpu
->interrupt_request
& CPU_INTERRUPT_NMI
)) {
798 cpu
->exception_index
= EXCP_HLT
;
802 qemu_mutex_unlock_iothread();
807 static void whpx_vcpu_pre_run(CPUState
*cpu
)
810 struct whpx_state
*whpx
= &whpx_global
;
811 struct whpx_vcpu
*vcpu
= get_whpx_vcpu(cpu
);
812 CPUX86State
*env
= cpu
->env_ptr
;
813 X86CPU
*x86_cpu
= X86_CPU(cpu
);
816 WHV_X64_PENDING_INTERRUPTION_REGISTER new_int
;
817 UINT32 reg_count
= 0;
818 WHV_REGISTER_VALUE reg_values
[3];
819 WHV_REGISTER_NAME reg_names
[3];
821 memset(&new_int
, 0, sizeof(new_int
));
822 memset(reg_values
, 0, sizeof(reg_values
));
824 qemu_mutex_lock_iothread();
827 if (!vcpu
->interruption_pending
&&
828 cpu
->interrupt_request
& (CPU_INTERRUPT_NMI
| CPU_INTERRUPT_SMI
)) {
829 if (cpu
->interrupt_request
& CPU_INTERRUPT_NMI
) {
830 cpu
->interrupt_request
&= ~CPU_INTERRUPT_NMI
;
831 vcpu
->interruptable
= false;
832 new_int
.InterruptionType
= WHvX64PendingNmi
;
833 new_int
.InterruptionPending
= 1;
834 new_int
.InterruptionVector
= 2;
836 if (cpu
->interrupt_request
& CPU_INTERRUPT_SMI
) {
837 cpu
->interrupt_request
&= ~CPU_INTERRUPT_SMI
;
842 * Force the VCPU out of its inner loop to process any INIT requests or
843 * commit pending TPR access.
845 if (cpu
->interrupt_request
& (CPU_INTERRUPT_INIT
| CPU_INTERRUPT_TPR
)) {
846 if ((cpu
->interrupt_request
& CPU_INTERRUPT_INIT
) &&
847 !(env
->hflags
& HF_SMM_MASK
)) {
848 cpu
->exit_request
= 1;
850 if (cpu
->interrupt_request
& CPU_INTERRUPT_TPR
) {
851 cpu
->exit_request
= 1;
855 /* Get pending hard interruption or replay one that was overwritten */
856 if (!whpx_apic_in_platform()) {
857 if (!vcpu
->interruption_pending
&&
858 vcpu
->interruptable
&& (env
->eflags
& IF_MASK
)) {
859 assert(!new_int
.InterruptionPending
);
860 if (cpu
->interrupt_request
& CPU_INTERRUPT_HARD
) {
861 cpu
->interrupt_request
&= ~CPU_INTERRUPT_HARD
;
862 irq
= cpu_get_pic_interrupt(env
);
864 new_int
.InterruptionType
= WHvX64PendingInterrupt
;
865 new_int
.InterruptionPending
= 1;
866 new_int
.InterruptionVector
= irq
;
871 /* Setup interrupt state if new one was prepared */
872 if (new_int
.InterruptionPending
) {
873 reg_values
[reg_count
].PendingInterruption
= new_int
;
874 reg_names
[reg_count
] = WHvRegisterPendingInterruption
;
877 } else if (vcpu
->ready_for_pic_interrupt
&&
878 (cpu
->interrupt_request
& CPU_INTERRUPT_HARD
)) {
879 cpu
->interrupt_request
&= ~CPU_INTERRUPT_HARD
;
880 irq
= cpu_get_pic_interrupt(env
);
882 reg_names
[reg_count
] = WHvRegisterPendingEvent
;
883 reg_values
[reg_count
].ExtIntEvent
= (WHV_X64_PENDING_EXT_INT_EVENT
)
886 .EventType
= WHvX64PendingEventExtInt
,
893 /* Sync the TPR to the CR8 if was modified during the intercept */
894 tpr
= cpu_get_apic_tpr(x86_cpu
->apic_state
);
895 if (tpr
!= vcpu
->tpr
) {
897 reg_values
[reg_count
].Reg64
= tpr
;
898 cpu
->exit_request
= 1;
899 reg_names
[reg_count
] = WHvX64RegisterCr8
;
903 /* Update the state of the interrupt delivery notification */
904 if (!vcpu
->window_registered
&&
905 cpu
->interrupt_request
& CPU_INTERRUPT_HARD
) {
906 reg_values
[reg_count
].DeliverabilityNotifications
=
907 (WHV_X64_DELIVERABILITY_NOTIFICATIONS_REGISTER
) {
908 .InterruptNotification
= 1
910 vcpu
->window_registered
= 1;
911 reg_names
[reg_count
] = WHvX64RegisterDeliverabilityNotifications
;
915 qemu_mutex_unlock_iothread();
916 vcpu
->ready_for_pic_interrupt
= false;
919 hr
= whp_dispatch
.WHvSetVirtualProcessorRegisters(
920 whpx
->partition
, cpu
->cpu_index
,
921 reg_names
, reg_count
, reg_values
);
923 error_report("WHPX: Failed to set interrupt state registers,"
931 static void whpx_vcpu_post_run(CPUState
*cpu
)
933 struct whpx_vcpu
*vcpu
= get_whpx_vcpu(cpu
);
934 CPUX86State
*env
= cpu
->env_ptr
;
935 X86CPU
*x86_cpu
= X86_CPU(cpu
);
937 env
->eflags
= vcpu
->exit_ctx
.VpContext
.Rflags
;
939 uint64_t tpr
= vcpu
->exit_ctx
.VpContext
.Cr8
;
940 if (vcpu
->tpr
!= tpr
) {
942 qemu_mutex_lock_iothread();
943 cpu_set_apic_tpr(x86_cpu
->apic_state
, vcpu
->tpr
);
944 qemu_mutex_unlock_iothread();
947 vcpu
->interruption_pending
=
948 vcpu
->exit_ctx
.VpContext
.ExecutionState
.InterruptionPending
;
950 vcpu
->interruptable
=
951 !vcpu
->exit_ctx
.VpContext
.ExecutionState
.InterruptShadow
;
956 static void whpx_vcpu_process_async_events(CPUState
*cpu
)
958 CPUX86State
*env
= cpu
->env_ptr
;
959 X86CPU
*x86_cpu
= X86_CPU(cpu
);
960 struct whpx_vcpu
*vcpu
= get_whpx_vcpu(cpu
);
962 if ((cpu
->interrupt_request
& CPU_INTERRUPT_INIT
) &&
963 !(env
->hflags
& HF_SMM_MASK
)) {
964 whpx_cpu_synchronize_state(cpu
);
965 do_cpu_init(x86_cpu
);
966 vcpu
->interruptable
= true;
969 if (cpu
->interrupt_request
& CPU_INTERRUPT_POLL
) {
970 cpu
->interrupt_request
&= ~CPU_INTERRUPT_POLL
;
971 apic_poll_irq(x86_cpu
->apic_state
);
974 if (((cpu
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
975 (env
->eflags
& IF_MASK
)) ||
976 (cpu
->interrupt_request
& CPU_INTERRUPT_NMI
)) {
980 if (cpu
->interrupt_request
& CPU_INTERRUPT_SIPI
) {
981 whpx_cpu_synchronize_state(cpu
);
982 do_cpu_sipi(x86_cpu
);
985 if (cpu
->interrupt_request
& CPU_INTERRUPT_TPR
) {
986 cpu
->interrupt_request
&= ~CPU_INTERRUPT_TPR
;
987 whpx_cpu_synchronize_state(cpu
);
988 apic_handle_tpr_access_report(x86_cpu
->apic_state
, env
->eip
,
989 env
->tpr_access_type
);
995 static int whpx_vcpu_run(CPUState
*cpu
)
998 struct whpx_state
*whpx
= &whpx_global
;
999 struct whpx_vcpu
*vcpu
= get_whpx_vcpu(cpu
);
1002 whpx_vcpu_process_async_events(cpu
);
1003 if (cpu
->halted
&& !whpx_apic_in_platform()) {
1004 cpu
->exception_index
= EXCP_HLT
;
1005 qatomic_set(&cpu
->exit_request
, false);
1009 qemu_mutex_unlock_iothread();
1010 cpu_exec_start(cpu
);
1013 if (cpu
->vcpu_dirty
) {
1014 whpx_set_registers(cpu
, WHPX_SET_RUNTIME_STATE
);
1015 cpu
->vcpu_dirty
= false;
1018 whpx_vcpu_pre_run(cpu
);
1020 if (qatomic_read(&cpu
->exit_request
)) {
1021 whpx_vcpu_kick(cpu
);
1024 hr
= whp_dispatch
.WHvRunVirtualProcessor(
1025 whpx
->partition
, cpu
->cpu_index
,
1026 &vcpu
->exit_ctx
, sizeof(vcpu
->exit_ctx
));
1029 error_report("WHPX: Failed to exec a virtual processor,"
1035 whpx_vcpu_post_run(cpu
);
1037 switch (vcpu
->exit_ctx
.ExitReason
) {
1038 case WHvRunVpExitReasonMemoryAccess
:
1039 ret
= whpx_handle_mmio(cpu
, &vcpu
->exit_ctx
.MemoryAccess
);
1042 case WHvRunVpExitReasonX64IoPortAccess
:
1043 ret
= whpx_handle_portio(cpu
, &vcpu
->exit_ctx
.IoPortAccess
);
1046 case WHvRunVpExitReasonX64InterruptWindow
:
1047 vcpu
->ready_for_pic_interrupt
= 1;
1048 vcpu
->window_registered
= 0;
1052 case WHvRunVpExitReasonX64ApicEoi
:
1053 assert(whpx_apic_in_platform());
1054 ioapic_eoi_broadcast(vcpu
->exit_ctx
.ApicEoi
.InterruptVector
);
1057 case WHvRunVpExitReasonX64Halt
:
1058 ret
= whpx_handle_halt(cpu
);
1061 case WHvRunVpExitReasonX64ApicInitSipiTrap
: {
1062 WHV_INTERRUPT_CONTROL ipi
= {0};
1063 uint64_t icr
= vcpu
->exit_ctx
.ApicInitSipi
.ApicIcr
;
1064 uint32_t delivery_mode
=
1065 (icr
& APIC_ICR_DELIV_MOD
) >> APIC_ICR_DELIV_MOD_SHIFT
;
1066 int dest_shorthand
=
1067 (icr
& APIC_ICR_DEST_SHORT
) >> APIC_ICR_DEST_SHORT_SHIFT
;
1068 bool broadcast
= false;
1069 bool include_self
= false;
1072 /* We only registered for INIT and SIPI exits. */
1073 if ((delivery_mode
!= APIC_DM_INIT
) &&
1074 (delivery_mode
!= APIC_DM_SIPI
)) {
1076 "WHPX: Unexpected APIC exit that is not a INIT or SIPI");
1080 if (delivery_mode
== APIC_DM_INIT
) {
1081 ipi
.Type
= WHvX64InterruptTypeInit
;
1083 ipi
.Type
= WHvX64InterruptTypeSipi
;
1086 ipi
.DestinationMode
=
1087 ((icr
& APIC_ICR_DEST_MOD
) >> APIC_ICR_DEST_MOD_SHIFT
) ?
1088 WHvX64InterruptDestinationModeLogical
:
1089 WHvX64InterruptDestinationModePhysical
;
1092 ((icr
& APIC_ICR_TRIGGER_MOD
) >> APIC_ICR_TRIGGER_MOD_SHIFT
) ?
1093 WHvX64InterruptTriggerModeLevel
:
1094 WHvX64InterruptTriggerModeEdge
;
1096 ipi
.Vector
= icr
& APIC_VECTOR_MASK
;
1097 switch (dest_shorthand
) {
1098 /* no shorthand. Bits 56-63 contain the destination. */
1100 ipi
.Destination
= (icr
>> 56) & APIC_VECTOR_MASK
;
1101 hr
= whp_dispatch
.WHvRequestInterrupt(whpx
->partition
,
1104 error_report("WHPX: Failed to request interrupt hr=%08lx",
1112 include_self
= true;
1115 /* broadcast, including self */
1118 include_self
= true;
1121 /* broadcast, excluding self */
1127 if (!broadcast
&& !include_self
) {
1131 for (i
= 0; i
<= max_vcpu_index
; i
++) {
1132 if (i
== cpu
->cpu_index
&& !include_self
) {
1137 * Assuming that APIC Ids are identity mapped since
1138 * WHvX64RegisterApicId & WHvX64RegisterInitialApicId registers
1139 * are not handled yet and the hypervisor doesn't allow the
1140 * guest to modify the APIC ID.
1142 ipi
.Destination
= i
;
1143 hr
= whp_dispatch
.WHvRequestInterrupt(whpx
->partition
,
1147 "WHPX: Failed to request SIPI for %d, hr=%08lx",
1155 case WHvRunVpExitReasonCanceled
:
1156 cpu
->exception_index
= EXCP_INTERRUPT
;
1160 case WHvRunVpExitReasonX64MsrAccess
: {
1161 WHV_REGISTER_VALUE reg_values
[3] = {0};
1162 WHV_REGISTER_NAME reg_names
[3];
1165 reg_names
[0] = WHvX64RegisterRip
;
1166 reg_names
[1] = WHvX64RegisterRax
;
1167 reg_names
[2] = WHvX64RegisterRdx
;
1169 reg_values
[0].Reg64
=
1170 vcpu
->exit_ctx
.VpContext
.Rip
+
1171 vcpu
->exit_ctx
.VpContext
.InstructionLength
;
1174 * For all unsupported MSR access we:
1178 reg_count
= vcpu
->exit_ctx
.MsrAccess
.AccessInfo
.IsWrite
?
1181 hr
= whp_dispatch
.WHvSetVirtualProcessorRegisters(
1184 reg_names
, reg_count
,
1188 error_report("WHPX: Failed to set MsrAccess state "
1189 " registers, hr=%08lx", hr
);
1194 case WHvRunVpExitReasonX64Cpuid
: {
1195 WHV_REGISTER_VALUE reg_values
[5];
1196 WHV_REGISTER_NAME reg_names
[5];
1197 UINT32 reg_count
= 5;
1198 UINT64 cpuid_fn
, rip
= 0, rax
= 0, rcx
= 0, rdx
= 0, rbx
= 0;
1199 X86CPU
*x86_cpu
= X86_CPU(cpu
);
1200 CPUX86State
*env
= &x86_cpu
->env
;
1202 memset(reg_values
, 0, sizeof(reg_values
));
1204 rip
= vcpu
->exit_ctx
.VpContext
.Rip
+
1205 vcpu
->exit_ctx
.VpContext
.InstructionLength
;
1206 cpuid_fn
= vcpu
->exit_ctx
.CpuidAccess
.Rax
;
1209 * Ideally, these should be supplied to the hypervisor during VCPU
1210 * initialization and it should be able to satisfy this request.
1211 * But, currently, WHPX doesn't support setting CPUID values in the
1212 * hypervisor once the partition has been setup, which is too late
1213 * since VCPUs are realized later. For now, use the values from
1214 * QEMU to satisfy these requests, until WHPX adds support for
1215 * being able to set these values in the hypervisor at runtime.
1217 cpu_x86_cpuid(env
, cpuid_fn
, 0, (UINT32
*)&rax
, (UINT32
*)&rbx
,
1218 (UINT32
*)&rcx
, (UINT32
*)&rdx
);
1221 /* Expose the vmware cpu frequency cpuid leaf */
1223 rbx
= rcx
= rdx
= 0;
1228 rbx
= env
->apic_bus_freq
/ 1000; /* Hz to KHz */
1233 /* Remove any support of OSVW */
1234 rcx
&= ~CPUID_EXT3_OSVW
;
1238 reg_names
[0] = WHvX64RegisterRip
;
1239 reg_names
[1] = WHvX64RegisterRax
;
1240 reg_names
[2] = WHvX64RegisterRcx
;
1241 reg_names
[3] = WHvX64RegisterRdx
;
1242 reg_names
[4] = WHvX64RegisterRbx
;
1244 reg_values
[0].Reg64
= rip
;
1245 reg_values
[1].Reg64
= rax
;
1246 reg_values
[2].Reg64
= rcx
;
1247 reg_values
[3].Reg64
= rdx
;
1248 reg_values
[4].Reg64
= rbx
;
1250 hr
= whp_dispatch
.WHvSetVirtualProcessorRegisters(
1251 whpx
->partition
, cpu
->cpu_index
,
1257 error_report("WHPX: Failed to set CpuidAccess state registers,"
1263 case WHvRunVpExitReasonNone
:
1264 case WHvRunVpExitReasonUnrecoverableException
:
1265 case WHvRunVpExitReasonInvalidVpRegisterValue
:
1266 case WHvRunVpExitReasonUnsupportedFeature
:
1267 case WHvRunVpExitReasonException
:
1269 error_report("WHPX: Unexpected VP exit code %d",
1270 vcpu
->exit_ctx
.ExitReason
);
1271 whpx_get_registers(cpu
);
1272 qemu_mutex_lock_iothread();
1273 qemu_system_guest_panicked(cpu_get_crash_info(cpu
));
1274 qemu_mutex_unlock_iothread();
1281 qemu_mutex_lock_iothread();
1284 qatomic_set(&cpu
->exit_request
, false);
1289 static void do_whpx_cpu_synchronize_state(CPUState
*cpu
, run_on_cpu_data arg
)
1291 if (!cpu
->vcpu_dirty
) {
1292 whpx_get_registers(cpu
);
1293 cpu
->vcpu_dirty
= true;
1297 static void do_whpx_cpu_synchronize_post_reset(CPUState
*cpu
,
1298 run_on_cpu_data arg
)
1300 whpx_set_registers(cpu
, WHPX_SET_RESET_STATE
);
1301 cpu
->vcpu_dirty
= false;
1304 static void do_whpx_cpu_synchronize_post_init(CPUState
*cpu
,
1305 run_on_cpu_data arg
)
1307 whpx_set_registers(cpu
, WHPX_SET_FULL_STATE
);
1308 cpu
->vcpu_dirty
= false;
1311 static void do_whpx_cpu_synchronize_pre_loadvm(CPUState
*cpu
,
1312 run_on_cpu_data arg
)
1314 cpu
->vcpu_dirty
= true;
1321 void whpx_cpu_synchronize_state(CPUState
*cpu
)
1323 if (!cpu
->vcpu_dirty
) {
1324 run_on_cpu(cpu
, do_whpx_cpu_synchronize_state
, RUN_ON_CPU_NULL
);
1328 void whpx_cpu_synchronize_post_reset(CPUState
*cpu
)
1330 run_on_cpu(cpu
, do_whpx_cpu_synchronize_post_reset
, RUN_ON_CPU_NULL
);
1333 void whpx_cpu_synchronize_post_init(CPUState
*cpu
)
1335 run_on_cpu(cpu
, do_whpx_cpu_synchronize_post_init
, RUN_ON_CPU_NULL
);
1338 void whpx_cpu_synchronize_pre_loadvm(CPUState
*cpu
)
1340 run_on_cpu(cpu
, do_whpx_cpu_synchronize_pre_loadvm
, RUN_ON_CPU_NULL
);
/*
 * Migration blocker shared by all vCPUs. whpx_init_vcpu() creates it with
 * error_setg() and passes it to migrate_add_blocker() while it is still NULL.
 */
1347 static Error
*whpx_migration_blocker
;
/*
 * VM change-state handler; whpx_init_vcpu() registers it with the vCPU's
 * env pointer as @opaque. The visible body clears env->tsc_valid.
 *
 * NOTE(review): this listing is missing at least one line between the
 * declaration of env and the assignment, and neither @running nor @state is
 * referenced in what remains. Confirm against the full source whether the
 * assignment is conditional.
 */
1349 static void whpx_cpu_update_state(void *opaque
, bool running
, RunState state
)
1351 CPUX86State
*env
= opaque
;
1354 env
->tsc_valid
= false;
/*
 * Create and initialise the WHPX per-vCPU state for @cpu.
 *
 * Steps visible in this listing, in order:
 *  - while whpx_migration_blocker is still NULL, build the migration blocker
 *    with error_setg() and register it with migrate_add_blocker();
 *  - allocate a zeroed struct whpx_vcpu with g_new0();
 *  - create the instruction emulator (WHvEmulatorCreateEmulator) and the
 *    virtual processor (WHvCreateVirtualProcessor, indexed by
 *    cpu->cpu_index); WHvEmulatorDestroyEmulator is called next to the
 *    processor-creation failure message;
 *  - if env->tsc_khz is zero, query WHvCapabilityCodeProcessorClockFrequency
 *    and store freq / 1000 (Hz to KHz) in env->tsc_khz;
 *  - set env->apic_bus_freq to HYPERV_APIC_BUS_FREQUENCY, then query
 *    WHvCapabilityCodeInterruptClockFrequency and store freq in it;
 *  - when x86_cpu->vmware_cpuid_freq is set and env->tsc_khz is non-zero,
 *    set the partition CpuidExitList to 1, 0x80000001, 0x40000000 and
 *    0x40000010;
 *  - mark the vcpu interruptable, flag the CPU state dirty, store the vcpu
 *    in cpu->hax_vcpu, raise max_vcpu_index if needed and register
 *    whpx_cpu_update_state() as a VM change-state handler.
 *
 * The declarations of hr/freq, the FAILED() checks and the return/cleanup
 * lines are not present in this listing, so the exact error flow and return
 * values cannot be documented from here.
 *
 * NOTE(review): when migrate_add_blocker() fails, the visible code calls
 * error_free(whpx_migration_blocker) but no visible line resets the pointer
 * to NULL. If the full source does not reset it either, a later call would
 * skip the "== NULL" branch and keep a dangling pointer. Confirm.
 * NOTE(review): g_new0() terminates the program on allocation failure
 * (GLib), so the "Failed to allocte VCPU context." path is presumably
 * unreachable; the message also misspells "allocate".
 * NOTE(review): the two frequency-query failures are printed with printf()
 * while every other failure here goes through error_report().
 */
1358 int whpx_init_vcpu(CPUState
*cpu
)
1361 struct whpx_state
*whpx
= &whpx_global
;
1362 struct whpx_vcpu
*vcpu
= NULL
;
1363 Error
*local_error
= NULL
;
1364 CPUX86State
*env
= cpu
->env_ptr
;
1365 X86CPU
*x86_cpu
= X86_CPU(cpu
);
1369 /* Add migration blockers for all unsupported features of the
1370 * Windows Hypervisor Platform
1372 if (whpx_migration_blocker
== NULL
) {
1373 error_setg(&whpx_migration_blocker
,
1374 "State blocked due to non-migratable CPUID feature support,"
1375 "dirty memory tracking support, and XSAVE/XRSTOR support");
1377 if (migrate_add_blocker(whpx_migration_blocker
, &local_error
) < 0) {
1378 error_report_err(local_error
);
1379 error_free(whpx_migration_blocker
);
1385 vcpu
= g_new0(struct whpx_vcpu
, 1);
1388 error_report("WHPX: Failed to allocte VCPU context.");
1393 hr
= whp_dispatch
.WHvEmulatorCreateEmulator(
1394 &whpx_emu_callbacks
,
1397 error_report("WHPX: Failed to setup instruction completion support,"
1403 hr
= whp_dispatch
.WHvCreateVirtualProcessor(
1404 whpx
->partition
, cpu
->cpu_index
, 0);
1406 error_report("WHPX: Failed to create a virtual processor,"
1408 whp_dispatch
.WHvEmulatorDestroyEmulator(vcpu
->emulator
);
1414 * vcpu's TSC frequency is either specified by user, or use the value
1415 * provided by Hyper-V if the former is not present. In the latter case, we
1416 * query it from Hyper-V and record in env->tsc_khz, so that vcpu's TSC
1417 * frequency can be migrated later via this field.
1419 if (!env
->tsc_khz
) {
1420 hr
= whp_dispatch
.WHvGetCapability(
1421 WHvCapabilityCodeProcessorClockFrequency
, &freq
, sizeof(freq
),
1423 if (hr
!= WHV_E_UNKNOWN_CAPABILITY
) {
1425 printf("WHPX: Failed to query tsc frequency, hr=0x%08lx\n", hr
);
1427 env
->tsc_khz
= freq
/ 1000; /* Hz to KHz */
1432 env
->apic_bus_freq
= HYPERV_APIC_BUS_FREQUENCY
;
1433 hr
= whp_dispatch
.WHvGetCapability(
1434 WHvCapabilityCodeInterruptClockFrequency
, &freq
, sizeof(freq
), NULL
);
1435 if (hr
!= WHV_E_UNKNOWN_CAPABILITY
) {
1437 printf("WHPX: Failed to query apic bus frequency hr=0x%08lx\n", hr
);
1439 env
->apic_bus_freq
= freq
;
1444 * If the vmware cpuid frequency leaf option is set, and we have a valid
1445 * tsc value, trap the corresponding cpuid's.
1447 if (x86_cpu
->vmware_cpuid_freq
&& env
->tsc_khz
) {
1448 UINT32 cpuidExitList
[] = {1, 0x80000001, 0x40000000, 0x40000010};
1450 hr
= whp_dispatch
.WHvSetPartitionProperty(
1452 WHvPartitionPropertyCodeCpuidExitList
,
1454 RTL_NUMBER_OF(cpuidExitList
) * sizeof(UINT32
));
1457 error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx",
1464 vcpu
->interruptable
= true;
1465 cpu
->vcpu_dirty
= true;
1466 cpu
->hax_vcpu
= (struct hax_vcpu_state
*)vcpu
;
1467 max_vcpu_index
= max(max_vcpu_index
, cpu
->cpu_index
);
1468 qemu_add_vm_change_state_handler(whpx_cpu_update_state
, cpu
->env_ptr
);
/*
 * Execute @cpu under WHPX.
 *
 * Visible behaviour: when cpu->exception_index is at or above
 * EXCP_INTERRUPT, its value is copied to ret and the field is reset to -1.
 * whpx_vcpu_run() is called and its result stored in fatal, and
 * "WHPX: Failed to exec a virtual processor" is reported.
 *
 * NOTE(review): the declarations, any surrounding loop, the condition
 * guarding the error report and the return statement are missing from this
 * listing. Confirm the control flow against the full source.
 */
1478 int whpx_vcpu_exec(CPUState
*cpu
)
1484 if (cpu
->exception_index
>= EXCP_INTERRUPT
) {
1485 ret
= cpu
->exception_index
;
1486 cpu
->exception_index
= -1;
1490 fatal
= whpx_vcpu_run(cpu
);
1493 error_report("WHPX: Failed to exec a virtual processor");
/*
 * Tear down the WHPX state of @cpu: delete the virtual processor identified
 * by cpu->cpu_index, destroy the vcpu's instruction emulator, then g_free()
 * cpu->hax_vcpu (the slot whpx_init_vcpu() stores the whpx_vcpu in).
 *
 * NOTE(review): no visible line clears cpu->hax_vcpu after the g_free();
 * the end of the function is missing from this listing, so confirm whether
 * the pointer is left dangling.
 */
1501 void whpx_destroy_vcpu(CPUState
*cpu
)
1503 struct whpx_state
*whpx
= &whpx_global
;
1504 struct whpx_vcpu
*vcpu
= get_whpx_vcpu(cpu
);
1506 whp_dispatch
.WHvDeleteVirtualProcessor(whpx
->partition
, cpu
->cpu_index
);
1507 whp_dispatch
.WHvEmulatorDestroyEmulator(vcpu
->emulator
);
1508 g_free(cpu
->hax_vcpu
);
1512 void whpx_vcpu_kick(CPUState
*cpu
)
1514 struct whpx_state
*whpx
= &whpx_global
;
1515 whp_dispatch
.WHvCancelRunVirtualProcessor(
1516 whpx
->partition
, cpu
->cpu_index
, 0);
/*
 * Map or unmap one guest-physical range in the WHPX partition.
 *
 * Visible behaviour: WHvMapGpaRange() is called with the Read and Execute
 * flags, plus Write unless @rom is non-zero; WHvUnmapGpaRange() is the
 * counterpart. A failure is reported with "MAP" or "UNMAP" chosen from @add,
 * together with the range name, addresses and hr.
 *
 * NOTE(review): the last parameter (the name used below), the hr
 * declaration, the branch that selects map versus unmap and the remaining
 * call arguments are missing from this listing.
 * NOTE(review): the two "ADD"/"DEL" printf statements appear here without
 * their surrounding context; confirm whether they are live code or part of
 * a commented-out block in the full source.
 * NOTE(review): the error report casts start_pa through uintptr_t but casts
 * size directly to a pointer; confirm this is intended.
 */
1523 static void whpx_update_mapping(hwaddr start_pa
, ram_addr_t size
,
1524 void *host_va
, int add
, int rom
,
1527 struct whpx_state
*whpx
= &whpx_global
;
1532 printf("WHPX: ADD PA:%p Size:%p, Host:%p, %s, '%s'\n",
1533 (void*)start_pa, (void*)size, host_va,
1534 (rom ? "ROM" : "RAM"), name);
1536 printf("WHPX: DEL PA:%p Size:%p, Host:%p, '%s'\n",
1537 (void*)start_pa, (void*)size, host_va, name);
1542 hr
= whp_dispatch
.WHvMapGpaRange(whpx
->partition
,
1546 (WHvMapGpaRangeFlagRead
|
1547 WHvMapGpaRangeFlagExecute
|
1548 (rom
? 0 : WHvMapGpaRangeFlagWrite
)));
1550 hr
= whp_dispatch
.WHvUnmapGpaRange(whpx
->partition
,
1556 error_report("WHPX: Failed to %s GPA range '%s' PA:%p, Size:%p bytes,"
1557 " Host:%p, hr=%08lx",
1558 (add
? "MAP" : "UNMAP"), name
,
1559 (void *)(uintptr_t)start_pa
, (void *)size
, host_va
, hr
);
/*
 * Translate a MemoryRegionSection into a whpx_update_mapping() call;
 * @add is forwarded unchanged (the region listeners pass 1 or 0).
 *
 * Visible behaviour:
 *  - the section's region is tested with memory_region_is_ram();
 *  - delta is the distance from start_pa up to the next host-page boundary
 *    (zero when start_pa is already aligned);
 *  - size is masked down to a multiple of the host page size, and a check
 *    follows for a zero size or a start_pa that is not page aligned;
 *  - host_va is the region's RAM pointer plus offset_within_region plus
 *    delta;
 *  - whpx_update_mapping() receives memory_region_is_rom(mr) as the rom
 *    flag and mr->name as the name.
 *
 * NOTE(review): the bodies of both checks and the lines that apply delta to
 * start_pa and size are missing from this listing; presumably the checks
 * return early, but confirm against the full source.
 */
1563 static void whpx_process_section(MemoryRegionSection
*section
, int add
)
1565 MemoryRegion
*mr
= section
->mr
;
1566 hwaddr start_pa
= section
->offset_within_address_space
;
1567 ram_addr_t size
= int128_get64(section
->size
);
1571 if (!memory_region_is_ram(mr
)) {
1575 delta
= qemu_real_host_page_size() - (start_pa
& ~qemu_real_host_page_mask());
1576 delta
&= ~qemu_real_host_page_mask();
1582 size
&= qemu_real_host_page_mask();
1583 if (!size
|| (start_pa
& ~qemu_real_host_page_mask())) {
1587 host_va
= (uintptr_t)memory_region_get_ram_ptr(mr
)
1588 + section
->offset_within_region
+ delta
;
1590 whpx_update_mapping(start_pa
, size
, (void *)(uintptr_t)host_va
, add
,
1591 memory_region_is_rom(mr
), mr
->name
);
1594 static void whpx_region_add(MemoryListener
*listener
,
1595 MemoryRegionSection
*section
)
1597 memory_region_ref(section
->mr
);
1598 whpx_process_section(section
, 1);
1601 static void whpx_region_del(MemoryListener
*listener
,
1602 MemoryRegionSection
*section
)
1604 whpx_process_section(section
, 0);
1605 memory_region_unref(section
->mr
);
1608 static void whpx_transaction_begin(MemoryListener
*listener
)
1612 static void whpx_transaction_commit(MemoryListener
*listener
)
/*
 * MemoryListener log_sync callback: calls memory_region_set_dirty() on the
 * section's region for offset 0 and a length equal to the section size.
 *
 * NOTE(review): the body of the memory_region_is_ram() check is missing from
 * this listing; presumably non-RAM regions are skipped, but confirm.
 */
1616 static void whpx_log_sync(MemoryListener
*listener
,
1617 MemoryRegionSection
*section
)
1619 MemoryRegion
*mr
= section
->mr
;
1621 if (!memory_region_is_ram(mr
)) {
1625 memory_region_set_dirty(mr
, 0, int128_get64(section
->size
));
/*
 * MemoryListener wiring the WHPX begin/commit/region_add/region_del/log_sync
 * callbacks; whpx_memory_init() registers it on address_space_memory.
 * NOTE(review): some initializer lines are missing from this listing.
 */
1628 static MemoryListener whpx_memory_listener
= {
1630 .begin
= whpx_transaction_begin
,
1631 .commit
= whpx_transaction_commit
,
1632 .region_add
= whpx_region_add
,
1633 .region_del
= whpx_region_del
,
1634 .log_sync
= whpx_log_sync
,
1638 static void whpx_memory_init(void)
1640 memory_listener_register(&whpx_memory_listener
, &address_space_memory
);
1644 * Load the functions from the given library, using the given handle. If a
1645 * handle is provided, it is used, otherwise the library is opened. The
1646 * handle will be updated on return with the opened one.
/*
 * Fill whp_dispatch with the function pointers of the list selected by
 * @function_list, resolving them with GetProcAddress() on the library
 * handle copied from *@handle.
 *
 * Helper macros defined inside the function:
 *  - WHP_LOAD_LIB loads the named DLL with LoadLibrary() only when the
 *    handle is still null, and reports an error if that fails;
 *  - WHP_LOAD_FIELD resolves one function and reports
 *    "Could not load function" when the lookup returns null;
 *  - WHP_LOAD_FIELD_OPTIONAL resolves one function without the error
 *    report in its visible lines.
 *
 * The default platform and emulation lists use WHP_LOAD_FIELD; the
 * supplemental platform list uses WHP_LOAD_FIELD_OPTIONAL.
 *
 * NOTE(review): the tails of the macros, the end of each switch case and the
 * return and cleanup lines are missing from this listing.
 */
1648 static bool load_whp_dispatch_fns(HMODULE
*handle
,
1649 WHPFunctionList function_list
)
1651 HMODULE hLib
= *handle
;
1653 #define WINHV_PLATFORM_DLL "WinHvPlatform.dll"
1654 #define WINHV_EMULATION_DLL "WinHvEmulation.dll"
1655 #define WHP_LOAD_FIELD_OPTIONAL(return_type, function_name, signature) \
1656 whp_dispatch.function_name = \
1657 (function_name ## _t)GetProcAddress(hLib, #function_name); \
1659 #define WHP_LOAD_FIELD(return_type, function_name, signature) \
1660 whp_dispatch.function_name = \
1661 (function_name ## _t)GetProcAddress(hLib, #function_name); \
1662 if (!whp_dispatch.function_name) { \
1663 error_report("Could not load function %s", #function_name); \
1667 #define WHP_LOAD_LIB(lib_name, handle_lib) \
1668 if (!handle_lib) { \
1669 handle_lib = LoadLibrary(lib_name); \
1670 if (!handle_lib) { \
1671 error_report("Could not load library %s.", lib_name); \
1676 switch (function_list) {
1677 case WINHV_PLATFORM_FNS_DEFAULT
:
1678 WHP_LOAD_LIB(WINHV_PLATFORM_DLL
, hLib
)
1679 LIST_WINHVPLATFORM_FUNCTIONS(WHP_LOAD_FIELD
)
1682 case WINHV_EMULATION_FNS_DEFAULT
:
1683 WHP_LOAD_LIB(WINHV_EMULATION_DLL
, hLib
)
1684 LIST_WINHVEMULATION_FUNCTIONS(WHP_LOAD_FIELD
)
1687 case WINHV_PLATFORM_FNS_SUPPLEMENTAL
:
1688 WHP_LOAD_LIB(WINHV_PLATFORM_DLL
, hLib
)
1689 LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_LOAD_FIELD_OPTIONAL
)
/*
 * Setter for the "kernel-irqchip" accelerator property.
 *
 * The value is parsed with visit_type_OnOffSplit(). Visible handling:
 *  - ON_OFF_SPLIT_ON sets kernel_irqchip_allowed and
 *    kernel_irqchip_required to true;
 *  - ON_OFF_SPLIT_OFF sets both flags to false;
 *  - ON_OFF_SPLIT_SPLIT sets an error on errp ("split irqchip currently not
 *    supported") with a hint to use on or off.
 *
 * NOTE(review): the errp parameter line, the declaration of mode, the switch
 * header and the default branch are missing from this listing.
 */
1704 static void whpx_set_kernel_irqchip(Object
*obj
, Visitor
*v
,
1705 const char *name
, void *opaque
,
1708 struct whpx_state
*whpx
= &whpx_global
;
1711 if (!visit_type_OnOffSplit(v
, name
, &mode
, errp
)) {
1716 case ON_OFF_SPLIT_ON
:
1717 whpx
->kernel_irqchip_allowed
= true;
1718 whpx
->kernel_irqchip_required
= true;
1721 case ON_OFF_SPLIT_OFF
:
1722 whpx
->kernel_irqchip_allowed
= false;
1723 whpx
->kernel_irqchip_required
= false;
1726 case ON_OFF_SPLIT_SPLIT
:
1727 error_setg(errp
, "WHPX: split irqchip currently not supported");
1728 error_append_hint(errp
,
1729 "Try without kernel-irqchip or with kernel-irqchip=on|off");
1734 * The value was checked in visit_type_OnOffSplit() above. If
1735 * we get here, then something is wrong in QEMU.
/*
 * Accelerator init_machine hook: create and configure the WHPX partition.
 *
 * Steps visible in this listing, in order:
 *  - load the WHP entry points with init_whp_dispatch();
 *  - record ms->ram_size in whpx->mem_quota;
 *  - query WHvCapabilityCodeHypervisorPresent and report
 *    "No accelerator found" when the call fails or the flag is clear;
 *  - query WHvCapabilityCodeFeatures into features;
 *  - create the partition with WHvCreatePartition();
 *  - set the ProcessorCount property from ms->smp.cpus;
 *  - report an error when kernel_irqchip_required is set but either
 *    features.LocalApicEmulation or the
 *    WHvSetVirtualProcessorInterruptControllerState2 entry point is missing;
 *  - when kernel_irqchip_allowed is set and both are available, set the
 *    LocalApicEmulationMode property to WHvX64LocalApicEmulationModeXApic
 *    and set whpx->apic_in_platform to true;
 *  - enable the X64MsrExit and X64CpuidExit extended exits, plus
 *    X64ApicInitSipiExitTrap when whpx_apic_in_platform() is true;
 *  - set the CpuidExitList property to leaves 1 and 0x80000001;
 *  - finalise with WHvSetupPartition() and print an "operational" banner;
 *  - a non-NULL whpx->partition is deleted and reset to NULL near the end,
 *    presumably on the error path.
 *
 * The hr and ret declarations, the FAILED() checks, several call arguments
 * and the return and cleanup-label lines are missing from this listing.
 *
 * NOTE(review): the ProcessorCount failure message prints ms->smp.cores
 * although the value written to the property is ms->smp.cpus; the reported
 * number can therefore differ from the one that failed.
 * NOTE(review): two informational messages use printf() directly.
 */
1745 static int whpx_accel_init(MachineState
*ms
)
1747 struct whpx_state
*whpx
;
1750 WHV_CAPABILITY whpx_cap
;
1751 UINT32 whpx_cap_size
;
1752 WHV_PARTITION_PROPERTY prop
;
1753 UINT32 cpuidExitList
[] = {1, 0x80000001};
1754 WHV_CAPABILITY_FEATURES features
= {0};
1756 whpx
= &whpx_global
;
1758 if (!init_whp_dispatch()) {
1763 whpx
->mem_quota
= ms
->ram_size
;
1765 hr
= whp_dispatch
.WHvGetCapability(
1766 WHvCapabilityCodeHypervisorPresent
, &whpx_cap
,
1767 sizeof(whpx_cap
), &whpx_cap_size
);
1768 if (FAILED(hr
) || !whpx_cap
.HypervisorPresent
) {
1769 error_report("WHPX: No accelerator found, hr=%08lx", hr
);
1774 hr
= whp_dispatch
.WHvGetCapability(
1775 WHvCapabilityCodeFeatures
, &features
, sizeof(features
), NULL
);
1777 error_report("WHPX: Failed to query capabilities, hr=%08lx", hr
);
1782 hr
= whp_dispatch
.WHvCreatePartition(&whpx
->partition
);
1784 error_report("WHPX: Failed to create partition, hr=%08lx", hr
);
1789 memset(&prop
, 0, sizeof(WHV_PARTITION_PROPERTY
));
1790 prop
.ProcessorCount
= ms
->smp
.cpus
;
1791 hr
= whp_dispatch
.WHvSetPartitionProperty(
1793 WHvPartitionPropertyCodeProcessorCount
,
1795 sizeof(WHV_PARTITION_PROPERTY
));
1798 error_report("WHPX: Failed to set partition core count to %d,"
1799 " hr=%08lx", ms
->smp
.cores
, hr
);
1805 * Error out if WHP doesn't support apic emulation and user is requiring
1808 if (whpx
->kernel_irqchip_required
&& (!features
.LocalApicEmulation
||
1809 !whp_dispatch
.WHvSetVirtualProcessorInterruptControllerState2
)) {
1810 error_report("WHPX: kernel irqchip requested, but unavailable. "
1811 "Try without kernel-irqchip or with kernel-irqchip=off");
1816 if (whpx
->kernel_irqchip_allowed
&& features
.LocalApicEmulation
&&
1817 whp_dispatch
.WHvSetVirtualProcessorInterruptControllerState2
) {
1818 WHV_X64_LOCAL_APIC_EMULATION_MODE mode
=
1819 WHvX64LocalApicEmulationModeXApic
;
1820 printf("WHPX: setting APIC emulation mode in the hypervisor\n");
1821 hr
= whp_dispatch
.WHvSetPartitionProperty(
1823 WHvPartitionPropertyCodeLocalApicEmulationMode
,
1827 error_report("WHPX: Failed to enable kernel irqchip hr=%08lx", hr
);
1828 if (whpx
->kernel_irqchip_required
) {
1829 error_report("WHPX: kernel irqchip requested, but unavailable");
1834 whpx
->apic_in_platform
= true;
1838 /* Register for MSR and CPUID exits */
1839 memset(&prop
, 0, sizeof(WHV_PARTITION_PROPERTY
));
1840 prop
.ExtendedVmExits
.X64MsrExit
= 1;
1841 prop
.ExtendedVmExits
.X64CpuidExit
= 1;
1842 if (whpx_apic_in_platform()) {
1843 prop
.ExtendedVmExits
.X64ApicInitSipiExitTrap
= 1;
1846 hr
= whp_dispatch
.WHvSetPartitionProperty(
1848 WHvPartitionPropertyCodeExtendedVmExits
,
1850 sizeof(WHV_PARTITION_PROPERTY
));
1852 error_report("WHPX: Failed to enable MSR & CPUIDexit, hr=%08lx", hr
);
1857 hr
= whp_dispatch
.WHvSetPartitionProperty(
1859 WHvPartitionPropertyCodeCpuidExitList
,
1861 RTL_NUMBER_OF(cpuidExitList
) * sizeof(UINT32
));
1864 error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx",
1870 hr
= whp_dispatch
.WHvSetupPartition(whpx
->partition
);
1872 error_report("WHPX: Failed to setup partition, hr=%08lx", hr
);
1879 printf("Windows Hypervisor Platform accelerator is operational\n");
1884 if (NULL
!= whpx
->partition
) {
1885 whp_dispatch
.WHvDeletePartition(whpx
->partition
);
1886 whpx
->partition
= NULL
;
1892 int whpx_enabled(void)
1894 return whpx_allowed
;
1897 bool whpx_apic_in_platform(void) {
1898 return whpx_global
.apic_in_platform
;
/*
 * Class initialiser for the WHPX accelerator type.
 *
 * Visible behaviour: installs whpx_accel_init() as the init_machine hook,
 * points ac->allowed at whpx_allowed, and adds a "kernel-irqchip" class
 * property of type "on|off|split" whose setter is
 * whpx_set_kernel_irqchip() (NULL is passed in the slot before the setter),
 * followed by its description string.
 *
 * NOTE(review): some lines, including the trailing arguments of
 * object_class_property_add(), are missing from this listing.
 */
1901 static void whpx_accel_class_init(ObjectClass
*oc
, void *data
)
1903 AccelClass
*ac
= ACCEL_CLASS(oc
);
1905 ac
->init_machine
= whpx_accel_init
;
1906 ac
->allowed
= &whpx_allowed
;
1908 object_class_property_add(oc
, "kernel-irqchip", "on|off|split",
1909 NULL
, whpx_set_kernel_irqchip
,
1911 object_class_property_set_description(oc
, "kernel-irqchip",
1912 "Configure WHPX in-kernel irqchip");
1915 static void whpx_accel_instance_init(Object
*obj
)
1917 struct whpx_state
*whpx
= &whpx_global
;
1919 memset(whpx
, 0, sizeof(struct whpx_state
));
1920 /* Turn on kernel-irqchip, by default */
1921 whpx
->kernel_irqchip_allowed
= true;
1924 static const TypeInfo whpx_accel_type
= {
1925 .name
= ACCEL_CLASS_NAME("whpx"),
1926 .parent
= TYPE_ACCEL
,
1927 .instance_init
= whpx_accel_instance_init
,
1928 .class_init
= whpx_accel_class_init
,
1931 static void whpx_type_init(void)
1933 type_register_static(&whpx_accel_type
);
/*
 * Populate the whp_dispatch function table once.
 *
 * Visible behaviour: checks whp_dispatch_initialized first, then loads the
 * default WinHvPlatform list, the default WinHvEmulation list and the
 * supplemental WinHvPlatform list through load_whp_dispatch_fns(), and sets
 * whp_dispatch_initialized to true. Near the end, FreeLibrary() is called
 * on hWinHvPlatform and hWinHvEmulation when they are non-null, presumably
 * on the failure path.
 *
 * NOTE(review): the return statements and any cleanup label are missing
 * from this listing, so the exact return values cannot be documented.
 * NOTE(review): the supplemental load is performed inside assert(); a build
 * that defines NDEBUG would drop the call together with the check. Confirm
 * that such builds are not supported, or hoist the call out of the assert.
 */
1936 bool init_whp_dispatch(void)
1938 if (whp_dispatch_initialized
) {
1942 if (!load_whp_dispatch_fns(&hWinHvPlatform
, WINHV_PLATFORM_FNS_DEFAULT
)) {
1946 if (!load_whp_dispatch_fns(&hWinHvEmulation
, WINHV_EMULATION_FNS_DEFAULT
)) {
1950 assert(load_whp_dispatch_fns(&hWinHvPlatform
,
1951 WINHV_PLATFORM_FNS_SUPPLEMENTAL
));
1952 whp_dispatch_initialized
= true;
1956 if (hWinHvPlatform
) {
1957 FreeLibrary(hWinHvPlatform
);
1960 if (hWinHvEmulation
) {
1961 FreeLibrary(hWinHvEmulation
);
/* Hook whpx_type_init(), which registers whpx_accel_type, into type_init(). */
1967 type_init(whpx_type_init
);