]> git.proxmox.com Git - mirror_qemu.git/blame - target/i386/whpx/whpx-all.c
Merge tag 'pull-tcg-20221004' of https://gitlab.com/rth7680/qemu into staging
[mirror_qemu.git] / target / i386 / whpx / whpx-all.c
CommitLineData
812d49f2
JTV
1/*
2 * QEMU Windows Hypervisor Platform accelerator (WHPX)
3 *
4 * Copyright Microsoft Corp. 2017
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
8 *
9 */
10
11#include "qemu/osdep.h"
12#include "cpu.h"
13#include "exec/address-spaces.h"
812d49f2 14#include "exec/ioport.h"
d7482ffe 15#include "exec/gdbstub.h"
940e43aa 16#include "qemu/accel.h"
812d49f2 17#include "sysemu/whpx.h"
812d49f2 18#include "sysemu/cpus.h"
54d31236 19#include "sysemu/runstate.h"
812d49f2 20#include "qemu/main-loop.h"
754f2871 21#include "hw/boards.h"
faf20793
SM
22#include "hw/i386/ioapic.h"
23#include "hw/i386/apic_internal.h"
812d49f2 24#include "qemu/error-report.h"
812d49f2 25#include "qapi/error.h"
faf20793
SM
26#include "qapi/qapi-types-common.h"
27#include "qapi/qapi-visit-common.h"
812d49f2 28#include "migration/blocker.h"
faf20793 29#include <winerror.h>
812d49f2 30
9102c968 31#include "whpx-internal.h"
b86f59c7
CF
32#include "whpx-accel-ops.h"
33
34#include <WinHvPlatform.h>
35#include <WinHvEmulation.h>
812d49f2 36
/* Nominal Hyper-V APIC bus clock frequency, in Hz (200 MHz). */
#define HYPERV_APIC_BUS_FREQUENCY      (200000000ULL)

/*
 * Registers transferred between QEMU's CPUX86State and the hypervisor in a
 * single WHvGet/SetVirtualProcessorRegisters() batch.
 *
 * The order of this list is load-bearing: whpx_set_registers() and
 * whpx_get_registers() walk it with a running index and assert on
 * individual entries, so any addition or reordering here must be mirrored
 * in both marshalling functions. Commented-out entries are registers that
 * are deliberately not synchronized.
 */
static const WHV_REGISTER_NAME whpx_register_names[] = {

    /* X64 General purpose registers */
    WHvX64RegisterRax,
    WHvX64RegisterRcx,
    WHvX64RegisterRdx,
    WHvX64RegisterRbx,
    WHvX64RegisterRsp,
    WHvX64RegisterRbp,
    WHvX64RegisterRsi,
    WHvX64RegisterRdi,
    WHvX64RegisterR8,
    WHvX64RegisterR9,
    WHvX64RegisterR10,
    WHvX64RegisterR11,
    WHvX64RegisterR12,
    WHvX64RegisterR13,
    WHvX64RegisterR14,
    WHvX64RegisterR15,
    WHvX64RegisterRip,
    WHvX64RegisterRflags,

    /* X64 Segment registers */
    WHvX64RegisterEs,
    WHvX64RegisterCs,
    WHvX64RegisterSs,
    WHvX64RegisterDs,
    WHvX64RegisterFs,
    WHvX64RegisterGs,
    WHvX64RegisterLdtr,
    WHvX64RegisterTr,

    /* X64 Table registers */
    WHvX64RegisterIdtr,
    WHvX64RegisterGdtr,

    /* X64 Control Registers */
    WHvX64RegisterCr0,
    WHvX64RegisterCr2,
    WHvX64RegisterCr3,
    WHvX64RegisterCr4,
    WHvX64RegisterCr8,

    /* X64 Debug Registers */
    /*
     * WHvX64RegisterDr0,
     * WHvX64RegisterDr1,
     * WHvX64RegisterDr2,
     * WHvX64RegisterDr3,
     * WHvX64RegisterDr6,
     * WHvX64RegisterDr7,
     */

    /* X64 Floating Point and Vector Registers */
    WHvX64RegisterXmm0,
    WHvX64RegisterXmm1,
    WHvX64RegisterXmm2,
    WHvX64RegisterXmm3,
    WHvX64RegisterXmm4,
    WHvX64RegisterXmm5,
    WHvX64RegisterXmm6,
    WHvX64RegisterXmm7,
    WHvX64RegisterXmm8,
    WHvX64RegisterXmm9,
    WHvX64RegisterXmm10,
    WHvX64RegisterXmm11,
    WHvX64RegisterXmm12,
    WHvX64RegisterXmm13,
    WHvX64RegisterXmm14,
    WHvX64RegisterXmm15,
    WHvX64RegisterFpMmx0,
    WHvX64RegisterFpMmx1,
    WHvX64RegisterFpMmx2,
    WHvX64RegisterFpMmx3,
    WHvX64RegisterFpMmx4,
    WHvX64RegisterFpMmx5,
    WHvX64RegisterFpMmx6,
    WHvX64RegisterFpMmx7,
    WHvX64RegisterFpControlStatus,
    WHvX64RegisterXmmControlStatus,

    /* X64 MSRs */
    WHvX64RegisterEfer,
#ifdef TARGET_X86_64
    WHvX64RegisterKernelGsBase,
#endif
    WHvX64RegisterApicBase,
    /* WHvX64RegisterPat, */
    WHvX64RegisterSysenterCs,
    WHvX64RegisterSysenterEip,
    WHvX64RegisterSysenterEsp,
    WHvX64RegisterStar,
#ifdef TARGET_X86_64
    WHvX64RegisterLstar,
    WHvX64RegisterCstar,
    WHvX64RegisterSfmask,
#endif

    /* Interrupt / Event Registers */
    /*
     * WHvRegisterPendingInterruption,
     * WHvRegisterInterruptState,
     * WHvRegisterPendingEvent0,
     * WHvRegisterPendingEvent1
     * WHvX64RegisterDeliverabilityNotifications,
     */
};

/* Value buffer sized to hold one entry per name in whpx_register_names. */
struct whpx_register_set {
    WHV_REGISTER_VALUE values[RTL_NUMBER_OF(whpx_register_names)];
};
150
d7482ffe
IS
151/*
152 * The current implementation of instruction stepping sets the TF flag
153 * in RFLAGS, causing the CPU to raise an INT1 after each instruction.
154 * This corresponds to the WHvX64ExceptionTypeDebugTrapOrFault exception.
155 *
156 * This approach has a few limitations:
157 * 1. Stepping over a PUSHF/SAHF instruction will save the TF flag
158 * along with the other flags, possibly restoring it later. It would
159 * result in another INT1 when the flags are restored, triggering
160 * a stop in gdb that could be cleared by doing another step.
161 *
162 * Stepping over a POPF/LAHF instruction will let it overwrite the
163 * TF flags, ending the stepping mode.
164 *
165 * 2. Stepping over an instruction raising an exception (e.g. INT, DIV,
166 * or anything that could result in a page fault) will save the flags
167 * to the stack, clear the TF flag, and let the guest execute the
168 * handler. Normally, the guest will restore the original flags,
169 * that will continue single-stepping.
170 *
171 * 3. Debuggers running on the guest may wish to set TF to do instruction
172 * stepping. INT1 events generated by it would be intercepted by us,
173 * as long as the gdb is connected to QEMU.
174 *
175 * In practice this means that:
176 * 1. Stepping through flags-modifying instructions may cause gdb to
177 * continue or stop in unexpected places. This will be fully recoverable
178 * and will not crash the target.
179 *
180 * 2. Stepping over an instruction that triggers an exception will step
181 * over the exception handler, not into it.
182 *
183 * 3. Debugging the guest via gdb, while running debugger on the guest
184 * at the same time may lead to unexpected effects. Removing all
185 * breakpoints set via QEMU will prevent any further interference
186 * with the guest-level debuggers.
187 *
188 * The limitations can be addressed as shown below:
189 * 1. PUSHF/SAHF/POPF/LAHF/IRET instructions can be emulated instead of
190 * stepping through them. The exact semantics of the instructions is
191 * defined in the "Combined Volume Set of Intel 64 and IA-32
192 * Architectures Software Developer's Manuals", however it involves a
193 * fair amount of corner cases due to compatibility with real mode,
194 * virtual 8086 mode, and differences between 64-bit and 32-bit modes.
195 *
196 * 2. We could step into the guest's exception handlers using the following
197 * sequence:
198 * a. Temporarily enable catching of all exception types via
199 * whpx_set_exception_exit_bitmap().
200 * b. Once an exception is intercepted, read the IDT/GDT and locate
201 * the original handler.
202 * c. Patch the original handler, injecting an INT3 at the beginning.
203 * d. Update the exception exit bitmap to only catch the
204 * WHvX64ExceptionTypeBreakpointTrap exception.
205 * e. Let the affected CPU run in the exclusive mode.
206 * f. Restore the original handler and the exception exit bitmap.
207 * Note that handling all corner cases related to IDT/GDT is harder
208 * than it may seem. See x86_cpu_get_phys_page_attrs_debug() for a
209 * rough idea.
210 *
211 * 3. In order to properly support guest-level debugging in parallel with
212 * the QEMU-level debugging, we would need to be able to pass some INT1
213 * events to the guest. This could be done via the following methods:
214 * a. Using the WHvRegisterPendingEvent register. As of Windows 21H1,
215 * it seems to only work for interrupts and not software
216 * exceptions.
217 * b. Locating and patching the original handler by parsing IDT/GDT.
218 * This involves relatively complex logic outlined in the previous
219 * paragraph.
220 * c. Emulating the exception invocation (i.e. manually updating RIP,
221 * RFLAGS, and pushing the old values to stack). This is even more
222 * complicated than the previous option, since it involves checking
223 * CPL, gate attributes, and doing various adjustments depending
224 * on the current CPU mode, whether the CPL is changing, etc.
225 */
/* Single-stepping modes; see the discussion of TF-based stepping above. */
typedef enum WhpxStepMode {
    WHPX_STEP_NONE = 0,
    /* Halt other VCPUs */
    WHPX_STEP_EXCLUSIVE,
} WhpxStepMode;
231
/* Per-vCPU accelerator state, attached to CPUState via cpu->hax_vcpu. */
struct whpx_vcpu {
    WHV_EMULATOR_HANDLE emulator;
    bool window_registered;
    bool interruptable;
    bool ready_for_pic_interrupt;
    /* Cached task priority in CR8 encoding (see whpx_apic_tpr_to_cr8()). */
    uint64_t tpr;
    uint64_t apic_base;
    bool interruption_pending;

    /* Must be the last field as it may have a tail */
    WHV_RUN_VP_EXIT_CONTEXT exit_ctx;
};

/* Set when the WHPX accelerator has been selected and is usable. */
static bool whpx_allowed;
/* Guard for one-time loading of the WinHv DLLs into whp_dispatch. */
static bool whp_dispatch_initialized;
static HMODULE hWinHvPlatform, hWinHvEmulation;
static uint32_t max_vcpu_index;
/* Host XSAVE capabilities reported by the hypervisor. */
static WHV_PROCESSOR_XSAVE_FEATURES whpx_xsave_cap;

struct whpx_state whpx_global;
/* Dynamically resolved WinHvPlatform/WinHvEmulation entry points. */
struct WHPDispatch whp_dispatch;

b6b3da99
SM
254static bool whpx_has_xsave(void)
255{
256 return whpx_xsave_cap.XsaveSupport;
257}

/*
 * VP support
 */

/* Map a CPUState to the accelerator-private vCPU structure. */
static struct whpx_vcpu *get_whpx_vcpu(CPUState *cpu)
{
    return (struct whpx_vcpu *)cpu->hax_vcpu;
}
267
268static WHV_X64_SEGMENT_REGISTER whpx_seg_q2h(const SegmentCache *qs, int v86,
269 int r86)
270{
271 WHV_X64_SEGMENT_REGISTER hs;
272 unsigned flags = qs->flags;
273
274 hs.Base = qs->base;
275 hs.Limit = qs->limit;
276 hs.Selector = qs->selector;
277
278 if (v86) {
279 hs.Attributes = 0;
280 hs.SegmentType = 3;
281 hs.Present = 1;
282 hs.DescriptorPrivilegeLevel = 3;
283 hs.NonSystemSegment = 1;
284
285 } else {
286 hs.Attributes = (flags >> DESC_TYPE_SHIFT);
287
288 if (r86) {
289 /* hs.Base &= 0xfffff; */
290 }
291 }
292
293 return hs;
294}
295
296static SegmentCache whpx_seg_h2q(const WHV_X64_SEGMENT_REGISTER *hs)
297{
298 SegmentCache qs;
299
300 qs.base = hs->Base;
301 qs.limit = hs->Limit;
302 qs.selector = hs->Selector;
303
304 qs.flags = ((uint32_t)hs->Attributes) << DESC_TYPE_SHIFT;
305
306 return qs;
307}
308
b6b3da99
SM
309/* X64 Extended Control Registers */
310static void whpx_set_xcrs(CPUState *cpu)
311{
312 CPUX86State *env = cpu->env_ptr;
313 HRESULT hr;
314 struct whpx_state *whpx = &whpx_global;
315 WHV_REGISTER_VALUE xcr0;
316 WHV_REGISTER_NAME xcr0_name = WHvX64RegisterXCr0;
317
318 if (!whpx_has_xsave()) {
319 return;
320 }
321
322 /* Only xcr0 is supported by the hypervisor currently */
323 xcr0.Reg64 = env->xcr0;
324 hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
325 whpx->partition, cpu->cpu_index, &xcr0_name, 1, &xcr0);
326 if (FAILED(hr)) {
327 error_report("WHPX: Failed to set register xcr0, hr=%08lx", hr);
328 }
329}
330
6785e767
SM
331static int whpx_set_tsc(CPUState *cpu)
332{
95e862d7 333 CPUX86State *env = cpu->env_ptr;
6785e767
SM
334 WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc;
335 WHV_REGISTER_VALUE tsc_val;
336 HRESULT hr;
337 struct whpx_state *whpx = &whpx_global;
338
339 /*
340 * Suspend the partition prior to setting the TSC to reduce the variance
341 * in TSC across vCPUs. When the first vCPU runs post suspend, the
342 * partition is automatically resumed.
343 */
344 if (whp_dispatch.WHvSuspendPartitionTime) {
345
346 /*
347 * Unable to suspend partition while setting TSC is not a fatal
348 * error. It just increases the likelihood of TSC variance between
349 * vCPUs and some guest OS are able to handle that just fine.
350 */
351 hr = whp_dispatch.WHvSuspendPartitionTime(whpx->partition);
352 if (FAILED(hr)) {
353 warn_report("WHPX: Failed to suspend partition, hr=%08lx", hr);
354 }
355 }
356
357 tsc_val.Reg64 = env->tsc;
358 hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
359 whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val);
360 if (FAILED(hr)) {
361 error_report("WHPX: Failed to set TSC, hr=%08lx", hr);
362 return -1;
363 }
364
365 return 0;
366}
367
5ad93fd3
IS
368/*
369 * The CR8 register in the CPU is mapped to the TPR register of the APIC,
370 * however, they use a slightly different encoding. Specifically:
371 *
372 * APIC.TPR[bits 7:4] = CR8[bits 3:0]
373 *
374 * This mechanism is described in section 10.8.6.1 of Volume 3 of Intel 64
375 * and IA-32 Architectures Software Developer's Manual.
f000bc74
IS
376 *
377 * The functions below translate the value of CR8 to TPR and vice versa.
5ad93fd3
IS
378 */
379
static uint64_t whpx_apic_tpr_to_cr8(uint64_t tpr)
{
    /* Drop the low nibble: CR8 keeps only bits 7:4 of the APIC TPR. */
    return tpr / 16;
}
384
static uint64_t whpx_cr8_to_apic_tpr(uint64_t cr8)
{
    /* Inverse of whpx_apic_tpr_to_cr8(): CR8[3:0] becomes TPR[7:4]. */
    return cr8 * 16;
}
389
/*
 * Push the full QEMU CPU state into the virtual processor in one
 * WHvSetVirtualProcessorRegisters() batch. The marshalling order must
 * follow whpx_register_names[] exactly; the asserts below verify that as
 * the index advances. 'level' selects how much state to transfer: MSRs
 * with side effects (currently the TSC) are only written for
 * WHPX_SET_RESET_STATE and above.
 */
static void whpx_set_registers(CPUState *cpu, int level)
{
    struct whpx_state *whpx = &whpx_global;
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    CPUX86State *env = cpu->env_ptr;
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct whpx_register_set vcxt;
    HRESULT hr;
    int idx;
    int idx_next;
    int i;
    int v86, r86;

    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));

    /*
     * Following MSRs have side effects on the guest or are too heavy for
     * runtime. Limit them to full state update.
     */
    if (level >= WHPX_SET_RESET_STATE) {
        whpx_set_tsc(cpu);
    }

    memset(&vcxt, 0, sizeof(struct whpx_register_set));

    /* Segment translation depends on virtual-8086 / real mode. */
    v86 = (env->eflags & VM_MASK);
    r86 = !(env->cr[0] & CR0_PE_MASK);

    vcpu->tpr = whpx_apic_tpr_to_cr8(cpu_get_apic_tpr(x86_cpu->apic_state));
    vcpu->apic_base = cpu_get_apic_base(x86_cpu->apic_state);

    idx = 0;

    /* Indexes for first 16 registers match between HV and QEMU definitions */
    idx_next = 16;
    for (idx = 0; idx < CPU_NB_REGS; idx += 1) {
        vcxt.values[idx].Reg64 = (uint64_t)env->regs[idx];
    }
    idx = idx_next;

    /* Same goes for RIP and RFLAGS */
    assert(whpx_register_names[idx] == WHvX64RegisterRip);
    vcxt.values[idx++].Reg64 = env->eip;

    assert(whpx_register_names[idx] == WHvX64RegisterRflags);
    vcxt.values[idx++].Reg64 = env->eflags;

    /* Translate 6+4 segment registers. HV and QEMU order matches */
    assert(idx == WHvX64RegisterEs);
    for (i = 0; i < 6; i += 1, idx += 1) {
        vcxt.values[idx].Segment = whpx_seg_q2h(&env->segs[i], v86, r86);
    }

    assert(idx == WHvX64RegisterLdtr);
    vcxt.values[idx++].Segment = whpx_seg_q2h(&env->ldt, 0, 0);

    assert(idx == WHvX64RegisterTr);
    vcxt.values[idx++].Segment = whpx_seg_q2h(&env->tr, 0, 0);

    assert(idx == WHvX64RegisterIdtr);
    vcxt.values[idx].Table.Base = env->idt.base;
    vcxt.values[idx].Table.Limit = env->idt.limit;
    idx += 1;

    assert(idx == WHvX64RegisterGdtr);
    vcxt.values[idx].Table.Base = env->gdt.base;
    vcxt.values[idx].Table.Limit = env->gdt.limit;
    idx += 1;

    /* CR0, 2, 3, 4, 8 */
    assert(whpx_register_names[idx] == WHvX64RegisterCr0);
    vcxt.values[idx++].Reg64 = env->cr[0];
    assert(whpx_register_names[idx] == WHvX64RegisterCr2);
    vcxt.values[idx++].Reg64 = env->cr[2];
    assert(whpx_register_names[idx] == WHvX64RegisterCr3);
    vcxt.values[idx++].Reg64 = env->cr[3];
    assert(whpx_register_names[idx] == WHvX64RegisterCr4);
    vcxt.values[idx++].Reg64 = env->cr[4];
    assert(whpx_register_names[idx] == WHvX64RegisterCr8);
    vcxt.values[idx++].Reg64 = vcpu->tpr;

    /* 8 Debug Registers - Skipped */

    /*
     * Extended control registers needs to be handled separately depending
     * on whether xsave is supported/enabled or not.
     */
    whpx_set_xcrs(cpu);

    /* 16 XMM registers */
    assert(whpx_register_names[idx] == WHvX64RegisterXmm0);
    idx_next = idx + 16;
    for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) {
        vcxt.values[idx].Reg128.Low64 = env->xmm_regs[i].ZMM_Q(0);
        vcxt.values[idx].Reg128.High64 = env->xmm_regs[i].ZMM_Q(1);
    }
    idx = idx_next;

    /* 8 FP registers */
    assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0);
    for (i = 0; i < 8; i += 1, idx += 1) {
        vcxt.values[idx].Fp.AsUINT128.Low64 = env->fpregs[i].mmx.MMX_Q(0);
        /* vcxt.values[idx].Fp.AsUINT128.High64 =
           env->fpregs[i].mmx.MMX_Q(1);
        */
    }

    /* FP control status register */
    assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus);
    vcxt.values[idx].FpControlStatus.FpControl = env->fpuc;
    /* Recombine fpus and fpstt into the architectural FSW layout. */
    vcxt.values[idx].FpControlStatus.FpStatus =
        (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    vcxt.values[idx].FpControlStatus.FpTag = 0;
    for (i = 0; i < 8; ++i) {
        /* QEMU fptags use inverted polarity relative to the FTW bits. */
        vcxt.values[idx].FpControlStatus.FpTag |= (!env->fptags[i]) << i;
    }
    vcxt.values[idx].FpControlStatus.Reserved = 0;
    vcxt.values[idx].FpControlStatus.LastFpOp = env->fpop;
    vcxt.values[idx].FpControlStatus.LastFpRip = env->fpip;
    idx += 1;

    /* XMM control status register */
    assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus);
    vcxt.values[idx].XmmControlStatus.LastFpRdp = 0;
    vcxt.values[idx].XmmControlStatus.XmmStatusControl = env->mxcsr;
    vcxt.values[idx].XmmControlStatus.XmmStatusControlMask = 0x0000ffff;
    idx += 1;

    /* MSRs */
    assert(whpx_register_names[idx] == WHvX64RegisterEfer);
    vcxt.values[idx++].Reg64 = env->efer;
#ifdef TARGET_X86_64
    assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase);
    vcxt.values[idx++].Reg64 = env->kernelgsbase;
#endif

    assert(whpx_register_names[idx] == WHvX64RegisterApicBase);
    vcxt.values[idx++].Reg64 = vcpu->apic_base;

    /* WHvX64RegisterPat - Skipped */

    assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs);
    vcxt.values[idx++].Reg64 = env->sysenter_cs;
    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip);
    vcxt.values[idx++].Reg64 = env->sysenter_eip;
    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp);
    vcxt.values[idx++].Reg64 = env->sysenter_esp;
    assert(whpx_register_names[idx] == WHvX64RegisterStar);
    vcxt.values[idx++].Reg64 = env->star;
#ifdef TARGET_X86_64
    assert(whpx_register_names[idx] == WHvX64RegisterLstar);
    vcxt.values[idx++].Reg64 = env->lstar;
    assert(whpx_register_names[idx] == WHvX64RegisterCstar);
    vcxt.values[idx++].Reg64 = env->cstar;
    assert(whpx_register_names[idx] == WHvX64RegisterSfmask);
    vcxt.values[idx++].Reg64 = env->fmask;
#endif

    /* Interrupt / Event Registers - Skipped */

    assert(idx == RTL_NUMBER_OF(whpx_register_names));

    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
        whpx->partition, cpu->cpu_index,
        whpx_register_names,
        RTL_NUMBER_OF(whpx_register_names),
        &vcxt.values[0]);

    if (FAILED(hr)) {
        error_report("WHPX: Failed to set virtual processor context, hr=%08lx",
                     hr);
    }

    return;
}
565
6785e767
SM
566static int whpx_get_tsc(CPUState *cpu)
567{
95e862d7 568 CPUX86State *env = cpu->env_ptr;
6785e767
SM
569 WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc;
570 WHV_REGISTER_VALUE tsc_val;
571 HRESULT hr;
572 struct whpx_state *whpx = &whpx_global;
573
574 hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
575 whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val);
576 if (FAILED(hr)) {
577 error_report("WHPX: Failed to get TSC, hr=%08lx", hr);
578 return -1;
579 }
580
581 env->tsc = tsc_val.Reg64;
582 return 0;
583}
584
b6b3da99
SM
585/* X64 Extended Control Registers */
586static void whpx_get_xcrs(CPUState *cpu)
587{
588 CPUX86State *env = cpu->env_ptr;
589 HRESULT hr;
590 struct whpx_state *whpx = &whpx_global;
591 WHV_REGISTER_VALUE xcr0;
592 WHV_REGISTER_NAME xcr0_name = WHvX64RegisterXCr0;
593
594 if (!whpx_has_xsave()) {
595 return;
596 }
597
598 /* Only xcr0 is supported by the hypervisor currently */
599 hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
600 whpx->partition, cpu->cpu_index, &xcr0_name, 1, &xcr0);
601 if (FAILED(hr)) {
602 error_report("WHPX: Failed to get register xcr0, hr=%08lx", hr);
603 return;
604 }
605
606 env->xcr0 = xcr0.Reg64;
607}
608
/*
 * Pull the full virtual processor state back into QEMU's CPUX86State in
 * one WHvGetVirtualProcessorRegisters() batch. Mirror image of
 * whpx_set_registers(): the unmarshalling order must follow
 * whpx_register_names[] exactly, verified by the asserts below.
 */
static void whpx_get_registers(CPUState *cpu)
{
    struct whpx_state *whpx = &whpx_global;
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    CPUX86State *env = cpu->env_ptr;
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct whpx_register_set vcxt;
    uint64_t tpr, apic_base;
    HRESULT hr;
    int idx;
    int idx_next;
    int i;

    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));

    /* Refresh the cached TSC once per stop; valid while not running. */
    if (!env->tsc_valid) {
        whpx_get_tsc(cpu);
        env->tsc_valid = !runstate_is_running();
    }

    hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
        whpx->partition, cpu->cpu_index,
        whpx_register_names,
        RTL_NUMBER_OF(whpx_register_names),
        &vcxt.values[0]);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to get virtual processor context, hr=%08lx",
                     hr);
    }

    if (whpx_apic_in_platform()) {
        /*
         * Fetch the TPR value from the emulated APIC. It may get overwritten
         * below with the value from CR8 returned by
         * WHvGetVirtualProcessorRegisters().
         */
        whpx_apic_get(x86_cpu->apic_state);
        vcpu->tpr = whpx_apic_tpr_to_cr8(
            cpu_get_apic_tpr(x86_cpu->apic_state));
    }

    idx = 0;

    /* Indexes for first 16 registers match between HV and QEMU definitions */
    idx_next = 16;
    for (idx = 0; idx < CPU_NB_REGS; idx += 1) {
        env->regs[idx] = vcxt.values[idx].Reg64;
    }
    idx = idx_next;

    /* Same goes for RIP and RFLAGS */
    assert(whpx_register_names[idx] == WHvX64RegisterRip);
    env->eip = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterRflags);
    env->eflags = vcxt.values[idx++].Reg64;

    /* Translate 6+4 segment registers. HV and QEMU order matches */
    assert(idx == WHvX64RegisterEs);
    for (i = 0; i < 6; i += 1, idx += 1) {
        env->segs[i] = whpx_seg_h2q(&vcxt.values[idx].Segment);
    }

    assert(idx == WHvX64RegisterLdtr);
    env->ldt = whpx_seg_h2q(&vcxt.values[idx++].Segment);
    assert(idx == WHvX64RegisterTr);
    env->tr = whpx_seg_h2q(&vcxt.values[idx++].Segment);
    assert(idx == WHvX64RegisterIdtr);
    env->idt.base = vcxt.values[idx].Table.Base;
    env->idt.limit = vcxt.values[idx].Table.Limit;
    idx += 1;
    assert(idx == WHvX64RegisterGdtr);
    env->gdt.base = vcxt.values[idx].Table.Base;
    env->gdt.limit = vcxt.values[idx].Table.Limit;
    idx += 1;

    /* CR0, 2, 3, 4, 8 */
    assert(whpx_register_names[idx] == WHvX64RegisterCr0);
    env->cr[0] = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCr2);
    env->cr[2] = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCr3);
    env->cr[3] = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCr4);
    env->cr[4] = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCr8);
    tpr = vcxt.values[idx++].Reg64;
    /* Only propagate TPR to the APIC when it actually changed. */
    if (tpr != vcpu->tpr) {
        vcpu->tpr = tpr;
        cpu_set_apic_tpr(x86_cpu->apic_state, whpx_cr8_to_apic_tpr(tpr));
    }

    /* 8 Debug Registers - Skipped */

    /*
     * Extended control registers needs to be handled separately depending
     * on whether xsave is supported/enabled or not.
     */
    whpx_get_xcrs(cpu);

    /* 16 XMM registers */
    assert(whpx_register_names[idx] == WHvX64RegisterXmm0);
    idx_next = idx + 16;
    for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) {
        env->xmm_regs[i].ZMM_Q(0) = vcxt.values[idx].Reg128.Low64;
        env->xmm_regs[i].ZMM_Q(1) = vcxt.values[idx].Reg128.High64;
    }
    idx = idx_next;

    /* 8 FP registers */
    assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0);
    for (i = 0; i < 8; i += 1, idx += 1) {
        env->fpregs[i].mmx.MMX_Q(0) = vcxt.values[idx].Fp.AsUINT128.Low64;
        /* env->fpregs[i].mmx.MMX_Q(1) =
               vcxt.values[idx].Fp.AsUINT128.High64;
        */
    }

    /* FP control status register */
    assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus);
    env->fpuc = vcxt.values[idx].FpControlStatus.FpControl;
    /* Split the architectural FSW back into fpstt (TOP) and fpus. */
    env->fpstt = (vcxt.values[idx].FpControlStatus.FpStatus >> 11) & 0x7;
    env->fpus = vcxt.values[idx].FpControlStatus.FpStatus & ~0x3800;
    for (i = 0; i < 8; ++i) {
        /* QEMU fptags use inverted polarity relative to the FTW bits. */
        env->fptags[i] = !((vcxt.values[idx].FpControlStatus.FpTag >> i) & 1);
    }
    env->fpop = vcxt.values[idx].FpControlStatus.LastFpOp;
    env->fpip = vcxt.values[idx].FpControlStatus.LastFpRip;
    idx += 1;

    /* XMM control status register */
    assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus);
    env->mxcsr = vcxt.values[idx].XmmControlStatus.XmmStatusControl;
    idx += 1;

    /* MSRs */
    assert(whpx_register_names[idx] == WHvX64RegisterEfer);
    env->efer = vcxt.values[idx++].Reg64;
#ifdef TARGET_X86_64
    assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase);
    env->kernelgsbase = vcxt.values[idx++].Reg64;
#endif

    assert(whpx_register_names[idx] == WHvX64RegisterApicBase);
    apic_base = vcxt.values[idx++].Reg64;
    /* Only propagate the APIC base when it actually changed. */
    if (apic_base != vcpu->apic_base) {
        vcpu->apic_base = apic_base;
        cpu_set_apic_base(x86_cpu->apic_state, vcpu->apic_base);
    }

    /* WHvX64RegisterPat - Skipped */

    assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs);
    env->sysenter_cs = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip);
    env->sysenter_eip = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp);
    env->sysenter_esp = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterStar);
    env->star = vcxt.values[idx++].Reg64;
#ifdef TARGET_X86_64
    assert(whpx_register_names[idx] == WHvX64RegisterLstar);
    env->lstar = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCstar);
    env->cstar = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterSfmask);
    env->fmask = vcxt.values[idx++].Reg64;
#endif

    /* Interrupt / Event Registers - Skipped */

    assert(idx == RTL_NUMBER_OF(whpx_register_names));

    if (whpx_apic_in_platform()) {
        whpx_apic_get(x86_cpu->apic_state);
    }

    /* Recompute hidden flags derived from the freshly loaded state. */
    x86_update_hflags(env);

    return;
}
789
/*
 * Instruction-emulator callback: perform a decoded port I/O access by
 * forwarding it to QEMU's I/O address space.
 */
static HRESULT CALLBACK whpx_emu_ioport_callback(
    void *ctx,
    WHV_EMULATOR_IO_ACCESS_INFO *IoAccess)
{
    MemTxAttrs attrs = { 0 };
    address_space_rw(&address_space_io, IoAccess->Port, attrs,
                     &IoAccess->Data, IoAccess->AccessSize,
                     IoAccess->Direction);
    return S_OK;
}
800
/*
 * Instruction-emulator callback: perform a decoded MMIO access against
 * QEMU's guest-physical memory.
 */
static HRESULT CALLBACK whpx_emu_mmio_callback(
    void *ctx,
    WHV_EMULATOR_MEMORY_ACCESS_INFO *ma)
{
    cpu_physical_memory_rw(ma->GpaAddress, ma->Data, ma->AccessSize,
                           ma->Direction);
    return S_OK;
}
809
810static HRESULT CALLBACK whpx_emu_getreg_callback(
811 void *ctx,
812 const WHV_REGISTER_NAME *RegisterNames,
813 UINT32 RegisterCount,
814 WHV_REGISTER_VALUE *RegisterValues)
815{
816 HRESULT hr;
817 struct whpx_state *whpx = &whpx_global;
818 CPUState *cpu = (CPUState *)ctx;
819
327fccb2
LP
820 hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
821 whpx->partition, cpu->cpu_index,
822 RegisterNames, RegisterCount,
823 RegisterValues);
812d49f2
JTV
824 if (FAILED(hr)) {
825 error_report("WHPX: Failed to get virtual processor registers,"
826 " hr=%08lx", hr);
812d49f2
JTV
827 }
828
829 return hr;
830}
831
/*
 * Instruction-emulator callback: write guest registers on behalf of the
 * emulated instruction. 'ctx' is the CPUState supplied at emulation time.
 */
static HRESULT CALLBACK whpx_emu_setreg_callback(
    void *ctx,
    const WHV_REGISTER_NAME *RegisterNames,
    UINT32 RegisterCount,
    const WHV_REGISTER_VALUE *RegisterValues)
{
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;
    CPUState *cpu = (CPUState *)ctx;

    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
        whpx->partition, cpu->cpu_index,
        RegisterNames, RegisterCount,
        RegisterValues);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to set virtual processor registers,"
                     " hr=%08lx", hr);
    }

    /*
     * The emulator just successfully wrote the register state. We clear the
     * dirty state so we avoid the double write on resume of the VP.
     */
    cpu->vcpu_dirty = false;

    return hr;
}
859
860static HRESULT CALLBACK whpx_emu_translate_callback(
861 void *ctx,
862 WHV_GUEST_VIRTUAL_ADDRESS Gva,
863 WHV_TRANSLATE_GVA_FLAGS TranslateFlags,
864 WHV_TRANSLATE_GVA_RESULT_CODE *TranslationResult,
865 WHV_GUEST_PHYSICAL_ADDRESS *Gpa)
866{
867 HRESULT hr;
868 struct whpx_state *whpx = &whpx_global;
869 CPUState *cpu = (CPUState *)ctx;
870 WHV_TRANSLATE_GVA_RESULT res;
871
327fccb2
LP
872 hr = whp_dispatch.WHvTranslateGva(whpx->partition, cpu->cpu_index,
873 Gva, TranslateFlags, &res, Gpa);
812d49f2
JTV
874 if (FAILED(hr)) {
875 error_report("WHPX: Failed to translate GVA, hr=%08lx", hr);
812d49f2
JTV
876 } else {
877 *TranslationResult = res.ResultCode;
878 }
879
880 return hr;
881}
882
/*
 * Dispatch table of instruction-emulator callbacks; presumably registered
 * when the per-vCPU emulator handle (whpx_vcpu.emulator) is created —
 * confirm against the vCPU initialization code, which is outside this view.
 */
static const WHV_EMULATOR_CALLBACKS whpx_emu_callbacks = {
    .Size = sizeof(WHV_EMULATOR_CALLBACKS),
    .WHvEmulatorIoPortCallback = whpx_emu_ioport_callback,
    .WHvEmulatorMemoryCallback = whpx_emu_mmio_callback,
    .WHvEmulatorGetVirtualProcessorRegisters = whpx_emu_getreg_callback,
    .WHvEmulatorSetVirtualProcessorRegisters = whpx_emu_setreg_callback,
    .WHvEmulatorTranslateGvaPage = whpx_emu_translate_callback,
};
891
892static int whpx_handle_mmio(CPUState *cpu, WHV_MEMORY_ACCESS_CONTEXT *ctx)
893{
894 HRESULT hr;
895 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
896 WHV_EMULATOR_STATUS emu_status;
897
327fccb2
LP
898 hr = whp_dispatch.WHvEmulatorTryMmioEmulation(
899 vcpu->emulator, cpu,
900 &vcpu->exit_ctx.VpContext, ctx,
901 &emu_status);
812d49f2 902 if (FAILED(hr)) {
812d49f2
JTV
903 error_report("WHPX: Failed to parse MMIO access, hr=%08lx", hr);
904 return -1;
905 }
906
907 if (!emu_status.EmulationSuccessful) {
327fccb2
LP
908 error_report("WHPX: Failed to emulate MMIO access with"
909 " EmulatorReturnStatus: %u", emu_status.AsUINT32);
812d49f2
JTV
910 return -1;
911 }
912
913 return 0;
914}
915
916static int whpx_handle_portio(CPUState *cpu,
917 WHV_X64_IO_PORT_ACCESS_CONTEXT *ctx)
918{
919 HRESULT hr;
920 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
921 WHV_EMULATOR_STATUS emu_status;
922
327fccb2
LP
923 hr = whp_dispatch.WHvEmulatorTryIoEmulation(
924 vcpu->emulator, cpu,
925 &vcpu->exit_ctx.VpContext, ctx,
926 &emu_status);
812d49f2 927 if (FAILED(hr)) {
812d49f2
JTV
928 error_report("WHPX: Failed to parse PortIO access, hr=%08lx", hr);
929 return -1;
930 }
931
932 if (!emu_status.EmulationSuccessful) {
327fccb2
LP
933 error_report("WHPX: Failed to emulate PortIO access with"
934 " EmulatorReturnStatus: %u", emu_status.AsUINT32);
812d49f2
JTV
935 return -1;
936 }
937
938 return 0;
939}
940
d7482ffe
IS
941/*
942 * Controls whether we should intercept various exceptions on the guest,
943 * namely breakpoint/single-step events.
944 *
945 * The 'exceptions' argument accepts a bitmask, e.g:
946 * (1 << WHvX64ExceptionTypeDebugTrapOrFault) | (...)
947 */
948static HRESULT whpx_set_exception_exit_bitmap(UINT64 exceptions)
949{
950 struct whpx_state *whpx = &whpx_global;
951 WHV_PARTITION_PROPERTY prop = { 0, };
952 HRESULT hr;
953
954 if (exceptions == whpx->exception_exit_bitmap) {
955 return S_OK;
956 }
957
958 prop.ExceptionExitBitmap = exceptions;
959
960 hr = whp_dispatch.WHvSetPartitionProperty(
961 whpx->partition,
962 WHvPartitionPropertyCodeExceptionExitBitmap,
963 &prop,
964 sizeof(WHV_PARTITION_PROPERTY));
965
966 if (SUCCEEDED(hr)) {
967 whpx->exception_exit_bitmap = exceptions;
968 }
969
970 return hr;
971}
972
973
/*
 * This function is called before/after stepping over a single instruction.
 * It will update the CPU registers to arm/disarm the instruction stepping
 * accordingly.
 *
 * If exit_context_rflags is non-NULL, it is first asserted to match the
 * live RFLAGS value and then updated with the value written back, so the
 * caller's cached exit context stays consistent.
 *
 * Returns S_OK on success, or the failing WHPX HRESULT otherwise.
 */
static HRESULT whpx_vcpu_configure_single_stepping(CPUState *cpu,
    bool set,
    uint64_t *exit_context_rflags)
{
    WHV_REGISTER_NAME reg_name;
    WHV_REGISTER_VALUE reg_value;
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;

    /*
     * If we are trying to step over a single instruction, we need to set the
     * TF bit in rflags. Otherwise, clear it.
     */
    reg_name = WHvX64RegisterRflags;
    hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
        whpx->partition,
        cpu->cpu_index,
        &reg_name,
        1,
        &reg_value);

    if (FAILED(hr)) {
        error_report("WHPX: Failed to get rflags, hr=%08lx", hr);
        return hr;
    }

    if (exit_context_rflags) {
        /* The cached exit context must agree with the live register. */
        assert(*exit_context_rflags == reg_value.Reg64);
    }

    if (set) {
        /* Raise WHvX64ExceptionTypeDebugTrapOrFault after each instruction */
        reg_value.Reg64 |= TF_MASK;
    } else {
        reg_value.Reg64 &= ~TF_MASK;
    }

    if (exit_context_rflags) {
        /* Keep the caller's cached copy of RFLAGS in sync with the update. */
        *exit_context_rflags = reg_value.Reg64;
    }

    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
        whpx->partition,
        cpu->cpu_index,
        &reg_name,
        1,
        &reg_value);

    if (FAILED(hr)) {
        error_report("WHPX: Failed to set rflags,"
            " hr=%08lx",
            hr);
        return hr;
    }

    reg_name = WHvRegisterInterruptState;
    reg_value.Reg64 = 0;

    /* Suspend delivery of hardware interrupts during single-stepping. */
    reg_value.InterruptState.InterruptShadow = set != 0;

    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
        whpx->partition,
        cpu->cpu_index,
        &reg_name,
        1,
        &reg_value);

    if (FAILED(hr)) {
        error_report("WHPX: Failed to set InterruptState,"
            " hr=%08lx",
            hr);
        return hr;
    }

    if (!set) {
        /*
         * We have just finished stepping over a single instruction,
         * and intercepted the INT1 generated by it.
         * We need to now hide the INT1 from the guest,
         * as it would not be expecting it.
         */

        reg_name = WHvX64RegisterPendingDebugException;
        hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
            whpx->partition,
            cpu->cpu_index,
            &reg_name,
            1,
            &reg_value);

        if (FAILED(hr)) {
            error_report("WHPX: Failed to get pending debug exceptions,"
                "hr=%08lx", hr);
            return hr;
        }

        if (reg_value.PendingDebugException.SingleStep) {
            /* Clear the single-step bit so the guest never observes it. */
            reg_value.PendingDebugException.SingleStep = 0;

            hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
                whpx->partition,
                cpu->cpu_index,
                &reg_name,
                1,
                &reg_value);

            if (FAILED(hr)) {
                error_report("WHPX: Failed to clear pending debug exceptions,"
                    "hr=%08lx", hr);
                return hr;
            }
        }

    }

    return S_OK;
}
1097
1098/* Tries to find a breakpoint at the specified address. */
1099static struct whpx_breakpoint *whpx_lookup_breakpoint_by_addr(uint64_t address)
1100{
1101 struct whpx_state *whpx = &whpx_global;
1102 int i;
1103
1104 if (whpx->breakpoints.breakpoints) {
1105 for (i = 0; i < whpx->breakpoints.breakpoints->used; i++) {
1106 if (address == whpx->breakpoints.breakpoints->data[i].address) {
1107 return &whpx->breakpoints.breakpoints->data[i];
1108 }
1109 }
1110 }
1111
1112 return NULL;
1113}
1114
/*
 * Linux uses int3 (0xCC) during startup (see int3_selftest()) and for
 * debugging user-mode applications. Since the WHPX API does not offer
 * an easy way to pass the intercepted exception back to the guest, we
 * resort to using INT1 instead, and let the guest always handle INT3.
 */
/* 0xF1 is the one-byte ICEBP/INT1 opcode. */
static const uint8_t whpx_breakpoint_instruction = 0xF1;
1122
/*
 * The WHPX QEMU backend implements breakpoints by writing the INT1
 * instruction into memory (ignoring the DRx registers). This raises a few
 * issues that need to be carefully handled:
 *
 * 1. Although unlikely, other parts of QEMU may set multiple breakpoints
 *    at the same location, and later remove them in arbitrary order.
 *    This should not cause memory corruption, and should only remove the
 *    physical breakpoint instruction when the last QEMU breakpoint is gone.
 *
 * 2. Writing arbitrary virtual memory may fail if it's not mapped to a valid
 *    physical location. Hence, physically adding/removing a breakpoint can
 *    theoretically fail at any time. We need to keep track of it.
 *
 * The function below rebuilds a list of low-level breakpoints (one per
 * address, tracking the original instruction and any errors) from the list of
 * high-level breakpoints (set via cpu_breakpoint_insert()).
 *
 * In order to optimize performance, this function stores the list of
 * high-level breakpoints (a.k.a. CPU breakpoints) used to compute the
 * low-level ones, so that it won't be re-invoked until these breakpoints
 * change.
 *
 * Note that this function decides which breakpoints should be inserted into
 * memory, but doesn't actually do it. The memory accessing is done in
 * whpx_apply_breakpoints().
 */
static void whpx_translate_cpu_breakpoints(
    struct whpx_breakpoints *breakpoints,
    CPUState *cpu,
    int cpu_breakpoint_count)
{
    CPUBreakpoint *bp;
    int cpu_bp_index = 0;

    /* Snapshot the CPU breakpoint addresses used for this computation. */
    breakpoints->original_addresses =
        g_renew(vaddr, breakpoints->original_addresses, cpu_breakpoint_count);

    breakpoints->original_address_count = cpu_breakpoint_count;

    /*
     * Worst case: every surviving old entry is kept AND every CPU
     * breakpoint is at a brand-new address.
     */
    int max_breakpoints = cpu_breakpoint_count +
        (breakpoints->breakpoints ? breakpoints->breakpoints->used : 0);

    /* Allocation includes the flexible data[] array; zero-initialized. */
    struct whpx_breakpoint_collection *new_breakpoints =
        (struct whpx_breakpoint_collection *)g_malloc0(
        sizeof(struct whpx_breakpoint_collection) +
            max_breakpoints * sizeof(struct whpx_breakpoint));

    new_breakpoints->allocated = max_breakpoints;
    new_breakpoints->used = 0;

    /*
     * 1. Preserve all old breakpoints that could not be automatically
     * cleared when the CPU got stopped.
     */
    if (breakpoints->breakpoints) {
        int i;
        for (i = 0; i < breakpoints->breakpoints->used; i++) {
            if (breakpoints->breakpoints->data[i].state != WHPX_BP_CLEARED) {
                new_breakpoints->data[new_breakpoints->used++] =
                    breakpoints->breakpoints->data[i];
            }
        }
    }

    /* 2. Map all CPU breakpoints to WHPX breakpoints */
    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
        int i;
        bool found = false;

        /* This will be used to detect changed CPU breakpoints later. */
        breakpoints->original_addresses[cpu_bp_index++] = bp->pc;

        for (i = 0; i < new_breakpoints->used; i++) {
            /*
             * WARNING: This loop has O(N^2) complexity, where N is the
             * number of breakpoints. It should not be a bottleneck in
             * real-world scenarios, since it only needs to run once after
             * the breakpoints have been modified.
             * If this ever becomes a concern, it can be optimized by storing
             * high-level breakpoint objects in a tree or hash map.
             */

            if (new_breakpoints->data[i].address == bp->pc) {
                /* There was already a breakpoint at this address. */
                if (new_breakpoints->data[i].state == WHPX_BP_CLEAR_PENDING) {
                    /* It was about to be removed; keep it set instead. */
                    new_breakpoints->data[i].state = WHPX_BP_SET;
                } else if (new_breakpoints->data[i].state == WHPX_BP_SET) {
                    /* Re-arm so whpx_apply_breakpoints() re-writes it. */
                    new_breakpoints->data[i].state = WHPX_BP_SET_PENDING;
                }

                found = true;
                break;
            }
        }

        if (!found && new_breakpoints->used < new_breakpoints->allocated) {
            /* No WHPX breakpoint at this address. Create one. */
            new_breakpoints->data[new_breakpoints->used].address = bp->pc;
            new_breakpoints->data[new_breakpoints->used].state =
                WHPX_BP_SET_PENDING;
            new_breakpoints->used++;
        }
    }

    /*
     * Free the previous breakpoint list. This can be optimized by keeping
     * it as shadow buffer for the next computation instead of freeing
     * it immediately.
     */
    g_free(breakpoints->breakpoints);

    breakpoints->breakpoints = new_breakpoints;
}
1237
/*
 * Physically inserts/removes the breakpoints by reading and writing the
 * physical memory, keeping a track of the failed attempts.
 *
 * Passing resuming=true will try to set all previously unset breakpoints.
 * Passing resuming=false will remove all inserted ones.
 */
static void whpx_apply_breakpoints(
    struct whpx_breakpoint_collection *breakpoints,
    CPUState *cpu,
    bool resuming)
{
    int i, rc;
    if (!breakpoints) {
        return;
    }

    for (i = 0; i < breakpoints->used; i++) {
        /* Decide what to do right now based on the last known state. */
        WhpxBreakpointState state = breakpoints->data[i].state;
        switch (state) {
        case WHPX_BP_CLEARED:
            if (resuming) {
                state = WHPX_BP_SET_PENDING;
            }
            break;
        case WHPX_BP_SET_PENDING:
            if (!resuming) {
                /* Never written to memory, so nothing to undo. */
                state = WHPX_BP_CLEARED;
            }
            break;
        case WHPX_BP_SET:
            if (!resuming) {
                state = WHPX_BP_CLEAR_PENDING;
            }
            break;
        case WHPX_BP_CLEAR_PENDING:
            if (resuming) {
                state = WHPX_BP_SET;
            }
            break;
        }

        if (state == WHPX_BP_SET_PENDING) {
            /* Remember the original instruction. */
            rc = cpu_memory_rw_debug(cpu,
                breakpoints->data[i].address,
                &breakpoints->data[i].original_instruction,
                1,
                false);

            if (!rc) {
                /* Write the breakpoint instruction. */
                /* Cast drops const; cpu_memory_rw_debug() takes void *. */
                rc = cpu_memory_rw_debug(cpu,
                    breakpoints->data[i].address,
                    (void *)&whpx_breakpoint_instruction,
                    1,
                    true);
            }

            if (!rc) {
                state = WHPX_BP_SET;
            }

        }

        if (state == WHPX_BP_CLEAR_PENDING) {
            /* Restore the original instruction. */
            rc = cpu_memory_rw_debug(cpu,
                breakpoints->data[i].address,
                &breakpoints->data[i].original_instruction,
                1,
                true);

            if (!rc) {
                state = WHPX_BP_CLEARED;
            }
        }

        /* On memory access failure, the *_PENDING state persists so the
         * operation is retried on a later call. */
        breakpoints->data[i].state = state;
    }
}
1320
1321/*
1322 * This function is called when the a VCPU is about to start and no other
1323 * VCPUs have been started so far. Since the VCPU start order could be
1324 * arbitrary, it doesn't have to be VCPU#0.
1325 *
1326 * It is used to commit the breakpoints into memory, and configure WHPX
1327 * to intercept debug exceptions.
1328 *
1329 * Note that whpx_set_exception_exit_bitmap() cannot be called if one or
1330 * more VCPUs are already running, so this is the best place to do it.
1331 */
1332static int whpx_first_vcpu_starting(CPUState *cpu)
1333{
1334 struct whpx_state *whpx = &whpx_global;
1335 HRESULT hr;
1336
1337 g_assert(qemu_mutex_iothread_locked());
1338
1339 if (!QTAILQ_EMPTY(&cpu->breakpoints) ||
1340 (whpx->breakpoints.breakpoints &&
1341 whpx->breakpoints.breakpoints->used)) {
1342 CPUBreakpoint *bp;
1343 int i = 0;
1344 bool update_pending = false;
1345
1346 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
1347 if (i >= whpx->breakpoints.original_address_count ||
1348 bp->pc != whpx->breakpoints.original_addresses[i]) {
1349 update_pending = true;
1350 }
1351
1352 i++;
1353 }
1354
1355 if (i != whpx->breakpoints.original_address_count) {
1356 update_pending = true;
1357 }
1358
1359 if (update_pending) {
1360 /*
1361 * The CPU breakpoints have changed since the last call to
1362 * whpx_translate_cpu_breakpoints(). WHPX breakpoints must
1363 * now be recomputed.
1364 */
1365 whpx_translate_cpu_breakpoints(&whpx->breakpoints, cpu, i);
1366 }
1367
1368 /* Actually insert the breakpoints into the memory. */
1369 whpx_apply_breakpoints(whpx->breakpoints.breakpoints, cpu, true);
1370 }
1371
1372 uint64_t exception_mask;
1373 if (whpx->step_pending ||
1374 (whpx->breakpoints.breakpoints &&
1375 whpx->breakpoints.breakpoints->used)) {
1376 /*
1377 * We are either attempting to single-step one or more CPUs, or
1378 * have one or more breakpoints enabled. Both require intercepting
1379 * the WHvX64ExceptionTypeBreakpointTrap exception.
1380 */
1381
1382 exception_mask = 1UL << WHvX64ExceptionTypeDebugTrapOrFault;
1383 } else {
1384 /* Let the guest handle all exceptions. */
1385 exception_mask = 0;
1386 }
1387
1388 hr = whpx_set_exception_exit_bitmap(exception_mask);
1389 if (!SUCCEEDED(hr)) {
1390 error_report("WHPX: Failed to update exception exit mask,"
1391 "hr=%08lx.", hr);
1392 return 1;
1393 }
1394
1395 return 0;
1396}
1397
/*
 * This function is called when the last VCPU has finished running.
 * It is used to remove any previously set breakpoints from memory.
 */
static int whpx_last_vcpu_stopping(CPUState *cpu)
{
    /* resuming=false restores the original instructions in guest memory. */
    whpx_apply_breakpoints(whpx_global.breakpoints.breakpoints, cpu, false);
    return 0;
}
1407
/*
 * Returns the address of the next instruction that is about to be executed.
 * Picks the cheapest valid source: QEMU's own register cache, the cached
 * WHPX exit context, or a fresh query to the hypervisor.
 * Returns 0 if the hypervisor query fails.
 */
static vaddr whpx_vcpu_get_pc(CPUState *cpu, bool exit_context_valid)
{
    if (cpu->vcpu_dirty) {
        /* The CPU registers have been modified by other parts of QEMU. */
        CPUArchState *env = (CPUArchState *)(cpu->env_ptr);
        return env->eip;
    } else if (exit_context_valid) {
        /*
         * The CPU registers have been modified neither by other parts
         * of QEMU, nor by this port via WHvSetVirtualProcessorRegisters().
         * This is the most common case.
         */
        struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
        return vcpu->exit_ctx.VpContext.Rip;
    } else {
        /*
         * The CPU registers have been modified by a call to
         * WHvSetVirtualProcessorRegisters() and must be re-queried from
         * the target.
         */
        WHV_REGISTER_VALUE reg_value;
        WHV_REGISTER_NAME reg_name = WHvX64RegisterRip;
        HRESULT hr;
        struct whpx_state *whpx = &whpx_global;

        hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
            whpx->partition,
            cpu->cpu_index,
            &reg_name,
            1,
            &reg_value);

        if (FAILED(hr)) {
            error_report("WHPX: Failed to get PC, hr=%08lx", hr);
            return 0;
        }

        return reg_value.Reg64;
    }
}
1449
812d49f2
JTV
1450static int whpx_handle_halt(CPUState *cpu)
1451{
95e862d7 1452 CPUX86State *env = cpu->env_ptr;
812d49f2
JTV
1453 int ret = 0;
1454
1455 qemu_mutex_lock_iothread();
1456 if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
1457 (env->eflags & IF_MASK)) &&
1458 !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
1459 cpu->exception_index = EXCP_HLT;
1460 cpu->halted = true;
1461 ret = 1;
1462 }
1463 qemu_mutex_unlock_iothread();
1464
1465 return ret;
1466}
1467
/*
 * Runs just before WHvRunVirtualProcessor(): translates QEMU's pending
 * interrupt_request bits into WHPX register state (pending NMI/interrupt,
 * CR8/TPR, interrupt-window notification) and pushes it to the hypervisor
 * in a single WHvSetVirtualProcessorRegisters() call.
 */
static void whpx_vcpu_pre_run(CPUState *cpu)
{
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    CPUX86State *env = cpu->env_ptr;
    X86CPU *x86_cpu = X86_CPU(cpu);
    int irq;
    uint8_t tpr;
    WHV_X64_PENDING_INTERRUPTION_REGISTER new_int;
    UINT32 reg_count = 0;
    /* At most 3 registers are staged below (interrupt, CR8, notification). */
    WHV_REGISTER_VALUE reg_values[3];
    WHV_REGISTER_NAME reg_names[3];

    memset(&new_int, 0, sizeof(new_int));
    memset(reg_values, 0, sizeof(reg_values));

    /* interrupt_request and the APIC state are iothread-protected. */
    qemu_mutex_lock_iothread();

    /* Inject NMI */
    if (!vcpu->interruption_pending &&
        cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) {
        if (cpu->interrupt_request & CPU_INTERRUPT_NMI) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
            vcpu->interruptable = false;
            new_int.InterruptionType = WHvX64PendingNmi;
            new_int.InterruptionPending = 1;
            new_int.InterruptionVector = 2;
        }
        if (cpu->interrupt_request & CPU_INTERRUPT_SMI) {
            /* SMI is acknowledged but not injected (no SMM support here). */
            cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
        }
    }

    /*
     * Force the VCPU out of its inner loop to process any INIT requests or
     * commit pending TPR access.
     */
    if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
        if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) &&
            !(env->hflags & HF_SMM_MASK)) {
            cpu->exit_request = 1;
        }
        if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
            cpu->exit_request = 1;
        }
    }

    /* Get pending hard interruption or replay one that was overwritten */
    if (!whpx_apic_in_platform()) {
        /* QEMU-emulated APIC: inject via the pending-interruption register. */
        if (!vcpu->interruption_pending &&
            vcpu->interruptable && (env->eflags & IF_MASK)) {
            assert(!new_int.InterruptionPending);
            if (cpu->interrupt_request & CPU_INTERRUPT_HARD) {
                cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
                irq = cpu_get_pic_interrupt(env);
                if (irq >= 0) {
                    new_int.InterruptionType = WHvX64PendingInterrupt;
                    new_int.InterruptionPending = 1;
                    new_int.InterruptionVector = irq;
                }
            }
        }

        /* Setup interrupt state if new one was prepared */
        if (new_int.InterruptionPending) {
            reg_values[reg_count].PendingInterruption = new_int;
            reg_names[reg_count] = WHvRegisterPendingInterruption;
            reg_count += 1;
        }
    } else if (vcpu->ready_for_pic_interrupt &&
               (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
        /* In-hypervisor APIC: deliver PIC interrupts as external events. */
        cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
        irq = cpu_get_pic_interrupt(env);
        if (irq >= 0) {
            reg_names[reg_count] = WHvRegisterPendingEvent;
            reg_values[reg_count].ExtIntEvent = (WHV_X64_PENDING_EXT_INT_EVENT)
            {
                .EventPending = 1,
                .EventType = WHvX64PendingEventExtInt,
                .Vector = irq,
            };
            reg_count += 1;
        }
    }

    /* Sync the TPR to the CR8 if was modified during the intercept */
    tpr = whpx_apic_tpr_to_cr8(cpu_get_apic_tpr(x86_cpu->apic_state));
    if (tpr != vcpu->tpr) {
        vcpu->tpr = tpr;
        reg_values[reg_count].Reg64 = tpr;
        cpu->exit_request = 1;
        reg_names[reg_count] = WHvX64RegisterCr8;
        reg_count += 1;
    }

    /* Update the state of the interrupt delivery notification */
    if (!vcpu->window_registered &&
        cpu->interrupt_request & CPU_INTERRUPT_HARD) {
        /* Ask for an exit as soon as the guest can take an interrupt. */
        reg_values[reg_count].DeliverabilityNotifications =
            (WHV_X64_DELIVERABILITY_NOTIFICATIONS_REGISTER) {
                .InterruptNotification = 1
            };
        vcpu->window_registered = 1;
        reg_names[reg_count] = WHvX64RegisterDeliverabilityNotifications;
        reg_count += 1;
    }

    qemu_mutex_unlock_iothread();
    vcpu->ready_for_pic_interrupt = false;

    if (reg_count) {
        /* Commit all staged register updates in a single hypercall. */
        hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
            whpx->partition, cpu->cpu_index,
            reg_names, reg_count, reg_values);
        if (FAILED(hr)) {
            error_report("WHPX: Failed to set interrupt state registers,"
                         " hr=%08lx", hr);
        }
    }

    return;
}
1591
/*
 * Runs just after WHvRunVirtualProcessor() returns: copies volatile state
 * (RFLAGS, CR8/TPR, interruptibility) from the WHPX exit context back into
 * QEMU's view of the VCPU.
 */
static void whpx_vcpu_post_run(CPUState *cpu)
{
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    CPUX86State *env = cpu->env_ptr;
    X86CPU *x86_cpu = X86_CPU(cpu);

    env->eflags = vcpu->exit_ctx.VpContext.Rflags;

    /* Propagate a guest CR8 change back to the emulated APIC's TPR. */
    uint64_t tpr = vcpu->exit_ctx.VpContext.Cr8;
    if (vcpu->tpr != tpr) {
        vcpu->tpr = tpr;
        qemu_mutex_lock_iothread();
        cpu_set_apic_tpr(x86_cpu->apic_state, whpx_cr8_to_apic_tpr(vcpu->tpr));
        qemu_mutex_unlock_iothread();
    }

    /* Used by whpx_vcpu_pre_run() to decide whether injection is allowed. */
    vcpu->interruption_pending =
        vcpu->exit_ctx.VpContext.ExecutionState.InterruptionPending;

    vcpu->interruptable =
        !vcpu->exit_ctx.VpContext.ExecutionState.InterruptShadow;

    return;
}
1616
/*
 * Processes asynchronous CPU events (INIT, SIPI, APIC poll, TPR access
 * reporting) queued in cpu->interrupt_request, and wakes a halted VCPU
 * when an interrupt or NMI becomes deliverable.
 */
static void whpx_vcpu_process_async_events(CPUState *cpu)
{
    CPUX86State *env = cpu->env_ptr;
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);

    /* INIT is ignored while in SMM mode. */
    if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) &&
        !(env->hflags & HF_SMM_MASK)) {
        /* Registers must be in sync before QEMU resets the CPU state. */
        whpx_cpu_synchronize_state(cpu);
        do_cpu_init(x86_cpu);
        vcpu->interruptable = true;
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
        apic_poll_irq(x86_cpu->apic_state);
    }

    /* Wake from HLT if an interrupt (with IF set) or an NMI is pending. */
    if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
         (env->eflags & IF_MASK)) ||
        (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        cpu->halted = false;
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
        whpx_cpu_synchronize_state(cpu);
        do_cpu_sipi(x86_cpu);
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_TPR;
        whpx_cpu_synchronize_state(cpu);
        /* Report the intercepted TPR access to the emulated APIC. */
        apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip,
                                      env->tpr_access_type);
    }

    return;
}
1655
1656static int whpx_vcpu_run(CPUState *cpu)
1657{
1658 HRESULT hr;
1659 struct whpx_state *whpx = &whpx_global;
1660 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
d7482ffe
IS
1661 struct whpx_breakpoint *stepped_over_bp = NULL;
1662 WhpxStepMode exclusive_step_mode = WHPX_STEP_NONE;
812d49f2
JTV
1663 int ret;
1664
d7482ffe
IS
1665 g_assert(qemu_mutex_iothread_locked());
1666
1667 if (whpx->running_cpus++ == 0) {
1668 /* Insert breakpoints into memory, update exception exit bitmap. */
1669 ret = whpx_first_vcpu_starting(cpu);
1670 if (ret != 0) {
1671 return ret;
1672 }
1673 }
1674
1675 if (whpx->breakpoints.breakpoints &&
1676 whpx->breakpoints.breakpoints->used > 0)
1677 {
1678 uint64_t pc = whpx_vcpu_get_pc(cpu, true);
1679 stepped_over_bp = whpx_lookup_breakpoint_by_addr(pc);
1680 if (stepped_over_bp && stepped_over_bp->state != WHPX_BP_SET) {
1681 stepped_over_bp = NULL;
1682 }
1683
1684 if (stepped_over_bp) {
1685 /*
1686 * We are trying to run the instruction overwritten by an active
1687 * breakpoint. We will temporarily disable the breakpoint, suspend
1688 * other CPUs, and step over the instruction.
1689 */
1690 exclusive_step_mode = WHPX_STEP_EXCLUSIVE;
1691 }
1692 }
1693
1694 if (exclusive_step_mode == WHPX_STEP_NONE) {
1695 whpx_vcpu_process_async_events(cpu);
1696 if (cpu->halted && !whpx_apic_in_platform()) {
1697 cpu->exception_index = EXCP_HLT;
1698 qatomic_set(&cpu->exit_request, false);
1699 return 0;
1700 }
812d49f2
JTV
1701 }
1702
1703 qemu_mutex_unlock_iothread();
d7482ffe
IS
1704
1705 if (exclusive_step_mode != WHPX_STEP_NONE) {
1706 start_exclusive();
1707 g_assert(cpu == current_cpu);
1708 g_assert(!cpu->running);
1709 cpu->running = true;
1710
1711 hr = whpx_set_exception_exit_bitmap(
1712 1UL << WHvX64ExceptionTypeDebugTrapOrFault);
1713 if (!SUCCEEDED(hr)) {
1714 error_report("WHPX: Failed to update exception exit mask, "
1715 "hr=%08lx.", hr);
1716 return 1;
1717 }
1718
1719 if (stepped_over_bp) {
1720 /* Temporarily disable the triggered breakpoint. */
1721 cpu_memory_rw_debug(cpu,
1722 stepped_over_bp->address,
1723 &stepped_over_bp->original_instruction,
1724 1,
1725 true);
1726 }
1727 } else {
1728 cpu_exec_start(cpu);
1729 }
812d49f2
JTV
1730
1731 do {
1732 if (cpu->vcpu_dirty) {
6785e767 1733 whpx_set_registers(cpu, WHPX_SET_RUNTIME_STATE);
812d49f2
JTV
1734 cpu->vcpu_dirty = false;
1735 }
1736
d7482ffe
IS
1737 if (exclusive_step_mode == WHPX_STEP_NONE) {
1738 whpx_vcpu_pre_run(cpu);
1739
1740 if (qatomic_read(&cpu->exit_request)) {
1741 whpx_vcpu_kick(cpu);
1742 }
1743 }
812d49f2 1744
d7482ffe
IS
1745 if (exclusive_step_mode != WHPX_STEP_NONE || cpu->singlestep_enabled) {
1746 whpx_vcpu_configure_single_stepping(cpu, true, NULL);
812d49f2
JTV
1747 }
1748
327fccb2
LP
1749 hr = whp_dispatch.WHvRunVirtualProcessor(
1750 whpx->partition, cpu->cpu_index,
1751 &vcpu->exit_ctx, sizeof(vcpu->exit_ctx));
812d49f2
JTV
1752
1753 if (FAILED(hr)) {
1754 error_report("WHPX: Failed to exec a virtual processor,"
1755 " hr=%08lx", hr);
1756 ret = -1;
1757 break;
1758 }
1759
d7482ffe
IS
1760 if (exclusive_step_mode != WHPX_STEP_NONE || cpu->singlestep_enabled) {
1761 whpx_vcpu_configure_single_stepping(cpu,
1762 false,
1763 &vcpu->exit_ctx.VpContext.Rflags);
1764 }
1765
812d49f2
JTV
1766 whpx_vcpu_post_run(cpu);
1767
1768 switch (vcpu->exit_ctx.ExitReason) {
1769 case WHvRunVpExitReasonMemoryAccess:
1770 ret = whpx_handle_mmio(cpu, &vcpu->exit_ctx.MemoryAccess);
1771 break;
1772
1773 case WHvRunVpExitReasonX64IoPortAccess:
1774 ret = whpx_handle_portio(cpu, &vcpu->exit_ctx.IoPortAccess);
1775 break;
1776
1777 case WHvRunVpExitReasonX64InterruptWindow:
faf20793 1778 vcpu->ready_for_pic_interrupt = 1;
812d49f2 1779 vcpu->window_registered = 0;
e7ca549f 1780 ret = 0;
812d49f2
JTV
1781 break;
1782
faf20793
SM
1783 case WHvRunVpExitReasonX64ApicEoi:
1784 assert(whpx_apic_in_platform());
1785 ioapic_eoi_broadcast(vcpu->exit_ctx.ApicEoi.InterruptVector);
1786 break;
1787
812d49f2 1788 case WHvRunVpExitReasonX64Halt:
d7482ffe
IS
1789 /*
1790 * WARNING: as of build 19043.1526 (21H1), this exit reason is no
1791 * longer used.
1792 */
812d49f2
JTV
1793 ret = whpx_handle_halt(cpu);
1794 break;
1795
faf20793
SM
1796 case WHvRunVpExitReasonX64ApicInitSipiTrap: {
1797 WHV_INTERRUPT_CONTROL ipi = {0};
1798 uint64_t icr = vcpu->exit_ctx.ApicInitSipi.ApicIcr;
1799 uint32_t delivery_mode =
1800 (icr & APIC_ICR_DELIV_MOD) >> APIC_ICR_DELIV_MOD_SHIFT;
1801 int dest_shorthand =
1802 (icr & APIC_ICR_DEST_SHORT) >> APIC_ICR_DEST_SHORT_SHIFT;
1803 bool broadcast = false;
1804 bool include_self = false;
1805 uint32_t i;
1806
1807 /* We only registered for INIT and SIPI exits. */
1808 if ((delivery_mode != APIC_DM_INIT) &&
1809 (delivery_mode != APIC_DM_SIPI)) {
1810 error_report(
1811 "WHPX: Unexpected APIC exit that is not a INIT or SIPI");
1812 break;
1813 }
1814
1815 if (delivery_mode == APIC_DM_INIT) {
1816 ipi.Type = WHvX64InterruptTypeInit;
1817 } else {
1818 ipi.Type = WHvX64InterruptTypeSipi;
1819 }
1820
1821 ipi.DestinationMode =
1822 ((icr & APIC_ICR_DEST_MOD) >> APIC_ICR_DEST_MOD_SHIFT) ?
1823 WHvX64InterruptDestinationModeLogical :
1824 WHvX64InterruptDestinationModePhysical;
1825
1826 ipi.TriggerMode =
1827 ((icr & APIC_ICR_TRIGGER_MOD) >> APIC_ICR_TRIGGER_MOD_SHIFT) ?
1828 WHvX64InterruptTriggerModeLevel :
1829 WHvX64InterruptTriggerModeEdge;
1830
1831 ipi.Vector = icr & APIC_VECTOR_MASK;
1832 switch (dest_shorthand) {
1833 /* no shorthand. Bits 56-63 contain the destination. */
1834 case 0:
1835 ipi.Destination = (icr >> 56) & APIC_VECTOR_MASK;
1836 hr = whp_dispatch.WHvRequestInterrupt(whpx->partition,
1837 &ipi, sizeof(ipi));
1838 if (FAILED(hr)) {
1839 error_report("WHPX: Failed to request interrupt hr=%08lx",
1840 hr);
1841 }
1842
1843 break;
1844
1845 /* self */
1846 case 1:
1847 include_self = true;
1848 break;
1849
1850 /* broadcast, including self */
1851 case 2:
1852 broadcast = true;
1853 include_self = true;
1854 break;
1855
1856 /* broadcast, excluding self */
1857 case 3:
1858 broadcast = true;
1859 break;
1860 }
1861
1862 if (!broadcast && !include_self) {
1863 break;
1864 }
1865
1866 for (i = 0; i <= max_vcpu_index; i++) {
1867 if (i == cpu->cpu_index && !include_self) {
1868 continue;
1869 }
1870
1871 /*
1872 * Assuming that APIC Ids are identity mapped since
1873 * WHvX64RegisterApicId & WHvX64RegisterInitialApicId registers
1874 * are not handled yet and the hypervisor doesn't allow the
1875 * guest to modify the APIC ID.
1876 */
1877 ipi.Destination = i;
1878 hr = whp_dispatch.WHvRequestInterrupt(whpx->partition,
1879 &ipi, sizeof(ipi));
1880 if (FAILED(hr)) {
1881 error_report(
1882 "WHPX: Failed to request SIPI for %d, hr=%08lx",
1883 i, hr);
1884 }
1885 }
1886
1887 break;
1888 }
1889
812d49f2 1890 case WHvRunVpExitReasonCanceled:
d7482ffe
IS
1891 if (exclusive_step_mode != WHPX_STEP_NONE) {
1892 /*
1893 * We are trying to step over a single instruction, and
1894 * likely got a request to stop from another thread.
1895 * Delay it until we are done stepping
1896 * over.
1897 */
1898 ret = 0;
1899 } else {
1900 cpu->exception_index = EXCP_INTERRUPT;
1901 ret = 1;
1902 }
812d49f2 1903 break;
e7ca549f
JTV
1904 case WHvRunVpExitReasonX64MsrAccess: {
1905 WHV_REGISTER_VALUE reg_values[3] = {0};
1906 WHV_REGISTER_NAME reg_names[3];
1907 UINT32 reg_count;
1908
1909 reg_names[0] = WHvX64RegisterRip;
1910 reg_names[1] = WHvX64RegisterRax;
1911 reg_names[2] = WHvX64RegisterRdx;
1912
1913 reg_values[0].Reg64 =
1914 vcpu->exit_ctx.VpContext.Rip +
1915 vcpu->exit_ctx.VpContext.InstructionLength;
1916
1917 /*
1918 * For all unsupported MSR access we:
1919 * ignore writes
1920 * return 0 on read.
1921 */
1922 reg_count = vcpu->exit_ctx.MsrAccess.AccessInfo.IsWrite ?
1923 1 : 3;
1924
1925 hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
1926 whpx->partition,
1927 cpu->cpu_index,
1928 reg_names, reg_count,
1929 reg_values);
1930
1931 if (FAILED(hr)) {
1932 error_report("WHPX: Failed to set MsrAccess state "
1933 " registers, hr=%08lx", hr);
1934 }
1935 ret = 0;
1936 break;
1937 }
7becac84 1938 case WHvRunVpExitReasonX64Cpuid: {
c3942bf2 1939 WHV_REGISTER_VALUE reg_values[5];
7becac84
JTV
1940 WHV_REGISTER_NAME reg_names[5];
1941 UINT32 reg_count = 5;
dadf3011
SM
1942 UINT64 cpuid_fn, rip = 0, rax = 0, rcx = 0, rdx = 0, rbx = 0;
1943 X86CPU *x86_cpu = X86_CPU(cpu);
1944 CPUX86State *env = &x86_cpu->env;
7becac84 1945
c3942bf2
LP
1946 memset(reg_values, 0, sizeof(reg_values));
1947
7becac84
JTV
1948 rip = vcpu->exit_ctx.VpContext.Rip +
1949 vcpu->exit_ctx.VpContext.InstructionLength;
dadf3011
SM
1950 cpuid_fn = vcpu->exit_ctx.CpuidAccess.Rax;
1951
1952 /*
1953 * Ideally, these should be supplied to the hypervisor during VCPU
1954 * initialization and it should be able to satisfy this request.
1955 * But, currently, WHPX doesn't support setting CPUID values in the
1956 * hypervisor once the partition has been setup, which is too late
1957 * since VCPUs are realized later. For now, use the values from
1958 * QEMU to satisfy these requests, until WHPX adds support for
1959 * being able to set these values in the hypervisor at runtime.
1960 */
1961 cpu_x86_cpuid(env, cpuid_fn, 0, (UINT32 *)&rax, (UINT32 *)&rbx,
1962 (UINT32 *)&rcx, (UINT32 *)&rdx);
1963 switch (cpuid_fn) {
5c8e1e83
SM
1964 case 0x40000000:
1965 /* Expose the vmware cpu frequency cpuid leaf */
1966 rax = 0x40000010;
1967 rbx = rcx = rdx = 0;
1968 break;
1969
1970 case 0x40000010:
1971 rax = env->tsc_khz;
1972 rbx = env->apic_bus_freq / 1000; /* Hz to KHz */
1973 rcx = rdx = 0;
1974 break;
1975
e1753a7e 1976 case 0x80000001:
e1753a7e 1977 /* Remove any support of OSVW */
dadf3011 1978 rcx &= ~CPUID_EXT3_OSVW;
7becac84 1979 break;
7becac84
JTV
1980 }
1981
1982 reg_names[0] = WHvX64RegisterRip;
1983 reg_names[1] = WHvX64RegisterRax;
1984 reg_names[2] = WHvX64RegisterRcx;
1985 reg_names[3] = WHvX64RegisterRdx;
1986 reg_names[4] = WHvX64RegisterRbx;
1987
1988 reg_values[0].Reg64 = rip;
1989 reg_values[1].Reg64 = rax;
1990 reg_values[2].Reg64 = rcx;
1991 reg_values[3].Reg64 = rdx;
1992 reg_values[4].Reg64 = rbx;
1993
327fccb2
LP
1994 hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
1995 whpx->partition, cpu->cpu_index,
1996 reg_names,
1997 reg_count,
1998 reg_values);
7becac84
JTV
1999
2000 if (FAILED(hr)) {
2001 error_report("WHPX: Failed to set CpuidAccess state registers,"
2002 " hr=%08lx", hr);
2003 }
2004 ret = 0;
2005 break;
2006 }
d7482ffe
IS
2007 case WHvRunVpExitReasonException:
2008 whpx_get_registers(cpu);
2009
2010 if ((vcpu->exit_ctx.VpException.ExceptionType ==
2011 WHvX64ExceptionTypeDebugTrapOrFault) &&
2012 (vcpu->exit_ctx.VpException.InstructionByteCount >= 1) &&
2013 (vcpu->exit_ctx.VpException.InstructionBytes[0] ==
2014 whpx_breakpoint_instruction)) {
2015 /* Stopped at a software breakpoint. */
2016 cpu->exception_index = EXCP_DEBUG;
2017 } else if ((vcpu->exit_ctx.VpException.ExceptionType ==
2018 WHvX64ExceptionTypeDebugTrapOrFault) &&
2019 !cpu->singlestep_enabled) {
2020 /*
2021 * Just finished stepping over a breakpoint, but the
2022 * gdb does not expect us to do single-stepping.
2023 * Don't do anything special.
2024 */
2025 cpu->exception_index = EXCP_INTERRUPT;
2026 } else {
2027 /* Another exception or debug event. Report it to GDB. */
2028 cpu->exception_index = EXCP_DEBUG;
2029 }
2030
2031 ret = 1;
2032 break;
812d49f2
JTV
2033 case WHvRunVpExitReasonNone:
2034 case WHvRunVpExitReasonUnrecoverableException:
2035 case WHvRunVpExitReasonInvalidVpRegisterValue:
2036 case WHvRunVpExitReasonUnsupportedFeature:
812d49f2
JTV
2037 default:
2038 error_report("WHPX: Unexpected VP exit code %d",
2039 vcpu->exit_ctx.ExitReason);
2040 whpx_get_registers(cpu);
2041 qemu_mutex_lock_iothread();
2042 qemu_system_guest_panicked(cpu_get_crash_info(cpu));
2043 qemu_mutex_unlock_iothread();
2044 break;
2045 }
2046
2047 } while (!ret);
2048
d7482ffe
IS
2049 if (stepped_over_bp) {
2050 /* Restore the breakpoint we stepped over */
2051 cpu_memory_rw_debug(cpu,
2052 stepped_over_bp->address,
2053 (void *)&whpx_breakpoint_instruction,
2054 1,
2055 true);
2056 }
2057
2058 if (exclusive_step_mode != WHPX_STEP_NONE) {
2059 g_assert(cpu_in_exclusive_context(cpu));
2060 cpu->running = false;
2061 end_exclusive();
2062
2063 exclusive_step_mode = WHPX_STEP_NONE;
2064 } else {
2065 cpu_exec_end(cpu);
2066 }
2067
812d49f2
JTV
2068 qemu_mutex_lock_iothread();
2069 current_cpu = cpu;
2070
d7482ffe
IS
2071 if (--whpx->running_cpus == 0) {
2072 whpx_last_vcpu_stopping(cpu);
2073 }
2074
d73415a3 2075 qatomic_set(&cpu->exit_request, false);
812d49f2
JTV
2076
2077 return ret < 0;
2078}
2079
2080static void do_whpx_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
2081{
4df28c93
SM
2082 if (!cpu->vcpu_dirty) {
2083 whpx_get_registers(cpu);
2084 cpu->vcpu_dirty = true;
2085 }
812d49f2
JTV
2086}
2087
2088static void do_whpx_cpu_synchronize_post_reset(CPUState *cpu,
2089 run_on_cpu_data arg)
2090{
6785e767 2091 whpx_set_registers(cpu, WHPX_SET_RESET_STATE);
812d49f2
JTV
2092 cpu->vcpu_dirty = false;
2093}
2094
2095static void do_whpx_cpu_synchronize_post_init(CPUState *cpu,
2096 run_on_cpu_data arg)
2097{
6785e767 2098 whpx_set_registers(cpu, WHPX_SET_FULL_STATE);
812d49f2
JTV
2099 cpu->vcpu_dirty = false;
2100}
2101
2102static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu,
2103 run_on_cpu_data arg)
2104{
2105 cpu->vcpu_dirty = true;
2106}
2107
2108/*
2109 * CPU support.
2110 */
2111
2112void whpx_cpu_synchronize_state(CPUState *cpu)
2113{
2114 if (!cpu->vcpu_dirty) {
2115 run_on_cpu(cpu, do_whpx_cpu_synchronize_state, RUN_ON_CPU_NULL);
2116 }
2117}
2118
2119void whpx_cpu_synchronize_post_reset(CPUState *cpu)
2120{
2121 run_on_cpu(cpu, do_whpx_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
2122}
2123
2124void whpx_cpu_synchronize_post_init(CPUState *cpu)
2125{
2126 run_on_cpu(cpu, do_whpx_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
2127}
2128
2129void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu)
2130{
2131 run_on_cpu(cpu, do_whpx_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
2132}
2133
d7482ffe
IS
2134void whpx_cpu_synchronize_pre_resume(bool step_pending)
2135{
2136 whpx_global.step_pending = step_pending;
2137}
2138
812d49f2
JTV
2139/*
2140 * Vcpu support.
2141 */
2142
2143static Error *whpx_migration_blocker;
2144
538f0497 2145static void whpx_cpu_update_state(void *opaque, bool running, RunState state)
6785e767
SM
2146{
2147 CPUX86State *env = opaque;
2148
2149 if (running) {
2150 env->tsc_valid = false;
2151 }
2152}
2153
812d49f2
JTV
2154int whpx_init_vcpu(CPUState *cpu)
2155{
2156 HRESULT hr;
2157 struct whpx_state *whpx = &whpx_global;
5c8e1e83 2158 struct whpx_vcpu *vcpu = NULL;
812d49f2 2159 Error *local_error = NULL;
95e862d7 2160 CPUX86State *env = cpu->env_ptr;
5c8e1e83
SM
2161 X86CPU *x86_cpu = X86_CPU(cpu);
2162 UINT64 freq = 0;
2163 int ret;
812d49f2
JTV
2164
2165 /* Add migration blockers for all unsupported features of the
2166 * Windows Hypervisor Platform
2167 */
2168 if (whpx_migration_blocker == NULL) {
2169 error_setg(&whpx_migration_blocker,
2170 "State blocked due to non-migratable CPUID feature support,"
2171 "dirty memory tracking support, and XSAVE/XRSTOR support");
2172
436c831a 2173 if (migrate_add_blocker(whpx_migration_blocker, &local_error) < 0) {
812d49f2 2174 error_report_err(local_error);
327fccb2 2175 error_free(whpx_migration_blocker);
5c8e1e83
SM
2176 ret = -EINVAL;
2177 goto error;
812d49f2
JTV
2178 }
2179 }
2180
b21e2380 2181 vcpu = g_new0(struct whpx_vcpu, 1);
812d49f2
JTV
2182
2183 if (!vcpu) {
2184 error_report("WHPX: Failed to allocte VCPU context.");
5c8e1e83
SM
2185 ret = -ENOMEM;
2186 goto error;
812d49f2
JTV
2187 }
2188
327fccb2
LP
2189 hr = whp_dispatch.WHvEmulatorCreateEmulator(
2190 &whpx_emu_callbacks,
2191 &vcpu->emulator);
812d49f2
JTV
2192 if (FAILED(hr)) {
2193 error_report("WHPX: Failed to setup instruction completion support,"
2194 " hr=%08lx", hr);
5c8e1e83
SM
2195 ret = -EINVAL;
2196 goto error;
812d49f2
JTV
2197 }
2198
327fccb2
LP
2199 hr = whp_dispatch.WHvCreateVirtualProcessor(
2200 whpx->partition, cpu->cpu_index, 0);
812d49f2
JTV
2201 if (FAILED(hr)) {
2202 error_report("WHPX: Failed to create a virtual processor,"
2203 " hr=%08lx", hr);
327fccb2 2204 whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator);
5c8e1e83
SM
2205 ret = -EINVAL;
2206 goto error;
812d49f2
JTV
2207 }
2208
5c8e1e83
SM
2209 /*
2210 * vcpu's TSC frequency is either specified by user, or use the value
2211 * provided by Hyper-V if the former is not present. In the latter case, we
2212 * query it from Hyper-V and record in env->tsc_khz, so that vcpu's TSC
2213 * frequency can be migrated later via this field.
2214 */
2215 if (!env->tsc_khz) {
2216 hr = whp_dispatch.WHvGetCapability(
2217 WHvCapabilityCodeProcessorClockFrequency, &freq, sizeof(freq),
2218 NULL);
2219 if (hr != WHV_E_UNKNOWN_CAPABILITY) {
2220 if (FAILED(hr)) {
2221 printf("WHPX: Failed to query tsc frequency, hr=0x%08lx\n", hr);
2222 } else {
2223 env->tsc_khz = freq / 1000; /* Hz to KHz */
2224 }
2225 }
2226 }
812d49f2 2227
5c8e1e83
SM
2228 env->apic_bus_freq = HYPERV_APIC_BUS_FREQUENCY;
2229 hr = whp_dispatch.WHvGetCapability(
2230 WHvCapabilityCodeInterruptClockFrequency, &freq, sizeof(freq), NULL);
2231 if (hr != WHV_E_UNKNOWN_CAPABILITY) {
2232 if (FAILED(hr)) {
2233 printf("WHPX: Failed to query apic bus frequency hr=0x%08lx\n", hr);
2234 } else {
2235 env->apic_bus_freq = freq;
2236 }
2237 }
2238
2239 /*
2240 * If the vmware cpuid frequency leaf option is set, and we have a valid
2241 * tsc value, trap the corresponding cpuid's.
2242 */
2243 if (x86_cpu->vmware_cpuid_freq && env->tsc_khz) {
2244 UINT32 cpuidExitList[] = {1, 0x80000001, 0x40000000, 0x40000010};
2245
2246 hr = whp_dispatch.WHvSetPartitionProperty(
2247 whpx->partition,
2248 WHvPartitionPropertyCodeCpuidExitList,
2249 cpuidExitList,
2250 RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32));
2251
2252 if (FAILED(hr)) {
2253 error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx",
2254 hr);
2255 ret = -EINVAL;
2256 goto error;
2257 }
2258 }
2259
2260 vcpu->interruptable = true;
812d49f2
JTV
2261 cpu->vcpu_dirty = true;
2262 cpu->hax_vcpu = (struct hax_vcpu_state *)vcpu;
faf20793 2263 max_vcpu_index = max(max_vcpu_index, cpu->cpu_index);
6785e767 2264 qemu_add_vm_change_state_handler(whpx_cpu_update_state, cpu->env_ptr);
812d49f2
JTV
2265
2266 return 0;
5c8e1e83
SM
2267
2268error:
2269 g_free(vcpu);
2270
2271 return ret;
812d49f2
JTV
2272}
2273
2274int whpx_vcpu_exec(CPUState *cpu)
2275{
2276 int ret;
2277 int fatal;
2278
2279 for (;;) {
2280 if (cpu->exception_index >= EXCP_INTERRUPT) {
2281 ret = cpu->exception_index;
2282 cpu->exception_index = -1;
2283 break;
2284 }
2285
2286 fatal = whpx_vcpu_run(cpu);
2287
2288 if (fatal) {
2289 error_report("WHPX: Failed to exec a virtual processor");
2290 abort();
2291 }
2292 }
2293
2294 return ret;
2295}
2296
2297void whpx_destroy_vcpu(CPUState *cpu)
2298{
2299 struct whpx_state *whpx = &whpx_global;
2300 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
2301
327fccb2
LP
2302 whp_dispatch.WHvDeleteVirtualProcessor(whpx->partition, cpu->cpu_index);
2303 whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator);
812d49f2
JTV
2304 g_free(cpu->hax_vcpu);
2305 return;
2306}
2307
2308void whpx_vcpu_kick(CPUState *cpu)
2309{
2310 struct whpx_state *whpx = &whpx_global;
327fccb2
LP
2311 whp_dispatch.WHvCancelRunVirtualProcessor(
2312 whpx->partition, cpu->cpu_index, 0);
812d49f2
JTV
2313}
2314
2315/*
2316 * Memory support.
2317 */
2318
2319static void whpx_update_mapping(hwaddr start_pa, ram_addr_t size,
2320 void *host_va, int add, int rom,
2321 const char *name)
2322{
2323 struct whpx_state *whpx = &whpx_global;
2324 HRESULT hr;
2325
2326 /*
2327 if (add) {
2328 printf("WHPX: ADD PA:%p Size:%p, Host:%p, %s, '%s'\n",
2329 (void*)start_pa, (void*)size, host_va,
2330 (rom ? "ROM" : "RAM"), name);
2331 } else {
2332 printf("WHPX: DEL PA:%p Size:%p, Host:%p, '%s'\n",
2333 (void*)start_pa, (void*)size, host_va, name);
2334 }
2335 */
2336
2337 if (add) {
327fccb2
LP
2338 hr = whp_dispatch.WHvMapGpaRange(whpx->partition,
2339 host_va,
2340 start_pa,
2341 size,
2342 (WHvMapGpaRangeFlagRead |
2343 WHvMapGpaRangeFlagExecute |
2344 (rom ? 0 : WHvMapGpaRangeFlagWrite)));
812d49f2 2345 } else {
327fccb2
LP
2346 hr = whp_dispatch.WHvUnmapGpaRange(whpx->partition,
2347 start_pa,
2348 size);
812d49f2
JTV
2349 }
2350
2351 if (FAILED(hr)) {
2352 error_report("WHPX: Failed to %s GPA range '%s' PA:%p, Size:%p bytes,"
2353 " Host:%p, hr=%08lx",
2354 (add ? "MAP" : "UNMAP"), name,
c3942bf2 2355 (void *)(uintptr_t)start_pa, (void *)size, host_va, hr);
812d49f2
JTV
2356 }
2357}
2358
2359static void whpx_process_section(MemoryRegionSection *section, int add)
2360{
2361 MemoryRegion *mr = section->mr;
2362 hwaddr start_pa = section->offset_within_address_space;
2363 ram_addr_t size = int128_get64(section->size);
2364 unsigned int delta;
2365 uint64_t host_va;
2366
2367 if (!memory_region_is_ram(mr)) {
2368 return;
2369 }
2370
8e3b0cbb
MAL
2371 delta = qemu_real_host_page_size() - (start_pa & ~qemu_real_host_page_mask());
2372 delta &= ~qemu_real_host_page_mask();
812d49f2
JTV
2373 if (delta > size) {
2374 return;
2375 }
2376 start_pa += delta;
2377 size -= delta;
8e3b0cbb
MAL
2378 size &= qemu_real_host_page_mask();
2379 if (!size || (start_pa & ~qemu_real_host_page_mask())) {
812d49f2
JTV
2380 return;
2381 }
2382
2383 host_va = (uintptr_t)memory_region_get_ram_ptr(mr)
2384 + section->offset_within_region + delta;
2385
c3942bf2
LP
2386 whpx_update_mapping(start_pa, size, (void *)(uintptr_t)host_va, add,
2387 memory_region_is_rom(mr), mr->name);
812d49f2
JTV
2388}
2389
2390static void whpx_region_add(MemoryListener *listener,
2391 MemoryRegionSection *section)
2392{
2393 memory_region_ref(section->mr);
2394 whpx_process_section(section, 1);
2395}
2396
2397static void whpx_region_del(MemoryListener *listener,
2398 MemoryRegionSection *section)
2399{
2400 whpx_process_section(section, 0);
2401 memory_region_unref(section->mr);
2402}
2403
2404static void whpx_transaction_begin(MemoryListener *listener)
2405{
2406}
2407
2408static void whpx_transaction_commit(MemoryListener *listener)
2409{
2410}
2411
2412static void whpx_log_sync(MemoryListener *listener,
2413 MemoryRegionSection *section)
2414{
2415 MemoryRegion *mr = section->mr;
2416
2417 if (!memory_region_is_ram(mr)) {
2418 return;
2419 }
2420
2421 memory_region_set_dirty(mr, 0, int128_get64(section->size));
2422}
2423
2424static MemoryListener whpx_memory_listener = {
142518bd 2425 .name = "whpx",
812d49f2
JTV
2426 .begin = whpx_transaction_begin,
2427 .commit = whpx_transaction_commit,
2428 .region_add = whpx_region_add,
2429 .region_del = whpx_region_del,
2430 .log_sync = whpx_log_sync,
2431 .priority = 10,
2432};
2433
2434static void whpx_memory_init(void)
2435{
2436 memory_listener_register(&whpx_memory_listener, &address_space_memory);
2437}
2438
b902710f
SM
2439/*
2440 * Load the functions from the given library, using the given handle. If a
2441 * handle is provided, it is used, otherwise the library is opened. The
2442 * handle will be updated on return with the opened one.
2443 */
2444static bool load_whp_dispatch_fns(HMODULE *handle,
2445 WHPFunctionList function_list)
2446{
2447 HMODULE hLib = *handle;
2448
2449 #define WINHV_PLATFORM_DLL "WinHvPlatform.dll"
2450 #define WINHV_EMULATION_DLL "WinHvEmulation.dll"
6785e767
SM
2451 #define WHP_LOAD_FIELD_OPTIONAL(return_type, function_name, signature) \
2452 whp_dispatch.function_name = \
2453 (function_name ## _t)GetProcAddress(hLib, #function_name); \
2454
b902710f
SM
2455 #define WHP_LOAD_FIELD(return_type, function_name, signature) \
2456 whp_dispatch.function_name = \
2457 (function_name ## _t)GetProcAddress(hLib, #function_name); \
2458 if (!whp_dispatch.function_name) { \
2459 error_report("Could not load function %s", #function_name); \
2460 goto error; \
2461 } \
2462
2463 #define WHP_LOAD_LIB(lib_name, handle_lib) \
2464 if (!handle_lib) { \
2465 handle_lib = LoadLibrary(lib_name); \
2466 if (!handle_lib) { \
2467 error_report("Could not load library %s.", lib_name); \
2468 goto error; \
2469 } \
2470 } \
2471
2472 switch (function_list) {
2473 case WINHV_PLATFORM_FNS_DEFAULT:
2474 WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib)
2475 LIST_WINHVPLATFORM_FUNCTIONS(WHP_LOAD_FIELD)
2476 break;
2477
2478 case WINHV_EMULATION_FNS_DEFAULT:
2479 WHP_LOAD_LIB(WINHV_EMULATION_DLL, hLib)
2480 LIST_WINHVEMULATION_FUNCTIONS(WHP_LOAD_FIELD)
2481 break;
6785e767
SM
2482
2483 case WINHV_PLATFORM_FNS_SUPPLEMENTAL:
2484 WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib)
2485 LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_LOAD_FIELD_OPTIONAL)
2486 break;
b902710f
SM
2487 }
2488
2489 *handle = hLib;
2490 return true;
2491
2492error:
2493 if (hLib) {
2494 FreeLibrary(hLib);
2495 }
2496
2497 return false;
2498}
2499
faf20793
SM
2500static void whpx_set_kernel_irqchip(Object *obj, Visitor *v,
2501 const char *name, void *opaque,
2502 Error **errp)
2503{
2504 struct whpx_state *whpx = &whpx_global;
2505 OnOffSplit mode;
2506
2507 if (!visit_type_OnOffSplit(v, name, &mode, errp)) {
2508 return;
2509 }
2510
2511 switch (mode) {
2512 case ON_OFF_SPLIT_ON:
2513 whpx->kernel_irqchip_allowed = true;
2514 whpx->kernel_irqchip_required = true;
2515 break;
2516
2517 case ON_OFF_SPLIT_OFF:
2518 whpx->kernel_irqchip_allowed = false;
2519 whpx->kernel_irqchip_required = false;
2520 break;
2521
2522 case ON_OFF_SPLIT_SPLIT:
2523 error_setg(errp, "WHPX: split irqchip currently not supported");
2524 error_append_hint(errp,
2525 "Try without kernel-irqchip or with kernel-irqchip=on|off");
2526 break;
2527
2528 default:
2529 /*
2530 * The value was checked in visit_type_OnOffSplit() above. If
2531 * we get here, then something is wrong in QEMU.
2532 */
2533 abort();
2534 }
2535}
2536
812d49f2
JTV
2537/*
2538 * Partition support
2539 */
2540
2541static int whpx_accel_init(MachineState *ms)
2542{
2543 struct whpx_state *whpx;
2544 int ret;
2545 HRESULT hr;
2546 WHV_CAPABILITY whpx_cap;
3907e631 2547 UINT32 whpx_cap_size;
812d49f2 2548 WHV_PARTITION_PROPERTY prop;
5c8e1e83 2549 UINT32 cpuidExitList[] = {1, 0x80000001};
faf20793 2550 WHV_CAPABILITY_FEATURES features = {0};
812d49f2
JTV
2551
2552 whpx = &whpx_global;
2553
327fccb2
LP
2554 if (!init_whp_dispatch()) {
2555 ret = -ENOSYS;
2556 goto error;
2557 }
2558
812d49f2
JTV
2559 whpx->mem_quota = ms->ram_size;
2560
327fccb2
LP
2561 hr = whp_dispatch.WHvGetCapability(
2562 WHvCapabilityCodeHypervisorPresent, &whpx_cap,
2563 sizeof(whpx_cap), &whpx_cap_size);
812d49f2
JTV
2564 if (FAILED(hr) || !whpx_cap.HypervisorPresent) {
2565 error_report("WHPX: No accelerator found, hr=%08lx", hr);
2566 ret = -ENOSPC;
2567 goto error;
2568 }
2569
faf20793
SM
2570 hr = whp_dispatch.WHvGetCapability(
2571 WHvCapabilityCodeFeatures, &features, sizeof(features), NULL);
2572 if (FAILED(hr)) {
2573 error_report("WHPX: Failed to query capabilities, hr=%08lx", hr);
2574 ret = -EINVAL;
2575 goto error;
2576 }
2577
327fccb2 2578 hr = whp_dispatch.WHvCreatePartition(&whpx->partition);
812d49f2
JTV
2579 if (FAILED(hr)) {
2580 error_report("WHPX: Failed to create partition, hr=%08lx", hr);
2581 ret = -EINVAL;
2582 goto error;
2583 }
2584
b6b3da99
SM
2585 /*
2586 * Query the XSAVE capability of the partition. Any error here is not
2587 * considered fatal.
2588 */
2589 hr = whp_dispatch.WHvGetPartitionProperty(
2590 whpx->partition,
2591 WHvPartitionPropertyCodeProcessorXsaveFeatures,
2592 &whpx_xsave_cap,
2593 sizeof(whpx_xsave_cap),
2594 &whpx_cap_size);
2595
2596 /*
2597 * Windows version which don't support this property will return with the
2598 * specific error code.
2599 */
2600 if (FAILED(hr) && hr != WHV_E_UNKNOWN_PROPERTY) {
2601 error_report("WHPX: Failed to query XSAVE capability, hr=%08lx", hr);
2602 }
2603
2604 if (!whpx_has_xsave()) {
2605 printf("WHPX: Partition is not XSAVE capable\n");
2606 }
2607
812d49f2 2608 memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY));
f2b143a2 2609 prop.ProcessorCount = ms->smp.cpus;
327fccb2
LP
2610 hr = whp_dispatch.WHvSetPartitionProperty(
2611 whpx->partition,
2612 WHvPartitionPropertyCodeProcessorCount,
2613 &prop,
2614 sizeof(WHV_PARTITION_PROPERTY));
812d49f2
JTV
2615
2616 if (FAILED(hr)) {
2617 error_report("WHPX: Failed to set partition core count to %d,"
f2b143a2 2618 " hr=%08lx", ms->smp.cores, hr);
812d49f2
JTV
2619 ret = -EINVAL;
2620 goto error;
7becac84
JTV
2621 }
2622
faf20793
SM
2623 /*
2624 * Error out if WHP doesn't support apic emulation and user is requiring
2625 * it.
2626 */
2627 if (whpx->kernel_irqchip_required && (!features.LocalApicEmulation ||
2628 !whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2)) {
2629 error_report("WHPX: kernel irqchip requested, but unavailable. "
2630 "Try without kernel-irqchip or with kernel-irqchip=off");
2631 ret = -EINVAL;
2632 goto error;
2633 }
2634
2635 if (whpx->kernel_irqchip_allowed && features.LocalApicEmulation &&
2636 whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2) {
2637 WHV_X64_LOCAL_APIC_EMULATION_MODE mode =
2638 WHvX64LocalApicEmulationModeXApic;
2639 printf("WHPX: setting APIC emulation mode in the hypervisor\n");
2640 hr = whp_dispatch.WHvSetPartitionProperty(
2641 whpx->partition,
2642 WHvPartitionPropertyCodeLocalApicEmulationMode,
2643 &mode,
2644 sizeof(mode));
2645 if (FAILED(hr)) {
2646 error_report("WHPX: Failed to enable kernel irqchip hr=%08lx", hr);
2647 if (whpx->kernel_irqchip_required) {
2648 error_report("WHPX: kernel irqchip requested, but unavailable");
2649 ret = -EINVAL;
2650 goto error;
2651 }
2652 } else {
2653 whpx->apic_in_platform = true;
2654 }
2655 }
2656
2657 /* Register for MSR and CPUID exits */
7becac84 2658 memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY));
e7ca549f 2659 prop.ExtendedVmExits.X64MsrExit = 1;
7becac84 2660 prop.ExtendedVmExits.X64CpuidExit = 1;
d7482ffe 2661 prop.ExtendedVmExits.ExceptionExit = 1;
faf20793
SM
2662 if (whpx_apic_in_platform()) {
2663 prop.ExtendedVmExits.X64ApicInitSipiExitTrap = 1;
2664 }
7becac84 2665
faf20793
SM
2666 hr = whp_dispatch.WHvSetPartitionProperty(
2667 whpx->partition,
2668 WHvPartitionPropertyCodeExtendedVmExits,
2669 &prop,
2670 sizeof(WHV_PARTITION_PROPERTY));
7becac84 2671 if (FAILED(hr)) {
faf20793 2672 error_report("WHPX: Failed to enable MSR & CPUIDexit, hr=%08lx", hr);
7becac84
JTV
2673 ret = -EINVAL;
2674 goto error;
2675 }
2676
327fccb2
LP
2677 hr = whp_dispatch.WHvSetPartitionProperty(
2678 whpx->partition,
2679 WHvPartitionPropertyCodeCpuidExitList,
2680 cpuidExitList,
2681 RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32));
e1753a7e 2682
7becac84
JTV
2683 if (FAILED(hr)) {
2684 error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx",
2685 hr);
2686 ret = -EINVAL;
2687 goto error;
812d49f2
JTV
2688 }
2689
d7482ffe
IS
2690 /*
2691 * We do not want to intercept any exceptions from the guest,
2692 * until we actually start debugging with gdb.
2693 */
2694 whpx->exception_exit_bitmap = -1;
2695 hr = whpx_set_exception_exit_bitmap(0);
2696
2697 if (FAILED(hr)) {
2698 error_report("WHPX: Failed to set exception exit bitmap, hr=%08lx", hr);
2699 ret = -EINVAL;
2700 goto error;
2701 }
2702
327fccb2 2703 hr = whp_dispatch.WHvSetupPartition(whpx->partition);
812d49f2
JTV
2704 if (FAILED(hr)) {
2705 error_report("WHPX: Failed to setup partition, hr=%08lx", hr);
2706 ret = -EINVAL;
2707 goto error;
2708 }
2709
812d49f2
JTV
2710 whpx_memory_init();
2711
812d49f2
JTV
2712 printf("Windows Hypervisor Platform accelerator is operational\n");
2713 return 0;
2714
5c8e1e83 2715error:
812d49f2
JTV
2716
2717 if (NULL != whpx->partition) {
327fccb2 2718 whp_dispatch.WHvDeletePartition(whpx->partition);
812d49f2
JTV
2719 whpx->partition = NULL;
2720 }
2721
812d49f2
JTV
2722 return ret;
2723}
2724
2725int whpx_enabled(void)
2726{
2727 return whpx_allowed;
2728}
2729
84f4ef17
PB
2730bool whpx_apic_in_platform(void) {
2731 return whpx_global.apic_in_platform;
2732}
2733
812d49f2
JTV
2734static void whpx_accel_class_init(ObjectClass *oc, void *data)
2735{
2736 AccelClass *ac = ACCEL_CLASS(oc);
2737 ac->name = "WHPX";
2738 ac->init_machine = whpx_accel_init;
2739 ac->allowed = &whpx_allowed;
faf20793
SM
2740
2741 object_class_property_add(oc, "kernel-irqchip", "on|off|split",
2742 NULL, whpx_set_kernel_irqchip,
2743 NULL, NULL);
2744 object_class_property_set_description(oc, "kernel-irqchip",
2745 "Configure WHPX in-kernel irqchip");
2746}
2747
2748static void whpx_accel_instance_init(Object *obj)
2749{
2750 struct whpx_state *whpx = &whpx_global;
2751
2752 memset(whpx, 0, sizeof(struct whpx_state));
2753 /* Turn on kernel-irqchip, by default */
2754 whpx->kernel_irqchip_allowed = true;
812d49f2
JTV
2755}
2756
2757static const TypeInfo whpx_accel_type = {
2758 .name = ACCEL_CLASS_NAME("whpx"),
2759 .parent = TYPE_ACCEL,
faf20793 2760 .instance_init = whpx_accel_instance_init,
812d49f2
JTV
2761 .class_init = whpx_accel_class_init,
2762};
2763
2764static void whpx_type_init(void)
2765{
2766 type_register_static(&whpx_accel_type);
2767}
2768
327fccb2
LP
2769bool init_whp_dispatch(void)
2770{
327fccb2
LP
2771 if (whp_dispatch_initialized) {
2772 return true;
2773 }
2774
b902710f 2775 if (!load_whp_dispatch_fns(&hWinHvPlatform, WINHV_PLATFORM_FNS_DEFAULT)) {
327fccb2
LP
2776 goto error;
2777 }
327fccb2 2778
b902710f 2779 if (!load_whp_dispatch_fns(&hWinHvEmulation, WINHV_EMULATION_FNS_DEFAULT)) {
327fccb2
LP
2780 goto error;
2781 }
327fccb2 2782
6785e767
SM
2783 assert(load_whp_dispatch_fns(&hWinHvPlatform,
2784 WINHV_PLATFORM_FNS_SUPPLEMENTAL));
327fccb2 2785 whp_dispatch_initialized = true;
327fccb2 2786
b902710f
SM
2787 return true;
2788error:
327fccb2
LP
2789 if (hWinHvPlatform) {
2790 FreeLibrary(hWinHvPlatform);
2791 }
b902710f 2792
327fccb2
LP
2793 if (hWinHvEmulation) {
2794 FreeLibrary(hWinHvEmulation);
2795 }
b902710f 2796
327fccb2
LP
2797 return false;
2798}
2799
812d49f2 2800type_init(whpx_type_init);