]>
Commit | Line | Data |
---|---|---|
812d49f2 JTV |
1 | /* |
2 | * QEMU Windows Hypervisor Platform accelerator (WHPX) | |
3 | * | |
4 | * Copyright Microsoft Corp. 2017 | |
5 | * | |
6 | * This work is licensed under the terms of the GNU GPL, version 2 or later. | |
7 | * See the COPYING file in the top-level directory. | |
8 | * | |
9 | */ | |
10 | ||
11 | #include "qemu/osdep.h" | |
12 | #include "cpu.h" | |
13 | #include "exec/address-spaces.h" | |
812d49f2 | 14 | #include "exec/ioport.h" |
d7482ffe | 15 | #include "exec/gdbstub.h" |
940e43aa | 16 | #include "qemu/accel.h" |
812d49f2 | 17 | #include "sysemu/whpx.h" |
812d49f2 | 18 | #include "sysemu/cpus.h" |
54d31236 | 19 | #include "sysemu/runstate.h" |
812d49f2 | 20 | #include "qemu/main-loop.h" |
754f2871 | 21 | #include "hw/boards.h" |
faf20793 SM |
22 | #include "hw/i386/ioapic.h" |
23 | #include "hw/i386/apic_internal.h" | |
812d49f2 | 24 | #include "qemu/error-report.h" |
812d49f2 | 25 | #include "qapi/error.h" |
faf20793 SM |
26 | #include "qapi/qapi-types-common.h" |
27 | #include "qapi/qapi-visit-common.h" | |
812d49f2 | 28 | #include "migration/blocker.h" |
faf20793 | 29 | #include <winerror.h> |
812d49f2 | 30 | |
9102c968 | 31 | #include "whpx-internal.h" |
b86f59c7 CF |
32 | #include "whpx-accel-ops.h" |
33 | ||
34 | #include <WinHvPlatform.h> | |
35 | #include <WinHvEmulation.h> | |
812d49f2 | 36 | |
5c8e1e83 SM |
37 | #define HYPERV_APIC_BUS_FREQUENCY (200000000ULL) |
38 | ||
812d49f2 JTV |
/*
 * Registers synchronized between QEMU and the hypervisor in one batched
 * WHvGet/SetVirtualProcessorRegisters() call.  The order of entries is
 * significant: whpx_set_registers()/whpx_get_registers() walk this table
 * with a running index and assert the expected name at each step.
 */
static const WHV_REGISTER_NAME whpx_register_names[] = {

    /* X64 General purpose registers */
    WHvX64RegisterRax,
    WHvX64RegisterRcx,
    WHvX64RegisterRdx,
    WHvX64RegisterRbx,
    WHvX64RegisterRsp,
    WHvX64RegisterRbp,
    WHvX64RegisterRsi,
    WHvX64RegisterRdi,
    WHvX64RegisterR8,
    WHvX64RegisterR9,
    WHvX64RegisterR10,
    WHvX64RegisterR11,
    WHvX64RegisterR12,
    WHvX64RegisterR13,
    WHvX64RegisterR14,
    WHvX64RegisterR15,
    WHvX64RegisterRip,
    WHvX64RegisterRflags,

    /* X64 Segment registers */
    WHvX64RegisterEs,
    WHvX64RegisterCs,
    WHvX64RegisterSs,
    WHvX64RegisterDs,
    WHvX64RegisterFs,
    WHvX64RegisterGs,
    WHvX64RegisterLdtr,
    WHvX64RegisterTr,

    /* X64 Table registers */
    WHvX64RegisterIdtr,
    WHvX64RegisterGdtr,

    /* X64 Control Registers */
    WHvX64RegisterCr0,
    WHvX64RegisterCr2,
    WHvX64RegisterCr3,
    WHvX64RegisterCr4,
    WHvX64RegisterCr8,

    /* X64 Debug Registers */
    /*
     * WHvX64RegisterDr0,
     * WHvX64RegisterDr1,
     * WHvX64RegisterDr2,
     * WHvX64RegisterDr3,
     * WHvX64RegisterDr6,
     * WHvX64RegisterDr7,
     */

    /* X64 Floating Point and Vector Registers */
    WHvX64RegisterXmm0,
    WHvX64RegisterXmm1,
    WHvX64RegisterXmm2,
    WHvX64RegisterXmm3,
    WHvX64RegisterXmm4,
    WHvX64RegisterXmm5,
    WHvX64RegisterXmm6,
    WHvX64RegisterXmm7,
    WHvX64RegisterXmm8,
    WHvX64RegisterXmm9,
    WHvX64RegisterXmm10,
    WHvX64RegisterXmm11,
    WHvX64RegisterXmm12,
    WHvX64RegisterXmm13,
    WHvX64RegisterXmm14,
    WHvX64RegisterXmm15,
    WHvX64RegisterFpMmx0,
    WHvX64RegisterFpMmx1,
    WHvX64RegisterFpMmx2,
    WHvX64RegisterFpMmx3,
    WHvX64RegisterFpMmx4,
    WHvX64RegisterFpMmx5,
    WHvX64RegisterFpMmx6,
    WHvX64RegisterFpMmx7,
    WHvX64RegisterFpControlStatus,
    WHvX64RegisterXmmControlStatus,

    /* X64 MSRs */
    WHvX64RegisterEfer,
#ifdef TARGET_X86_64
    WHvX64RegisterKernelGsBase,
#endif
    WHvX64RegisterApicBase,
    /* WHvX64RegisterPat, */
    WHvX64RegisterSysenterCs,
    WHvX64RegisterSysenterEip,
    WHvX64RegisterSysenterEsp,
    WHvX64RegisterStar,
#ifdef TARGET_X86_64
    WHvX64RegisterLstar,
    WHvX64RegisterCstar,
    WHvX64RegisterSfmask,
#endif

    /* Interrupt / Event Registers */
    /*
     * WHvRegisterPendingInterruption,
     * WHvRegisterInterruptState,
     * WHvRegisterPendingEvent0,
     * WHvRegisterPendingEvent1
     * WHvX64RegisterDeliverabilityNotifications,
     */
};

/* One value slot per entry of whpx_register_names, used for batch get/set. */
struct whpx_register_set {
    WHV_REGISTER_VALUE values[RTL_NUMBER_OF(whpx_register_names)];
};
150 | ||
d7482ffe IS |
151 | /* |
152 | * The current implementation of instruction stepping sets the TF flag | |
153 | * in RFLAGS, causing the CPU to raise an INT1 after each instruction. | |
154 | * This corresponds to the WHvX64ExceptionTypeDebugTrapOrFault exception. | |
155 | * | |
156 | * This approach has a few limitations: | |
157 | * 1. Stepping over a PUSHF/SAHF instruction will save the TF flag | |
158 | * along with the other flags, possibly restoring it later. It would | |
159 | * result in another INT1 when the flags are restored, triggering | |
160 | * a stop in gdb that could be cleared by doing another step. | |
161 | * | |
162 | * Stepping over a POPF/LAHF instruction will let it overwrite the | |
163 | * TF flags, ending the stepping mode. | |
164 | * | |
165 | * 2. Stepping over an instruction raising an exception (e.g. INT, DIV, | |
166 | * or anything that could result in a page fault) will save the flags | |
167 | * to the stack, clear the TF flag, and let the guest execute the | |
168 | * handler. Normally, the guest will restore the original flags, | |
169 | * that will continue single-stepping. | |
170 | * | |
171 | * 3. Debuggers running on the guest may wish to set TF to do instruction | |
172 | * stepping. INT1 events generated by it would be intercepted by us, | |
173 | * as long as the gdb is connected to QEMU. | |
174 | * | |
175 | * In practice this means that: | |
176 | * 1. Stepping through flags-modifying instructions may cause gdb to | |
177 | * continue or stop in unexpected places. This will be fully recoverable | |
178 | * and will not crash the target. | |
179 | * | |
180 | * 2. Stepping over an instruction that triggers an exception will step | |
181 | * over the exception handler, not into it. | |
182 | * | |
183 | * 3. Debugging the guest via gdb, while running debugger on the guest | |
184 | * at the same time may lead to unexpected effects. Removing all | |
185 | * breakpoints set via QEMU will prevent any further interference | |
186 | * with the guest-level debuggers. | |
187 | * | |
188 | * The limitations can be addressed as shown below: | |
189 | * 1. PUSHF/SAHF/POPF/LAHF/IRET instructions can be emulated instead of | |
190 | * stepping through them. The exact semantics of the instructions is | |
191 | * defined in the "Combined Volume Set of Intel 64 and IA-32 | |
192 | * Architectures Software Developer's Manuals", however it involves a | |
193 | * fair amount of corner cases due to compatibility with real mode, | |
194 | * virtual 8086 mode, and differences between 64-bit and 32-bit modes. | |
195 | * | |
196 | * 2. We could step into the guest's exception handlers using the following | |
197 | * sequence: | |
198 | * a. Temporarily enable catching of all exception types via | |
199 | * whpx_set_exception_exit_bitmap(). | |
200 | * b. Once an exception is intercepted, read the IDT/GDT and locate | |
201 | * the original handler. | |
202 | * c. Patch the original handler, injecting an INT3 at the beginning. | |
203 | * d. Update the exception exit bitmap to only catch the | |
204 | * WHvX64ExceptionTypeBreakpointTrap exception. | |
205 | * e. Let the affected CPU run in the exclusive mode. | |
206 | * f. Restore the original handler and the exception exit bitmap. | |
207 | * Note that handling all corner cases related to IDT/GDT is harder | |
208 | * than it may seem. See x86_cpu_get_phys_page_attrs_debug() for a | |
209 | * rough idea. | |
210 | * | |
211 | * 3. In order to properly support guest-level debugging in parallel with | |
212 | * the QEMU-level debugging, we would need to be able to pass some INT1 | |
213 | * events to the guest. This could be done via the following methods: | |
214 | * a. Using the WHvRegisterPendingEvent register. As of Windows 21H1, | |
215 | * it seems to only work for interrupts and not software | |
216 | * exceptions. | |
217 | * b. Locating and patching the original handler by parsing IDT/GDT. | |
218 | * This involves relatively complex logic outlined in the previous | |
219 | * paragraph. | |
220 | * c. Emulating the exception invocation (i.e. manually updating RIP, | |
221 | * RFLAGS, and pushing the old values to stack). This is even more | |
222 | * complicated than the previous option, since it involves checking | |
223 | * CPL, gate attributes, and doing various adjustments depending | |
224 | * on the current CPU mode, whether the CPL is changing, etc. | |
225 | */ | |
/* Single-step strategy requested by the gdbstub (see comment above). */
typedef enum WhpxStepMode {
    WHPX_STEP_NONE = 0,
    /* Halt other VCPUs */
    WHPX_STEP_EXCLUSIVE,
} WhpxStepMode;
231 | ||
/* Per-vCPU WHPX accelerator state, retrieved via get_whpx_vcpu(). */
struct whpx_vcpu {
    /* Handle for the WinHv instruction emulator bound to this vCPU */
    WHV_EMULATOR_HANDLE emulator;
    /* NOTE(review): appears to track whether an interrupt-window
     * notification was requested from the hypervisor — confirm in the
     * vcpu exec path. */
    bool window_registered;
    /* Whether the vCPU can currently accept event injection */
    bool interruptable;
    bool ready_for_pic_interrupt;
    /* Cached CR8 view of the APIC TPR (see whpx_apic_tpr_to_cr8()) */
    uint64_t tpr;
    /* Cached APIC base MSR value, mirrored in whpx_set/get_registers() */
    uint64_t apic_base;
    bool interruption_pending;

    /* Must be the last field as it may have a tail */
    WHV_RUN_VP_EXIT_CONTEXT exit_ctx;
};
244 | ||
/* Whether the WHPX accelerator may be used (set during accel setup). */
static bool whpx_allowed;
/* One-time-initialization flag for the whp_dispatch table below. */
static bool whp_dispatch_initialized;
/* Module handles for the WinHv platform and emulation libraries. */
static HMODULE hWinHvPlatform, hWinHvEmulation;
static uint32_t max_vcpu_index;
/* XSAVE capabilities reported by the hypervisor; see whpx_has_xsave(). */
static WHV_PROCESSOR_XSAVE_FEATURES whpx_xsave_cap;

/* Global partition state shared by all vCPUs. */
struct whpx_state whpx_global;
/* Dispatch table of dynamically resolved WinHv API entry points. */
struct WHPDispatch whp_dispatch;
812d49f2 | 253 | |
/* True when the hypervisor reports XSAVE support (whpx_xsave_cap). */
static bool whpx_has_xsave(void)
{
    return whpx_xsave_cap.XsaveSupport;
}

/*
 * VP support
 */

/* Return the per-vCPU WHPX state hung off the generic CPUState. */
static struct whpx_vcpu *get_whpx_vcpu(CPUState *cpu)
{
    return (struct whpx_vcpu *)cpu->hax_vcpu;
}
267 | ||
268 | static WHV_X64_SEGMENT_REGISTER whpx_seg_q2h(const SegmentCache *qs, int v86, | |
269 | int r86) | |
270 | { | |
271 | WHV_X64_SEGMENT_REGISTER hs; | |
272 | unsigned flags = qs->flags; | |
273 | ||
274 | hs.Base = qs->base; | |
275 | hs.Limit = qs->limit; | |
276 | hs.Selector = qs->selector; | |
277 | ||
278 | if (v86) { | |
279 | hs.Attributes = 0; | |
280 | hs.SegmentType = 3; | |
281 | hs.Present = 1; | |
282 | hs.DescriptorPrivilegeLevel = 3; | |
283 | hs.NonSystemSegment = 1; | |
284 | ||
285 | } else { | |
286 | hs.Attributes = (flags >> DESC_TYPE_SHIFT); | |
287 | ||
288 | if (r86) { | |
289 | /* hs.Base &= 0xfffff; */ | |
290 | } | |
291 | } | |
292 | ||
293 | return hs; | |
294 | } | |
295 | ||
296 | static SegmentCache whpx_seg_h2q(const WHV_X64_SEGMENT_REGISTER *hs) | |
297 | { | |
298 | SegmentCache qs; | |
299 | ||
300 | qs.base = hs->Base; | |
301 | qs.limit = hs->Limit; | |
302 | qs.selector = hs->Selector; | |
303 | ||
304 | qs.flags = ((uint32_t)hs->Attributes) << DESC_TYPE_SHIFT; | |
305 | ||
306 | return qs; | |
307 | } | |
308 | ||
b6b3da99 SM |
309 | /* X64 Extended Control Registers */ |
310 | static void whpx_set_xcrs(CPUState *cpu) | |
311 | { | |
312 | CPUX86State *env = cpu->env_ptr; | |
313 | HRESULT hr; | |
314 | struct whpx_state *whpx = &whpx_global; | |
315 | WHV_REGISTER_VALUE xcr0; | |
316 | WHV_REGISTER_NAME xcr0_name = WHvX64RegisterXCr0; | |
317 | ||
318 | if (!whpx_has_xsave()) { | |
319 | return; | |
320 | } | |
321 | ||
322 | /* Only xcr0 is supported by the hypervisor currently */ | |
323 | xcr0.Reg64 = env->xcr0; | |
324 | hr = whp_dispatch.WHvSetVirtualProcessorRegisters( | |
325 | whpx->partition, cpu->cpu_index, &xcr0_name, 1, &xcr0); | |
326 | if (FAILED(hr)) { | |
327 | error_report("WHPX: Failed to set register xcr0, hr=%08lx", hr); | |
328 | } | |
329 | } | |
330 | ||
/*
 * Write env->tsc into the vCPU's TSC register.
 * Returns 0 on success, -1 if the register write failed.
 */
static int whpx_set_tsc(CPUState *cpu)
{
    CPUX86State *env = cpu->env_ptr;
    WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc;
    WHV_REGISTER_VALUE tsc_val;
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;

    /*
     * Suspend the partition prior to setting the TSC to reduce the variance
     * in TSC across vCPUs. When the first vCPU runs post suspend, the
     * partition is automatically resumed.
     */
    if (whp_dispatch.WHvSuspendPartitionTime) {

        /*
         * Unable to suspend partition while setting TSC is not a fatal
         * error. It just increases the likelihood of TSC variance between
         * vCPUs and some guest OS are able to handle that just fine.
         */
        hr = whp_dispatch.WHvSuspendPartitionTime(whpx->partition);
        if (FAILED(hr)) {
            warn_report("WHPX: Failed to suspend partition, hr=%08lx", hr);
        }
    }

    tsc_val.Reg64 = env->tsc;
    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
        whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to set TSC, hr=%08lx", hr);
        return -1;
    }

    return 0;
}
367 | ||
5ad93fd3 IS |
368 | /* |
369 | * The CR8 register in the CPU is mapped to the TPR register of the APIC, | |
370 | * however, they use a slightly different encoding. Specifically: | |
371 | * | |
372 | * APIC.TPR[bits 7:4] = CR8[bits 3:0] | |
373 | * | |
374 | * This mechanism is described in section 10.8.6.1 of Volume 3 of Intel 64 | |
375 | * and IA-32 Architectures Software Developer's Manual. | |
f000bc74 IS |
376 | * |
377 | * The functions below translate the value of CR8 to TPR and vice versa. | |
5ad93fd3 IS |
378 | */ |
379 | ||
/* Narrow APIC.TPR[7:4] into CR8[3:0] (mapping described above). */
static uint64_t whpx_apic_tpr_to_cr8(uint64_t tpr)
{
    return tpr / 16;
}
384 | ||
/* Widen CR8[3:0] into the APIC TPR encoding, TPR[7:4]. */
static uint64_t whpx_cr8_to_apic_tpr(uint64_t cr8)
{
    return cr8 * 16;
}
389 | ||
/*
 * Copy the QEMU-side CPU state (general purpose, segment, control, FP/SSE
 * registers and MSRs) into the hypervisor vCPU in one batched call.
 *
 * 'level' selects how much state to push: side-effecting/heavy MSRs
 * (currently the TSC) are only written for WHPX_SET_RESET_STATE or above.
 *
 * The write order must follow whpx_register_names[]; the asserts below
 * verify the running index stays in sync with that table.
 */
static void whpx_set_registers(CPUState *cpu, int level)
{
    struct whpx_state *whpx = &whpx_global;
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    CPUX86State *env = cpu->env_ptr;
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct whpx_register_set vcxt;
    HRESULT hr;
    int idx;
    int idx_next;
    int i;
    int v86, r86;

    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));

    /*
     * Following MSRs have side effects on the guest or are too heavy for
     * runtime. Limit them to full state update.
     */
    if (level >= WHPX_SET_RESET_STATE) {
        whpx_set_tsc(cpu);
    }

    memset(&vcxt, 0, sizeof(struct whpx_register_set));

    /* Segment translation differs for virtual-8086 and real mode. */
    v86 = (env->eflags & VM_MASK);
    r86 = !(env->cr[0] & CR0_PE_MASK);

    vcpu->tpr = whpx_apic_tpr_to_cr8(cpu_get_apic_tpr(x86_cpu->apic_state));
    vcpu->apic_base = cpu_get_apic_base(x86_cpu->apic_state);

    idx = 0;

    /* Indexes for first 16 registers match between HV and QEMU definitions */
    idx_next = 16;
    for (idx = 0; idx < CPU_NB_REGS; idx += 1) {
        vcxt.values[idx].Reg64 = (uint64_t)env->regs[idx];
    }
    idx = idx_next;

    /* Same goes for RIP and RFLAGS */
    assert(whpx_register_names[idx] == WHvX64RegisterRip);
    vcxt.values[idx++].Reg64 = env->eip;

    assert(whpx_register_names[idx] == WHvX64RegisterRflags);
    vcxt.values[idx++].Reg64 = env->eflags;

    /* Translate 6+4 segment registers. HV and QEMU order matches */
    assert(idx == WHvX64RegisterEs);
    for (i = 0; i < 6; i += 1, idx += 1) {
        vcxt.values[idx].Segment = whpx_seg_q2h(&env->segs[i], v86, r86);
    }

    assert(idx == WHvX64RegisterLdtr);
    vcxt.values[idx++].Segment = whpx_seg_q2h(&env->ldt, 0, 0);

    assert(idx == WHvX64RegisterTr);
    vcxt.values[idx++].Segment = whpx_seg_q2h(&env->tr, 0, 0);

    assert(idx == WHvX64RegisterIdtr);
    vcxt.values[idx].Table.Base = env->idt.base;
    vcxt.values[idx].Table.Limit = env->idt.limit;
    idx += 1;

    assert(idx == WHvX64RegisterGdtr);
    vcxt.values[idx].Table.Base = env->gdt.base;
    vcxt.values[idx].Table.Limit = env->gdt.limit;
    idx += 1;

    /* CR0, 2, 3, 4, 8 */
    assert(whpx_register_names[idx] == WHvX64RegisterCr0);
    vcxt.values[idx++].Reg64 = env->cr[0];
    assert(whpx_register_names[idx] == WHvX64RegisterCr2);
    vcxt.values[idx++].Reg64 = env->cr[2];
    assert(whpx_register_names[idx] == WHvX64RegisterCr3);
    vcxt.values[idx++].Reg64 = env->cr[3];
    assert(whpx_register_names[idx] == WHvX64RegisterCr4);
    vcxt.values[idx++].Reg64 = env->cr[4];
    assert(whpx_register_names[idx] == WHvX64RegisterCr8);
    vcxt.values[idx++].Reg64 = vcpu->tpr;

    /* 8 Debug Registers - Skipped */

    /*
     * Extended control registers needs to be handled separately depending
     * on whether xsave is supported/enabled or not.
     */
    whpx_set_xcrs(cpu);

    /* 16 XMM registers */
    assert(whpx_register_names[idx] == WHvX64RegisterXmm0);
    idx_next = idx + 16;
    for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) {
        vcxt.values[idx].Reg128.Low64 = env->xmm_regs[i].ZMM_Q(0);
        vcxt.values[idx].Reg128.High64 = env->xmm_regs[i].ZMM_Q(1);
    }
    idx = idx_next;

    /* 8 FP registers */
    assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0);
    for (i = 0; i < 8; i += 1, idx += 1) {
        vcxt.values[idx].Fp.AsUINT128.Low64 = env->fpregs[i].mmx.MMX_Q(0);
        /* vcxt.values[idx].Fp.AsUINT128.High64 =
           env->fpregs[i].mmx.MMX_Q(1);
        */
    }

    /* FP control status register */
    assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus);
    vcxt.values[idx].FpControlStatus.FpControl = env->fpuc;
    /* Merge the FPU top-of-stack (fpstt) back into status word bits 13:11. */
    vcxt.values[idx].FpControlStatus.FpStatus =
        (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    vcxt.values[idx].FpControlStatus.FpTag = 0;
    for (i = 0; i < 8; ++i) {
        vcxt.values[idx].FpControlStatus.FpTag |= (!env->fptags[i]) << i;
    }
    vcxt.values[idx].FpControlStatus.Reserved = 0;
    vcxt.values[idx].FpControlStatus.LastFpOp = env->fpop;
    vcxt.values[idx].FpControlStatus.LastFpRip = env->fpip;
    idx += 1;

    /* XMM control status register */
    assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus);
    vcxt.values[idx].XmmControlStatus.LastFpRdp = 0;
    vcxt.values[idx].XmmControlStatus.XmmStatusControl = env->mxcsr;
    vcxt.values[idx].XmmControlStatus.XmmStatusControlMask = 0x0000ffff;
    idx += 1;

    /* MSRs */
    assert(whpx_register_names[idx] == WHvX64RegisterEfer);
    vcxt.values[idx++].Reg64 = env->efer;
#ifdef TARGET_X86_64
    assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase);
    vcxt.values[idx++].Reg64 = env->kernelgsbase;
#endif

    assert(whpx_register_names[idx] == WHvX64RegisterApicBase);
    vcxt.values[idx++].Reg64 = vcpu->apic_base;

    /* WHvX64RegisterPat - Skipped */

    assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs);
    vcxt.values[idx++].Reg64 = env->sysenter_cs;
    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip);
    vcxt.values[idx++].Reg64 = env->sysenter_eip;
    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp);
    vcxt.values[idx++].Reg64 = env->sysenter_esp;
    assert(whpx_register_names[idx] == WHvX64RegisterStar);
    vcxt.values[idx++].Reg64 = env->star;
#ifdef TARGET_X86_64
    assert(whpx_register_names[idx] == WHvX64RegisterLstar);
    vcxt.values[idx++].Reg64 = env->lstar;
    assert(whpx_register_names[idx] == WHvX64RegisterCstar);
    vcxt.values[idx++].Reg64 = env->cstar;
    assert(whpx_register_names[idx] == WHvX64RegisterSfmask);
    vcxt.values[idx++].Reg64 = env->fmask;
#endif

    /* Interrupt / Event Registers - Skipped */

    assert(idx == RTL_NUMBER_OF(whpx_register_names));

    /* Push the whole register batch to the hypervisor in a single call. */
    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
        whpx->partition, cpu->cpu_index,
        whpx_register_names,
        RTL_NUMBER_OF(whpx_register_names),
        &vcxt.values[0]);

    if (FAILED(hr)) {
        error_report("WHPX: Failed to set virtual processor context, hr=%08lx",
                     hr);
    }

    return;
}
565 | ||
6785e767 SM |
566 | static int whpx_get_tsc(CPUState *cpu) |
567 | { | |
95e862d7 | 568 | CPUX86State *env = cpu->env_ptr; |
6785e767 SM |
569 | WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc; |
570 | WHV_REGISTER_VALUE tsc_val; | |
571 | HRESULT hr; | |
572 | struct whpx_state *whpx = &whpx_global; | |
573 | ||
574 | hr = whp_dispatch.WHvGetVirtualProcessorRegisters( | |
575 | whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val); | |
576 | if (FAILED(hr)) { | |
577 | error_report("WHPX: Failed to get TSC, hr=%08lx", hr); | |
578 | return -1; | |
579 | } | |
580 | ||
581 | env->tsc = tsc_val.Reg64; | |
582 | return 0; | |
583 | } | |
584 | ||
b6b3da99 SM |
585 | /* X64 Extended Control Registers */ |
586 | static void whpx_get_xcrs(CPUState *cpu) | |
587 | { | |
588 | CPUX86State *env = cpu->env_ptr; | |
589 | HRESULT hr; | |
590 | struct whpx_state *whpx = &whpx_global; | |
591 | WHV_REGISTER_VALUE xcr0; | |
592 | WHV_REGISTER_NAME xcr0_name = WHvX64RegisterXCr0; | |
593 | ||
594 | if (!whpx_has_xsave()) { | |
595 | return; | |
596 | } | |
597 | ||
598 | /* Only xcr0 is supported by the hypervisor currently */ | |
599 | hr = whp_dispatch.WHvGetVirtualProcessorRegisters( | |
600 | whpx->partition, cpu->cpu_index, &xcr0_name, 1, &xcr0); | |
601 | if (FAILED(hr)) { | |
602 | error_report("WHPX: Failed to get register xcr0, hr=%08lx", hr); | |
603 | return; | |
604 | } | |
605 | ||
606 | env->xcr0 = xcr0.Reg64; | |
607 | } | |
608 | ||
/*
 * Pull the full vCPU state (general purpose, segment, control, FP/SSE
 * registers and MSRs) from the hypervisor into QEMU's CPUX86State.
 *
 * The read order must follow whpx_register_names[]; the asserts below
 * verify the running index stays in sync with that table.
 */
static void whpx_get_registers(CPUState *cpu)
{
    struct whpx_state *whpx = &whpx_global;
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    CPUX86State *env = cpu->env_ptr;
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct whpx_register_set vcxt;
    uint64_t tpr, apic_base;
    HRESULT hr;
    int idx;
    int idx_next;
    int i;

    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));

    /* Refresh the TSC once; it stays valid while the VM is not running. */
    if (!env->tsc_valid) {
        whpx_get_tsc(cpu);
        env->tsc_valid = !runstate_is_running();
    }

    hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
        whpx->partition, cpu->cpu_index,
        whpx_register_names,
        RTL_NUMBER_OF(whpx_register_names),
        &vcxt.values[0]);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to get virtual processor context, hr=%08lx",
                     hr);
    }

    if (whpx_apic_in_platform()) {
        /*
         * Fetch the TPR value from the emulated APIC. It may get overwritten
         * below with the value from CR8 returned by
         * WHvGetVirtualProcessorRegisters().
         */
        whpx_apic_get(x86_cpu->apic_state);
        vcpu->tpr = whpx_apic_tpr_to_cr8(
            cpu_get_apic_tpr(x86_cpu->apic_state));
    }

    idx = 0;

    /* Indexes for first 16 registers match between HV and QEMU definitions */
    idx_next = 16;
    for (idx = 0; idx < CPU_NB_REGS; idx += 1) {
        env->regs[idx] = vcxt.values[idx].Reg64;
    }
    idx = idx_next;

    /* Same goes for RIP and RFLAGS */
    assert(whpx_register_names[idx] == WHvX64RegisterRip);
    env->eip = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterRflags);
    env->eflags = vcxt.values[idx++].Reg64;

    /* Translate 6+4 segment registers. HV and QEMU order matches */
    assert(idx == WHvX64RegisterEs);
    for (i = 0; i < 6; i += 1, idx += 1) {
        env->segs[i] = whpx_seg_h2q(&vcxt.values[idx].Segment);
    }

    assert(idx == WHvX64RegisterLdtr);
    env->ldt = whpx_seg_h2q(&vcxt.values[idx++].Segment);
    assert(idx == WHvX64RegisterTr);
    env->tr = whpx_seg_h2q(&vcxt.values[idx++].Segment);
    assert(idx == WHvX64RegisterIdtr);
    env->idt.base = vcxt.values[idx].Table.Base;
    env->idt.limit = vcxt.values[idx].Table.Limit;
    idx += 1;
    assert(idx == WHvX64RegisterGdtr);
    env->gdt.base = vcxt.values[idx].Table.Base;
    env->gdt.limit = vcxt.values[idx].Table.Limit;
    idx += 1;

    /* CR0, 2, 3, 4, 8 */
    assert(whpx_register_names[idx] == WHvX64RegisterCr0);
    env->cr[0] = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCr2);
    env->cr[2] = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCr3);
    env->cr[3] = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCr4);
    env->cr[4] = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCr8);
    tpr = vcxt.values[idx++].Reg64;
    if (tpr != vcpu->tpr) {
        /* Propagate a CR8 change back into the emulated APIC's TPR. */
        vcpu->tpr = tpr;
        cpu_set_apic_tpr(x86_cpu->apic_state, whpx_cr8_to_apic_tpr(tpr));
    }

    /* 8 Debug Registers - Skipped */

    /*
     * Extended control registers needs to be handled separately depending
     * on whether xsave is supported/enabled or not.
     */
    whpx_get_xcrs(cpu);

    /* 16 XMM registers */
    assert(whpx_register_names[idx] == WHvX64RegisterXmm0);
    idx_next = idx + 16;
    for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) {
        env->xmm_regs[i].ZMM_Q(0) = vcxt.values[idx].Reg128.Low64;
        env->xmm_regs[i].ZMM_Q(1) = vcxt.values[idx].Reg128.High64;
    }
    idx = idx_next;

    /* 8 FP registers */
    assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0);
    for (i = 0; i < 8; i += 1, idx += 1) {
        env->fpregs[i].mmx.MMX_Q(0) = vcxt.values[idx].Fp.AsUINT128.Low64;
        /* env->fpregs[i].mmx.MMX_Q(1) =
           vcxt.values[idx].Fp.AsUINT128.High64;
        */
    }

    /* FP control status register */
    assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus);
    env->fpuc = vcxt.values[idx].FpControlStatus.FpControl;
    /* Split the FPU top-of-stack (bits 13:11) out of the status word. */
    env->fpstt = (vcxt.values[idx].FpControlStatus.FpStatus >> 11) & 0x7;
    env->fpus = vcxt.values[idx].FpControlStatus.FpStatus & ~0x3800;
    for (i = 0; i < 8; ++i) {
        env->fptags[i] = !((vcxt.values[idx].FpControlStatus.FpTag >> i) & 1);
    }
    env->fpop = vcxt.values[idx].FpControlStatus.LastFpOp;
    env->fpip = vcxt.values[idx].FpControlStatus.LastFpRip;
    idx += 1;

    /* XMM control status register */
    assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus);
    env->mxcsr = vcxt.values[idx].XmmControlStatus.XmmStatusControl;
    idx += 1;

    /* MSRs */
    assert(whpx_register_names[idx] == WHvX64RegisterEfer);
    env->efer = vcxt.values[idx++].Reg64;
#ifdef TARGET_X86_64
    assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase);
    env->kernelgsbase = vcxt.values[idx++].Reg64;
#endif

    assert(whpx_register_names[idx] == WHvX64RegisterApicBase);
    apic_base = vcxt.values[idx++].Reg64;
    if (apic_base != vcpu->apic_base) {
        vcpu->apic_base = apic_base;
        cpu_set_apic_base(x86_cpu->apic_state, vcpu->apic_base);
    }

    /* WHvX64RegisterPat - Skipped */

    assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs);
    env->sysenter_cs = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip);
    env->sysenter_eip = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp);
    env->sysenter_esp = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterStar);
    env->star = vcxt.values[idx++].Reg64;
#ifdef TARGET_X86_64
    assert(whpx_register_names[idx] == WHvX64RegisterLstar);
    env->lstar = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCstar);
    env->cstar = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterSfmask);
    env->fmask = vcxt.values[idx++].Reg64;
#endif

    /* Interrupt / Event Registers - Skipped */

    assert(idx == RTL_NUMBER_OF(whpx_register_names));

    if (whpx_apic_in_platform()) {
        whpx_apic_get(x86_cpu->apic_state);
    }

    /* Recompute hflags derived from the freshly loaded state. */
    x86_update_hflags(env);

    return;
}
789 | ||
/*
 * Instruction-emulator callback: perform a port I/O access on behalf of
 * the emulated instruction via QEMU's I/O address space.
 */
static HRESULT CALLBACK whpx_emu_ioport_callback(
    void *ctx,
    WHV_EMULATOR_IO_ACCESS_INFO *IoAccess)
{
    MemTxAttrs attrs = { 0 };
    address_space_rw(&address_space_io, IoAccess->Port, attrs,
                     &IoAccess->Data, IoAccess->AccessSize,
                     IoAccess->Direction);
    return S_OK;
}
800 | ||
/*
 * Instruction-emulator callback: perform an MMIO access via QEMU's
 * physical memory layer.
 */
static HRESULT CALLBACK whpx_emu_mmio_callback(
    void *ctx,
    WHV_EMULATOR_MEMORY_ACCESS_INFO *ma)
{
    cpu_physical_memory_rw(ma->GpaAddress, ma->Data, ma->AccessSize,
                           ma->Direction);
    return S_OK;
}
809 | ||
810 | static HRESULT CALLBACK whpx_emu_getreg_callback( | |
811 | void *ctx, | |
812 | const WHV_REGISTER_NAME *RegisterNames, | |
813 | UINT32 RegisterCount, | |
814 | WHV_REGISTER_VALUE *RegisterValues) | |
815 | { | |
816 | HRESULT hr; | |
817 | struct whpx_state *whpx = &whpx_global; | |
818 | CPUState *cpu = (CPUState *)ctx; | |
819 | ||
327fccb2 LP |
820 | hr = whp_dispatch.WHvGetVirtualProcessorRegisters( |
821 | whpx->partition, cpu->cpu_index, | |
822 | RegisterNames, RegisterCount, | |
823 | RegisterValues); | |
812d49f2 JTV |
824 | if (FAILED(hr)) { |
825 | error_report("WHPX: Failed to get virtual processor registers," | |
826 | " hr=%08lx", hr); | |
812d49f2 JTV |
827 | } |
828 | ||
829 | return hr; | |
830 | } | |
831 | ||
832 | static HRESULT CALLBACK whpx_emu_setreg_callback( | |
833 | void *ctx, | |
834 | const WHV_REGISTER_NAME *RegisterNames, | |
835 | UINT32 RegisterCount, | |
836 | const WHV_REGISTER_VALUE *RegisterValues) | |
837 | { | |
838 | HRESULT hr; | |
839 | struct whpx_state *whpx = &whpx_global; | |
840 | CPUState *cpu = (CPUState *)ctx; | |
841 | ||
327fccb2 LP |
842 | hr = whp_dispatch.WHvSetVirtualProcessorRegisters( |
843 | whpx->partition, cpu->cpu_index, | |
844 | RegisterNames, RegisterCount, | |
845 | RegisterValues); | |
812d49f2 JTV |
846 | if (FAILED(hr)) { |
847 | error_report("WHPX: Failed to set virtual processor registers," | |
848 | " hr=%08lx", hr); | |
812d49f2 JTV |
849 | } |
850 | ||
851 | /* | |
852 | * The emulator just successfully wrote the register state. We clear the | |
853 | * dirty state so we avoid the double write on resume of the VP. | |
854 | */ | |
855 | cpu->vcpu_dirty = false; | |
856 | ||
857 | return hr; | |
858 | } | |
859 | ||
/*
 * Instruction-emulator callback: translate a guest virtual address to a
 * guest physical address through the hypervisor.
 *
 * *TranslationResult is only written when the hypercall itself succeeds;
 * a translation failure is reported through the result code, not hr.
 */
static HRESULT CALLBACK whpx_emu_translate_callback(
    void *ctx,
    WHV_GUEST_VIRTUAL_ADDRESS Gva,
    WHV_TRANSLATE_GVA_FLAGS TranslateFlags,
    WHV_TRANSLATE_GVA_RESULT_CODE *TranslationResult,
    WHV_GUEST_PHYSICAL_ADDRESS *Gpa)
{
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;
    CPUState *cpu = (CPUState *)ctx;
    WHV_TRANSLATE_GVA_RESULT res;

    hr = whp_dispatch.WHvTranslateGva(whpx->partition, cpu->cpu_index,
                                      Gva, TranslateFlags, &res, Gpa);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to translate GVA, hr=%08lx", hr);
    } else {
        *TranslationResult = res.ResultCode;
    }

    return hr;
}
882 | ||
/* Callback table handed to the WHPX instruction emulator. */
static const WHV_EMULATOR_CALLBACKS whpx_emu_callbacks = {
    .Size = sizeof(WHV_EMULATOR_CALLBACKS),
    .WHvEmulatorIoPortCallback = whpx_emu_ioport_callback,
    .WHvEmulatorMemoryCallback = whpx_emu_mmio_callback,
    .WHvEmulatorGetVirtualProcessorRegisters = whpx_emu_getreg_callback,
    .WHvEmulatorSetVirtualProcessorRegisters = whpx_emu_setreg_callback,
    .WHvEmulatorTranslateGvaPage = whpx_emu_translate_callback,
};
891 | ||
892 | static int whpx_handle_mmio(CPUState *cpu, WHV_MEMORY_ACCESS_CONTEXT *ctx) | |
893 | { | |
894 | HRESULT hr; | |
895 | struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); | |
896 | WHV_EMULATOR_STATUS emu_status; | |
897 | ||
327fccb2 LP |
898 | hr = whp_dispatch.WHvEmulatorTryMmioEmulation( |
899 | vcpu->emulator, cpu, | |
900 | &vcpu->exit_ctx.VpContext, ctx, | |
901 | &emu_status); | |
812d49f2 | 902 | if (FAILED(hr)) { |
812d49f2 JTV |
903 | error_report("WHPX: Failed to parse MMIO access, hr=%08lx", hr); |
904 | return -1; | |
905 | } | |
906 | ||
907 | if (!emu_status.EmulationSuccessful) { | |
327fccb2 LP |
908 | error_report("WHPX: Failed to emulate MMIO access with" |
909 | " EmulatorReturnStatus: %u", emu_status.AsUINT32); | |
812d49f2 JTV |
910 | return -1; |
911 | } | |
912 | ||
913 | return 0; | |
914 | } | |
915 | ||
916 | static int whpx_handle_portio(CPUState *cpu, | |
917 | WHV_X64_IO_PORT_ACCESS_CONTEXT *ctx) | |
918 | { | |
919 | HRESULT hr; | |
920 | struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); | |
921 | WHV_EMULATOR_STATUS emu_status; | |
922 | ||
327fccb2 LP |
923 | hr = whp_dispatch.WHvEmulatorTryIoEmulation( |
924 | vcpu->emulator, cpu, | |
925 | &vcpu->exit_ctx.VpContext, ctx, | |
926 | &emu_status); | |
812d49f2 | 927 | if (FAILED(hr)) { |
812d49f2 JTV |
928 | error_report("WHPX: Failed to parse PortIO access, hr=%08lx", hr); |
929 | return -1; | |
930 | } | |
931 | ||
932 | if (!emu_status.EmulationSuccessful) { | |
327fccb2 LP |
933 | error_report("WHPX: Failed to emulate PortIO access with" |
934 | " EmulatorReturnStatus: %u", emu_status.AsUINT32); | |
812d49f2 JTV |
935 | return -1; |
936 | } | |
937 | ||
938 | return 0; | |
939 | } | |
940 | ||
d7482ffe IS |
/*
 * Controls whether we should intercept various exceptions on the guest,
 * namely breakpoint/single-step events.
 *
 * The 'exceptions' argument accepts a bitmask, e.g:
 * (1 << WHvX64ExceptionTypeDebugTrapOrFault) | (...)
 */
static HRESULT whpx_set_exception_exit_bitmap(UINT64 exceptions)
{
    struct whpx_state *whpx = &whpx_global;
    WHV_PARTITION_PROPERTY prop = { 0, };
    HRESULT hr;

    /* Avoid the (partition-wide) property write when nothing changed. */
    if (exceptions == whpx->exception_exit_bitmap) {
        return S_OK;
    }

    prop.ExceptionExitBitmap = exceptions;

    hr = whp_dispatch.WHvSetPartitionProperty(
        whpx->partition,
        WHvPartitionPropertyCodeExceptionExitBitmap,
        &prop,
        sizeof(WHV_PARTITION_PROPERTY));

    /* Cache the bitmap only on success so a retry is still attempted. */
    if (SUCCEEDED(hr)) {
        whpx->exception_exit_bitmap = exceptions;
    }

    return hr;
}
972 | ||
973 | ||
974 | /* | |
975 | * This function is called before/after stepping over a single instruction. | |
976 | * It will update the CPU registers to arm/disarm the instruction stepping | |
977 | * accordingly. | |
978 | */ | |
979 | static HRESULT whpx_vcpu_configure_single_stepping(CPUState *cpu, | |
980 | bool set, | |
981 | uint64_t *exit_context_rflags) | |
982 | { | |
983 | WHV_REGISTER_NAME reg_name; | |
984 | WHV_REGISTER_VALUE reg_value; | |
985 | HRESULT hr; | |
986 | struct whpx_state *whpx = &whpx_global; | |
987 | ||
988 | /* | |
989 | * If we are trying to step over a single instruction, we need to set the | |
990 | * TF bit in rflags. Otherwise, clear it. | |
991 | */ | |
992 | reg_name = WHvX64RegisterRflags; | |
993 | hr = whp_dispatch.WHvGetVirtualProcessorRegisters( | |
994 | whpx->partition, | |
995 | cpu->cpu_index, | |
996 | ®_name, | |
997 | 1, | |
998 | ®_value); | |
999 | ||
1000 | if (FAILED(hr)) { | |
1001 | error_report("WHPX: Failed to get rflags, hr=%08lx", hr); | |
1002 | return hr; | |
1003 | } | |
1004 | ||
1005 | if (exit_context_rflags) { | |
1006 | assert(*exit_context_rflags == reg_value.Reg64); | |
1007 | } | |
1008 | ||
1009 | if (set) { | |
1010 | /* Raise WHvX64ExceptionTypeDebugTrapOrFault after each instruction */ | |
1011 | reg_value.Reg64 |= TF_MASK; | |
1012 | } else { | |
1013 | reg_value.Reg64 &= ~TF_MASK; | |
1014 | } | |
1015 | ||
1016 | if (exit_context_rflags) { | |
1017 | *exit_context_rflags = reg_value.Reg64; | |
1018 | } | |
1019 | ||
1020 | hr = whp_dispatch.WHvSetVirtualProcessorRegisters( | |
1021 | whpx->partition, | |
1022 | cpu->cpu_index, | |
1023 | ®_name, | |
1024 | 1, | |
1025 | ®_value); | |
1026 | ||
1027 | if (FAILED(hr)) { | |
1028 | error_report("WHPX: Failed to set rflags," | |
1029 | " hr=%08lx", | |
1030 | hr); | |
1031 | return hr; | |
1032 | } | |
1033 | ||
1034 | reg_name = WHvRegisterInterruptState; | |
1035 | reg_value.Reg64 = 0; | |
1036 | ||
1037 | /* Suspend delivery of hardware interrupts during single-stepping. */ | |
1038 | reg_value.InterruptState.InterruptShadow = set != 0; | |
1039 | ||
1040 | hr = whp_dispatch.WHvSetVirtualProcessorRegisters( | |
1041 | whpx->partition, | |
1042 | cpu->cpu_index, | |
1043 | ®_name, | |
1044 | 1, | |
1045 | ®_value); | |
1046 | ||
1047 | if (FAILED(hr)) { | |
1048 | error_report("WHPX: Failed to set InterruptState," | |
1049 | " hr=%08lx", | |
1050 | hr); | |
1051 | return hr; | |
1052 | } | |
1053 | ||
1054 | if (!set) { | |
1055 | /* | |
1056 | * We have just finished stepping over a single instruction, | |
1057 | * and intercepted the INT1 generated by it. | |
1058 | * We need to now hide the INT1 from the guest, | |
1059 | * as it would not be expecting it. | |
1060 | */ | |
1061 | ||
1062 | reg_name = WHvX64RegisterPendingDebugException; | |
1063 | hr = whp_dispatch.WHvGetVirtualProcessorRegisters( | |
1064 | whpx->partition, | |
1065 | cpu->cpu_index, | |
1066 | ®_name, | |
1067 | 1, | |
1068 | ®_value); | |
1069 | ||
1070 | if (FAILED(hr)) { | |
1071 | error_report("WHPX: Failed to get pending debug exceptions," | |
1072 | "hr=%08lx", hr); | |
1073 | return hr; | |
1074 | } | |
1075 | ||
1076 | if (reg_value.PendingDebugException.SingleStep) { | |
1077 | reg_value.PendingDebugException.SingleStep = 0; | |
1078 | ||
1079 | hr = whp_dispatch.WHvSetVirtualProcessorRegisters( | |
1080 | whpx->partition, | |
1081 | cpu->cpu_index, | |
1082 | ®_name, | |
1083 | 1, | |
1084 | ®_value); | |
1085 | ||
1086 | if (FAILED(hr)) { | |
1087 | error_report("WHPX: Failed to clear pending debug exceptions," | |
1088 | "hr=%08lx", hr); | |
1089 | return hr; | |
1090 | } | |
1091 | } | |
1092 | ||
1093 | } | |
1094 | ||
1095 | return S_OK; | |
1096 | } | |
1097 | ||
1098 | /* Tries to find a breakpoint at the specified address. */ | |
1099 | static struct whpx_breakpoint *whpx_lookup_breakpoint_by_addr(uint64_t address) | |
1100 | { | |
1101 | struct whpx_state *whpx = &whpx_global; | |
1102 | int i; | |
1103 | ||
1104 | if (whpx->breakpoints.breakpoints) { | |
1105 | for (i = 0; i < whpx->breakpoints.breakpoints->used; i++) { | |
1106 | if (address == whpx->breakpoints.breakpoints->data[i].address) { | |
1107 | return &whpx->breakpoints.breakpoints->data[i]; | |
1108 | } | |
1109 | } | |
1110 | } | |
1111 | ||
1112 | return NULL; | |
1113 | } | |
1114 | ||
/*
 * Linux uses int3 (0xCC) during startup (see int3_selftest()) and for
 * debugging user-mode applications. Since the WHPX API does not offer
 * an easy way to pass the intercepted exception back to the guest, we
 * resort to using INT1 instead, and let the guest always handle INT3.
 *
 * 0xF1 is the one-byte ICEBP/INT1 opcode, which raises #DB (vector 1).
 */
static const uint8_t whpx_breakpoint_instruction = 0xF1;
1122 | ||
/*
 * The WHPX QEMU backend implements breakpoints by writing the INT1
 * instruction into memory (ignoring the DRx registers). This raises a few
 * issues that need to be carefully handled:
 *
 * 1. Although unlikely, other parts of QEMU may set multiple breakpoints
 *    at the same location, and later remove them in arbitrary order.
 *    This should not cause memory corruption, and should only remove the
 *    physical breakpoint instruction when the last QEMU breakpoint is gone.
 *
 * 2. Writing arbitrary virtual memory may fail if it's not mapped to a valid
 *    physical location. Hence, physically adding/removing a breakpoint can
 *    theoretically fail at any time. We need to keep track of it.
 *
 * The function below rebuilds a list of low-level breakpoints (one per
 * address, tracking the original instruction and any errors) from the list of
 * high-level breakpoints (set via cpu_breakpoint_insert()).
 *
 * In order to optimize performance, this function stores the list of
 * high-level breakpoints (a.k.a. CPU breakpoints) used to compute the
 * low-level ones, so that it won't be re-invoked until these breakpoints
 * change.
 *
 * Note that this function decides which breakpoints should be inserted into,
 * memory, but doesn't actually do it. The memory accessing is done in
 * whpx_apply_breakpoints().
 */
static void whpx_translate_cpu_breakpoints(
    struct whpx_breakpoints *breakpoints,
    CPUState *cpu,
    int cpu_breakpoint_count)
{
    CPUBreakpoint *bp;
    int cpu_bp_index = 0;

    /* Snapshot of CPU breakpoint addresses, used for change detection. */
    breakpoints->original_addresses =
        g_renew(vaddr, breakpoints->original_addresses, cpu_breakpoint_count);

    breakpoints->original_address_count = cpu_breakpoint_count;

    /* Worst case: every old entry is kept and every CPU breakpoint is new. */
    int max_breakpoints = cpu_breakpoint_count +
        (breakpoints->breakpoints ? breakpoints->breakpoints->used : 0);

    struct whpx_breakpoint_collection *new_breakpoints =
        (struct whpx_breakpoint_collection *)g_malloc0(
        sizeof(struct whpx_breakpoint_collection) +
            max_breakpoints * sizeof(struct whpx_breakpoint));

    new_breakpoints->allocated = max_breakpoints;
    new_breakpoints->used = 0;

    /*
     * 1. Preserve all old breakpoints that could not be automatically
     * cleared when the CPU got stopped.
     */
    if (breakpoints->breakpoints) {
        int i;
        for (i = 0; i < breakpoints->breakpoints->used; i++) {
            if (breakpoints->breakpoints->data[i].state != WHPX_BP_CLEARED) {
                new_breakpoints->data[new_breakpoints->used++] =
                    breakpoints->breakpoints->data[i];
            }
        }
    }

    /* 2. Map all CPU breakpoints to WHPX breakpoints */
    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
        int i;
        bool found = false;

        /* This will be used to detect changed CPU breakpoints later. */
        breakpoints->original_addresses[cpu_bp_index++] = bp->pc;

        for (i = 0; i < new_breakpoints->used; i++) {
            /*
             * WARNING: This loop has O(N^2) complexity, where N is the
             * number of breakpoints. It should not be a bottleneck in
             * real-world scenarios, since it only needs to run once after
             * the breakpoints have been modified.
             * If this ever becomes a concern, it can be optimized by storing
             * high-level breakpoint objects in a tree or hash map.
             */

            if (new_breakpoints->data[i].address == bp->pc) {
                /* There was already a breakpoint at this address. */
                if (new_breakpoints->data[i].state == WHPX_BP_CLEAR_PENDING) {
                    new_breakpoints->data[i].state = WHPX_BP_SET;
                } else if (new_breakpoints->data[i].state == WHPX_BP_SET) {
                    new_breakpoints->data[i].state = WHPX_BP_SET_PENDING;
                }

                found = true;
                break;
            }
        }

        if (!found && new_breakpoints->used < new_breakpoints->allocated) {
            /* No WHPX breakpoint at this address. Create one. */
            new_breakpoints->data[new_breakpoints->used].address = bp->pc;
            new_breakpoints->data[new_breakpoints->used].state =
                WHPX_BP_SET_PENDING;
            new_breakpoints->used++;
        }
    }

    /*
     * Free the previous breakpoint list. This can be optimized by keeping
     * it as shadow buffer for the next computation instead of freeing
     * it immediately.
     */
    g_free(breakpoints->breakpoints);

    breakpoints->breakpoints = new_breakpoints;
}
1237 | ||
/*
 * Physically inserts/removes the breakpoints by reading and writing the
 * physical memory, keeping a track of the failed attempts.
 *
 * Passing resuming=true  will try to set all previously unset breakpoints.
 * Passing resuming=false will remove all inserted ones.
 */
static void whpx_apply_breakpoints(
    struct whpx_breakpoint_collection *breakpoints,
    CPUState *cpu,
    bool resuming)
{
    int i, rc;
    if (!breakpoints) {
        return;
    }

    for (i = 0; i < breakpoints->used; i++) {
        /* Decide what to do right now based on the last known state. */
        WhpxBreakpointState state = breakpoints->data[i].state;
        switch (state) {
        case WHPX_BP_CLEARED:
            if (resuming) {
                state = WHPX_BP_SET_PENDING;
            }
            break;
        case WHPX_BP_SET_PENDING:
            if (!resuming) {
                state = WHPX_BP_CLEARED;
            }
            break;
        case WHPX_BP_SET:
            if (!resuming) {
                state = WHPX_BP_CLEAR_PENDING;
            }
            break;
        case WHPX_BP_CLEAR_PENDING:
            if (resuming) {
                state = WHPX_BP_SET;
            }
            break;
        }

        if (state == WHPX_BP_SET_PENDING) {
            /* Remember the original instruction. */
            rc = cpu_memory_rw_debug(cpu,
                breakpoints->data[i].address,
                &breakpoints->data[i].original_instruction,
                1,
                false);

            if (!rc) {
                /* Write the breakpoint instruction. */
                rc = cpu_memory_rw_debug(cpu,
                    breakpoints->data[i].address,
                    (void *)&whpx_breakpoint_instruction,
                    1,
                    true);
            }

            /* On any failure the state stays *_PENDING for a later retry. */
            if (!rc) {
                state = WHPX_BP_SET;
            }

        }

        if (state == WHPX_BP_CLEAR_PENDING) {
            /* Restore the original instruction. */
            rc = cpu_memory_rw_debug(cpu,
                breakpoints->data[i].address,
                &breakpoints->data[i].original_instruction,
                1,
                true);

            if (!rc) {
                state = WHPX_BP_CLEARED;
            }
        }

        breakpoints->data[i].state = state;
    }
}
1320 | ||
1321 | /* | |
1322 | * This function is called when the a VCPU is about to start and no other | |
1323 | * VCPUs have been started so far. Since the VCPU start order could be | |
1324 | * arbitrary, it doesn't have to be VCPU#0. | |
1325 | * | |
1326 | * It is used to commit the breakpoints into memory, and configure WHPX | |
1327 | * to intercept debug exceptions. | |
1328 | * | |
1329 | * Note that whpx_set_exception_exit_bitmap() cannot be called if one or | |
1330 | * more VCPUs are already running, so this is the best place to do it. | |
1331 | */ | |
1332 | static int whpx_first_vcpu_starting(CPUState *cpu) | |
1333 | { | |
1334 | struct whpx_state *whpx = &whpx_global; | |
1335 | HRESULT hr; | |
1336 | ||
1337 | g_assert(qemu_mutex_iothread_locked()); | |
1338 | ||
1339 | if (!QTAILQ_EMPTY(&cpu->breakpoints) || | |
1340 | (whpx->breakpoints.breakpoints && | |
1341 | whpx->breakpoints.breakpoints->used)) { | |
1342 | CPUBreakpoint *bp; | |
1343 | int i = 0; | |
1344 | bool update_pending = false; | |
1345 | ||
1346 | QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) { | |
1347 | if (i >= whpx->breakpoints.original_address_count || | |
1348 | bp->pc != whpx->breakpoints.original_addresses[i]) { | |
1349 | update_pending = true; | |
1350 | } | |
1351 | ||
1352 | i++; | |
1353 | } | |
1354 | ||
1355 | if (i != whpx->breakpoints.original_address_count) { | |
1356 | update_pending = true; | |
1357 | } | |
1358 | ||
1359 | if (update_pending) { | |
1360 | /* | |
1361 | * The CPU breakpoints have changed since the last call to | |
1362 | * whpx_translate_cpu_breakpoints(). WHPX breakpoints must | |
1363 | * now be recomputed. | |
1364 | */ | |
1365 | whpx_translate_cpu_breakpoints(&whpx->breakpoints, cpu, i); | |
1366 | } | |
1367 | ||
1368 | /* Actually insert the breakpoints into the memory. */ | |
1369 | whpx_apply_breakpoints(whpx->breakpoints.breakpoints, cpu, true); | |
1370 | } | |
1371 | ||
1372 | uint64_t exception_mask; | |
1373 | if (whpx->step_pending || | |
1374 | (whpx->breakpoints.breakpoints && | |
1375 | whpx->breakpoints.breakpoints->used)) { | |
1376 | /* | |
1377 | * We are either attempting to single-step one or more CPUs, or | |
1378 | * have one or more breakpoints enabled. Both require intercepting | |
1379 | * the WHvX64ExceptionTypeBreakpointTrap exception. | |
1380 | */ | |
1381 | ||
1382 | exception_mask = 1UL << WHvX64ExceptionTypeDebugTrapOrFault; | |
1383 | } else { | |
1384 | /* Let the guest handle all exceptions. */ | |
1385 | exception_mask = 0; | |
1386 | } | |
1387 | ||
1388 | hr = whpx_set_exception_exit_bitmap(exception_mask); | |
1389 | if (!SUCCEEDED(hr)) { | |
1390 | error_report("WHPX: Failed to update exception exit mask," | |
1391 | "hr=%08lx.", hr); | |
1392 | return 1; | |
1393 | } | |
1394 | ||
1395 | return 0; | |
1396 | } | |
1397 | ||
/*
 * This function is called when the last VCPU has finished running.
 * It is used to remove any previously set breakpoints from memory.
 */
static int whpx_last_vcpu_stopping(CPUState *cpu)
{
    /* resuming=false restores the original instructions. */
    whpx_apply_breakpoints(whpx_global.breakpoints.breakpoints, cpu, false);
    return 0;
}
1407 | ||
/* Returns the address of the next instruction that is about to be executed. */
static vaddr whpx_vcpu_get_pc(CPUState *cpu, bool exit_context_valid)
{
    if (cpu->vcpu_dirty) {
        /* The CPU registers have been modified by other parts of QEMU. */
        CPUArchState *env = (CPUArchState *)(cpu->env_ptr);
        return env->eip;
    } else if (exit_context_valid) {
        /*
         * The CPU registers have not been modified by other parts of
         * QEMU nor by this port via WHvSetVirtualProcessorRegisters().
         * This is the most common case.
         */
        struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
        return vcpu->exit_ctx.VpContext.Rip;
    } else {
        /*
         * The CPU registers have been modified by a call to
         * WHvSetVirtualProcessorRegisters() and must be re-queried from
         * the target.
         */
        WHV_REGISTER_VALUE reg_value;
        WHV_REGISTER_NAME reg_name = WHvX64RegisterRip;
        HRESULT hr;
        struct whpx_state *whpx = &whpx_global;

        hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
            whpx->partition,
            cpu->cpu_index,
            &reg_name,
            1,
            &reg_value);

        if (FAILED(hr)) {
            /* NOTE(review): returns 0 on failure; callers treat it as a PC. */
            error_report("WHPX: Failed to get PC, hr=%08lx", hr);
            return 0;
        }

        return reg_value.Reg64;
    }
}
1449 | ||
812d49f2 JTV |
1450 | static int whpx_handle_halt(CPUState *cpu) |
1451 | { | |
95e862d7 | 1452 | CPUX86State *env = cpu->env_ptr; |
812d49f2 JTV |
1453 | int ret = 0; |
1454 | ||
1455 | qemu_mutex_lock_iothread(); | |
1456 | if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) && | |
1457 | (env->eflags & IF_MASK)) && | |
1458 | !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) { | |
1459 | cpu->exception_index = EXCP_HLT; | |
1460 | cpu->halted = true; | |
1461 | ret = 1; | |
1462 | } | |
1463 | qemu_mutex_unlock_iothread(); | |
1464 | ||
1465 | return ret; | |
1466 | } | |
1467 | ||
/*
 * Prepares the VCPU for the next WHvRunVirtualProcessor() call: injects
 * pending NMIs/interrupts, syncs the TPR into CR8, and registers for an
 * interrupt-window notification, batching all register writes into a
 * single WHvSetVirtualProcessorRegisters() call.
 *
 * Runs with the iothread lock taken for the interrupt-state manipulation
 * and releases it before the hypervisor call.
 */
static void whpx_vcpu_pre_run(CPUState *cpu)
{
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    CPUX86State *env = cpu->env_ptr;
    X86CPU *x86_cpu = X86_CPU(cpu);
    int irq;
    uint8_t tpr;
    WHV_X64_PENDING_INTERRUPTION_REGISTER new_int;
    UINT32 reg_count = 0;
    /* At most 3 registers are written below: interrupt, CR8, notification. */
    WHV_REGISTER_VALUE reg_values[3];
    WHV_REGISTER_NAME reg_names[3];

    memset(&new_int, 0, sizeof(new_int));
    memset(reg_values, 0, sizeof(reg_values));

    qemu_mutex_lock_iothread();

    /* Inject NMI */
    if (!vcpu->interruption_pending &&
        cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) {
        if (cpu->interrupt_request & CPU_INTERRUPT_NMI) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
            vcpu->interruptable = false;
            new_int.InterruptionType = WHvX64PendingNmi;
            new_int.InterruptionPending = 1;
            new_int.InterruptionVector = 2;
        }
        /* NOTE(review): SMI requests are acknowledged but not delivered. */
        if (cpu->interrupt_request & CPU_INTERRUPT_SMI) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
        }
    }

    /*
     * Force the VCPU out of its inner loop to process any INIT requests or
     * commit pending TPR access.
     */
    if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
        if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) &&
            !(env->hflags & HF_SMM_MASK)) {
            cpu->exit_request = 1;
        }
        if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
            cpu->exit_request = 1;
        }
    }

    /* Get pending hard interruption or replay one that was overwritten */
    if (!whpx_apic_in_platform()) {
        if (!vcpu->interruption_pending &&
            vcpu->interruptable && (env->eflags & IF_MASK)) {
            assert(!new_int.InterruptionPending);
            if (cpu->interrupt_request & CPU_INTERRUPT_HARD) {
                cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
                irq = cpu_get_pic_interrupt(env);
                if (irq >= 0) {
                    new_int.InterruptionType = WHvX64PendingInterrupt;
                    new_int.InterruptionPending = 1;
                    new_int.InterruptionVector = irq;
                }
            }
        }

        /* Setup interrupt state if new one was prepared */
        if (new_int.InterruptionPending) {
            reg_values[reg_count].PendingInterruption = new_int;
            reg_names[reg_count] = WHvRegisterPendingInterruption;
            reg_count += 1;
        }
    } else if (vcpu->ready_for_pic_interrupt &&
               (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
        /* In-platform APIC: deliver PIC interrupts as pending events. */
        cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
        irq = cpu_get_pic_interrupt(env);
        if (irq >= 0) {
            reg_names[reg_count] = WHvRegisterPendingEvent;
            reg_values[reg_count].ExtIntEvent = (WHV_X64_PENDING_EXT_INT_EVENT)
            {
                .EventPending = 1,
                .EventType = WHvX64PendingEventExtInt,
                .Vector = irq,
            };
            reg_count += 1;
        }
    }

    /* Sync the TPR to the CR8 if was modified during the intercept */
    tpr = whpx_apic_tpr_to_cr8(cpu_get_apic_tpr(x86_cpu->apic_state));
    if (tpr != vcpu->tpr) {
        vcpu->tpr = tpr;
        reg_values[reg_count].Reg64 = tpr;
        cpu->exit_request = 1;
        reg_names[reg_count] = WHvX64RegisterCr8;
        reg_count += 1;
    }

    /* Update the state of the interrupt delivery notification */
    if (!vcpu->window_registered &&
        cpu->interrupt_request & CPU_INTERRUPT_HARD) {
        reg_values[reg_count].DeliverabilityNotifications =
            (WHV_X64_DELIVERABILITY_NOTIFICATIONS_REGISTER) {
                .InterruptNotification = 1
            };
        vcpu->window_registered = 1;
        reg_names[reg_count] = WHvX64RegisterDeliverabilityNotifications;
        reg_count += 1;
    }

    qemu_mutex_unlock_iothread();
    vcpu->ready_for_pic_interrupt = false;

    if (reg_count) {
        hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
            whpx->partition, cpu->cpu_index,
            reg_names, reg_count, reg_values);
        if (FAILED(hr)) {
            error_report("WHPX: Failed to set interrupt state registers,"
                         " hr=%08lx", hr);
        }
    }

    return;
}
1591 | ||
/*
 * Synchronizes per-run state back from the exit context after
 * WHvRunVirtualProcessor() returns: rflags, the CR8/TPR value, and the
 * interruption/interrupt-shadow flags used by the next pre-run pass.
 */
static void whpx_vcpu_post_run(CPUState *cpu)
{
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    CPUX86State *env = cpu->env_ptr;
    X86CPU *x86_cpu = X86_CPU(cpu);

    env->eflags = vcpu->exit_ctx.VpContext.Rflags;

    uint64_t tpr = vcpu->exit_ctx.VpContext.Cr8;
    if (vcpu->tpr != tpr) {
        vcpu->tpr = tpr;
        /* APIC state is shared; take the iothread lock for the update. */
        qemu_mutex_lock_iothread();
        cpu_set_apic_tpr(x86_cpu->apic_state, whpx_cr8_to_apic_tpr(vcpu->tpr));
        qemu_mutex_unlock_iothread();
    }

    vcpu->interruption_pending =
        vcpu->exit_ctx.VpContext.ExecutionState.InterruptionPending;

    vcpu->interruptable =
        !vcpu->exit_ctx.VpContext.ExecutionState.InterruptShadow;

    return;
}
1616 | ||
/*
 * Handles asynchronous VCPU events that must be processed outside of the
 * hypervisor run loop: INIT, SIPI, APIC poll requests, TPR access reports,
 * and wakeup from HLT.
 */
static void whpx_vcpu_process_async_events(CPUState *cpu)
{
    CPUX86State *env = cpu->env_ptr;
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);

    /* INIT is ignored while in SMM. */
    if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) &&
        !(env->hflags & HF_SMM_MASK)) {
        whpx_cpu_synchronize_state(cpu);
        do_cpu_init(x86_cpu);
        vcpu->interruptable = true;
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
        apic_poll_irq(x86_cpu->apic_state);
    }

    /* Wake a halted CPU when an interrupt or NMI becomes deliverable. */
    if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
         (env->eflags & IF_MASK)) ||
        (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        cpu->halted = false;
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
        whpx_cpu_synchronize_state(cpu);
        do_cpu_sipi(x86_cpu);
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_TPR;
        whpx_cpu_synchronize_state(cpu);
        apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip,
                                      env->tpr_access_type);
    }

    return;
}
1655 | ||
/*
 * Run this vCPU inside the hypervisor until an exit that QEMU must service.
 *
 * Responsibilities visible here:
 *  - maintain whpx->running_cpus so breakpoints are (re)inserted when the
 *    first vCPU starts and removed when the last one stops;
 *  - if the vCPU sits exactly on an armed software breakpoint, temporarily
 *    remove it and step over the original instruction in exclusive mode;
 *  - dispatch every WHPX exit reason (MMIO, port I/O, interrupt window,
 *    APIC EOI/INIT/SIPI, MSR and CPUID intercepts, debug exceptions, ...).
 *
 * Returns non-zero only on a fatal hypervisor error (internal ret < 0).
 */
static int whpx_vcpu_run(CPUState *cpu)
{
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    struct whpx_breakpoint *stepped_over_bp = NULL;
    WhpxStepMode exclusive_step_mode = WHPX_STEP_NONE;
    int ret;

    g_assert(qemu_mutex_iothread_locked());

    if (whpx->running_cpus++ == 0) {
        /* Insert breakpoints into memory, update exception exit bitmap. */
        ret = whpx_first_vcpu_starting(cpu);
        if (ret != 0) {
            return ret;
        }
    }

    if (whpx->breakpoints.breakpoints &&
        whpx->breakpoints.breakpoints->used > 0)
    {
        /* Is the current PC sitting on one of our armed breakpoints? */
        uint64_t pc = whpx_vcpu_get_pc(cpu, true);
        stepped_over_bp = whpx_lookup_breakpoint_by_addr(pc);
        if (stepped_over_bp && stepped_over_bp->state != WHPX_BP_SET) {
            stepped_over_bp = NULL;
        }

        if (stepped_over_bp) {
            /*
             * We are trying to run the instruction overwritten by an active
             * breakpoint. We will temporarily disable the breakpoint, suspend
             * other CPUs, and step over the instruction.
             */
            exclusive_step_mode = WHPX_STEP_EXCLUSIVE;
        }
    }

    if (exclusive_step_mode == WHPX_STEP_NONE) {
        whpx_vcpu_process_async_events(cpu);
        if (cpu->halted && !whpx_apic_in_platform()) {
            /* Halted and no in-kernel APIC to wake us: report HLT to QEMU. */
            cpu->exception_index = EXCP_HLT;
            qatomic_set(&cpu->exit_request, false);
            return 0;
        }
    }

    qemu_mutex_unlock_iothread();

    if (exclusive_step_mode != WHPX_STEP_NONE) {
        /* Stop all other vCPUs while we single-step over the breakpoint. */
        start_exclusive();
        g_assert(cpu == current_cpu);
        g_assert(!cpu->running);
        cpu->running = true;

        hr = whpx_set_exception_exit_bitmap(
            1UL << WHvX64ExceptionTypeDebugTrapOrFault);
        if (!SUCCEEDED(hr)) {
            /*
             * NOTE(review): this early return leaves the exclusive section
             * entered and the iothread lock dropped — confirm callers can
             * tolerate that on this (unlikely) failure path.
             */
            error_report("WHPX: Failed to update exception exit mask, "
                         "hr=%08lx.", hr);
            return 1;
        }

        if (stepped_over_bp) {
            /* Temporarily disable the triggered breakpoint. */
            cpu_memory_rw_debug(cpu,
                                stepped_over_bp->address,
                                &stepped_over_bp->original_instruction,
                                1,
                                true);
        }
    } else {
        cpu_exec_start(cpu);
    }

    do {
        if (cpu->vcpu_dirty) {
            /* QEMU modified register state; push it before running. */
            whpx_set_registers(cpu, WHPX_SET_RUNTIME_STATE);
            cpu->vcpu_dirty = false;
        }

        if (exclusive_step_mode == WHPX_STEP_NONE) {
            whpx_vcpu_pre_run(cpu);

            if (qatomic_read(&cpu->exit_request)) {
                whpx_vcpu_kick(cpu);
            }
        }

        if (exclusive_step_mode != WHPX_STEP_NONE || cpu->singlestep_enabled) {
            whpx_vcpu_configure_single_stepping(cpu, true, NULL);
        }

        hr = whp_dispatch.WHvRunVirtualProcessor(
            whpx->partition, cpu->cpu_index,
            &vcpu->exit_ctx, sizeof(vcpu->exit_ctx));

        if (FAILED(hr)) {
            error_report("WHPX: Failed to exec a virtual processor,"
                         " hr=%08lx", hr);
            ret = -1;
            break;
        }

        if (exclusive_step_mode != WHPX_STEP_NONE || cpu->singlestep_enabled) {
            /* Restore RFLAGS.TF to its pre-stepping value. */
            whpx_vcpu_configure_single_stepping(cpu,
                false,
                &vcpu->exit_ctx.VpContext.Rflags);
        }

        whpx_vcpu_post_run(cpu);

        switch (vcpu->exit_ctx.ExitReason) {
        case WHvRunVpExitReasonMemoryAccess:
            ret = whpx_handle_mmio(cpu, &vcpu->exit_ctx.MemoryAccess);
            break;

        case WHvRunVpExitReasonX64IoPortAccess:
            ret = whpx_handle_portio(cpu, &vcpu->exit_ctx.IoPortAccess);
            break;

        case WHvRunVpExitReasonX64InterruptWindow:
            /* Guest can now take a PIC interrupt; deliver on next pre-run. */
            vcpu->ready_for_pic_interrupt = 1;
            vcpu->window_registered = 0;
            ret = 0;
            break;

        case WHvRunVpExitReasonX64ApicEoi:
            assert(whpx_apic_in_platform());
            ioapic_eoi_broadcast(vcpu->exit_ctx.ApicEoi.InterruptVector);
            /*
             * NOTE(review): ret is not assigned here; the loop continues
             * with ret's previous value — confirm this is intentional.
             */
            break;

        case WHvRunVpExitReasonX64Halt:
            /*
             * WARNING: as of build 19043.1526 (21H1), this exit reason is no
             * longer used.
             */
            ret = whpx_handle_halt(cpu);
            break;

        case WHvRunVpExitReasonX64ApicInitSipiTrap: {
            /* Translate the guest's ICR write into WHvRequestInterrupt(). */
            WHV_INTERRUPT_CONTROL ipi = {0};
            uint64_t icr = vcpu->exit_ctx.ApicInitSipi.ApicIcr;
            uint32_t delivery_mode =
                (icr & APIC_ICR_DELIV_MOD) >> APIC_ICR_DELIV_MOD_SHIFT;
            int dest_shorthand =
                (icr & APIC_ICR_DEST_SHORT) >> APIC_ICR_DEST_SHORT_SHIFT;
            bool broadcast = false;
            bool include_self = false;
            uint32_t i;

            /* We only registered for INIT and SIPI exits. */
            if ((delivery_mode != APIC_DM_INIT) &&
                (delivery_mode != APIC_DM_SIPI)) {
                error_report(
                    "WHPX: Unexpected APIC exit that is not a INIT or SIPI");
                break;
            }

            if (delivery_mode == APIC_DM_INIT) {
                ipi.Type = WHvX64InterruptTypeInit;
            } else {
                ipi.Type = WHvX64InterruptTypeSipi;
            }

            ipi.DestinationMode =
                ((icr & APIC_ICR_DEST_MOD) >> APIC_ICR_DEST_MOD_SHIFT) ?
                    WHvX64InterruptDestinationModeLogical :
                    WHvX64InterruptDestinationModePhysical;

            ipi.TriggerMode =
                ((icr & APIC_ICR_TRIGGER_MOD) >> APIC_ICR_TRIGGER_MOD_SHIFT) ?
                    WHvX64InterruptTriggerModeLevel :
                    WHvX64InterruptTriggerModeEdge;

            ipi.Vector = icr & APIC_VECTOR_MASK;
            switch (dest_shorthand) {
            /* no shorthand. Bits 56-63 contain the destination. */
            case 0:
                ipi.Destination = (icr >> 56) & APIC_VECTOR_MASK;
                hr = whp_dispatch.WHvRequestInterrupt(whpx->partition,
                        &ipi, sizeof(ipi));
                if (FAILED(hr)) {
                    error_report("WHPX: Failed to request interrupt  hr=%08lx",
                        hr);
                }

                break;

            /* self */
            case 1:
                include_self = true;
                break;

            /* broadcast, including self */
            case 2:
                broadcast = true;
                include_self = true;
                break;

            /* broadcast, excluding self */
            case 3:
                broadcast = true;
                break;
            }

            if (!broadcast && !include_self) {
                break;
            }

            for (i = 0; i <= max_vcpu_index; i++) {
                if (i == cpu->cpu_index && !include_self) {
                    continue;
                }

                /*
                 * Assuming that APIC Ids are identity mapped since
                 * WHvX64RegisterApicId & WHvX64RegisterInitialApicId registers
                 * are not handled yet and the hypervisor doesn't allow the
                 * guest to modify the APIC ID.
                 */
                ipi.Destination = i;
                hr = whp_dispatch.WHvRequestInterrupt(whpx->partition,
                        &ipi, sizeof(ipi));
                if (FAILED(hr)) {
                    error_report(
                        "WHPX: Failed to request SIPI for %d,  hr=%08lx",
                        i, hr);
                }
            }

            break;
        }

        case WHvRunVpExitReasonCanceled:
            if (exclusive_step_mode != WHPX_STEP_NONE) {
                /*
                 * We are trying to step over a single instruction, and
                 * likely got a request to stop from another thread.
                 * Delay it until we are done stepping
                 * over.
                 */
                ret = 0;
            } else {
                cpu->exception_index = EXCP_INTERRUPT;
                ret = 1;
            }
            break;
        case WHvRunVpExitReasonX64MsrAccess: {
            WHV_REGISTER_VALUE reg_values[3] = {0};
            WHV_REGISTER_NAME reg_names[3];
            UINT32 reg_count;

            reg_names[0] = WHvX64RegisterRip;
            reg_names[1] = WHvX64RegisterRax;
            reg_names[2] = WHvX64RegisterRdx;

            /* Advance RIP past the RDMSR/WRMSR instruction. */
            reg_values[0].Reg64 =
                vcpu->exit_ctx.VpContext.Rip +
                vcpu->exit_ctx.VpContext.InstructionLength;

            /*
             * For all unsupported MSR access we:
             *     ignore writes
             *     return 0 on read.
             */
            reg_count = vcpu->exit_ctx.MsrAccess.AccessInfo.IsWrite ?
                        1 : 3;

            hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
                whpx->partition,
                cpu->cpu_index,
                reg_names, reg_count,
                reg_values);

            if (FAILED(hr)) {
                error_report("WHPX: Failed to set MsrAccess state "
                             " registers, hr=%08lx", hr);
            }
            ret = 0;
            break;
        }
        case WHvRunVpExitReasonX64Cpuid: {
            WHV_REGISTER_VALUE reg_values[5];
            WHV_REGISTER_NAME reg_names[5];
            UINT32 reg_count = 5;
            UINT64 cpuid_fn, rip = 0, rax = 0, rcx = 0, rdx = 0, rbx = 0;
            X86CPU *x86_cpu = X86_CPU(cpu);
            CPUX86State *env = &x86_cpu->env;

            memset(reg_values, 0, sizeof(reg_values));

            rip = vcpu->exit_ctx.VpContext.Rip +
                  vcpu->exit_ctx.VpContext.InstructionLength;
            cpuid_fn = vcpu->exit_ctx.CpuidAccess.Rax;

            /*
             * Ideally, these should be supplied to the hypervisor during VCPU
             * initialization and it should be able to satisfy this request.
             * But, currently, WHPX doesn't support setting CPUID values in the
             * hypervisor once the partition has been setup, which is too late
             * since VCPUs are realized later. For now, use the values from
             * QEMU to satisfy these requests, until WHPX adds support for
             * being able to set these values in the hypervisor at runtime.
             */
            cpu_x86_cpuid(env, cpuid_fn, 0, (UINT32 *)&rax, (UINT32 *)&rbx,
                (UINT32 *)&rcx, (UINT32 *)&rdx);
            switch (cpuid_fn) {
            case 0x40000000:
                /* Expose the vmware cpu frequency cpuid leaf */
                rax = 0x40000010;
                rbx = rcx = rdx = 0;
                break;

            case 0x40000010:
                rax = env->tsc_khz;
                rbx = env->apic_bus_freq / 1000; /* Hz to KHz */
                rcx = rdx = 0;
                break;

            case 0x80000001:
                /* Remove any support of OSVW */
                rcx &= ~CPUID_EXT3_OSVW;
                break;
            }

            reg_names[0] = WHvX64RegisterRip;
            reg_names[1] = WHvX64RegisterRax;
            reg_names[2] = WHvX64RegisterRcx;
            reg_names[3] = WHvX64RegisterRdx;
            reg_names[4] = WHvX64RegisterRbx;

            reg_values[0].Reg64 = rip;
            reg_values[1].Reg64 = rax;
            reg_values[2].Reg64 = rcx;
            reg_values[3].Reg64 = rdx;
            reg_values[4].Reg64 = rbx;

            hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
                whpx->partition, cpu->cpu_index,
                reg_names,
                reg_count,
                reg_values);

            if (FAILED(hr)) {
                error_report("WHPX: Failed to set CpuidAccess state registers,"
                             " hr=%08lx", hr);
            }
            ret = 0;
            break;
        }
        case WHvRunVpExitReasonException:
            whpx_get_registers(cpu);

            if ((vcpu->exit_ctx.VpException.ExceptionType ==
                 WHvX64ExceptionTypeDebugTrapOrFault) &&
                (vcpu->exit_ctx.VpException.InstructionByteCount >= 1) &&
                (vcpu->exit_ctx.VpException.InstructionBytes[0] ==
                 whpx_breakpoint_instruction)) {
                /* Stopped at a software breakpoint. */
                cpu->exception_index = EXCP_DEBUG;
            } else if ((vcpu->exit_ctx.VpException.ExceptionType ==
                        WHvX64ExceptionTypeDebugTrapOrFault) &&
                       !cpu->singlestep_enabled) {
                /*
                 * Just finished stepping over a breakpoint, but the
                 * gdb does not expect us to do single-stepping.
                 * Don't do anything special.
                 */
                cpu->exception_index = EXCP_INTERRUPT;
            } else {
                /* Another exception or debug event. Report it to GDB. */
                cpu->exception_index = EXCP_DEBUG;
            }

            ret = 1;
            break;
        case WHvRunVpExitReasonNone:
        case WHvRunVpExitReasonUnrecoverableException:
        case WHvRunVpExitReasonInvalidVpRegisterValue:
        case WHvRunVpExitReasonUnsupportedFeature:
        default:
            error_report("WHPX: Unexpected VP exit code %d",
                         vcpu->exit_ctx.ExitReason);
            whpx_get_registers(cpu);
            qemu_mutex_lock_iothread();
            qemu_system_guest_panicked(cpu_get_crash_info(cpu));
            qemu_mutex_unlock_iothread();
            break;
        }

    } while (!ret);

    if (stepped_over_bp) {
        /* Restore the breakpoint we stepped over */
        cpu_memory_rw_debug(cpu,
                            stepped_over_bp->address,
                            (void *)&whpx_breakpoint_instruction,
                            1,
                            true);
    }

    if (exclusive_step_mode != WHPX_STEP_NONE) {
        g_assert(cpu_in_exclusive_context(cpu));
        cpu->running = false;
        end_exclusive();

        exclusive_step_mode = WHPX_STEP_NONE;
    } else {
        cpu_exec_end(cpu);
    }

    qemu_mutex_lock_iothread();
    current_cpu = cpu;

    if (--whpx->running_cpus == 0) {
        /* Last vCPU out removes breakpoints / restores guest memory. */
        whpx_last_vcpu_stopping(cpu);
    }

    qatomic_set(&cpu->exit_request, false);

    return ret < 0;
}
2079 | ||
2080 | static void do_whpx_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) | |
2081 | { | |
4df28c93 SM |
2082 | if (!cpu->vcpu_dirty) { |
2083 | whpx_get_registers(cpu); | |
2084 | cpu->vcpu_dirty = true; | |
2085 | } | |
812d49f2 JTV |
2086 | } |
2087 | ||
2088 | static void do_whpx_cpu_synchronize_post_reset(CPUState *cpu, | |
2089 | run_on_cpu_data arg) | |
2090 | { | |
6785e767 | 2091 | whpx_set_registers(cpu, WHPX_SET_RESET_STATE); |
812d49f2 JTV |
2092 | cpu->vcpu_dirty = false; |
2093 | } | |
2094 | ||
2095 | static void do_whpx_cpu_synchronize_post_init(CPUState *cpu, | |
2096 | run_on_cpu_data arg) | |
2097 | { | |
6785e767 | 2098 | whpx_set_registers(cpu, WHPX_SET_FULL_STATE); |
812d49f2 JTV |
2099 | cpu->vcpu_dirty = false; |
2100 | } | |
2101 | ||
2102 | static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu, | |
2103 | run_on_cpu_data arg) | |
2104 | { | |
2105 | cpu->vcpu_dirty = true; | |
2106 | } | |
2107 | ||
2108 | /* | |
2109 | * CPU support. | |
2110 | */ | |
2111 | ||
2112 | void whpx_cpu_synchronize_state(CPUState *cpu) | |
2113 | { | |
2114 | if (!cpu->vcpu_dirty) { | |
2115 | run_on_cpu(cpu, do_whpx_cpu_synchronize_state, RUN_ON_CPU_NULL); | |
2116 | } | |
2117 | } | |
2118 | ||
2119 | void whpx_cpu_synchronize_post_reset(CPUState *cpu) | |
2120 | { | |
2121 | run_on_cpu(cpu, do_whpx_cpu_synchronize_post_reset, RUN_ON_CPU_NULL); | |
2122 | } | |
2123 | ||
2124 | void whpx_cpu_synchronize_post_init(CPUState *cpu) | |
2125 | { | |
2126 | run_on_cpu(cpu, do_whpx_cpu_synchronize_post_init, RUN_ON_CPU_NULL); | |
2127 | } | |
2128 | ||
2129 | void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu) | |
2130 | { | |
2131 | run_on_cpu(cpu, do_whpx_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL); | |
2132 | } | |
2133 | ||
d7482ffe IS |
2134 | void whpx_cpu_synchronize_pre_resume(bool step_pending) |
2135 | { | |
2136 | whpx_global.step_pending = step_pending; | |
2137 | } | |
2138 | ||
812d49f2 JTV |
2139 | /* |
2140 | * Vcpu support. | |
2141 | */ | |
2142 | ||
2143 | static Error *whpx_migration_blocker; | |
2144 | ||
538f0497 | 2145 | static void whpx_cpu_update_state(void *opaque, bool running, RunState state) |
6785e767 SM |
2146 | { |
2147 | CPUX86State *env = opaque; | |
2148 | ||
2149 | if (running) { | |
2150 | env->tsc_valid = false; | |
2151 | } | |
2152 | } | |
2153 | ||
812d49f2 JTV |
2154 | int whpx_init_vcpu(CPUState *cpu) |
2155 | { | |
2156 | HRESULT hr; | |
2157 | struct whpx_state *whpx = &whpx_global; | |
5c8e1e83 | 2158 | struct whpx_vcpu *vcpu = NULL; |
812d49f2 | 2159 | Error *local_error = NULL; |
95e862d7 | 2160 | CPUX86State *env = cpu->env_ptr; |
5c8e1e83 SM |
2161 | X86CPU *x86_cpu = X86_CPU(cpu); |
2162 | UINT64 freq = 0; | |
2163 | int ret; | |
812d49f2 JTV |
2164 | |
2165 | /* Add migration blockers for all unsupported features of the | |
2166 | * Windows Hypervisor Platform | |
2167 | */ | |
2168 | if (whpx_migration_blocker == NULL) { | |
2169 | error_setg(&whpx_migration_blocker, | |
2170 | "State blocked due to non-migratable CPUID feature support," | |
2171 | "dirty memory tracking support, and XSAVE/XRSTOR support"); | |
2172 | ||
436c831a | 2173 | if (migrate_add_blocker(whpx_migration_blocker, &local_error) < 0) { |
812d49f2 | 2174 | error_report_err(local_error); |
327fccb2 | 2175 | error_free(whpx_migration_blocker); |
5c8e1e83 SM |
2176 | ret = -EINVAL; |
2177 | goto error; | |
812d49f2 JTV |
2178 | } |
2179 | } | |
2180 | ||
b21e2380 | 2181 | vcpu = g_new0(struct whpx_vcpu, 1); |
812d49f2 JTV |
2182 | |
2183 | if (!vcpu) { | |
2184 | error_report("WHPX: Failed to allocte VCPU context."); | |
5c8e1e83 SM |
2185 | ret = -ENOMEM; |
2186 | goto error; | |
812d49f2 JTV |
2187 | } |
2188 | ||
327fccb2 LP |
2189 | hr = whp_dispatch.WHvEmulatorCreateEmulator( |
2190 | &whpx_emu_callbacks, | |
2191 | &vcpu->emulator); | |
812d49f2 JTV |
2192 | if (FAILED(hr)) { |
2193 | error_report("WHPX: Failed to setup instruction completion support," | |
2194 | " hr=%08lx", hr); | |
5c8e1e83 SM |
2195 | ret = -EINVAL; |
2196 | goto error; | |
812d49f2 JTV |
2197 | } |
2198 | ||
327fccb2 LP |
2199 | hr = whp_dispatch.WHvCreateVirtualProcessor( |
2200 | whpx->partition, cpu->cpu_index, 0); | |
812d49f2 JTV |
2201 | if (FAILED(hr)) { |
2202 | error_report("WHPX: Failed to create a virtual processor," | |
2203 | " hr=%08lx", hr); | |
327fccb2 | 2204 | whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator); |
5c8e1e83 SM |
2205 | ret = -EINVAL; |
2206 | goto error; | |
812d49f2 JTV |
2207 | } |
2208 | ||
5c8e1e83 SM |
2209 | /* |
2210 | * vcpu's TSC frequency is either specified by user, or use the value | |
2211 | * provided by Hyper-V if the former is not present. In the latter case, we | |
2212 | * query it from Hyper-V and record in env->tsc_khz, so that vcpu's TSC | |
2213 | * frequency can be migrated later via this field. | |
2214 | */ | |
2215 | if (!env->tsc_khz) { | |
2216 | hr = whp_dispatch.WHvGetCapability( | |
2217 | WHvCapabilityCodeProcessorClockFrequency, &freq, sizeof(freq), | |
2218 | NULL); | |
2219 | if (hr != WHV_E_UNKNOWN_CAPABILITY) { | |
2220 | if (FAILED(hr)) { | |
2221 | printf("WHPX: Failed to query tsc frequency, hr=0x%08lx\n", hr); | |
2222 | } else { | |
2223 | env->tsc_khz = freq / 1000; /* Hz to KHz */ | |
2224 | } | |
2225 | } | |
2226 | } | |
812d49f2 | 2227 | |
5c8e1e83 SM |
2228 | env->apic_bus_freq = HYPERV_APIC_BUS_FREQUENCY; |
2229 | hr = whp_dispatch.WHvGetCapability( | |
2230 | WHvCapabilityCodeInterruptClockFrequency, &freq, sizeof(freq), NULL); | |
2231 | if (hr != WHV_E_UNKNOWN_CAPABILITY) { | |
2232 | if (FAILED(hr)) { | |
2233 | printf("WHPX: Failed to query apic bus frequency hr=0x%08lx\n", hr); | |
2234 | } else { | |
2235 | env->apic_bus_freq = freq; | |
2236 | } | |
2237 | } | |
2238 | ||
2239 | /* | |
2240 | * If the vmware cpuid frequency leaf option is set, and we have a valid | |
2241 | * tsc value, trap the corresponding cpuid's. | |
2242 | */ | |
2243 | if (x86_cpu->vmware_cpuid_freq && env->tsc_khz) { | |
2244 | UINT32 cpuidExitList[] = {1, 0x80000001, 0x40000000, 0x40000010}; | |
2245 | ||
2246 | hr = whp_dispatch.WHvSetPartitionProperty( | |
2247 | whpx->partition, | |
2248 | WHvPartitionPropertyCodeCpuidExitList, | |
2249 | cpuidExitList, | |
2250 | RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32)); | |
2251 | ||
2252 | if (FAILED(hr)) { | |
2253 | error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx", | |
2254 | hr); | |
2255 | ret = -EINVAL; | |
2256 | goto error; | |
2257 | } | |
2258 | } | |
2259 | ||
2260 | vcpu->interruptable = true; | |
812d49f2 JTV |
2261 | cpu->vcpu_dirty = true; |
2262 | cpu->hax_vcpu = (struct hax_vcpu_state *)vcpu; | |
faf20793 | 2263 | max_vcpu_index = max(max_vcpu_index, cpu->cpu_index); |
6785e767 | 2264 | qemu_add_vm_change_state_handler(whpx_cpu_update_state, cpu->env_ptr); |
812d49f2 JTV |
2265 | |
2266 | return 0; | |
5c8e1e83 SM |
2267 | |
2268 | error: | |
2269 | g_free(vcpu); | |
2270 | ||
2271 | return ret; | |
812d49f2 JTV |
2272 | } |
2273 | ||
2274 | int whpx_vcpu_exec(CPUState *cpu) | |
2275 | { | |
2276 | int ret; | |
2277 | int fatal; | |
2278 | ||
2279 | for (;;) { | |
2280 | if (cpu->exception_index >= EXCP_INTERRUPT) { | |
2281 | ret = cpu->exception_index; | |
2282 | cpu->exception_index = -1; | |
2283 | break; | |
2284 | } | |
2285 | ||
2286 | fatal = whpx_vcpu_run(cpu); | |
2287 | ||
2288 | if (fatal) { | |
2289 | error_report("WHPX: Failed to exec a virtual processor"); | |
2290 | abort(); | |
2291 | } | |
2292 | } | |
2293 | ||
2294 | return ret; | |
2295 | } | |
2296 | ||
2297 | void whpx_destroy_vcpu(CPUState *cpu) | |
2298 | { | |
2299 | struct whpx_state *whpx = &whpx_global; | |
2300 | struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); | |
2301 | ||
327fccb2 LP |
2302 | whp_dispatch.WHvDeleteVirtualProcessor(whpx->partition, cpu->cpu_index); |
2303 | whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator); | |
812d49f2 JTV |
2304 | g_free(cpu->hax_vcpu); |
2305 | return; | |
2306 | } | |
2307 | ||
2308 | void whpx_vcpu_kick(CPUState *cpu) | |
2309 | { | |
2310 | struct whpx_state *whpx = &whpx_global; | |
327fccb2 LP |
2311 | whp_dispatch.WHvCancelRunVirtualProcessor( |
2312 | whpx->partition, cpu->cpu_index, 0); | |
812d49f2 JTV |
2313 | } |
2314 | ||
2315 | /* | |
2316 | * Memory support. | |
2317 | */ | |
2318 | ||
2319 | static void whpx_update_mapping(hwaddr start_pa, ram_addr_t size, | |
2320 | void *host_va, int add, int rom, | |
2321 | const char *name) | |
2322 | { | |
2323 | struct whpx_state *whpx = &whpx_global; | |
2324 | HRESULT hr; | |
2325 | ||
2326 | /* | |
2327 | if (add) { | |
2328 | printf("WHPX: ADD PA:%p Size:%p, Host:%p, %s, '%s'\n", | |
2329 | (void*)start_pa, (void*)size, host_va, | |
2330 | (rom ? "ROM" : "RAM"), name); | |
2331 | } else { | |
2332 | printf("WHPX: DEL PA:%p Size:%p, Host:%p, '%s'\n", | |
2333 | (void*)start_pa, (void*)size, host_va, name); | |
2334 | } | |
2335 | */ | |
2336 | ||
2337 | if (add) { | |
327fccb2 LP |
2338 | hr = whp_dispatch.WHvMapGpaRange(whpx->partition, |
2339 | host_va, | |
2340 | start_pa, | |
2341 | size, | |
2342 | (WHvMapGpaRangeFlagRead | | |
2343 | WHvMapGpaRangeFlagExecute | | |
2344 | (rom ? 0 : WHvMapGpaRangeFlagWrite))); | |
812d49f2 | 2345 | } else { |
327fccb2 LP |
2346 | hr = whp_dispatch.WHvUnmapGpaRange(whpx->partition, |
2347 | start_pa, | |
2348 | size); | |
812d49f2 JTV |
2349 | } |
2350 | ||
2351 | if (FAILED(hr)) { | |
2352 | error_report("WHPX: Failed to %s GPA range '%s' PA:%p, Size:%p bytes," | |
2353 | " Host:%p, hr=%08lx", | |
2354 | (add ? "MAP" : "UNMAP"), name, | |
c3942bf2 | 2355 | (void *)(uintptr_t)start_pa, (void *)size, host_va, hr); |
812d49f2 JTV |
2356 | } |
2357 | } | |
2358 | ||
/*
 * Translate a memory section into a WHPX GPA mapping update.
 *
 * Only RAM-backed sections are mapped. The section is first trimmed to the
 * largest host-page-aligned sub-range: the start is rounded up to the next
 * host page boundary and the size truncated down to whole pages. Sections
 * that end up empty or still misaligned after trimming are skipped.
 */
static void whpx_process_section(MemoryRegionSection *section, int add)
{
    MemoryRegion *mr = section->mr;
    hwaddr start_pa = section->offset_within_address_space;
    ram_addr_t size = int128_get64(section->size);
    unsigned int delta;
    uint64_t host_va;

    if (!memory_region_is_ram(mr)) {
        return;
    }

    /* Bytes needed to reach the next host-page boundary (0 if aligned). */
    delta = qemu_real_host_page_size() - (start_pa & ~qemu_real_host_page_mask());
    delta &= ~qemu_real_host_page_mask();
    if (delta > size) {
        return;
    }
    start_pa += delta;
    size -= delta;
    /* Keep only whole host pages. */
    size &= qemu_real_host_page_mask();
    if (!size || (start_pa & ~qemu_real_host_page_mask())) {
        return;
    }

    /* Host address advances by the same trim so guest/host stay in sync. */
    host_va = (uintptr_t)memory_region_get_ram_ptr(mr)
            + section->offset_within_region + delta;

    whpx_update_mapping(start_pa, size, (void *)(uintptr_t)host_va, add,
                        memory_region_is_rom(mr), mr->name);
}
2389 | ||
2390 | static void whpx_region_add(MemoryListener *listener, | |
2391 | MemoryRegionSection *section) | |
2392 | { | |
2393 | memory_region_ref(section->mr); | |
2394 | whpx_process_section(section, 1); | |
2395 | } | |
2396 | ||
2397 | static void whpx_region_del(MemoryListener *listener, | |
2398 | MemoryRegionSection *section) | |
2399 | { | |
2400 | whpx_process_section(section, 0); | |
2401 | memory_region_unref(section->mr); | |
2402 | } | |
2403 | ||
/*
 * MemoryListener 'begin' hook. Intentionally empty: WHPX applies mapping
 * changes eagerly in whpx_region_add()/whpx_region_del() rather than
 * batching them per transaction.
 */
static void whpx_transaction_begin(MemoryListener *listener)
{
}
2407 | ||
/*
 * MemoryListener 'commit' hook. Intentionally empty — see
 * whpx_transaction_begin().
 */
static void whpx_transaction_commit(MemoryListener *listener)
{
}
2411 | ||
2412 | static void whpx_log_sync(MemoryListener *listener, | |
2413 | MemoryRegionSection *section) | |
2414 | { | |
2415 | MemoryRegion *mr = section->mr; | |
2416 | ||
2417 | if (!memory_region_is_ram(mr)) { | |
2418 | return; | |
2419 | } | |
2420 | ||
2421 | memory_region_set_dirty(mr, 0, int128_get64(section->size)); | |
2422 | } | |
2423 | ||
/*
 * Glue between QEMU's memory topology and the WHPX partition: GPA mappings
 * are installed and removed as RAM regions are added to / deleted from the
 * system address space.
 */
static MemoryListener whpx_memory_listener = {
    .name = "whpx",
    .begin = whpx_transaction_begin,
    .commit = whpx_transaction_commit,
    .region_add = whpx_region_add,
    .region_del = whpx_region_del,
    .log_sync = whpx_log_sync,
    .priority = 10,
};
2433 | ||
2434 | static void whpx_memory_init(void) | |
2435 | { | |
2436 | memory_listener_register(&whpx_memory_listener, &address_space_memory); | |
2437 | } | |
2438 | ||
b902710f SM |
/*
 * Load the functions from the given library, using the given handle. If a
 * handle is provided, it is used, otherwise the library is opened. The
 * handle will be updated on return with the opened one.
 *
 * Returns true when all mandatory entry points resolved; on failure the
 * library handle (if opened here) is released and false is returned.
 */
static bool load_whp_dispatch_fns(HMODULE *handle,
    WHPFunctionList function_list)
{
    HMODULE hLib = *handle;

#define WINHV_PLATFORM_DLL "WinHvPlatform.dll"
#define WINHV_EMULATION_DLL "WinHvEmulation.dll"
/* Optional entry point: absence leaves the pointer NULL; callers must check. */
#define WHP_LOAD_FIELD_OPTIONAL(return_type, function_name, signature) \
    whp_dispatch.function_name = \
        (function_name ## _t)GetProcAddress(hLib, #function_name); \

/* Mandatory entry point: failure to resolve aborts the whole load. */
#define WHP_LOAD_FIELD(return_type, function_name, signature) \
    whp_dispatch.function_name = \
        (function_name ## _t)GetProcAddress(hLib, #function_name); \
    if (!whp_dispatch.function_name) { \
        error_report("Could not load function %s", #function_name); \
        goto error; \
    } \

/* Open the DLL only if the caller did not already pass an open handle. */
#define WHP_LOAD_LIB(lib_name, handle_lib) \
    if (!handle_lib) { \
        handle_lib = LoadLibrary(lib_name); \
        if (!handle_lib) { \
            error_report("Could not load library %s.", lib_name); \
            goto error; \
        } \
    } \

    switch (function_list) {
    case WINHV_PLATFORM_FNS_DEFAULT:
        WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib)
        LIST_WINHVPLATFORM_FUNCTIONS(WHP_LOAD_FIELD)
        break;

    case WINHV_EMULATION_FNS_DEFAULT:
        WHP_LOAD_LIB(WINHV_EMULATION_DLL, hLib)
        LIST_WINHVEMULATION_FUNCTIONS(WHP_LOAD_FIELD)
        break;

    case WINHV_PLATFORM_FNS_SUPPLEMENTAL:
        /* Newer, optional platform APIs; missing ones stay NULL. */
        WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib)
        LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_LOAD_FIELD_OPTIONAL)
        break;
    }

    *handle = hLib;
    return true;

error:
    if (hLib) {
        FreeLibrary(hLib);
    }

    return false;
}
2499 | ||
faf20793 SM |
/*
 * QOM property setter for the accelerator's "kernel-irqchip" option.
 *
 * Parses an on/off/split value and records whether WHPX's in-hypervisor
 * APIC emulation is allowed and/or required. "split" is rejected because
 * WHPX does not support a split irqchip.
 */
static void whpx_set_kernel_irqchip(Object *obj, Visitor *v,
                                    const char *name, void *opaque,
                                    Error **errp)
{
    struct whpx_state *whpx = &whpx_global;
    OnOffSplit mode;

    if (!visit_type_OnOffSplit(v, name, &mode, errp)) {
        return;
    }

    switch (mode) {
    case ON_OFF_SPLIT_ON:
        whpx->kernel_irqchip_allowed = true;
        whpx->kernel_irqchip_required = true;
        break;

    case ON_OFF_SPLIT_OFF:
        whpx->kernel_irqchip_allowed = false;
        whpx->kernel_irqchip_required = false;
        break;

    case ON_OFF_SPLIT_SPLIT:
        error_setg(errp, "WHPX: split irqchip currently not supported");
        error_append_hint(errp,
            "Try without kernel-irqchip or with kernel-irqchip=on|off");
        break;

    default:
        /*
         * The value was checked in visit_type_OnOffSplit() above. If
         * we get here, then something is wrong in QEMU.
         */
        abort();
    }
}
2536 | ||
812d49f2 JTV |
2537 | /* |
2538 | * Partition support | |
2539 | */ | |
2540 | ||
2541 | static int whpx_accel_init(MachineState *ms) | |
2542 | { | |
2543 | struct whpx_state *whpx; | |
2544 | int ret; | |
2545 | HRESULT hr; | |
2546 | WHV_CAPABILITY whpx_cap; | |
3907e631 | 2547 | UINT32 whpx_cap_size; |
812d49f2 | 2548 | WHV_PARTITION_PROPERTY prop; |
5c8e1e83 | 2549 | UINT32 cpuidExitList[] = {1, 0x80000001}; |
faf20793 | 2550 | WHV_CAPABILITY_FEATURES features = {0}; |
812d49f2 JTV |
2551 | |
2552 | whpx = &whpx_global; | |
2553 | ||
327fccb2 LP |
2554 | if (!init_whp_dispatch()) { |
2555 | ret = -ENOSYS; | |
2556 | goto error; | |
2557 | } | |
2558 | ||
812d49f2 JTV |
2559 | whpx->mem_quota = ms->ram_size; |
2560 | ||
327fccb2 LP |
2561 | hr = whp_dispatch.WHvGetCapability( |
2562 | WHvCapabilityCodeHypervisorPresent, &whpx_cap, | |
2563 | sizeof(whpx_cap), &whpx_cap_size); | |
812d49f2 JTV |
2564 | if (FAILED(hr) || !whpx_cap.HypervisorPresent) { |
2565 | error_report("WHPX: No accelerator found, hr=%08lx", hr); | |
2566 | ret = -ENOSPC; | |
2567 | goto error; | |
2568 | } | |
2569 | ||
faf20793 SM |
2570 | hr = whp_dispatch.WHvGetCapability( |
2571 | WHvCapabilityCodeFeatures, &features, sizeof(features), NULL); | |
2572 | if (FAILED(hr)) { | |
2573 | error_report("WHPX: Failed to query capabilities, hr=%08lx", hr); | |
2574 | ret = -EINVAL; | |
2575 | goto error; | |
2576 | } | |
2577 | ||
327fccb2 | 2578 | hr = whp_dispatch.WHvCreatePartition(&whpx->partition); |
812d49f2 JTV |
2579 | if (FAILED(hr)) { |
2580 | error_report("WHPX: Failed to create partition, hr=%08lx", hr); | |
2581 | ret = -EINVAL; | |
2582 | goto error; | |
2583 | } | |
2584 | ||
b6b3da99 SM |
2585 | /* |
2586 | * Query the XSAVE capability of the partition. Any error here is not | |
2587 | * considered fatal. | |
2588 | */ | |
2589 | hr = whp_dispatch.WHvGetPartitionProperty( | |
2590 | whpx->partition, | |
2591 | WHvPartitionPropertyCodeProcessorXsaveFeatures, | |
2592 | &whpx_xsave_cap, | |
2593 | sizeof(whpx_xsave_cap), | |
2594 | &whpx_cap_size); | |
2595 | ||
2596 | /* | |
2597 | * Windows version which don't support this property will return with the | |
2598 | * specific error code. | |
2599 | */ | |
2600 | if (FAILED(hr) && hr != WHV_E_UNKNOWN_PROPERTY) { | |
2601 | error_report("WHPX: Failed to query XSAVE capability, hr=%08lx", hr); | |
2602 | } | |
2603 | ||
2604 | if (!whpx_has_xsave()) { | |
2605 | printf("WHPX: Partition is not XSAVE capable\n"); | |
2606 | } | |
2607 | ||
812d49f2 | 2608 | memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY)); |
f2b143a2 | 2609 | prop.ProcessorCount = ms->smp.cpus; |
327fccb2 LP |
2610 | hr = whp_dispatch.WHvSetPartitionProperty( |
2611 | whpx->partition, | |
2612 | WHvPartitionPropertyCodeProcessorCount, | |
2613 | &prop, | |
2614 | sizeof(WHV_PARTITION_PROPERTY)); | |
812d49f2 JTV |
2615 | |
2616 | if (FAILED(hr)) { | |
2617 | error_report("WHPX: Failed to set partition core count to %d," | |
f2b143a2 | 2618 | " hr=%08lx", ms->smp.cores, hr); |
812d49f2 JTV |
2619 | ret = -EINVAL; |
2620 | goto error; | |
7becac84 JTV |
2621 | } |
2622 | ||
faf20793 SM |
2623 | /* |
2624 | * Error out if WHP doesn't support apic emulation and user is requiring | |
2625 | * it. | |
2626 | */ | |
2627 | if (whpx->kernel_irqchip_required && (!features.LocalApicEmulation || | |
2628 | !whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2)) { | |
2629 | error_report("WHPX: kernel irqchip requested, but unavailable. " | |
2630 | "Try without kernel-irqchip or with kernel-irqchip=off"); | |
2631 | ret = -EINVAL; | |
2632 | goto error; | |
2633 | } | |
2634 | ||
2635 | if (whpx->kernel_irqchip_allowed && features.LocalApicEmulation && | |
2636 | whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2) { | |
2637 | WHV_X64_LOCAL_APIC_EMULATION_MODE mode = | |
2638 | WHvX64LocalApicEmulationModeXApic; | |
2639 | printf("WHPX: setting APIC emulation mode in the hypervisor\n"); | |
2640 | hr = whp_dispatch.WHvSetPartitionProperty( | |
2641 | whpx->partition, | |
2642 | WHvPartitionPropertyCodeLocalApicEmulationMode, | |
2643 | &mode, | |
2644 | sizeof(mode)); | |
2645 | if (FAILED(hr)) { | |
2646 | error_report("WHPX: Failed to enable kernel irqchip hr=%08lx", hr); | |
2647 | if (whpx->kernel_irqchip_required) { | |
2648 | error_report("WHPX: kernel irqchip requested, but unavailable"); | |
2649 | ret = -EINVAL; | |
2650 | goto error; | |
2651 | } | |
2652 | } else { | |
2653 | whpx->apic_in_platform = true; | |
2654 | } | |
2655 | } | |
2656 | ||
2657 | /* Register for MSR and CPUID exits */ | |
7becac84 | 2658 | memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY)); |
e7ca549f | 2659 | prop.ExtendedVmExits.X64MsrExit = 1; |
7becac84 | 2660 | prop.ExtendedVmExits.X64CpuidExit = 1; |
d7482ffe | 2661 | prop.ExtendedVmExits.ExceptionExit = 1; |
faf20793 SM |
2662 | if (whpx_apic_in_platform()) { |
2663 | prop.ExtendedVmExits.X64ApicInitSipiExitTrap = 1; | |
2664 | } | |
7becac84 | 2665 | |
faf20793 SM |
2666 | hr = whp_dispatch.WHvSetPartitionProperty( |
2667 | whpx->partition, | |
2668 | WHvPartitionPropertyCodeExtendedVmExits, | |
2669 | &prop, | |
2670 | sizeof(WHV_PARTITION_PROPERTY)); | |
7becac84 | 2671 | if (FAILED(hr)) { |
faf20793 | 2672 | error_report("WHPX: Failed to enable MSR & CPUIDexit, hr=%08lx", hr); |
7becac84 JTV |
2673 | ret = -EINVAL; |
2674 | goto error; | |
2675 | } | |
2676 | ||
327fccb2 LP |
2677 | hr = whp_dispatch.WHvSetPartitionProperty( |
2678 | whpx->partition, | |
2679 | WHvPartitionPropertyCodeCpuidExitList, | |
2680 | cpuidExitList, | |
2681 | RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32)); | |
e1753a7e | 2682 | |
7becac84 JTV |
2683 | if (FAILED(hr)) { |
2684 | error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx", | |
2685 | hr); | |
2686 | ret = -EINVAL; | |
2687 | goto error; | |
812d49f2 JTV |
2688 | } |
2689 | ||
d7482ffe IS |
2690 | /* |
2691 | * We do not want to intercept any exceptions from the guest, | |
2692 | * until we actually start debugging with gdb. | |
2693 | */ | |
2694 | whpx->exception_exit_bitmap = -1; | |
2695 | hr = whpx_set_exception_exit_bitmap(0); | |
2696 | ||
2697 | if (FAILED(hr)) { | |
2698 | error_report("WHPX: Failed to set exception exit bitmap, hr=%08lx", hr); | |
2699 | ret = -EINVAL; | |
2700 | goto error; | |
2701 | } | |
2702 | ||
327fccb2 | 2703 | hr = whp_dispatch.WHvSetupPartition(whpx->partition); |
812d49f2 JTV |
2704 | if (FAILED(hr)) { |
2705 | error_report("WHPX: Failed to setup partition, hr=%08lx", hr); | |
2706 | ret = -EINVAL; | |
2707 | goto error; | |
2708 | } | |
2709 | ||
812d49f2 JTV |
2710 | whpx_memory_init(); |
2711 | ||
812d49f2 JTV |
2712 | printf("Windows Hypervisor Platform accelerator is operational\n"); |
2713 | return 0; | |
2714 | ||
5c8e1e83 | 2715 | error: |
812d49f2 JTV |
2716 | |
2717 | if (NULL != whpx->partition) { | |
327fccb2 | 2718 | whp_dispatch.WHvDeletePartition(whpx->partition); |
812d49f2 JTV |
2719 | whpx->partition = NULL; |
2720 | } | |
2721 | ||
812d49f2 JTV |
2722 | return ret; |
2723 | } | |
2724 | ||
/*
 * Return non-zero when the WHPX accelerator has been selected and
 * successfully enabled (mirrors the accelerator's "allowed" flag).
 */
int whpx_enabled(void)
{
    return whpx_allowed;
}
2729 | ||
84f4ef17 PB |
2730 | bool whpx_apic_in_platform(void) { |
2731 | return whpx_global.apic_in_platform; | |
2732 | } | |
2733 | ||
812d49f2 JTV |
2734 | static void whpx_accel_class_init(ObjectClass *oc, void *data) |
2735 | { | |
2736 | AccelClass *ac = ACCEL_CLASS(oc); | |
2737 | ac->name = "WHPX"; | |
2738 | ac->init_machine = whpx_accel_init; | |
2739 | ac->allowed = &whpx_allowed; | |
faf20793 SM |
2740 | |
2741 | object_class_property_add(oc, "kernel-irqchip", "on|off|split", | |
2742 | NULL, whpx_set_kernel_irqchip, | |
2743 | NULL, NULL); | |
2744 | object_class_property_set_description(oc, "kernel-irqchip", | |
2745 | "Configure WHPX in-kernel irqchip"); | |
2746 | } | |
2747 | ||
2748 | static void whpx_accel_instance_init(Object *obj) | |
2749 | { | |
2750 | struct whpx_state *whpx = &whpx_global; | |
2751 | ||
2752 | memset(whpx, 0, sizeof(struct whpx_state)); | |
2753 | /* Turn on kernel-irqchip, by default */ | |
2754 | whpx->kernel_irqchip_allowed = true; | |
812d49f2 JTV |
2755 | } |
2756 | ||
/* QOM type descriptor registering "whpx" as an accelerator class. */
static const TypeInfo whpx_accel_type = {
    .name = ACCEL_CLASS_NAME("whpx"),
    .parent = TYPE_ACCEL,
    .instance_init = whpx_accel_instance_init,
    .class_init = whpx_accel_class_init,
};
2763 | ||
/* Register the WHPX accelerator type with the QOM type system. */
static void whpx_type_init(void)
{
    type_register_static(&whpx_accel_type);
}
2768 | ||
327fccb2 LP |
2769 | bool init_whp_dispatch(void) |
2770 | { | |
327fccb2 LP |
2771 | if (whp_dispatch_initialized) { |
2772 | return true; | |
2773 | } | |
2774 | ||
b902710f | 2775 | if (!load_whp_dispatch_fns(&hWinHvPlatform, WINHV_PLATFORM_FNS_DEFAULT)) { |
327fccb2 LP |
2776 | goto error; |
2777 | } | |
327fccb2 | 2778 | |
b902710f | 2779 | if (!load_whp_dispatch_fns(&hWinHvEmulation, WINHV_EMULATION_FNS_DEFAULT)) { |
327fccb2 LP |
2780 | goto error; |
2781 | } | |
327fccb2 | 2782 | |
6785e767 SM |
2783 | assert(load_whp_dispatch_fns(&hWinHvPlatform, |
2784 | WINHV_PLATFORM_FNS_SUPPLEMENTAL)); | |
327fccb2 | 2785 | whp_dispatch_initialized = true; |
327fccb2 | 2786 | |
b902710f SM |
2787 | return true; |
2788 | error: | |
327fccb2 LP |
2789 | if (hWinHvPlatform) { |
2790 | FreeLibrary(hWinHvPlatform); | |
2791 | } | |
b902710f | 2792 | |
327fccb2 LP |
2793 | if (hWinHvEmulation) { |
2794 | FreeLibrary(hWinHvEmulation); | |
2795 | } | |
b902710f | 2796 | |
327fccb2 LP |
2797 | return false; |
2798 | } | |
2799 | ||
812d49f2 | 2800 | type_init(whpx_type_init); |