i386: hvf: add code base from Google's QEMU repository
target/i386/hvf-all.c
/* Copyright 2008 IBM Corporation
 *           2008 Red Hat, Inc.
 * Copyright 2011 Intel Corporation
 * Copyright 2016 Veertu, Inc.
 * Copyright 2017 The Android Open Source Project
 *
 * QEMU Hypervisor.framework support
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/error-report.h"

#include "sysemu/hvf.h"
#include "hvf-i386.h"
#include "hvf-utils/vmcs.h"
#include "hvf-utils/vmx.h"
#include "hvf-utils/x86.h"
#include "hvf-utils/x86_descr.h"
#include "hvf-utils/x86_mmu.h"
#include "hvf-utils/x86_decode.h"
#include "hvf-utils/x86_emu.h"
#include "hvf-utils/x86hvf.h"

#include <Hypervisor/hv.h>
#include <Hypervisor/hv_vmx.h>

#include "exec/address-spaces.h"
#include "exec/exec-all.h"
#include "exec/ioport.h"
#include "hw/i386/apic_internal.h"
#include "hw/boards.h"
#include "qemu/main-loop.h"
#include "strings.h"
#include "trace.h"
#include "sysemu/accel.h"
#include "sysemu/sysemu.h"
#include "target/i386/cpu.h"

pthread_rwlock_t mem_lock = PTHREAD_RWLOCK_INITIALIZER;
HVFState *hvf_state;
int hvf_disabled = 1;

static void assert_hvf_ok(hv_return_t ret)
{
    if (ret == HV_SUCCESS) {
        return;
    }

    switch (ret) {
    case HV_ERROR:
        error_report("Error: HV_ERROR");
        break;
    case HV_BUSY:
        error_report("Error: HV_BUSY");
        break;
    case HV_BAD_ARGUMENT:
        error_report("Error: HV_BAD_ARGUMENT");
        break;
    case HV_NO_RESOURCES:
        error_report("Error: HV_NO_RESOURCES");
        break;
    case HV_NO_DEVICE:
        error_report("Error: HV_NO_DEVICE");
        break;
    case HV_UNSUPPORTED:
        error_report("Error: HV_UNSUPPORTED");
        break;
    default:
        error_report("Unknown Error");
    }

    abort();
}
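
/*
 * assert_hvf_ok() wraps Hypervisor.framework calls that this file treats as
 * must-succeed, e.g. assert_hvf_ok(hv_vm_map(mem, gpa, size, flags));
 * any result other than HV_SUCCESS is reported and aborts QEMU.
 */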

/* Memory slots */
hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t end)
{
    hvf_slot *slot;
    int x;
    for (x = 0; x < hvf_state->num_slots; ++x) {
        slot = &hvf_state->slots[x];
        if (slot->size && start < (slot->start + slot->size) &&
            end > slot->start) {
            return slot;
        }
    }
    return NULL;
}

struct mac_slot {
    int present;
    uint64_t size;
    uint64_t gpa_start;
    uint64_t gva;
};

struct mac_slot mac_slots[32];
#define ALIGN(x, y)  (((x) + (y) - 1) & ~((y) - 1))
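
/*
 * ALIGN(x, y) rounds x up to the next multiple of y, where y must be a
 * power of two, e.g. ALIGN(0x1234, 0x1000) == 0x2000.
 */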

static int do_hvf_set_memory(hvf_slot *slot)
{
    struct mac_slot *macslot;
    hv_memory_flags_t flags;
    hv_return_t ret;

    macslot = &mac_slots[slot->slot_id];

    if (macslot->present) {
        if (macslot->size != slot->size) {
            macslot->present = 0;
            ret = hv_vm_unmap(macslot->gpa_start, macslot->size);
            assert_hvf_ok(ret);
        }
    }

    if (!slot->size) {
        return 0;
    }

    flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC;

    macslot->present = 1;
    macslot->gpa_start = slot->start;
    macslot->size = slot->size;
    ret = hv_vm_map((hv_uvaddr_t)slot->mem, slot->start, slot->size, flags);
    assert_hvf_ok(ret);
    return 0;
}
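
/*
 * There is no in-place resize of an HVF mapping: when a slot's size changes,
 * the old guest-physical range is hv_vm_unmap()ed above and the new range is
 * hv_vm_map()ed again with read/write/execute permissions.
 */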

void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
{
    hvf_slot *mem;
    MemoryRegion *area = section->mr;

    if (!memory_region_is_ram(area)) {
        return;
    }

    mem = hvf_find_overlap_slot(
            section->offset_within_address_space,
            section->offset_within_address_space + int128_get64(section->size));

    if (mem && add) {
        if (mem->size == int128_get64(section->size) &&
            mem->start == section->offset_within_address_space &&
            mem->mem == (memory_region_get_ram_ptr(area) +
            section->offset_within_region)) {
            return; /* Same region is already registered, nothing to do. */
        }
    }

    /* Region needs to be reset: set the size to 0 and remap it. */
    if (mem) {
        mem->size = 0;
        if (do_hvf_set_memory(mem)) {
            error_report("Failed to reset overlapping slot");
            abort();
        }
    }

    if (!add) {
        return;
    }

    /* Now make a new slot. */
    int x;

    for (x = 0; x < hvf_state->num_slots; ++x) {
        mem = &hvf_state->slots[x];
        if (!mem->size) {
            break;
        }
    }

    if (x == hvf_state->num_slots) {
        error_report("No free slots");
        abort();
    }

    mem->size = int128_get64(section->size);
    mem->mem = memory_region_get_ram_ptr(area) + section->offset_within_region;
    mem->start = section->offset_within_address_space;

    if (do_hvf_set_memory(mem)) {
        error_report("Error registering new memory slot");
        abort();
    }
}

void vmx_update_tpr(CPUState *cpu)
{
    /* TODO: integrate APIC handling */
    X86CPU *x86_cpu = X86_CPU(cpu);
    int tpr = cpu_get_apic_tpr(x86_cpu->apic_state) << 4;
    int irr = apic_get_highest_priority_irr(x86_cpu->apic_state);

    wreg(cpu->hvf_fd, HV_X86_TPR, tpr);
    if (irr == -1) {
        wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);
    } else {
        wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, (irr > tpr) ? tpr >> 4 :
              irr >> 4);
    }
}

void update_apic_tpr(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    int tpr = rreg(cpu->hvf_fd, HV_X86_TPR) >> 4;
    cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
}
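
/*
 * cpu_get_apic_tpr()/cpu_set_apic_tpr() operate on the 4-bit priority class
 * (the CR8 view of the task priority), while HV_X86_TPR holds the full 8-bit
 * TPR with the class in bits 7:4; hence the << 4 / >> 4 conversions above.
 * VMCS_TPR_THRESHOLD likewise takes the 4-bit class.
 */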

#define VECTORING_INFO_VECTOR_MASK 0xff

/* TODO: task switch handling */
static void save_state_to_tss32(CPUState *cpu, struct x86_tss_segment32 *tss)
{
    /* CR3 and ldt selector are not saved intentionally */
    tss->eip = EIP(cpu);
    tss->eflags = EFLAGS(cpu);
    tss->eax = EAX(cpu);
    tss->ecx = ECX(cpu);
    tss->edx = EDX(cpu);
    tss->ebx = EBX(cpu);
    tss->esp = ESP(cpu);
    tss->ebp = EBP(cpu);
    tss->esi = ESI(cpu);
    tss->edi = EDI(cpu);

    tss->es = vmx_read_segment_selector(cpu, REG_SEG_ES).sel;
    tss->cs = vmx_read_segment_selector(cpu, REG_SEG_CS).sel;
    tss->ss = vmx_read_segment_selector(cpu, REG_SEG_SS).sel;
    tss->ds = vmx_read_segment_selector(cpu, REG_SEG_DS).sel;
    tss->fs = vmx_read_segment_selector(cpu, REG_SEG_FS).sel;
    tss->gs = vmx_read_segment_selector(cpu, REG_SEG_GS).sel;
}

static void load_state_from_tss32(CPUState *cpu, struct x86_tss_segment32 *tss)
{
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CR3, tss->cr3);

    RIP(cpu) = tss->eip;
    EFLAGS(cpu) = tss->eflags | 2;

    /* General purpose registers */
    RAX(cpu) = tss->eax;
    RCX(cpu) = tss->ecx;
    RDX(cpu) = tss->edx;
    RBX(cpu) = tss->ebx;
    RSP(cpu) = tss->esp;
    RBP(cpu) = tss->ebp;
    RSI(cpu) = tss->esi;
    RDI(cpu) = tss->edi;

    vmx_write_segment_selector(cpu, (x68_segment_selector){{tss->ldt}}, REG_SEG_LDTR);
    vmx_write_segment_selector(cpu, (x68_segment_selector){{tss->es}}, REG_SEG_ES);
    vmx_write_segment_selector(cpu, (x68_segment_selector){{tss->cs}}, REG_SEG_CS);
    vmx_write_segment_selector(cpu, (x68_segment_selector){{tss->ss}}, REG_SEG_SS);
    vmx_write_segment_selector(cpu, (x68_segment_selector){{tss->ds}}, REG_SEG_DS);
    vmx_write_segment_selector(cpu, (x68_segment_selector){{tss->fs}}, REG_SEG_FS);
    vmx_write_segment_selector(cpu, (x68_segment_selector){{tss->gs}}, REG_SEG_GS);

#if 0
    load_segment(cpu, REG_SEG_LDTR, tss->ldt);
    load_segment(cpu, REG_SEG_ES, tss->es);
    load_segment(cpu, REG_SEG_CS, tss->cs);
    load_segment(cpu, REG_SEG_SS, tss->ss);
    load_segment(cpu, REG_SEG_DS, tss->ds);
    load_segment(cpu, REG_SEG_FS, tss->fs);
    load_segment(cpu, REG_SEG_GS, tss->gs);
#endif
}

static int task_switch_32(CPUState *cpu, x68_segment_selector tss_sel,
                          x68_segment_selector old_tss_sel,
                          uint64_t old_tss_base,
                          struct x86_segment_descriptor *new_desc)
{
    struct x86_tss_segment32 tss_seg;
    uint32_t new_tss_base = x86_segment_base(new_desc);
    uint32_t eip_offset = offsetof(struct x86_tss_segment32, eip);
    uint32_t ldt_sel_offset = offsetof(struct x86_tss_segment32, ldt);

    vmx_read_mem(cpu, &tss_seg, old_tss_base, sizeof(tss_seg));
    save_state_to_tss32(cpu, &tss_seg);

    vmx_write_mem(cpu, old_tss_base + eip_offset, &tss_seg.eip,
                  ldt_sel_offset - eip_offset);
    vmx_read_mem(cpu, &tss_seg, new_tss_base, sizeof(tss_seg));

    if (old_tss_sel.sel != 0xffff) {
        tss_seg.prev_tss = old_tss_sel.sel;

        vmx_write_mem(cpu, new_tss_base, &tss_seg.prev_tss,
                      sizeof(tss_seg.prev_tss));
    }
    load_state_from_tss32(cpu, &tss_seg);
    return 0;
}
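
/*
 * Only the bytes from offsetof(eip) up to (but not including) offsetof(ldt)
 * of the outgoing TSS are written back, i.e. the dynamic register state; the
 * static fields (CR3, LDT selector) are left untouched, matching the comment
 * in save_state_to_tss32().
 */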

static void vmx_handle_task_switch(CPUState *cpu, x68_segment_selector tss_sel,
                                   int reason, bool gate_valid, uint8_t gate,
                                   uint64_t gate_type)
{
    uint64_t rip = rreg(cpu->hvf_fd, HV_X86_RIP);
    if (!gate_valid || (gate_type != VMCS_INTR_T_HWEXCEPTION &&
                        gate_type != VMCS_INTR_T_HWINTR &&
                        gate_type != VMCS_INTR_T_NMI)) {
        int ins_len = rvmcs(cpu->hvf_fd, VMCS_EXIT_INSTRUCTION_LENGTH);
        macvm_set_rip(cpu, rip + ins_len);
        return;
    }

    load_regs(cpu);

    struct x86_segment_descriptor curr_tss_desc, next_tss_desc;
    int ret;
    x68_segment_selector old_tss_sel = vmx_read_segment_selector(cpu, REG_SEG_TR);
    uint64_t old_tss_base = vmx_read_segment_base(cpu, REG_SEG_TR);
    uint32_t desc_limit;
    struct x86_call_gate task_gate_desc;
    struct vmx_segment vmx_seg;

    x86_read_segment_descriptor(cpu, &next_tss_desc, tss_sel);
    x86_read_segment_descriptor(cpu, &curr_tss_desc, old_tss_sel);

    if (reason == TSR_IDT_GATE && gate_valid) {
        int dpl;

        ret = x86_read_call_gate(cpu, &task_gate_desc, gate);

        dpl = task_gate_desc.dpl;
        x68_segment_selector cs = vmx_read_segment_selector(cpu, REG_SEG_CS);
        if (tss_sel.rpl > dpl || cs.rpl > dpl) {
            /* DPRINTF("emulate_gp"); */
        }
    }

    desc_limit = x86_segment_limit(&next_tss_desc);
    if (!next_tss_desc.p || ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
                             desc_limit < 0x2b)) {
        VM_PANIC("emulate_ts");
    }

    if (reason == TSR_IRET || reason == TSR_JMP) {
        curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
        x86_write_segment_descriptor(cpu, &curr_tss_desc, old_tss_sel);
    }

    if (reason == TSR_IRET) {
        EFLAGS(cpu) &= ~RFLAGS_NT;
    }

    if (reason != TSR_CALL && reason != TSR_IDT_GATE) {
        old_tss_sel.sel = 0xffff;
    }

    if (reason != TSR_IRET) {
        next_tss_desc.type |= (1 << 1); /* set busy flag */
        x86_write_segment_descriptor(cpu, &next_tss_desc, tss_sel);
    }

    if (next_tss_desc.type & 8) {
        ret = task_switch_32(cpu, tss_sel, old_tss_sel, old_tss_base, &next_tss_desc);
    } else {
        /* ret = task_switch_16(cpu, tss_sel, old_tss_sel, old_tss_base, &next_tss_desc); */
        VM_PANIC("task_switch_16");
    }

    macvm_set_cr0(cpu->hvf_fd, rvmcs(cpu->hvf_fd, VMCS_GUEST_CR0) | CR0_TS);
    x86_segment_descriptor_to_vmx(cpu, tss_sel, &next_tss_desc, &vmx_seg);
    vmx_write_segment_descriptor(cpu, &vmx_seg, REG_SEG_TR);

    store_regs(cpu);

    hv_vcpu_invalidate_tlb(cpu->hvf_fd);
    hv_vcpu_flush(cpu->hvf_fd);
}

static void hvf_handle_interrupt(CPUState *cpu, int mask)
{
    cpu->interrupt_request |= mask;
    if (!qemu_cpu_is_self(cpu)) {
        qemu_cpu_kick(cpu);
    }
}

void hvf_handle_io(CPUArchState *env, uint16_t port, void *buffer,
                   int direction, int size, int count)
{
    int i;
    uint8_t *ptr = buffer;

    for (i = 0; i < count; i++) {
        address_space_rw(&address_space_io, port, MEMTXATTRS_UNSPECIFIED,
                         ptr, size,
                         direction);
        ptr += size;
    }
}
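
/*
 * Callers in this file pass direction == 0 for an IN access (data is read
 * from the I/O address space into buffer) and direction == 1 for OUT, i.e.
 * direction is used directly as the is_write argument of address_space_rw().
 */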

/* TODO: synchronize vcpu state */
static void do_hvf_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    CPUState *cpu_state = cpu;
    if (cpu_state->vcpu_dirty == 0) {
        hvf_get_registers(cpu_state);
    }

    cpu_state->vcpu_dirty = 1;
}

void hvf_cpu_synchronize_state(CPUState *cpu_state)
{
    if (cpu_state->vcpu_dirty == 0) {
        run_on_cpu(cpu_state, do_hvf_cpu_synchronize_state, RUN_ON_CPU_NULL);
    }
}

static void do_hvf_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
{
    CPUState *cpu_state = cpu;
    hvf_put_registers(cpu_state);
    cpu_state->vcpu_dirty = false;
}

void hvf_cpu_synchronize_post_reset(CPUState *cpu_state)
{
    run_on_cpu(cpu_state, do_hvf_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}

void _hvf_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
{
    CPUState *cpu_state = cpu;
    hvf_put_registers(cpu_state);
    cpu_state->vcpu_dirty = false;
}

void hvf_cpu_synchronize_post_init(CPUState *cpu_state)
{
    run_on_cpu(cpu_state, _hvf_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}

/* TODO: EPT fault handling */
static bool ept_emulation_fault(uint64_t ept_qual)
{
    int read, write;

    /* EPT fault on an instruction fetch doesn't make sense here */
    if (ept_qual & EPT_VIOLATION_INST_FETCH) {
        return false;
    }

    /* EPT fault must be a read fault or a write fault */
    read = ept_qual & EPT_VIOLATION_DATA_READ ? 1 : 0;
    write = ept_qual & EPT_VIOLATION_DATA_WRITE ? 1 : 0;
    if ((read | write) == 0) {
        return false;
    }

    /*
     * The EPT violation must have been caused by accessing a
     * guest-physical address that is a translation of a guest-linear
     * address.
     */
    if ((ept_qual & EPT_VIOLATION_GLA_VALID) == 0 ||
        (ept_qual & EPT_VIOLATION_XLAT_VALID) == 0) {
        return false;
    }

    return true;
}
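
/*
 * ept_emulation_fault() returning true for an access that does not hit any
 * registered RAM slot is what sends an EXIT_REASON_EPT_FAULT down the MMIO
 * instruction-emulation path in hvf_vcpu_exec() below.
 */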

static void hvf_region_add(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    hvf_set_phys_mem(section, true);
}

static void hvf_region_del(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    hvf_set_phys_mem(section, false);
}

static MemoryListener hvf_memory_listener = {
    .priority = 10,
    .region_add = hvf_region_add,
    .region_del = hvf_region_del,
};

void hvf_reset_vcpu(CPUState *cpu)
{
    /* TODO: this shouldn't be needed; there is already a call to
     * cpu_synchronize_all_post_reset in vl.c
     */
    wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER, 0);
    macvm_set_cr0(cpu->hvf_fd, 0x60000010);

    wvmcs(cpu->hvf_fd, VMCS_CR4_MASK, CR4_VMXE_MASK);
    wvmcs(cpu->hvf_fd, VMCS_CR4_SHADOW, 0x0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CR4, CR4_VMXE_MASK);

    /* set VMCS guest state fields */
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_SELECTOR, 0xf000);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_ACCESS_RIGHTS, 0x9b);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_BASE, 0xffff0000);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_ACCESS_RIGHTS, 0x10000);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_ACCESS_RIGHTS, 0x83);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_IDTR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_IDTR_BASE, 0);

    /*wvmcs(cpu->hvf_fd, VMCS_GUEST_CR2, 0x0);*/
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CR3, 0x0);

    wreg(cpu->hvf_fd, HV_X86_RIP, 0xfff0);
    wreg(cpu->hvf_fd, HV_X86_RDX, 0x623);
    wreg(cpu->hvf_fd, HV_X86_RFLAGS, 0x2);
    wreg(cpu->hvf_fd, HV_X86_RSP, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RAX, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RBX, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RCX, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RSI, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RDI, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RBP, 0x0);

    for (int i = 0; i < 8; i++) {
        wreg(cpu->hvf_fd, HV_X86_R8 + i, 0x0);
    }

    hv_vm_sync_tsc(0);
    cpu->halted = 0;
    hv_vcpu_invalidate_tlb(cpu->hvf_fd);
    hv_vcpu_flush(cpu->hvf_fd);
}
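
/*
 * The values above mirror the architectural x86 reset state: CS selector
 * 0xf000 with base 0xffff0000 and RIP 0xfff0 make the first instruction
 * fetch come from 0xfffffff0, RFLAGS 0x2 keeps only the reserved always-set
 * bit, and RDX holds a processor signature value (here 0x623).
 */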

void hvf_vcpu_destroy(CPUState *cpu)
{
    hv_return_t ret = hv_vcpu_destroy((hv_vcpuid_t)cpu->hvf_fd);
    assert_hvf_ok(ret);
}

static void dummy_signal(int sig)
{
}

int hvf_init_vcpu(CPUState *cpu)
{
    X86CPU *x86cpu = X86_CPU(cpu);
    CPUX86State *env = &x86cpu->env;
    int r;

    /* init cpu signals */
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);

    init_emu();
    init_decoder();

    hvf_state->hvf_caps = g_new0(struct hvf_vcpu_caps, 1);
    env->hvf_emul = g_new0(HVFX86EmulatorState, 1);

    r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf_fd, HV_VCPU_DEFAULT);
    cpu->vcpu_dirty = 1;
    assert_hvf_ok(r);

    if (hv_vmx_read_capability(HV_VMX_CAP_PINBASED,
                               &hvf_state->hvf_caps->vmx_cap_pinbased)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED,
                               &hvf_state->hvf_caps->vmx_cap_procbased)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED2,
                               &hvf_state->hvf_caps->vmx_cap_procbased2)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_ENTRY,
                               &hvf_state->hvf_caps->vmx_cap_entry)) {
        abort();
    }

    /* set VMCS control fields */
    wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_pinbased,
                   VMCS_PIN_BASED_CTLS_EXTINT |
                   VMCS_PIN_BASED_CTLS_NMI |
                   VMCS_PIN_BASED_CTLS_VNMI));
    wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased,
                   VMCS_PRI_PROC_BASED_CTLS_HLT |
                   VMCS_PRI_PROC_BASED_CTLS_MWAIT |
                   VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET |
                   VMCS_PRI_PROC_BASED_CTLS_TPR_SHADOW) |
          VMCS_PRI_PROC_BASED_CTLS_SEC_CONTROL);
    wvmcs(cpu->hvf_fd, VMCS_SEC_PROC_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased2,
                   VMCS_PRI_PROC_BASED2_CTLS_APIC_ACCESSES));

    wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry, 0));
    wvmcs(cpu->hvf_fd, VMCS_EXCEPTION_BITMAP, 0); /* Double fault */

    wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);

    hvf_reset_vcpu(cpu);

    x86cpu = X86_CPU(cpu);
    x86cpu->env.kvm_xsave_buf = qemu_memalign(4096,
                                              sizeof(struct hvf_xsave_buf));

    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_STAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_LSTAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_CSTAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FMASK, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_GSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_KERNELGSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_TSC_AUX, 1);
    /*hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_TSC, 1);*/
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_CS, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_EIP, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_ESP, 1);

    return 0;
}
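
/*
 * hv_vcpu_enable_native_msr(..., 1) lets the guest access the listed MSRs
 * directly, without trapping to QEMU; MSRs not listed here are handled by
 * simulate_rdmsr()/simulate_wrmsr() on EXIT_REASON_RDMSR/WRMSR below.
 */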

void hvf_disable(int shouldDisable)
{
    hvf_disabled = shouldDisable;
}

int hvf_vcpu_exec(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;
    int ret = 0;
    uint64_t rip = 0;

    cpu->halted = 0;

    if (hvf_process_events(cpu)) {
        return EXCP_HLT;
    }

    do {
        if (cpu->vcpu_dirty) {
            hvf_put_registers(cpu);
            cpu->vcpu_dirty = false;
        }

        env->hvf_emul->interruptable =
            !(rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) &
              (VMCS_INTERRUPTIBILITY_STI_BLOCKING |
               VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING));

        hvf_inject_interrupts(cpu);
        vmx_update_tpr(cpu);

        qemu_mutex_unlock_iothread();
        if (!cpu_is_bsp(X86_CPU(cpu)) && cpu->halted) {
            qemu_mutex_lock_iothread();
            return EXCP_HLT;
        }

        hv_return_t r = hv_vcpu_run(cpu->hvf_fd);
        assert_hvf_ok(r);

        /* handle VMEXIT */
        uint64_t exit_reason = rvmcs(cpu->hvf_fd, VMCS_EXIT_REASON);
        uint64_t exit_qual = rvmcs(cpu->hvf_fd, VMCS_EXIT_QUALIFICATION);
        uint32_t ins_len = (uint32_t)rvmcs(cpu->hvf_fd,
                                           VMCS_EXIT_INSTRUCTION_LENGTH);
        uint64_t idtvec_info = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO);
        rip = rreg(cpu->hvf_fd, HV_X86_RIP);
        RFLAGS(env) = rreg(cpu->hvf_fd, HV_X86_RFLAGS);
        env->eflags = RFLAGS(env);

        qemu_mutex_lock_iothread();

        update_apic_tpr(cpu);
        current_cpu = cpu;

        ret = 0;
        switch (exit_reason) {
        case EXIT_REASON_HLT: {
            macvm_set_rip(cpu, rip + ins_len);
            if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
                  (EFLAGS(env) & IF_MASK))
                && !(cpu->interrupt_request & CPU_INTERRUPT_NMI) &&
                !(idtvec_info & VMCS_IDT_VEC_VALID)) {
                cpu->halted = 1;
                ret = EXCP_HLT;
                break;
            }
            ret = EXCP_INTERRUPT;
            break;
        }
        case EXIT_REASON_MWAIT: {
            ret = EXCP_INTERRUPT;
            break;
        }
        /* Need to check if MMIO or unmapped fault */
        case EXIT_REASON_EPT_FAULT:
        {
            hvf_slot *slot;
            addr_t gpa = rvmcs(cpu->hvf_fd, VMCS_GUEST_PHYSICAL_ADDRESS);

            if (((idtvec_info & VMCS_IDT_VEC_VALID) == 0) &&
                ((exit_qual & EXIT_QUAL_NMIUDTI) != 0)) {
                vmx_set_nmi_blocking(cpu);
            }

            slot = hvf_find_overlap_slot(gpa, gpa);
            /* mmio */
            if (ept_emulation_fault(exit_qual) && !slot) {
                struct x86_decode decode;

                load_regs(cpu);
                env->hvf_emul->fetch_rip = rip;

                decode_instruction(env, &decode);
                exec_instruction(env, &decode);
                store_regs(cpu);
                break;
            }
#ifdef DIRTY_VGA_TRACKING
            /* TODO: handle dirty page tracking */
#endif
            break;
        }
        case EXIT_REASON_INOUT:
        {
            uint32_t in = (exit_qual & 8) != 0;
            uint32_t size = (exit_qual & 7) + 1;
            uint32_t string = (exit_qual & 16) != 0;
            uint32_t port = exit_qual >> 16;
            /*uint32_t rep = (exit_qual & 0x20) != 0;*/

            if (!string && in) {
                uint64_t val = 0;
                load_regs(cpu);
                hvf_handle_io(env, port, &val, 0, size, 1);
                if (size == 1) {
                    AL(env) = val;
                } else if (size == 2) {
                    AX(env) = val;
                } else if (size == 4) {
                    RAX(env) = (uint32_t)val;
                } else {
                    VM_PANIC("size");
                }
                RIP(env) += ins_len;
                store_regs(cpu);
                break;
            } else if (!string && !in) {
                RAX(env) = rreg(cpu->hvf_fd, HV_X86_RAX);
                hvf_handle_io(env, port, &RAX(env), 1, size, 1);
                macvm_set_rip(cpu, rip + ins_len);
                break;
            }
            struct x86_decode decode;

            load_regs(cpu);
            env->hvf_emul->fetch_rip = rip;

            decode_instruction(env, &decode);
            VM_PANIC_ON(ins_len != decode.len);
            exec_instruction(env, &decode);
            store_regs(cpu);

            break;
        }
        case EXIT_REASON_CPUID: {
            uint32_t rax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX);
            uint32_t rbx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RBX);
            uint32_t rcx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
            uint32_t rdx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);

            cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx);

            wreg(cpu->hvf_fd, HV_X86_RAX, rax);
            wreg(cpu->hvf_fd, HV_X86_RBX, rbx);
            wreg(cpu->hvf_fd, HV_X86_RCX, rcx);
            wreg(cpu->hvf_fd, HV_X86_RDX, rdx);

            macvm_set_rip(cpu, rip + ins_len);
            break;
        }
        case EXIT_REASON_XSETBV: {
            X86CPU *x86_cpu = X86_CPU(cpu);
            CPUX86State *env = &x86_cpu->env;
            uint32_t eax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX);
            uint32_t ecx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
            uint32_t edx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);

            if (ecx) {
                macvm_set_rip(cpu, rip + ins_len);
                break;
            }
            env->xcr0 = ((uint64_t)edx << 32) | eax;
            wreg(cpu->hvf_fd, HV_X86_XCR0, env->xcr0 | 1);
            macvm_set_rip(cpu, rip + ins_len);
            break;
        }
        case EXIT_REASON_INTR_WINDOW:
            vmx_clear_int_window_exiting(cpu);
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_NMI_WINDOW:
            vmx_clear_nmi_window_exiting(cpu);
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_EXT_INTR:
            /* force exit and allow io handling */
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_RDMSR:
        case EXIT_REASON_WRMSR:
        {
            load_regs(cpu);
            if (exit_reason == EXIT_REASON_RDMSR) {
                simulate_rdmsr(cpu);
            } else {
                simulate_wrmsr(cpu);
            }
            RIP(env) += rvmcs(cpu->hvf_fd, VMCS_EXIT_INSTRUCTION_LENGTH);
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_CR_ACCESS: {
            int cr;
            int reg;

            load_regs(cpu);
            cr = exit_qual & 15;
            reg = (exit_qual >> 8) & 15;

            switch (cr) {
            case 0x0: {
                macvm_set_cr0(cpu->hvf_fd, RRX(env, reg));
                break;
            }
            case 4: {
                macvm_set_cr4(cpu->hvf_fd, RRX(env, reg));
                break;
            }
            case 8: {
                X86CPU *x86_cpu = X86_CPU(cpu);
                if (exit_qual & 0x10) {
                    RRX(env, reg) = cpu_get_apic_tpr(x86_cpu->apic_state);
                } else {
                    int tpr = RRX(env, reg);
                    cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
                    ret = EXCP_INTERRUPT;
                }
                break;
            }
            default:
                error_report("Unrecognized CR %d", cr);
                abort();
            }
            RIP(env) += ins_len;
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_APIC_ACCESS: { /* TODO */
            struct x86_decode decode;

            load_regs(cpu);
            env->hvf_emul->fetch_rip = rip;

            decode_instruction(env, &decode);
            exec_instruction(env, &decode);
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_TPR: {
            ret = 1;
            break;
        }
        case EXIT_REASON_TASK_SWITCH: {
            uint64_t vinfo = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO);
            x68_segment_selector sel = {.sel = exit_qual & 0xffff};
            vmx_handle_task_switch(cpu, sel, (exit_qual >> 30) & 0x3,
                                   vinfo & VMCS_INTR_VALID,
                                   vinfo & VECTORING_INFO_VECTOR_MASK,
                                   vinfo & VMCS_INTR_T_MASK);
            break;
        }
        case EXIT_REASON_TRIPLE_FAULT: {
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            ret = EXCP_INTERRUPT;
            break;
        }
        case EXIT_REASON_RDPMC:
            wreg(cpu->hvf_fd, HV_X86_RAX, 0);
            wreg(cpu->hvf_fd, HV_X86_RDX, 0);
            macvm_set_rip(cpu, rip + ins_len);
            break;
        case VMX_REASON_VMCALL:
            /* TODO: inject #GP fault */
            break;
        default:
            error_report("%llx: unhandled exit %llx", rip, exit_reason);
        }
    } while (ret == 0);

    return ret;
}
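
/*
 * Return values follow the usual QEMU vCPU loop convention: EXCP_HLT means
 * the guest executed HLT and the vCPU thread can sleep until an interrupt is
 * pending, while EXCP_INTERRUPT asks the caller to drop back to the outer
 * loop, service pending work, and then re-enter hvf_vcpu_exec().
 */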

static bool hvf_allowed;

static int hvf_accel_init(MachineState *ms)
{
    int x;
    hv_return_t ret;
    HVFState *s;

    hvf_disable(0);
    ret = hv_vm_create(HV_VM_DEFAULT);
    assert_hvf_ok(ret);

    s = g_new0(HVFState, 1);

    s->num_slots = 32;
    for (x = 0; x < s->num_slots; ++x) {
        s->slots[x].size = 0;
        s->slots[x].slot_id = x;
    }

    hvf_state = s;
    cpu_interrupt_handler = hvf_handle_interrupt;
    memory_listener_register(&hvf_memory_listener, &address_space_memory);
    return 0;
}

static void hvf_accel_class_init(ObjectClass *oc, void *data)
{
    AccelClass *ac = ACCEL_CLASS(oc);
    ac->name = "HVF";
    ac->init_machine = hvf_accel_init;
    ac->allowed = &hvf_allowed;
}

static const TypeInfo hvf_accel_type = {
    .name = TYPE_HVF_ACCEL,
    .parent = TYPE_ACCEL,
    .class_init = hvf_accel_class_init,
};

static void hvf_type_init(void)
{
    type_register_static(&hvf_accel_type);
}

type_init(hvf_type_init);