]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * QEMU HAX support | |
3 | * | |
4 | * Copyright IBM, Corp. 2008 | |
5 | * Red Hat, Inc. 2008 | |
6 | * | |
7 | * Authors: | |
8 | * Anthony Liguori <aliguori@us.ibm.com> | |
9 | * Glauber Costa <gcosta@redhat.com> | |
10 | * | |
11 | * Copyright (c) 2011 Intel Corporation | |
12 | * Written by: | |
13 | * Jiang Yunhong<yunhong.jiang@intel.com> | |
14 | * Xin Xiaohui<xiaohui.xin@intel.com> | |
15 | * Zhang Xiantao<xiantao.zhang@intel.com> | |
16 | * | |
17 | * This work is licensed under the terms of the GNU GPL, version 2 or later. | |
18 | * See the COPYING file in the top-level directory. | |
19 | * | |
20 | */ | |
21 | ||
22 | /* | |
23 | * HAX common code for both windows and darwin | |
24 | */ | |
25 | ||
26 | #include "qemu/osdep.h" | |
27 | #include "cpu.h" | |
28 | #include "exec/address-spaces.h" | |
29 | ||
30 | #include "qemu-common.h" | |
31 | #include "hax-i386.h" | |
32 | #include "sysemu/accel.h" | |
33 | #include "sysemu/reset.h" | |
34 | #include "sysemu/runstate.h" | |
35 | #include "qemu/main-loop.h" | |
36 | #include "hw/boards.h" | |
37 | ||
/* Compile-time switch: a non-zero value enables DPRINTF() output. */
#define DEBUG_HAX 0

/*
 * Debug trace helper.  The call is always compiled, but the message is only
 * written (to stdout) when DEBUG_HAX is non-zero.
 */
#define DPRINTF(fmt, ...)                               \
    do {                                                \
        if (DEBUG_HAX) {                                \
            fprintf(stdout, fmt, ## __VA_ARGS__);       \
        }                                               \
    } while (0)
46 | ||
47 | /* Current version */ | |
48 | const uint32_t hax_cur_version = 0x4; /* API v4: unmapping and MMIO moves */ | |
49 | /* Minimum HAX kernel version */ | |
50 | const uint32_t hax_min_version = 0x4; /* API v4: supports unmapping */ | |
51 | ||
52 | static bool hax_allowed; | |
53 | ||
54 | struct hax_state hax_global; | |
55 | ||
56 | static void hax_vcpu_sync_state(CPUArchState *env, int modified); | |
57 | static int hax_arch_get_registers(CPUArchState *env); | |
58 | ||
59 | int hax_enabled(void) | |
60 | { | |
61 | return hax_allowed; | |
62 | } | |
63 | ||
64 | int valid_hax_tunnel_size(uint16_t size) | |
65 | { | |
66 | return size >= sizeof(struct hax_tunnel); | |
67 | } | |
68 | ||
69 | hax_fd hax_vcpu_get_fd(CPUArchState *env) | |
70 | { | |
71 | struct hax_vcpu_state *vcpu = env_cpu(env)->hax_vcpu; | |
72 | if (!vcpu) { | |
73 | return HAX_INVALID_FD; | |
74 | } | |
75 | return vcpu->fd; | |
76 | } | |
77 | ||
78 | static int hax_get_capability(struct hax_state *hax) | |
79 | { | |
80 | int ret; | |
81 | struct hax_capabilityinfo capinfo, *cap = &capinfo; | |
82 | ||
83 | ret = hax_capability(hax, cap); | |
84 | if (ret) { | |
85 | return ret; | |
86 | } | |
87 | ||
88 | if ((cap->wstatus & HAX_CAP_WORKSTATUS_MASK) == HAX_CAP_STATUS_NOTWORKING) { | |
89 | if (cap->winfo & HAX_CAP_FAILREASON_VT) { | |
90 | DPRINTF | |
91 | ("VTX feature is not enabled, HAX driver will not work.\n"); | |
92 | } else if (cap->winfo & HAX_CAP_FAILREASON_NX) { | |
93 | DPRINTF | |
94 | ("NX feature is not enabled, HAX driver will not work.\n"); | |
95 | } | |
96 | return -ENXIO; | |
97 | ||
98 | } | |
99 | ||
100 | if (!(cap->winfo & HAX_CAP_UG)) { | |
101 | fprintf(stderr, "UG mode is not supported by the hardware.\n"); | |
102 | return -ENOTSUP; | |
103 | } | |
104 | ||
105 | hax->supports_64bit_ramblock = !!(cap->winfo & HAX_CAP_64BIT_RAMBLOCK); | |
106 | ||
107 | if (cap->wstatus & HAX_CAP_MEMQUOTA) { | |
108 | if (cap->mem_quota < hax->mem_quota) { | |
109 | fprintf(stderr, "The VM memory needed exceeds the driver limit.\n"); | |
110 | return -ENOSPC; | |
111 | } | |
112 | } | |
113 | return 0; | |
114 | } | |
115 | ||
116 | static int hax_version_support(struct hax_state *hax) | |
117 | { | |
118 | int ret; | |
119 | struct hax_module_version version; | |
120 | ||
121 | ret = hax_mod_version(hax, &version); | |
122 | if (ret < 0) { | |
123 | return 0; | |
124 | } | |
125 | ||
126 | if (hax_min_version > version.cur_version) { | |
127 | fprintf(stderr, "Incompatible HAX module version %d,", | |
128 | version.cur_version); | |
129 | fprintf(stderr, "requires minimum version %d\n", hax_min_version); | |
130 | return 0; | |
131 | } | |
132 | if (hax_cur_version < version.compat_version) { | |
133 | fprintf(stderr, "Incompatible QEMU HAX API version %x,", | |
134 | hax_cur_version); | |
135 | fprintf(stderr, "requires minimum HAX API version %x\n", | |
136 | version.compat_version); | |
137 | return 0; | |
138 | } | |
139 | ||
140 | return 1; | |
141 | } | |
142 | ||
143 | int hax_vcpu_create(int id) | |
144 | { | |
145 | struct hax_vcpu_state *vcpu = NULL; | |
146 | int ret; | |
147 | ||
148 | if (!hax_global.vm) { | |
149 | fprintf(stderr, "vcpu %x created failed, vm is null\n", id); | |
150 | return -1; | |
151 | } | |
152 | ||
153 | if (hax_global.vm->vcpus[id]) { | |
154 | fprintf(stderr, "vcpu %x allocated already\n", id); | |
155 | return 0; | |
156 | } | |
157 | ||
158 | vcpu = g_new0(struct hax_vcpu_state, 1); | |
159 | ||
160 | ret = hax_host_create_vcpu(hax_global.vm->fd, id); | |
161 | if (ret) { | |
162 | fprintf(stderr, "Failed to create vcpu %x\n", id); | |
163 | goto error; | |
164 | } | |
165 | ||
166 | vcpu->vcpu_id = id; | |
167 | vcpu->fd = hax_host_open_vcpu(hax_global.vm->id, id); | |
168 | if (hax_invalid_fd(vcpu->fd)) { | |
169 | fprintf(stderr, "Failed to open the vcpu\n"); | |
170 | ret = -ENODEV; | |
171 | goto error; | |
172 | } | |
173 | ||
174 | hax_global.vm->vcpus[id] = vcpu; | |
175 | ||
176 | ret = hax_host_setup_vcpu_channel(vcpu); | |
177 | if (ret) { | |
178 | fprintf(stderr, "Invalid hax tunnel size\n"); | |
179 | ret = -EINVAL; | |
180 | goto error; | |
181 | } | |
182 | return 0; | |
183 | ||
184 | error: | |
185 | /* vcpu and tunnel will be closed automatically */ | |
186 | if (vcpu && !hax_invalid_fd(vcpu->fd)) { | |
187 | hax_close_fd(vcpu->fd); | |
188 | } | |
189 | ||
190 | hax_global.vm->vcpus[id] = NULL; | |
191 | g_free(vcpu); | |
192 | return -1; | |
193 | } | |
194 | ||
195 | int hax_vcpu_destroy(CPUState *cpu) | |
196 | { | |
197 | struct hax_vcpu_state *vcpu = cpu->hax_vcpu; | |
198 | ||
199 | if (!hax_global.vm) { | |
200 | fprintf(stderr, "vcpu %x destroy failed, vm is null\n", vcpu->vcpu_id); | |
201 | return -1; | |
202 | } | |
203 | ||
204 | if (!vcpu) { | |
205 | return 0; | |
206 | } | |
207 | ||
208 | /* | |
209 | * 1. The hax_tunnel is also destroyed when vcpu is destroyed | |
210 | * 2. close fd will cause hax module vcpu be cleaned | |
211 | */ | |
212 | hax_close_fd(vcpu->fd); | |
213 | hax_global.vm->vcpus[vcpu->vcpu_id] = NULL; | |
214 | g_free(vcpu); | |
215 | return 0; | |
216 | } | |
217 | ||
218 | int hax_init_vcpu(CPUState *cpu) | |
219 | { | |
220 | int ret; | |
221 | ||
222 | ret = hax_vcpu_create(cpu->cpu_index); | |
223 | if (ret < 0) { | |
224 | fprintf(stderr, "Failed to create HAX vcpu\n"); | |
225 | exit(-1); | |
226 | } | |
227 | ||
228 | cpu->hax_vcpu = hax_global.vm->vcpus[cpu->cpu_index]; | |
229 | cpu->vcpu_dirty = true; | |
230 | qemu_register_reset(hax_reset_vcpu_state, (CPUArchState *) (cpu->env_ptr)); | |
231 | ||
232 | return ret; | |
233 | } | |
234 | ||
235 | struct hax_vm *hax_vm_create(struct hax_state *hax) | |
236 | { | |
237 | struct hax_vm *vm; | |
238 | int vm_id = 0, ret; | |
239 | ||
240 | if (hax_invalid_fd(hax->fd)) { | |
241 | return NULL; | |
242 | } | |
243 | ||
244 | if (hax->vm) { | |
245 | return hax->vm; | |
246 | } | |
247 | ||
248 | vm = g_new0(struct hax_vm, 1); | |
249 | ||
250 | ret = hax_host_create_vm(hax, &vm_id); | |
251 | if (ret) { | |
252 | fprintf(stderr, "Failed to create vm %x\n", ret); | |
253 | goto error; | |
254 | } | |
255 | vm->id = vm_id; | |
256 | vm->fd = hax_host_open_vm(hax, vm_id); | |
257 | if (hax_invalid_fd(vm->fd)) { | |
258 | fprintf(stderr, "Failed to open vm %d\n", vm_id); | |
259 | goto error; | |
260 | } | |
261 | ||
262 | hax->vm = vm; | |
263 | return vm; | |
264 | ||
265 | error: | |
266 | g_free(vm); | |
267 | hax->vm = NULL; | |
268 | return NULL; | |
269 | } | |
270 | ||
271 | int hax_vm_destroy(struct hax_vm *vm) | |
272 | { | |
273 | int i; | |
274 | ||
275 | for (i = 0; i < HAX_MAX_VCPU; i++) | |
276 | if (vm->vcpus[i]) { | |
277 | fprintf(stderr, "VCPU should be cleaned before vm clean\n"); | |
278 | return -1; | |
279 | } | |
280 | hax_close_fd(vm->fd); | |
281 | g_free(vm); | |
282 | hax_global.vm = NULL; | |
283 | return 0; | |
284 | } | |
285 | ||
286 | static void hax_handle_interrupt(CPUState *cpu, int mask) | |
287 | { | |
288 | cpu->interrupt_request |= mask; | |
289 | ||
290 | if (!qemu_cpu_is_self(cpu)) { | |
291 | qemu_cpu_kick(cpu); | |
292 | } | |
293 | } | |
294 | ||
295 | static int hax_init(ram_addr_t ram_size) | |
296 | { | |
297 | struct hax_state *hax = NULL; | |
298 | struct hax_qemu_version qversion; | |
299 | int ret; | |
300 | ||
301 | hax = &hax_global; | |
302 | ||
303 | memset(hax, 0, sizeof(struct hax_state)); | |
304 | hax->mem_quota = ram_size; | |
305 | ||
306 | hax->fd = hax_mod_open(); | |
307 | if (hax_invalid_fd(hax->fd)) { | |
308 | hax->fd = 0; | |
309 | ret = -ENODEV; | |
310 | goto error; | |
311 | } | |
312 | ||
313 | ret = hax_get_capability(hax); | |
314 | ||
315 | if (ret) { | |
316 | if (ret != -ENOSPC) { | |
317 | ret = -EINVAL; | |
318 | } | |
319 | goto error; | |
320 | } | |
321 | ||
322 | if (!hax_version_support(hax)) { | |
323 | ret = -EINVAL; | |
324 | goto error; | |
325 | } | |
326 | ||
327 | hax->vm = hax_vm_create(hax); | |
328 | if (!hax->vm) { | |
329 | fprintf(stderr, "Failed to create HAX VM\n"); | |
330 | ret = -EINVAL; | |
331 | goto error; | |
332 | } | |
333 | ||
334 | hax_memory_init(); | |
335 | ||
336 | qversion.cur_version = hax_cur_version; | |
337 | qversion.min_version = hax_min_version; | |
338 | hax_notify_qemu_version(hax->vm->fd, &qversion); | |
339 | cpu_interrupt_handler = hax_handle_interrupt; | |
340 | ||
341 | return ret; | |
342 | error: | |
343 | if (hax->vm) { | |
344 | hax_vm_destroy(hax->vm); | |
345 | } | |
346 | if (hax->fd) { | |
347 | hax_mod_close(hax); | |
348 | } | |
349 | ||
350 | return ret; | |
351 | } | |
352 | ||
353 | static int hax_accel_init(MachineState *ms) | |
354 | { | |
355 | int ret = hax_init(ms->ram_size); | |
356 | ||
357 | if (ret && (ret != -ENOSPC)) { | |
358 | fprintf(stderr, "No accelerator found.\n"); | |
359 | } else { | |
360 | fprintf(stdout, "HAX is %s and emulator runs in %s mode.\n", | |
361 | !ret ? "working" : "not working", | |
362 | !ret ? "fast virt" : "emulation"); | |
363 | } | |
364 | return ret; | |
365 | } | |
366 | ||
367 | static int hax_handle_fastmmio(CPUArchState *env, struct hax_fastmmio *hft) | |
368 | { | |
369 | if (hft->direction < 2) { | |
370 | cpu_physical_memory_rw(hft->gpa, &hft->value, hft->size, | |
371 | hft->direction); | |
372 | } else { | |
373 | /* | |
374 | * HAX API v4 supports transferring data between two MMIO addresses, | |
375 | * hft->gpa and hft->gpa2 (instructions such as MOVS require this): | |
376 | * hft->direction == 2: gpa ==> gpa2 | |
377 | */ | |
378 | uint64_t value; | |
379 | cpu_physical_memory_read(hft->gpa, &value, hft->size); | |
380 | cpu_physical_memory_write(hft->gpa2, &value, hft->size); | |
381 | } | |
382 | ||
383 | return 0; | |
384 | } | |
385 | ||
386 | static int hax_handle_io(CPUArchState *env, uint32_t df, uint16_t port, | |
387 | int direction, int size, int count, void *buffer) | |
388 | { | |
389 | uint8_t *ptr; | |
390 | int i; | |
391 | MemTxAttrs attrs = { 0 }; | |
392 | ||
393 | if (!df) { | |
394 | ptr = (uint8_t *) buffer; | |
395 | } else { | |
396 | ptr = buffer + size * count - size; | |
397 | } | |
398 | for (i = 0; i < count; i++) { | |
399 | address_space_rw(&address_space_io, port, attrs, | |
400 | ptr, size, direction == HAX_EXIT_IO_OUT); | |
401 | if (!df) { | |
402 | ptr += size; | |
403 | } else { | |
404 | ptr -= size; | |
405 | } | |
406 | } | |
407 | ||
408 | return 0; | |
409 | } | |
410 | ||
411 | static int hax_vcpu_interrupt(CPUArchState *env) | |
412 | { | |
413 | CPUState *cpu = env_cpu(env); | |
414 | struct hax_vcpu_state *vcpu = cpu->hax_vcpu; | |
415 | struct hax_tunnel *ht = vcpu->tunnel; | |
416 | ||
417 | /* | |
418 | * Try to inject an interrupt if the guest can accept it | |
419 | * Unlike KVM, HAX kernel check for the eflags, instead of qemu | |
420 | */ | |
421 | if (ht->ready_for_interrupt_injection && | |
422 | (cpu->interrupt_request & CPU_INTERRUPT_HARD)) { | |
423 | int irq; | |
424 | ||
425 | irq = cpu_get_pic_interrupt(env); | |
426 | if (irq >= 0) { | |
427 | hax_inject_interrupt(env, irq); | |
428 | cpu->interrupt_request &= ~CPU_INTERRUPT_HARD; | |
429 | } | |
430 | } | |
431 | ||
432 | /* If we have an interrupt but the guest is not ready to receive an | |
433 | * interrupt, request an interrupt window exit. This will | |
434 | * cause a return to userspace as soon as the guest is ready to | |
435 | * receive interrupts. */ | |
436 | if ((cpu->interrupt_request & CPU_INTERRUPT_HARD)) { | |
437 | ht->request_interrupt_window = 1; | |
438 | } else { | |
439 | ht->request_interrupt_window = 0; | |
440 | } | |
441 | return 0; | |
442 | } | |
443 | ||
444 | void hax_raise_event(CPUState *cpu) | |
445 | { | |
446 | struct hax_vcpu_state *vcpu = cpu->hax_vcpu; | |
447 | ||
448 | if (!vcpu) { | |
449 | return; | |
450 | } | |
451 | vcpu->tunnel->user_event_pending = 1; | |
452 | } | |
453 | ||
454 | /* | |
455 | * Ask hax kernel module to run the CPU for us till: | |
456 | * 1. Guest crash or shutdown | |
457 | * 2. Need QEMU's emulation like guest execute MMIO instruction | |
458 | * 3. Guest execute HLT | |
459 | * 4. QEMU have Signal/event pending | |
460 | * 5. An unknown VMX exit happens | |
461 | */ | |
462 | static int hax_vcpu_hax_exec(CPUArchState *env) | |
463 | { | |
464 | int ret = 0; | |
465 | CPUState *cpu = env_cpu(env); | |
466 | X86CPU *x86_cpu = X86_CPU(cpu); | |
467 | struct hax_vcpu_state *vcpu = cpu->hax_vcpu; | |
468 | struct hax_tunnel *ht = vcpu->tunnel; | |
469 | ||
470 | if (!hax_enabled()) { | |
471 | DPRINTF("Trying to vcpu execute at eip:" TARGET_FMT_lx "\n", env->eip); | |
472 | return 0; | |
473 | } | |
474 | ||
475 | if (cpu->interrupt_request & CPU_INTERRUPT_POLL) { | |
476 | cpu->interrupt_request &= ~CPU_INTERRUPT_POLL; | |
477 | apic_poll_irq(x86_cpu->apic_state); | |
478 | } | |
479 | ||
480 | /* After a vcpu is halted (either because it is an AP and has just been | |
481 | * reset, or because it has executed the HLT instruction), it will not be | |
482 | * run (hax_vcpu_run()) until it is unhalted. The next few if blocks check | |
483 | * for events that may change the halted state of this vcpu: | |
484 | * a) Maskable interrupt, when RFLAGS.IF is 1; | |
485 | * Note: env->eflags may not reflect the current RFLAGS state, because | |
486 | * it is not updated after each hax_vcpu_run(). We cannot afford | |
487 | * to fail to recognize any unhalt-by-maskable-interrupt event | |
488 | * (in which case the vcpu will halt forever), and yet we cannot | |
489 | * afford the overhead of hax_vcpu_sync_state(). The current | |
490 | * solution is to err on the side of caution and have the HLT | |
491 | * handler (see case HAX_EXIT_HLT below) unconditionally set the | |
492 | * IF_MASK bit in env->eflags, which, in effect, disables the | |
493 | * RFLAGS.IF check. | |
494 | * b) NMI; | |
495 | * c) INIT signal; | |
496 | * d) SIPI signal. | |
497 | */ | |
498 | if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) && | |
499 | (env->eflags & IF_MASK)) || | |
500 | (cpu->interrupt_request & CPU_INTERRUPT_NMI)) { | |
501 | cpu->halted = 0; | |
502 | } | |
503 | ||
504 | if (cpu->interrupt_request & CPU_INTERRUPT_INIT) { | |
505 | DPRINTF("\nhax_vcpu_hax_exec: handling INIT for %d\n", | |
506 | cpu->cpu_index); | |
507 | do_cpu_init(x86_cpu); | |
508 | hax_vcpu_sync_state(env, 1); | |
509 | } | |
510 | ||
511 | if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) { | |
512 | DPRINTF("hax_vcpu_hax_exec: handling SIPI for %d\n", | |
513 | cpu->cpu_index); | |
514 | hax_vcpu_sync_state(env, 0); | |
515 | do_cpu_sipi(x86_cpu); | |
516 | hax_vcpu_sync_state(env, 1); | |
517 | } | |
518 | ||
519 | if (cpu->halted) { | |
520 | /* If this vcpu is halted, we must not ask HAXM to run it. Instead, we | |
521 | * break out of hax_smp_cpu_exec() as if this vcpu had executed HLT. | |
522 | * That way, this vcpu thread will be trapped in qemu_wait_io_event(), | |
523 | * until the vcpu is unhalted. | |
524 | */ | |
525 | cpu->exception_index = EXCP_HLT; | |
526 | return 0; | |
527 | } | |
528 | ||
529 | do { | |
530 | int hax_ret; | |
531 | ||
532 | if (cpu->exit_request) { | |
533 | ret = 1; | |
534 | break; | |
535 | } | |
536 | ||
537 | hax_vcpu_interrupt(env); | |
538 | ||
539 | qemu_mutex_unlock_iothread(); | |
540 | cpu_exec_start(cpu); | |
541 | hax_ret = hax_vcpu_run(vcpu); | |
542 | cpu_exec_end(cpu); | |
543 | qemu_mutex_lock_iothread(); | |
544 | ||
545 | /* Simply continue the vcpu_run if system call interrupted */ | |
546 | if (hax_ret == -EINTR || hax_ret == -EAGAIN) { | |
547 | DPRINTF("io window interrupted\n"); | |
548 | continue; | |
549 | } | |
550 | ||
551 | if (hax_ret < 0) { | |
552 | fprintf(stderr, "vcpu run failed for vcpu %x\n", vcpu->vcpu_id); | |
553 | abort(); | |
554 | } | |
555 | switch (ht->_exit_status) { | |
556 | case HAX_EXIT_IO: | |
557 | ret = hax_handle_io(env, ht->pio._df, ht->pio._port, | |
558 | ht->pio._direction, | |
559 | ht->pio._size, ht->pio._count, vcpu->iobuf); | |
560 | break; | |
561 | case HAX_EXIT_FAST_MMIO: | |
562 | ret = hax_handle_fastmmio(env, (struct hax_fastmmio *) vcpu->iobuf); | |
563 | break; | |
564 | /* Guest state changed, currently only for shutdown */ | |
565 | case HAX_EXIT_STATECHANGE: | |
566 | fprintf(stdout, "VCPU shutdown request\n"); | |
567 | qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); | |
568 | hax_vcpu_sync_state(env, 0); | |
569 | ret = 1; | |
570 | break; | |
571 | case HAX_EXIT_UNKNOWN_VMEXIT: | |
572 | fprintf(stderr, "Unknown VMX exit %x from guest\n", | |
573 | ht->_exit_reason); | |
574 | qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); | |
575 | hax_vcpu_sync_state(env, 0); | |
576 | cpu_dump_state(cpu, stderr, 0); | |
577 | ret = -1; | |
578 | break; | |
579 | case HAX_EXIT_HLT: | |
580 | if (!(cpu->interrupt_request & CPU_INTERRUPT_HARD) && | |
581 | !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) { | |
582 | /* hlt instruction with interrupt disabled is shutdown */ | |
583 | env->eflags |= IF_MASK; | |
584 | cpu->halted = 1; | |
585 | cpu->exception_index = EXCP_HLT; | |
586 | ret = 1; | |
587 | } | |
588 | break; | |
589 | /* these situations will continue to hax module */ | |
590 | case HAX_EXIT_INTERRUPT: | |
591 | case HAX_EXIT_PAUSED: | |
592 | break; | |
593 | case HAX_EXIT_MMIO: | |
594 | /* Should not happen on UG system */ | |
595 | fprintf(stderr, "HAX: unsupported MMIO emulation\n"); | |
596 | ret = -1; | |
597 | break; | |
598 | case HAX_EXIT_REAL: | |
599 | /* Should not happen on UG system */ | |
600 | fprintf(stderr, "HAX: unimplemented real mode emulation\n"); | |
601 | ret = -1; | |
602 | break; | |
603 | default: | |
604 | fprintf(stderr, "Unknown exit %x from HAX\n", ht->_exit_status); | |
605 | qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); | |
606 | hax_vcpu_sync_state(env, 0); | |
607 | cpu_dump_state(cpu, stderr, 0); | |
608 | ret = 1; | |
609 | break; | |
610 | } | |
611 | } while (!ret); | |
612 | ||
613 | if (cpu->exit_request) { | |
614 | cpu->exit_request = 0; | |
615 | cpu->exception_index = EXCP_INTERRUPT; | |
616 | } | |
617 | return ret < 0; | |
618 | } | |
619 | ||
620 | static void do_hax_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) | |
621 | { | |
622 | CPUArchState *env = cpu->env_ptr; | |
623 | ||
624 | hax_arch_get_registers(env); | |
625 | cpu->vcpu_dirty = true; | |
626 | } | |
627 | ||
628 | void hax_cpu_synchronize_state(CPUState *cpu) | |
629 | { | |
630 | if (!cpu->vcpu_dirty) { | |
631 | run_on_cpu(cpu, do_hax_cpu_synchronize_state, RUN_ON_CPU_NULL); | |
632 | } | |
633 | } | |
634 | ||
635 | static void do_hax_cpu_synchronize_post_reset(CPUState *cpu, | |
636 | run_on_cpu_data arg) | |
637 | { | |
638 | CPUArchState *env = cpu->env_ptr; | |
639 | ||
640 | hax_vcpu_sync_state(env, 1); | |
641 | cpu->vcpu_dirty = false; | |
642 | } | |
643 | ||
644 | void hax_cpu_synchronize_post_reset(CPUState *cpu) | |
645 | { | |
646 | run_on_cpu(cpu, do_hax_cpu_synchronize_post_reset, RUN_ON_CPU_NULL); | |
647 | } | |
648 | ||
649 | static void do_hax_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg) | |
650 | { | |
651 | CPUArchState *env = cpu->env_ptr; | |
652 | ||
653 | hax_vcpu_sync_state(env, 1); | |
654 | cpu->vcpu_dirty = false; | |
655 | } | |
656 | ||
657 | void hax_cpu_synchronize_post_init(CPUState *cpu) | |
658 | { | |
659 | run_on_cpu(cpu, do_hax_cpu_synchronize_post_init, RUN_ON_CPU_NULL); | |
660 | } | |
661 | ||
662 | static void do_hax_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg) | |
663 | { | |
664 | cpu->vcpu_dirty = true; | |
665 | } | |
666 | ||
667 | void hax_cpu_synchronize_pre_loadvm(CPUState *cpu) | |
668 | { | |
669 | run_on_cpu(cpu, do_hax_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL); | |
670 | } | |
671 | ||
672 | int hax_smp_cpu_exec(CPUState *cpu) | |
673 | { | |
674 | CPUArchState *env = (CPUArchState *) (cpu->env_ptr); | |
675 | int fatal; | |
676 | int ret; | |
677 | ||
678 | while (1) { | |
679 | if (cpu->exception_index >= EXCP_INTERRUPT) { | |
680 | ret = cpu->exception_index; | |
681 | cpu->exception_index = -1; | |
682 | break; | |
683 | } | |
684 | ||
685 | fatal = hax_vcpu_hax_exec(env); | |
686 | ||
687 | if (fatal) { | |
688 | fprintf(stderr, "Unsupported HAX vcpu return\n"); | |
689 | abort(); | |
690 | } | |
691 | } | |
692 | ||
693 | return ret; | |
694 | } | |
695 | ||
696 | static void set_v8086_seg(struct segment_desc_t *lhs, const SegmentCache *rhs) | |
697 | { | |
698 | memset(lhs, 0, sizeof(struct segment_desc_t)); | |
699 | lhs->selector = rhs->selector; | |
700 | lhs->base = rhs->base; | |
701 | lhs->limit = rhs->limit; | |
702 | lhs->type = 3; | |
703 | lhs->present = 1; | |
704 | lhs->dpl = 3; | |
705 | lhs->operand_size = 0; | |
706 | lhs->desc = 1; | |
707 | lhs->long_mode = 0; | |
708 | lhs->granularity = 0; | |
709 | lhs->available = 0; | |
710 | } | |
711 | ||
712 | static void get_seg(SegmentCache *lhs, const struct segment_desc_t *rhs) | |
713 | { | |
714 | lhs->selector = rhs->selector; | |
715 | lhs->base = rhs->base; | |
716 | lhs->limit = rhs->limit; | |
717 | lhs->flags = (rhs->type << DESC_TYPE_SHIFT) | |
718 | | (rhs->present * DESC_P_MASK) | |
719 | | (rhs->dpl << DESC_DPL_SHIFT) | |
720 | | (rhs->operand_size << DESC_B_SHIFT) | |
721 | | (rhs->desc * DESC_S_MASK) | |
722 | | (rhs->long_mode << DESC_L_SHIFT) | |
723 | | (rhs->granularity * DESC_G_MASK) | (rhs->available * DESC_AVL_MASK); | |
724 | } | |
725 | ||
726 | static void set_seg(struct segment_desc_t *lhs, const SegmentCache *rhs) | |
727 | { | |
728 | unsigned flags = rhs->flags; | |
729 | ||
730 | memset(lhs, 0, sizeof(struct segment_desc_t)); | |
731 | lhs->selector = rhs->selector; | |
732 | lhs->base = rhs->base; | |
733 | lhs->limit = rhs->limit; | |
734 | lhs->type = (flags >> DESC_TYPE_SHIFT) & 15; | |
735 | lhs->present = (flags & DESC_P_MASK) != 0; | |
736 | lhs->dpl = rhs->selector & 3; | |
737 | lhs->operand_size = (flags >> DESC_B_SHIFT) & 1; | |
738 | lhs->desc = (flags & DESC_S_MASK) != 0; | |
739 | lhs->long_mode = (flags >> DESC_L_SHIFT) & 1; | |
740 | lhs->granularity = (flags & DESC_G_MASK) != 0; | |
741 | lhs->available = (flags & DESC_AVL_MASK) != 0; | |
742 | } | |
743 | ||
744 | static void hax_getput_reg(uint64_t *hax_reg, target_ulong *qemu_reg, int set) | |
745 | { | |
746 | target_ulong reg = *hax_reg; | |
747 | ||
748 | if (set) { | |
749 | *hax_reg = *qemu_reg; | |
750 | } else { | |
751 | *qemu_reg = reg; | |
752 | } | |
753 | } | |
754 | ||
755 | /* The sregs has been synced with HAX kernel already before this call */ | |
756 | static int hax_get_segments(CPUArchState *env, struct vcpu_state_t *sregs) | |
757 | { | |
758 | get_seg(&env->segs[R_CS], &sregs->_cs); | |
759 | get_seg(&env->segs[R_DS], &sregs->_ds); | |
760 | get_seg(&env->segs[R_ES], &sregs->_es); | |
761 | get_seg(&env->segs[R_FS], &sregs->_fs); | |
762 | get_seg(&env->segs[R_GS], &sregs->_gs); | |
763 | get_seg(&env->segs[R_SS], &sregs->_ss); | |
764 | ||
765 | get_seg(&env->tr, &sregs->_tr); | |
766 | get_seg(&env->ldt, &sregs->_ldt); | |
767 | env->idt.limit = sregs->_idt.limit; | |
768 | env->idt.base = sregs->_idt.base; | |
769 | env->gdt.limit = sregs->_gdt.limit; | |
770 | env->gdt.base = sregs->_gdt.base; | |
771 | return 0; | |
772 | } | |
773 | ||
774 | static int hax_set_segments(CPUArchState *env, struct vcpu_state_t *sregs) | |
775 | { | |
776 | if ((env->eflags & VM_MASK)) { | |
777 | set_v8086_seg(&sregs->_cs, &env->segs[R_CS]); | |
778 | set_v8086_seg(&sregs->_ds, &env->segs[R_DS]); | |
779 | set_v8086_seg(&sregs->_es, &env->segs[R_ES]); | |
780 | set_v8086_seg(&sregs->_fs, &env->segs[R_FS]); | |
781 | set_v8086_seg(&sregs->_gs, &env->segs[R_GS]); | |
782 | set_v8086_seg(&sregs->_ss, &env->segs[R_SS]); | |
783 | } else { | |
784 | set_seg(&sregs->_cs, &env->segs[R_CS]); | |
785 | set_seg(&sregs->_ds, &env->segs[R_DS]); | |
786 | set_seg(&sregs->_es, &env->segs[R_ES]); | |
787 | set_seg(&sregs->_fs, &env->segs[R_FS]); | |
788 | set_seg(&sregs->_gs, &env->segs[R_GS]); | |
789 | set_seg(&sregs->_ss, &env->segs[R_SS]); | |
790 | ||
791 | if (env->cr[0] & CR0_PE_MASK) { | |
792 | /* force ss cpl to cs cpl */ | |
793 | sregs->_ss.selector = (sregs->_ss.selector & ~3) | | |
794 | (sregs->_cs.selector & 3); | |
795 | sregs->_ss.dpl = sregs->_ss.selector & 3; | |
796 | } | |
797 | } | |
798 | ||
799 | set_seg(&sregs->_tr, &env->tr); | |
800 | set_seg(&sregs->_ldt, &env->ldt); | |
801 | sregs->_idt.limit = env->idt.limit; | |
802 | sregs->_idt.base = env->idt.base; | |
803 | sregs->_gdt.limit = env->gdt.limit; | |
804 | sregs->_gdt.base = env->gdt.base; | |
805 | return 0; | |
806 | } | |
807 | ||
808 | static int hax_sync_vcpu_register(CPUArchState *env, int set) | |
809 | { | |
810 | struct vcpu_state_t regs; | |
811 | int ret; | |
812 | memset(®s, 0, sizeof(struct vcpu_state_t)); | |
813 | ||
814 | if (!set) { | |
815 | ret = hax_sync_vcpu_state(env, ®s, 0); | |
816 | if (ret < 0) { | |
817 | return -1; | |
818 | } | |
819 | } | |
820 | ||
821 | /* generic register */ | |
822 | hax_getput_reg(®s._rax, &env->regs[R_EAX], set); | |
823 | hax_getput_reg(®s._rbx, &env->regs[R_EBX], set); | |
824 | hax_getput_reg(®s._rcx, &env->regs[R_ECX], set); | |
825 | hax_getput_reg(®s._rdx, &env->regs[R_EDX], set); | |
826 | hax_getput_reg(®s._rsi, &env->regs[R_ESI], set); | |
827 | hax_getput_reg(®s._rdi, &env->regs[R_EDI], set); | |
828 | hax_getput_reg(®s._rsp, &env->regs[R_ESP], set); | |
829 | hax_getput_reg(®s._rbp, &env->regs[R_EBP], set); | |
830 | #ifdef TARGET_X86_64 | |
831 | hax_getput_reg(®s._r8, &env->regs[8], set); | |
832 | hax_getput_reg(®s._r9, &env->regs[9], set); | |
833 | hax_getput_reg(®s._r10, &env->regs[10], set); | |
834 | hax_getput_reg(®s._r11, &env->regs[11], set); | |
835 | hax_getput_reg(®s._r12, &env->regs[12], set); | |
836 | hax_getput_reg(®s._r13, &env->regs[13], set); | |
837 | hax_getput_reg(®s._r14, &env->regs[14], set); | |
838 | hax_getput_reg(®s._r15, &env->regs[15], set); | |
839 | #endif | |
840 | hax_getput_reg(®s._rflags, &env->eflags, set); | |
841 | hax_getput_reg(®s._rip, &env->eip, set); | |
842 | ||
843 | if (set) { | |
844 | regs._cr0 = env->cr[0]; | |
845 | regs._cr2 = env->cr[2]; | |
846 | regs._cr3 = env->cr[3]; | |
847 | regs._cr4 = env->cr[4]; | |
848 | hax_set_segments(env, ®s); | |
849 | } else { | |
850 | env->cr[0] = regs._cr0; | |
851 | env->cr[2] = regs._cr2; | |
852 | env->cr[3] = regs._cr3; | |
853 | env->cr[4] = regs._cr4; | |
854 | hax_get_segments(env, ®s); | |
855 | } | |
856 | ||
857 | if (set) { | |
858 | ret = hax_sync_vcpu_state(env, ®s, 1); | |
859 | if (ret < 0) { | |
860 | return -1; | |
861 | } | |
862 | } | |
863 | return 0; | |
864 | } | |
865 | ||
866 | static void hax_msr_entry_set(struct vmx_msr *item, uint32_t index, | |
867 | uint64_t value) | |
868 | { | |
869 | item->entry = index; | |
870 | item->value = value; | |
871 | } | |
872 | ||
873 | static int hax_get_msrs(CPUArchState *env) | |
874 | { | |
875 | struct hax_msr_data md; | |
876 | struct vmx_msr *msrs = md.entries; | |
877 | int ret, i, n; | |
878 | ||
879 | n = 0; | |
880 | msrs[n++].entry = MSR_IA32_SYSENTER_CS; | |
881 | msrs[n++].entry = MSR_IA32_SYSENTER_ESP; | |
882 | msrs[n++].entry = MSR_IA32_SYSENTER_EIP; | |
883 | msrs[n++].entry = MSR_IA32_TSC; | |
884 | #ifdef TARGET_X86_64 | |
885 | msrs[n++].entry = MSR_EFER; | |
886 | msrs[n++].entry = MSR_STAR; | |
887 | msrs[n++].entry = MSR_LSTAR; | |
888 | msrs[n++].entry = MSR_CSTAR; | |
889 | msrs[n++].entry = MSR_FMASK; | |
890 | msrs[n++].entry = MSR_KERNELGSBASE; | |
891 | #endif | |
892 | md.nr_msr = n; | |
893 | ret = hax_sync_msr(env, &md, 0); | |
894 | if (ret < 0) { | |
895 | return ret; | |
896 | } | |
897 | ||
898 | for (i = 0; i < md.done; i++) { | |
899 | switch (msrs[i].entry) { | |
900 | case MSR_IA32_SYSENTER_CS: | |
901 | env->sysenter_cs = msrs[i].value; | |
902 | break; | |
903 | case MSR_IA32_SYSENTER_ESP: | |
904 | env->sysenter_esp = msrs[i].value; | |
905 | break; | |
906 | case MSR_IA32_SYSENTER_EIP: | |
907 | env->sysenter_eip = msrs[i].value; | |
908 | break; | |
909 | case MSR_IA32_TSC: | |
910 | env->tsc = msrs[i].value; | |
911 | break; | |
912 | #ifdef TARGET_X86_64 | |
913 | case MSR_EFER: | |
914 | env->efer = msrs[i].value; | |
915 | break; | |
916 | case MSR_STAR: | |
917 | env->star = msrs[i].value; | |
918 | break; | |
919 | case MSR_LSTAR: | |
920 | env->lstar = msrs[i].value; | |
921 | break; | |
922 | case MSR_CSTAR: | |
923 | env->cstar = msrs[i].value; | |
924 | break; | |
925 | case MSR_FMASK: | |
926 | env->fmask = msrs[i].value; | |
927 | break; | |
928 | case MSR_KERNELGSBASE: | |
929 | env->kernelgsbase = msrs[i].value; | |
930 | break; | |
931 | #endif | |
932 | } | |
933 | } | |
934 | ||
935 | return 0; | |
936 | } | |
937 | ||
938 | static int hax_set_msrs(CPUArchState *env) | |
939 | { | |
940 | struct hax_msr_data md; | |
941 | struct vmx_msr *msrs; | |
942 | msrs = md.entries; | |
943 | int n = 0; | |
944 | ||
945 | memset(&md, 0, sizeof(struct hax_msr_data)); | |
946 | hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs); | |
947 | hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp); | |
948 | hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip); | |
949 | hax_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc); | |
950 | #ifdef TARGET_X86_64 | |
951 | hax_msr_entry_set(&msrs[n++], MSR_EFER, env->efer); | |
952 | hax_msr_entry_set(&msrs[n++], MSR_STAR, env->star); | |
953 | hax_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar); | |
954 | hax_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar); | |
955 | hax_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask); | |
956 | hax_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase); | |
957 | #endif | |
958 | md.nr_msr = n; | |
959 | md.done = 0; | |
960 | ||
961 | return hax_sync_msr(env, &md, 1); | |
962 | } | |
963 | ||
964 | static int hax_get_fpu(CPUArchState *env) | |
965 | { | |
966 | struct fx_layout fpu; | |
967 | int i, ret; | |
968 | ||
969 | ret = hax_sync_fpu(env, &fpu, 0); | |
970 | if (ret < 0) { | |
971 | return ret; | |
972 | } | |
973 | ||
974 | env->fpstt = (fpu.fsw >> 11) & 7; | |
975 | env->fpus = fpu.fsw; | |
976 | env->fpuc = fpu.fcw; | |
977 | for (i = 0; i < 8; ++i) { | |
978 | env->fptags[i] = !((fpu.ftw >> i) & 1); | |
979 | } | |
980 | memcpy(env->fpregs, fpu.st_mm, sizeof(env->fpregs)); | |
981 | ||
982 | for (i = 0; i < 8; i++) { | |
983 | env->xmm_regs[i].ZMM_Q(0) = ldq_p(&fpu.mmx_1[i][0]); | |
984 | env->xmm_regs[i].ZMM_Q(1) = ldq_p(&fpu.mmx_1[i][8]); | |
985 | if (CPU_NB_REGS > 8) { | |
986 | env->xmm_regs[i + 8].ZMM_Q(0) = ldq_p(&fpu.mmx_2[i][0]); | |
987 | env->xmm_regs[i + 8].ZMM_Q(1) = ldq_p(&fpu.mmx_2[i][8]); | |
988 | } | |
989 | } | |
990 | env->mxcsr = fpu.mxcsr; | |
991 | ||
992 | return 0; | |
993 | } | |
994 | ||
995 | static int hax_set_fpu(CPUArchState *env) | |
996 | { | |
997 | struct fx_layout fpu; | |
998 | int i; | |
999 | ||
1000 | memset(&fpu, 0, sizeof(fpu)); | |
1001 | fpu.fsw = env->fpus & ~(7 << 11); | |
1002 | fpu.fsw |= (env->fpstt & 7) << 11; | |
1003 | fpu.fcw = env->fpuc; | |
1004 | ||
1005 | for (i = 0; i < 8; ++i) { | |
1006 | fpu.ftw |= (!env->fptags[i]) << i; | |
1007 | } | |
1008 | ||
1009 | memcpy(fpu.st_mm, env->fpregs, sizeof(env->fpregs)); | |
1010 | for (i = 0; i < 8; i++) { | |
1011 | stq_p(&fpu.mmx_1[i][0], env->xmm_regs[i].ZMM_Q(0)); | |
1012 | stq_p(&fpu.mmx_1[i][8], env->xmm_regs[i].ZMM_Q(1)); | |
1013 | if (CPU_NB_REGS > 8) { | |
1014 | stq_p(&fpu.mmx_2[i][0], env->xmm_regs[i + 8].ZMM_Q(0)); | |
1015 | stq_p(&fpu.mmx_2[i][8], env->xmm_regs[i + 8].ZMM_Q(1)); | |
1016 | } | |
1017 | } | |
1018 | ||
1019 | fpu.mxcsr = env->mxcsr; | |
1020 | ||
1021 | return hax_sync_fpu(env, &fpu, 1); | |
1022 | } | |
1023 | ||
1024 | static int hax_arch_get_registers(CPUArchState *env) | |
1025 | { | |
1026 | int ret; | |
1027 | ||
1028 | ret = hax_sync_vcpu_register(env, 0); | |
1029 | if (ret < 0) { | |
1030 | return ret; | |
1031 | } | |
1032 | ||
1033 | ret = hax_get_fpu(env); | |
1034 | if (ret < 0) { | |
1035 | return ret; | |
1036 | } | |
1037 | ||
1038 | ret = hax_get_msrs(env); | |
1039 | if (ret < 0) { | |
1040 | return ret; | |
1041 | } | |
1042 | ||
1043 | x86_update_hflags(env); | |
1044 | return 0; | |
1045 | } | |
1046 | ||
1047 | static int hax_arch_set_registers(CPUArchState *env) | |
1048 | { | |
1049 | int ret; | |
1050 | ret = hax_sync_vcpu_register(env, 1); | |
1051 | ||
1052 | if (ret < 0) { | |
1053 | fprintf(stderr, "Failed to sync vcpu reg\n"); | |
1054 | return ret; | |
1055 | } | |
1056 | ret = hax_set_fpu(env); | |
1057 | if (ret < 0) { | |
1058 | fprintf(stderr, "FPU failed\n"); | |
1059 | return ret; | |
1060 | } | |
1061 | ret = hax_set_msrs(env); | |
1062 | if (ret < 0) { | |
1063 | fprintf(stderr, "MSR failed\n"); | |
1064 | return ret; | |
1065 | } | |
1066 | ||
1067 | return 0; | |
1068 | } | |
1069 | ||
1070 | static void hax_vcpu_sync_state(CPUArchState *env, int modified) | |
1071 | { | |
1072 | if (hax_enabled()) { | |
1073 | if (modified) { | |
1074 | hax_arch_set_registers(env); | |
1075 | } else { | |
1076 | hax_arch_get_registers(env); | |
1077 | } | |
1078 | } | |
1079 | } | |
1080 | ||
/*
 * Much simpler than KVM, at least in the first stage, because:
 * we don't need to consider device pass-through, we don't need to
 * consider the framebuffer, and we may even remove the BIOS altogether.
 */
1086 | int hax_sync_vcpus(void) | |
1087 | { | |
1088 | if (hax_enabled()) { | |
1089 | CPUState *cpu; | |
1090 | ||
1091 | cpu = first_cpu; | |
1092 | if (!cpu) { | |
1093 | return 0; | |
1094 | } | |
1095 | ||
1096 | for (; cpu != NULL; cpu = CPU_NEXT(cpu)) { | |
1097 | int ret; | |
1098 | ||
1099 | ret = hax_arch_set_registers(cpu->env_ptr); | |
1100 | if (ret < 0) { | |
1101 | return ret; | |
1102 | } | |
1103 | } | |
1104 | } | |
1105 | ||
1106 | return 0; | |
1107 | } | |
1108 | ||
1109 | void hax_reset_vcpu_state(void *opaque) | |
1110 | { | |
1111 | CPUState *cpu; | |
1112 | for (cpu = first_cpu; cpu != NULL; cpu = CPU_NEXT(cpu)) { | |
1113 | cpu->hax_vcpu->tunnel->user_event_pending = 0; | |
1114 | cpu->hax_vcpu->tunnel->ready_for_interrupt_injection = 0; | |
1115 | } | |
1116 | } | |
1117 | ||
1118 | static void hax_accel_class_init(ObjectClass *oc, void *data) | |
1119 | { | |
1120 | AccelClass *ac = ACCEL_CLASS(oc); | |
1121 | ac->name = "HAX"; | |
1122 | ac->init_machine = hax_accel_init; | |
1123 | ac->allowed = &hax_allowed; | |
1124 | } | |
1125 | ||
1126 | static const TypeInfo hax_accel_type = { | |
1127 | .name = ACCEL_CLASS_NAME("hax"), | |
1128 | .parent = TYPE_ACCEL, | |
1129 | .class_init = hax_accel_class_init, | |
1130 | }; | |
1131 | ||
/* Register the HAX accelerator type description with the type system. */
static void hax_type_init(void)
{
    type_register_static(&hax_accel_type);
}

/* Hook hax_type_init() into the type_init() registration mechanism. */
type_init(hax_type_init);