]> git.proxmox.com Git - mirror_qemu.git/blob - target/i386/hax-all.c
i386: Fix signedness of hyperv_spinlock_attempts
[mirror_qemu.git] / target / i386 / hax-all.c
1 /*
2 * QEMU HAX support
3 *
4 * Copyright IBM, Corp. 2008
5 * Red Hat, Inc. 2008
6 *
7 * Authors:
8 * Anthony Liguori <aliguori@us.ibm.com>
9 * Glauber Costa <gcosta@redhat.com>
10 *
11 * Copyright (c) 2011 Intel Corporation
12 * Written by:
13 * Jiang Yunhong<yunhong.jiang@intel.com>
14 * Xin Xiaohui<xiaohui.xin@intel.com>
15 * Zhang Xiantao<xiantao.zhang@intel.com>
16 *
17 * This work is licensed under the terms of the GNU GPL, version 2 or later.
18 * See the COPYING file in the top-level directory.
19 *
20 */
21
22 /*
23 * HAX common code for both windows and darwin
24 */
25
26 #include "qemu/osdep.h"
27 #include "cpu.h"
28 #include "exec/address-spaces.h"
29
30 #include "qemu-common.h"
31 #include "hax-i386.h"
32 #include "sysemu/accel.h"
33 #include "sysemu/sysemu.h"
34 #include "qemu/main-loop.h"
35 #include "hw/boards.h"
36
/* Set to a non-zero value to enable the DPRINTF() traces below. */
#define DEBUG_HAX 0

/*
 * printf-style trace helper.  Output goes to stdout, and only when
 * DEBUG_HAX is non-zero; otherwise the statement does nothing.
 */
#define DPRINTF(fmt, ...)                             \
    do {                                              \
        if (!(DEBUG_HAX)) {                           \
            break;                                    \
        }                                             \
        fprintf(stdout, fmt, ## __VA_ARGS__);         \
    } while (0)
45
46 /* Current version */
47 const uint32_t hax_cur_version = 0x4; /* API v4: unmapping and MMIO moves */
48 /* Minimum HAX kernel version */
49 const uint32_t hax_min_version = 0x4; /* API v4: supports unmapping */
50
51 static bool hax_allowed;
52
53 struct hax_state hax_global;
54
55 static void hax_vcpu_sync_state(CPUArchState *env, int modified);
56 static int hax_arch_get_registers(CPUArchState *env);
57
58 int hax_enabled(void)
59 {
60 return hax_allowed;
61 }
62
63 int valid_hax_tunnel_size(uint16_t size)
64 {
65 return size >= sizeof(struct hax_tunnel);
66 }
67
68 hax_fd hax_vcpu_get_fd(CPUArchState *env)
69 {
70 struct hax_vcpu_state *vcpu = env_cpu(env)->hax_vcpu;
71 if (!vcpu) {
72 return HAX_INVALID_FD;
73 }
74 return vcpu->fd;
75 }
76
77 static int hax_get_capability(struct hax_state *hax)
78 {
79 int ret;
80 struct hax_capabilityinfo capinfo, *cap = &capinfo;
81
82 ret = hax_capability(hax, cap);
83 if (ret) {
84 return ret;
85 }
86
87 if ((cap->wstatus & HAX_CAP_WORKSTATUS_MASK) == HAX_CAP_STATUS_NOTWORKING) {
88 if (cap->winfo & HAX_CAP_FAILREASON_VT) {
89 DPRINTF
90 ("VTX feature is not enabled, HAX driver will not work.\n");
91 } else if (cap->winfo & HAX_CAP_FAILREASON_NX) {
92 DPRINTF
93 ("NX feature is not enabled, HAX driver will not work.\n");
94 }
95 return -ENXIO;
96
97 }
98
99 if (!(cap->winfo & HAX_CAP_UG)) {
100 fprintf(stderr, "UG mode is not supported by the hardware.\n");
101 return -ENOTSUP;
102 }
103
104 hax->supports_64bit_ramblock = !!(cap->winfo & HAX_CAP_64BIT_RAMBLOCK);
105
106 if (cap->wstatus & HAX_CAP_MEMQUOTA) {
107 if (cap->mem_quota < hax->mem_quota) {
108 fprintf(stderr, "The VM memory needed exceeds the driver limit.\n");
109 return -ENOSPC;
110 }
111 }
112 return 0;
113 }
114
115 static int hax_version_support(struct hax_state *hax)
116 {
117 int ret;
118 struct hax_module_version version;
119
120 ret = hax_mod_version(hax, &version);
121 if (ret < 0) {
122 return 0;
123 }
124
125 if (hax_min_version > version.cur_version) {
126 fprintf(stderr, "Incompatible HAX module version %d,",
127 version.cur_version);
128 fprintf(stderr, "requires minimum version %d\n", hax_min_version);
129 return 0;
130 }
131 if (hax_cur_version < version.compat_version) {
132 fprintf(stderr, "Incompatible QEMU HAX API version %x,",
133 hax_cur_version);
134 fprintf(stderr, "requires minimum HAX API version %x\n",
135 version.compat_version);
136 return 0;
137 }
138
139 return 1;
140 }
141
142 int hax_vcpu_create(int id)
143 {
144 struct hax_vcpu_state *vcpu = NULL;
145 int ret;
146
147 if (!hax_global.vm) {
148 fprintf(stderr, "vcpu %x created failed, vm is null\n", id);
149 return -1;
150 }
151
152 if (hax_global.vm->vcpus[id]) {
153 fprintf(stderr, "vcpu %x allocated already\n", id);
154 return 0;
155 }
156
157 vcpu = g_new0(struct hax_vcpu_state, 1);
158
159 ret = hax_host_create_vcpu(hax_global.vm->fd, id);
160 if (ret) {
161 fprintf(stderr, "Failed to create vcpu %x\n", id);
162 goto error;
163 }
164
165 vcpu->vcpu_id = id;
166 vcpu->fd = hax_host_open_vcpu(hax_global.vm->id, id);
167 if (hax_invalid_fd(vcpu->fd)) {
168 fprintf(stderr, "Failed to open the vcpu\n");
169 ret = -ENODEV;
170 goto error;
171 }
172
173 hax_global.vm->vcpus[id] = vcpu;
174
175 ret = hax_host_setup_vcpu_channel(vcpu);
176 if (ret) {
177 fprintf(stderr, "Invalid hax tunnel size\n");
178 ret = -EINVAL;
179 goto error;
180 }
181 return 0;
182
183 error:
184 /* vcpu and tunnel will be closed automatically */
185 if (vcpu && !hax_invalid_fd(vcpu->fd)) {
186 hax_close_fd(vcpu->fd);
187 }
188
189 hax_global.vm->vcpus[id] = NULL;
190 g_free(vcpu);
191 return -1;
192 }
193
194 int hax_vcpu_destroy(CPUState *cpu)
195 {
196 struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
197
198 if (!hax_global.vm) {
199 fprintf(stderr, "vcpu %x destroy failed, vm is null\n", vcpu->vcpu_id);
200 return -1;
201 }
202
203 if (!vcpu) {
204 return 0;
205 }
206
207 /*
208 * 1. The hax_tunnel is also destroyed when vcpu is destroyed
209 * 2. close fd will cause hax module vcpu be cleaned
210 */
211 hax_close_fd(vcpu->fd);
212 hax_global.vm->vcpus[vcpu->vcpu_id] = NULL;
213 g_free(vcpu);
214 return 0;
215 }
216
217 int hax_init_vcpu(CPUState *cpu)
218 {
219 int ret;
220
221 ret = hax_vcpu_create(cpu->cpu_index);
222 if (ret < 0) {
223 fprintf(stderr, "Failed to create HAX vcpu\n");
224 exit(-1);
225 }
226
227 cpu->hax_vcpu = hax_global.vm->vcpus[cpu->cpu_index];
228 cpu->vcpu_dirty = true;
229 qemu_register_reset(hax_reset_vcpu_state, (CPUArchState *) (cpu->env_ptr));
230
231 return ret;
232 }
233
234 struct hax_vm *hax_vm_create(struct hax_state *hax)
235 {
236 struct hax_vm *vm;
237 int vm_id = 0, ret;
238
239 if (hax_invalid_fd(hax->fd)) {
240 return NULL;
241 }
242
243 if (hax->vm) {
244 return hax->vm;
245 }
246
247 vm = g_new0(struct hax_vm, 1);
248
249 ret = hax_host_create_vm(hax, &vm_id);
250 if (ret) {
251 fprintf(stderr, "Failed to create vm %x\n", ret);
252 goto error;
253 }
254 vm->id = vm_id;
255 vm->fd = hax_host_open_vm(hax, vm_id);
256 if (hax_invalid_fd(vm->fd)) {
257 fprintf(stderr, "Failed to open vm %d\n", vm_id);
258 goto error;
259 }
260
261 hax->vm = vm;
262 return vm;
263
264 error:
265 g_free(vm);
266 hax->vm = NULL;
267 return NULL;
268 }
269
270 int hax_vm_destroy(struct hax_vm *vm)
271 {
272 int i;
273
274 for (i = 0; i < HAX_MAX_VCPU; i++)
275 if (vm->vcpus[i]) {
276 fprintf(stderr, "VCPU should be cleaned before vm clean\n");
277 return -1;
278 }
279 hax_close_fd(vm->fd);
280 g_free(vm);
281 hax_global.vm = NULL;
282 return 0;
283 }
284
285 static void hax_handle_interrupt(CPUState *cpu, int mask)
286 {
287 cpu->interrupt_request |= mask;
288
289 if (!qemu_cpu_is_self(cpu)) {
290 qemu_cpu_kick(cpu);
291 }
292 }
293
294 static int hax_init(ram_addr_t ram_size)
295 {
296 struct hax_state *hax = NULL;
297 struct hax_qemu_version qversion;
298 int ret;
299
300 hax = &hax_global;
301
302 memset(hax, 0, sizeof(struct hax_state));
303 hax->mem_quota = ram_size;
304
305 hax->fd = hax_mod_open();
306 if (hax_invalid_fd(hax->fd)) {
307 hax->fd = 0;
308 ret = -ENODEV;
309 goto error;
310 }
311
312 ret = hax_get_capability(hax);
313
314 if (ret) {
315 if (ret != -ENOSPC) {
316 ret = -EINVAL;
317 }
318 goto error;
319 }
320
321 if (!hax_version_support(hax)) {
322 ret = -EINVAL;
323 goto error;
324 }
325
326 hax->vm = hax_vm_create(hax);
327 if (!hax->vm) {
328 fprintf(stderr, "Failed to create HAX VM\n");
329 ret = -EINVAL;
330 goto error;
331 }
332
333 hax_memory_init();
334
335 qversion.cur_version = hax_cur_version;
336 qversion.min_version = hax_min_version;
337 hax_notify_qemu_version(hax->vm->fd, &qversion);
338 cpu_interrupt_handler = hax_handle_interrupt;
339
340 return ret;
341 error:
342 if (hax->vm) {
343 hax_vm_destroy(hax->vm);
344 }
345 if (hax->fd) {
346 hax_mod_close(hax);
347 }
348
349 return ret;
350 }
351
352 static int hax_accel_init(MachineState *ms)
353 {
354 int ret = hax_init(ms->ram_size);
355
356 if (ret && (ret != -ENOSPC)) {
357 fprintf(stderr, "No accelerator found.\n");
358 } else {
359 fprintf(stdout, "HAX is %s and emulator runs in %s mode.\n",
360 !ret ? "working" : "not working",
361 !ret ? "fast virt" : "emulation");
362 }
363 return ret;
364 }
365
366 static int hax_handle_fastmmio(CPUArchState *env, struct hax_fastmmio *hft)
367 {
368 if (hft->direction < 2) {
369 cpu_physical_memory_rw(hft->gpa, (uint8_t *) &hft->value, hft->size,
370 hft->direction);
371 } else {
372 /*
373 * HAX API v4 supports transferring data between two MMIO addresses,
374 * hft->gpa and hft->gpa2 (instructions such as MOVS require this):
375 * hft->direction == 2: gpa ==> gpa2
376 */
377 uint64_t value;
378 cpu_physical_memory_rw(hft->gpa, (uint8_t *) &value, hft->size, 0);
379 cpu_physical_memory_rw(hft->gpa2, (uint8_t *) &value, hft->size, 1);
380 }
381
382 return 0;
383 }
384
385 static int hax_handle_io(CPUArchState *env, uint32_t df, uint16_t port,
386 int direction, int size, int count, void *buffer)
387 {
388 uint8_t *ptr;
389 int i;
390 MemTxAttrs attrs = { 0 };
391
392 if (!df) {
393 ptr = (uint8_t *) buffer;
394 } else {
395 ptr = buffer + size * count - size;
396 }
397 for (i = 0; i < count; i++) {
398 address_space_rw(&address_space_io, port, attrs,
399 ptr, size, direction == HAX_EXIT_IO_OUT);
400 if (!df) {
401 ptr += size;
402 } else {
403 ptr -= size;
404 }
405 }
406
407 return 0;
408 }
409
410 static int hax_vcpu_interrupt(CPUArchState *env)
411 {
412 CPUState *cpu = env_cpu(env);
413 struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
414 struct hax_tunnel *ht = vcpu->tunnel;
415
416 /*
417 * Try to inject an interrupt if the guest can accept it
418 * Unlike KVM, HAX kernel check for the eflags, instead of qemu
419 */
420 if (ht->ready_for_interrupt_injection &&
421 (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
422 int irq;
423
424 irq = cpu_get_pic_interrupt(env);
425 if (irq >= 0) {
426 hax_inject_interrupt(env, irq);
427 cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
428 }
429 }
430
431 /* If we have an interrupt but the guest is not ready to receive an
432 * interrupt, request an interrupt window exit. This will
433 * cause a return to userspace as soon as the guest is ready to
434 * receive interrupts. */
435 if ((cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
436 ht->request_interrupt_window = 1;
437 } else {
438 ht->request_interrupt_window = 0;
439 }
440 return 0;
441 }
442
443 void hax_raise_event(CPUState *cpu)
444 {
445 struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
446
447 if (!vcpu) {
448 return;
449 }
450 vcpu->tunnel->user_event_pending = 1;
451 }
452
453 /*
454 * Ask hax kernel module to run the CPU for us till:
455 * 1. Guest crash or shutdown
456 * 2. Need QEMU's emulation like guest execute MMIO instruction
457 * 3. Guest execute HLT
458 * 4. QEMU have Signal/event pending
459 * 5. An unknown VMX exit happens
460 */
461 static int hax_vcpu_hax_exec(CPUArchState *env)
462 {
463 int ret = 0;
464 CPUState *cpu = env_cpu(env);
465 X86CPU *x86_cpu = X86_CPU(cpu);
466 struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
467 struct hax_tunnel *ht = vcpu->tunnel;
468
469 if (!hax_enabled()) {
470 DPRINTF("Trying to vcpu execute at eip:" TARGET_FMT_lx "\n", env->eip);
471 return 0;
472 }
473
474 if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
475 cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
476 apic_poll_irq(x86_cpu->apic_state);
477 }
478
479 /* After a vcpu is halted (either because it is an AP and has just been
480 * reset, or because it has executed the HLT instruction), it will not be
481 * run (hax_vcpu_run()) until it is unhalted. The next few if blocks check
482 * for events that may change the halted state of this vcpu:
483 * a) Maskable interrupt, when RFLAGS.IF is 1;
484 * Note: env->eflags may not reflect the current RFLAGS state, because
485 * it is not updated after each hax_vcpu_run(). We cannot afford
486 * to fail to recognize any unhalt-by-maskable-interrupt event
487 * (in which case the vcpu will halt forever), and yet we cannot
488 * afford the overhead of hax_vcpu_sync_state(). The current
489 * solution is to err on the side of caution and have the HLT
490 * handler (see case HAX_EXIT_HLT below) unconditionally set the
491 * IF_MASK bit in env->eflags, which, in effect, disables the
492 * RFLAGS.IF check.
493 * b) NMI;
494 * c) INIT signal;
495 * d) SIPI signal.
496 */
497 if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
498 (env->eflags & IF_MASK)) ||
499 (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
500 cpu->halted = 0;
501 }
502
503 if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
504 DPRINTF("\nhax_vcpu_hax_exec: handling INIT for %d\n",
505 cpu->cpu_index);
506 do_cpu_init(x86_cpu);
507 hax_vcpu_sync_state(env, 1);
508 }
509
510 if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
511 DPRINTF("hax_vcpu_hax_exec: handling SIPI for %d\n",
512 cpu->cpu_index);
513 hax_vcpu_sync_state(env, 0);
514 do_cpu_sipi(x86_cpu);
515 hax_vcpu_sync_state(env, 1);
516 }
517
518 if (cpu->halted) {
519 /* If this vcpu is halted, we must not ask HAXM to run it. Instead, we
520 * break out of hax_smp_cpu_exec() as if this vcpu had executed HLT.
521 * That way, this vcpu thread will be trapped in qemu_wait_io_event(),
522 * until the vcpu is unhalted.
523 */
524 cpu->exception_index = EXCP_HLT;
525 return 0;
526 }
527
528 do {
529 int hax_ret;
530
531 if (cpu->exit_request) {
532 ret = 1;
533 break;
534 }
535
536 hax_vcpu_interrupt(env);
537
538 qemu_mutex_unlock_iothread();
539 cpu_exec_start(cpu);
540 hax_ret = hax_vcpu_run(vcpu);
541 cpu_exec_end(cpu);
542 qemu_mutex_lock_iothread();
543
544 /* Simply continue the vcpu_run if system call interrupted */
545 if (hax_ret == -EINTR || hax_ret == -EAGAIN) {
546 DPRINTF("io window interrupted\n");
547 continue;
548 }
549
550 if (hax_ret < 0) {
551 fprintf(stderr, "vcpu run failed for vcpu %x\n", vcpu->vcpu_id);
552 abort();
553 }
554 switch (ht->_exit_status) {
555 case HAX_EXIT_IO:
556 ret = hax_handle_io(env, ht->pio._df, ht->pio._port,
557 ht->pio._direction,
558 ht->pio._size, ht->pio._count, vcpu->iobuf);
559 break;
560 case HAX_EXIT_FAST_MMIO:
561 ret = hax_handle_fastmmio(env, (struct hax_fastmmio *) vcpu->iobuf);
562 break;
563 /* Guest state changed, currently only for shutdown */
564 case HAX_EXIT_STATECHANGE:
565 fprintf(stdout, "VCPU shutdown request\n");
566 qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
567 hax_vcpu_sync_state(env, 0);
568 ret = 1;
569 break;
570 case HAX_EXIT_UNKNOWN_VMEXIT:
571 fprintf(stderr, "Unknown VMX exit %x from guest\n",
572 ht->_exit_reason);
573 qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
574 hax_vcpu_sync_state(env, 0);
575 cpu_dump_state(cpu, stderr, 0);
576 ret = -1;
577 break;
578 case HAX_EXIT_HLT:
579 if (!(cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
580 !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
581 /* hlt instruction with interrupt disabled is shutdown */
582 env->eflags |= IF_MASK;
583 cpu->halted = 1;
584 cpu->exception_index = EXCP_HLT;
585 ret = 1;
586 }
587 break;
588 /* these situations will continue to hax module */
589 case HAX_EXIT_INTERRUPT:
590 case HAX_EXIT_PAUSED:
591 break;
592 case HAX_EXIT_MMIO:
593 /* Should not happen on UG system */
594 fprintf(stderr, "HAX: unsupported MMIO emulation\n");
595 ret = -1;
596 break;
597 case HAX_EXIT_REAL:
598 /* Should not happen on UG system */
599 fprintf(stderr, "HAX: unimplemented real mode emulation\n");
600 ret = -1;
601 break;
602 default:
603 fprintf(stderr, "Unknown exit %x from HAX\n", ht->_exit_status);
604 qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
605 hax_vcpu_sync_state(env, 0);
606 cpu_dump_state(cpu, stderr, 0);
607 ret = 1;
608 break;
609 }
610 } while (!ret);
611
612 if (cpu->exit_request) {
613 cpu->exit_request = 0;
614 cpu->exception_index = EXCP_INTERRUPT;
615 }
616 return ret < 0;
617 }
618
619 static void do_hax_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
620 {
621 CPUArchState *env = cpu->env_ptr;
622
623 hax_arch_get_registers(env);
624 cpu->vcpu_dirty = true;
625 }
626
627 void hax_cpu_synchronize_state(CPUState *cpu)
628 {
629 if (!cpu->vcpu_dirty) {
630 run_on_cpu(cpu, do_hax_cpu_synchronize_state, RUN_ON_CPU_NULL);
631 }
632 }
633
634 static void do_hax_cpu_synchronize_post_reset(CPUState *cpu,
635 run_on_cpu_data arg)
636 {
637 CPUArchState *env = cpu->env_ptr;
638
639 hax_vcpu_sync_state(env, 1);
640 cpu->vcpu_dirty = false;
641 }
642
643 void hax_cpu_synchronize_post_reset(CPUState *cpu)
644 {
645 run_on_cpu(cpu, do_hax_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
646 }
647
648 static void do_hax_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
649 {
650 CPUArchState *env = cpu->env_ptr;
651
652 hax_vcpu_sync_state(env, 1);
653 cpu->vcpu_dirty = false;
654 }
655
656 void hax_cpu_synchronize_post_init(CPUState *cpu)
657 {
658 run_on_cpu(cpu, do_hax_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
659 }
660
661 static void do_hax_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
662 {
663 cpu->vcpu_dirty = true;
664 }
665
666 void hax_cpu_synchronize_pre_loadvm(CPUState *cpu)
667 {
668 run_on_cpu(cpu, do_hax_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
669 }
670
671 int hax_smp_cpu_exec(CPUState *cpu)
672 {
673 CPUArchState *env = (CPUArchState *) (cpu->env_ptr);
674 int fatal;
675 int ret;
676
677 while (1) {
678 if (cpu->exception_index >= EXCP_INTERRUPT) {
679 ret = cpu->exception_index;
680 cpu->exception_index = -1;
681 break;
682 }
683
684 fatal = hax_vcpu_hax_exec(env);
685
686 if (fatal) {
687 fprintf(stderr, "Unsupported HAX vcpu return\n");
688 abort();
689 }
690 }
691
692 return ret;
693 }
694
695 static void set_v8086_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
696 {
697 memset(lhs, 0, sizeof(struct segment_desc_t));
698 lhs->selector = rhs->selector;
699 lhs->base = rhs->base;
700 lhs->limit = rhs->limit;
701 lhs->type = 3;
702 lhs->present = 1;
703 lhs->dpl = 3;
704 lhs->operand_size = 0;
705 lhs->desc = 1;
706 lhs->long_mode = 0;
707 lhs->granularity = 0;
708 lhs->available = 0;
709 }
710
711 static void get_seg(SegmentCache *lhs, const struct segment_desc_t *rhs)
712 {
713 lhs->selector = rhs->selector;
714 lhs->base = rhs->base;
715 lhs->limit = rhs->limit;
716 lhs->flags = (rhs->type << DESC_TYPE_SHIFT)
717 | (rhs->present * DESC_P_MASK)
718 | (rhs->dpl << DESC_DPL_SHIFT)
719 | (rhs->operand_size << DESC_B_SHIFT)
720 | (rhs->desc * DESC_S_MASK)
721 | (rhs->long_mode << DESC_L_SHIFT)
722 | (rhs->granularity * DESC_G_MASK) | (rhs->available * DESC_AVL_MASK);
723 }
724
725 static void set_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
726 {
727 unsigned flags = rhs->flags;
728
729 memset(lhs, 0, sizeof(struct segment_desc_t));
730 lhs->selector = rhs->selector;
731 lhs->base = rhs->base;
732 lhs->limit = rhs->limit;
733 lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
734 lhs->present = (flags & DESC_P_MASK) != 0;
735 lhs->dpl = rhs->selector & 3;
736 lhs->operand_size = (flags >> DESC_B_SHIFT) & 1;
737 lhs->desc = (flags & DESC_S_MASK) != 0;
738 lhs->long_mode = (flags >> DESC_L_SHIFT) & 1;
739 lhs->granularity = (flags & DESC_G_MASK) != 0;
740 lhs->available = (flags & DESC_AVL_MASK) != 0;
741 }
742
743 static void hax_getput_reg(uint64_t *hax_reg, target_ulong *qemu_reg, int set)
744 {
745 target_ulong reg = *hax_reg;
746
747 if (set) {
748 *hax_reg = *qemu_reg;
749 } else {
750 *qemu_reg = reg;
751 }
752 }
753
754 /* The sregs has been synced with HAX kernel already before this call */
755 static int hax_get_segments(CPUArchState *env, struct vcpu_state_t *sregs)
756 {
757 get_seg(&env->segs[R_CS], &sregs->_cs);
758 get_seg(&env->segs[R_DS], &sregs->_ds);
759 get_seg(&env->segs[R_ES], &sregs->_es);
760 get_seg(&env->segs[R_FS], &sregs->_fs);
761 get_seg(&env->segs[R_GS], &sregs->_gs);
762 get_seg(&env->segs[R_SS], &sregs->_ss);
763
764 get_seg(&env->tr, &sregs->_tr);
765 get_seg(&env->ldt, &sregs->_ldt);
766 env->idt.limit = sregs->_idt.limit;
767 env->idt.base = sregs->_idt.base;
768 env->gdt.limit = sregs->_gdt.limit;
769 env->gdt.base = sregs->_gdt.base;
770 return 0;
771 }
772
773 static int hax_set_segments(CPUArchState *env, struct vcpu_state_t *sregs)
774 {
775 if ((env->eflags & VM_MASK)) {
776 set_v8086_seg(&sregs->_cs, &env->segs[R_CS]);
777 set_v8086_seg(&sregs->_ds, &env->segs[R_DS]);
778 set_v8086_seg(&sregs->_es, &env->segs[R_ES]);
779 set_v8086_seg(&sregs->_fs, &env->segs[R_FS]);
780 set_v8086_seg(&sregs->_gs, &env->segs[R_GS]);
781 set_v8086_seg(&sregs->_ss, &env->segs[R_SS]);
782 } else {
783 set_seg(&sregs->_cs, &env->segs[R_CS]);
784 set_seg(&sregs->_ds, &env->segs[R_DS]);
785 set_seg(&sregs->_es, &env->segs[R_ES]);
786 set_seg(&sregs->_fs, &env->segs[R_FS]);
787 set_seg(&sregs->_gs, &env->segs[R_GS]);
788 set_seg(&sregs->_ss, &env->segs[R_SS]);
789
790 if (env->cr[0] & CR0_PE_MASK) {
791 /* force ss cpl to cs cpl */
792 sregs->_ss.selector = (sregs->_ss.selector & ~3) |
793 (sregs->_cs.selector & 3);
794 sregs->_ss.dpl = sregs->_ss.selector & 3;
795 }
796 }
797
798 set_seg(&sregs->_tr, &env->tr);
799 set_seg(&sregs->_ldt, &env->ldt);
800 sregs->_idt.limit = env->idt.limit;
801 sregs->_idt.base = env->idt.base;
802 sregs->_gdt.limit = env->gdt.limit;
803 sregs->_gdt.base = env->gdt.base;
804 return 0;
805 }
806
807 static int hax_sync_vcpu_register(CPUArchState *env, int set)
808 {
809 struct vcpu_state_t regs;
810 int ret;
811 memset(&regs, 0, sizeof(struct vcpu_state_t));
812
813 if (!set) {
814 ret = hax_sync_vcpu_state(env, &regs, 0);
815 if (ret < 0) {
816 return -1;
817 }
818 }
819
820 /* generic register */
821 hax_getput_reg(&regs._rax, &env->regs[R_EAX], set);
822 hax_getput_reg(&regs._rbx, &env->regs[R_EBX], set);
823 hax_getput_reg(&regs._rcx, &env->regs[R_ECX], set);
824 hax_getput_reg(&regs._rdx, &env->regs[R_EDX], set);
825 hax_getput_reg(&regs._rsi, &env->regs[R_ESI], set);
826 hax_getput_reg(&regs._rdi, &env->regs[R_EDI], set);
827 hax_getput_reg(&regs._rsp, &env->regs[R_ESP], set);
828 hax_getput_reg(&regs._rbp, &env->regs[R_EBP], set);
829 #ifdef TARGET_X86_64
830 hax_getput_reg(&regs._r8, &env->regs[8], set);
831 hax_getput_reg(&regs._r9, &env->regs[9], set);
832 hax_getput_reg(&regs._r10, &env->regs[10], set);
833 hax_getput_reg(&regs._r11, &env->regs[11], set);
834 hax_getput_reg(&regs._r12, &env->regs[12], set);
835 hax_getput_reg(&regs._r13, &env->regs[13], set);
836 hax_getput_reg(&regs._r14, &env->regs[14], set);
837 hax_getput_reg(&regs._r15, &env->regs[15], set);
838 #endif
839 hax_getput_reg(&regs._rflags, &env->eflags, set);
840 hax_getput_reg(&regs._rip, &env->eip, set);
841
842 if (set) {
843 regs._cr0 = env->cr[0];
844 regs._cr2 = env->cr[2];
845 regs._cr3 = env->cr[3];
846 regs._cr4 = env->cr[4];
847 hax_set_segments(env, &regs);
848 } else {
849 env->cr[0] = regs._cr0;
850 env->cr[2] = regs._cr2;
851 env->cr[3] = regs._cr3;
852 env->cr[4] = regs._cr4;
853 hax_get_segments(env, &regs);
854 }
855
856 if (set) {
857 ret = hax_sync_vcpu_state(env, &regs, 1);
858 if (ret < 0) {
859 return -1;
860 }
861 }
862 return 0;
863 }
864
865 static void hax_msr_entry_set(struct vmx_msr *item, uint32_t index,
866 uint64_t value)
867 {
868 item->entry = index;
869 item->value = value;
870 }
871
872 static int hax_get_msrs(CPUArchState *env)
873 {
874 struct hax_msr_data md;
875 struct vmx_msr *msrs = md.entries;
876 int ret, i, n;
877
878 n = 0;
879 msrs[n++].entry = MSR_IA32_SYSENTER_CS;
880 msrs[n++].entry = MSR_IA32_SYSENTER_ESP;
881 msrs[n++].entry = MSR_IA32_SYSENTER_EIP;
882 msrs[n++].entry = MSR_IA32_TSC;
883 #ifdef TARGET_X86_64
884 msrs[n++].entry = MSR_EFER;
885 msrs[n++].entry = MSR_STAR;
886 msrs[n++].entry = MSR_LSTAR;
887 msrs[n++].entry = MSR_CSTAR;
888 msrs[n++].entry = MSR_FMASK;
889 msrs[n++].entry = MSR_KERNELGSBASE;
890 #endif
891 md.nr_msr = n;
892 ret = hax_sync_msr(env, &md, 0);
893 if (ret < 0) {
894 return ret;
895 }
896
897 for (i = 0; i < md.done; i++) {
898 switch (msrs[i].entry) {
899 case MSR_IA32_SYSENTER_CS:
900 env->sysenter_cs = msrs[i].value;
901 break;
902 case MSR_IA32_SYSENTER_ESP:
903 env->sysenter_esp = msrs[i].value;
904 break;
905 case MSR_IA32_SYSENTER_EIP:
906 env->sysenter_eip = msrs[i].value;
907 break;
908 case MSR_IA32_TSC:
909 env->tsc = msrs[i].value;
910 break;
911 #ifdef TARGET_X86_64
912 case MSR_EFER:
913 env->efer = msrs[i].value;
914 break;
915 case MSR_STAR:
916 env->star = msrs[i].value;
917 break;
918 case MSR_LSTAR:
919 env->lstar = msrs[i].value;
920 break;
921 case MSR_CSTAR:
922 env->cstar = msrs[i].value;
923 break;
924 case MSR_FMASK:
925 env->fmask = msrs[i].value;
926 break;
927 case MSR_KERNELGSBASE:
928 env->kernelgsbase = msrs[i].value;
929 break;
930 #endif
931 }
932 }
933
934 return 0;
935 }
936
937 static int hax_set_msrs(CPUArchState *env)
938 {
939 struct hax_msr_data md;
940 struct vmx_msr *msrs;
941 msrs = md.entries;
942 int n = 0;
943
944 memset(&md, 0, sizeof(struct hax_msr_data));
945 hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
946 hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
947 hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
948 hax_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
949 #ifdef TARGET_X86_64
950 hax_msr_entry_set(&msrs[n++], MSR_EFER, env->efer);
951 hax_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
952 hax_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
953 hax_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
954 hax_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
955 hax_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
956 #endif
957 md.nr_msr = n;
958 md.done = 0;
959
960 return hax_sync_msr(env, &md, 1);
961 }
962
963 static int hax_get_fpu(CPUArchState *env)
964 {
965 struct fx_layout fpu;
966 int i, ret;
967
968 ret = hax_sync_fpu(env, &fpu, 0);
969 if (ret < 0) {
970 return ret;
971 }
972
973 env->fpstt = (fpu.fsw >> 11) & 7;
974 env->fpus = fpu.fsw;
975 env->fpuc = fpu.fcw;
976 for (i = 0; i < 8; ++i) {
977 env->fptags[i] = !((fpu.ftw >> i) & 1);
978 }
979 memcpy(env->fpregs, fpu.st_mm, sizeof(env->fpregs));
980
981 for (i = 0; i < 8; i++) {
982 env->xmm_regs[i].ZMM_Q(0) = ldq_p(&fpu.mmx_1[i][0]);
983 env->xmm_regs[i].ZMM_Q(1) = ldq_p(&fpu.mmx_1[i][8]);
984 if (CPU_NB_REGS > 8) {
985 env->xmm_regs[i + 8].ZMM_Q(0) = ldq_p(&fpu.mmx_2[i][0]);
986 env->xmm_regs[i + 8].ZMM_Q(1) = ldq_p(&fpu.mmx_2[i][8]);
987 }
988 }
989 env->mxcsr = fpu.mxcsr;
990
991 return 0;
992 }
993
994 static int hax_set_fpu(CPUArchState *env)
995 {
996 struct fx_layout fpu;
997 int i;
998
999 memset(&fpu, 0, sizeof(fpu));
1000 fpu.fsw = env->fpus & ~(7 << 11);
1001 fpu.fsw |= (env->fpstt & 7) << 11;
1002 fpu.fcw = env->fpuc;
1003
1004 for (i = 0; i < 8; ++i) {
1005 fpu.ftw |= (!env->fptags[i]) << i;
1006 }
1007
1008 memcpy(fpu.st_mm, env->fpregs, sizeof(env->fpregs));
1009 for (i = 0; i < 8; i++) {
1010 stq_p(&fpu.mmx_1[i][0], env->xmm_regs[i].ZMM_Q(0));
1011 stq_p(&fpu.mmx_1[i][8], env->xmm_regs[i].ZMM_Q(1));
1012 if (CPU_NB_REGS > 8) {
1013 stq_p(&fpu.mmx_2[i][0], env->xmm_regs[i + 8].ZMM_Q(0));
1014 stq_p(&fpu.mmx_2[i][8], env->xmm_regs[i + 8].ZMM_Q(1));
1015 }
1016 }
1017
1018 fpu.mxcsr = env->mxcsr;
1019
1020 return hax_sync_fpu(env, &fpu, 1);
1021 }
1022
1023 static int hax_arch_get_registers(CPUArchState *env)
1024 {
1025 int ret;
1026
1027 ret = hax_sync_vcpu_register(env, 0);
1028 if (ret < 0) {
1029 return ret;
1030 }
1031
1032 ret = hax_get_fpu(env);
1033 if (ret < 0) {
1034 return ret;
1035 }
1036
1037 ret = hax_get_msrs(env);
1038 if (ret < 0) {
1039 return ret;
1040 }
1041
1042 x86_update_hflags(env);
1043 return 0;
1044 }
1045
1046 static int hax_arch_set_registers(CPUArchState *env)
1047 {
1048 int ret;
1049 ret = hax_sync_vcpu_register(env, 1);
1050
1051 if (ret < 0) {
1052 fprintf(stderr, "Failed to sync vcpu reg\n");
1053 return ret;
1054 }
1055 ret = hax_set_fpu(env);
1056 if (ret < 0) {
1057 fprintf(stderr, "FPU failed\n");
1058 return ret;
1059 }
1060 ret = hax_set_msrs(env);
1061 if (ret < 0) {
1062 fprintf(stderr, "MSR failed\n");
1063 return ret;
1064 }
1065
1066 return 0;
1067 }
1068
1069 static void hax_vcpu_sync_state(CPUArchState *env, int modified)
1070 {
1071 if (hax_enabled()) {
1072 if (modified) {
1073 hax_arch_set_registers(env);
1074 } else {
1075 hax_arch_get_registers(env);
1076 }
1077 }
1078 }
1079
1080 /*
1081 * much simpler than kvm, at least in first stage because:
1082 * We don't need consider the device pass-through, we don't need
1083 * consider the framebuffer, and we may even remove the bios at all
1084 */
1085 int hax_sync_vcpus(void)
1086 {
1087 if (hax_enabled()) {
1088 CPUState *cpu;
1089
1090 cpu = first_cpu;
1091 if (!cpu) {
1092 return 0;
1093 }
1094
1095 for (; cpu != NULL; cpu = CPU_NEXT(cpu)) {
1096 int ret;
1097
1098 ret = hax_arch_set_registers(cpu->env_ptr);
1099 if (ret < 0) {
1100 return ret;
1101 }
1102 }
1103 }
1104
1105 return 0;
1106 }
1107
1108 void hax_reset_vcpu_state(void *opaque)
1109 {
1110 CPUState *cpu;
1111 for (cpu = first_cpu; cpu != NULL; cpu = CPU_NEXT(cpu)) {
1112 cpu->hax_vcpu->tunnel->user_event_pending = 0;
1113 cpu->hax_vcpu->tunnel->ready_for_interrupt_injection = 0;
1114 }
1115 }
1116
1117 static void hax_accel_class_init(ObjectClass *oc, void *data)
1118 {
1119 AccelClass *ac = ACCEL_CLASS(oc);
1120 ac->name = "HAX";
1121 ac->init_machine = hax_accel_init;
1122 ac->allowed = &hax_allowed;
1123 }
1124
1125 static const TypeInfo hax_accel_type = {
1126 .name = ACCEL_CLASS_NAME("hax"),
1127 .parent = TYPE_ACCEL,
1128 .class_init = hax_accel_class_init,
1129 };
1130
1131 static void hax_type_init(void)
1132 {
1133 type_register_static(&hax_accel_type);
1134 }
1135
1136 type_init(hax_type_init);