]> git.proxmox.com Git - mirror_qemu.git/blob - target/i386/hax-all.c
shutdown: Add source information to SHUTDOWN and RESET
[mirror_qemu.git] / target / i386 / hax-all.c
1 /*
2 * QEMU HAX support
3 *
4 * Copyright IBM, Corp. 2008
5 * Red Hat, Inc. 2008
6 *
7 * Authors:
8 * Anthony Liguori <aliguori@us.ibm.com>
9 * Glauber Costa <gcosta@redhat.com>
10 *
11 * Copyright (c) 2011 Intel Corporation
12 * Written by:
13 * Jiang Yunhong<yunhong.jiang@intel.com>
14 * Xin Xiaohui<xiaohui.xin@intel.com>
15 * Zhang Xiantao<xiantao.zhang@intel.com>
16 *
17 * This work is licensed under the terms of the GNU GPL, version 2 or later.
18 * See the COPYING file in the top-level directory.
19 *
20 */
21
22 /*
23 * HAX common code for both windows and darwin
24 */
25
26 #include "qemu/osdep.h"
27 #include "cpu.h"
28 #include "exec/address-spaces.h"
29 #include "exec/exec-all.h"
30 #include "exec/ioport.h"
31
32 #include "qemu-common.h"
33 #include "strings.h"
34 #include "hax-i386.h"
35 #include "sysemu/accel.h"
36 #include "sysemu/sysemu.h"
37 #include "qemu/main-loop.h"
38 #include "hw/boards.h"
39
/* Compile-time switch for HAX debug tracing; set to 1 to enable DPRINTF. */
#define DEBUG_HAX 0

/*
 * Debug print helper: forwards its arguments to fprintf(stdout, ...) only
 * when DEBUG_HAX is non-zero.  The call sits inside an ordinary `if`, so the
 * format string and arguments are still compiled when tracing is disabled.
 */
#define DPRINTF(fmt, ...) \
    do { \
        if (DEBUG_HAX) { \
            fprintf(stdout, fmt, ## __VA_ARGS__); \
        } \
    } while (0)
48
/*
 * Current version: reported to the HAX module by hax_init() and compared
 * against the module's compat_version in hax_version_support().
 */
const uint32_t hax_cur_version = 0x4; /* API v4: unmapping and MMIO moves */
/*
 * Minimum HAX kernel version: hax_version_support() rejects modules whose
 * cur_version is below this value.
 */
const uint32_t hax_min_version = 0x4; /* API v4: supports unmapping */

/* Value returned by hax_enabled(); its address is handed to AccelClass. */
static bool hax_allowed;

/* Global HAX state; zeroed and filled in by hax_init(). */
struct hax_state hax_global;

/* Forward declarations for helpers defined further down in this file. */
static void hax_vcpu_sync_state(CPUArchState *env, int modified);
static int hax_arch_get_registers(CPUArchState *env);
60
61 int hax_enabled(void)
62 {
63 return hax_allowed;
64 }
65
66 int valid_hax_tunnel_size(uint16_t size)
67 {
68 return size >= sizeof(struct hax_tunnel);
69 }
70
71 hax_fd hax_vcpu_get_fd(CPUArchState *env)
72 {
73 struct hax_vcpu_state *vcpu = ENV_GET_CPU(env)->hax_vcpu;
74 if (!vcpu) {
75 return HAX_INVALID_FD;
76 }
77 return vcpu->fd;
78 }
79
80 static int hax_get_capability(struct hax_state *hax)
81 {
82 int ret;
83 struct hax_capabilityinfo capinfo, *cap = &capinfo;
84
85 ret = hax_capability(hax, cap);
86 if (ret) {
87 return ret;
88 }
89
90 if ((cap->wstatus & HAX_CAP_WORKSTATUS_MASK) == HAX_CAP_STATUS_NOTWORKING) {
91 if (cap->winfo & HAX_CAP_FAILREASON_VT) {
92 DPRINTF
93 ("VTX feature is not enabled, HAX driver will not work.\n");
94 } else if (cap->winfo & HAX_CAP_FAILREASON_NX) {
95 DPRINTF
96 ("NX feature is not enabled, HAX driver will not work.\n");
97 }
98 return -ENXIO;
99
100 }
101
102 if (!(cap->winfo & HAX_CAP_UG)) {
103 fprintf(stderr, "UG mode is not supported by the hardware.\n");
104 return -ENOTSUP;
105 }
106
107 if (cap->wstatus & HAX_CAP_MEMQUOTA) {
108 if (cap->mem_quota < hax->mem_quota) {
109 fprintf(stderr, "The VM memory needed exceeds the driver limit.\n");
110 return -ENOSPC;
111 }
112 }
113 return 0;
114 }
115
116 static int hax_version_support(struct hax_state *hax)
117 {
118 int ret;
119 struct hax_module_version version;
120
121 ret = hax_mod_version(hax, &version);
122 if (ret < 0) {
123 return 0;
124 }
125
126 if (hax_min_version > version.cur_version) {
127 fprintf(stderr, "Incompatible HAX module version %d,",
128 version.cur_version);
129 fprintf(stderr, "requires minimum version %d\n", hax_min_version);
130 return 0;
131 }
132 if (hax_cur_version < version.compat_version) {
133 fprintf(stderr, "Incompatible QEMU HAX API version %x,",
134 hax_cur_version);
135 fprintf(stderr, "requires minimum HAX API version %x\n",
136 version.compat_version);
137 return 0;
138 }
139
140 return 1;
141 }
142
143 int hax_vcpu_create(int id)
144 {
145 struct hax_vcpu_state *vcpu = NULL;
146 int ret;
147
148 if (!hax_global.vm) {
149 fprintf(stderr, "vcpu %x created failed, vm is null\n", id);
150 return -1;
151 }
152
153 if (hax_global.vm->vcpus[id]) {
154 fprintf(stderr, "vcpu %x allocated already\n", id);
155 return 0;
156 }
157
158 vcpu = g_malloc(sizeof(struct hax_vcpu_state));
159 if (!vcpu) {
160 fprintf(stderr, "Failed to alloc vcpu state\n");
161 return -ENOMEM;
162 }
163
164 memset(vcpu, 0, sizeof(struct hax_vcpu_state));
165
166 ret = hax_host_create_vcpu(hax_global.vm->fd, id);
167 if (ret) {
168 fprintf(stderr, "Failed to create vcpu %x\n", id);
169 goto error;
170 }
171
172 vcpu->vcpu_id = id;
173 vcpu->fd = hax_host_open_vcpu(hax_global.vm->id, id);
174 if (hax_invalid_fd(vcpu->fd)) {
175 fprintf(stderr, "Failed to open the vcpu\n");
176 ret = -ENODEV;
177 goto error;
178 }
179
180 hax_global.vm->vcpus[id] = vcpu;
181
182 ret = hax_host_setup_vcpu_channel(vcpu);
183 if (ret) {
184 fprintf(stderr, "Invalid hax tunnel size\n");
185 ret = -EINVAL;
186 goto error;
187 }
188 return 0;
189
190 error:
191 /* vcpu and tunnel will be closed automatically */
192 if (vcpu && !hax_invalid_fd(vcpu->fd)) {
193 hax_close_fd(vcpu->fd);
194 }
195
196 hax_global.vm->vcpus[id] = NULL;
197 g_free(vcpu);
198 return -1;
199 }
200
201 int hax_vcpu_destroy(CPUState *cpu)
202 {
203 struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
204
205 if (!hax_global.vm) {
206 fprintf(stderr, "vcpu %x destroy failed, vm is null\n", vcpu->vcpu_id);
207 return -1;
208 }
209
210 if (!vcpu) {
211 return 0;
212 }
213
214 /*
215 * 1. The hax_tunnel is also destroied when vcpu destroy
216 * 2. close fd will cause hax module vcpu be cleaned
217 */
218 hax_close_fd(vcpu->fd);
219 hax_global.vm->vcpus[vcpu->vcpu_id] = NULL;
220 g_free(vcpu);
221 return 0;
222 }
223
224 int hax_init_vcpu(CPUState *cpu)
225 {
226 int ret;
227
228 ret = hax_vcpu_create(cpu->cpu_index);
229 if (ret < 0) {
230 fprintf(stderr, "Failed to create HAX vcpu\n");
231 exit(-1);
232 }
233
234 cpu->hax_vcpu = hax_global.vm->vcpus[cpu->cpu_index];
235 cpu->hax_vcpu_dirty = true;
236 qemu_register_reset(hax_reset_vcpu_state, (CPUArchState *) (cpu->env_ptr));
237
238 return ret;
239 }
240
241 struct hax_vm *hax_vm_create(struct hax_state *hax)
242 {
243 struct hax_vm *vm;
244 int vm_id = 0, ret;
245
246 if (hax_invalid_fd(hax->fd)) {
247 return NULL;
248 }
249
250 if (hax->vm) {
251 return hax->vm;
252 }
253
254 vm = g_malloc(sizeof(struct hax_vm));
255 if (!vm) {
256 return NULL;
257 }
258 memset(vm, 0, sizeof(struct hax_vm));
259 ret = hax_host_create_vm(hax, &vm_id);
260 if (ret) {
261 fprintf(stderr, "Failed to create vm %x\n", ret);
262 goto error;
263 }
264 vm->id = vm_id;
265 vm->fd = hax_host_open_vm(hax, vm_id);
266 if (hax_invalid_fd(vm->fd)) {
267 fprintf(stderr, "Failed to open vm %d\n", vm_id);
268 goto error;
269 }
270
271 hax->vm = vm;
272 return vm;
273
274 error:
275 g_free(vm);
276 hax->vm = NULL;
277 return NULL;
278 }
279
280 int hax_vm_destroy(struct hax_vm *vm)
281 {
282 int i;
283
284 for (i = 0; i < HAX_MAX_VCPU; i++)
285 if (vm->vcpus[i]) {
286 fprintf(stderr, "VCPU should be cleaned before vm clean\n");
287 return -1;
288 }
289 hax_close_fd(vm->fd);
290 g_free(vm);
291 hax_global.vm = NULL;
292 return 0;
293 }
294
295 static void hax_handle_interrupt(CPUState *cpu, int mask)
296 {
297 cpu->interrupt_request |= mask;
298
299 if (!qemu_cpu_is_self(cpu)) {
300 qemu_cpu_kick(cpu);
301 }
302 }
303
304 static int hax_init(ram_addr_t ram_size)
305 {
306 struct hax_state *hax = NULL;
307 struct hax_qemu_version qversion;
308 int ret;
309
310 hax = &hax_global;
311
312 memset(hax, 0, sizeof(struct hax_state));
313 hax->mem_quota = ram_size;
314
315 hax->fd = hax_mod_open();
316 if (hax_invalid_fd(hax->fd)) {
317 hax->fd = 0;
318 ret = -ENODEV;
319 goto error;
320 }
321
322 ret = hax_get_capability(hax);
323
324 if (ret) {
325 if (ret != -ENOSPC) {
326 ret = -EINVAL;
327 }
328 goto error;
329 }
330
331 if (!hax_version_support(hax)) {
332 ret = -EINVAL;
333 goto error;
334 }
335
336 hax->vm = hax_vm_create(hax);
337 if (!hax->vm) {
338 fprintf(stderr, "Failed to create HAX VM\n");
339 ret = -EINVAL;
340 goto error;
341 }
342
343 hax_memory_init();
344
345 qversion.cur_version = hax_cur_version;
346 qversion.min_version = hax_min_version;
347 hax_notify_qemu_version(hax->vm->fd, &qversion);
348 cpu_interrupt_handler = hax_handle_interrupt;
349
350 return ret;
351 error:
352 if (hax->vm) {
353 hax_vm_destroy(hax->vm);
354 }
355 if (hax->fd) {
356 hax_mod_close(hax);
357 }
358
359 return ret;
360 }
361
362 static int hax_accel_init(MachineState *ms)
363 {
364 int ret = hax_init(ms->ram_size);
365
366 if (ret && (ret != -ENOSPC)) {
367 fprintf(stderr, "No accelerator found.\n");
368 } else {
369 fprintf(stdout, "HAX is %s and emulator runs in %s mode.\n",
370 !ret ? "working" : "not working",
371 !ret ? "fast virt" : "emulation");
372 }
373 return ret;
374 }
375
376 static int hax_handle_fastmmio(CPUArchState *env, struct hax_fastmmio *hft)
377 {
378 if (hft->direction < 2) {
379 cpu_physical_memory_rw(hft->gpa, (uint8_t *) &hft->value, hft->size,
380 hft->direction);
381 } else {
382 /*
383 * HAX API v4 supports transferring data between two MMIO addresses,
384 * hft->gpa and hft->gpa2 (instructions such as MOVS require this):
385 * hft->direction == 2: gpa ==> gpa2
386 */
387 uint64_t value;
388 cpu_physical_memory_rw(hft->gpa, (uint8_t *) &value, hft->size, 0);
389 cpu_physical_memory_rw(hft->gpa2, (uint8_t *) &value, hft->size, 1);
390 }
391
392 return 0;
393 }
394
395 static int hax_handle_io(CPUArchState *env, uint32_t df, uint16_t port,
396 int direction, int size, int count, void *buffer)
397 {
398 uint8_t *ptr;
399 int i;
400 MemTxAttrs attrs = { 0 };
401
402 if (!df) {
403 ptr = (uint8_t *) buffer;
404 } else {
405 ptr = buffer + size * count - size;
406 }
407 for (i = 0; i < count; i++) {
408 address_space_rw(&address_space_io, port, attrs,
409 ptr, size, direction == HAX_EXIT_IO_OUT);
410 if (!df) {
411 ptr += size;
412 } else {
413 ptr -= size;
414 }
415 }
416
417 return 0;
418 }
419
420 static int hax_vcpu_interrupt(CPUArchState *env)
421 {
422 CPUState *cpu = ENV_GET_CPU(env);
423 struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
424 struct hax_tunnel *ht = vcpu->tunnel;
425
426 /*
427 * Try to inject an interrupt if the guest can accept it
428 * Unlike KVM, HAX kernel check for the eflags, instead of qemu
429 */
430 if (ht->ready_for_interrupt_injection &&
431 (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
432 int irq;
433
434 irq = cpu_get_pic_interrupt(env);
435 if (irq >= 0) {
436 hax_inject_interrupt(env, irq);
437 cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
438 }
439 }
440
441 /* If we have an interrupt but the guest is not ready to receive an
442 * interrupt, request an interrupt window exit. This will
443 * cause a return to userspace as soon as the guest is ready to
444 * receive interrupts. */
445 if ((cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
446 ht->request_interrupt_window = 1;
447 } else {
448 ht->request_interrupt_window = 0;
449 }
450 return 0;
451 }
452
453 void hax_raise_event(CPUState *cpu)
454 {
455 struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
456
457 if (!vcpu) {
458 return;
459 }
460 vcpu->tunnel->user_event_pending = 1;
461 }
462
/*
 * Ask hax kernel module to run the CPU for us till:
 * 1. Guest crash or shutdown
 * 2. Need QEMU's emulation like guest execute MMIO instruction
 * 3. Guest execute HLT
 * 4. QEMU have Signal/event pending
 * 5. An unknown VMX exit happens
 *
 * Returns 1 when the run ended on a condition flagged as an error inside
 * the loop (ret < 0), and 0 otherwise.
 */
static int hax_vcpu_hax_exec(CPUArchState *env)
{
    /* Loop control: 0 = keep running, 1 = leave normally, -1 = leave on error */
    int ret = 0;
    CPUState *cpu = ENV_GET_CPU(env);
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
    struct hax_tunnel *ht = vcpu->tunnel;

    if (!hax_enabled()) {
        DPRINTF("Trying to vcpu execute at eip:" TARGET_FMT_lx "\n", env->eip);
        return 0;
    }

    cpu->halted = 0;

    /* Handle the interrupt requests that need work before entering the guest */
    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
        apic_poll_irq(x86_cpu->apic_state);
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
        DPRINTF("\nhax_vcpu_hax_exec: handling INIT for %d\n",
                cpu->cpu_index);
        do_cpu_init(x86_cpu);
        /* Push the re-initialised QEMU state down to the HAX module */
        hax_vcpu_sync_state(env, 1);
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
        DPRINTF("hax_vcpu_hax_exec: handling SIPI for %d\n",
                cpu->cpu_index);
        /* Fetch state from HAX, apply the SIPI, then write the result back */
        hax_vcpu_sync_state(env, 0);
        do_cpu_sipi(x86_cpu);
        hax_vcpu_sync_state(env, 1);
    }

    do {
        int hax_ret;

        if (cpu->exit_request) {
            ret = 1;
            break;
        }

        hax_vcpu_interrupt(env);

        /* The iothread lock is released around the call into the module */
        qemu_mutex_unlock_iothread();
        hax_ret = hax_vcpu_run(vcpu);
        qemu_mutex_lock_iothread();
        current_cpu = cpu;

        /* Simply continue the vcpu_run if system call interrupted */
        if (hax_ret == -EINTR || hax_ret == -EAGAIN) {
            DPRINTF("io window interrupted\n");
            /* ret is still 0 here, so the loop condition starts a new pass */
            continue;
        }

        if (hax_ret < 0) {
            fprintf(stderr, "vcpu run failed for vcpu  %x\n", vcpu->vcpu_id);
            abort();
        }
        /* Dispatch on the exit status the module left in the tunnel */
        switch (ht->_exit_status) {
        case HAX_EXIT_IO:
            ret = hax_handle_io(env, ht->pio._df, ht->pio._port,
                                ht->pio._direction,
                                ht->pio._size, ht->pio._count, vcpu->iobuf);
            break;
        case HAX_EXIT_FAST_MMIO:
            /* For this exit the I/O buffer holds a struct hax_fastmmio */
            ret = hax_handle_fastmmio(env, (struct hax_fastmmio *) vcpu->iobuf);
            break;
        /* Guest state changed, currently only for shutdown */
        case HAX_EXIT_STATECHANGE:
            fprintf(stdout, "VCPU shutdown request\n");
            qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
            hax_vcpu_sync_state(env, 0);
            ret = 1;
            break;
        case HAX_EXIT_UNKNOWN_VMEXIT:
            fprintf(stderr, "Unknown VMX exit %x from guest\n",
                    ht->_exit_reason);
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            hax_vcpu_sync_state(env, 0);
            cpu_dump_state(cpu, stderr, fprintf, 0);
            /* Negative ret makes this function report an error (returns 1) */
            ret = -1;
            break;
        case HAX_EXIT_HLT:
            /* Only halt when neither a hard interrupt nor an NMI is pending */
            if (!(cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
                !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
                /* hlt instruction with interrupt disabled is shutdown */
                env->eflags |= IF_MASK;
                cpu->halted = 1;
                cpu->exception_index = EXCP_HLT;
                ret = 1;
            }
            break;
        /* these situations will continue to hax module */
        case HAX_EXIT_INTERRUPT:
        case HAX_EXIT_PAUSED:
            break;
        case HAX_EXIT_MMIO:
            /* Should not happen on UG system */
            fprintf(stderr, "HAX: unsupported MMIO emulation\n");
            ret = -1;
            break;
        case HAX_EXIT_REAL:
            /* Should not happen on UG system */
            fprintf(stderr, "HAX: unimplemented real mode emulation\n");
            ret = -1;
            break;
        default:
            /* Unlike HAX_EXIT_UNKNOWN_VMEXIT, this leaves with ret = 1 */
            fprintf(stderr, "Unknown exit %x from HAX\n", ht->_exit_status);
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            hax_vcpu_sync_state(env, 0);
            cpu_dump_state(cpu, stderr, fprintf, 0);
            ret = 1;
            break;
        }
    } while (!ret);

    /* Consume a pending exit request and surface it as EXCP_INTERRUPT */
    if (cpu->exit_request) {
        cpu->exit_request = 0;
        cpu->exception_index = EXCP_INTERRUPT;
    }
    return ret < 0;
}
595
596 static void do_hax_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
597 {
598 CPUArchState *env = cpu->env_ptr;
599
600 hax_arch_get_registers(env);
601 cpu->hax_vcpu_dirty = true;
602 }
603
604 void hax_cpu_synchronize_state(CPUState *cpu)
605 {
606 if (!cpu->hax_vcpu_dirty) {
607 run_on_cpu(cpu, do_hax_cpu_synchronize_state, RUN_ON_CPU_NULL);
608 }
609 }
610
611 static void do_hax_cpu_synchronize_post_reset(CPUState *cpu,
612 run_on_cpu_data arg)
613 {
614 CPUArchState *env = cpu->env_ptr;
615
616 hax_vcpu_sync_state(env, 1);
617 cpu->hax_vcpu_dirty = false;
618 }
619
620 void hax_cpu_synchronize_post_reset(CPUState *cpu)
621 {
622 run_on_cpu(cpu, do_hax_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
623 }
624
625 static void do_hax_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
626 {
627 CPUArchState *env = cpu->env_ptr;
628
629 hax_vcpu_sync_state(env, 1);
630 cpu->hax_vcpu_dirty = false;
631 }
632
633 void hax_cpu_synchronize_post_init(CPUState *cpu)
634 {
635 run_on_cpu(cpu, do_hax_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
636 }
637
638 int hax_smp_cpu_exec(CPUState *cpu)
639 {
640 CPUArchState *env = (CPUArchState *) (cpu->env_ptr);
641 int fatal;
642 int ret;
643
644 while (1) {
645 if (cpu->exception_index >= EXCP_INTERRUPT) {
646 ret = cpu->exception_index;
647 cpu->exception_index = -1;
648 break;
649 }
650
651 fatal = hax_vcpu_hax_exec(env);
652
653 if (fatal) {
654 fprintf(stderr, "Unsupported HAX vcpu return\n");
655 abort();
656 }
657 }
658
659 return ret;
660 }
661
662 static void set_v8086_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
663 {
664 memset(lhs, 0, sizeof(struct segment_desc_t));
665 lhs->selector = rhs->selector;
666 lhs->base = rhs->base;
667 lhs->limit = rhs->limit;
668 lhs->type = 3;
669 lhs->present = 1;
670 lhs->dpl = 3;
671 lhs->operand_size = 0;
672 lhs->desc = 1;
673 lhs->long_mode = 0;
674 lhs->granularity = 0;
675 lhs->available = 0;
676 }
677
678 static void get_seg(SegmentCache *lhs, const struct segment_desc_t *rhs)
679 {
680 lhs->selector = rhs->selector;
681 lhs->base = rhs->base;
682 lhs->limit = rhs->limit;
683 lhs->flags = (rhs->type << DESC_TYPE_SHIFT)
684 | (rhs->present * DESC_P_MASK)
685 | (rhs->dpl << DESC_DPL_SHIFT)
686 | (rhs->operand_size << DESC_B_SHIFT)
687 | (rhs->desc * DESC_S_MASK)
688 | (rhs->long_mode << DESC_L_SHIFT)
689 | (rhs->granularity * DESC_G_MASK) | (rhs->available * DESC_AVL_MASK);
690 }
691
692 static void set_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
693 {
694 unsigned flags = rhs->flags;
695
696 memset(lhs, 0, sizeof(struct segment_desc_t));
697 lhs->selector = rhs->selector;
698 lhs->base = rhs->base;
699 lhs->limit = rhs->limit;
700 lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
701 lhs->present = (flags & DESC_P_MASK) != 0;
702 lhs->dpl = rhs->selector & 3;
703 lhs->operand_size = (flags >> DESC_B_SHIFT) & 1;
704 lhs->desc = (flags & DESC_S_MASK) != 0;
705 lhs->long_mode = (flags >> DESC_L_SHIFT) & 1;
706 lhs->granularity = (flags & DESC_G_MASK) != 0;
707 lhs->available = (flags & DESC_AVL_MASK) != 0;
708 }
709
710 static void hax_getput_reg(uint64_t *hax_reg, target_ulong *qemu_reg, int set)
711 {
712 target_ulong reg = *hax_reg;
713
714 if (set) {
715 *hax_reg = *qemu_reg;
716 } else {
717 *qemu_reg = reg;
718 }
719 }
720
721 /* The sregs has been synced with HAX kernel already before this call */
722 static int hax_get_segments(CPUArchState *env, struct vcpu_state_t *sregs)
723 {
724 get_seg(&env->segs[R_CS], &sregs->_cs);
725 get_seg(&env->segs[R_DS], &sregs->_ds);
726 get_seg(&env->segs[R_ES], &sregs->_es);
727 get_seg(&env->segs[R_FS], &sregs->_fs);
728 get_seg(&env->segs[R_GS], &sregs->_gs);
729 get_seg(&env->segs[R_SS], &sregs->_ss);
730
731 get_seg(&env->tr, &sregs->_tr);
732 get_seg(&env->ldt, &sregs->_ldt);
733 env->idt.limit = sregs->_idt.limit;
734 env->idt.base = sregs->_idt.base;
735 env->gdt.limit = sregs->_gdt.limit;
736 env->gdt.base = sregs->_gdt.base;
737 return 0;
738 }
739
740 static int hax_set_segments(CPUArchState *env, struct vcpu_state_t *sregs)
741 {
742 if ((env->eflags & VM_MASK)) {
743 set_v8086_seg(&sregs->_cs, &env->segs[R_CS]);
744 set_v8086_seg(&sregs->_ds, &env->segs[R_DS]);
745 set_v8086_seg(&sregs->_es, &env->segs[R_ES]);
746 set_v8086_seg(&sregs->_fs, &env->segs[R_FS]);
747 set_v8086_seg(&sregs->_gs, &env->segs[R_GS]);
748 set_v8086_seg(&sregs->_ss, &env->segs[R_SS]);
749 } else {
750 set_seg(&sregs->_cs, &env->segs[R_CS]);
751 set_seg(&sregs->_ds, &env->segs[R_DS]);
752 set_seg(&sregs->_es, &env->segs[R_ES]);
753 set_seg(&sregs->_fs, &env->segs[R_FS]);
754 set_seg(&sregs->_gs, &env->segs[R_GS]);
755 set_seg(&sregs->_ss, &env->segs[R_SS]);
756
757 if (env->cr[0] & CR0_PE_MASK) {
758 /* force ss cpl to cs cpl */
759 sregs->_ss.selector = (sregs->_ss.selector & ~3) |
760 (sregs->_cs.selector & 3);
761 sregs->_ss.dpl = sregs->_ss.selector & 3;
762 }
763 }
764
765 set_seg(&sregs->_tr, &env->tr);
766 set_seg(&sregs->_ldt, &env->ldt);
767 sregs->_idt.limit = env->idt.limit;
768 sregs->_idt.base = env->idt.base;
769 sregs->_gdt.limit = env->gdt.limit;
770 sregs->_gdt.base = env->gdt.base;
771 return 0;
772 }
773
/*
 * After get the state from the kernel module, some
 * qemu emulator state need be updated also
 *
 * Specifically, this recomputes the bits of env->hflags that are derived
 * from CR0, CR4, EFER, EFLAGS and the segment caches.  Always returns 0.
 */
static int hax_setup_qemu_emulator(CPUArchState *env)
{

/*
 * Bits of env->hflags that are kept as they are; every flag named inside
 * the parentheses is excluded from the mask and recomputed below.
 */
#define HFLAG_COPY_MASK (~( \
    HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
    HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
    HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
    HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK))

    uint32_t hflags;

    /* CPL comes from the DPL field of the CS descriptor flags */
    hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
    /* Move CR0.PE, then CR0.MP/EM/TS, into their hflags positions */
    hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
    hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
              (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
    /* TF, VM and IOPL are taken from eflags without shifting */
    hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
    hflags |= (env->cr[4] & CR4_OSFXSR_MASK) <<
              (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);

    if (env->efer & MSR_EFER_LMA) {
        hflags |= HF_LMA_MASK;
    }

    if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) {
        /* LMA set and CS.L set: all three code/stack size flags are set */
        hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
    } else {
        /* Otherwise CS32/SS32 follow the B bit of the CS/SS descriptors */
        hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >>
                  (DESC_B_SHIFT - HF_CS32_SHIFT);
        hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >>
                  (DESC_B_SHIFT - HF_SS32_SHIFT);
        if (!(env->cr[0] & CR0_PE_MASK) ||
            (env->eflags & VM_MASK) || !(hflags & HF_CS32_MASK)) {
            /* PE clear, VM set or CS32 clear: ADDSEG is set unconditionally */
            hflags |= HF_ADDSEG_MASK;
        } else {
            /* Otherwise ADDSEG is set only if DS, ES or SS has a non-zero base */
            hflags |= ((env->segs[R_DS].base |
                        env->segs[R_ES].base |
                        env->segs[R_SS].base) != 0) << HF_ADDSEG_SHIFT;
        }
    }

    hflags &= ~HF_SMM_MASK;

    /* Keep the untouched bits of the old value and merge in the new ones */
    env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;
    return 0;
}
823
824 static int hax_sync_vcpu_register(CPUArchState *env, int set)
825 {
826 struct vcpu_state_t regs;
827 int ret;
828 memset(&regs, 0, sizeof(struct vcpu_state_t));
829
830 if (!set) {
831 ret = hax_sync_vcpu_state(env, &regs, 0);
832 if (ret < 0) {
833 return -1;
834 }
835 }
836
837 /* generic register */
838 hax_getput_reg(&regs._rax, &env->regs[R_EAX], set);
839 hax_getput_reg(&regs._rbx, &env->regs[R_EBX], set);
840 hax_getput_reg(&regs._rcx, &env->regs[R_ECX], set);
841 hax_getput_reg(&regs._rdx, &env->regs[R_EDX], set);
842 hax_getput_reg(&regs._rsi, &env->regs[R_ESI], set);
843 hax_getput_reg(&regs._rdi, &env->regs[R_EDI], set);
844 hax_getput_reg(&regs._rsp, &env->regs[R_ESP], set);
845 hax_getput_reg(&regs._rbp, &env->regs[R_EBP], set);
846 #ifdef TARGET_X86_64
847 hax_getput_reg(&regs._r8, &env->regs[8], set);
848 hax_getput_reg(&regs._r9, &env->regs[9], set);
849 hax_getput_reg(&regs._r10, &env->regs[10], set);
850 hax_getput_reg(&regs._r11, &env->regs[11], set);
851 hax_getput_reg(&regs._r12, &env->regs[12], set);
852 hax_getput_reg(&regs._r13, &env->regs[13], set);
853 hax_getput_reg(&regs._r14, &env->regs[14], set);
854 hax_getput_reg(&regs._r15, &env->regs[15], set);
855 #endif
856 hax_getput_reg(&regs._rflags, &env->eflags, set);
857 hax_getput_reg(&regs._rip, &env->eip, set);
858
859 if (set) {
860 regs._cr0 = env->cr[0];
861 regs._cr2 = env->cr[2];
862 regs._cr3 = env->cr[3];
863 regs._cr4 = env->cr[4];
864 hax_set_segments(env, &regs);
865 } else {
866 env->cr[0] = regs._cr0;
867 env->cr[2] = regs._cr2;
868 env->cr[3] = regs._cr3;
869 env->cr[4] = regs._cr4;
870 hax_get_segments(env, &regs);
871 }
872
873 if (set) {
874 ret = hax_sync_vcpu_state(env, &regs, 1);
875 if (ret < 0) {
876 return -1;
877 }
878 }
879 if (!set) {
880 hax_setup_qemu_emulator(env);
881 }
882 return 0;
883 }
884
885 static void hax_msr_entry_set(struct vmx_msr *item, uint32_t index,
886 uint64_t value)
887 {
888 item->entry = index;
889 item->value = value;
890 }
891
892 static int hax_get_msrs(CPUArchState *env)
893 {
894 struct hax_msr_data md;
895 struct vmx_msr *msrs = md.entries;
896 int ret, i, n;
897
898 n = 0;
899 msrs[n++].entry = MSR_IA32_SYSENTER_CS;
900 msrs[n++].entry = MSR_IA32_SYSENTER_ESP;
901 msrs[n++].entry = MSR_IA32_SYSENTER_EIP;
902 msrs[n++].entry = MSR_IA32_TSC;
903 #ifdef TARGET_X86_64
904 msrs[n++].entry = MSR_EFER;
905 msrs[n++].entry = MSR_STAR;
906 msrs[n++].entry = MSR_LSTAR;
907 msrs[n++].entry = MSR_CSTAR;
908 msrs[n++].entry = MSR_FMASK;
909 msrs[n++].entry = MSR_KERNELGSBASE;
910 #endif
911 md.nr_msr = n;
912 ret = hax_sync_msr(env, &md, 0);
913 if (ret < 0) {
914 return ret;
915 }
916
917 for (i = 0; i < md.done; i++) {
918 switch (msrs[i].entry) {
919 case MSR_IA32_SYSENTER_CS:
920 env->sysenter_cs = msrs[i].value;
921 break;
922 case MSR_IA32_SYSENTER_ESP:
923 env->sysenter_esp = msrs[i].value;
924 break;
925 case MSR_IA32_SYSENTER_EIP:
926 env->sysenter_eip = msrs[i].value;
927 break;
928 case MSR_IA32_TSC:
929 env->tsc = msrs[i].value;
930 break;
931 #ifdef TARGET_X86_64
932 case MSR_EFER:
933 env->efer = msrs[i].value;
934 break;
935 case MSR_STAR:
936 env->star = msrs[i].value;
937 break;
938 case MSR_LSTAR:
939 env->lstar = msrs[i].value;
940 break;
941 case MSR_CSTAR:
942 env->cstar = msrs[i].value;
943 break;
944 case MSR_FMASK:
945 env->fmask = msrs[i].value;
946 break;
947 case MSR_KERNELGSBASE:
948 env->kernelgsbase = msrs[i].value;
949 break;
950 #endif
951 }
952 }
953
954 return 0;
955 }
956
957 static int hax_set_msrs(CPUArchState *env)
958 {
959 struct hax_msr_data md;
960 struct vmx_msr *msrs;
961 msrs = md.entries;
962 int n = 0;
963
964 memset(&md, 0, sizeof(struct hax_msr_data));
965 hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
966 hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
967 hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
968 hax_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
969 #ifdef TARGET_X86_64
970 hax_msr_entry_set(&msrs[n++], MSR_EFER, env->efer);
971 hax_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
972 hax_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
973 hax_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
974 hax_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
975 hax_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
976 #endif
977 md.nr_msr = n;
978 md.done = 0;
979
980 return hax_sync_msr(env, &md, 1);
981 }
982
983 static int hax_get_fpu(CPUArchState *env)
984 {
985 struct fx_layout fpu;
986 int i, ret;
987
988 ret = hax_sync_fpu(env, &fpu, 0);
989 if (ret < 0) {
990 return ret;
991 }
992
993 env->fpstt = (fpu.fsw >> 11) & 7;
994 env->fpus = fpu.fsw;
995 env->fpuc = fpu.fcw;
996 for (i = 0; i < 8; ++i) {
997 env->fptags[i] = !((fpu.ftw >> i) & 1);
998 }
999 memcpy(env->fpregs, fpu.st_mm, sizeof(env->fpregs));
1000
1001 for (i = 0; i < 8; i++) {
1002 env->xmm_regs[i].ZMM_Q(0) = ldq_p(&fpu.mmx_1[i][0]);
1003 env->xmm_regs[i].ZMM_Q(1) = ldq_p(&fpu.mmx_1[i][8]);
1004 if (CPU_NB_REGS > 8) {
1005 env->xmm_regs[i + 8].ZMM_Q(0) = ldq_p(&fpu.mmx_2[i][0]);
1006 env->xmm_regs[i + 8].ZMM_Q(1) = ldq_p(&fpu.mmx_2[i][8]);
1007 }
1008 }
1009 env->mxcsr = fpu.mxcsr;
1010
1011 return 0;
1012 }
1013
1014 static int hax_set_fpu(CPUArchState *env)
1015 {
1016 struct fx_layout fpu;
1017 int i;
1018
1019 memset(&fpu, 0, sizeof(fpu));
1020 fpu.fsw = env->fpus & ~(7 << 11);
1021 fpu.fsw |= (env->fpstt & 7) << 11;
1022 fpu.fcw = env->fpuc;
1023
1024 for (i = 0; i < 8; ++i) {
1025 fpu.ftw |= (!env->fptags[i]) << i;
1026 }
1027
1028 memcpy(fpu.st_mm, env->fpregs, sizeof(env->fpregs));
1029 for (i = 0; i < 8; i++) {
1030 stq_p(&fpu.mmx_1[i][0], env->xmm_regs[i].ZMM_Q(0));
1031 stq_p(&fpu.mmx_1[i][8], env->xmm_regs[i].ZMM_Q(1));
1032 if (CPU_NB_REGS > 8) {
1033 stq_p(&fpu.mmx_2[i][0], env->xmm_regs[i + 8].ZMM_Q(0));
1034 stq_p(&fpu.mmx_2[i][8], env->xmm_regs[i + 8].ZMM_Q(1));
1035 }
1036 }
1037
1038 fpu.mxcsr = env->mxcsr;
1039
1040 return hax_sync_fpu(env, &fpu, 1);
1041 }
1042
1043 static int hax_arch_get_registers(CPUArchState *env)
1044 {
1045 int ret;
1046
1047 ret = hax_sync_vcpu_register(env, 0);
1048 if (ret < 0) {
1049 return ret;
1050 }
1051
1052 ret = hax_get_fpu(env);
1053 if (ret < 0) {
1054 return ret;
1055 }
1056
1057 ret = hax_get_msrs(env);
1058 if (ret < 0) {
1059 return ret;
1060 }
1061
1062 return 0;
1063 }
1064
1065 static int hax_arch_set_registers(CPUArchState *env)
1066 {
1067 int ret;
1068 ret = hax_sync_vcpu_register(env, 1);
1069
1070 if (ret < 0) {
1071 fprintf(stderr, "Failed to sync vcpu reg\n");
1072 return ret;
1073 }
1074 ret = hax_set_fpu(env);
1075 if (ret < 0) {
1076 fprintf(stderr, "FPU failed\n");
1077 return ret;
1078 }
1079 ret = hax_set_msrs(env);
1080 if (ret < 0) {
1081 fprintf(stderr, "MSR failed\n");
1082 return ret;
1083 }
1084
1085 return 0;
1086 }
1087
1088 static void hax_vcpu_sync_state(CPUArchState *env, int modified)
1089 {
1090 if (hax_enabled()) {
1091 if (modified) {
1092 hax_arch_set_registers(env);
1093 } else {
1094 hax_arch_get_registers(env);
1095 }
1096 }
1097 }
1098
1099 /*
1100 * much simpler than kvm, at least in first stage because:
1101 * We don't need consider the device pass-through, we don't need
1102 * consider the framebuffer, and we may even remove the bios at all
1103 */
1104 int hax_sync_vcpus(void)
1105 {
1106 if (hax_enabled()) {
1107 CPUState *cpu;
1108
1109 cpu = first_cpu;
1110 if (!cpu) {
1111 return 0;
1112 }
1113
1114 for (; cpu != NULL; cpu = CPU_NEXT(cpu)) {
1115 int ret;
1116
1117 ret = hax_arch_set_registers(cpu->env_ptr);
1118 if (ret < 0) {
1119 return ret;
1120 }
1121 }
1122 }
1123
1124 return 0;
1125 }
1126
1127 void hax_reset_vcpu_state(void *opaque)
1128 {
1129 CPUState *cpu;
1130 for (cpu = first_cpu; cpu != NULL; cpu = CPU_NEXT(cpu)) {
1131 cpu->hax_vcpu->tunnel->user_event_pending = 0;
1132 cpu->hax_vcpu->tunnel->ready_for_interrupt_injection = 0;
1133 }
1134 }
1135
1136 static void hax_accel_class_init(ObjectClass *oc, void *data)
1137 {
1138 AccelClass *ac = ACCEL_CLASS(oc);
1139 ac->name = "HAX";
1140 ac->init_machine = hax_accel_init;
1141 ac->allowed = &hax_allowed;
1142 }
1143
1144 static const TypeInfo hax_accel_type = {
1145 .name = ACCEL_CLASS_NAME("hax"),
1146 .parent = TYPE_ACCEL,
1147 .class_init = hax_accel_class_init,
1148 };
1149
1150 static void hax_type_init(void)
1151 {
1152 type_register_static(&hax_accel_type);
1153 }
1154
1155 type_init(hax_type_init);