target/i386/hax-all.c

   1 /*
   2  * QEMU HAX support
   3  *
   4  * Copyright IBM, Corp. 2008
   5  *           Red Hat, Inc. 2008
   6  *
   7  * Authors:
   8  *  Anthony Liguori   <aliguori@us.ibm.com>
   9  *  Glauber Costa     <gcosta@redhat.com>
  10  *
  11  * Copyright (c) 2011 Intel Corporation
  12  *  Written by:
  13  *  Jiang Yunhong<yunhong.jiang@intel.com>
  14  *  Xin Xiaohui<xiaohui.xin@intel.com>
  15  *  Zhang Xiantao<xiantao.zhang@intel.com>
  16  *
  17  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  18  * See the COPYING file in the top-level directory.
  19  *
  20  */
  21
  22 /*
  23  * HAX common code for both windows and darwin
  24  */
  25
  26 #include "qemu/osdep.h"
  27 #include "cpu.h"
  28 #include "exec/address-spaces.h"
  29
  30 #include "qemu-common.h"
  31 #include "hax-i386.h"
  32 #include "sysemu/accel.h"
  33 #include "sysemu/sysemu.h"
  34 #include "qemu/main-loop.h"
  35 #include "hw/boards.h"
  36
  37 #define DEBUG_HAX 0
  38
  39 #define DPRINTF(fmt, ...) \
  40     do { \
  41         if (DEBUG_HAX) { \
  42             fprintf(stdout, fmt, ## __VA_ARGS__); \
  43         } \
  44     } while (0)
  45
  46 /* Current version */
  47 const uint32_t hax_cur_version = 0x4; /* API v4: unmapping and MMIO moves */
  48 /* Minimum HAX kernel version */
  49 const uint32_t hax_min_version = 0x4; /* API v4: supports unmapping */
  50
  51 static bool hax_allowed;
  52
  53 struct hax_state hax_global;
  54
  55 static void hax_vcpu_sync_state(CPUArchState *env, int modified);
  56 static int hax_arch_get_registers(CPUArchState *env);
  57
  58 int hax_enabled(void)
  59 {
  60     return hax_allowed;
  61 }
  62
  63 int valid_hax_tunnel_size(uint16_t size)
  64 {
  65     return size >= sizeof(struct hax_tunnel);
  66 }
  67
  68 hax_fd hax_vcpu_get_fd(CPUArchState *env)
  69 {
  70     struct hax_vcpu_state *vcpu = env_cpu(env)->hax_vcpu;
  71     if (!vcpu) {
  72         return HAX_INVALID_FD;
  73     }
  74     return vcpu->fd;
  75 }
  76
  77 static int hax_get_capability(struct hax_state *hax)
  78 {
  79     int ret;
  80     struct hax_capabilityinfo capinfo, *cap = &capinfo;
  81
  82     ret = hax_capability(hax, cap);
  83     if (ret) {
  84         return ret;
  85     }
  86
  87     if ((cap->wstatus & HAX_CAP_WORKSTATUS_MASK) == HAX_CAP_STATUS_NOTWORKING) {
  88         if (cap->winfo & HAX_CAP_FAILREASON_VT) {
  89             DPRINTF
  90                 ("VTX feature is not enabled, HAX driver will not work.\n");
  91         } else if (cap->winfo & HAX_CAP_FAILREASON_NX) {
  92             DPRINTF
  93                 ("NX feature is not enabled, HAX driver will not work.\n");
  94         }
  95         return -ENXIO;
  96
  97     }
  98
  99     if (!(cap->winfo & HAX_CAP_UG)) {
 100         fprintf(stderr, "UG mode is not supported by the hardware.\n");
 101         return -ENOTSUP;
 102     }
 103
 104     hax->supports_64bit_ramblock = !!(cap->winfo & HAX_CAP_64BIT_RAMBLOCK);
 105
 106     if (cap->wstatus & HAX_CAP_MEMQUOTA) {
 107         if (cap->mem_quota < hax->mem_quota) {
 108             fprintf(stderr, "The VM memory needed exceeds the driver limit.\n");
 109             return -ENOSPC;
 110         }
 111     }
 112     return 0;
 113 }
 114
 115 static int hax_version_support(struct hax_state *hax)
 116 {
 117     int ret;
 118     struct hax_module_version version;
 119
 120     ret = hax_mod_version(hax, &version);
 121     if (ret < 0) {
 122         return 0;
 123     }
 124
 125     if (hax_min_version > version.cur_version) {
 126         fprintf(stderr, "Incompatible HAX module version %d,",
 127                 version.cur_version);
 128         fprintf(stderr, "requires minimum version %d\n", hax_min_version);
 129         return 0;
 130     }
 131     if (hax_cur_version < version.compat_version) {
 132         fprintf(stderr, "Incompatible QEMU HAX API version %x,",
 133                 hax_cur_version);
 134         fprintf(stderr, "requires minimum HAX API version %x\n",
 135                 version.compat_version);
 136         return 0;
 137     }
 138
 139     return 1;
 140 }
 141
 142 int hax_vcpu_create(int id)
 143 {
 144     struct hax_vcpu_state *vcpu = NULL;
 145     int ret;
 146
 147     if (!hax_global.vm) {
 148         fprintf(stderr, "vcpu %x created failed, vm is null\n", id);
 149         return -1;
 150     }
 151
 152     if (hax_global.vm->vcpus[id]) {
 153         fprintf(stderr, "vcpu %x allocated already\n", id);
 154         return 0;
 155     }
 156
 157     vcpu = g_new0(struct hax_vcpu_state, 1);
 158
 159     ret = hax_host_create_vcpu(hax_global.vm->fd, id);
 160     if (ret) {
 161         fprintf(stderr, "Failed to create vcpu %x\n", id);
 162         goto error;
 163     }
 164
 165     vcpu->vcpu_id = id;
 166     vcpu->fd = hax_host_open_vcpu(hax_global.vm->id, id);
 167     if (hax_invalid_fd(vcpu->fd)) {
 168         fprintf(stderr, "Failed to open the vcpu\n");
 169         ret = -ENODEV;
 170         goto error;
 171     }
 172
 173     hax_global.vm->vcpus[id] = vcpu;
 174
 175     ret = hax_host_setup_vcpu_channel(vcpu);
 176     if (ret) {
 177         fprintf(stderr, "Invalid hax tunnel size\n");
 178         ret = -EINVAL;
 179         goto error;
 180     }
 181     return 0;
 182
 183   error:
 184     /* vcpu and tunnel will be closed automatically */
 185     if (vcpu && !hax_invalid_fd(vcpu->fd)) {
 186         hax_close_fd(vcpu->fd);
 187     }
 188
 189     hax_global.vm->vcpus[id] = NULL;
 190     g_free(vcpu);
 191     return -1;
 192 }
 193
 194 int hax_vcpu_destroy(CPUState *cpu)
 195 {
 196     struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
 197
 198     if (!hax_global.vm) {
 199         fprintf(stderr, "vcpu %x destroy failed, vm is null\n", vcpu->vcpu_id);
 200         return -1;
 201     }
 202
 203     if (!vcpu) {
 204         return 0;
 205     }
 206
 207     /*
 208      * 1. The hax_tunnel is also destroyed when vcpu is destroyed
 209      * 2. close fd will cause hax module vcpu be cleaned
 210      */
 211     hax_close_fd(vcpu->fd);
 212     hax_global.vm->vcpus[vcpu->vcpu_id] = NULL;
 213     g_free(vcpu);
 214     return 0;
 215 }
 216
 217 int hax_init_vcpu(CPUState *cpu)
 218 {
 219     int ret;
 220
 221     ret = hax_vcpu_create(cpu->cpu_index);
 222     if (ret < 0) {
 223         fprintf(stderr, "Failed to create HAX vcpu\n");
 224         exit(-1);
 225     }
 226
 227     cpu->hax_vcpu = hax_global.vm->vcpus[cpu->cpu_index];
 228     cpu->vcpu_dirty = true;
 229     qemu_register_reset(hax_reset_vcpu_state, (CPUArchState *) (cpu->env_ptr));
 230
 231     return ret;
 232 }
 233
 234 struct hax_vm *hax_vm_create(struct hax_state *hax)
 235 {
 236     struct hax_vm *vm;
 237     int vm_id = 0, ret;
 238
 239     if (hax_invalid_fd(hax->fd)) {
 240         return NULL;
 241     }
 242
 243     if (hax->vm) {
 244         return hax->vm;
 245     }
 246
 247     vm = g_new0(struct hax_vm, 1);
 248
 249     ret = hax_host_create_vm(hax, &vm_id);
 250     if (ret) {
 251         fprintf(stderr, "Failed to create vm %x\n", ret);
 252         goto error;
 253     }
 254     vm->id = vm_id;
 255     vm->fd = hax_host_open_vm(hax, vm_id);
 256     if (hax_invalid_fd(vm->fd)) {
 257         fprintf(stderr, "Failed to open vm %d\n", vm_id);
 258         goto error;
 259     }
 260
 261     hax->vm = vm;
 262     return vm;
 263
 264   error:
 265     g_free(vm);
 266     hax->vm = NULL;
 267     return NULL;
 268 }
 269
 270 int hax_vm_destroy(struct hax_vm *vm)
 271 {
 272     int i;
 273
 274     for (i = 0; i < HAX_MAX_VCPU; i++)
 275         if (vm->vcpus[i]) {
 276             fprintf(stderr, "VCPU should be cleaned before vm clean\n");
 277             return -1;
 278         }
 279     hax_close_fd(vm->fd);
 280     g_free(vm);
 281     hax_global.vm = NULL;
 282     return 0;
 283 }
 284
 285 static void hax_handle_interrupt(CPUState *cpu, int mask)
 286 {
 287     cpu->interrupt_request |= mask;
 288
 289     if (!qemu_cpu_is_self(cpu)) {
 290         qemu_cpu_kick(cpu);
 291     }
 292 }
 293
 294 static int hax_init(ram_addr_t ram_size)
 295 {
 296     struct hax_state *hax = NULL;
 297     struct hax_qemu_version qversion;
 298     int ret;
 299
 300     hax = &hax_global;
 301
 302     memset(hax, 0, sizeof(struct hax_state));
 303     hax->mem_quota = ram_size;
 304
 305     hax->fd = hax_mod_open();
 306     if (hax_invalid_fd(hax->fd)) {
 307         hax->fd = 0;
 308         ret = -ENODEV;
 309         goto error;
 310     }
 311
 312     ret = hax_get_capability(hax);
 313
 314     if (ret) {
 315         if (ret != -ENOSPC) {
 316             ret = -EINVAL;
 317         }
 318         goto error;
 319     }
 320
 321     if (!hax_version_support(hax)) {
 322         ret = -EINVAL;
 323         goto error;
 324     }
 325
 326     hax->vm = hax_vm_create(hax);
 327     if (!hax->vm) {
 328         fprintf(stderr, "Failed to create HAX VM\n");
 329         ret = -EINVAL;
 330         goto error;
 331     }
 332
 333     hax_memory_init();
 334
 335     qversion.cur_version = hax_cur_version;
 336     qversion.min_version = hax_min_version;
 337     hax_notify_qemu_version(hax->vm->fd, &qversion);
 338     cpu_interrupt_handler = hax_handle_interrupt;
 339
 340     return ret;
 341   error:
 342     if (hax->vm) {
 343         hax_vm_destroy(hax->vm);
 344     }
 345     if (hax->fd) {
 346         hax_mod_close(hax);
 347     }
 348
 349     return ret;
 350 }
 351
 352 static int hax_accel_init(MachineState *ms)
 353 {
 354     int ret = hax_init(ms->ram_size);
 355
 356     if (ret && (ret != -ENOSPC)) {
 357         fprintf(stderr, "No accelerator found.\n");
 358     } else {
 359         fprintf(stdout, "HAX is %s and emulator runs in %s mode.\n",
 360                 !ret ? "working" : "not working",
 361                 !ret ? "fast virt" : "emulation");
 362     }
 363     return ret;
 364 }
 365
 366 static int hax_handle_fastmmio(CPUArchState *env, struct hax_fastmmio *hft)
 367 {
 368     if (hft->direction < 2) {
 369         cpu_physical_memory_rw(hft->gpa, (uint8_t *) &hft->value, hft->size,
 370                                hft->direction);
 371     } else {
 372         /*
 373          * HAX API v4 supports transferring data between two MMIO addresses,
 374          * hft->gpa and hft->gpa2 (instructions such as MOVS require this):
 375          *  hft->direction == 2: gpa ==> gpa2
 376          */
 377         uint64_t value;
 378         cpu_physical_memory_rw(hft->gpa, (uint8_t *) &value, hft->size, 0);
 379         cpu_physical_memory_rw(hft->gpa2, (uint8_t *) &value, hft->size, 1);
 380     }
 381
 382     return 0;
 383 }
 384
 385 static int hax_handle_io(CPUArchState *env, uint32_t df, uint16_t port,
 386                          int direction, int size, int count, void *buffer)
 387 {
 388     uint8_t *ptr;
 389     int i;
 390     MemTxAttrs attrs = { 0 };
 391
 392     if (!df) {
 393         ptr = (uint8_t *) buffer;
 394     } else {
 395         ptr = buffer + size * count - size;
 396     }
 397     for (i = 0; i < count; i++) {
 398         address_space_rw(&address_space_io, port, attrs,
 399                          ptr, size, direction == HAX_EXIT_IO_OUT);
 400         if (!df) {
 401             ptr += size;
 402         } else {
 403             ptr -= size;
 404         }
 405     }
 406
 407     return 0;
 408 }
 409
 410 static int hax_vcpu_interrupt(CPUArchState *env)
 411 {
 412     CPUState *cpu = env_cpu(env);
 413     struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
 414     struct hax_tunnel *ht = vcpu->tunnel;
 415
 416     /*
 417      * Try to inject an interrupt if the guest can accept it
 418      * Unlike KVM, HAX kernel check for the eflags, instead of qemu
 419      */
 420     if (ht->ready_for_interrupt_injection &&
 421         (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
 422         int irq;
 423
 424         irq = cpu_get_pic_interrupt(env);
 425         if (irq >= 0) {
 426             hax_inject_interrupt(env, irq);
 427             cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
 428         }
 429     }
 430
 431     /* If we have an interrupt but the guest is not ready to receive an
 432      * interrupt, request an interrupt window exit.  This will
 433      * cause a return to userspace as soon as the guest is ready to
 434      * receive interrupts. */
 435     if ((cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
 436         ht->request_interrupt_window = 1;
 437     } else {
 438         ht->request_interrupt_window = 0;
 439     }
 440     return 0;
 441 }
 442
 443 void hax_raise_event(CPUState *cpu)
 444 {
 445     struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
 446
 447     if (!vcpu) {
 448         return;
 449     }
 450     vcpu->tunnel->user_event_pending = 1;
 451 }
 452
 453 /*
 454  * Ask hax kernel module to run the CPU for us till:
 455  * 1. Guest crash or shutdown
 456  * 2. Need QEMU's emulation like guest execute MMIO instruction
 457  * 3. Guest execute HLT
 458  * 4. QEMU have Signal/event pending
 459  * 5. An unknown VMX exit happens
 460  */
 461 static int hax_vcpu_hax_exec(CPUArchState *env)
 462 {
 463     int ret = 0;
 464     CPUState *cpu = env_cpu(env);
 465     X86CPU *x86_cpu = X86_CPU(cpu);
 466     struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
 467     struct hax_tunnel *ht = vcpu->tunnel;
 468
 469     if (!hax_enabled()) {
 470         DPRINTF("Trying to vcpu execute at eip:" TARGET_FMT_lx "\n", env->eip);
 471         return 0;
 472     }
 473
 474     if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
 475         cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
 476         apic_poll_irq(x86_cpu->apic_state);
 477     }
 478
 479     /* After a vcpu is halted (either because it is an AP and has just been
 480      * reset, or because it has executed the HLT instruction), it will not be
 481      * run (hax_vcpu_run()) until it is unhalted. The next few if blocks check
 482      * for events that may change the halted state of this vcpu:
 483      *  a) Maskable interrupt, when RFLAGS.IF is 1;
 484      *     Note: env->eflags may not reflect the current RFLAGS state, because
 485      *           it is not updated after each hax_vcpu_run(). We cannot afford
 486      *           to fail to recognize any unhalt-by-maskable-interrupt event
 487      *           (in which case the vcpu will halt forever), and yet we cannot
 488      *           afford the overhead of hax_vcpu_sync_state(). The current
 489      *           solution is to err on the side of caution and have the HLT
 490      *           handler (see case HAX_EXIT_HLT below) unconditionally set the
 491      *           IF_MASK bit in env->eflags, which, in effect, disables the
 492      *           RFLAGS.IF check.
 493      *  b) NMI;
 494      *  c) INIT signal;
 495      *  d) SIPI signal.
 496      */
 497     if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
 498          (env->eflags & IF_MASK)) ||
 499         (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
 500         cpu->halted = 0;
 501     }
 502
 503     if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
 504         DPRINTF("\nhax_vcpu_hax_exec: handling INIT for %d\n",
 505                 cpu->cpu_index);
 506         do_cpu_init(x86_cpu);
 507         hax_vcpu_sync_state(env, 1);
 508     }
 509
 510     if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
 511         DPRINTF("hax_vcpu_hax_exec: handling SIPI for %d\n",
 512                 cpu->cpu_index);
 513         hax_vcpu_sync_state(env, 0);
 514         do_cpu_sipi(x86_cpu);
 515         hax_vcpu_sync_state(env, 1);
 516     }
 517
 518     if (cpu->halted) {
 519         /* If this vcpu is halted, we must not ask HAXM to run it. Instead, we
 520          * break out of hax_smp_cpu_exec() as if this vcpu had executed HLT.
 521          * That way, this vcpu thread will be trapped in qemu_wait_io_event(),
 522          * until the vcpu is unhalted.
 523          */
 524         cpu->exception_index = EXCP_HLT;
 525         return 0;
 526     }
 527
 528     do {
 529         int hax_ret;
 530
 531         if (cpu->exit_request) {
 532             ret = 1;
 533             break;
 534         }
 535
 536         hax_vcpu_interrupt(env);
 537
 538         qemu_mutex_unlock_iothread();
 539         cpu_exec_start(cpu);
 540         hax_ret = hax_vcpu_run(vcpu);
 541         cpu_exec_end(cpu);
 542         qemu_mutex_lock_iothread();
 543
 544         /* Simply continue the vcpu_run if system call interrupted */
 545         if (hax_ret == -EINTR || hax_ret == -EAGAIN) {
 546             DPRINTF("io window interrupted\n");
 547             continue;
 548         }
 549
 550         if (hax_ret < 0) {
 551             fprintf(stderr, "vcpu run failed for vcpu  %x\n", vcpu->vcpu_id);
 552             abort();
 553         }
 554         switch (ht->_exit_status) {
 555         case HAX_EXIT_IO:
 556             ret = hax_handle_io(env, ht->pio._df, ht->pio._port,
 557                             ht->pio._direction,
 558                             ht->pio._size, ht->pio._count, vcpu->iobuf);
 559             break;
 560         case HAX_EXIT_FAST_MMIO:
 561             ret = hax_handle_fastmmio(env, (struct hax_fastmmio *) vcpu->iobuf);
 562             break;
 563         /* Guest state changed, currently only for shutdown */
 564         case HAX_EXIT_STATECHANGE:
 565             fprintf(stdout, "VCPU shutdown request\n");
 566             qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
 567             hax_vcpu_sync_state(env, 0);
 568             ret = 1;
 569             break;
 570         case HAX_EXIT_UNKNOWN_VMEXIT:
 571             fprintf(stderr, "Unknown VMX exit %x from guest\n",
 572                     ht->_exit_reason);
 573             qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
 574             hax_vcpu_sync_state(env, 0);
 575             cpu_dump_state(cpu, stderr, 0);
 576             ret = -1;
 577             break;
 578         case HAX_EXIT_HLT:
 579             if (!(cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
 580                 !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
 581                 /* hlt instruction with interrupt disabled is shutdown */
 582                 env->eflags |= IF_MASK;
 583                 cpu->halted = 1;
 584                 cpu->exception_index = EXCP_HLT;
 585                 ret = 1;
 586             }
 587             break;
 588         /* these situations will continue to hax module */
 589         case HAX_EXIT_INTERRUPT:
 590         case HAX_EXIT_PAUSED:
 591             break;
 592         case HAX_EXIT_MMIO:
 593             /* Should not happen on UG system */
 594             fprintf(stderr, "HAX: unsupported MMIO emulation\n");
 595             ret = -1;
 596             break;
 597         case HAX_EXIT_REAL:
 598             /* Should not happen on UG system */
 599             fprintf(stderr, "HAX: unimplemented real mode emulation\n");
 600             ret = -1;
 601             break;
 602         default:
 603             fprintf(stderr, "Unknown exit %x from HAX\n", ht->_exit_status);
 604             qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
 605             hax_vcpu_sync_state(env, 0);
 606             cpu_dump_state(cpu, stderr, 0);
 607             ret = 1;
 608             break;
 609         }
 610     } while (!ret);
 611
 612     if (cpu->exit_request) {
 613         cpu->exit_request = 0;
 614         cpu->exception_index = EXCP_INTERRUPT;
 615     }
 616     return ret < 0;
 617 }
 618
 619 static void do_hax_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
 620 {
 621     CPUArchState *env = cpu->env_ptr;
 622
 623     hax_arch_get_registers(env);
 624     cpu->vcpu_dirty = true;
 625 }
 626
 627 void hax_cpu_synchronize_state(CPUState *cpu)
 628 {
 629     if (!cpu->vcpu_dirty) {
 630         run_on_cpu(cpu, do_hax_cpu_synchronize_state, RUN_ON_CPU_NULL);
 631     }
 632 }
 633
 634 static void do_hax_cpu_synchronize_post_reset(CPUState *cpu,
 635                                               run_on_cpu_data arg)
 636 {
 637     CPUArchState *env = cpu->env_ptr;
 638
 639     hax_vcpu_sync_state(env, 1);
 640     cpu->vcpu_dirty = false;
 641 }
 642
 643 void hax_cpu_synchronize_post_reset(CPUState *cpu)
 644 {
 645     run_on_cpu(cpu, do_hax_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
 646 }
 647
 648 static void do_hax_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
 649 {
 650     CPUArchState *env = cpu->env_ptr;
 651
 652     hax_vcpu_sync_state(env, 1);
 653     cpu->vcpu_dirty = false;
 654 }
 655
 656 void hax_cpu_synchronize_post_init(CPUState *cpu)
 657 {
 658     run_on_cpu(cpu, do_hax_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
 659 }
 660
 661 static void do_hax_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
 662 {
 663     cpu->vcpu_dirty = true;
 664 }
 665
 666 void hax_cpu_synchronize_pre_loadvm(CPUState *cpu)
 667 {
 668     run_on_cpu(cpu, do_hax_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
 669 }
 670
 671 int hax_smp_cpu_exec(CPUState *cpu)
 672 {
 673     CPUArchState *env = (CPUArchState *) (cpu->env_ptr);
 674     int fatal;
 675     int ret;
 676
 677     while (1) {
 678         if (cpu->exception_index >= EXCP_INTERRUPT) {
 679             ret = cpu->exception_index;
 680             cpu->exception_index = -1;
 681             break;
 682         }
 683
 684         fatal = hax_vcpu_hax_exec(env);
 685
 686         if (fatal) {
 687             fprintf(stderr, "Unsupported HAX vcpu return\n");
 688             abort();
 689         }
 690     }
 691
 692     return ret;
 693 }
 694
 695 static void set_v8086_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
 696 {
 697     memset(lhs, 0, sizeof(struct segment_desc_t));
 698     lhs->selector = rhs->selector;
 699     lhs->base = rhs->base;
 700     lhs->limit = rhs->limit;
 701     lhs->type = 3;
 702     lhs->present = 1;
 703     lhs->dpl = 3;
 704     lhs->operand_size = 0;
 705     lhs->desc = 1;
 706     lhs->long_mode = 0;
 707     lhs->granularity = 0;
 708     lhs->available = 0;
 709 }
 710
 711 static void get_seg(SegmentCache *lhs, const struct segment_desc_t *rhs)
 712 {
 713     lhs->selector = rhs->selector;
 714     lhs->base = rhs->base;
 715     lhs->limit = rhs->limit;
 716     lhs->flags = (rhs->type << DESC_TYPE_SHIFT)
 717         | (rhs->present * DESC_P_MASK)
 718         | (rhs->dpl << DESC_DPL_SHIFT)
 719         | (rhs->operand_size << DESC_B_SHIFT)
 720         | (rhs->desc * DESC_S_MASK)
 721         | (rhs->long_mode << DESC_L_SHIFT)
 722         | (rhs->granularity * DESC_G_MASK) | (rhs->available * DESC_AVL_MASK);
 723 }
 724
 725 static void set_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
 726 {
 727     unsigned flags = rhs->flags;
 728
 729     memset(lhs, 0, sizeof(struct segment_desc_t));
 730     lhs->selector = rhs->selector;
 731     lhs->base = rhs->base;
 732     lhs->limit = rhs->limit;
 733     lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
 734     lhs->present = (flags & DESC_P_MASK) != 0;
 735     lhs->dpl = rhs->selector & 3;
 736     lhs->operand_size = (flags >> DESC_B_SHIFT) & 1;
 737     lhs->desc = (flags & DESC_S_MASK) != 0;
 738     lhs->long_mode = (flags >> DESC_L_SHIFT) & 1;
 739     lhs->granularity = (flags & DESC_G_MASK) != 0;
 740     lhs->available = (flags & DESC_AVL_MASK) != 0;
 741 }
 742
 743 static void hax_getput_reg(uint64_t *hax_reg, target_ulong *qemu_reg, int set)
 744 {
 745     target_ulong reg = *hax_reg;
 746
 747     if (set) {
 748         *hax_reg = *qemu_reg;
 749     } else {
 750         *qemu_reg = reg;
 751     }
 752 }
 753
 754 /* The sregs has been synced with HAX kernel already before this call */
 755 static int hax_get_segments(CPUArchState *env, struct vcpu_state_t *sregs)
 756 {
 757     get_seg(&env->segs[R_CS], &sregs->_cs);
 758     get_seg(&env->segs[R_DS], &sregs->_ds);
 759     get_seg(&env->segs[R_ES], &sregs->_es);
 760     get_seg(&env->segs[R_FS], &sregs->_fs);
 761     get_seg(&env->segs[R_GS], &sregs->_gs);
 762     get_seg(&env->segs[R_SS], &sregs->_ss);
 763
 764     get_seg(&env->tr, &sregs->_tr);
 765     get_seg(&env->ldt, &sregs->_ldt);
 766     env->idt.limit = sregs->_idt.limit;
 767     env->idt.base = sregs->_idt.base;
 768     env->gdt.limit = sregs->_gdt.limit;
 769     env->gdt.base = sregs->_gdt.base;
 770     return 0;
 771 }
 772
 773 static int hax_set_segments(CPUArchState *env, struct vcpu_state_t *sregs)
 774 {
 775     if ((env->eflags & VM_MASK)) {
 776         set_v8086_seg(&sregs->_cs, &env->segs[R_CS]);
 777         set_v8086_seg(&sregs->_ds, &env->segs[R_DS]);
 778         set_v8086_seg(&sregs->_es, &env->segs[R_ES]);
 779         set_v8086_seg(&sregs->_fs, &env->segs[R_FS]);
 780         set_v8086_seg(&sregs->_gs, &env->segs[R_GS]);
 781         set_v8086_seg(&sregs->_ss, &env->segs[R_SS]);
 782     } else {
 783         set_seg(&sregs->_cs, &env->segs[R_CS]);
 784         set_seg(&sregs->_ds, &env->segs[R_DS]);
 785         set_seg(&sregs->_es, &env->segs[R_ES]);
 786         set_seg(&sregs->_fs, &env->segs[R_FS]);
 787         set_seg(&sregs->_gs, &env->segs[R_GS]);
 788         set_seg(&sregs->_ss, &env->segs[R_SS]);
 789
 790         if (env->cr[0] & CR0_PE_MASK) {
 791             /* force ss cpl to cs cpl */
 792             sregs->_ss.selector = (sregs->_ss.selector & ~3) |
 793                                   (sregs->_cs.selector & 3);
 794             sregs->_ss.dpl = sregs->_ss.selector & 3;
 795         }
 796     }
 797
 798     set_seg(&sregs->_tr, &env->tr);
 799     set_seg(&sregs->_ldt, &env->ldt);
 800     sregs->_idt.limit = env->idt.limit;
 801     sregs->_idt.base = env->idt.base;
 802     sregs->_gdt.limit = env->gdt.limit;
 803     sregs->_gdt.base = env->gdt.base;
 804     return 0;
 805 }
 806
 807 static int hax_sync_vcpu_register(CPUArchState *env, int set)
 808 {
 809     struct vcpu_state_t regs;
 810     int ret;
 811     memset(&regs, 0, sizeof(struct vcpu_state_t));
 812
 813     if (!set) {
 814         ret = hax_sync_vcpu_state(env, &regs, 0);
 815         if (ret < 0) {
 816             return -1;
 817         }
 818     }
 819
 820     /* generic register */
 821     hax_getput_reg(&regs._rax, &env->regs[R_EAX], set);
 822     hax_getput_reg(&regs._rbx, &env->regs[R_EBX], set);
 823     hax_getput_reg(&regs._rcx, &env->regs[R_ECX], set);
 824     hax_getput_reg(&regs._rdx, &env->regs[R_EDX], set);
 825     hax_getput_reg(&regs._rsi, &env->regs[R_ESI], set);
 826     hax_getput_reg(&regs._rdi, &env->regs[R_EDI], set);
 827     hax_getput_reg(&regs._rsp, &env->regs[R_ESP], set);
 828     hax_getput_reg(&regs._rbp, &env->regs[R_EBP], set);
 829 #ifdef TARGET_X86_64
 830     hax_getput_reg(&regs._r8, &env->regs[8], set);
 831     hax_getput_reg(&regs._r9, &env->regs[9], set);
 832     hax_getput_reg(&regs._r10, &env->regs[10], set);
 833     hax_getput_reg(&regs._r11, &env->regs[11], set);
 834     hax_getput_reg(&regs._r12, &env->regs[12], set);
 835     hax_getput_reg(&regs._r13, &env->regs[13], set);
 836     hax_getput_reg(&regs._r14, &env->regs[14], set);
 837     hax_getput_reg(&regs._r15, &env->regs[15], set);
 838 #endif
 839     hax_getput_reg(&regs._rflags, &env->eflags, set);
 840     hax_getput_reg(&regs._rip, &env->eip, set);
 841
 842     if (set) {
 843         regs._cr0 = env->cr[0];
 844         regs._cr2 = env->cr[2];
 845         regs._cr3 = env->cr[3];
 846         regs._cr4 = env->cr[4];
 847         hax_set_segments(env, &regs);
 848     } else {
 849         env->cr[0] = regs._cr0;
 850         env->cr[2] = regs._cr2;
 851         env->cr[3] = regs._cr3;
 852         env->cr[4] = regs._cr4;
 853         hax_get_segments(env, &regs);
 854     }
 855
 856     if (set) {
 857         ret = hax_sync_vcpu_state(env, &regs, 1);
 858         if (ret < 0) {
 859             return -1;
 860         }
 861     }
 862     return 0;
 863 }
 864
 865 static void hax_msr_entry_set(struct vmx_msr *item, uint32_t index,
 866                               uint64_t value)
 867 {
 868     item->entry = index;
 869     item->value = value;
 870 }
 871
 872 static int hax_get_msrs(CPUArchState *env)
 873 {
 874     struct hax_msr_data md;
 875     struct vmx_msr *msrs = md.entries;
 876     int ret, i, n;
 877
 878     n = 0;
 879     msrs[n++].entry = MSR_IA32_SYSENTER_CS;
 880     msrs[n++].entry = MSR_IA32_SYSENTER_ESP;
 881     msrs[n++].entry = MSR_IA32_SYSENTER_EIP;
 882     msrs[n++].entry = MSR_IA32_TSC;
 883 #ifdef TARGET_X86_64
 884     msrs[n++].entry = MSR_EFER;
 885     msrs[n++].entry = MSR_STAR;
 886     msrs[n++].entry = MSR_LSTAR;
 887     msrs[n++].entry = MSR_CSTAR;
 888     msrs[n++].entry = MSR_FMASK;
 889     msrs[n++].entry = MSR_KERNELGSBASE;
 890 #endif
 891     md.nr_msr = n;
 892     ret = hax_sync_msr(env, &md, 0);
 893     if (ret < 0) {
 894         return ret;
 895     }
 896
 897     for (i = 0; i < md.done; i++) {
 898         switch (msrs[i].entry) {
 899         case MSR_IA32_SYSENTER_CS:
 900             env->sysenter_cs = msrs[i].value;
 901             break;
 902         case MSR_IA32_SYSENTER_ESP:
 903             env->sysenter_esp = msrs[i].value;
 904             break;
 905         case MSR_IA32_SYSENTER_EIP:
 906             env->sysenter_eip = msrs[i].value;
 907             break;
 908         case MSR_IA32_TSC:
 909             env->tsc = msrs[i].value;
 910             break;
 911 #ifdef TARGET_X86_64
 912         case MSR_EFER:
 913             env->efer = msrs[i].value;
 914             break;
 915         case MSR_STAR:
 916             env->star = msrs[i].value;
 917             break;
 918         case MSR_LSTAR:
 919             env->lstar = msrs[i].value;
 920             break;
 921         case MSR_CSTAR:
 922             env->cstar = msrs[i].value;
 923             break;
 924         case MSR_FMASK:
 925             env->fmask = msrs[i].value;
 926             break;
 927         case MSR_KERNELGSBASE:
 928             env->kernelgsbase = msrs[i].value;
 929             break;
 930 #endif
 931         }
 932     }
 933
 934     return 0;
 935 }
 936
 937 static int hax_set_msrs(CPUArchState *env)
 938 {
 939     struct hax_msr_data md;
 940     struct vmx_msr *msrs;
 941     msrs = md.entries;
 942     int n = 0;
 943
 944     memset(&md, 0, sizeof(struct hax_msr_data));
 945     hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
 946     hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
 947     hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
 948     hax_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
 949 #ifdef TARGET_X86_64
 950     hax_msr_entry_set(&msrs[n++], MSR_EFER, env->efer);
 951     hax_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
 952     hax_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
 953     hax_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
 954     hax_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
 955     hax_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
 956 #endif
 957     md.nr_msr = n;
 958     md.done = 0;
 959
 960     return hax_sync_msr(env, &md, 1);
 961 }
 962
 963 static int hax_get_fpu(CPUArchState *env)
 964 {
 965     struct fx_layout fpu;
 966     int i, ret;
 967
 968     ret = hax_sync_fpu(env, &fpu, 0);
 969     if (ret < 0) {
 970         return ret;
 971     }
 972
 973     env->fpstt = (fpu.fsw >> 11) & 7;
 974     env->fpus = fpu.fsw;
 975     env->fpuc = fpu.fcw;
 976     for (i = 0; i < 8; ++i) {
 977         env->fptags[i] = !((fpu.ftw >> i) & 1);
 978     }
 979     memcpy(env->fpregs, fpu.st_mm, sizeof(env->fpregs));
 980
 981     for (i = 0; i < 8; i++) {
 982         env->xmm_regs[i].ZMM_Q(0) = ldq_p(&fpu.mmx_1[i][0]);
 983         env->xmm_regs[i].ZMM_Q(1) = ldq_p(&fpu.mmx_1[i][8]);
 984         if (CPU_NB_REGS > 8) {
 985             env->xmm_regs[i + 8].ZMM_Q(0) = ldq_p(&fpu.mmx_2[i][0]);
 986             env->xmm_regs[i + 8].ZMM_Q(1) = ldq_p(&fpu.mmx_2[i][8]);
 987         }
 988     }
 989     env->mxcsr = fpu.mxcsr;
 990
 991     return 0;
 992 }
 993
 994 static int hax_set_fpu(CPUArchState *env)
 995 {
 996     struct fx_layout fpu;
 997     int i;
 998
 999     memset(&fpu, 0, sizeof(fpu));
1000     fpu.fsw = env->fpus & ~(7 << 11);
1001     fpu.fsw |= (env->fpstt & 7) << 11;
1002     fpu.fcw = env->fpuc;
1003
1004     for (i = 0; i < 8; ++i) {
1005         fpu.ftw |= (!env->fptags[i]) << i;
1006     }
1007
1008     memcpy(fpu.st_mm, env->fpregs, sizeof(env->fpregs));
1009     for (i = 0; i < 8; i++) {
1010         stq_p(&fpu.mmx_1[i][0], env->xmm_regs[i].ZMM_Q(0));
1011         stq_p(&fpu.mmx_1[i][8], env->xmm_regs[i].ZMM_Q(1));
1012         if (CPU_NB_REGS > 8) {
1013             stq_p(&fpu.mmx_2[i][0], env->xmm_regs[i + 8].ZMM_Q(0));
1014             stq_p(&fpu.mmx_2[i][8], env->xmm_regs[i + 8].ZMM_Q(1));
1015         }
1016     }
1017
1018     fpu.mxcsr = env->mxcsr;
1019
1020     return hax_sync_fpu(env, &fpu, 1);
1021 }
1022
1023 static int hax_arch_get_registers(CPUArchState *env)
1024 {
1025     int ret;
1026
1027     ret = hax_sync_vcpu_register(env, 0);
1028     if (ret < 0) {
1029         return ret;
1030     }
1031
1032     ret = hax_get_fpu(env);
1033     if (ret < 0) {
1034         return ret;
1035     }
1036
1037     ret = hax_get_msrs(env);
1038     if (ret < 0) {
1039         return ret;
1040     }
1041
1042     x86_update_hflags(env);
1043     return 0;
1044 }
1045
1046 static int hax_arch_set_registers(CPUArchState *env)
1047 {
1048     int ret;
1049     ret = hax_sync_vcpu_register(env, 1);
1050
1051     if (ret < 0) {
1052         fprintf(stderr, "Failed to sync vcpu reg\n");
1053         return ret;
1054     }
1055     ret = hax_set_fpu(env);
1056     if (ret < 0) {
1057         fprintf(stderr, "FPU failed\n");
1058         return ret;
1059     }
1060     ret = hax_set_msrs(env);
1061     if (ret < 0) {
1062         fprintf(stderr, "MSR failed\n");
1063         return ret;
1064     }
1065
1066     return 0;
1067 }
1068
1069 static void hax_vcpu_sync_state(CPUArchState *env, int modified)
1070 {
1071     if (hax_enabled()) {
1072         if (modified) {
1073             hax_arch_set_registers(env);
1074         } else {
1075             hax_arch_get_registers(env);
1076         }
1077     }
1078 }
1079
1080 /*
1081  * much simpler than kvm, at least in first stage because:
1082  * We don't need consider the device pass-through, we don't need
1083  * consider the framebuffer, and we may even remove the bios at all
1084  */
1085 int hax_sync_vcpus(void)
1086 {
1087     if (hax_enabled()) {
1088         CPUState *cpu;
1089
1090         cpu = first_cpu;
1091         if (!cpu) {
1092             return 0;
1093         }
1094
1095         for (; cpu != NULL; cpu = CPU_NEXT(cpu)) {
1096             int ret;
1097
1098             ret = hax_arch_set_registers(cpu->env_ptr);
1099             if (ret < 0) {
1100                 return ret;
1101             }
1102         }
1103     }
1104
1105     return 0;
1106 }
1107
1108 void hax_reset_vcpu_state(void *opaque)
1109 {
1110     CPUState *cpu;
1111     for (cpu = first_cpu; cpu != NULL; cpu = CPU_NEXT(cpu)) {
1112         cpu->hax_vcpu->tunnel->user_event_pending = 0;
1113         cpu->hax_vcpu->tunnel->ready_for_interrupt_injection = 0;
1114     }
1115 }
1116
1117 static void hax_accel_class_init(ObjectClass *oc, void *data)
1118 {
1119     AccelClass *ac = ACCEL_CLASS(oc);
1120     ac->name = "HAX";
1121     ac->init_machine = hax_accel_init;
1122     ac->allowed = &hax_allowed;
1123 }
1124
1125 static const TypeInfo hax_accel_type = {
1126     .name = ACCEL_CLASS_NAME("hax"),
1127     .parent = TYPE_ACCEL,
1128     .class_init = hax_accel_class_init,
1129 };
1130
1131 static void hax_type_init(void)
1132 {
1133     type_register_static(&hax_accel_type);
1134 }
1135
1136 type_init(hax_type_init);