target/i386/hax-all.c

   1 /*
   2  * QEMU HAX support
   3  *
   4  * Copyright IBM, Corp. 2008
   5  *           Red Hat, Inc. 2008
   6  *
   7  * Authors:
   8  *  Anthony Liguori   <aliguori@us.ibm.com>
   9  *  Glauber Costa     <gcosta@redhat.com>
  10  *
  11  * Copyright (c) 2011 Intel Corporation
  12  *  Written by:
  13  *  Jiang Yunhong<yunhong.jiang@intel.com>
  14  *  Xin Xiaohui<xiaohui.xin@intel.com>
  15  *  Zhang Xiantao<xiantao.zhang@intel.com>
  16  *
  17  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  18  * See the COPYING file in the top-level directory.
  19  *
  20  */
  21
  22 /*
  23  * HAX common code for both windows and darwin
  24  */
  25
  26 #include "qemu/osdep.h"
  27 #include "cpu.h"
  28 #include "exec/address-spaces.h"
  29
  30 #include "qemu-common.h"
  31 #include "hax-i386.h"
  32 #include "sysemu/accel.h"
  33 #include "sysemu/sysemu.h"
  34 #include "qemu/main-loop.h"
  35 #include "hw/boards.h"
  36
/* Compile-time switch for HAX debug tracing; set to non-zero to enable. */
#define DEBUG_HAX 0

/*
 * printf-style debug trace, written to stdout.  The fprintf() call is always
 * compiled but only executed when DEBUG_HAX is non-zero.
 */
#define DPRINTF(fmt, ...) \
    do { \
        if (DEBUG_HAX) { \
            fprintf(stdout, fmt, ## __VA_ARGS__); \
        } \
    } while (0)
  45
/* HAX API version implemented by this QEMU code */
const uint32_t hax_cur_version = 0x4; /* API v4: unmapping and MMIO moves */
/* Minimum HAX kernel module version this code accepts */
const uint32_t hax_min_version = 0x4; /* API v4: supports unmapping */

/* Backing flag for hax_enabled(); also handed to the accel class (ac->allowed) */
static bool hax_allowed;

/* Single, file-global HAX state; zeroed and filled in by hax_init() */
struct hax_state hax_global;

/* Forward declarations: both are used before their definitions below */
static void hax_vcpu_sync_state(CPUArchState *env, int modified);
static int hax_arch_get_registers(CPUArchState *env);
  57
  58 int hax_enabled(void)
  59 {
  60     return hax_allowed;
  61 }
  62
  63 int valid_hax_tunnel_size(uint16_t size)
  64 {
  65     return size >= sizeof(struct hax_tunnel);
  66 }
  67
  68 hax_fd hax_vcpu_get_fd(CPUArchState *env)
  69 {
  70     struct hax_vcpu_state *vcpu = ENV_GET_CPU(env)->hax_vcpu;
  71     if (!vcpu) {
  72         return HAX_INVALID_FD;
  73     }
  74     return vcpu->fd;
  75 }
  76
  77 static int hax_get_capability(struct hax_state *hax)
  78 {
  79     int ret;
  80     struct hax_capabilityinfo capinfo, *cap = &capinfo;
  81
  82     ret = hax_capability(hax, cap);
  83     if (ret) {
  84         return ret;
  85     }
  86
  87     if ((cap->wstatus & HAX_CAP_WORKSTATUS_MASK) == HAX_CAP_STATUS_NOTWORKING) {
  88         if (cap->winfo & HAX_CAP_FAILREASON_VT) {
  89             DPRINTF
  90                 ("VTX feature is not enabled, HAX driver will not work.\n");
  91         } else if (cap->winfo & HAX_CAP_FAILREASON_NX) {
  92             DPRINTF
  93                 ("NX feature is not enabled, HAX driver will not work.\n");
  94         }
  95         return -ENXIO;
  96
  97     }
  98
  99     if (!(cap->winfo & HAX_CAP_UG)) {
 100         fprintf(stderr, "UG mode is not supported by the hardware.\n");
 101         return -ENOTSUP;
 102     }
 103
 104     hax->supports_64bit_ramblock = !!(cap->winfo & HAX_CAP_64BIT_RAMBLOCK);
 105
 106     if (cap->wstatus & HAX_CAP_MEMQUOTA) {
 107         if (cap->mem_quota < hax->mem_quota) {
 108             fprintf(stderr, "The VM memory needed exceeds the driver limit.\n");
 109             return -ENOSPC;
 110         }
 111     }
 112     return 0;
 113 }
 114
 115 static int hax_version_support(struct hax_state *hax)
 116 {
 117     int ret;
 118     struct hax_module_version version;
 119
 120     ret = hax_mod_version(hax, &version);
 121     if (ret < 0) {
 122         return 0;
 123     }
 124
 125     if (hax_min_version > version.cur_version) {
 126         fprintf(stderr, "Incompatible HAX module version %d,",
 127                 version.cur_version);
 128         fprintf(stderr, "requires minimum version %d\n", hax_min_version);
 129         return 0;
 130     }
 131     if (hax_cur_version < version.compat_version) {
 132         fprintf(stderr, "Incompatible QEMU HAX API version %x,",
 133                 hax_cur_version);
 134         fprintf(stderr, "requires minimum HAX API version %x\n",
 135                 version.compat_version);
 136         return 0;
 137     }
 138
 139     return 1;
 140 }
 141
 142 int hax_vcpu_create(int id)
 143 {
 144     struct hax_vcpu_state *vcpu = NULL;
 145     int ret;
 146
 147     if (!hax_global.vm) {
 148         fprintf(stderr, "vcpu %x created failed, vm is null\n", id);
 149         return -1;
 150     }
 151
 152     if (hax_global.vm->vcpus[id]) {
 153         fprintf(stderr, "vcpu %x allocated already\n", id);
 154         return 0;
 155     }
 156
 157     vcpu = g_malloc(sizeof(struct hax_vcpu_state));
 158     if (!vcpu) {
 159         fprintf(stderr, "Failed to alloc vcpu state\n");
 160         return -ENOMEM;
 161     }
 162
 163     memset(vcpu, 0, sizeof(struct hax_vcpu_state));
 164
 165     ret = hax_host_create_vcpu(hax_global.vm->fd, id);
 166     if (ret) {
 167         fprintf(stderr, "Failed to create vcpu %x\n", id);
 168         goto error;
 169     }
 170
 171     vcpu->vcpu_id = id;
 172     vcpu->fd = hax_host_open_vcpu(hax_global.vm->id, id);
 173     if (hax_invalid_fd(vcpu->fd)) {
 174         fprintf(stderr, "Failed to open the vcpu\n");
 175         ret = -ENODEV;
 176         goto error;
 177     }
 178
 179     hax_global.vm->vcpus[id] = vcpu;
 180
 181     ret = hax_host_setup_vcpu_channel(vcpu);
 182     if (ret) {
 183         fprintf(stderr, "Invalid hax tunnel size\n");
 184         ret = -EINVAL;
 185         goto error;
 186     }
 187     return 0;
 188
 189   error:
 190     /* vcpu and tunnel will be closed automatically */
 191     if (vcpu && !hax_invalid_fd(vcpu->fd)) {
 192         hax_close_fd(vcpu->fd);
 193     }
 194
 195     hax_global.vm->vcpus[id] = NULL;
 196     g_free(vcpu);
 197     return -1;
 198 }
 199
 200 int hax_vcpu_destroy(CPUState *cpu)
 201 {
 202     struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
 203
 204     if (!hax_global.vm) {
 205         fprintf(stderr, "vcpu %x destroy failed, vm is null\n", vcpu->vcpu_id);
 206         return -1;
 207     }
 208
 209     if (!vcpu) {
 210         return 0;
 211     }
 212
 213     /*
 214      * 1. The hax_tunnel is also destroied when vcpu destroy
 215      * 2. close fd will cause hax module vcpu be cleaned
 216      */
 217     hax_close_fd(vcpu->fd);
 218     hax_global.vm->vcpus[vcpu->vcpu_id] = NULL;
 219     g_free(vcpu);
 220     return 0;
 221 }
 222
 223 int hax_init_vcpu(CPUState *cpu)
 224 {
 225     int ret;
 226
 227     ret = hax_vcpu_create(cpu->cpu_index);
 228     if (ret < 0) {
 229         fprintf(stderr, "Failed to create HAX vcpu\n");
 230         exit(-1);
 231     }
 232
 233     cpu->hax_vcpu = hax_global.vm->vcpus[cpu->cpu_index];
 234     cpu->vcpu_dirty = true;
 235     qemu_register_reset(hax_reset_vcpu_state, (CPUArchState *) (cpu->env_ptr));
 236
 237     return ret;
 238 }
 239
 240 struct hax_vm *hax_vm_create(struct hax_state *hax)
 241 {
 242     struct hax_vm *vm;
 243     int vm_id = 0, ret;
 244
 245     if (hax_invalid_fd(hax->fd)) {
 246         return NULL;
 247     }
 248
 249     if (hax->vm) {
 250         return hax->vm;
 251     }
 252
 253     vm = g_malloc(sizeof(struct hax_vm));
 254     if (!vm) {
 255         return NULL;
 256     }
 257     memset(vm, 0, sizeof(struct hax_vm));
 258     ret = hax_host_create_vm(hax, &vm_id);
 259     if (ret) {
 260         fprintf(stderr, "Failed to create vm %x\n", ret);
 261         goto error;
 262     }
 263     vm->id = vm_id;
 264     vm->fd = hax_host_open_vm(hax, vm_id);
 265     if (hax_invalid_fd(vm->fd)) {
 266         fprintf(stderr, "Failed to open vm %d\n", vm_id);
 267         goto error;
 268     }
 269
 270     hax->vm = vm;
 271     return vm;
 272
 273   error:
 274     g_free(vm);
 275     hax->vm = NULL;
 276     return NULL;
 277 }
 278
 279 int hax_vm_destroy(struct hax_vm *vm)
 280 {
 281     int i;
 282
 283     for (i = 0; i < HAX_MAX_VCPU; i++)
 284         if (vm->vcpus[i]) {
 285             fprintf(stderr, "VCPU should be cleaned before vm clean\n");
 286             return -1;
 287         }
 288     hax_close_fd(vm->fd);
 289     g_free(vm);
 290     hax_global.vm = NULL;
 291     return 0;
 292 }
 293
 294 static void hax_handle_interrupt(CPUState *cpu, int mask)
 295 {
 296     cpu->interrupt_request |= mask;
 297
 298     if (!qemu_cpu_is_self(cpu)) {
 299         qemu_cpu_kick(cpu);
 300     }
 301 }
 302
 303 static int hax_init(ram_addr_t ram_size)
 304 {
 305     struct hax_state *hax = NULL;
 306     struct hax_qemu_version qversion;
 307     int ret;
 308
 309     hax = &hax_global;
 310
 311     memset(hax, 0, sizeof(struct hax_state));
 312     hax->mem_quota = ram_size;
 313
 314     hax->fd = hax_mod_open();
 315     if (hax_invalid_fd(hax->fd)) {
 316         hax->fd = 0;
 317         ret = -ENODEV;
 318         goto error;
 319     }
 320
 321     ret = hax_get_capability(hax);
 322
 323     if (ret) {
 324         if (ret != -ENOSPC) {
 325             ret = -EINVAL;
 326         }
 327         goto error;
 328     }
 329
 330     if (!hax_version_support(hax)) {
 331         ret = -EINVAL;
 332         goto error;
 333     }
 334
 335     hax->vm = hax_vm_create(hax);
 336     if (!hax->vm) {
 337         fprintf(stderr, "Failed to create HAX VM\n");
 338         ret = -EINVAL;
 339         goto error;
 340     }
 341
 342     hax_memory_init();
 343
 344     qversion.cur_version = hax_cur_version;
 345     qversion.min_version = hax_min_version;
 346     hax_notify_qemu_version(hax->vm->fd, &qversion);
 347     cpu_interrupt_handler = hax_handle_interrupt;
 348
 349     return ret;
 350   error:
 351     if (hax->vm) {
 352         hax_vm_destroy(hax->vm);
 353     }
 354     if (hax->fd) {
 355         hax_mod_close(hax);
 356     }
 357
 358     return ret;
 359 }
 360
 361 static int hax_accel_init(MachineState *ms)
 362 {
 363     int ret = hax_init(ms->ram_size);
 364
 365     if (ret && (ret != -ENOSPC)) {
 366         fprintf(stderr, "No accelerator found.\n");
 367     } else {
 368         fprintf(stdout, "HAX is %s and emulator runs in %s mode.\n",
 369                 !ret ? "working" : "not working",
 370                 !ret ? "fast virt" : "emulation");
 371     }
 372     return ret;
 373 }
 374
 375 static int hax_handle_fastmmio(CPUArchState *env, struct hax_fastmmio *hft)
 376 {
 377     if (hft->direction < 2) {
 378         cpu_physical_memory_rw(hft->gpa, (uint8_t *) &hft->value, hft->size,
 379                                hft->direction);
 380     } else {
 381         /*
 382          * HAX API v4 supports transferring data between two MMIO addresses,
 383          * hft->gpa and hft->gpa2 (instructions such as MOVS require this):
 384          *  hft->direction == 2: gpa ==> gpa2
 385          */
 386         uint64_t value;
 387         cpu_physical_memory_rw(hft->gpa, (uint8_t *) &value, hft->size, 0);
 388         cpu_physical_memory_rw(hft->gpa2, (uint8_t *) &value, hft->size, 1);
 389     }
 390
 391     return 0;
 392 }
 393
 394 static int hax_handle_io(CPUArchState *env, uint32_t df, uint16_t port,
 395                          int direction, int size, int count, void *buffer)
 396 {
 397     uint8_t *ptr;
 398     int i;
 399     MemTxAttrs attrs = { 0 };
 400
 401     if (!df) {
 402         ptr = (uint8_t *) buffer;
 403     } else {
 404         ptr = buffer + size * count - size;
 405     }
 406     for (i = 0; i < count; i++) {
 407         address_space_rw(&address_space_io, port, attrs,
 408                          ptr, size, direction == HAX_EXIT_IO_OUT);
 409         if (!df) {
 410             ptr += size;
 411         } else {
 412             ptr -= size;
 413         }
 414     }
 415
 416     return 0;
 417 }
 418
 419 static int hax_vcpu_interrupt(CPUArchState *env)
 420 {
 421     CPUState *cpu = ENV_GET_CPU(env);
 422     struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
 423     struct hax_tunnel *ht = vcpu->tunnel;
 424
 425     /*
 426      * Try to inject an interrupt if the guest can accept it
 427      * Unlike KVM, HAX kernel check for the eflags, instead of qemu
 428      */
 429     if (ht->ready_for_interrupt_injection &&
 430         (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
 431         int irq;
 432
 433         irq = cpu_get_pic_interrupt(env);
 434         if (irq >= 0) {
 435             hax_inject_interrupt(env, irq);
 436             cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
 437         }
 438     }
 439
 440     /* If we have an interrupt but the guest is not ready to receive an
 441      * interrupt, request an interrupt window exit.  This will
 442      * cause a return to userspace as soon as the guest is ready to
 443      * receive interrupts. */
 444     if ((cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
 445         ht->request_interrupt_window = 1;
 446     } else {
 447         ht->request_interrupt_window = 0;
 448     }
 449     return 0;
 450 }
 451
 452 void hax_raise_event(CPUState *cpu)
 453 {
 454     struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
 455
 456     if (!vcpu) {
 457         return;
 458     }
 459     vcpu->tunnel->user_event_pending = 1;
 460 }
 461
/*
 * Ask the HAX kernel module to run the CPU for us until one of:
 * 1. The guest crashes or shuts down
 * 2. QEMU's emulation is needed, e.g. the guest executes an MMIO instruction
 * 3. The guest executes HLT
 * 4. QEMU has a signal/event pending
 * 5. An unknown VMX exit happens
 *
 * Returns non-zero when the loop ended with a negative internal result
 * (an exit this code cannot handle), zero otherwise.
 */
static int hax_vcpu_hax_exec(CPUArchState *env)
{
    int ret = 0;
    CPUState *cpu = ENV_GET_CPU(env);
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
    struct hax_tunnel *ht = vcpu->tunnel;

    if (!hax_enabled()) {
        DPRINTF("Trying to vcpu execute at eip:" TARGET_FMT_lx "\n", env->eip);
        return 0;
    }

    cpu->halted = 0;

    /* Deal with requests that must be handled before entering the guest. */
    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
        apic_poll_irq(x86_cpu->apic_state);
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
        DPRINTF("\nhax_vcpu_hax_exec: handling INIT for %d\n",
                cpu->cpu_index);
        do_cpu_init(x86_cpu);
        /* Push the re-initialised register state to the HAX module. */
        hax_vcpu_sync_state(env, 1);
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
        DPRINTF("hax_vcpu_hax_exec: handling SIPI for %d\n",
                cpu->cpu_index);
        /* Fetch current state, apply the SIPI, then push the result back. */
        hax_vcpu_sync_state(env, 0);
        do_cpu_sipi(x86_cpu);
        hax_vcpu_sync_state(env, 1);
    }

    /* Run loop: keeps re-entering the guest while ret stays 0. */
    do {
        int hax_ret;

        if (cpu->exit_request) {
            ret = 1;
            break;
        }

        hax_vcpu_interrupt(env);

        /* The iothread lock is dropped for the duration of the guest run. */
        qemu_mutex_unlock_iothread();
        cpu_exec_start(cpu);
        hax_ret = hax_vcpu_run(vcpu);
        cpu_exec_end(cpu);
        qemu_mutex_lock_iothread();

        /* Simply continue the vcpu_run if system call interrupted */
        if (hax_ret == -EINTR || hax_ret == -EAGAIN) {
            DPRINTF("io window interrupted\n");
            continue;
        }

        if (hax_ret < 0) {
            fprintf(stderr, "vcpu run failed for vcpu  %x\n", vcpu->vcpu_id);
            abort();
        }
        /* Dispatch on the exit status the module left in the tunnel. */
        switch (ht->_exit_status) {
        case HAX_EXIT_IO:
            ret = hax_handle_io(env, ht->pio._df, ht->pio._port,
                            ht->pio._direction,
                            ht->pio._size, ht->pio._count, vcpu->iobuf);
            break;
        case HAX_EXIT_FAST_MMIO:
            ret = hax_handle_fastmmio(env, (struct hax_fastmmio *) vcpu->iobuf);
            break;
        /* Guest state changed, currently only for shutdown */
        case HAX_EXIT_STATECHANGE:
            fprintf(stdout, "VCPU shutdown request\n");
            qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
            hax_vcpu_sync_state(env, 0);
            ret = 1;
            break;
        case HAX_EXIT_UNKNOWN_VMEXIT:
            fprintf(stderr, "Unknown VMX exit %x from guest\n",
                    ht->_exit_reason);
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            hax_vcpu_sync_state(env, 0);
            cpu_dump_state(cpu, stderr, fprintf, 0);
            /* Negative result: reported to the caller as a failure. */
            ret = -1;
            break;
        case HAX_EXIT_HLT:
            if (!(cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
                !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
                /*
                 * No hard interrupt or NMI is pending: set IF, mark the
                 * CPU halted and leave the run loop with EXCP_HLT.
                 */
                env->eflags |= IF_MASK;
                cpu->halted = 1;
                cpu->exception_index = EXCP_HLT;
                ret = 1;
            }
            break;
        /* these situations will continue to hax module */
        case HAX_EXIT_INTERRUPT:
        case HAX_EXIT_PAUSED:
            break;
        case HAX_EXIT_MMIO:
            /* Should not happen on UG system */
            fprintf(stderr, "HAX: unsupported MMIO emulation\n");
            ret = -1;
            break;
        case HAX_EXIT_REAL:
            /* Should not happen on UG system */
            fprintf(stderr, "HAX: unimplemented real mode emulation\n");
            ret = -1;
            break;
        default:
            fprintf(stderr, "Unknown exit %x from HAX\n", ht->_exit_status);
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            hax_vcpu_sync_state(env, 0);
            cpu_dump_state(cpu, stderr, fprintf, 0);
            /* Unlike HAX_EXIT_UNKNOWN_VMEXIT this leaves the loop non-fatally. */
            ret = 1;
            break;
        }
    } while (!ret);

    /* Consume the exit request and report it as an interrupt exception. */
    if (cpu->exit_request) {
        cpu->exit_request = 0;
        cpu->exception_index = EXCP_INTERRUPT;
    }
    return ret < 0;
}
 595
 596 static void do_hax_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
 597 {
 598     CPUArchState *env = cpu->env_ptr;
 599
 600     hax_arch_get_registers(env);
 601     cpu->vcpu_dirty = true;
 602 }
 603
 604 void hax_cpu_synchronize_state(CPUState *cpu)
 605 {
 606     if (!cpu->vcpu_dirty) {
 607         run_on_cpu(cpu, do_hax_cpu_synchronize_state, RUN_ON_CPU_NULL);
 608     }
 609 }
 610
 611 static void do_hax_cpu_synchronize_post_reset(CPUState *cpu,
 612                                               run_on_cpu_data arg)
 613 {
 614     CPUArchState *env = cpu->env_ptr;
 615
 616     hax_vcpu_sync_state(env, 1);
 617     cpu->vcpu_dirty = false;
 618 }
 619
/* Run do_hax_cpu_synchronize_post_reset() on @cpu via run_on_cpu(). */
void hax_cpu_synchronize_post_reset(CPUState *cpu)
{
    run_on_cpu(cpu, do_hax_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}
 624
 625 static void do_hax_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
 626 {
 627     CPUArchState *env = cpu->env_ptr;
 628
 629     hax_vcpu_sync_state(env, 1);
 630     cpu->vcpu_dirty = false;
 631 }
 632
/* Run do_hax_cpu_synchronize_post_init() on @cpu via run_on_cpu(). */
void hax_cpu_synchronize_post_init(CPUState *cpu)
{
    run_on_cpu(cpu, do_hax_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}
 637
/*
 * run_on_cpu() worker used before loadvm: only marks the CPU state dirty.
 * @arg is unused.
 */
static void do_hax_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
{
    cpu->vcpu_dirty = true;
}
 642
/* Run do_hax_cpu_synchronize_pre_loadvm() on @cpu via run_on_cpu(). */
void hax_cpu_synchronize_pre_loadvm(CPUState *cpu)
{
    run_on_cpu(cpu, do_hax_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
}
 647
 648 int hax_smp_cpu_exec(CPUState *cpu)
 649 {
 650     CPUArchState *env = (CPUArchState *) (cpu->env_ptr);
 651     int fatal;
 652     int ret;
 653
 654     while (1) {
 655         if (cpu->exception_index >= EXCP_INTERRUPT) {
 656             ret = cpu->exception_index;
 657             cpu->exception_index = -1;
 658             break;
 659         }
 660
 661         fatal = hax_vcpu_hax_exec(env);
 662
 663         if (fatal) {
 664             fprintf(stderr, "Unsupported HAX vcpu return\n");
 665             abort();
 666         }
 667     }
 668
 669     return ret;
 670 }
 671
 672 static void set_v8086_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
 673 {
 674     memset(lhs, 0, sizeof(struct segment_desc_t));
 675     lhs->selector = rhs->selector;
 676     lhs->base = rhs->base;
 677     lhs->limit = rhs->limit;
 678     lhs->type = 3;
 679     lhs->present = 1;
 680     lhs->dpl = 3;
 681     lhs->operand_size = 0;
 682     lhs->desc = 1;
 683     lhs->long_mode = 0;
 684     lhs->granularity = 0;
 685     lhs->available = 0;
 686 }
 687
 688 static void get_seg(SegmentCache *lhs, const struct segment_desc_t *rhs)
 689 {
 690     lhs->selector = rhs->selector;
 691     lhs->base = rhs->base;
 692     lhs->limit = rhs->limit;
 693     lhs->flags = (rhs->type << DESC_TYPE_SHIFT)
 694         | (rhs->present * DESC_P_MASK)
 695         | (rhs->dpl << DESC_DPL_SHIFT)
 696         | (rhs->operand_size << DESC_B_SHIFT)
 697         | (rhs->desc * DESC_S_MASK)
 698         | (rhs->long_mode << DESC_L_SHIFT)
 699         | (rhs->granularity * DESC_G_MASK) | (rhs->available * DESC_AVL_MASK);
 700 }
 701
 702 static void set_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
 703 {
 704     unsigned flags = rhs->flags;
 705
 706     memset(lhs, 0, sizeof(struct segment_desc_t));
 707     lhs->selector = rhs->selector;
 708     lhs->base = rhs->base;
 709     lhs->limit = rhs->limit;
 710     lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
 711     lhs->present = (flags & DESC_P_MASK) != 0;
 712     lhs->dpl = rhs->selector & 3;
 713     lhs->operand_size = (flags >> DESC_B_SHIFT) & 1;
 714     lhs->desc = (flags & DESC_S_MASK) != 0;
 715     lhs->long_mode = (flags >> DESC_L_SHIFT) & 1;
 716     lhs->granularity = (flags & DESC_G_MASK) != 0;
 717     lhs->available = (flags & DESC_AVL_MASK) != 0;
 718 }
 719
 720 static void hax_getput_reg(uint64_t *hax_reg, target_ulong *qemu_reg, int set)
 721 {
 722     target_ulong reg = *hax_reg;
 723
 724     if (set) {
 725         *hax_reg = *qemu_reg;
 726     } else {
 727         *qemu_reg = reg;
 728     }
 729 }
 730
 731 /* The sregs has been synced with HAX kernel already before this call */
 732 static int hax_get_segments(CPUArchState *env, struct vcpu_state_t *sregs)
 733 {
 734     get_seg(&env->segs[R_CS], &sregs->_cs);
 735     get_seg(&env->segs[R_DS], &sregs->_ds);
 736     get_seg(&env->segs[R_ES], &sregs->_es);
 737     get_seg(&env->segs[R_FS], &sregs->_fs);
 738     get_seg(&env->segs[R_GS], &sregs->_gs);
 739     get_seg(&env->segs[R_SS], &sregs->_ss);
 740
 741     get_seg(&env->tr, &sregs->_tr);
 742     get_seg(&env->ldt, &sregs->_ldt);
 743     env->idt.limit = sregs->_idt.limit;
 744     env->idt.base = sregs->_idt.base;
 745     env->gdt.limit = sregs->_gdt.limit;
 746     env->gdt.base = sregs->_gdt.base;
 747     return 0;
 748 }
 749
 750 static int hax_set_segments(CPUArchState *env, struct vcpu_state_t *sregs)
 751 {
 752     if ((env->eflags & VM_MASK)) {
 753         set_v8086_seg(&sregs->_cs, &env->segs[R_CS]);
 754         set_v8086_seg(&sregs->_ds, &env->segs[R_DS]);
 755         set_v8086_seg(&sregs->_es, &env->segs[R_ES]);
 756         set_v8086_seg(&sregs->_fs, &env->segs[R_FS]);
 757         set_v8086_seg(&sregs->_gs, &env->segs[R_GS]);
 758         set_v8086_seg(&sregs->_ss, &env->segs[R_SS]);
 759     } else {
 760         set_seg(&sregs->_cs, &env->segs[R_CS]);
 761         set_seg(&sregs->_ds, &env->segs[R_DS]);
 762         set_seg(&sregs->_es, &env->segs[R_ES]);
 763         set_seg(&sregs->_fs, &env->segs[R_FS]);
 764         set_seg(&sregs->_gs, &env->segs[R_GS]);
 765         set_seg(&sregs->_ss, &env->segs[R_SS]);
 766
 767         if (env->cr[0] & CR0_PE_MASK) {
 768             /* force ss cpl to cs cpl */
 769             sregs->_ss.selector = (sregs->_ss.selector & ~3) |
 770                                   (sregs->_cs.selector & 3);
 771             sregs->_ss.dpl = sregs->_ss.selector & 3;
 772         }
 773     }
 774
 775     set_seg(&sregs->_tr, &env->tr);
 776     set_seg(&sregs->_ldt, &env->ldt);
 777     sregs->_idt.limit = env->idt.limit;
 778     sregs->_idt.base = env->idt.base;
 779     sregs->_gdt.limit = env->gdt.limit;
 780     sregs->_gdt.base = env->gdt.base;
 781     return 0;
 782 }
 783
 784 static int hax_sync_vcpu_register(CPUArchState *env, int set)
 785 {
 786     struct vcpu_state_t regs;
 787     int ret;
 788     memset(&regs, 0, sizeof(struct vcpu_state_t));
 789
 790     if (!set) {
 791         ret = hax_sync_vcpu_state(env, &regs, 0);
 792         if (ret < 0) {
 793             return -1;
 794         }
 795     }
 796
 797     /* generic register */
 798     hax_getput_reg(&regs._rax, &env->regs[R_EAX], set);
 799     hax_getput_reg(&regs._rbx, &env->regs[R_EBX], set);
 800     hax_getput_reg(&regs._rcx, &env->regs[R_ECX], set);
 801     hax_getput_reg(&regs._rdx, &env->regs[R_EDX], set);
 802     hax_getput_reg(&regs._rsi, &env->regs[R_ESI], set);
 803     hax_getput_reg(&regs._rdi, &env->regs[R_EDI], set);
 804     hax_getput_reg(&regs._rsp, &env->regs[R_ESP], set);
 805     hax_getput_reg(&regs._rbp, &env->regs[R_EBP], set);
 806 #ifdef TARGET_X86_64
 807     hax_getput_reg(&regs._r8, &env->regs[8], set);
 808     hax_getput_reg(&regs._r9, &env->regs[9], set);
 809     hax_getput_reg(&regs._r10, &env->regs[10], set);
 810     hax_getput_reg(&regs._r11, &env->regs[11], set);
 811     hax_getput_reg(&regs._r12, &env->regs[12], set);
 812     hax_getput_reg(&regs._r13, &env->regs[13], set);
 813     hax_getput_reg(&regs._r14, &env->regs[14], set);
 814     hax_getput_reg(&regs._r15, &env->regs[15], set);
 815 #endif
 816     hax_getput_reg(&regs._rflags, &env->eflags, set);
 817     hax_getput_reg(&regs._rip, &env->eip, set);
 818
 819     if (set) {
 820         regs._cr0 = env->cr[0];
 821         regs._cr2 = env->cr[2];
 822         regs._cr3 = env->cr[3];
 823         regs._cr4 = env->cr[4];
 824         hax_set_segments(env, &regs);
 825     } else {
 826         env->cr[0] = regs._cr0;
 827         env->cr[2] = regs._cr2;
 828         env->cr[3] = regs._cr3;
 829         env->cr[4] = regs._cr4;
 830         hax_get_segments(env, &regs);
 831     }
 832
 833     if (set) {
 834         ret = hax_sync_vcpu_state(env, &regs, 1);
 835         if (ret < 0) {
 836             return -1;
 837         }
 838     }
 839     return 0;
 840 }
 841
 842 static void hax_msr_entry_set(struct vmx_msr *item, uint32_t index,
 843                               uint64_t value)
 844 {
 845     item->entry = index;
 846     item->value = value;
 847 }
 848
 849 static int hax_get_msrs(CPUArchState *env)
 850 {
 851     struct hax_msr_data md;
 852     struct vmx_msr *msrs = md.entries;
 853     int ret, i, n;
 854
 855     n = 0;
 856     msrs[n++].entry = MSR_IA32_SYSENTER_CS;
 857     msrs[n++].entry = MSR_IA32_SYSENTER_ESP;
 858     msrs[n++].entry = MSR_IA32_SYSENTER_EIP;
 859     msrs[n++].entry = MSR_IA32_TSC;
 860 #ifdef TARGET_X86_64
 861     msrs[n++].entry = MSR_EFER;
 862     msrs[n++].entry = MSR_STAR;
 863     msrs[n++].entry = MSR_LSTAR;
 864     msrs[n++].entry = MSR_CSTAR;
 865     msrs[n++].entry = MSR_FMASK;
 866     msrs[n++].entry = MSR_KERNELGSBASE;
 867 #endif
 868     md.nr_msr = n;
 869     ret = hax_sync_msr(env, &md, 0);
 870     if (ret < 0) {
 871         return ret;
 872     }
 873
 874     for (i = 0; i < md.done; i++) {
 875         switch (msrs[i].entry) {
 876         case MSR_IA32_SYSENTER_CS:
 877             env->sysenter_cs = msrs[i].value;
 878             break;
 879         case MSR_IA32_SYSENTER_ESP:
 880             env->sysenter_esp = msrs[i].value;
 881             break;
 882         case MSR_IA32_SYSENTER_EIP:
 883             env->sysenter_eip = msrs[i].value;
 884             break;
 885         case MSR_IA32_TSC:
 886             env->tsc = msrs[i].value;
 887             break;
 888 #ifdef TARGET_X86_64
 889         case MSR_EFER:
 890             env->efer = msrs[i].value;
 891             break;
 892         case MSR_STAR:
 893             env->star = msrs[i].value;
 894             break;
 895         case MSR_LSTAR:
 896             env->lstar = msrs[i].value;
 897             break;
 898         case MSR_CSTAR:
 899             env->cstar = msrs[i].value;
 900             break;
 901         case MSR_FMASK:
 902             env->fmask = msrs[i].value;
 903             break;
 904         case MSR_KERNELGSBASE:
 905             env->kernelgsbase = msrs[i].value;
 906             break;
 907 #endif
 908         }
 909     }
 910
 911     return 0;
 912 }
 913
 914 static int hax_set_msrs(CPUArchState *env)
 915 {
 916     struct hax_msr_data md;
 917     struct vmx_msr *msrs;
 918     msrs = md.entries;
 919     int n = 0;
 920
 921     memset(&md, 0, sizeof(struct hax_msr_data));
 922     hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
 923     hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
 924     hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
 925     hax_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
 926 #ifdef TARGET_X86_64
 927     hax_msr_entry_set(&msrs[n++], MSR_EFER, env->efer);
 928     hax_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
 929     hax_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
 930     hax_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
 931     hax_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
 932     hax_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
 933 #endif
 934     md.nr_msr = n;
 935     md.done = 0;
 936
 937     return hax_sync_msr(env, &md, 1);
 938 }
 939
 940 static int hax_get_fpu(CPUArchState *env)
 941 {
 942     struct fx_layout fpu;
 943     int i, ret;
 944
 945     ret = hax_sync_fpu(env, &fpu, 0);
 946     if (ret < 0) {
 947         return ret;
 948     }
 949
 950     env->fpstt = (fpu.fsw >> 11) & 7;
 951     env->fpus = fpu.fsw;
 952     env->fpuc = fpu.fcw;
 953     for (i = 0; i < 8; ++i) {
 954         env->fptags[i] = !((fpu.ftw >> i) & 1);
 955     }
 956     memcpy(env->fpregs, fpu.st_mm, sizeof(env->fpregs));
 957
 958     for (i = 0; i < 8; i++) {
 959         env->xmm_regs[i].ZMM_Q(0) = ldq_p(&fpu.mmx_1[i][0]);
 960         env->xmm_regs[i].ZMM_Q(1) = ldq_p(&fpu.mmx_1[i][8]);
 961         if (CPU_NB_REGS > 8) {
 962             env->xmm_regs[i + 8].ZMM_Q(0) = ldq_p(&fpu.mmx_2[i][0]);
 963             env->xmm_regs[i + 8].ZMM_Q(1) = ldq_p(&fpu.mmx_2[i][8]);
 964         }
 965     }
 966     env->mxcsr = fpu.mxcsr;
 967
 968     return 0;
 969 }
 970
 971 static int hax_set_fpu(CPUArchState *env)
 972 {
 973     struct fx_layout fpu;
 974     int i;
 975
 976     memset(&fpu, 0, sizeof(fpu));
 977     fpu.fsw = env->fpus & ~(7 << 11);
 978     fpu.fsw |= (env->fpstt & 7) << 11;
 979     fpu.fcw = env->fpuc;
 980
 981     for (i = 0; i < 8; ++i) {
 982         fpu.ftw |= (!env->fptags[i]) << i;
 983     }
 984
 985     memcpy(fpu.st_mm, env->fpregs, sizeof(env->fpregs));
 986     for (i = 0; i < 8; i++) {
 987         stq_p(&fpu.mmx_1[i][0], env->xmm_regs[i].ZMM_Q(0));
 988         stq_p(&fpu.mmx_1[i][8], env->xmm_regs[i].ZMM_Q(1));
 989         if (CPU_NB_REGS > 8) {
 990             stq_p(&fpu.mmx_2[i][0], env->xmm_regs[i + 8].ZMM_Q(0));
 991             stq_p(&fpu.mmx_2[i][8], env->xmm_regs[i + 8].ZMM_Q(1));
 992         }
 993     }
 994
 995     fpu.mxcsr = env->mxcsr;
 996
 997     return hax_sync_fpu(env, &fpu, 1);
 998 }
 999
1000 static int hax_arch_get_registers(CPUArchState *env)
1001 {
1002     int ret;
1003
1004     ret = hax_sync_vcpu_register(env, 0);
1005     if (ret < 0) {
1006         return ret;
1007     }
1008
1009     ret = hax_get_fpu(env);
1010     if (ret < 0) {
1011         return ret;
1012     }
1013
1014     ret = hax_get_msrs(env);
1015     if (ret < 0) {
1016         return ret;
1017     }
1018
1019     x86_update_hflags(env);
1020     return 0;
1021 }
1022
1023 static int hax_arch_set_registers(CPUArchState *env)
1024 {
1025     int ret;
1026     ret = hax_sync_vcpu_register(env, 1);
1027
1028     if (ret < 0) {
1029         fprintf(stderr, "Failed to sync vcpu reg\n");
1030         return ret;
1031     }
1032     ret = hax_set_fpu(env);
1033     if (ret < 0) {
1034         fprintf(stderr, "FPU failed\n");
1035         return ret;
1036     }
1037     ret = hax_set_msrs(env);
1038     if (ret < 0) {
1039         fprintf(stderr, "MSR failed\n");
1040         return ret;
1041     }
1042
1043     return 0;
1044 }
1045
1046 static void hax_vcpu_sync_state(CPUArchState *env, int modified)
1047 {
1048     if (hax_enabled()) {
1049         if (modified) {
1050             hax_arch_set_registers(env);
1051         } else {
1052             hax_arch_get_registers(env);
1053         }
1054     }
1055 }
1056
1057 /*
1058  * much simpler than kvm, at least in first stage because:
1059  * We don't need consider the device pass-through, we don't need
1060  * consider the framebuffer, and we may even remove the bios at all
1061  */
1062 int hax_sync_vcpus(void)
1063 {
1064     if (hax_enabled()) {
1065         CPUState *cpu;
1066
1067         cpu = first_cpu;
1068         if (!cpu) {
1069             return 0;
1070         }
1071
1072         for (; cpu != NULL; cpu = CPU_NEXT(cpu)) {
1073             int ret;
1074
1075             ret = hax_arch_set_registers(cpu->env_ptr);
1076             if (ret < 0) {
1077                 return ret;
1078             }
1079         }
1080     }
1081
1082     return 0;
1083 }
1084
1085 void hax_reset_vcpu_state(void *opaque)
1086 {
1087     CPUState *cpu;
1088     for (cpu = first_cpu; cpu != NULL; cpu = CPU_NEXT(cpu)) {
1089         cpu->hax_vcpu->tunnel->user_event_pending = 0;
1090         cpu->hax_vcpu->tunnel->ready_for_interrupt_injection = 0;
1091     }
1092 }
1093
1094 static void hax_accel_class_init(ObjectClass *oc, void *data)
1095 {
1096     AccelClass *ac = ACCEL_CLASS(oc);
1097     ac->name = "HAX";
1098     ac->init_machine = hax_accel_init;
1099     ac->allowed = &hax_allowed;
1100 }
1101
/* Type description of the "hax" accelerator, a subtype of TYPE_ACCEL. */
static const TypeInfo hax_accel_type = {
    .name = ACCEL_CLASS_NAME("hax"),
    .parent = TYPE_ACCEL,
    .class_init = hax_accel_class_init,
};
1107
/* Register the HAX accelerator type. */
static void hax_type_init(void)
{
    type_register_static(&hax_accel_type);
}

/* Hook hax_type_init() into the type_init() registration mechanism. */
type_init(hax_type_init);