/*
 * Copyright (c) 2018-2019 Maxime Villard, All rights reserved.
 *
 * NetBSD Virtual Machine Monitor (NVMM) accelerator for QEMU.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/address-spaces.h"
#include "exec/ioport.h"
#include "qemu/accel.h"
#include "sysemu/nvmm.h"
#include "sysemu/cpus.h"
#include "sysemu/runstate.h"
#include "qemu/main-loop.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qemu/queue.h"
#include "migration/blocker.h"
#include "strings.h"

#include "nvmm-accel-ops.h"

#include <nvmm.h>

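/*
 * Per-vCPU accelerator state: the NVMM vCPU handle plus the interrupt-window
 * and TPR bookkeeping QEMU needs to carry between runs.
 */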
struct AccelCPUState {
    struct nvmm_vcpu vcpu;
    uint8_t tpr;
    bool stop;
    bool dirty;

    /* Window-exiting for INTs/NMIs. */
    bool int_window_exit;
    bool nmi_window_exit;

    /* The guest is in an interrupt shadow (POP SS, etc). */
    bool int_shadow;
};

struct qemu_machine {
    struct nvmm_capability cap;
    struct nvmm_machine mach;
};

/* -------------------------------------------------------------------------- */

static bool nvmm_allowed;
static struct qemu_machine qemu_mach;

static struct nvmm_machine *
get_nvmm_mach(void)
{
    return &qemu_mach.mach;
}

/* -------------------------------------------------------------------------- */

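/*
 * Segment conversion helpers. QEMU packs descriptor attributes into
 * SegmentCache.flags at the DESC_*_MASK bit positions; __SHIFTOUT() and
 * __SHIFTIN() are NetBSD macros that extract/insert a field according to
 * its mask.
 */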
static void
nvmm_set_segment(struct nvmm_x64_state_seg *nseg, const SegmentCache *qseg)
{
    uint32_t attrib = qseg->flags;

    nseg->selector = qseg->selector;
    nseg->limit = qseg->limit;
    nseg->base = qseg->base;
    nseg->attrib.type = __SHIFTOUT(attrib, DESC_TYPE_MASK);
    nseg->attrib.s = __SHIFTOUT(attrib, DESC_S_MASK);
    nseg->attrib.dpl = __SHIFTOUT(attrib, DESC_DPL_MASK);
    nseg->attrib.p = __SHIFTOUT(attrib, DESC_P_MASK);
    nseg->attrib.avl = __SHIFTOUT(attrib, DESC_AVL_MASK);
    nseg->attrib.l = __SHIFTOUT(attrib, DESC_L_MASK);
    nseg->attrib.def = __SHIFTOUT(attrib, DESC_B_MASK);
    nseg->attrib.g = __SHIFTOUT(attrib, DESC_G_MASK);
}

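/*
 * Push the full QEMU CPU state (GPRs, segments, control/debug registers,
 * FPU and MSRs) into the kernel vCPU, ahead of nvmm_vcpu_run().
 */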
static void
nvmm_set_registers(CPUState *cpu)
{
    CPUX86State *env = cpu_env(cpu);
    struct nvmm_machine *mach = get_nvmm_mach();
    AccelCPUState *qcpu = cpu->accel;
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    struct nvmm_x64_state *state = vcpu->state;
    uint64_t bitmap;
    size_t i;
    int ret;

    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));

    /* GPRs. */
    state->gprs[NVMM_X64_GPR_RAX] = env->regs[R_EAX];
    state->gprs[NVMM_X64_GPR_RCX] = env->regs[R_ECX];
    state->gprs[NVMM_X64_GPR_RDX] = env->regs[R_EDX];
    state->gprs[NVMM_X64_GPR_RBX] = env->regs[R_EBX];
    state->gprs[NVMM_X64_GPR_RSP] = env->regs[R_ESP];
    state->gprs[NVMM_X64_GPR_RBP] = env->regs[R_EBP];
    state->gprs[NVMM_X64_GPR_RSI] = env->regs[R_ESI];
    state->gprs[NVMM_X64_GPR_RDI] = env->regs[R_EDI];
#ifdef TARGET_X86_64
    state->gprs[NVMM_X64_GPR_R8] = env->regs[R_R8];
    state->gprs[NVMM_X64_GPR_R9] = env->regs[R_R9];
    state->gprs[NVMM_X64_GPR_R10] = env->regs[R_R10];
    state->gprs[NVMM_X64_GPR_R11] = env->regs[R_R11];
    state->gprs[NVMM_X64_GPR_R12] = env->regs[R_R12];
    state->gprs[NVMM_X64_GPR_R13] = env->regs[R_R13];
    state->gprs[NVMM_X64_GPR_R14] = env->regs[R_R14];
    state->gprs[NVMM_X64_GPR_R15] = env->regs[R_R15];
#endif

    /* RIP and RFLAGS. */
    state->gprs[NVMM_X64_GPR_RIP] = env->eip;
    state->gprs[NVMM_X64_GPR_RFLAGS] = env->eflags;

    /* Segments. */
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_CS], &env->segs[R_CS]);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_DS], &env->segs[R_DS]);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_ES], &env->segs[R_ES]);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_FS], &env->segs[R_FS]);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_GS], &env->segs[R_GS]);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_SS], &env->segs[R_SS]);

    /* Special segments. */
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_GDT], &env->gdt);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_LDT], &env->ldt);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_TR], &env->tr);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_IDT], &env->idt);

    /* Control registers. */
    state->crs[NVMM_X64_CR_CR0] = env->cr[0];
    state->crs[NVMM_X64_CR_CR2] = env->cr[2];
    state->crs[NVMM_X64_CR_CR3] = env->cr[3];
    state->crs[NVMM_X64_CR_CR4] = env->cr[4];
    state->crs[NVMM_X64_CR_CR8] = qcpu->tpr;
    state->crs[NVMM_X64_CR_XCR0] = env->xcr0;

    /* Debug registers. */
    state->drs[NVMM_X64_DR_DR0] = env->dr[0];
    state->drs[NVMM_X64_DR_DR1] = env->dr[1];
    state->drs[NVMM_X64_DR_DR2] = env->dr[2];
    state->drs[NVMM_X64_DR_DR3] = env->dr[3];
    state->drs[NVMM_X64_DR_DR6] = env->dr[6];
    state->drs[NVMM_X64_DR_DR7] = env->dr[7];

    /* FPU. */
    state->fpu.fx_cw = env->fpuc;
    state->fpu.fx_sw = (env->fpus & ~0x3800) | ((env->fpstt & 0x7) << 11);
    state->fpu.fx_tw = 0;
    for (i = 0; i < 8; i++) {
        state->fpu.fx_tw |= (!env->fptags[i]) << i;
    }
    state->fpu.fx_opcode = env->fpop;
    state->fpu.fx_ip.fa_64 = env->fpip;
    state->fpu.fx_dp.fa_64 = env->fpdp;
    state->fpu.fx_mxcsr = env->mxcsr;
    state->fpu.fx_mxcsr_mask = 0x0000FFFF;
    assert(sizeof(state->fpu.fx_87_ac) == sizeof(env->fpregs));
    memcpy(state->fpu.fx_87_ac, env->fpregs, sizeof(env->fpregs));
    for (i = 0; i < CPU_NB_REGS; i++) {
        memcpy(&state->fpu.fx_xmm[i].xmm_bytes[0],
            &env->xmm_regs[i].ZMM_Q(0), 8);
        memcpy(&state->fpu.fx_xmm[i].xmm_bytes[8],
            &env->xmm_regs[i].ZMM_Q(1), 8);
    }

    /* MSRs. */
    state->msrs[NVMM_X64_MSR_EFER] = env->efer;
    state->msrs[NVMM_X64_MSR_STAR] = env->star;
#ifdef TARGET_X86_64
    state->msrs[NVMM_X64_MSR_LSTAR] = env->lstar;
    state->msrs[NVMM_X64_MSR_CSTAR] = env->cstar;
    state->msrs[NVMM_X64_MSR_SFMASK] = env->fmask;
    state->msrs[NVMM_X64_MSR_KERNELGSBASE] = env->kernelgsbase;
#endif
    state->msrs[NVMM_X64_MSR_SYSENTER_CS] = env->sysenter_cs;
    state->msrs[NVMM_X64_MSR_SYSENTER_ESP] = env->sysenter_esp;
    state->msrs[NVMM_X64_MSR_SYSENTER_EIP] = env->sysenter_eip;
    state->msrs[NVMM_X64_MSR_PAT] = env->pat;
    state->msrs[NVMM_X64_MSR_TSC] = env->tsc;

    bitmap =
        NVMM_X64_STATE_SEGS |
        NVMM_X64_STATE_GPRS |
        NVMM_X64_STATE_CRS |
        NVMM_X64_STATE_DRS |
        NVMM_X64_STATE_MSRS |
        NVMM_X64_STATE_FPU;

    ret = nvmm_vcpu_setstate(mach, vcpu, bitmap);
    if (ret == -1) {
        error_report("NVMM: Failed to set virtual processor context,"
            " error=%d", errno);
    }
}

static void
nvmm_get_segment(SegmentCache *qseg, const struct nvmm_x64_state_seg *nseg)
{
    qseg->selector = nseg->selector;
    qseg->limit = nseg->limit;
    qseg->base = nseg->base;

    qseg->flags =
        __SHIFTIN((uint32_t)nseg->attrib.type, DESC_TYPE_MASK) |
        __SHIFTIN((uint32_t)nseg->attrib.s, DESC_S_MASK) |
        __SHIFTIN((uint32_t)nseg->attrib.dpl, DESC_DPL_MASK) |
        __SHIFTIN((uint32_t)nseg->attrib.p, DESC_P_MASK) |
        __SHIFTIN((uint32_t)nseg->attrib.avl, DESC_AVL_MASK) |
        __SHIFTIN((uint32_t)nseg->attrib.l, DESC_L_MASK) |
        __SHIFTIN((uint32_t)nseg->attrib.def, DESC_B_MASK) |
        __SHIFTIN((uint32_t)nseg->attrib.g, DESC_G_MASK);
}

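/*
 * Pull the vCPU state back from the kernel and write it into the QEMU
 * CPUX86State; the mirror image of nvmm_set_registers().
 */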
static void
nvmm_get_registers(CPUState *cpu)
{
    CPUX86State *env = cpu_env(cpu);
    struct nvmm_machine *mach = get_nvmm_mach();
    AccelCPUState *qcpu = cpu->accel;
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct nvmm_x64_state *state = vcpu->state;
    uint64_t bitmap, tpr;
    size_t i;
    int ret;

    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));

    bitmap =
        NVMM_X64_STATE_SEGS |
        NVMM_X64_STATE_GPRS |
        NVMM_X64_STATE_CRS |
        NVMM_X64_STATE_DRS |
        NVMM_X64_STATE_MSRS |
        NVMM_X64_STATE_FPU;

    ret = nvmm_vcpu_getstate(mach, vcpu, bitmap);
    if (ret == -1) {
        error_report("NVMM: Failed to get virtual processor context,"
            " error=%d", errno);
    }

    /* GPRs. */
    env->regs[R_EAX] = state->gprs[NVMM_X64_GPR_RAX];
    env->regs[R_ECX] = state->gprs[NVMM_X64_GPR_RCX];
    env->regs[R_EDX] = state->gprs[NVMM_X64_GPR_RDX];
    env->regs[R_EBX] = state->gprs[NVMM_X64_GPR_RBX];
    env->regs[R_ESP] = state->gprs[NVMM_X64_GPR_RSP];
    env->regs[R_EBP] = state->gprs[NVMM_X64_GPR_RBP];
    env->regs[R_ESI] = state->gprs[NVMM_X64_GPR_RSI];
    env->regs[R_EDI] = state->gprs[NVMM_X64_GPR_RDI];
#ifdef TARGET_X86_64
    env->regs[R_R8] = state->gprs[NVMM_X64_GPR_R8];
    env->regs[R_R9] = state->gprs[NVMM_X64_GPR_R9];
    env->regs[R_R10] = state->gprs[NVMM_X64_GPR_R10];
    env->regs[R_R11] = state->gprs[NVMM_X64_GPR_R11];
    env->regs[R_R12] = state->gprs[NVMM_X64_GPR_R12];
    env->regs[R_R13] = state->gprs[NVMM_X64_GPR_R13];
    env->regs[R_R14] = state->gprs[NVMM_X64_GPR_R14];
    env->regs[R_R15] = state->gprs[NVMM_X64_GPR_R15];
#endif

    /* RIP and RFLAGS. */
    env->eip = state->gprs[NVMM_X64_GPR_RIP];
    env->eflags = state->gprs[NVMM_X64_GPR_RFLAGS];

    /* Segments. */
    nvmm_get_segment(&env->segs[R_ES], &state->segs[NVMM_X64_SEG_ES]);
    nvmm_get_segment(&env->segs[R_CS], &state->segs[NVMM_X64_SEG_CS]);
    nvmm_get_segment(&env->segs[R_SS], &state->segs[NVMM_X64_SEG_SS]);
    nvmm_get_segment(&env->segs[R_DS], &state->segs[NVMM_X64_SEG_DS]);
    nvmm_get_segment(&env->segs[R_FS], &state->segs[NVMM_X64_SEG_FS]);
    nvmm_get_segment(&env->segs[R_GS], &state->segs[NVMM_X64_SEG_GS]);

    /* Special segments. */
    nvmm_get_segment(&env->gdt, &state->segs[NVMM_X64_SEG_GDT]);
    nvmm_get_segment(&env->ldt, &state->segs[NVMM_X64_SEG_LDT]);
    nvmm_get_segment(&env->tr, &state->segs[NVMM_X64_SEG_TR]);
    nvmm_get_segment(&env->idt, &state->segs[NVMM_X64_SEG_IDT]);

    /* Control registers. */
    env->cr[0] = state->crs[NVMM_X64_CR_CR0];
    env->cr[2] = state->crs[NVMM_X64_CR_CR2];
    env->cr[3] = state->crs[NVMM_X64_CR_CR3];
    env->cr[4] = state->crs[NVMM_X64_CR_CR4];
    tpr = state->crs[NVMM_X64_CR_CR8];
    if (tpr != qcpu->tpr) {
        qcpu->tpr = tpr;
        cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
    }
    env->xcr0 = state->crs[NVMM_X64_CR_XCR0];

    /* Debug registers. */
    env->dr[0] = state->drs[NVMM_X64_DR_DR0];
    env->dr[1] = state->drs[NVMM_X64_DR_DR1];
    env->dr[2] = state->drs[NVMM_X64_DR_DR2];
    env->dr[3] = state->drs[NVMM_X64_DR_DR3];
    env->dr[6] = state->drs[NVMM_X64_DR_DR6];
    env->dr[7] = state->drs[NVMM_X64_DR_DR7];

    /* FPU. */
    env->fpuc = state->fpu.fx_cw;
    env->fpstt = (state->fpu.fx_sw >> 11) & 0x7;
    env->fpus = state->fpu.fx_sw & ~0x3800;
    for (i = 0; i < 8; i++) {
        env->fptags[i] = !((state->fpu.fx_tw >> i) & 1);
    }
    env->fpop = state->fpu.fx_opcode;
    env->fpip = state->fpu.fx_ip.fa_64;
    env->fpdp = state->fpu.fx_dp.fa_64;
    env->mxcsr = state->fpu.fx_mxcsr;
    assert(sizeof(state->fpu.fx_87_ac) == sizeof(env->fpregs));
    memcpy(env->fpregs, state->fpu.fx_87_ac, sizeof(env->fpregs));
    for (i = 0; i < CPU_NB_REGS; i++) {
        memcpy(&env->xmm_regs[i].ZMM_Q(0),
            &state->fpu.fx_xmm[i].xmm_bytes[0], 8);
        memcpy(&env->xmm_regs[i].ZMM_Q(1),
            &state->fpu.fx_xmm[i].xmm_bytes[8], 8);
    }

    /* MSRs. */
    env->efer = state->msrs[NVMM_X64_MSR_EFER];
    env->star = state->msrs[NVMM_X64_MSR_STAR];
#ifdef TARGET_X86_64
    env->lstar = state->msrs[NVMM_X64_MSR_LSTAR];
    env->cstar = state->msrs[NVMM_X64_MSR_CSTAR];
    env->fmask = state->msrs[NVMM_X64_MSR_SFMASK];
    env->kernelgsbase = state->msrs[NVMM_X64_MSR_KERNELGSBASE];
#endif
    env->sysenter_cs = state->msrs[NVMM_X64_MSR_SYSENTER_CS];
    env->sysenter_esp = state->msrs[NVMM_X64_MSR_SYSENTER_ESP];
    env->sysenter_eip = state->msrs[NVMM_X64_MSR_SYSENTER_EIP];
    env->pat = state->msrs[NVMM_X64_MSR_PAT];
    env->tsc = state->msrs[NVMM_X64_MSR_TSC];

    x86_update_hflags(env);
}

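/*
 * An interrupt can only be injected when the guest is not in an interrupt
 * shadow and has IF set. When injection is not possible right now, request
 * an interrupt-window exit so we get another chance as soon as it is.
 */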
static bool
nvmm_can_take_int(CPUState *cpu)
{
    AccelCPUState *qcpu = cpu->accel;
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    struct nvmm_machine *mach = get_nvmm_mach();

    if (qcpu->int_window_exit) {
        return false;
    }

    if (qcpu->int_shadow || !(cpu_env(cpu)->eflags & IF_MASK)) {
        struct nvmm_x64_state *state = vcpu->state;

        /* Exit on interrupt window. */
        nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_INTR);
        state->intr.int_window_exiting = 1;
        nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_INTR);

        return false;
    }

    return true;
}

static bool
nvmm_can_take_nmi(CPUState *cpu)
{
    AccelCPUState *qcpu = cpu->accel;

    /*
     * Contrary to INTs, NMIs always schedule an exit when they are
     * completed. Therefore, if window-exiting is enabled, it means
     * NMIs are blocked.
     */
    if (qcpu->nmi_window_exit) {
        return false;
    }

    return true;
}

/*
 * Called before the VCPU is run. We inject events generated by the I/O
 * thread, and synchronize the guest TPR.
 */
static void
nvmm_vcpu_pre_run(CPUState *cpu)
{
    CPUX86State *env = cpu_env(cpu);
    struct nvmm_machine *mach = get_nvmm_mach();
    AccelCPUState *qcpu = cpu->accel;
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct nvmm_x64_state *state = vcpu->state;
    struct nvmm_vcpu_event *event = vcpu->event;
    bool has_event = false;
    bool sync_tpr = false;
    uint8_t tpr;
    int ret;

    bql_lock();

    tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
    if (tpr != qcpu->tpr) {
        qcpu->tpr = tpr;
        sync_tpr = true;
    }

    /*
     * Force the VCPU out of its inner loop to process any INIT requests
     * or commit pending TPR access.
     */
    if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
        cpu->exit_request = 1;
    }

    if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        if (nvmm_can_take_nmi(cpu)) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
            event->type = NVMM_VCPU_EVENT_INTR;
            event->vector = 2;
            has_event = true;
        }
    }

    if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
        if (nvmm_can_take_int(cpu)) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
            event->type = NVMM_VCPU_EVENT_INTR;
            event->vector = cpu_get_pic_interrupt(env);
            has_event = true;
        }
    }

    /* Don't want SMIs. */
    if (cpu->interrupt_request & CPU_INTERRUPT_SMI) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
    }

    if (sync_tpr) {
        ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_CRS);
        if (ret == -1) {
            error_report("NVMM: Failed to get CPU state,"
                " error=%d", errno);
        }

        state->crs[NVMM_X64_CR_CR8] = qcpu->tpr;

        ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_CRS);
        if (ret == -1) {
            error_report("NVMM: Failed to set CPU state,"
                " error=%d", errno);
        }
    }

    if (has_event) {
        ret = nvmm_vcpu_inject(mach, vcpu);
        if (ret == -1) {
            error_report("NVMM: Failed to inject event,"
                " error=%d", errno);
        }
    }

    bql_unlock();
}

/*
 * Called after the VCPU ran. We synchronize the host view of the TPR and
 * RFLAGS.
 */
static void
nvmm_vcpu_post_run(CPUState *cpu, struct nvmm_vcpu_exit *exit)
{
    AccelCPUState *qcpu = cpu->accel;
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;
    uint64_t tpr;

    env->eflags = exit->exitstate.rflags;
    qcpu->int_shadow = exit->exitstate.int_shadow;
    qcpu->int_window_exit = exit->exitstate.int_window_exiting;
    qcpu->nmi_window_exit = exit->exitstate.nmi_window_exiting;

    tpr = exit->exitstate.cr8;
    if (qcpu->tpr != tpr) {
        qcpu->tpr = tpr;
        bql_lock();
        cpu_set_apic_tpr(x86_cpu->apic_state, qcpu->tpr);
        bql_unlock();
    }
}

/* -------------------------------------------------------------------------- */

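/*
 * Assist callbacks, invoked by libnvmm when an I/O or MMIO access has to be
 * emulated in userland. They forward the access to the QEMU address spaces.
 */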
static void
nvmm_io_callback(struct nvmm_io *io)
{
    MemTxAttrs attrs = { 0 };
    int ret;

    ret = address_space_rw(&address_space_io, io->port, attrs, io->data,
        io->size, !io->in);
    if (ret != MEMTX_OK) {
        error_report("NVMM: I/O Transaction Failed "
            "[%s, port=%u, size=%zu]", (io->in ? "in" : "out"),
            io->port, io->size);
    }

    /* Needed, otherwise infinite loop. */
    current_cpu->accel->dirty = false;
}

static void
nvmm_mem_callback(struct nvmm_mem *mem)
{
    cpu_physical_memory_rw(mem->gpa, mem->data, mem->size, mem->write);

    /* Needed, otherwise infinite loop. */
    current_cpu->accel->dirty = false;
}

static struct nvmm_assist_callbacks nvmm_callbacks = {
    .io = nvmm_io_callback,
    .mem = nvmm_mem_callback
};

/* -------------------------------------------------------------------------- */

static int
nvmm_handle_mem(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
{
    int ret;

    ret = nvmm_assist_mem(mach, vcpu);
    if (ret == -1) {
        error_report("NVMM: Mem Assist Failed [gpa=%p]",
            (void *)vcpu->exit->u.mem.gpa);
    }

    return ret;
}

static int
nvmm_handle_io(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
{
    int ret;

    ret = nvmm_assist_io(mach, vcpu);
    if (ret == -1) {
        error_report("NVMM: I/O Assist Failed [port=%d]",
            (int)vcpu->exit->u.io.port);
    }

    return ret;
}

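/*
 * Userland handling of RDMSR/WRMSR exits. RDMSR returns its result in
 * EDX:EAX; both handlers advance RIP to the next instruction (the npc field
 * of the exit structure).
 */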
static int
nvmm_handle_rdmsr(struct nvmm_machine *mach, CPUState *cpu,
    struct nvmm_vcpu_exit *exit)
{
    AccelCPUState *qcpu = cpu->accel;
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct nvmm_x64_state *state = vcpu->state;
    uint64_t val;
    int ret;

    switch (exit->u.rdmsr.msr) {
    case MSR_IA32_APICBASE:
        val = cpu_get_apic_base(x86_cpu->apic_state);
        break;
    case MSR_MTRRcap:
    case MSR_MTRRdefType:
    case MSR_MCG_CAP:
    case MSR_MCG_STATUS:
        val = 0;
        break;
    default: /* More MSRs to add? */
        val = 0;
        error_report("NVMM: Unexpected RDMSR 0x%x, ignored",
            exit->u.rdmsr.msr);
        break;
    }

    ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_GPRS);
    if (ret == -1) {
        return -1;
    }

    state->gprs[NVMM_X64_GPR_RAX] = (val & 0xFFFFFFFF);
    state->gprs[NVMM_X64_GPR_RDX] = (val >> 32);
    state->gprs[NVMM_X64_GPR_RIP] = exit->u.rdmsr.npc;

    ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
    if (ret == -1) {
        return -1;
    }

    return 0;
}

static int
nvmm_handle_wrmsr(struct nvmm_machine *mach, CPUState *cpu,
    struct nvmm_vcpu_exit *exit)
{
    AccelCPUState *qcpu = cpu->accel;
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct nvmm_x64_state *state = vcpu->state;
    uint64_t val;
    int ret;

    val = exit->u.wrmsr.val;

    switch (exit->u.wrmsr.msr) {
    case MSR_IA32_APICBASE:
        cpu_set_apic_base(x86_cpu->apic_state, val);
        break;
    case MSR_MTRRdefType:
    case MSR_MCG_STATUS:
        break;
    default: /* More MSRs to add? */
        error_report("NVMM: Unexpected WRMSR 0x%x [val=0x%lx], ignored",
            exit->u.wrmsr.msr, val);
        break;
    }

    ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_GPRS);
    if (ret == -1) {
        return -1;
    }

    state->gprs[NVMM_X64_GPR_RIP] = exit->u.wrmsr.npc;

    ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
    if (ret == -1) {
        return -1;
    }

    return 0;
}

static int
nvmm_handle_halted(struct nvmm_machine *mach, CPUState *cpu,
    struct nvmm_vcpu_exit *exit)
{
    int ret = 0;

    bql_lock();

    if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
          (cpu_env(cpu)->eflags & IF_MASK)) &&
        !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        cpu->exception_index = EXCP_HLT;
        cpu->halted = true;
        ret = 1;
    }

    bql_unlock();

    return ret;
}

static int
nvmm_inject_ud(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
{
    struct nvmm_vcpu_event *event = vcpu->event;

    event->type = NVMM_VCPU_EVENT_EXCP;
    event->vector = 6;
    event->u.excp.error = 0;

    return nvmm_vcpu_inject(mach, vcpu);
}

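/*
 * Run the vCPU. The outer part handles asynchronous events under the BQL;
 * the inner loop runs the guest and dispatches NVMM exits. The inner loop
 * sets ret to 1 to go back to the QEMU main loop and to -1 on a fatal
 * error; the function returns non-zero only in the fatal case.
 */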
static int
nvmm_vcpu_loop(CPUState *cpu)
{
    struct nvmm_machine *mach = get_nvmm_mach();
    AccelCPUState *qcpu = cpu->accel;
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;
    struct nvmm_vcpu_exit *exit = vcpu->exit;
    int ret;

    /*
     * Some asynchronous events must be handled outside of the inner
     * VCPU loop. They are handled here.
     */
    if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
        nvmm_cpu_synchronize_state(cpu);
        do_cpu_init(x86_cpu);
        /* set int/nmi windows back to the reset state */
    }
    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
        apic_poll_irq(x86_cpu->apic_state);
    }
    if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
         (env->eflags & IF_MASK)) ||
        (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        cpu->halted = false;
    }
    if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
        nvmm_cpu_synchronize_state(cpu);
        do_cpu_sipi(x86_cpu);
    }
    if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_TPR;
        nvmm_cpu_synchronize_state(cpu);
        apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip,
            env->tpr_access_type);
    }

    if (cpu->halted) {
        cpu->exception_index = EXCP_HLT;
        qatomic_set(&cpu->exit_request, false);
        return 0;
    }

    bql_unlock();
    cpu_exec_start(cpu);

    /*
     * Inner VCPU loop.
     */
    do {
        if (cpu->accel->dirty) {
            nvmm_set_registers(cpu);
            cpu->accel->dirty = false;
        }

        if (qcpu->stop) {
            cpu->exception_index = EXCP_INTERRUPT;
            qcpu->stop = false;
            ret = 1;
            break;
        }

        nvmm_vcpu_pre_run(cpu);

        if (qatomic_read(&cpu->exit_request)) {
#if NVMM_USER_VERSION >= 2
            nvmm_vcpu_stop(vcpu);
#else
            qemu_cpu_kick_self();
#endif
        }

        /* Read exit_request before the kernel reads the immediate exit flag */
        smp_rmb();
        ret = nvmm_vcpu_run(mach, vcpu);
        if (ret == -1) {
            error_report("NVMM: Failed to exec a virtual processor,"
                " error=%d", errno);
            break;
        }

        nvmm_vcpu_post_run(cpu, exit);

        switch (exit->reason) {
        case NVMM_VCPU_EXIT_NONE:
            break;
#if NVMM_USER_VERSION >= 2
        case NVMM_VCPU_EXIT_STOPPED:
            /*
             * The kernel cleared the immediate exit flag; cpu->exit_request
             * must be cleared after
             */
            smp_wmb();
            qcpu->stop = true;
            break;
#endif
        case NVMM_VCPU_EXIT_MEMORY:
            ret = nvmm_handle_mem(mach, vcpu);
            break;
        case NVMM_VCPU_EXIT_IO:
            ret = nvmm_handle_io(mach, vcpu);
            break;
        case NVMM_VCPU_EXIT_INT_READY:
        case NVMM_VCPU_EXIT_NMI_READY:
        case NVMM_VCPU_EXIT_TPR_CHANGED:
            break;
        case NVMM_VCPU_EXIT_HALTED:
            ret = nvmm_handle_halted(mach, cpu, exit);
            break;
        case NVMM_VCPU_EXIT_SHUTDOWN:
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            cpu->exception_index = EXCP_INTERRUPT;
            ret = 1;
            break;
        case NVMM_VCPU_EXIT_RDMSR:
            ret = nvmm_handle_rdmsr(mach, cpu, exit);
            break;
        case NVMM_VCPU_EXIT_WRMSR:
            ret = nvmm_handle_wrmsr(mach, cpu, exit);
            break;
        case NVMM_VCPU_EXIT_MONITOR:
        case NVMM_VCPU_EXIT_MWAIT:
            ret = nvmm_inject_ud(mach, vcpu);
            break;
        default:
            error_report("NVMM: Unexpected VM exit code 0x%lx [hw=0x%lx]",
                exit->reason, exit->u.inv.hwcode);
            nvmm_get_registers(cpu);
            bql_lock();
            qemu_system_guest_panicked(cpu_get_crash_info(cpu));
            bql_unlock();
            ret = -1;
            break;
        }
    } while (ret == 0);

    cpu_exec_end(cpu);
    bql_lock();

    qatomic_set(&cpu->exit_request, false);

    return ret < 0;
}

/* -------------------------------------------------------------------------- */

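/*
 * State synchronization hooks. "dirty" means QEMU's copy of the register
 * state is the authoritative one and must be pushed to the kernel before
 * the vCPU runs again.
 */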
static void
do_nvmm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    nvmm_get_registers(cpu);
    cpu->accel->dirty = true;
}

static void
do_nvmm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
{
    nvmm_set_registers(cpu);
    cpu->accel->dirty = false;
}

static void
do_nvmm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
{
    nvmm_set_registers(cpu);
    cpu->accel->dirty = false;
}

static void
do_nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
{
    cpu->accel->dirty = true;
}

void nvmm_cpu_synchronize_state(CPUState *cpu)
{
    if (!cpu->accel->dirty) {
        run_on_cpu(cpu, do_nvmm_cpu_synchronize_state, RUN_ON_CPU_NULL);
    }
}

void nvmm_cpu_synchronize_post_reset(CPUState *cpu)
{
    run_on_cpu(cpu, do_nvmm_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}

void nvmm_cpu_synchronize_post_init(CPUState *cpu)
{
    run_on_cpu(cpu, do_nvmm_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}

void nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu)
{
    run_on_cpu(cpu, do_nvmm_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
}

/* -------------------------------------------------------------------------- */

static Error *nvmm_migration_blocker;

/*
 * The nvmm_vcpu_stop() mechanism breaks races between entering the VMM
 * and another thread signaling the vCPU thread to exit.
 */

static void
nvmm_ipi_signal(int sigcpu)
{
    if (current_cpu) {
        AccelCPUState *qcpu = current_cpu->accel;
#if NVMM_USER_VERSION >= 2
        struct nvmm_vcpu *vcpu = &qcpu->vcpu;
        nvmm_vcpu_stop(vcpu);
#else
        qcpu->stop = true;
#endif
    }
}

static void
nvmm_init_cpu_signals(void)
{
    struct sigaction sigact;
    sigset_t set;

    /* Install the IPI handler. */
    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = nvmm_ipi_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    /* Allow IPIs on the current thread. */
    sigprocmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    pthread_sigmask(SIG_SETMASK, &set, NULL);
}

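/*
 * Create the NVMM vCPU backing a QEMU CPU: install the signal handler,
 * register the migration blocker once, create the kernel vCPU, and
 * configure CPUID overrides, assist callbacks and (if supported) TPR
 * exiting.
 */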
int
nvmm_init_vcpu(CPUState *cpu)
{
    struct nvmm_machine *mach = get_nvmm_mach();
    struct nvmm_vcpu_conf_cpuid cpuid;
    struct nvmm_vcpu_conf_tpr tpr;
    Error *local_error = NULL;
    AccelCPUState *qcpu;
    int ret, err;

    nvmm_init_cpu_signals();

    if (nvmm_migration_blocker == NULL) {
        error_setg(&nvmm_migration_blocker,
            "NVMM: Migration not supported");

        if (migrate_add_blocker(&nvmm_migration_blocker, &local_error) < 0) {
            error_report_err(local_error);
            return -EINVAL;
        }
    }

    qcpu = g_new0(AccelCPUState, 1);

    ret = nvmm_vcpu_create(mach, cpu->cpu_index, &qcpu->vcpu);
    if (ret == -1) {
        err = errno;
        error_report("NVMM: Failed to create a virtual processor,"
            " error=%d", err);
        g_free(qcpu);
        return -err;
    }

    memset(&cpuid, 0, sizeof(cpuid));
    cpuid.mask = 1;
    cpuid.leaf = 0x00000001;
    cpuid.u.mask.set.edx = CPUID_MCE | CPUID_MCA | CPUID_MTRR;
    ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_CPUID,
        &cpuid);
    if (ret == -1) {
        err = errno;
        error_report("NVMM: Failed to configure a virtual processor,"
            " error=%d", err);
        g_free(qcpu);
        return -err;
    }

    ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_CALLBACKS,
        &nvmm_callbacks);
    if (ret == -1) {
        err = errno;
        error_report("NVMM: Failed to configure a virtual processor,"
            " error=%d", err);
        g_free(qcpu);
        return -err;
    }

    if (qemu_mach.cap.arch.vcpu_conf_support & NVMM_CAP_ARCH_VCPU_CONF_TPR) {
        memset(&tpr, 0, sizeof(tpr));
        tpr.exit_changed = 1;
        ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_TPR, &tpr);
        if (ret == -1) {
            err = errno;
            error_report("NVMM: Failed to configure a virtual processor,"
                " error=%d", err);
            g_free(qcpu);
            return -err;
        }
    }

    qcpu->dirty = true;
    cpu->accel = qcpu;

    return 0;
}

int
nvmm_vcpu_exec(CPUState *cpu)
{
    int ret, fatal;

    while (1) {
        if (cpu->exception_index >= EXCP_INTERRUPT) {
            ret = cpu->exception_index;
            cpu->exception_index = -1;
            break;
        }

        fatal = nvmm_vcpu_loop(cpu);

        if (fatal) {
            error_report("NVMM: Failed to execute a VCPU.");
            abort();
        }
    }

    return ret;
}

void
nvmm_destroy_vcpu(CPUState *cpu)
{
    struct nvmm_machine *mach = get_nvmm_mach();
    AccelCPUState *qcpu = cpu->accel;

    nvmm_vcpu_destroy(mach, &qcpu->vcpu);
    g_free(cpu->accel);
}

/* -------------------------------------------------------------------------- */

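/*
 * Memory listener: keep the NVMM guest-physical mappings in sync with
 * QEMU's RAM memory regions. Only page-aligned RAM sections are mapped;
 * ROM regions are mapped without PROT_WRITE.
 */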
static void
nvmm_update_mapping(hwaddr start_pa, ram_addr_t size, uintptr_t hva,
    bool add, bool rom, const char *name)
{
    struct nvmm_machine *mach = get_nvmm_mach();
    int ret, prot;

    if (add) {
        prot = PROT_READ | PROT_EXEC;
        if (!rom) {
            prot |= PROT_WRITE;
        }
        ret = nvmm_gpa_map(mach, hva, start_pa, size, prot);
    } else {
        ret = nvmm_gpa_unmap(mach, hva, start_pa, size);
    }

    if (ret == -1) {
        error_report("NVMM: Failed to %s GPA range '%s' PA:%p, "
            "Size:%p bytes, HostVA:%p, error=%d",
            (add ? "map" : "unmap"), name, (void *)(uintptr_t)start_pa,
            (void *)size, (void *)hva, errno);
    }
}

static void
nvmm_process_section(MemoryRegionSection *section, int add)
{
    MemoryRegion *mr = section->mr;
    hwaddr start_pa = section->offset_within_address_space;
    ram_addr_t size = int128_get64(section->size);
    unsigned int delta;
    uintptr_t hva;

    if (!memory_region_is_ram(mr)) {
        return;
    }

    /* Adjust start_pa and size so that they are page-aligned. */
    delta = qemu_real_host_page_size() - (start_pa & ~qemu_real_host_page_mask());
    delta &= ~qemu_real_host_page_mask();
    if (delta > size) {
        return;
    }
    start_pa += delta;
    size -= delta;
    size &= qemu_real_host_page_mask();
    if (!size || (start_pa & ~qemu_real_host_page_mask())) {
        return;
    }

    hva = (uintptr_t)memory_region_get_ram_ptr(mr) +
        section->offset_within_region + delta;

    nvmm_update_mapping(start_pa, size, hva, add,
        memory_region_is_rom(mr), mr->name);
}

static void
nvmm_region_add(MemoryListener *listener, MemoryRegionSection *section)
{
    memory_region_ref(section->mr);
    nvmm_process_section(section, 1);
}

static void
nvmm_region_del(MemoryListener *listener, MemoryRegionSection *section)
{
    nvmm_process_section(section, 0);
    memory_region_unref(section->mr);
}

static void
nvmm_transaction_begin(MemoryListener *listener)
{
    /* nothing */
}

static void
nvmm_transaction_commit(MemoryListener *listener)
{
    /* nothing */
}

static void
nvmm_log_sync(MemoryListener *listener, MemoryRegionSection *section)
{
    MemoryRegion *mr = section->mr;

    if (!memory_region_is_ram(mr)) {
        return;
    }

    memory_region_set_dirty(mr, 0, int128_get64(section->size));
}

static MemoryListener nvmm_memory_listener = {
    .name = "nvmm",
    .begin = nvmm_transaction_begin,
    .commit = nvmm_transaction_commit,
    .region_add = nvmm_region_add,
    .region_del = nvmm_region_del,
    .log_sync = nvmm_log_sync,
    .priority = MEMORY_LISTENER_PRIORITY_ACCEL,
};

static void
nvmm_ram_block_added(RAMBlockNotifier *n, void *host, size_t size,
    size_t max_size)
{
    struct nvmm_machine *mach = get_nvmm_mach();
    uintptr_t hva = (uintptr_t)host;
    int ret;

    ret = nvmm_hva_map(mach, hva, max_size);

    if (ret == -1) {
        error_report("NVMM: Failed to map HVA, HostVA:%p "
            "Size:%p bytes, error=%d",
            (void *)hva, (void *)size, errno);
    }
}

static struct RAMBlockNotifier nvmm_ram_notifier = {
    .ram_block_added = nvmm_ram_block_added
};

/* -------------------------------------------------------------------------- */

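/*
 * Accelerator initialization: check that the kernel/library version and
 * the state size match what QEMU was built against, create the NVMM
 * machine, and hook up the memory listener and RAM block notifier.
 */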
static int
nvmm_accel_init(MachineState *ms)
{
    int ret, err;

    ret = nvmm_init();
    if (ret == -1) {
        err = errno;
        error_report("NVMM: Initialization failed, error=%d", errno);
        return -err;
    }

    ret = nvmm_capability(&qemu_mach.cap);
    if (ret == -1) {
        err = errno;
        error_report("NVMM: Unable to fetch capability, error=%d", errno);
        return -err;
    }
    if (qemu_mach.cap.version < NVMM_KERN_VERSION) {
        error_report("NVMM: Unsupported version %u", qemu_mach.cap.version);
        return -EPROGMISMATCH;
    }
    if (qemu_mach.cap.state_size != sizeof(struct nvmm_x64_state)) {
        error_report("NVMM: Wrong state size %u", qemu_mach.cap.state_size);
        return -EPROGMISMATCH;
    }

    ret = nvmm_machine_create(&qemu_mach.mach);
    if (ret == -1) {
        err = errno;
        error_report("NVMM: Machine creation failed, error=%d", errno);
        return -err;
    }

    memory_listener_register(&nvmm_memory_listener, &address_space_memory);
    ram_block_notifier_add(&nvmm_ram_notifier);

    printf("NetBSD Virtual Machine Monitor accelerator is operational\n");
    return 0;
}

int
nvmm_enabled(void)
{
    return nvmm_allowed;
}

static void
nvmm_accel_class_init(ObjectClass *oc, void *data)
{
    AccelClass *ac = ACCEL_CLASS(oc);
    ac->name = "NVMM";
    ac->init_machine = nvmm_accel_init;
    ac->allowed = &nvmm_allowed;
}

static const TypeInfo nvmm_accel_type = {
    .name = ACCEL_CLASS_NAME("nvmm"),
    .parent = TYPE_ACCEL,
    .class_init = nvmm_accel_class_init,
};

static void
nvmm_type_init(void)
{
    type_register_static(&nvmm_accel_type);
}

type_init(nvmm_type_init);
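
/*
 * Usage sketch (not part of the accelerator code): on a NetBSD host with
 * nvmm(4) loaded and access to /dev/nvmm, this accelerator is selected on
 * the command line, for example:
 *
 *     qemu-system-x86_64 -accel nvmm -m 2G -hda disk.img
 *
 * The memory size and disk image name above are illustrative only.
 */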