/*
 * Copyright (c) 2018-2019 Maxime Villard, All rights reserved.
 *
 * NetBSD Virtual Machine Monitor (NVMM) accelerator for QEMU.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/address-spaces.h"
#include "exec/ioport.h"
#include "qemu/accel.h"
#include "sysemu/nvmm.h"
#include "sysemu/cpus.h"
#include "sysemu/runstate.h"
#include "qemu/main-loop.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qemu/queue.h"
#include "migration/blocker.h"
#include "strings.h"

#include "nvmm-accel-ops.h"

#include <nvmm.h>

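/*
 * Per-vCPU accelerator state: the NVMM vCPU handle plus the interrupt-window
 * and TPR bookkeeping QEMU needs to carry between runs.
 */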
struct AccelCPUState {
    struct nvmm_vcpu vcpu;
    uint8_t tpr;
    bool stop;
    bool dirty;

    /* Window-exiting for INTs/NMIs. */
    bool int_window_exit;
    bool nmi_window_exit;

    /* The guest is in an interrupt shadow (POP SS, etc). */
    bool int_shadow;
};

struct qemu_machine {
    struct nvmm_capability cap;
    struct nvmm_machine mach;
};

/* -------------------------------------------------------------------------- */

static bool nvmm_allowed;
static struct qemu_machine qemu_mach;

static struct nvmm_machine *
get_nvmm_mach(void)
{
    return &qemu_mach.mach;
}

/* -------------------------------------------------------------------------- */

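/*
 * Segment conversion helpers. QEMU packs descriptor attributes into
 * SegmentCache.flags at the DESC_*_MASK bit positions; __SHIFTOUT() and
 * __SHIFTIN() are NetBSD macros that extract/insert a field according to
 * its mask.
 */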
static void
nvmm_set_segment(struct nvmm_x64_state_seg *nseg, const SegmentCache *qseg)
{
    uint32_t attrib = qseg->flags;

    nseg->selector = qseg->selector;
    nseg->limit = qseg->limit;
    nseg->base = qseg->base;
    nseg->attrib.type = __SHIFTOUT(attrib, DESC_TYPE_MASK);
    nseg->attrib.s = __SHIFTOUT(attrib, DESC_S_MASK);
    nseg->attrib.dpl = __SHIFTOUT(attrib, DESC_DPL_MASK);
    nseg->attrib.p = __SHIFTOUT(attrib, DESC_P_MASK);
    nseg->attrib.avl = __SHIFTOUT(attrib, DESC_AVL_MASK);
    nseg->attrib.l = __SHIFTOUT(attrib, DESC_L_MASK);
    nseg->attrib.def = __SHIFTOUT(attrib, DESC_B_MASK);
    nseg->attrib.g = __SHIFTOUT(attrib, DESC_G_MASK);
}

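/*
 * Push the full QEMU CPU state (GPRs, segments, control/debug registers,
 * FPU and MSRs) into the kernel vCPU, ahead of nvmm_vcpu_run().
 */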
static void
nvmm_set_registers(CPUState *cpu)
{
    CPUX86State *env = cpu_env(cpu);
    struct nvmm_machine *mach = get_nvmm_mach();
    AccelCPUState *qcpu = cpu->accel;
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    struct nvmm_x64_state *state = vcpu->state;
    uint64_t bitmap;
    size_t i;
    int ret;

    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));

    /* GPRs. */
    state->gprs[NVMM_X64_GPR_RAX] = env->regs[R_EAX];
    state->gprs[NVMM_X64_GPR_RCX] = env->regs[R_ECX];
    state->gprs[NVMM_X64_GPR_RDX] = env->regs[R_EDX];
    state->gprs[NVMM_X64_GPR_RBX] = env->regs[R_EBX];
    state->gprs[NVMM_X64_GPR_RSP] = env->regs[R_ESP];
    state->gprs[NVMM_X64_GPR_RBP] = env->regs[R_EBP];
    state->gprs[NVMM_X64_GPR_RSI] = env->regs[R_ESI];
    state->gprs[NVMM_X64_GPR_RDI] = env->regs[R_EDI];
#ifdef TARGET_X86_64
    state->gprs[NVMM_X64_GPR_R8] = env->regs[R_R8];
    state->gprs[NVMM_X64_GPR_R9] = env->regs[R_R9];
    state->gprs[NVMM_X64_GPR_R10] = env->regs[R_R10];
    state->gprs[NVMM_X64_GPR_R11] = env->regs[R_R11];
    state->gprs[NVMM_X64_GPR_R12] = env->regs[R_R12];
    state->gprs[NVMM_X64_GPR_R13] = env->regs[R_R13];
    state->gprs[NVMM_X64_GPR_R14] = env->regs[R_R14];
    state->gprs[NVMM_X64_GPR_R15] = env->regs[R_R15];
#endif

    /* RIP and RFLAGS. */
    state->gprs[NVMM_X64_GPR_RIP] = env->eip;
    state->gprs[NVMM_X64_GPR_RFLAGS] = env->eflags;

    /* Segments. */
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_CS], &env->segs[R_CS]);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_DS], &env->segs[R_DS]);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_ES], &env->segs[R_ES]);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_FS], &env->segs[R_FS]);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_GS], &env->segs[R_GS]);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_SS], &env->segs[R_SS]);

    /* Special segments. */
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_GDT], &env->gdt);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_LDT], &env->ldt);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_TR], &env->tr);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_IDT], &env->idt);

    /* Control registers. */
    state->crs[NVMM_X64_CR_CR0] = env->cr[0];
    state->crs[NVMM_X64_CR_CR2] = env->cr[2];
    state->crs[NVMM_X64_CR_CR3] = env->cr[3];
    state->crs[NVMM_X64_CR_CR4] = env->cr[4];
    state->crs[NVMM_X64_CR_CR8] = qcpu->tpr;
    state->crs[NVMM_X64_CR_XCR0] = env->xcr0;

    /* Debug registers. */
    state->drs[NVMM_X64_DR_DR0] = env->dr[0];
    state->drs[NVMM_X64_DR_DR1] = env->dr[1];
    state->drs[NVMM_X64_DR_DR2] = env->dr[2];
    state->drs[NVMM_X64_DR_DR3] = env->dr[3];
    state->drs[NVMM_X64_DR_DR6] = env->dr[6];
    state->drs[NVMM_X64_DR_DR7] = env->dr[7];

    /* FPU. */
    state->fpu.fx_cw = env->fpuc;
    state->fpu.fx_sw = (env->fpus & ~0x3800) | ((env->fpstt & 0x7) << 11);
    state->fpu.fx_tw = 0;
    for (i = 0; i < 8; i++) {
        state->fpu.fx_tw |= (!env->fptags[i]) << i;
    }
    state->fpu.fx_opcode = env->fpop;
    state->fpu.fx_ip.fa_64 = env->fpip;
    state->fpu.fx_dp.fa_64 = env->fpdp;
    state->fpu.fx_mxcsr = env->mxcsr;
    state->fpu.fx_mxcsr_mask = 0x0000FFFF;
    assert(sizeof(state->fpu.fx_87_ac) == sizeof(env->fpregs));
    memcpy(state->fpu.fx_87_ac, env->fpregs, sizeof(env->fpregs));
    for (i = 0; i < CPU_NB_REGS; i++) {
        memcpy(&state->fpu.fx_xmm[i].xmm_bytes[0],
            &env->xmm_regs[i].ZMM_Q(0), 8);
        memcpy(&state->fpu.fx_xmm[i].xmm_bytes[8],
            &env->xmm_regs[i].ZMM_Q(1), 8);
    }

    /* MSRs. */
    state->msrs[NVMM_X64_MSR_EFER] = env->efer;
    state->msrs[NVMM_X64_MSR_STAR] = env->star;
#ifdef TARGET_X86_64
    state->msrs[NVMM_X64_MSR_LSTAR] = env->lstar;
    state->msrs[NVMM_X64_MSR_CSTAR] = env->cstar;
    state->msrs[NVMM_X64_MSR_SFMASK] = env->fmask;
    state->msrs[NVMM_X64_MSR_KERNELGSBASE] = env->kernelgsbase;
#endif
    state->msrs[NVMM_X64_MSR_SYSENTER_CS] = env->sysenter_cs;
    state->msrs[NVMM_X64_MSR_SYSENTER_ESP] = env->sysenter_esp;
    state->msrs[NVMM_X64_MSR_SYSENTER_EIP] = env->sysenter_eip;
    state->msrs[NVMM_X64_MSR_PAT] = env->pat;
    state->msrs[NVMM_X64_MSR_TSC] = env->tsc;

    bitmap =
        NVMM_X64_STATE_SEGS |
        NVMM_X64_STATE_GPRS |
        NVMM_X64_STATE_CRS |
        NVMM_X64_STATE_DRS |
        NVMM_X64_STATE_MSRS |
        NVMM_X64_STATE_FPU;

    ret = nvmm_vcpu_setstate(mach, vcpu, bitmap);
    if (ret == -1) {
        error_report("NVMM: Failed to set virtual processor context,"
            " error=%d", errno);
    }
}

static void
nvmm_get_segment(SegmentCache *qseg, const struct nvmm_x64_state_seg *nseg)
{
    qseg->selector = nseg->selector;
    qseg->limit = nseg->limit;
    qseg->base = nseg->base;

    qseg->flags =
        __SHIFTIN((uint32_t)nseg->attrib.type, DESC_TYPE_MASK) |
        __SHIFTIN((uint32_t)nseg->attrib.s, DESC_S_MASK) |
        __SHIFTIN((uint32_t)nseg->attrib.dpl, DESC_DPL_MASK) |
        __SHIFTIN((uint32_t)nseg->attrib.p, DESC_P_MASK) |
        __SHIFTIN((uint32_t)nseg->attrib.avl, DESC_AVL_MASK) |
        __SHIFTIN((uint32_t)nseg->attrib.l, DESC_L_MASK) |
        __SHIFTIN((uint32_t)nseg->attrib.def, DESC_B_MASK) |
        __SHIFTIN((uint32_t)nseg->attrib.g, DESC_G_MASK);
}

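/*
 * Pull the vCPU state back from the kernel and write it into the QEMU
 * CPUX86State; the mirror image of nvmm_set_registers().
 */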
static void
nvmm_get_registers(CPUState *cpu)
{
    CPUX86State *env = cpu_env(cpu);
    struct nvmm_machine *mach = get_nvmm_mach();
    AccelCPUState *qcpu = cpu->accel;
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct nvmm_x64_state *state = vcpu->state;
    uint64_t bitmap, tpr;
    size_t i;
    int ret;

    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));

    bitmap =
        NVMM_X64_STATE_SEGS |
        NVMM_X64_STATE_GPRS |
        NVMM_X64_STATE_CRS |
        NVMM_X64_STATE_DRS |
        NVMM_X64_STATE_MSRS |
        NVMM_X64_STATE_FPU;

    ret = nvmm_vcpu_getstate(mach, vcpu, bitmap);
    if (ret == -1) {
        error_report("NVMM: Failed to get virtual processor context,"
            " error=%d", errno);
    }

    /* GPRs. */
    env->regs[R_EAX] = state->gprs[NVMM_X64_GPR_RAX];
    env->regs[R_ECX] = state->gprs[NVMM_X64_GPR_RCX];
    env->regs[R_EDX] = state->gprs[NVMM_X64_GPR_RDX];
    env->regs[R_EBX] = state->gprs[NVMM_X64_GPR_RBX];
    env->regs[R_ESP] = state->gprs[NVMM_X64_GPR_RSP];
    env->regs[R_EBP] = state->gprs[NVMM_X64_GPR_RBP];
    env->regs[R_ESI] = state->gprs[NVMM_X64_GPR_RSI];
    env->regs[R_EDI] = state->gprs[NVMM_X64_GPR_RDI];
#ifdef TARGET_X86_64
    env->regs[R_R8] = state->gprs[NVMM_X64_GPR_R8];
    env->regs[R_R9] = state->gprs[NVMM_X64_GPR_R9];
    env->regs[R_R10] = state->gprs[NVMM_X64_GPR_R10];
    env->regs[R_R11] = state->gprs[NVMM_X64_GPR_R11];
    env->regs[R_R12] = state->gprs[NVMM_X64_GPR_R12];
    env->regs[R_R13] = state->gprs[NVMM_X64_GPR_R13];
    env->regs[R_R14] = state->gprs[NVMM_X64_GPR_R14];
    env->regs[R_R15] = state->gprs[NVMM_X64_GPR_R15];
#endif

    /* RIP and RFLAGS. */
    env->eip = state->gprs[NVMM_X64_GPR_RIP];
    env->eflags = state->gprs[NVMM_X64_GPR_RFLAGS];

    /* Segments. */
    nvmm_get_segment(&env->segs[R_ES], &state->segs[NVMM_X64_SEG_ES]);
    nvmm_get_segment(&env->segs[R_CS], &state->segs[NVMM_X64_SEG_CS]);
    nvmm_get_segment(&env->segs[R_SS], &state->segs[NVMM_X64_SEG_SS]);
    nvmm_get_segment(&env->segs[R_DS], &state->segs[NVMM_X64_SEG_DS]);
    nvmm_get_segment(&env->segs[R_FS], &state->segs[NVMM_X64_SEG_FS]);
    nvmm_get_segment(&env->segs[R_GS], &state->segs[NVMM_X64_SEG_GS]);

    /* Special segments. */
    nvmm_get_segment(&env->gdt, &state->segs[NVMM_X64_SEG_GDT]);
    nvmm_get_segment(&env->ldt, &state->segs[NVMM_X64_SEG_LDT]);
    nvmm_get_segment(&env->tr, &state->segs[NVMM_X64_SEG_TR]);
    nvmm_get_segment(&env->idt, &state->segs[NVMM_X64_SEG_IDT]);

    /* Control registers. */
    env->cr[0] = state->crs[NVMM_X64_CR_CR0];
    env->cr[2] = state->crs[NVMM_X64_CR_CR2];
    env->cr[3] = state->crs[NVMM_X64_CR_CR3];
    env->cr[4] = state->crs[NVMM_X64_CR_CR4];
    tpr = state->crs[NVMM_X64_CR_CR8];
    if (tpr != qcpu->tpr) {
        qcpu->tpr = tpr;
        cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
    }
    env->xcr0 = state->crs[NVMM_X64_CR_XCR0];

    /* Debug registers. */
    env->dr[0] = state->drs[NVMM_X64_DR_DR0];
    env->dr[1] = state->drs[NVMM_X64_DR_DR1];
    env->dr[2] = state->drs[NVMM_X64_DR_DR2];
    env->dr[3] = state->drs[NVMM_X64_DR_DR3];
    env->dr[6] = state->drs[NVMM_X64_DR_DR6];
    env->dr[7] = state->drs[NVMM_X64_DR_DR7];

    /* FPU. */
    env->fpuc = state->fpu.fx_cw;
    env->fpstt = (state->fpu.fx_sw >> 11) & 0x7;
    env->fpus = state->fpu.fx_sw & ~0x3800;
    for (i = 0; i < 8; i++) {
        env->fptags[i] = !((state->fpu.fx_tw >> i) & 1);
    }
    env->fpop = state->fpu.fx_opcode;
    env->fpip = state->fpu.fx_ip.fa_64;
    env->fpdp = state->fpu.fx_dp.fa_64;
    env->mxcsr = state->fpu.fx_mxcsr;
    assert(sizeof(state->fpu.fx_87_ac) == sizeof(env->fpregs));
    memcpy(env->fpregs, state->fpu.fx_87_ac, sizeof(env->fpregs));
    for (i = 0; i < CPU_NB_REGS; i++) {
        memcpy(&env->xmm_regs[i].ZMM_Q(0),
            &state->fpu.fx_xmm[i].xmm_bytes[0], 8);
        memcpy(&env->xmm_regs[i].ZMM_Q(1),
            &state->fpu.fx_xmm[i].xmm_bytes[8], 8);
    }

    /* MSRs. */
    env->efer = state->msrs[NVMM_X64_MSR_EFER];
    env->star = state->msrs[NVMM_X64_MSR_STAR];
#ifdef TARGET_X86_64
    env->lstar = state->msrs[NVMM_X64_MSR_LSTAR];
    env->cstar = state->msrs[NVMM_X64_MSR_CSTAR];
    env->fmask = state->msrs[NVMM_X64_MSR_SFMASK];
    env->kernelgsbase = state->msrs[NVMM_X64_MSR_KERNELGSBASE];
#endif
    env->sysenter_cs = state->msrs[NVMM_X64_MSR_SYSENTER_CS];
    env->sysenter_esp = state->msrs[NVMM_X64_MSR_SYSENTER_ESP];
    env->sysenter_eip = state->msrs[NVMM_X64_MSR_SYSENTER_EIP];
    env->pat = state->msrs[NVMM_X64_MSR_PAT];
    env->tsc = state->msrs[NVMM_X64_MSR_TSC];

    x86_update_hflags(env);
}

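/*
 * An interrupt can only be injected when the guest is not in an interrupt
 * shadow and has IF set. When injection is not possible right now, request
 * an interrupt-window exit so we get another chance as soon as it is.
 */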
static bool
nvmm_can_take_int(CPUState *cpu)
{
    AccelCPUState *qcpu = cpu->accel;
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    struct nvmm_machine *mach = get_nvmm_mach();

    if (qcpu->int_window_exit) {
        return false;
    }

    if (qcpu->int_shadow || !(cpu_env(cpu)->eflags & IF_MASK)) {
        struct nvmm_x64_state *state = vcpu->state;

        /* Exit on interrupt window. */
        nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_INTR);
        state->intr.int_window_exiting = 1;
        nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_INTR);

        return false;
    }

    return true;
}

static bool
nvmm_can_take_nmi(CPUState *cpu)
{
    AccelCPUState *qcpu = cpu->accel;

    /*
     * Contrary to INTs, NMIs always schedule an exit when they are
     * completed. Therefore, if window-exiting is enabled, it means
     * NMIs are blocked.
     */
    if (qcpu->nmi_window_exit) {
        return false;
    }

    return true;
}

/*
 * Called before the VCPU is run. We inject events generated by the I/O
 * thread, and synchronize the guest TPR.
 */
static void
nvmm_vcpu_pre_run(CPUState *cpu)
{
    CPUX86State *env = cpu_env(cpu);
    struct nvmm_machine *mach = get_nvmm_mach();
    AccelCPUState *qcpu = cpu->accel;
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct nvmm_x64_state *state = vcpu->state;
    struct nvmm_vcpu_event *event = vcpu->event;
    bool has_event = false;
    bool sync_tpr = false;
    uint8_t tpr;
    int ret;

    bql_lock();

    tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
    if (tpr != qcpu->tpr) {
        qcpu->tpr = tpr;
        sync_tpr = true;
    }

    /*
     * Force the VCPU out of its inner loop to process any INIT requests
     * or commit pending TPR access.
     */
    if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
        cpu->exit_request = 1;
    }

    if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        if (nvmm_can_take_nmi(cpu)) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
            event->type = NVMM_VCPU_EVENT_INTR;
            event->vector = 2;
            has_event = true;
        }
    }

    if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
        if (nvmm_can_take_int(cpu)) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
            event->type = NVMM_VCPU_EVENT_INTR;
            event->vector = cpu_get_pic_interrupt(env);
            has_event = true;
        }
    }

    /* Don't want SMIs. */
    if (cpu->interrupt_request & CPU_INTERRUPT_SMI) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
    }

    if (sync_tpr) {
        ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_CRS);
        if (ret == -1) {
            error_report("NVMM: Failed to get CPU state,"
                " error=%d", errno);
        }

        state->crs[NVMM_X64_CR_CR8] = qcpu->tpr;

        ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_CRS);
        if (ret == -1) {
            error_report("NVMM: Failed to set CPU state,"
                " error=%d", errno);
        }
    }

    if (has_event) {
        ret = nvmm_vcpu_inject(mach, vcpu);
        if (ret == -1) {
            error_report("NVMM: Failed to inject event,"
                " error=%d", errno);
        }
    }

    bql_unlock();
}

/*
 * Called after the VCPU ran. We synchronize the host view of the TPR and
 * RFLAGS.
 */
static void
nvmm_vcpu_post_run(CPUState *cpu, struct nvmm_vcpu_exit *exit)
{
    AccelCPUState *qcpu = cpu->accel;
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;
    uint64_t tpr;

    env->eflags = exit->exitstate.rflags;
    qcpu->int_shadow = exit->exitstate.int_shadow;
    qcpu->int_window_exit = exit->exitstate.int_window_exiting;
    qcpu->nmi_window_exit = exit->exitstate.nmi_window_exiting;

    tpr = exit->exitstate.cr8;
    if (qcpu->tpr != tpr) {
        qcpu->tpr = tpr;
        bql_lock();
        cpu_set_apic_tpr(x86_cpu->apic_state, qcpu->tpr);
        bql_unlock();
    }
}

/* -------------------------------------------------------------------------- */

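/*
 * Assist callbacks, invoked by libnvmm when an I/O or MMIO access has to be
 * emulated in userland. They forward the access to the QEMU address spaces.
 */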
static void
nvmm_io_callback(struct nvmm_io *io)
{
    MemTxAttrs attrs = { 0 };
    int ret;

    ret = address_space_rw(&address_space_io, io->port, attrs, io->data,
        io->size, !io->in);
    if (ret != MEMTX_OK) {
        error_report("NVMM: I/O Transaction Failed "
            "[%s, port=%u, size=%zu]", (io->in ? "in" : "out"),
            io->port, io->size);
    }

    /* Needed, otherwise infinite loop. */
    current_cpu->accel->dirty = false;
}

static void
nvmm_mem_callback(struct nvmm_mem *mem)
{
    cpu_physical_memory_rw(mem->gpa, mem->data, mem->size, mem->write);

    /* Needed, otherwise infinite loop. */
    current_cpu->accel->dirty = false;
}

static struct nvmm_assist_callbacks nvmm_callbacks = {
    .io = nvmm_io_callback,
    .mem = nvmm_mem_callback
};

/* -------------------------------------------------------------------------- */

static int
nvmm_handle_mem(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
{
    int ret;

    ret = nvmm_assist_mem(mach, vcpu);
    if (ret == -1) {
        error_report("NVMM: Mem Assist Failed [gpa=%p]",
            (void *)vcpu->exit->u.mem.gpa);
    }

    return ret;
}

static int
nvmm_handle_io(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
{
    int ret;

    ret = nvmm_assist_io(mach, vcpu);
    if (ret == -1) {
        error_report("NVMM: I/O Assist Failed [port=%d]",
            (int)vcpu->exit->u.io.port);
    }

    return ret;
}

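/*
 * Userland handling of RDMSR/WRMSR exits. RDMSR returns its result in
 * EDX:EAX; both handlers advance RIP to the next instruction (the npc field
 * of the exit structure).
 */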
static int
nvmm_handle_rdmsr(struct nvmm_machine *mach, CPUState *cpu,
    struct nvmm_vcpu_exit *exit)
{
    AccelCPUState *qcpu = cpu->accel;
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct nvmm_x64_state *state = vcpu->state;
    uint64_t val;
    int ret;

    switch (exit->u.rdmsr.msr) {
    case MSR_IA32_APICBASE:
        val = cpu_get_apic_base(x86_cpu->apic_state);
        break;
    case MSR_MTRRcap:
    case MSR_MTRRdefType:
    case MSR_MCG_CAP:
    case MSR_MCG_STATUS:
        val = 0;
        break;
    default: /* More MSRs to add? */
        val = 0;
        error_report("NVMM: Unexpected RDMSR 0x%x, ignored",
            exit->u.rdmsr.msr);
        break;
    }

    ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_GPRS);
    if (ret == -1) {
        return -1;
    }

    state->gprs[NVMM_X64_GPR_RAX] = (val & 0xFFFFFFFF);
    state->gprs[NVMM_X64_GPR_RDX] = (val >> 32);
    state->gprs[NVMM_X64_GPR_RIP] = exit->u.rdmsr.npc;

    ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
    if (ret == -1) {
        return -1;
    }

    return 0;
}

static int
nvmm_handle_wrmsr(struct nvmm_machine *mach, CPUState *cpu,
    struct nvmm_vcpu_exit *exit)
{
    AccelCPUState *qcpu = cpu->accel;
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct nvmm_x64_state *state = vcpu->state;
    uint64_t val;
    int ret;

    val = exit->u.wrmsr.val;

    switch (exit->u.wrmsr.msr) {
    case MSR_IA32_APICBASE:
        cpu_set_apic_base(x86_cpu->apic_state, val);
        break;
    case MSR_MTRRdefType:
    case MSR_MCG_STATUS:
        break;
    default: /* More MSRs to add? */
        error_report("NVMM: Unexpected WRMSR 0x%x [val=0x%lx], ignored",
            exit->u.wrmsr.msr, val);
        break;
    }

    ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_GPRS);
    if (ret == -1) {
        return -1;
    }

    state->gprs[NVMM_X64_GPR_RIP] = exit->u.wrmsr.npc;

    ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
    if (ret == -1) {
        return -1;
    }

    return 0;
}

static int
nvmm_handle_halted(struct nvmm_machine *mach, CPUState *cpu,
    struct nvmm_vcpu_exit *exit)
{
    int ret = 0;

    bql_lock();

    if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
          (cpu_env(cpu)->eflags & IF_MASK)) &&
        !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        cpu->exception_index = EXCP_HLT;
        cpu->halted = true;
        ret = 1;
    }

    bql_unlock();

    return ret;
}

static int
nvmm_inject_ud(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
{
    struct nvmm_vcpu_event *event = vcpu->event;

    event->type = NVMM_VCPU_EVENT_EXCP;
    event->vector = 6;
    event->u.excp.error = 0;

    return nvmm_vcpu_inject(mach, vcpu);
}

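/*
 * Run the vCPU. The outer part handles asynchronous events under the BQL;
 * the inner loop runs the guest and dispatches NVMM exits. The inner loop
 * sets ret to 1 to go back to the QEMU main loop and to -1 on a fatal
 * error; the function returns non-zero only in the fatal case.
 */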
static int
nvmm_vcpu_loop(CPUState *cpu)
{
    struct nvmm_machine *mach = get_nvmm_mach();
    AccelCPUState *qcpu = cpu->accel;
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;
    struct nvmm_vcpu_exit *exit = vcpu->exit;
    int ret;

    /*
     * Some asynchronous events must be handled outside of the inner
     * VCPU loop. They are handled here.
     */
    if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
        nvmm_cpu_synchronize_state(cpu);
        do_cpu_init(x86_cpu);
        /* set int/nmi windows back to the reset state */
    }
    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
        apic_poll_irq(x86_cpu->apic_state);
    }
    if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
         (env->eflags & IF_MASK)) ||
        (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        cpu->halted = false;
    }
    if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
        nvmm_cpu_synchronize_state(cpu);
        do_cpu_sipi(x86_cpu);
    }
    if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_TPR;
        nvmm_cpu_synchronize_state(cpu);
        apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip,
            env->tpr_access_type);
    }

    if (cpu->halted) {
        cpu->exception_index = EXCP_HLT;
        qatomic_set(&cpu->exit_request, false);
        return 0;
    }

    bql_unlock();
    cpu_exec_start(cpu);

    /*
     * Inner VCPU loop.
     */
    do {
        if (cpu->accel->dirty) {
            nvmm_set_registers(cpu);
            cpu->accel->dirty = false;
        }

        if (qcpu->stop) {
            cpu->exception_index = EXCP_INTERRUPT;
            qcpu->stop = false;
            ret = 1;
            break;
        }

        nvmm_vcpu_pre_run(cpu);

        if (qatomic_read(&cpu->exit_request)) {
#if NVMM_USER_VERSION >= 2
            nvmm_vcpu_stop(vcpu);
#else
            qemu_cpu_kick_self();
#endif
        }

        /* Read exit_request before the kernel reads the immediate exit flag */
        smp_rmb();
        ret = nvmm_vcpu_run(mach, vcpu);
        if (ret == -1) {
            error_report("NVMM: Failed to exec a virtual processor,"
                " error=%d", errno);
            break;
        }

        nvmm_vcpu_post_run(cpu, exit);

        switch (exit->reason) {
        case NVMM_VCPU_EXIT_NONE:
            break;
#if NVMM_USER_VERSION >= 2
        case NVMM_VCPU_EXIT_STOPPED:
            /*
             * The kernel cleared the immediate exit flag; cpu->exit_request
             * must be cleared after
             */
            smp_wmb();
            qcpu->stop = true;
            break;
#endif
        case NVMM_VCPU_EXIT_MEMORY:
            ret = nvmm_handle_mem(mach, vcpu);
            break;
        case NVMM_VCPU_EXIT_IO:
            ret = nvmm_handle_io(mach, vcpu);
            break;
        case NVMM_VCPU_EXIT_INT_READY:
        case NVMM_VCPU_EXIT_NMI_READY:
        case NVMM_VCPU_EXIT_TPR_CHANGED:
            break;
        case NVMM_VCPU_EXIT_HALTED:
            ret = nvmm_handle_halted(mach, cpu, exit);
            break;
        case NVMM_VCPU_EXIT_SHUTDOWN:
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            cpu->exception_index = EXCP_INTERRUPT;
            ret = 1;
            break;
        case NVMM_VCPU_EXIT_RDMSR:
            ret = nvmm_handle_rdmsr(mach, cpu, exit);
            break;
        case NVMM_VCPU_EXIT_WRMSR:
            ret = nvmm_handle_wrmsr(mach, cpu, exit);
            break;
        case NVMM_VCPU_EXIT_MONITOR:
        case NVMM_VCPU_EXIT_MWAIT:
            ret = nvmm_inject_ud(mach, vcpu);
            break;
        default:
            error_report("NVMM: Unexpected VM exit code 0x%lx [hw=0x%lx]",
                exit->reason, exit->u.inv.hwcode);
            nvmm_get_registers(cpu);
            bql_lock();
            qemu_system_guest_panicked(cpu_get_crash_info(cpu));
            bql_unlock();
            ret = -1;
            break;
        }
    } while (ret == 0);

    cpu_exec_end(cpu);
    bql_lock();

    qatomic_set(&cpu->exit_request, false);

    return ret < 0;
}

/* -------------------------------------------------------------------------- */

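/*
 * State synchronization hooks. "dirty" means QEMU's copy of the register
 * state is the authoritative one and must be pushed to the kernel before
 * the vCPU runs again.
 */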
static void
do_nvmm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    nvmm_get_registers(cpu);
    cpu->accel->dirty = true;
}

static void
do_nvmm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
{
    nvmm_set_registers(cpu);
    cpu->accel->dirty = false;
}

static void
do_nvmm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
{
    nvmm_set_registers(cpu);
    cpu->accel->dirty = false;
}

static void
do_nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
{
    cpu->accel->dirty = true;
}

void nvmm_cpu_synchronize_state(CPUState *cpu)
{
    if (!cpu->accel->dirty) {
        run_on_cpu(cpu, do_nvmm_cpu_synchronize_state, RUN_ON_CPU_NULL);
    }
}

void nvmm_cpu_synchronize_post_reset(CPUState *cpu)
{
    run_on_cpu(cpu, do_nvmm_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}

void nvmm_cpu_synchronize_post_init(CPUState *cpu)
{
    run_on_cpu(cpu, do_nvmm_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}

void nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu)
{
    run_on_cpu(cpu, do_nvmm_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
}

/* -------------------------------------------------------------------------- */

static Error *nvmm_migration_blocker;

/*
 * The nvmm_vcpu_stop() mechanism breaks races between entering the VMM
 * and another thread signaling the vCPU thread to exit.
 */

static void
nvmm_ipi_signal(int sigcpu)
{
    if (current_cpu) {
        AccelCPUState *qcpu = current_cpu->accel;
#if NVMM_USER_VERSION >= 2
        struct nvmm_vcpu *vcpu = &qcpu->vcpu;
        nvmm_vcpu_stop(vcpu);
#else
        qcpu->stop = true;
#endif
    }
}

static void
nvmm_init_cpu_signals(void)
{
    struct sigaction sigact;
    sigset_t set;

    /* Install the IPI handler. */
    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = nvmm_ipi_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    /* Allow IPIs on the current thread. */
    sigprocmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    pthread_sigmask(SIG_SETMASK, &set, NULL);
}

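/*
 * Create the NVMM vCPU backing a QEMU CPU: install the signal handler,
 * register the migration blocker once, create the kernel vCPU, and
 * configure CPUID overrides, assist callbacks and (if supported) TPR
 * exiting.
 */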
int
nvmm_init_vcpu(CPUState *cpu)
{
    struct nvmm_machine *mach = get_nvmm_mach();
    struct nvmm_vcpu_conf_cpuid cpuid;
    struct nvmm_vcpu_conf_tpr tpr;
    Error *local_error = NULL;
    AccelCPUState *qcpu;
    int ret, err;

    nvmm_init_cpu_signals();

    if (nvmm_migration_blocker == NULL) {
        error_setg(&nvmm_migration_blocker,
            "NVMM: Migration not supported");

        if (migrate_add_blocker(&nvmm_migration_blocker, &local_error) < 0) {
            error_report_err(local_error);
            return -EINVAL;
        }
    }

    qcpu = g_new0(AccelCPUState, 1);

    ret = nvmm_vcpu_create(mach, cpu->cpu_index, &qcpu->vcpu);
    if (ret == -1) {
        err = errno;
        error_report("NVMM: Failed to create a virtual processor,"
            " error=%d", err);
        g_free(qcpu);
        return -err;
    }

    memset(&cpuid, 0, sizeof(cpuid));
    cpuid.mask = 1;
    cpuid.leaf = 0x00000001;
    cpuid.u.mask.set.edx = CPUID_MCE | CPUID_MCA | CPUID_MTRR;
    ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_CPUID,
        &cpuid);
    if (ret == -1) {
        err = errno;
        error_report("NVMM: Failed to configure a virtual processor,"
            " error=%d", err);
        g_free(qcpu);
        return -err;
    }

    ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_CALLBACKS,
        &nvmm_callbacks);
    if (ret == -1) {
        err = errno;
        error_report("NVMM: Failed to configure a virtual processor,"
            " error=%d", err);
        g_free(qcpu);
        return -err;
    }

    if (qemu_mach.cap.arch.vcpu_conf_support & NVMM_CAP_ARCH_VCPU_CONF_TPR) {
        memset(&tpr, 0, sizeof(tpr));
        tpr.exit_changed = 1;
        ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_TPR, &tpr);
        if (ret == -1) {
            err = errno;
            error_report("NVMM: Failed to configure a virtual processor,"
                " error=%d", err);
            g_free(qcpu);
            return -err;
        }
    }

    qcpu->dirty = true;
    cpu->accel = qcpu;

    return 0;
}

int
nvmm_vcpu_exec(CPUState *cpu)
{
    int ret, fatal;

    while (1) {
        if (cpu->exception_index >= EXCP_INTERRUPT) {
            ret = cpu->exception_index;
            cpu->exception_index = -1;
            break;
        }

        fatal = nvmm_vcpu_loop(cpu);

        if (fatal) {
            error_report("NVMM: Failed to execute a VCPU.");
            abort();
        }
    }

    return ret;
}

void
nvmm_destroy_vcpu(CPUState *cpu)
{
    struct nvmm_machine *mach = get_nvmm_mach();
    AccelCPUState *qcpu = cpu->accel;

    nvmm_vcpu_destroy(mach, &qcpu->vcpu);
    g_free(cpu->accel);
}

/* -------------------------------------------------------------------------- */

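/*
 * Memory listener: keep the NVMM guest-physical mappings in sync with
 * QEMU's RAM memory regions. Only page-aligned RAM sections are mapped;
 * ROM regions are mapped without PROT_WRITE.
 */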
static void
nvmm_update_mapping(hwaddr start_pa, ram_addr_t size, uintptr_t hva,
    bool add, bool rom, const char *name)
{
    struct nvmm_machine *mach = get_nvmm_mach();
    int ret, prot;

    if (add) {
        prot = PROT_READ | PROT_EXEC;
        if (!rom) {
            prot |= PROT_WRITE;
        }
        ret = nvmm_gpa_map(mach, hva, start_pa, size, prot);
    } else {
        ret = nvmm_gpa_unmap(mach, hva, start_pa, size);
    }

    if (ret == -1) {
        error_report("NVMM: Failed to %s GPA range '%s' PA:%p, "
            "Size:%p bytes, HostVA:%p, error=%d",
            (add ? "map" : "unmap"), name, (void *)(uintptr_t)start_pa,
            (void *)size, (void *)hva, errno);
    }
}

static void
nvmm_process_section(MemoryRegionSection *section, int add)
{
    MemoryRegion *mr = section->mr;
    hwaddr start_pa = section->offset_within_address_space;
    ram_addr_t size = int128_get64(section->size);
    unsigned int delta;
    uintptr_t hva;

    if (!memory_region_is_ram(mr)) {
        return;
    }

    /* Adjust start_pa and size so that they are page-aligned. */
    delta = qemu_real_host_page_size() - (start_pa & ~qemu_real_host_page_mask());
    delta &= ~qemu_real_host_page_mask();
    if (delta > size) {
        return;
    }
    start_pa += delta;
    size -= delta;
    size &= qemu_real_host_page_mask();
    if (!size || (start_pa & ~qemu_real_host_page_mask())) {
        return;
    }

    hva = (uintptr_t)memory_region_get_ram_ptr(mr) +
        section->offset_within_region + delta;

    nvmm_update_mapping(start_pa, size, hva, add,
        memory_region_is_rom(mr), mr->name);
}

static void
nvmm_region_add(MemoryListener *listener, MemoryRegionSection *section)
{
    memory_region_ref(section->mr);
    nvmm_process_section(section, 1);
}

static void
nvmm_region_del(MemoryListener *listener, MemoryRegionSection *section)
{
    nvmm_process_section(section, 0);
    memory_region_unref(section->mr);
}

static void
nvmm_transaction_begin(MemoryListener *listener)
{
    /* nothing */
}

static void
nvmm_transaction_commit(MemoryListener *listener)
{
    /* nothing */
}

static void
nvmm_log_sync(MemoryListener *listener, MemoryRegionSection *section)
{
    MemoryRegion *mr = section->mr;

    if (!memory_region_is_ram(mr)) {
        return;
    }

    memory_region_set_dirty(mr, 0, int128_get64(section->size));
}

static MemoryListener nvmm_memory_listener = {
    .name = "nvmm",
    .begin = nvmm_transaction_begin,
    .commit = nvmm_transaction_commit,
    .region_add = nvmm_region_add,
    .region_del = nvmm_region_del,
    .log_sync = nvmm_log_sync,
    .priority = MEMORY_LISTENER_PRIORITY_ACCEL,
};

static void
nvmm_ram_block_added(RAMBlockNotifier *n, void *host, size_t size,
    size_t max_size)
{
    struct nvmm_machine *mach = get_nvmm_mach();
    uintptr_t hva = (uintptr_t)host;
    int ret;

    ret = nvmm_hva_map(mach, hva, max_size);

    if (ret == -1) {
        error_report("NVMM: Failed to map HVA, HostVA:%p "
            "Size:%p bytes, error=%d",
            (void *)hva, (void *)size, errno);
    }
}

static struct RAMBlockNotifier nvmm_ram_notifier = {
    .ram_block_added = nvmm_ram_block_added
};

/* -------------------------------------------------------------------------- */

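/*
 * Accelerator initialization: check that the kernel/library version and
 * the state size match what QEMU was built against, create the NVMM
 * machine, and hook up the memory listener and RAM block notifier.
 */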
static int
nvmm_accel_init(MachineState *ms)
{
    int ret, err;

    ret = nvmm_init();
    if (ret == -1) {
        err = errno;
        error_report("NVMM: Initialization failed, error=%d", errno);
        return -err;
    }

    ret = nvmm_capability(&qemu_mach.cap);
    if (ret == -1) {
        err = errno;
        error_report("NVMM: Unable to fetch capability, error=%d", errno);
        return -err;
    }
    if (qemu_mach.cap.version < NVMM_KERN_VERSION) {
        error_report("NVMM: Unsupported version %u", qemu_mach.cap.version);
        return -EPROGMISMATCH;
    }
    if (qemu_mach.cap.state_size != sizeof(struct nvmm_x64_state)) {
        error_report("NVMM: Wrong state size %u", qemu_mach.cap.state_size);
        return -EPROGMISMATCH;
    }

    ret = nvmm_machine_create(&qemu_mach.mach);
    if (ret == -1) {
        err = errno;
        error_report("NVMM: Machine creation failed, error=%d", errno);
        return -err;
    }

    memory_listener_register(&nvmm_memory_listener, &address_space_memory);
    ram_block_notifier_add(&nvmm_ram_notifier);

    printf("NetBSD Virtual Machine Monitor accelerator is operational\n");
    return 0;
}

int
nvmm_enabled(void)
{
    return nvmm_allowed;
}

static void
nvmm_accel_class_init(ObjectClass *oc, void *data)
{
    AccelClass *ac = ACCEL_CLASS(oc);
    ac->name = "NVMM";
    ac->init_machine = nvmm_accel_init;
    ac->allowed = &nvmm_allowed;
}

static const TypeInfo nvmm_accel_type = {
    .name = ACCEL_CLASS_NAME("nvmm"),
    .parent = TYPE_ACCEL,
    .class_init = nvmm_accel_class_init,
};

static void
nvmm_type_init(void)
{
    type_register_static(&nvmm_accel_type);
}

type_init(nvmm_type_init);
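
/*
 * Usage sketch (not part of the accelerator code): on a NetBSD host with
 * nvmm(4) loaded and access to /dev/nvmm, this accelerator is selected on
 * the command line, for example:
 *
 *     qemu-system-x86_64 -accel nvmm -m 2G -hda disk.img
 *
 * The memory size and disk image name above are illustrative only.
 */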