]>
Commit | Line | Data |
---|---|---|
fdc8635e RZ |
1 | /* |
2 | * Copyright (c) 2018-2019 Maxime Villard, All rights reserved. | |
3 | * | |
4 | * NetBSD Virtual Machine Monitor (NVMM) accelerator for QEMU. | |
5 | * | |
6 | * This work is licensed under the terms of the GNU GPL, version 2 or later. | |
7 | * See the COPYING file in the top-level directory. | |
8 | */ | |
9 | ||
10 | #include "qemu/osdep.h" | |
11 | #include "cpu.h" | |
12 | #include "exec/address-spaces.h" | |
13 | #include "exec/ioport.h" | |
14 | #include "qemu-common.h" | |
15 | #include "qemu/accel.h" | |
16 | #include "sysemu/nvmm.h" | |
17 | #include "sysemu/cpus.h" | |
18 | #include "sysemu/runstate.h" | |
19 | #include "qemu/main-loop.h" | |
20 | #include "qemu/error-report.h" | |
21 | #include "qapi/error.h" | |
22 | #include "qemu/queue.h" | |
23 | #include "migration/blocker.h" | |
24 | #include "strings.h" | |
25 | ||
26 | #include "nvmm-accel-ops.h" | |
27 | ||
28 | #include <nvmm.h> | |
29 | ||
/*
 * Per-vCPU accelerator state. Attached to the CPUState via the
 * hax_vcpu pointer (field name shared across accelerators), see
 * get_qemu_vcpu().
 */
struct qemu_vcpu {
    struct nvmm_vcpu vcpu;   /* libnvmm vCPU handle (state/event/exit). */
    uint8_t tpr;             /* Last TPR value synchronized with the APIC. */
    bool stop;               /* Request to leave the inner VCPU loop. */

    /* Window-exiting for INTs/NMIs. */
    bool int_window_exit;
    bool nmi_window_exit;

    /* The guest is in an interrupt shadow (POP SS, etc). */
    bool int_shadow;
};
42 | ||
/* Global machine state: host NVMM capabilities and the single VM handle. */
struct qemu_machine {
    struct nvmm_capability cap;
    struct nvmm_machine mach;
};
47 | ||
48 | /* -------------------------------------------------------------------------- */ | |
49 | ||
/* Whether NVMM may be used; presumably set during accel init — not in view. */
static bool nvmm_allowed;
/* The one NVMM virtual machine backing this QEMU instance. */
static struct qemu_machine qemu_mach;
52 | ||
/* Retrieve our per-vCPU data from the (accelerator-shared) hax_vcpu slot. */
static struct qemu_vcpu *
get_qemu_vcpu(CPUState *cpu)
{
    return (struct qemu_vcpu *)cpu->hax_vcpu;
}
58 | ||
/* Return the global NVMM machine handle. */
static struct nvmm_machine *
get_nvmm_mach(void)
{
    return &qemu_mach.mach;
}
64 | ||
65 | /* -------------------------------------------------------------------------- */ | |
66 | ||
/*
 * Convert a QEMU segment descriptor cache into NVMM's segment layout,
 * extracting each attribute bitfield from the packed flags word with
 * NetBSD's __SHIFTOUT (mask, then shift down to bit 0).
 */
static void
nvmm_set_segment(struct nvmm_x64_state_seg *nseg, const SegmentCache *qseg)
{
    uint32_t attrib = qseg->flags;

    nseg->selector = qseg->selector;
    nseg->limit = qseg->limit;
    nseg->base = qseg->base;
    nseg->attrib.type = __SHIFTOUT(attrib, DESC_TYPE_MASK);
    nseg->attrib.s = __SHIFTOUT(attrib, DESC_S_MASK);
    nseg->attrib.dpl = __SHIFTOUT(attrib, DESC_DPL_MASK);
    nseg->attrib.p = __SHIFTOUT(attrib, DESC_P_MASK);
    nseg->attrib.avl = __SHIFTOUT(attrib, DESC_AVL_MASK);
    nseg->attrib.l = __SHIFTOUT(attrib, DESC_L_MASK);
    nseg->attrib.def = __SHIFTOUT(attrib, DESC_B_MASK);
    nseg->attrib.g = __SHIFTOUT(attrib, DESC_G_MASK);
}
84 | ||
/*
 * Push the QEMU-side CPU state (env) into the kernel NVMM state for this
 * vCPU: GPRs, RIP/RFLAGS, segments, control/debug registers, FPU/SSE
 * state and MSRs, then commit everything with one nvmm_vcpu_setstate()
 * call selected by `bitmap`. Must run on the vCPU's own thread or while
 * the vCPU is stopped.
 */
static void
nvmm_set_registers(CPUState *cpu)
{
    CPUX86State *env = cpu->env_ptr;
    struct nvmm_machine *mach = get_nvmm_mach();
    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    struct nvmm_x64_state *state = vcpu->state;
    uint64_t bitmap;
    size_t i;
    int ret;

    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));

    /* GPRs. */
    state->gprs[NVMM_X64_GPR_RAX] = env->regs[R_EAX];
    state->gprs[NVMM_X64_GPR_RCX] = env->regs[R_ECX];
    state->gprs[NVMM_X64_GPR_RDX] = env->regs[R_EDX];
    state->gprs[NVMM_X64_GPR_RBX] = env->regs[R_EBX];
    state->gprs[NVMM_X64_GPR_RSP] = env->regs[R_ESP];
    state->gprs[NVMM_X64_GPR_RBP] = env->regs[R_EBP];
    state->gprs[NVMM_X64_GPR_RSI] = env->regs[R_ESI];
    state->gprs[NVMM_X64_GPR_RDI] = env->regs[R_EDI];
#ifdef TARGET_X86_64
    state->gprs[NVMM_X64_GPR_R8]  = env->regs[R_R8];
    state->gprs[NVMM_X64_GPR_R9]  = env->regs[R_R9];
    state->gprs[NVMM_X64_GPR_R10] = env->regs[R_R10];
    state->gprs[NVMM_X64_GPR_R11] = env->regs[R_R11];
    state->gprs[NVMM_X64_GPR_R12] = env->regs[R_R12];
    state->gprs[NVMM_X64_GPR_R13] = env->regs[R_R13];
    state->gprs[NVMM_X64_GPR_R14] = env->regs[R_R14];
    state->gprs[NVMM_X64_GPR_R15] = env->regs[R_R15];
#endif

    /* RIP and RFLAGS. */
    state->gprs[NVMM_X64_GPR_RIP] = env->eip;
    state->gprs[NVMM_X64_GPR_RFLAGS] = env->eflags;

    /* Segments. */
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_CS], &env->segs[R_CS]);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_DS], &env->segs[R_DS]);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_ES], &env->segs[R_ES]);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_FS], &env->segs[R_FS]);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_GS], &env->segs[R_GS]);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_SS], &env->segs[R_SS]);

    /* Special segments. */
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_GDT], &env->gdt);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_LDT], &env->ldt);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_TR], &env->tr);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_IDT], &env->idt);

    /* Control registers. */
    state->crs[NVMM_X64_CR_CR0] = env->cr[0];
    state->crs[NVMM_X64_CR_CR2] = env->cr[2];
    state->crs[NVMM_X64_CR_CR3] = env->cr[3];
    state->crs[NVMM_X64_CR_CR4] = env->cr[4];
    state->crs[NVMM_X64_CR_CR8] = qcpu->tpr;    /* CR8 mirrors the TPR. */
    state->crs[NVMM_X64_CR_XCR0] = env->xcr0;

    /* Debug registers. */
    state->drs[NVMM_X64_DR_DR0] = env->dr[0];
    state->drs[NVMM_X64_DR_DR1] = env->dr[1];
    state->drs[NVMM_X64_DR_DR2] = env->dr[2];
    state->drs[NVMM_X64_DR_DR3] = env->dr[3];
    state->drs[NVMM_X64_DR_DR6] = env->dr[6];
    state->drs[NVMM_X64_DR_DR7] = env->dr[7];

    /* FPU. */
    state->fpu.fx_cw = env->fpuc;
    /* Merge TOP (fpstt) back into bits 11-13 of the x87 status word. */
    state->fpu.fx_sw = (env->fpus & ~0x3800) | ((env->fpstt & 0x7) << 11);
    state->fpu.fx_tw = 0;
    /* QEMU's fptags are inverted relative to the FXSAVE tag word. */
    for (i = 0; i < 8; i++) {
        state->fpu.fx_tw |= (!env->fptags[i]) << i;
    }
    state->fpu.fx_opcode = env->fpop;
    state->fpu.fx_ip.fa_64 = env->fpip;
    state->fpu.fx_dp.fa_64 = env->fpdp;
    state->fpu.fx_mxcsr = env->mxcsr;
    state->fpu.fx_mxcsr_mask = 0x0000FFFF;
    assert(sizeof(state->fpu.fx_87_ac) == sizeof(env->fpregs));
    memcpy(state->fpu.fx_87_ac, env->fpregs, sizeof(env->fpregs));
    /* Copy the low 128 bits (two 64-bit lanes) of each ZMM register. */
    for (i = 0; i < CPU_NB_REGS; i++) {
        memcpy(&state->fpu.fx_xmm[i].xmm_bytes[0],
            &env->xmm_regs[i].ZMM_Q(0), 8);
        memcpy(&state->fpu.fx_xmm[i].xmm_bytes[8],
            &env->xmm_regs[i].ZMM_Q(1), 8);
    }

    /* MSRs. */
    state->msrs[NVMM_X64_MSR_EFER] = env->efer;
    state->msrs[NVMM_X64_MSR_STAR] = env->star;
#ifdef TARGET_X86_64
    state->msrs[NVMM_X64_MSR_LSTAR] = env->lstar;
    state->msrs[NVMM_X64_MSR_CSTAR] = env->cstar;
    state->msrs[NVMM_X64_MSR_SFMASK] = env->fmask;
    state->msrs[NVMM_X64_MSR_KERNELGSBASE] = env->kernelgsbase;
#endif
    state->msrs[NVMM_X64_MSR_SYSENTER_CS]  = env->sysenter_cs;
    state->msrs[NVMM_X64_MSR_SYSENTER_ESP] = env->sysenter_esp;
    state->msrs[NVMM_X64_MSR_SYSENTER_EIP] = env->sysenter_eip;
    state->msrs[NVMM_X64_MSR_PAT] = env->pat;
    state->msrs[NVMM_X64_MSR_TSC] = env->tsc;

    bitmap =
        NVMM_X64_STATE_SEGS |
        NVMM_X64_STATE_GPRS |
        NVMM_X64_STATE_CRS  |
        NVMM_X64_STATE_DRS  |
        NVMM_X64_STATE_MSRS |
        NVMM_X64_STATE_FPU;

    ret = nvmm_vcpu_setstate(mach, vcpu, bitmap);
    if (ret == -1) {
        error_report("NVMM: Failed to set virtual processor context,"
            " error=%d", errno);
    }
}
203 | ||
/*
 * Convert an NVMM segment back into QEMU's segment descriptor cache,
 * re-packing the attribute bitfields into the flags word with NetBSD's
 * __SHIFTIN (shift a value up under a mask). Inverse of
 * nvmm_set_segment().
 */
static void
nvmm_get_segment(SegmentCache *qseg, const struct nvmm_x64_state_seg *nseg)
{
    qseg->selector = nseg->selector;
    qseg->limit = nseg->limit;
    qseg->base = nseg->base;

    qseg->flags =
        __SHIFTIN((uint32_t)nseg->attrib.type, DESC_TYPE_MASK) |
        __SHIFTIN((uint32_t)nseg->attrib.s, DESC_S_MASK) |
        __SHIFTIN((uint32_t)nseg->attrib.dpl, DESC_DPL_MASK) |
        __SHIFTIN((uint32_t)nseg->attrib.p, DESC_P_MASK) |
        __SHIFTIN((uint32_t)nseg->attrib.avl, DESC_AVL_MASK) |
        __SHIFTIN((uint32_t)nseg->attrib.l, DESC_L_MASK) |
        __SHIFTIN((uint32_t)nseg->attrib.def, DESC_B_MASK) |
        __SHIFTIN((uint32_t)nseg->attrib.g, DESC_G_MASK);
}
221 | ||
/*
 * Pull the kernel NVMM vCPU state back into the QEMU-side CPU state
 * (env). Inverse of nvmm_set_registers(). Also propagates a changed
 * TPR to the emulated APIC, and recomputes hflags at the end. Must run
 * on the vCPU's own thread or while the vCPU is stopped.
 */
static void
nvmm_get_registers(CPUState *cpu)
{
    CPUX86State *env = cpu->env_ptr;
    struct nvmm_machine *mach = get_nvmm_mach();
    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct nvmm_x64_state *state = vcpu->state;
    uint64_t bitmap, tpr;
    size_t i;
    int ret;

    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));

    bitmap =
        NVMM_X64_STATE_SEGS |
        NVMM_X64_STATE_GPRS |
        NVMM_X64_STATE_CRS  |
        NVMM_X64_STATE_DRS  |
        NVMM_X64_STATE_MSRS |
        NVMM_X64_STATE_FPU;

    ret = nvmm_vcpu_getstate(mach, vcpu, bitmap);
    if (ret == -1) {
        error_report("NVMM: Failed to get virtual processor context,"
            " error=%d", errno);
    }

    /* GPRs. */
    env->regs[R_EAX] = state->gprs[NVMM_X64_GPR_RAX];
    env->regs[R_ECX] = state->gprs[NVMM_X64_GPR_RCX];
    env->regs[R_EDX] = state->gprs[NVMM_X64_GPR_RDX];
    env->regs[R_EBX] = state->gprs[NVMM_X64_GPR_RBX];
    env->regs[R_ESP] = state->gprs[NVMM_X64_GPR_RSP];
    env->regs[R_EBP] = state->gprs[NVMM_X64_GPR_RBP];
    env->regs[R_ESI] = state->gprs[NVMM_X64_GPR_RSI];
    env->regs[R_EDI] = state->gprs[NVMM_X64_GPR_RDI];
#ifdef TARGET_X86_64
    env->regs[R_R8]  = state->gprs[NVMM_X64_GPR_R8];
    env->regs[R_R9]  = state->gprs[NVMM_X64_GPR_R9];
    env->regs[R_R10] = state->gprs[NVMM_X64_GPR_R10];
    env->regs[R_R11] = state->gprs[NVMM_X64_GPR_R11];
    env->regs[R_R12] = state->gprs[NVMM_X64_GPR_R12];
    env->regs[R_R13] = state->gprs[NVMM_X64_GPR_R13];
    env->regs[R_R14] = state->gprs[NVMM_X64_GPR_R14];
    env->regs[R_R15] = state->gprs[NVMM_X64_GPR_R15];
#endif

    /* RIP and RFLAGS. */
    env->eip = state->gprs[NVMM_X64_GPR_RIP];
    env->eflags = state->gprs[NVMM_X64_GPR_RFLAGS];

    /* Segments. */
    nvmm_get_segment(&env->segs[R_ES], &state->segs[NVMM_X64_SEG_ES]);
    nvmm_get_segment(&env->segs[R_CS], &state->segs[NVMM_X64_SEG_CS]);
    nvmm_get_segment(&env->segs[R_SS], &state->segs[NVMM_X64_SEG_SS]);
    nvmm_get_segment(&env->segs[R_DS], &state->segs[NVMM_X64_SEG_DS]);
    nvmm_get_segment(&env->segs[R_FS], &state->segs[NVMM_X64_SEG_FS]);
    nvmm_get_segment(&env->segs[R_GS], &state->segs[NVMM_X64_SEG_GS]);

    /* Special segments. */
    nvmm_get_segment(&env->gdt, &state->segs[NVMM_X64_SEG_GDT]);
    nvmm_get_segment(&env->ldt, &state->segs[NVMM_X64_SEG_LDT]);
    nvmm_get_segment(&env->tr, &state->segs[NVMM_X64_SEG_TR]);
    nvmm_get_segment(&env->idt, &state->segs[NVMM_X64_SEG_IDT]);

    /* Control registers. */
    env->cr[0] = state->crs[NVMM_X64_CR_CR0];
    env->cr[2] = state->crs[NVMM_X64_CR_CR2];
    env->cr[3] = state->crs[NVMM_X64_CR_CR3];
    env->cr[4] = state->crs[NVMM_X64_CR_CR4];
    /* CR8 mirrors the TPR; forward a change to the emulated APIC. */
    tpr = state->crs[NVMM_X64_CR_CR8];
    if (tpr != qcpu->tpr) {
        qcpu->tpr = tpr;
        cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
    }
    env->xcr0 = state->crs[NVMM_X64_CR_XCR0];

    /* Debug registers. */
    env->dr[0] = state->drs[NVMM_X64_DR_DR0];
    env->dr[1] = state->drs[NVMM_X64_DR_DR1];
    env->dr[2] = state->drs[NVMM_X64_DR_DR2];
    env->dr[3] = state->drs[NVMM_X64_DR_DR3];
    env->dr[6] = state->drs[NVMM_X64_DR_DR6];
    env->dr[7] = state->drs[NVMM_X64_DR_DR7];

    /* FPU. */
    env->fpuc = state->fpu.fx_cw;
    /* Split TOP (bits 11-13) out of the x87 status word into fpstt. */
    env->fpstt = (state->fpu.fx_sw >> 11) & 0x7;
    env->fpus = state->fpu.fx_sw & ~0x3800;
    /* QEMU's fptags are inverted relative to the FXSAVE tag word. */
    for (i = 0; i < 8; i++) {
        env->fptags[i] = !((state->fpu.fx_tw >> i) & 1);
    }
    env->fpop = state->fpu.fx_opcode;
    env->fpip = state->fpu.fx_ip.fa_64;
    env->fpdp = state->fpu.fx_dp.fa_64;
    env->mxcsr = state->fpu.fx_mxcsr;
    assert(sizeof(state->fpu.fx_87_ac) == sizeof(env->fpregs));
    memcpy(env->fpregs, state->fpu.fx_87_ac, sizeof(env->fpregs));
    /* Copy the low 128 bits (two 64-bit lanes) back into each ZMM register. */
    for (i = 0; i < CPU_NB_REGS; i++) {
        memcpy(&env->xmm_regs[i].ZMM_Q(0),
            &state->fpu.fx_xmm[i].xmm_bytes[0], 8);
        memcpy(&env->xmm_regs[i].ZMM_Q(1),
            &state->fpu.fx_xmm[i].xmm_bytes[8], 8);
    }

    /* MSRs. */
    env->efer = state->msrs[NVMM_X64_MSR_EFER];
    env->star = state->msrs[NVMM_X64_MSR_STAR];
#ifdef TARGET_X86_64
    env->lstar = state->msrs[NVMM_X64_MSR_LSTAR];
    env->cstar = state->msrs[NVMM_X64_MSR_CSTAR];
    env->fmask = state->msrs[NVMM_X64_MSR_SFMASK];
    env->kernelgsbase = state->msrs[NVMM_X64_MSR_KERNELGSBASE];
#endif
    env->sysenter_cs  = state->msrs[NVMM_X64_MSR_SYSENTER_CS];
    env->sysenter_esp = state->msrs[NVMM_X64_MSR_SYSENTER_ESP];
    env->sysenter_eip = state->msrs[NVMM_X64_MSR_SYSENTER_EIP];
    env->pat = state->msrs[NVMM_X64_MSR_PAT];
    env->tsc = state->msrs[NVMM_X64_MSR_TSC];

    x86_update_hflags(env);
}
346 | ||
/*
 * Return true if an external interrupt can be injected right now.
 * If interrupts are currently blocked (interrupt shadow or IF clear),
 * this also arms interrupt-window exiting in the kernel so we get a
 * VM-exit as soon as injection becomes possible.
 */
static bool
nvmm_can_take_int(CPUState *cpu)
{
    CPUX86State *env = cpu->env_ptr;
    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    struct nvmm_machine *mach = get_nvmm_mach();

    /* Window-exiting already armed: still waiting for the window. */
    if (qcpu->int_window_exit) {
        return false;
    }

    if (qcpu->int_shadow || !(env->eflags & IF_MASK)) {
        struct nvmm_x64_state *state = vcpu->state;

        /* Exit on interrupt window. */
        nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_INTR);
        state->intr.int_window_exiting = 1;
        nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_INTR);

        return false;
    }

    return true;
}
372 | ||
373 | static bool | |
374 | nvmm_can_take_nmi(CPUState *cpu) | |
375 | { | |
376 | struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu); | |
377 | ||
378 | /* | |
379 | * Contrary to INTs, NMIs always schedule an exit when they are | |
380 | * completed. Therefore, if window-exiting is enabled, it means | |
381 | * NMIs are blocked. | |
382 | */ | |
383 | if (qcpu->nmi_window_exit) { | |
384 | return false; | |
385 | } | |
386 | ||
387 | return true; | |
388 | } | |
389 | ||
390 | /* | |
391 | * Called before the VCPU is run. We inject events generated by the I/O | |
392 | * thread, and synchronize the guest TPR. | |
393 | */ | |
/*
 * Called before the VCPU is run. We inject events generated by the I/O
 * thread, and synchronize the guest TPR.
 *
 * Runs with the iothread lock taken for the duration: event selection,
 * TPR sync via get/setstate, and the final nvmm_vcpu_inject() all
 * happen under the lock.
 */
static void
nvmm_vcpu_pre_run(CPUState *cpu)
{
    CPUX86State *env = cpu->env_ptr;
    struct nvmm_machine *mach = get_nvmm_mach();
    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct nvmm_x64_state *state = vcpu->state;
    struct nvmm_vcpu_event *event = vcpu->event;
    bool has_event = false;
    bool sync_tpr = false;
    uint8_t tpr;
    int ret;

    qemu_mutex_lock_iothread();

    /* The APIC-side TPR is authoritative; push it to CR8 if it moved. */
    tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
    if (tpr != qcpu->tpr) {
        qcpu->tpr = tpr;
        sync_tpr = true;
    }

    /*
     * Force the VCPU out of its inner loop to process any INIT requests
     * or commit pending TPR access.
     */
    if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
        cpu->exit_request = 1;
    }

    /* At most one event per entry; NMI takes priority over INT. */
    if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        if (nvmm_can_take_nmi(cpu)) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
            event->type = NVMM_VCPU_EVENT_INTR;
            event->vector = 2;          /* NMI is vector 2. */
            has_event = true;
        }
    }

    if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
        if (nvmm_can_take_int(cpu)) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
            event->type = NVMM_VCPU_EVENT_INTR;
            event->vector = cpu_get_pic_interrupt(env);
            has_event = true;
        }
    }

    /* Don't want SMIs. */
    if (cpu->interrupt_request & CPU_INTERRUPT_SMI) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
    }

    if (sync_tpr) {
        /* Read-modify-write the CRs so only CR8 changes. */
        ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_CRS);
        if (ret == -1) {
            error_report("NVMM: Failed to get CPU state,"
                " error=%d", errno);
        }

        state->crs[NVMM_X64_CR_CR8] = qcpu->tpr;

        ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_CRS);
        if (ret == -1) {
            error_report("NVMM: Failed to set CPU state,"
                " error=%d", errno);
        }
    }

    if (has_event) {
        ret = nvmm_vcpu_inject(mach, vcpu);
        if (ret == -1) {
            error_report("NVMM: Failed to inject event,"
                " error=%d", errno);
        }
    }

    qemu_mutex_unlock_iothread();
}
474 | ||
475 | /* | |
476 | * Called after the VCPU ran. We synchronize the host view of the TPR and | |
477 | * RFLAGS. | |
478 | */ | |
/*
 * Called after the VCPU ran. We synchronize the host view of the TPR and
 * RFLAGS, and cache the interrupt-shadow / window-exiting flags reported
 * in the exit state.
 */
static void
nvmm_vcpu_post_run(CPUState *cpu, struct nvmm_vcpu_exit *exit)
{
    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
    CPUX86State *env = cpu->env_ptr;
    X86CPU *x86_cpu = X86_CPU(cpu);
    uint64_t tpr;

    env->eflags = exit->exitstate.rflags;
    qcpu->int_shadow = exit->exitstate.int_shadow;
    qcpu->int_window_exit = exit->exitstate.int_window_exiting;
    qcpu->nmi_window_exit = exit->exitstate.nmi_window_exiting;

    /* Propagate a guest-modified CR8/TPR to the emulated APIC. */
    tpr = exit->exitstate.cr8;
    if (qcpu->tpr != tpr) {
        qcpu->tpr = tpr;
        /* cpu_set_apic_tpr() must be called under the iothread lock. */
        qemu_mutex_lock_iothread();
        cpu_set_apic_tpr(x86_cpu->apic_state, qcpu->tpr);
        qemu_mutex_unlock_iothread();
    }
}
500 | ||
501 | /* -------------------------------------------------------------------------- */ | |
502 | ||
/*
 * libnvmm assist callback: perform a guest port-I/O access against
 * QEMU's I/O address space. io->in selects read vs write.
 */
static void
nvmm_io_callback(struct nvmm_io *io)
{
    MemTxAttrs attrs = { 0 };
    int ret;

    ret = address_space_rw(&address_space_io, io->port, attrs, io->data,
        io->size, !io->in);
    if (ret != MEMTX_OK) {
        error_report("NVMM: I/O Transaction Failed "
            "[%s, port=%u, size=%zu]", (io->in ? "in" : "out"),
            io->port, io->size);
    }

    /* Needed, otherwise infinite loop. */
    current_cpu->vcpu_dirty = false;
}
520 | ||
/*
 * libnvmm assist callback: perform a guest MMIO access against QEMU's
 * physical memory map. mem->write selects the direction.
 */
static void
nvmm_mem_callback(struct nvmm_mem *mem)
{
    cpu_physical_memory_rw(mem->gpa, mem->data, mem->size, mem->write);

    /* Needed, otherwise infinite loop. */
    current_cpu->vcpu_dirty = false;
}
529 | ||
/* Callbacks handed to libnvmm's I/O and memory assist emulation. */
static struct nvmm_assist_callbacks nvmm_callbacks = {
    .io = nvmm_io_callback,
    .mem = nvmm_mem_callback
};
534 | ||
535 | /* -------------------------------------------------------------------------- */ | |
536 | ||
537 | static int | |
538 | nvmm_handle_mem(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu) | |
539 | { | |
540 | int ret; | |
541 | ||
542 | ret = nvmm_assist_mem(mach, vcpu); | |
543 | if (ret == -1) { | |
544 | error_report("NVMM: Mem Assist Failed [gpa=%p]", | |
545 | (void *)vcpu->exit->u.mem.gpa); | |
546 | } | |
547 | ||
548 | return ret; | |
549 | } | |
550 | ||
551 | static int | |
552 | nvmm_handle_io(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu) | |
553 | { | |
554 | int ret; | |
555 | ||
556 | ret = nvmm_assist_io(mach, vcpu); | |
557 | if (ret == -1) { | |
558 | error_report("NVMM: I/O Assist Failed [port=%d]", | |
559 | (int)vcpu->exit->u.io.port); | |
560 | } | |
561 | ||
562 | return ret; | |
563 | } | |
564 | ||
/*
 * Handle a RDMSR exit: emulate the few MSRs we know, return 0 for the
 * rest (with a warning), write the result into RDX:RAX and advance RIP
 * past the instruction. Returns 0 on success, -1 on state-sync failure.
 */
static int
nvmm_handle_rdmsr(struct nvmm_machine *mach, CPUState *cpu,
    struct nvmm_vcpu_exit *exit)
{
    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct nvmm_x64_state *state = vcpu->state;
    uint64_t val;
    int ret;

    switch (exit->u.rdmsr.msr) {
    case MSR_IA32_APICBASE:
        val = cpu_get_apic_base(x86_cpu->apic_state);
        break;
    case MSR_MTRRcap:
    case MSR_MTRRdefType:
    case MSR_MCG_CAP:
    case MSR_MCG_STATUS:
        /* Features we don't implement: read as zero. */
        val = 0;
        break;
    default: /* More MSRs to add? */
        val = 0;
        error_report("NVMM: Unexpected RDMSR 0x%x, ignored",
            exit->u.rdmsr.msr);
        break;
    }

    ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_GPRS);
    if (ret == -1) {
        return -1;
    }

    /* RDMSR returns the value in EDX:EAX. */
    state->gprs[NVMM_X64_GPR_RAX] = (val & 0xFFFFFFFF);
    state->gprs[NVMM_X64_GPR_RDX] = (val >> 32);
    /* npc is the address of the next instruction: skip the RDMSR. */
    state->gprs[NVMM_X64_GPR_RIP] = exit->u.rdmsr.npc;

    ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
    if (ret == -1) {
        return -1;
    }

    return 0;
}
609 | ||
610 | static int | |
611 | nvmm_handle_wrmsr(struct nvmm_machine *mach, CPUState *cpu, | |
612 | struct nvmm_vcpu_exit *exit) | |
613 | { | |
614 | struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu); | |
615 | struct nvmm_vcpu *vcpu = &qcpu->vcpu; | |
616 | X86CPU *x86_cpu = X86_CPU(cpu); | |
617 | struct nvmm_x64_state *state = vcpu->state; | |
618 | uint64_t val; | |
619 | int ret; | |
620 | ||
621 | val = exit->u.wrmsr.val; | |
622 | ||
623 | switch (exit->u.wrmsr.msr) { | |
624 | case MSR_IA32_APICBASE: | |
625 | cpu_set_apic_base(x86_cpu->apic_state, val); | |
626 | break; | |
627 | case MSR_MTRRdefType: | |
628 | case MSR_MCG_STATUS: | |
629 | break; | |
630 | default: /* More MSRs to add? */ | |
631 | error_report("NVMM: Unexpected WRMSR 0x%x [val=0x%lx], ignored", | |
632 | exit->u.wrmsr.msr, val); | |
633 | break; | |
634 | } | |
635 | ||
636 | ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_GPRS); | |
637 | if (ret == -1) { | |
638 | return -1; | |
639 | } | |
640 | ||
641 | state->gprs[NVMM_X64_GPR_RIP] = exit->u.wrmsr.npc; | |
642 | ||
643 | ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS); | |
644 | if (ret == -1) { | |
645 | return -1; | |
646 | } | |
647 | ||
648 | return 0; | |
649 | } | |
650 | ||
/*
 * Handle a HLT exit. If no interrupt or NMI is deliverable, put the
 * vCPU to sleep (halted, EXCP_HLT) and return 1 to leave the inner
 * loop; otherwise return 0 so the loop re-enters the guest.
 */
static int
nvmm_handle_halted(struct nvmm_machine *mach, CPUState *cpu,
    struct nvmm_vcpu_exit *exit)
{
    CPUX86State *env = cpu->env_ptr;
    int ret = 0;

    /* interrupt_request is shared with the I/O thread. */
    qemu_mutex_lock_iothread();

    if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
          (env->eflags & IF_MASK)) &&
        !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        cpu->exception_index = EXCP_HLT;
        cpu->halted = true;
        ret = 1;
    }

    qemu_mutex_unlock_iothread();

    return ret;
}
672 | ||
/*
 * Queue a #UD (invalid opcode, vector 6) exception for injection into
 * the guest. Used for instructions we refuse to emulate.
 */
static int
nvmm_inject_ud(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
{
    struct nvmm_vcpu_event *event = vcpu->event;

    event->type = NVMM_VCPU_EVENT_EXCP;
    event->vector = 6;
    event->u.excp.error = 0;

    return nvmm_vcpu_inject(mach, vcpu);
}
684 | ||
/*
 * Main execution loop for one vCPU: handle pending asynchronous events
 * (INIT, SIPI, TPR, APIC poll), then repeatedly enter the guest with
 * nvmm_vcpu_run() and dispatch each VM-exit until a handler asks to
 * return to the outer loop. Returns non-zero only on fatal error
 * (ret < 0 from a handler).
 */
static int
nvmm_vcpu_loop(CPUState *cpu)
{
    CPUX86State *env = cpu->env_ptr;
    struct nvmm_machine *mach = get_nvmm_mach();
    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct nvmm_vcpu_exit *exit = vcpu->exit;
    int ret;

    /*
     * Some asynchronous events must be handled outside of the inner
     * VCPU loop. They are handled here.
     */
    if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
        nvmm_cpu_synchronize_state(cpu);
        do_cpu_init(x86_cpu);
        /* set int/nmi windows back to the reset state */
    }
    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
        apic_poll_irq(x86_cpu->apic_state);
    }
    /* A deliverable interrupt or an NMI wakes a halted vCPU. */
    if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
         (env->eflags & IF_MASK)) ||
        (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        cpu->halted = false;
    }
    if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
        nvmm_cpu_synchronize_state(cpu);
        do_cpu_sipi(x86_cpu);
    }
    if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_TPR;
        nvmm_cpu_synchronize_state(cpu);
        apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip,
            env->tpr_access_type);
    }

    /* Still halted: nothing to run. */
    if (cpu->halted) {
        cpu->exception_index = EXCP_HLT;
        qatomic_set(&cpu->exit_request, false);
        return 0;
    }

    /* Guest execution runs without the iothread lock. */
    qemu_mutex_unlock_iothread();
    cpu_exec_start(cpu);

    /*
     * Inner VCPU loop.
     */
    do {
        /* Flush QEMU-side register changes into the kernel. */
        if (cpu->vcpu_dirty) {
            nvmm_set_registers(cpu);
            cpu->vcpu_dirty = false;
        }

        if (qcpu->stop) {
            cpu->exception_index = EXCP_INTERRUPT;
            qcpu->stop = false;
            ret = 1;
            break;
        }

        nvmm_vcpu_pre_run(cpu);

        if (qatomic_read(&cpu->exit_request)) {
#if NVMM_USER_VERSION >= 2
            /* Ask the kernel to bounce straight back out of the guest. */
            nvmm_vcpu_stop(vcpu);
#else
            /* Older NVMM: fall back to a self-signal. */
            qemu_cpu_kick_self();
#endif
        }

        /* Read exit_request before the kernel reads the immediate exit flag */
        smp_rmb();
        ret = nvmm_vcpu_run(mach, vcpu);
        if (ret == -1) {
            error_report("NVMM: Failed to exec a virtual processor,"
                " error=%d", errno);
            break;
        }

        nvmm_vcpu_post_run(cpu, exit);

        switch (exit->reason) {
        case NVMM_VCPU_EXIT_NONE:
            break;
#if NVMM_USER_VERSION >= 2
        case NVMM_VCPU_EXIT_STOPPED:
            /*
             * The kernel cleared the immediate exit flag; cpu->exit_request
             * must be cleared after
             */
            smp_wmb();
            qcpu->stop = true;
            break;
#endif
        case NVMM_VCPU_EXIT_MEMORY:
            ret = nvmm_handle_mem(mach, vcpu);
            break;
        case NVMM_VCPU_EXIT_IO:
            ret = nvmm_handle_io(mach, vcpu);
            break;
        case NVMM_VCPU_EXIT_INT_READY:
        case NVMM_VCPU_EXIT_NMI_READY:
        case NVMM_VCPU_EXIT_TPR_CHANGED:
            /* Injection windows / TPR handled in pre/post_run. */
            break;
        case NVMM_VCPU_EXIT_HALTED:
            ret = nvmm_handle_halted(mach, cpu, exit);
            break;
        case NVMM_VCPU_EXIT_SHUTDOWN:
            /* Triple fault and the like: reset the machine. */
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            cpu->exception_index = EXCP_INTERRUPT;
            ret = 1;
            break;
        case NVMM_VCPU_EXIT_RDMSR:
            ret = nvmm_handle_rdmsr(mach, cpu, exit);
            break;
        case NVMM_VCPU_EXIT_WRMSR:
            ret = nvmm_handle_wrmsr(mach, cpu, exit);
            break;
        case NVMM_VCPU_EXIT_MONITOR:
        case NVMM_VCPU_EXIT_MWAIT:
            /* We don't expose MONITOR/MWAIT: inject #UD. */
            ret = nvmm_inject_ud(mach, vcpu);
            break;
        default:
            error_report("NVMM: Unexpected VM exit code 0x%lx [hw=0x%lx]",
                exit->reason, exit->u.inv.hwcode);
            nvmm_get_registers(cpu);
            qemu_mutex_lock_iothread();
            qemu_system_guest_panicked(cpu_get_crash_info(cpu));
            qemu_mutex_unlock_iothread();
            ret = -1;
            break;
        }
    } while (ret == 0);

    cpu_exec_end(cpu);
    qemu_mutex_lock_iothread();

    qatomic_set(&cpu->exit_request, false);

    return ret < 0;
}
831 | ||
832 | /* -------------------------------------------------------------------------- */ | |
833 | ||
/* run_on_cpu worker: pull kernel state into env and mark it dirty. */
static void
do_nvmm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    nvmm_get_registers(cpu);
    cpu->vcpu_dirty = true;
}
840 | ||
/* run_on_cpu worker: push env into the kernel after a machine reset. */
static void
do_nvmm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
{
    nvmm_set_registers(cpu);
    cpu->vcpu_dirty = false;
}
847 | ||
/* run_on_cpu worker: push env into the kernel after CPU init. */
static void
do_nvmm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
{
    nvmm_set_registers(cpu);
    cpu->vcpu_dirty = false;
}
854 | ||
/* run_on_cpu worker: mark state dirty so loadvm data gets pushed later. */
static void
do_nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
{
    cpu->vcpu_dirty = true;
}
860 | ||
861 | void nvmm_cpu_synchronize_state(CPUState *cpu) | |
862 | { | |
863 | if (!cpu->vcpu_dirty) { | |
864 | run_on_cpu(cpu, do_nvmm_cpu_synchronize_state, RUN_ON_CPU_NULL); | |
865 | } | |
866 | } | |
867 | ||
/* Push env into the kernel on the vCPU's own thread after a reset. */
void nvmm_cpu_synchronize_post_reset(CPUState *cpu)
{
    run_on_cpu(cpu, do_nvmm_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}
872 | ||
/* Push env into the kernel on the vCPU's own thread after init. */
void nvmm_cpu_synchronize_post_init(CPUState *cpu)
{
    run_on_cpu(cpu, do_nvmm_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}
877 | ||
/* Mark state dirty on the vCPU's own thread before loading a VM image. */
void nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu)
{
    run_on_cpu(cpu, do_nvmm_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
}
882 | ||
883 | /* -------------------------------------------------------------------------- */ | |
884 | ||
/* Migration blocker registered once at first vCPU init (NVMM can't migrate). */
static Error *nvmm_migration_blocker;
886 | ||
887 | /* | |
888 | * The nvmm_vcpu_stop() mechanism breaks races between entering the VMM | |
889 | * and another thread signaling the vCPU thread to exit. | |
890 | */ | |
891 | ||
/*
 * SIG_IPI handler, runs in async-signal context on the vCPU thread:
 * request the vCPU to leave guest mode, either directly via
 * nvmm_vcpu_stop() (NVMM >= 2) or by flagging the inner loop.
 */
static void
nvmm_ipi_signal(int sigcpu)
{
    if (current_cpu) {
        struct qemu_vcpu *qcpu = get_qemu_vcpu(current_cpu);
#if NVMM_USER_VERSION >= 2
        struct nvmm_vcpu *vcpu = &qcpu->vcpu;
        nvmm_vcpu_stop(vcpu);
#else
        qcpu->stop = true;
#endif
    }
}
905 | ||
906 | static void | |
907 | nvmm_init_cpu_signals(void) | |
908 | { | |
909 | struct sigaction sigact; | |
910 | sigset_t set; | |
911 | ||
912 | /* Install the IPI handler. */ | |
913 | memset(&sigact, 0, sizeof(sigact)); | |
914 | sigact.sa_handler = nvmm_ipi_signal; | |
915 | sigaction(SIG_IPI, &sigact, NULL); | |
916 | ||
917 | /* Allow IPIs on the current thread. */ | |
918 | sigprocmask(SIG_BLOCK, NULL, &set); | |
919 | sigdelset(&set, SIG_IPI); | |
920 | pthread_sigmask(SIG_SETMASK, &set, NULL); | |
921 | } | |
922 | ||
923 | int | |
924 | nvmm_init_vcpu(CPUState *cpu) | |
925 | { | |
926 | struct nvmm_machine *mach = get_nvmm_mach(); | |
927 | struct nvmm_vcpu_conf_cpuid cpuid; | |
928 | struct nvmm_vcpu_conf_tpr tpr; | |
929 | Error *local_error = NULL; | |
930 | struct qemu_vcpu *qcpu; | |
931 | int ret, err; | |
932 | ||
933 | nvmm_init_cpu_signals(); | |
934 | ||
935 | if (nvmm_migration_blocker == NULL) { | |
936 | error_setg(&nvmm_migration_blocker, | |
937 | "NVMM: Migration not supported"); | |
938 | ||
436c831a | 939 | if (migrate_add_blocker(nvmm_migration_blocker, &local_error) < 0) { |
fdc8635e | 940 | error_report_err(local_error); |
fdc8635e RZ |
941 | error_free(nvmm_migration_blocker); |
942 | return -EINVAL; | |
943 | } | |
944 | } | |
945 | ||
946 | qcpu = g_malloc0(sizeof(*qcpu)); | |
947 | if (qcpu == NULL) { | |
948 | error_report("NVMM: Failed to allocate VCPU context."); | |
949 | return -ENOMEM; | |
950 | } | |
951 | ||
952 | ret = nvmm_vcpu_create(mach, cpu->cpu_index, &qcpu->vcpu); | |
953 | if (ret == -1) { | |
954 | err = errno; | |
955 | error_report("NVMM: Failed to create a virtual processor," | |
956 | " error=%d", err); | |
957 | g_free(qcpu); | |
958 | return -err; | |
959 | } | |
960 | ||
961 | memset(&cpuid, 0, sizeof(cpuid)); | |
962 | cpuid.mask = 1; | |
963 | cpuid.leaf = 0x00000001; | |
964 | cpuid.u.mask.set.edx = CPUID_MCE | CPUID_MCA | CPUID_MTRR; | |
965 | ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_CPUID, | |
966 | &cpuid); | |
967 | if (ret == -1) { | |
968 | err = errno; | |
969 | error_report("NVMM: Failed to configure a virtual processor," | |
970 | " error=%d", err); | |
971 | g_free(qcpu); | |
972 | return -err; | |
973 | } | |
974 | ||
975 | ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_CALLBACKS, | |
976 | &nvmm_callbacks); | |
977 | if (ret == -1) { | |
978 | err = errno; | |
979 | error_report("NVMM: Failed to configure a virtual processor," | |
980 | " error=%d", err); | |
981 | g_free(qcpu); | |
982 | return -err; | |
983 | } | |
984 | ||
985 | if (qemu_mach.cap.arch.vcpu_conf_support & NVMM_CAP_ARCH_VCPU_CONF_TPR) { | |
986 | memset(&tpr, 0, sizeof(tpr)); | |
987 | tpr.exit_changed = 1; | |
988 | ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_TPR, &tpr); | |
989 | if (ret == -1) { | |
990 | err = errno; | |
991 | error_report("NVMM: Failed to configure a virtual processor," | |
992 | " error=%d", err); | |
993 | g_free(qcpu); | |
994 | return -err; | |
995 | } | |
996 | } | |
997 | ||
998 | cpu->vcpu_dirty = true; | |
999 | cpu->hax_vcpu = (struct hax_vcpu_state *)qcpu; | |
1000 | ||
1001 | return 0; | |
1002 | } | |
1003 | ||
1004 | int | |
1005 | nvmm_vcpu_exec(CPUState *cpu) | |
1006 | { | |
1007 | int ret, fatal; | |
1008 | ||
1009 | while (1) { | |
1010 | if (cpu->exception_index >= EXCP_INTERRUPT) { | |
1011 | ret = cpu->exception_index; | |
1012 | cpu->exception_index = -1; | |
1013 | break; | |
1014 | } | |
1015 | ||
1016 | fatal = nvmm_vcpu_loop(cpu); | |
1017 | ||
1018 | if (fatal) { | |
1019 | error_report("NVMM: Failed to execute a VCPU."); | |
1020 | abort(); | |
1021 | } | |
1022 | } | |
1023 | ||
1024 | return ret; | |
1025 | } | |
1026 | ||
1027 | void | |
1028 | nvmm_destroy_vcpu(CPUState *cpu) | |
1029 | { | |
1030 | struct nvmm_machine *mach = get_nvmm_mach(); | |
1031 | struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu); | |
1032 | ||
1033 | nvmm_vcpu_destroy(mach, &qcpu->vcpu); | |
1034 | g_free(cpu->hax_vcpu); | |
1035 | } | |
1036 | ||
1037 | /* -------------------------------------------------------------------------- */ | |
1038 | ||
1039 | static void | |
1040 | nvmm_update_mapping(hwaddr start_pa, ram_addr_t size, uintptr_t hva, | |
1041 | bool add, bool rom, const char *name) | |
1042 | { | |
1043 | struct nvmm_machine *mach = get_nvmm_mach(); | |
1044 | int ret, prot; | |
1045 | ||
1046 | if (add) { | |
1047 | prot = PROT_READ | PROT_EXEC; | |
1048 | if (!rom) { | |
1049 | prot |= PROT_WRITE; | |
1050 | } | |
1051 | ret = nvmm_gpa_map(mach, hva, start_pa, size, prot); | |
1052 | } else { | |
1053 | ret = nvmm_gpa_unmap(mach, hva, start_pa, size); | |
1054 | } | |
1055 | ||
1056 | if (ret == -1) { | |
1057 | error_report("NVMM: Failed to %s GPA range '%s' PA:%p, " | |
1058 | "Size:%p bytes, HostVA:%p, error=%d", | |
1059 | (add ? "map" : "unmap"), name, (void *)(uintptr_t)start_pa, | |
1060 | (void *)size, (void *)hva, errno); | |
1061 | } | |
1062 | } | |
1063 | ||
/*
 * Translate a MemoryRegionSection into an NVMM GPA map/unmap, clipping
 * the range to host-page alignment first.  Non-RAM regions are ignored.
 */
static void
nvmm_process_section(MemoryRegionSection *section, int add)
{
    MemoryRegion *mr = section->mr;
    hwaddr start_pa = section->offset_within_address_space;
    ram_addr_t size = int128_get64(section->size);
    unsigned int delta;
    uintptr_t hva;

    /* Only RAM-backed regions have host memory to map. */
    if (!memory_region_is_ram(mr)) {
        return;
    }

    /* Adjust start_pa and size so that they are page-aligned. */
    /* delta = bytes needed to round start_pa up to the next host page. */
    delta = qemu_real_host_page_size() - (start_pa & ~qemu_real_host_page_mask());
    delta &= ~qemu_real_host_page_mask();
    if (delta > size) {
        return;
    }
    start_pa += delta;
    size -= delta;
    /* Round the remaining size down to a whole number of host pages. */
    size &= qemu_real_host_page_mask();
    /* Nothing left after clipping, or start still misaligned: skip. */
    if (!size || (start_pa & ~qemu_real_host_page_mask())) {
        return;
    }

    /* Host virtual address backing the (clipped) start of the section. */
    hva = (uintptr_t)memory_region_get_ram_ptr(mr) +
        section->offset_within_region + delta;

    nvmm_update_mapping(start_pa, size, hva, add,
        memory_region_is_rom(mr), mr->name);
}
1096 | ||
/*
 * MemoryListener hook: a region section became visible; take a
 * reference on it and map it into the guest.
 */
static void
nvmm_region_add(MemoryListener *listener, MemoryRegionSection *section)
{
    memory_region_ref(section->mr);
    nvmm_process_section(section, 1);
}
1103 | ||
/*
 * MemoryListener hook: a region section went away; unmap it from the
 * guest and drop the reference taken in nvmm_region_add().
 */
static void
nvmm_region_del(MemoryListener *listener, MemoryRegionSection *section)
{
    nvmm_process_section(section, 0);
    memory_region_unref(section->mr);
}
1110 | ||
/* MemoryListener hook: no batching needed, updates are applied directly. */
static void
nvmm_transaction_begin(MemoryListener *listener)
{
    /* nothing */
}
1116 | ||
/* MemoryListener hook: nothing to flush, see nvmm_transaction_begin(). */
static void
nvmm_transaction_commit(MemoryListener *listener)
{
    /* nothing */
}
1122 | ||
/*
 * MemoryListener dirty-log hook: conservatively mark the entire RAM
 * section dirty (no finer-grained dirty tracking is done here).
 */
static void
nvmm_log_sync(MemoryListener *listener, MemoryRegionSection *section)
{
    MemoryRegion *mr = section->mr;

    if (!memory_region_is_ram(mr)) {
        return;
    }

    memory_region_set_dirty(mr, 0, int128_get64(section->size));
}
1134 | ||
/* Listener keeping the NVMM GPA mappings in sync with QEMU's memory map. */
static MemoryListener nvmm_memory_listener = {
    .name = "nvmm",
    .begin = nvmm_transaction_begin,
    .commit = nvmm_transaction_commit,
    .region_add = nvmm_region_add,
    .region_del = nvmm_region_del,
    .log_sync = nvmm_log_sync,
    .priority = 10,
};
1144 | ||
/*
 * RAMBlock notifier: register a newly added host RAM block with NVMM.
 * The full max_size is mapped up front (resizable blocks may use less
 * than max_size initially).  Failure is reported but not propagated.
 */
static void
nvmm_ram_block_added(RAMBlockNotifier *n, void *host, size_t size,
                     size_t max_size)
{
    struct nvmm_machine *mach = get_nvmm_mach();
    uintptr_t hva = (uintptr_t)host;
    int ret;

    ret = nvmm_hva_map(mach, hva, max_size);

    if (ret == -1) {
        error_report("NVMM: Failed to map HVA, HostVA:%p "
            "Size:%p bytes, error=%d",
            (void *)hva, (void *)size, errno);
    }
}
1161 | ||
/* Notifier hooking new RAM blocks into nvmm_ram_block_added(). */
static struct RAMBlockNotifier nvmm_ram_notifier = {
    .ram_block_added = nvmm_ram_block_added
};
1165 | ||
1166 | /* -------------------------------------------------------------------------- */ | |
1167 | ||
1168 | static int | |
1169 | nvmm_accel_init(MachineState *ms) | |
1170 | { | |
1171 | int ret, err; | |
1172 | ||
1173 | ret = nvmm_init(); | |
1174 | if (ret == -1) { | |
1175 | err = errno; | |
1176 | error_report("NVMM: Initialization failed, error=%d", errno); | |
1177 | return -err; | |
1178 | } | |
1179 | ||
1180 | ret = nvmm_capability(&qemu_mach.cap); | |
1181 | if (ret == -1) { | |
1182 | err = errno; | |
1183 | error_report("NVMM: Unable to fetch capability, error=%d", errno); | |
1184 | return -err; | |
1185 | } | |
1186 | if (qemu_mach.cap.version < NVMM_KERN_VERSION) { | |
1187 | error_report("NVMM: Unsupported version %u", qemu_mach.cap.version); | |
1188 | return -EPROGMISMATCH; | |
1189 | } | |
1190 | if (qemu_mach.cap.state_size != sizeof(struct nvmm_x64_state)) { | |
1191 | error_report("NVMM: Wrong state size %u", qemu_mach.cap.state_size); | |
1192 | return -EPROGMISMATCH; | |
1193 | } | |
1194 | ||
1195 | ret = nvmm_machine_create(&qemu_mach.mach); | |
1196 | if (ret == -1) { | |
1197 | err = errno; | |
1198 | error_report("NVMM: Machine creation failed, error=%d", errno); | |
1199 | return -err; | |
1200 | } | |
1201 | ||
1202 | memory_listener_register(&nvmm_memory_listener, &address_space_memory); | |
1203 | ram_block_notifier_add(&nvmm_ram_notifier); | |
1204 | ||
1205 | printf("NetBSD Virtual Machine Monitor accelerator is operational\n"); | |
1206 | return 0; | |
1207 | } | |
1208 | ||
/* Whether the NVMM accelerator was selected and initialized. */
int
nvmm_enabled(void)
{
    return nvmm_allowed;
}
1214 | ||
/* QOM class init: wire the NVMM accelerator into QEMU's AccelClass. */
static void
nvmm_accel_class_init(ObjectClass *oc, void *data)
{
    AccelClass *ac = ACCEL_CLASS(oc);
    ac->name = "NVMM";
    ac->init_machine = nvmm_accel_init;
    ac->allowed = &nvmm_allowed;
}
1223 | ||
/* QOM type describing the "nvmm" accelerator. */
static const TypeInfo nvmm_accel_type = {
    .name = ACCEL_CLASS_NAME("nvmm"),
    .parent = TYPE_ACCEL,
    .class_init = nvmm_accel_class_init,
};
1229 | ||
/* Register the accelerator type with QOM at module-init time. */
static void
nvmm_type_init(void)
{
    type_register_static(&nvmm_accel_type);
}

type_init(nvmm_type_init);