/*
 * Copyright (c) 2018-2019 Maxime Villard, All rights reserved.
 *
 * NetBSD Virtual Machine Monitor (NVMM) accelerator for QEMU.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/address-spaces.h"
#include "exec/ioport.h"
#include "qemu/accel.h"
#include "sysemu/nvmm.h"
#include "sysemu/cpus.h"
#include "sysemu/runstate.h"
#include "qemu/main-loop.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qemu/queue.h"
#include "migration/blocker.h"

#include "nvmm-accel-ops.h"

#include <nvmm.h>
29 struct AccelCPUState
{
30 struct nvmm_vcpu vcpu
;
34 /* Window-exiting for INTs/NMIs. */
38 /* The guest is in an interrupt shadow (POP SS, etc). */
43 struct nvmm_capability cap
;
44 struct nvmm_machine mach
;
47 /* -------------------------------------------------------------------------- */
49 static bool nvmm_allowed
;
50 static struct qemu_machine qemu_mach
;
52 static struct nvmm_machine
*
55 return &qemu_mach
.mach
;
58 /* -------------------------------------------------------------------------- */
61 nvmm_set_segment(struct nvmm_x64_state_seg
*nseg
, const SegmentCache
*qseg
)
63 uint32_t attrib
= qseg
->flags
;
65 nseg
->selector
= qseg
->selector
;
66 nseg
->limit
= qseg
->limit
;
67 nseg
->base
= qseg
->base
;
68 nseg
->attrib
.type
= __SHIFTOUT(attrib
, DESC_TYPE_MASK
);
69 nseg
->attrib
.s
= __SHIFTOUT(attrib
, DESC_S_MASK
);
70 nseg
->attrib
.dpl
= __SHIFTOUT(attrib
, DESC_DPL_MASK
);
71 nseg
->attrib
.p
= __SHIFTOUT(attrib
, DESC_P_MASK
);
72 nseg
->attrib
.avl
= __SHIFTOUT(attrib
, DESC_AVL_MASK
);
73 nseg
->attrib
.l
= __SHIFTOUT(attrib
, DESC_L_MASK
);
74 nseg
->attrib
.def
= __SHIFTOUT(attrib
, DESC_B_MASK
);
75 nseg
->attrib
.g
= __SHIFTOUT(attrib
, DESC_G_MASK
);
79 nvmm_set_registers(CPUState
*cpu
)
81 CPUX86State
*env
= cpu_env(cpu
);
82 struct nvmm_machine
*mach
= get_nvmm_mach();
83 AccelCPUState
*qcpu
= cpu
->accel
;
84 struct nvmm_vcpu
*vcpu
= &qcpu
->vcpu
;
85 struct nvmm_x64_state
*state
= vcpu
->state
;
90 assert(cpu_is_stopped(cpu
) || qemu_cpu_is_self(cpu
));
93 state
->gprs
[NVMM_X64_GPR_RAX
] = env
->regs
[R_EAX
];
94 state
->gprs
[NVMM_X64_GPR_RCX
] = env
->regs
[R_ECX
];
95 state
->gprs
[NVMM_X64_GPR_RDX
] = env
->regs
[R_EDX
];
96 state
->gprs
[NVMM_X64_GPR_RBX
] = env
->regs
[R_EBX
];
97 state
->gprs
[NVMM_X64_GPR_RSP
] = env
->regs
[R_ESP
];
98 state
->gprs
[NVMM_X64_GPR_RBP
] = env
->regs
[R_EBP
];
99 state
->gprs
[NVMM_X64_GPR_RSI
] = env
->regs
[R_ESI
];
100 state
->gprs
[NVMM_X64_GPR_RDI
] = env
->regs
[R_EDI
];
102 state
->gprs
[NVMM_X64_GPR_R8
] = env
->regs
[R_R8
];
103 state
->gprs
[NVMM_X64_GPR_R9
] = env
->regs
[R_R9
];
104 state
->gprs
[NVMM_X64_GPR_R10
] = env
->regs
[R_R10
];
105 state
->gprs
[NVMM_X64_GPR_R11
] = env
->regs
[R_R11
];
106 state
->gprs
[NVMM_X64_GPR_R12
] = env
->regs
[R_R12
];
107 state
->gprs
[NVMM_X64_GPR_R13
] = env
->regs
[R_R13
];
108 state
->gprs
[NVMM_X64_GPR_R14
] = env
->regs
[R_R14
];
109 state
->gprs
[NVMM_X64_GPR_R15
] = env
->regs
[R_R15
];
112 /* RIP and RFLAGS. */
113 state
->gprs
[NVMM_X64_GPR_RIP
] = env
->eip
;
114 state
->gprs
[NVMM_X64_GPR_RFLAGS
] = env
->eflags
;
117 nvmm_set_segment(&state
->segs
[NVMM_X64_SEG_CS
], &env
->segs
[R_CS
]);
118 nvmm_set_segment(&state
->segs
[NVMM_X64_SEG_DS
], &env
->segs
[R_DS
]);
119 nvmm_set_segment(&state
->segs
[NVMM_X64_SEG_ES
], &env
->segs
[R_ES
]);
120 nvmm_set_segment(&state
->segs
[NVMM_X64_SEG_FS
], &env
->segs
[R_FS
]);
121 nvmm_set_segment(&state
->segs
[NVMM_X64_SEG_GS
], &env
->segs
[R_GS
]);
122 nvmm_set_segment(&state
->segs
[NVMM_X64_SEG_SS
], &env
->segs
[R_SS
]);
124 /* Special segments. */
125 nvmm_set_segment(&state
->segs
[NVMM_X64_SEG_GDT
], &env
->gdt
);
126 nvmm_set_segment(&state
->segs
[NVMM_X64_SEG_LDT
], &env
->ldt
);
127 nvmm_set_segment(&state
->segs
[NVMM_X64_SEG_TR
], &env
->tr
);
128 nvmm_set_segment(&state
->segs
[NVMM_X64_SEG_IDT
], &env
->idt
);
130 /* Control registers. */
131 state
->crs
[NVMM_X64_CR_CR0
] = env
->cr
[0];
132 state
->crs
[NVMM_X64_CR_CR2
] = env
->cr
[2];
133 state
->crs
[NVMM_X64_CR_CR3
] = env
->cr
[3];
134 state
->crs
[NVMM_X64_CR_CR4
] = env
->cr
[4];
135 state
->crs
[NVMM_X64_CR_CR8
] = qcpu
->tpr
;
136 state
->crs
[NVMM_X64_CR_XCR0
] = env
->xcr0
;
138 /* Debug registers. */
139 state
->drs
[NVMM_X64_DR_DR0
] = env
->dr
[0];
140 state
->drs
[NVMM_X64_DR_DR1
] = env
->dr
[1];
141 state
->drs
[NVMM_X64_DR_DR2
] = env
->dr
[2];
142 state
->drs
[NVMM_X64_DR_DR3
] = env
->dr
[3];
143 state
->drs
[NVMM_X64_DR_DR6
] = env
->dr
[6];
144 state
->drs
[NVMM_X64_DR_DR7
] = env
->dr
[7];
147 state
->fpu
.fx_cw
= env
->fpuc
;
148 state
->fpu
.fx_sw
= (env
->fpus
& ~0x3800) | ((env
->fpstt
& 0x7) << 11);
149 state
->fpu
.fx_tw
= 0;
150 for (i
= 0; i
< 8; i
++) {
151 state
->fpu
.fx_tw
|= (!env
->fptags
[i
]) << i
;
153 state
->fpu
.fx_opcode
= env
->fpop
;
154 state
->fpu
.fx_ip
.fa_64
= env
->fpip
;
155 state
->fpu
.fx_dp
.fa_64
= env
->fpdp
;
156 state
->fpu
.fx_mxcsr
= env
->mxcsr
;
157 state
->fpu
.fx_mxcsr_mask
= 0x0000FFFF;
158 assert(sizeof(state
->fpu
.fx_87_ac
) == sizeof(env
->fpregs
));
159 memcpy(state
->fpu
.fx_87_ac
, env
->fpregs
, sizeof(env
->fpregs
));
160 for (i
= 0; i
< CPU_NB_REGS
; i
++) {
161 memcpy(&state
->fpu
.fx_xmm
[i
].xmm_bytes
[0],
162 &env
->xmm_regs
[i
].ZMM_Q(0), 8);
163 memcpy(&state
->fpu
.fx_xmm
[i
].xmm_bytes
[8],
164 &env
->xmm_regs
[i
].ZMM_Q(1), 8);
168 state
->msrs
[NVMM_X64_MSR_EFER
] = env
->efer
;
169 state
->msrs
[NVMM_X64_MSR_STAR
] = env
->star
;
171 state
->msrs
[NVMM_X64_MSR_LSTAR
] = env
->lstar
;
172 state
->msrs
[NVMM_X64_MSR_CSTAR
] = env
->cstar
;
173 state
->msrs
[NVMM_X64_MSR_SFMASK
] = env
->fmask
;
174 state
->msrs
[NVMM_X64_MSR_KERNELGSBASE
] = env
->kernelgsbase
;
176 state
->msrs
[NVMM_X64_MSR_SYSENTER_CS
] = env
->sysenter_cs
;
177 state
->msrs
[NVMM_X64_MSR_SYSENTER_ESP
] = env
->sysenter_esp
;
178 state
->msrs
[NVMM_X64_MSR_SYSENTER_EIP
] = env
->sysenter_eip
;
179 state
->msrs
[NVMM_X64_MSR_PAT
] = env
->pat
;
180 state
->msrs
[NVMM_X64_MSR_TSC
] = env
->tsc
;
183 NVMM_X64_STATE_SEGS
|
184 NVMM_X64_STATE_GPRS
|
187 NVMM_X64_STATE_MSRS
|
190 ret
= nvmm_vcpu_setstate(mach
, vcpu
, bitmap
);
192 error_report("NVMM: Failed to set virtual processor context,"
198 nvmm_get_segment(SegmentCache
*qseg
, const struct nvmm_x64_state_seg
*nseg
)
200 qseg
->selector
= nseg
->selector
;
201 qseg
->limit
= nseg
->limit
;
202 qseg
->base
= nseg
->base
;
205 __SHIFTIN((uint32_t)nseg
->attrib
.type
, DESC_TYPE_MASK
) |
206 __SHIFTIN((uint32_t)nseg
->attrib
.s
, DESC_S_MASK
) |
207 __SHIFTIN((uint32_t)nseg
->attrib
.dpl
, DESC_DPL_MASK
) |
208 __SHIFTIN((uint32_t)nseg
->attrib
.p
, DESC_P_MASK
) |
209 __SHIFTIN((uint32_t)nseg
->attrib
.avl
, DESC_AVL_MASK
) |
210 __SHIFTIN((uint32_t)nseg
->attrib
.l
, DESC_L_MASK
) |
211 __SHIFTIN((uint32_t)nseg
->attrib
.def
, DESC_B_MASK
) |
212 __SHIFTIN((uint32_t)nseg
->attrib
.g
, DESC_G_MASK
);
216 nvmm_get_registers(CPUState
*cpu
)
218 CPUX86State
*env
= cpu_env(cpu
);
219 struct nvmm_machine
*mach
= get_nvmm_mach();
220 AccelCPUState
*qcpu
= cpu
->accel
;
221 struct nvmm_vcpu
*vcpu
= &qcpu
->vcpu
;
222 X86CPU
*x86_cpu
= X86_CPU(cpu
);
223 struct nvmm_x64_state
*state
= vcpu
->state
;
224 uint64_t bitmap
, tpr
;
228 assert(cpu_is_stopped(cpu
) || qemu_cpu_is_self(cpu
));
231 NVMM_X64_STATE_SEGS
|
232 NVMM_X64_STATE_GPRS
|
235 NVMM_X64_STATE_MSRS
|
238 ret
= nvmm_vcpu_getstate(mach
, vcpu
, bitmap
);
240 error_report("NVMM: Failed to get virtual processor context,"
245 env
->regs
[R_EAX
] = state
->gprs
[NVMM_X64_GPR_RAX
];
246 env
->regs
[R_ECX
] = state
->gprs
[NVMM_X64_GPR_RCX
];
247 env
->regs
[R_EDX
] = state
->gprs
[NVMM_X64_GPR_RDX
];
248 env
->regs
[R_EBX
] = state
->gprs
[NVMM_X64_GPR_RBX
];
249 env
->regs
[R_ESP
] = state
->gprs
[NVMM_X64_GPR_RSP
];
250 env
->regs
[R_EBP
] = state
->gprs
[NVMM_X64_GPR_RBP
];
251 env
->regs
[R_ESI
] = state
->gprs
[NVMM_X64_GPR_RSI
];
252 env
->regs
[R_EDI
] = state
->gprs
[NVMM_X64_GPR_RDI
];
254 env
->regs
[R_R8
] = state
->gprs
[NVMM_X64_GPR_R8
];
255 env
->regs
[R_R9
] = state
->gprs
[NVMM_X64_GPR_R9
];
256 env
->regs
[R_R10
] = state
->gprs
[NVMM_X64_GPR_R10
];
257 env
->regs
[R_R11
] = state
->gprs
[NVMM_X64_GPR_R11
];
258 env
->regs
[R_R12
] = state
->gprs
[NVMM_X64_GPR_R12
];
259 env
->regs
[R_R13
] = state
->gprs
[NVMM_X64_GPR_R13
];
260 env
->regs
[R_R14
] = state
->gprs
[NVMM_X64_GPR_R14
];
261 env
->regs
[R_R15
] = state
->gprs
[NVMM_X64_GPR_R15
];
264 /* RIP and RFLAGS. */
265 env
->eip
= state
->gprs
[NVMM_X64_GPR_RIP
];
266 env
->eflags
= state
->gprs
[NVMM_X64_GPR_RFLAGS
];
269 nvmm_get_segment(&env
->segs
[R_ES
], &state
->segs
[NVMM_X64_SEG_ES
]);
270 nvmm_get_segment(&env
->segs
[R_CS
], &state
->segs
[NVMM_X64_SEG_CS
]);
271 nvmm_get_segment(&env
->segs
[R_SS
], &state
->segs
[NVMM_X64_SEG_SS
]);
272 nvmm_get_segment(&env
->segs
[R_DS
], &state
->segs
[NVMM_X64_SEG_DS
]);
273 nvmm_get_segment(&env
->segs
[R_FS
], &state
->segs
[NVMM_X64_SEG_FS
]);
274 nvmm_get_segment(&env
->segs
[R_GS
], &state
->segs
[NVMM_X64_SEG_GS
]);
276 /* Special segments. */
277 nvmm_get_segment(&env
->gdt
, &state
->segs
[NVMM_X64_SEG_GDT
]);
278 nvmm_get_segment(&env
->ldt
, &state
->segs
[NVMM_X64_SEG_LDT
]);
279 nvmm_get_segment(&env
->tr
, &state
->segs
[NVMM_X64_SEG_TR
]);
280 nvmm_get_segment(&env
->idt
, &state
->segs
[NVMM_X64_SEG_IDT
]);
282 /* Control registers. */
283 env
->cr
[0] = state
->crs
[NVMM_X64_CR_CR0
];
284 env
->cr
[2] = state
->crs
[NVMM_X64_CR_CR2
];
285 env
->cr
[3] = state
->crs
[NVMM_X64_CR_CR3
];
286 env
->cr
[4] = state
->crs
[NVMM_X64_CR_CR4
];
287 tpr
= state
->crs
[NVMM_X64_CR_CR8
];
288 if (tpr
!= qcpu
->tpr
) {
290 cpu_set_apic_tpr(x86_cpu
->apic_state
, tpr
);
292 env
->xcr0
= state
->crs
[NVMM_X64_CR_XCR0
];
294 /* Debug registers. */
295 env
->dr
[0] = state
->drs
[NVMM_X64_DR_DR0
];
296 env
->dr
[1] = state
->drs
[NVMM_X64_DR_DR1
];
297 env
->dr
[2] = state
->drs
[NVMM_X64_DR_DR2
];
298 env
->dr
[3] = state
->drs
[NVMM_X64_DR_DR3
];
299 env
->dr
[6] = state
->drs
[NVMM_X64_DR_DR6
];
300 env
->dr
[7] = state
->drs
[NVMM_X64_DR_DR7
];
303 env
->fpuc
= state
->fpu
.fx_cw
;
304 env
->fpstt
= (state
->fpu
.fx_sw
>> 11) & 0x7;
305 env
->fpus
= state
->fpu
.fx_sw
& ~0x3800;
306 for (i
= 0; i
< 8; i
++) {
307 env
->fptags
[i
] = !((state
->fpu
.fx_tw
>> i
) & 1);
309 env
->fpop
= state
->fpu
.fx_opcode
;
310 env
->fpip
= state
->fpu
.fx_ip
.fa_64
;
311 env
->fpdp
= state
->fpu
.fx_dp
.fa_64
;
312 env
->mxcsr
= state
->fpu
.fx_mxcsr
;
313 assert(sizeof(state
->fpu
.fx_87_ac
) == sizeof(env
->fpregs
));
314 memcpy(env
->fpregs
, state
->fpu
.fx_87_ac
, sizeof(env
->fpregs
));
315 for (i
= 0; i
< CPU_NB_REGS
; i
++) {
316 memcpy(&env
->xmm_regs
[i
].ZMM_Q(0),
317 &state
->fpu
.fx_xmm
[i
].xmm_bytes
[0], 8);
318 memcpy(&env
->xmm_regs
[i
].ZMM_Q(1),
319 &state
->fpu
.fx_xmm
[i
].xmm_bytes
[8], 8);
323 env
->efer
= state
->msrs
[NVMM_X64_MSR_EFER
];
324 env
->star
= state
->msrs
[NVMM_X64_MSR_STAR
];
326 env
->lstar
= state
->msrs
[NVMM_X64_MSR_LSTAR
];
327 env
->cstar
= state
->msrs
[NVMM_X64_MSR_CSTAR
];
328 env
->fmask
= state
->msrs
[NVMM_X64_MSR_SFMASK
];
329 env
->kernelgsbase
= state
->msrs
[NVMM_X64_MSR_KERNELGSBASE
];
331 env
->sysenter_cs
= state
->msrs
[NVMM_X64_MSR_SYSENTER_CS
];
332 env
->sysenter_esp
= state
->msrs
[NVMM_X64_MSR_SYSENTER_ESP
];
333 env
->sysenter_eip
= state
->msrs
[NVMM_X64_MSR_SYSENTER_EIP
];
334 env
->pat
= state
->msrs
[NVMM_X64_MSR_PAT
];
335 env
->tsc
= state
->msrs
[NVMM_X64_MSR_TSC
];
337 x86_update_hflags(env
);
341 nvmm_can_take_int(CPUState
*cpu
)
343 CPUX86State
*env
= cpu_env(cpu
);
344 AccelCPUState
*qcpu
= cpu
->accel
;
345 struct nvmm_vcpu
*vcpu
= &qcpu
->vcpu
;
346 struct nvmm_machine
*mach
= get_nvmm_mach();
348 if (qcpu
->int_window_exit
) {
352 if (qcpu
->int_shadow
|| !(env
->eflags
& IF_MASK
)) {
353 struct nvmm_x64_state
*state
= vcpu
->state
;
355 /* Exit on interrupt window. */
356 nvmm_vcpu_getstate(mach
, vcpu
, NVMM_X64_STATE_INTR
);
357 state
->intr
.int_window_exiting
= 1;
358 nvmm_vcpu_setstate(mach
, vcpu
, NVMM_X64_STATE_INTR
);
367 nvmm_can_take_nmi(CPUState
*cpu
)
369 AccelCPUState
*qcpu
= cpu
->accel
;
372 * Contrary to INTs, NMIs always schedule an exit when they are
373 * completed. Therefore, if window-exiting is enabled, it means
376 if (qcpu
->nmi_window_exit
) {
384 * Called before the VCPU is run. We inject events generated by the I/O
385 * thread, and synchronize the guest TPR.
388 nvmm_vcpu_pre_run(CPUState
*cpu
)
390 CPUX86State
*env
= cpu_env(cpu
);
391 struct nvmm_machine
*mach
= get_nvmm_mach();
392 AccelCPUState
*qcpu
= cpu
->accel
;
393 struct nvmm_vcpu
*vcpu
= &qcpu
->vcpu
;
394 X86CPU
*x86_cpu
= X86_CPU(cpu
);
395 struct nvmm_x64_state
*state
= vcpu
->state
;
396 struct nvmm_vcpu_event
*event
= vcpu
->event
;
397 bool has_event
= false;
398 bool sync_tpr
= false;
402 qemu_mutex_lock_iothread();
404 tpr
= cpu_get_apic_tpr(x86_cpu
->apic_state
);
405 if (tpr
!= qcpu
->tpr
) {
411 * Force the VCPU out of its inner loop to process any INIT requests
412 * or commit pending TPR access.
414 if (cpu
->interrupt_request
& (CPU_INTERRUPT_INIT
| CPU_INTERRUPT_TPR
)) {
415 cpu
->exit_request
= 1;
418 if (!has_event
&& (cpu
->interrupt_request
& CPU_INTERRUPT_NMI
)) {
419 if (nvmm_can_take_nmi(cpu
)) {
420 cpu
->interrupt_request
&= ~CPU_INTERRUPT_NMI
;
421 event
->type
= NVMM_VCPU_EVENT_INTR
;
427 if (!has_event
&& (cpu
->interrupt_request
& CPU_INTERRUPT_HARD
)) {
428 if (nvmm_can_take_int(cpu
)) {
429 cpu
->interrupt_request
&= ~CPU_INTERRUPT_HARD
;
430 event
->type
= NVMM_VCPU_EVENT_INTR
;
431 event
->vector
= cpu_get_pic_interrupt(env
);
436 /* Don't want SMIs. */
437 if (cpu
->interrupt_request
& CPU_INTERRUPT_SMI
) {
438 cpu
->interrupt_request
&= ~CPU_INTERRUPT_SMI
;
442 ret
= nvmm_vcpu_getstate(mach
, vcpu
, NVMM_X64_STATE_CRS
);
444 error_report("NVMM: Failed to get CPU state,"
448 state
->crs
[NVMM_X64_CR_CR8
] = qcpu
->tpr
;
450 ret
= nvmm_vcpu_setstate(mach
, vcpu
, NVMM_X64_STATE_CRS
);
452 error_report("NVMM: Failed to set CPU state,"
458 ret
= nvmm_vcpu_inject(mach
, vcpu
);
460 error_report("NVMM: Failed to inject event,"
465 qemu_mutex_unlock_iothread();
469 * Called after the VCPU ran. We synchronize the host view of the TPR and
473 nvmm_vcpu_post_run(CPUState
*cpu
, struct nvmm_vcpu_exit
*exit
)
475 AccelCPUState
*qcpu
= cpu
->accel
;
476 X86CPU
*x86_cpu
= X86_CPU(cpu
);
477 CPUX86State
*env
= &x86_cpu
->env
;
480 env
->eflags
= exit
->exitstate
.rflags
;
481 qcpu
->int_shadow
= exit
->exitstate
.int_shadow
;
482 qcpu
->int_window_exit
= exit
->exitstate
.int_window_exiting
;
483 qcpu
->nmi_window_exit
= exit
->exitstate
.nmi_window_exiting
;
485 tpr
= exit
->exitstate
.cr8
;
486 if (qcpu
->tpr
!= tpr
) {
488 qemu_mutex_lock_iothread();
489 cpu_set_apic_tpr(x86_cpu
->apic_state
, qcpu
->tpr
);
490 qemu_mutex_unlock_iothread();
494 /* -------------------------------------------------------------------------- */
497 nvmm_io_callback(struct nvmm_io
*io
)
499 MemTxAttrs attrs
= { 0 };
502 ret
= address_space_rw(&address_space_io
, io
->port
, attrs
, io
->data
,
504 if (ret
!= MEMTX_OK
) {
505 error_report("NVMM: I/O Transaction Failed "
506 "[%s, port=%u, size=%zu]", (io
->in
? "in" : "out"),
510 /* Needed, otherwise infinite loop. */
511 current_cpu
->vcpu_dirty
= false;
515 nvmm_mem_callback(struct nvmm_mem
*mem
)
517 cpu_physical_memory_rw(mem
->gpa
, mem
->data
, mem
->size
, mem
->write
);
519 /* Needed, otherwise infinite loop. */
520 current_cpu
->vcpu_dirty
= false;
523 static struct nvmm_assist_callbacks nvmm_callbacks
= {
524 .io
= nvmm_io_callback
,
525 .mem
= nvmm_mem_callback
528 /* -------------------------------------------------------------------------- */
531 nvmm_handle_mem(struct nvmm_machine
*mach
, struct nvmm_vcpu
*vcpu
)
535 ret
= nvmm_assist_mem(mach
, vcpu
);
537 error_report("NVMM: Mem Assist Failed [gpa=%p]",
538 (void *)vcpu
->exit
->u
.mem
.gpa
);
545 nvmm_handle_io(struct nvmm_machine
*mach
, struct nvmm_vcpu
*vcpu
)
549 ret
= nvmm_assist_io(mach
, vcpu
);
551 error_report("NVMM: I/O Assist Failed [port=%d]",
552 (int)vcpu
->exit
->u
.io
.port
);
559 nvmm_handle_rdmsr(struct nvmm_machine
*mach
, CPUState
*cpu
,
560 struct nvmm_vcpu_exit
*exit
)
562 AccelCPUState
*qcpu
= cpu
->accel
;
563 struct nvmm_vcpu
*vcpu
= &qcpu
->vcpu
;
564 X86CPU
*x86_cpu
= X86_CPU(cpu
);
565 struct nvmm_x64_state
*state
= vcpu
->state
;
569 switch (exit
->u
.rdmsr
.msr
) {
570 case MSR_IA32_APICBASE
:
571 val
= cpu_get_apic_base(x86_cpu
->apic_state
);
574 case MSR_MTRRdefType
:
579 default: /* More MSRs to add? */
581 error_report("NVMM: Unexpected RDMSR 0x%x, ignored",
586 ret
= nvmm_vcpu_getstate(mach
, vcpu
, NVMM_X64_STATE_GPRS
);
591 state
->gprs
[NVMM_X64_GPR_RAX
] = (val
& 0xFFFFFFFF);
592 state
->gprs
[NVMM_X64_GPR_RDX
] = (val
>> 32);
593 state
->gprs
[NVMM_X64_GPR_RIP
] = exit
->u
.rdmsr
.npc
;
595 ret
= nvmm_vcpu_setstate(mach
, vcpu
, NVMM_X64_STATE_GPRS
);
604 nvmm_handle_wrmsr(struct nvmm_machine
*mach
, CPUState
*cpu
,
605 struct nvmm_vcpu_exit
*exit
)
607 AccelCPUState
*qcpu
= cpu
->accel
;
608 struct nvmm_vcpu
*vcpu
= &qcpu
->vcpu
;
609 X86CPU
*x86_cpu
= X86_CPU(cpu
);
610 struct nvmm_x64_state
*state
= vcpu
->state
;
614 val
= exit
->u
.wrmsr
.val
;
616 switch (exit
->u
.wrmsr
.msr
) {
617 case MSR_IA32_APICBASE
:
618 cpu_set_apic_base(x86_cpu
->apic_state
, val
);
620 case MSR_MTRRdefType
:
623 default: /* More MSRs to add? */
624 error_report("NVMM: Unexpected WRMSR 0x%x [val=0x%lx], ignored",
625 exit
->u
.wrmsr
.msr
, val
);
629 ret
= nvmm_vcpu_getstate(mach
, vcpu
, NVMM_X64_STATE_GPRS
);
634 state
->gprs
[NVMM_X64_GPR_RIP
] = exit
->u
.wrmsr
.npc
;
636 ret
= nvmm_vcpu_setstate(mach
, vcpu
, NVMM_X64_STATE_GPRS
);
645 nvmm_handle_halted(struct nvmm_machine
*mach
, CPUState
*cpu
,
646 struct nvmm_vcpu_exit
*exit
)
648 CPUX86State
*env
= cpu_env(cpu
);
651 qemu_mutex_lock_iothread();
653 if (!((cpu
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
654 (env
->eflags
& IF_MASK
)) &&
655 !(cpu
->interrupt_request
& CPU_INTERRUPT_NMI
)) {
656 cpu
->exception_index
= EXCP_HLT
;
661 qemu_mutex_unlock_iothread();
667 nvmm_inject_ud(struct nvmm_machine
*mach
, struct nvmm_vcpu
*vcpu
)
669 struct nvmm_vcpu_event
*event
= vcpu
->event
;
671 event
->type
= NVMM_VCPU_EVENT_EXCP
;
673 event
->u
.excp
.error
= 0;
675 return nvmm_vcpu_inject(mach
, vcpu
);
679 nvmm_vcpu_loop(CPUState
*cpu
)
681 struct nvmm_machine
*mach
= get_nvmm_mach();
682 AccelCPUState
*qcpu
= cpu
->accel
;
683 struct nvmm_vcpu
*vcpu
= &qcpu
->vcpu
;
684 X86CPU
*x86_cpu
= X86_CPU(cpu
);
685 CPUX86State
*env
= &x86_cpu
->env
;
686 struct nvmm_vcpu_exit
*exit
= vcpu
->exit
;
690 * Some asynchronous events must be handled outside of the inner
691 * VCPU loop. They are handled here.
693 if (cpu
->interrupt_request
& CPU_INTERRUPT_INIT
) {
694 nvmm_cpu_synchronize_state(cpu
);
695 do_cpu_init(x86_cpu
);
696 /* set int/nmi windows back to the reset state */
698 if (cpu
->interrupt_request
& CPU_INTERRUPT_POLL
) {
699 cpu
->interrupt_request
&= ~CPU_INTERRUPT_POLL
;
700 apic_poll_irq(x86_cpu
->apic_state
);
702 if (((cpu
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
703 (env
->eflags
& IF_MASK
)) ||
704 (cpu
->interrupt_request
& CPU_INTERRUPT_NMI
)) {
707 if (cpu
->interrupt_request
& CPU_INTERRUPT_SIPI
) {
708 nvmm_cpu_synchronize_state(cpu
);
709 do_cpu_sipi(x86_cpu
);
711 if (cpu
->interrupt_request
& CPU_INTERRUPT_TPR
) {
712 cpu
->interrupt_request
&= ~CPU_INTERRUPT_TPR
;
713 nvmm_cpu_synchronize_state(cpu
);
714 apic_handle_tpr_access_report(x86_cpu
->apic_state
, env
->eip
,
715 env
->tpr_access_type
);
719 cpu
->exception_index
= EXCP_HLT
;
720 qatomic_set(&cpu
->exit_request
, false);
724 qemu_mutex_unlock_iothread();
731 if (cpu
->vcpu_dirty
) {
732 nvmm_set_registers(cpu
);
733 cpu
->vcpu_dirty
= false;
737 cpu
->exception_index
= EXCP_INTERRUPT
;
743 nvmm_vcpu_pre_run(cpu
);
745 if (qatomic_read(&cpu
->exit_request
)) {
746 #if NVMM_USER_VERSION >= 2
747 nvmm_vcpu_stop(vcpu
);
749 qemu_cpu_kick_self();
753 /* Read exit_request before the kernel reads the immediate exit flag */
755 ret
= nvmm_vcpu_run(mach
, vcpu
);
757 error_report("NVMM: Failed to exec a virtual processor,"
762 nvmm_vcpu_post_run(cpu
, exit
);
764 switch (exit
->reason
) {
765 case NVMM_VCPU_EXIT_NONE
:
767 #if NVMM_USER_VERSION >= 2
768 case NVMM_VCPU_EXIT_STOPPED
:
770 * The kernel cleared the immediate exit flag; cpu->exit_request
771 * must be cleared after
777 case NVMM_VCPU_EXIT_MEMORY
:
778 ret
= nvmm_handle_mem(mach
, vcpu
);
780 case NVMM_VCPU_EXIT_IO
:
781 ret
= nvmm_handle_io(mach
, vcpu
);
783 case NVMM_VCPU_EXIT_INT_READY
:
784 case NVMM_VCPU_EXIT_NMI_READY
:
785 case NVMM_VCPU_EXIT_TPR_CHANGED
:
787 case NVMM_VCPU_EXIT_HALTED
:
788 ret
= nvmm_handle_halted(mach
, cpu
, exit
);
790 case NVMM_VCPU_EXIT_SHUTDOWN
:
791 qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET
);
792 cpu
->exception_index
= EXCP_INTERRUPT
;
795 case NVMM_VCPU_EXIT_RDMSR
:
796 ret
= nvmm_handle_rdmsr(mach
, cpu
, exit
);
798 case NVMM_VCPU_EXIT_WRMSR
:
799 ret
= nvmm_handle_wrmsr(mach
, cpu
, exit
);
801 case NVMM_VCPU_EXIT_MONITOR
:
802 case NVMM_VCPU_EXIT_MWAIT
:
803 ret
= nvmm_inject_ud(mach
, vcpu
);
806 error_report("NVMM: Unexpected VM exit code 0x%lx [hw=0x%lx]",
807 exit
->reason
, exit
->u
.inv
.hwcode
);
808 nvmm_get_registers(cpu
);
809 qemu_mutex_lock_iothread();
810 qemu_system_guest_panicked(cpu_get_crash_info(cpu
));
811 qemu_mutex_unlock_iothread();
818 qemu_mutex_lock_iothread();
820 qatomic_set(&cpu
->exit_request
, false);
825 /* -------------------------------------------------------------------------- */
828 do_nvmm_cpu_synchronize_state(CPUState
*cpu
, run_on_cpu_data arg
)
830 nvmm_get_registers(cpu
);
831 cpu
->vcpu_dirty
= true;
835 do_nvmm_cpu_synchronize_post_reset(CPUState
*cpu
, run_on_cpu_data arg
)
837 nvmm_set_registers(cpu
);
838 cpu
->vcpu_dirty
= false;
842 do_nvmm_cpu_synchronize_post_init(CPUState
*cpu
, run_on_cpu_data arg
)
844 nvmm_set_registers(cpu
);
845 cpu
->vcpu_dirty
= false;
849 do_nvmm_cpu_synchronize_pre_loadvm(CPUState
*cpu
, run_on_cpu_data arg
)
851 cpu
->vcpu_dirty
= true;
854 void nvmm_cpu_synchronize_state(CPUState
*cpu
)
856 if (!cpu
->vcpu_dirty
) {
857 run_on_cpu(cpu
, do_nvmm_cpu_synchronize_state
, RUN_ON_CPU_NULL
);
861 void nvmm_cpu_synchronize_post_reset(CPUState
*cpu
)
863 run_on_cpu(cpu
, do_nvmm_cpu_synchronize_post_reset
, RUN_ON_CPU_NULL
);
866 void nvmm_cpu_synchronize_post_init(CPUState
*cpu
)
868 run_on_cpu(cpu
, do_nvmm_cpu_synchronize_post_init
, RUN_ON_CPU_NULL
);
871 void nvmm_cpu_synchronize_pre_loadvm(CPUState
*cpu
)
873 run_on_cpu(cpu
, do_nvmm_cpu_synchronize_pre_loadvm
, RUN_ON_CPU_NULL
);
876 /* -------------------------------------------------------------------------- */
878 static Error
*nvmm_migration_blocker
;
881 * The nvmm_vcpu_stop() mechanism breaks races between entering the VMM
882 * and another thread signaling the vCPU thread to exit.
886 nvmm_ipi_signal(int sigcpu
)
889 AccelCPUState
*qcpu
= current_cpu
->accel
;
890 #if NVMM_USER_VERSION >= 2
891 struct nvmm_vcpu
*vcpu
= &qcpu
->vcpu
;
892 nvmm_vcpu_stop(vcpu
);
900 nvmm_init_cpu_signals(void)
902 struct sigaction sigact
;
905 /* Install the IPI handler. */
906 memset(&sigact
, 0, sizeof(sigact
));
907 sigact
.sa_handler
= nvmm_ipi_signal
;
908 sigaction(SIG_IPI
, &sigact
, NULL
);
910 /* Allow IPIs on the current thread. */
911 sigprocmask(SIG_BLOCK
, NULL
, &set
);
912 sigdelset(&set
, SIG_IPI
);
913 pthread_sigmask(SIG_SETMASK
, &set
, NULL
);
917 nvmm_init_vcpu(CPUState
*cpu
)
919 struct nvmm_machine
*mach
= get_nvmm_mach();
920 struct nvmm_vcpu_conf_cpuid cpuid
;
921 struct nvmm_vcpu_conf_tpr tpr
;
922 Error
*local_error
= NULL
;
926 nvmm_init_cpu_signals();
928 if (nvmm_migration_blocker
== NULL
) {
929 error_setg(&nvmm_migration_blocker
,
930 "NVMM: Migration not supported");
932 if (migrate_add_blocker(nvmm_migration_blocker
, &local_error
) < 0) {
933 error_report_err(local_error
);
934 error_free(nvmm_migration_blocker
);
939 qcpu
= g_new0(AccelCPUState
, 1);
941 ret
= nvmm_vcpu_create(mach
, cpu
->cpu_index
, &qcpu
->vcpu
);
944 error_report("NVMM: Failed to create a virtual processor,"
950 memset(&cpuid
, 0, sizeof(cpuid
));
952 cpuid
.leaf
= 0x00000001;
953 cpuid
.u
.mask
.set
.edx
= CPUID_MCE
| CPUID_MCA
| CPUID_MTRR
;
954 ret
= nvmm_vcpu_configure(mach
, &qcpu
->vcpu
, NVMM_VCPU_CONF_CPUID
,
958 error_report("NVMM: Failed to configure a virtual processor,"
964 ret
= nvmm_vcpu_configure(mach
, &qcpu
->vcpu
, NVMM_VCPU_CONF_CALLBACKS
,
968 error_report("NVMM: Failed to configure a virtual processor,"
974 if (qemu_mach
.cap
.arch
.vcpu_conf_support
& NVMM_CAP_ARCH_VCPU_CONF_TPR
) {
975 memset(&tpr
, 0, sizeof(tpr
));
976 tpr
.exit_changed
= 1;
977 ret
= nvmm_vcpu_configure(mach
, &qcpu
->vcpu
, NVMM_VCPU_CONF_TPR
, &tpr
);
980 error_report("NVMM: Failed to configure a virtual processor,"
987 cpu
->vcpu_dirty
= true;
994 nvmm_vcpu_exec(CPUState
*cpu
)
999 if (cpu
->exception_index
>= EXCP_INTERRUPT
) {
1000 ret
= cpu
->exception_index
;
1001 cpu
->exception_index
= -1;
1005 fatal
= nvmm_vcpu_loop(cpu
);
1008 error_report("NVMM: Failed to execute a VCPU.");
1017 nvmm_destroy_vcpu(CPUState
*cpu
)
1019 struct nvmm_machine
*mach
= get_nvmm_mach();
1020 AccelCPUState
*qcpu
= cpu
->accel
;
1022 nvmm_vcpu_destroy(mach
, &qcpu
->vcpu
);
1026 /* -------------------------------------------------------------------------- */
1029 nvmm_update_mapping(hwaddr start_pa
, ram_addr_t size
, uintptr_t hva
,
1030 bool add
, bool rom
, const char *name
)
1032 struct nvmm_machine
*mach
= get_nvmm_mach();
1036 prot
= PROT_READ
| PROT_EXEC
;
1040 ret
= nvmm_gpa_map(mach
, hva
, start_pa
, size
, prot
);
1042 ret
= nvmm_gpa_unmap(mach
, hva
, start_pa
, size
);
1046 error_report("NVMM: Failed to %s GPA range '%s' PA:%p, "
1047 "Size:%p bytes, HostVA:%p, error=%d",
1048 (add
? "map" : "unmap"), name
, (void *)(uintptr_t)start_pa
,
1049 (void *)size
, (void *)hva
, errno
);
1054 nvmm_process_section(MemoryRegionSection
*section
, int add
)
1056 MemoryRegion
*mr
= section
->mr
;
1057 hwaddr start_pa
= section
->offset_within_address_space
;
1058 ram_addr_t size
= int128_get64(section
->size
);
1062 if (!memory_region_is_ram(mr
)) {
1066 /* Adjust start_pa and size so that they are page-aligned. */
1067 delta
= qemu_real_host_page_size() - (start_pa
& ~qemu_real_host_page_mask());
1068 delta
&= ~qemu_real_host_page_mask();
1074 size
&= qemu_real_host_page_mask();
1075 if (!size
|| (start_pa
& ~qemu_real_host_page_mask())) {
1079 hva
= (uintptr_t)memory_region_get_ram_ptr(mr
) +
1080 section
->offset_within_region
+ delta
;
1082 nvmm_update_mapping(start_pa
, size
, hva
, add
,
1083 memory_region_is_rom(mr
), mr
->name
);
1087 nvmm_region_add(MemoryListener
*listener
, MemoryRegionSection
*section
)
1089 memory_region_ref(section
->mr
);
1090 nvmm_process_section(section
, 1);
1094 nvmm_region_del(MemoryListener
*listener
, MemoryRegionSection
*section
)
1096 nvmm_process_section(section
, 0);
1097 memory_region_unref(section
->mr
);
1101 nvmm_transaction_begin(MemoryListener
*listener
)
1107 nvmm_transaction_commit(MemoryListener
*listener
)
1113 nvmm_log_sync(MemoryListener
*listener
, MemoryRegionSection
*section
)
1115 MemoryRegion
*mr
= section
->mr
;
1117 if (!memory_region_is_ram(mr
)) {
1121 memory_region_set_dirty(mr
, 0, int128_get64(section
->size
));
1124 static MemoryListener nvmm_memory_listener
= {
1126 .begin
= nvmm_transaction_begin
,
1127 .commit
= nvmm_transaction_commit
,
1128 .region_add
= nvmm_region_add
,
1129 .region_del
= nvmm_region_del
,
1130 .log_sync
= nvmm_log_sync
,
1131 .priority
= MEMORY_LISTENER_PRIORITY_ACCEL
,
1135 nvmm_ram_block_added(RAMBlockNotifier
*n
, void *host
, size_t size
,
1138 struct nvmm_machine
*mach
= get_nvmm_mach();
1139 uintptr_t hva
= (uintptr_t)host
;
1142 ret
= nvmm_hva_map(mach
, hva
, max_size
);
1145 error_report("NVMM: Failed to map HVA, HostVA:%p "
1146 "Size:%p bytes, error=%d",
1147 (void *)hva
, (void *)size
, errno
);
1151 static struct RAMBlockNotifier nvmm_ram_notifier
= {
1152 .ram_block_added
= nvmm_ram_block_added
1155 /* -------------------------------------------------------------------------- */
1158 nvmm_accel_init(MachineState
*ms
)
1165 error_report("NVMM: Initialization failed, error=%d", errno
);
1169 ret
= nvmm_capability(&qemu_mach
.cap
);
1172 error_report("NVMM: Unable to fetch capability, error=%d", errno
);
1175 if (qemu_mach
.cap
.version
< NVMM_KERN_VERSION
) {
1176 error_report("NVMM: Unsupported version %u", qemu_mach
.cap
.version
);
1177 return -EPROGMISMATCH
;
1179 if (qemu_mach
.cap
.state_size
!= sizeof(struct nvmm_x64_state
)) {
1180 error_report("NVMM: Wrong state size %u", qemu_mach
.cap
.state_size
);
1181 return -EPROGMISMATCH
;
1184 ret
= nvmm_machine_create(&qemu_mach
.mach
);
1187 error_report("NVMM: Machine creation failed, error=%d", errno
);
1191 memory_listener_register(&nvmm_memory_listener
, &address_space_memory
);
1192 ram_block_notifier_add(&nvmm_ram_notifier
);
1194 printf("NetBSD Virtual Machine Monitor accelerator is operational\n");
1201 return nvmm_allowed
;
1205 nvmm_accel_class_init(ObjectClass
*oc
, void *data
)
1207 AccelClass
*ac
= ACCEL_CLASS(oc
);
1209 ac
->init_machine
= nvmm_accel_init
;
1210 ac
->allowed
= &nvmm_allowed
;
1213 static const TypeInfo nvmm_accel_type
= {
1214 .name
= ACCEL_CLASS_NAME("nvmm"),
1215 .parent
= TYPE_ACCEL
,
1216 .class_init
= nvmm_accel_class_init
,
1220 nvmm_type_init(void)
1222 type_register_static(&nvmm_accel_type
);
1225 type_init(nvmm_type_init
);