2 * Copyright (c) 2018-2019 Maxime Villard, All rights reserved.
4 * NetBSD Virtual Machine Monitor (NVMM) accelerator for QEMU.
6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
10 #include "qemu/osdep.h"
12 #include "exec/address-spaces.h"
13 #include "exec/ioport.h"
14 #include "qemu/accel.h"
15 #include "sysemu/nvmm.h"
16 #include "sysemu/cpus.h"
17 #include "sysemu/runstate.h"
18 #include "qemu/main-loop.h"
19 #include "qemu/error-report.h"
20 #include "qapi/error.h"
21 #include "qemu/queue.h"
22 #include "migration/blocker.h"
25 #include "nvmm-accel-ops.h"
/* Per-vCPU NVMM context, hung off CPUState via the hax_vcpu pointer slot. */
struct qemu_vcpu {
    struct nvmm_vcpu vcpu;      /* libnvmm vCPU handle (comm page, state). */
    uint8_t tpr;                /* Cached guest TPR (CR8) shadow. */
    bool stop;                  /* Request to leave the inner vCPU loop. */

    /* Window-exiting for INTs/NMIs. */
    bool int_window_exit;
    bool nmi_window_exit;

    /* The guest is in an interrupt shadow (POP SS, etc). */
    bool int_shadow;
};

/* Global NVMM machine: capability snapshot plus the machine handle. */
struct qemu_machine {
    struct nvmm_capability cap;
    struct nvmm_machine mach;
};

/* -------------------------------------------------------------------------- */

static bool nvmm_allowed;
static struct qemu_machine qemu_mach;
/* Return the NVMM per-vCPU context stored in CPUState's hax_vcpu slot. */
static struct qemu_vcpu *
get_qemu_vcpu(CPUState *cpu)
{
    return (struct qemu_vcpu *)cpu->hax_vcpu;
}
/* Return the (single) global NVMM machine handle. */
static struct nvmm_machine *
get_nvmm_mach(void)
{
    return &qemu_mach.mach;
}
64 /* -------------------------------------------------------------------------- */
67 nvmm_set_segment(struct nvmm_x64_state_seg
*nseg
, const SegmentCache
*qseg
)
69 uint32_t attrib
= qseg
->flags
;
71 nseg
->selector
= qseg
->selector
;
72 nseg
->limit
= qseg
->limit
;
73 nseg
->base
= qseg
->base
;
74 nseg
->attrib
.type
= __SHIFTOUT(attrib
, DESC_TYPE_MASK
);
75 nseg
->attrib
.s
= __SHIFTOUT(attrib
, DESC_S_MASK
);
76 nseg
->attrib
.dpl
= __SHIFTOUT(attrib
, DESC_DPL_MASK
);
77 nseg
->attrib
.p
= __SHIFTOUT(attrib
, DESC_P_MASK
);
78 nseg
->attrib
.avl
= __SHIFTOUT(attrib
, DESC_AVL_MASK
);
79 nseg
->attrib
.l
= __SHIFTOUT(attrib
, DESC_L_MASK
);
80 nseg
->attrib
.def
= __SHIFTOUT(attrib
, DESC_B_MASK
);
81 nseg
->attrib
.g
= __SHIFTOUT(attrib
, DESC_G_MASK
);
/*
 * Push the QEMU-side CPU state (GPRs, RIP/RFLAGS, segments, CRs, DRs,
 * FPU/SSE, MSRs) into the NVMM vCPU state area and commit it to the
 * kernel with nvmm_vcpu_setstate().
 */
static void
nvmm_set_registers(CPUState *cpu)
{
    CPUX86State *env = cpu->env_ptr;
    struct nvmm_machine *mach = get_nvmm_mach();
    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    struct nvmm_x64_state *state = vcpu->state;
    uint64_t bitmap;
    size_t i;
    int ret;

    /* Only safe when the vCPU is stopped or we are its own thread. */
    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));

    /* GPRs. */
    state->gprs[NVMM_X64_GPR_RAX] = env->regs[R_EAX];
    state->gprs[NVMM_X64_GPR_RCX] = env->regs[R_ECX];
    state->gprs[NVMM_X64_GPR_RDX] = env->regs[R_EDX];
    state->gprs[NVMM_X64_GPR_RBX] = env->regs[R_EBX];
    state->gprs[NVMM_X64_GPR_RSP] = env->regs[R_ESP];
    state->gprs[NVMM_X64_GPR_RBP] = env->regs[R_EBP];
    state->gprs[NVMM_X64_GPR_RSI] = env->regs[R_ESI];
    state->gprs[NVMM_X64_GPR_RDI] = env->regs[R_EDI];
#ifdef TARGET_X86_64
    state->gprs[NVMM_X64_GPR_R8]  = env->regs[R_R8];
    state->gprs[NVMM_X64_GPR_R9]  = env->regs[R_R9];
    state->gprs[NVMM_X64_GPR_R10] = env->regs[R_R10];
    state->gprs[NVMM_X64_GPR_R11] = env->regs[R_R11];
    state->gprs[NVMM_X64_GPR_R12] = env->regs[R_R12];
    state->gprs[NVMM_X64_GPR_R13] = env->regs[R_R13];
    state->gprs[NVMM_X64_GPR_R14] = env->regs[R_R14];
    state->gprs[NVMM_X64_GPR_R15] = env->regs[R_R15];
#endif

    /* RIP and RFLAGS. */
    state->gprs[NVMM_X64_GPR_RIP] = env->eip;
    state->gprs[NVMM_X64_GPR_RFLAGS] = env->eflags;

    /* Segments. */
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_CS], &env->segs[R_CS]);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_DS], &env->segs[R_DS]);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_ES], &env->segs[R_ES]);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_FS], &env->segs[R_FS]);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_GS], &env->segs[R_GS]);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_SS], &env->segs[R_SS]);

    /* Special segments. */
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_GDT], &env->gdt);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_LDT], &env->ldt);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_TR], &env->tr);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_IDT], &env->idt);

    /* Control registers. CR8 comes from the cached TPR shadow. */
    state->crs[NVMM_X64_CR_CR0] = env->cr[0];
    state->crs[NVMM_X64_CR_CR2] = env->cr[2];
    state->crs[NVMM_X64_CR_CR3] = env->cr[3];
    state->crs[NVMM_X64_CR_CR4] = env->cr[4];
    state->crs[NVMM_X64_CR_CR8] = qcpu->tpr;
    state->crs[NVMM_X64_CR_XCR0] = env->xcr0;

    /* Debug registers. */
    state->drs[NVMM_X64_DR_DR0] = env->dr[0];
    state->drs[NVMM_X64_DR_DR1] = env->dr[1];
    state->drs[NVMM_X64_DR_DR2] = env->dr[2];
    state->drs[NVMM_X64_DR_DR3] = env->dr[3];
    state->drs[NVMM_X64_DR_DR6] = env->dr[6];
    state->drs[NVMM_X64_DR_DR7] = env->dr[7];

    /* FPU. Re-pack the x87 status word: TOP lives in bits 11-13. */
    state->fpu.fx_cw = env->fpuc;
    state->fpu.fx_sw = (env->fpus & ~0x3800) | ((env->fpstt & 0x7) << 11);
    state->fpu.fx_tw = 0;
    for (i = 0; i < 8; i++) {
        /* FXSAVE tag format: 1 = valid, which is the inverse of fptags. */
        state->fpu.fx_tw |= (!env->fptags[i]) << i;
    }
    state->fpu.fx_opcode = env->fpop;
    state->fpu.fx_ip.fa_64 = env->fpip;
    state->fpu.fx_dp.fa_64 = env->fpdp;
    state->fpu.fx_mxcsr = env->mxcsr;
    state->fpu.fx_mxcsr_mask = 0x0000FFFF;
    assert(sizeof(state->fpu.fx_87_ac) == sizeof(env->fpregs));
    memcpy(state->fpu.fx_87_ac, env->fpregs, sizeof(env->fpregs));
    for (i = 0; i < CPU_NB_REGS; i++) {
        /* Copy the low 128 bits (XMM part) of each vector register. */
        memcpy(&state->fpu.fx_xmm[i].xmm_bytes[0],
            &env->xmm_regs[i].ZMM_Q(0), 8);
        memcpy(&state->fpu.fx_xmm[i].xmm_bytes[8],
            &env->xmm_regs[i].ZMM_Q(1), 8);
    }

    /* MSRs. */
    state->msrs[NVMM_X64_MSR_EFER] = env->efer;
    state->msrs[NVMM_X64_MSR_STAR] = env->star;
#ifdef TARGET_X86_64
    state->msrs[NVMM_X64_MSR_LSTAR] = env->lstar;
    state->msrs[NVMM_X64_MSR_CSTAR] = env->cstar;
    state->msrs[NVMM_X64_MSR_SFMASK] = env->fmask;
    state->msrs[NVMM_X64_MSR_KERNELGSBASE] = env->kernelgsbase;
#endif
    state->msrs[NVMM_X64_MSR_SYSENTER_CS]  = env->sysenter_cs;
    state->msrs[NVMM_X64_MSR_SYSENTER_ESP] = env->sysenter_esp;
    state->msrs[NVMM_X64_MSR_SYSENTER_EIP] = env->sysenter_eip;
    state->msrs[NVMM_X64_MSR_PAT] = env->pat;
    state->msrs[NVMM_X64_MSR_TSC] = env->tsc;

    bitmap =
        NVMM_X64_STATE_SEGS |
        NVMM_X64_STATE_GPRS |
        NVMM_X64_STATE_CRS  |
        NVMM_X64_STATE_DRS  |
        NVMM_X64_STATE_MSRS |
        NVMM_X64_STATE_FPU;

    ret = nvmm_vcpu_setstate(mach, vcpu, bitmap);
    if (ret == -1) {
        error_report("NVMM: Failed to set virtual processor context,"
            " error=%d", errno);
    }
}
204 nvmm_get_segment(SegmentCache
*qseg
, const struct nvmm_x64_state_seg
*nseg
)
206 qseg
->selector
= nseg
->selector
;
207 qseg
->limit
= nseg
->limit
;
208 qseg
->base
= nseg
->base
;
211 __SHIFTIN((uint32_t)nseg
->attrib
.type
, DESC_TYPE_MASK
) |
212 __SHIFTIN((uint32_t)nseg
->attrib
.s
, DESC_S_MASK
) |
213 __SHIFTIN((uint32_t)nseg
->attrib
.dpl
, DESC_DPL_MASK
) |
214 __SHIFTIN((uint32_t)nseg
->attrib
.p
, DESC_P_MASK
) |
215 __SHIFTIN((uint32_t)nseg
->attrib
.avl
, DESC_AVL_MASK
) |
216 __SHIFTIN((uint32_t)nseg
->attrib
.l
, DESC_L_MASK
) |
217 __SHIFTIN((uint32_t)nseg
->attrib
.def
, DESC_B_MASK
) |
218 __SHIFTIN((uint32_t)nseg
->attrib
.g
, DESC_G_MASK
);
/*
 * Pull the current vCPU state out of the kernel with nvmm_vcpu_getstate()
 * and copy it back into the QEMU-side CPUX86State (the exact mirror of
 * nvmm_set_registers()). Also re-synchronizes the APIC TPR and hflags.
 */
static void
nvmm_get_registers(CPUState *cpu)
{
    CPUX86State *env = cpu->env_ptr;
    struct nvmm_machine *mach = get_nvmm_mach();
    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct nvmm_x64_state *state = vcpu->state;
    uint64_t bitmap, tpr;
    size_t i;
    int ret;

    /* Only safe when the vCPU is stopped or we are its own thread. */
    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));

    bitmap =
        NVMM_X64_STATE_SEGS |
        NVMM_X64_STATE_GPRS |
        NVMM_X64_STATE_CRS  |
        NVMM_X64_STATE_DRS  |
        NVMM_X64_STATE_MSRS |
        NVMM_X64_STATE_FPU;

    ret = nvmm_vcpu_getstate(mach, vcpu, bitmap);
    if (ret == -1) {
        error_report("NVMM: Failed to get virtual processor context,"
            " error=%d", errno);
    }

    /* GPRs. */
    env->regs[R_EAX] = state->gprs[NVMM_X64_GPR_RAX];
    env->regs[R_ECX] = state->gprs[NVMM_X64_GPR_RCX];
    env->regs[R_EDX] = state->gprs[NVMM_X64_GPR_RDX];
    env->regs[R_EBX] = state->gprs[NVMM_X64_GPR_RBX];
    env->regs[R_ESP] = state->gprs[NVMM_X64_GPR_RSP];
    env->regs[R_EBP] = state->gprs[NVMM_X64_GPR_RBP];
    env->regs[R_ESI] = state->gprs[NVMM_X64_GPR_RSI];
    env->regs[R_EDI] = state->gprs[NVMM_X64_GPR_RDI];
#ifdef TARGET_X86_64
    env->regs[R_R8]  = state->gprs[NVMM_X64_GPR_R8];
    env->regs[R_R9]  = state->gprs[NVMM_X64_GPR_R9];
    env->regs[R_R10] = state->gprs[NVMM_X64_GPR_R10];
    env->regs[R_R11] = state->gprs[NVMM_X64_GPR_R11];
    env->regs[R_R12] = state->gprs[NVMM_X64_GPR_R12];
    env->regs[R_R13] = state->gprs[NVMM_X64_GPR_R13];
    env->regs[R_R14] = state->gprs[NVMM_X64_GPR_R14];
    env->regs[R_R15] = state->gprs[NVMM_X64_GPR_R15];
#endif

    /* RIP and RFLAGS. */
    env->eip = state->gprs[NVMM_X64_GPR_RIP];
    env->eflags = state->gprs[NVMM_X64_GPR_RFLAGS];

    /* Segments. */
    nvmm_get_segment(&env->segs[R_ES], &state->segs[NVMM_X64_SEG_ES]);
    nvmm_get_segment(&env->segs[R_CS], &state->segs[NVMM_X64_SEG_CS]);
    nvmm_get_segment(&env->segs[R_SS], &state->segs[NVMM_X64_SEG_SS]);
    nvmm_get_segment(&env->segs[R_DS], &state->segs[NVMM_X64_SEG_DS]);
    nvmm_get_segment(&env->segs[R_FS], &state->segs[NVMM_X64_SEG_FS]);
    nvmm_get_segment(&env->segs[R_GS], &state->segs[NVMM_X64_SEG_GS]);

    /* Special segments. */
    nvmm_get_segment(&env->gdt, &state->segs[NVMM_X64_SEG_GDT]);
    nvmm_get_segment(&env->ldt, &state->segs[NVMM_X64_SEG_LDT]);
    nvmm_get_segment(&env->tr, &state->segs[NVMM_X64_SEG_TR]);
    nvmm_get_segment(&env->idt, &state->segs[NVMM_X64_SEG_IDT]);

    /* Control registers. */
    env->cr[0] = state->crs[NVMM_X64_CR_CR0];
    env->cr[2] = state->crs[NVMM_X64_CR_CR2];
    env->cr[3] = state->crs[NVMM_X64_CR_CR3];
    env->cr[4] = state->crs[NVMM_X64_CR_CR4];
    tpr = state->crs[NVMM_X64_CR_CR8];
    if (tpr != qcpu->tpr) {
        /* Propagate a guest-modified TPR to the emulated APIC. */
        qcpu->tpr = tpr;
        cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
    }
    env->xcr0 = state->crs[NVMM_X64_CR_XCR0];

    /* Debug registers. */
    env->dr[0] = state->drs[NVMM_X64_DR_DR0];
    env->dr[1] = state->drs[NVMM_X64_DR_DR1];
    env->dr[2] = state->drs[NVMM_X64_DR_DR2];
    env->dr[3] = state->drs[NVMM_X64_DR_DR3];
    env->dr[6] = state->drs[NVMM_X64_DR_DR6];
    env->dr[7] = state->drs[NVMM_X64_DR_DR7];

    /* FPU. TOP is extracted from bits 11-13 of the x87 status word. */
    env->fpuc = state->fpu.fx_cw;
    env->fpstt = (state->fpu.fx_sw >> 11) & 0x7;
    env->fpus = state->fpu.fx_sw & ~0x3800;
    for (i = 0; i < 8; i++) {
        /* QEMU fptags is the inverse of the FXSAVE tag bit. */
        env->fptags[i] = !((state->fpu.fx_tw >> i) & 1);
    }
    env->fpop = state->fpu.fx_opcode;
    env->fpip = state->fpu.fx_ip.fa_64;
    env->fpdp = state->fpu.fx_dp.fa_64;
    env->mxcsr = state->fpu.fx_mxcsr;
    assert(sizeof(state->fpu.fx_87_ac) == sizeof(env->fpregs));
    memcpy(env->fpregs, state->fpu.fx_87_ac, sizeof(env->fpregs));
    for (i = 0; i < CPU_NB_REGS; i++) {
        /* Copy back the low 128 bits (XMM part) of each vector register. */
        memcpy(&env->xmm_regs[i].ZMM_Q(0),
            &state->fpu.fx_xmm[i].xmm_bytes[0], 8);
        memcpy(&env->xmm_regs[i].ZMM_Q(1),
            &state->fpu.fx_xmm[i].xmm_bytes[8], 8);
    }

    /* MSRs. */
    env->efer = state->msrs[NVMM_X64_MSR_EFER];
    env->star = state->msrs[NVMM_X64_MSR_STAR];
#ifdef TARGET_X86_64
    env->lstar = state->msrs[NVMM_X64_MSR_LSTAR];
    env->cstar = state->msrs[NVMM_X64_MSR_CSTAR];
    env->fmask = state->msrs[NVMM_X64_MSR_SFMASK];
    env->kernelgsbase = state->msrs[NVMM_X64_MSR_KERNELGSBASE];
#endif
    env->sysenter_cs  = state->msrs[NVMM_X64_MSR_SYSENTER_CS];
    env->sysenter_esp = state->msrs[NVMM_X64_MSR_SYSENTER_ESP];
    env->sysenter_eip = state->msrs[NVMM_X64_MSR_SYSENTER_EIP];
    env->pat = state->msrs[NVMM_X64_MSR_PAT];
    env->tsc = state->msrs[NVMM_X64_MSR_TSC];

    x86_update_hflags(env);
}
/*
 * Whether an external interrupt can be injected right now. If not
 * (interrupt shadow, or IF clear), arm interrupt-window exiting so the
 * VMM kicks us when injection becomes possible.
 */
static bool
nvmm_can_take_int(CPUState *cpu)
{
    CPUX86State *env = cpu->env_ptr;
    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    struct nvmm_machine *mach = get_nvmm_mach();

    /* Window-exiting already armed: injection is currently blocked. */
    if (qcpu->int_window_exit) {
        return false;
    }

    if (qcpu->int_shadow || !(env->eflags & IF_MASK)) {
        struct nvmm_x64_state *state = vcpu->state;

        /* Exit on interrupt window. */
        nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_INTR);
        state->intr.int_window_exiting = 1;
        nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_INTR);

        return false;
    }

    return true;
}
373 nvmm_can_take_nmi(CPUState
*cpu
)
375 struct qemu_vcpu
*qcpu
= get_qemu_vcpu(cpu
);
378 * Contrary to INTs, NMIs always schedule an exit when they are
379 * completed. Therefore, if window-exiting is enabled, it means
382 if (qcpu
->nmi_window_exit
) {
/*
 * Called before the VCPU is run. We inject events generated by the I/O
 * thread, and synchronize the guest TPR.
 */
static void
nvmm_vcpu_pre_run(CPUState *cpu)
{
    CPUX86State *env = cpu->env_ptr;
    struct nvmm_machine *mach = get_nvmm_mach();
    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct nvmm_x64_state *state = vcpu->state;
    struct nvmm_vcpu_event *event = vcpu->event;
    bool has_event = false;
    bool sync_tpr = false;
    uint8_t tpr;
    int ret;

    qemu_mutex_lock_iothread();

    /* Detect a TPR change made by the I/O thread via the APIC. */
    tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
    if (tpr != qcpu->tpr) {
        qcpu->tpr = tpr;
        sync_tpr = true;
    }

    /*
     * Force the VCPU out of its inner loop to process any INIT requests
     * or commit pending TPR access.
     */
    if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
        cpu->exit_request = 1;
    }

    /* NMI first; vector 2 is the NMI vector. */
    if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        if (nvmm_can_take_nmi(cpu)) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
            event->type = NVMM_VCPU_EVENT_INTR;
            event->vector = 2;
            has_event = true;
        }
    }

    /* Then a pending external interrupt, fetched from the PIC. */
    if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
        if (nvmm_can_take_int(cpu)) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
            event->type = NVMM_VCPU_EVENT_INTR;
            event->vector = cpu_get_pic_interrupt(env);
            has_event = true;
        }
    }

    /* Don't want SMIs. */
    if (cpu->interrupt_request & CPU_INTERRUPT_SMI) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
    }

    if (sync_tpr) {
        /* Write the new TPR into CR8 of the kernel-side state. */
        ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_CRS);
        if (ret == -1) {
            error_report("NVMM: Failed to get CPU state,"
                " error=%d", errno);
        }

        state->crs[NVMM_X64_CR_CR8] = qcpu->tpr;

        ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_CRS);
        if (ret == -1) {
            error_report("NVMM: Failed to set CPU state,"
                " error=%d", errno);
        }
    }

    if (has_event) {
        ret = nvmm_vcpu_inject(mach, vcpu);
        if (ret == -1) {
            error_report("NVMM: Failed to inject event,"
                " error=%d", errno);
        }
    }

    qemu_mutex_unlock_iothread();
}
/*
 * Called after the VCPU ran. We synchronize the host view of the TPR and
 * RFLAGS from the cheap exit-state snapshot (no full getstate needed).
 */
static void
nvmm_vcpu_post_run(CPUState *cpu, struct nvmm_vcpu_exit *exit)
{
    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
    CPUX86State *env = cpu->env_ptr;
    X86CPU *x86_cpu = X86_CPU(cpu);
    uint64_t tpr;

    env->eflags = exit->exitstate.rflags;
    qcpu->int_shadow = exit->exitstate.int_shadow;
    qcpu->int_window_exit = exit->exitstate.int_window_exiting;
    qcpu->nmi_window_exit = exit->exitstate.nmi_window_exiting;

    tpr = exit->exitstate.cr8;
    if (qcpu->tpr != tpr) {
        /* Guest changed CR8: forward it to the emulated APIC. */
        qcpu->tpr = tpr;
        qemu_mutex_lock_iothread();
        cpu_set_apic_tpr(x86_cpu->apic_state, qcpu->tpr);
        qemu_mutex_unlock_iothread();
    }
}
500 /* -------------------------------------------------------------------------- */
/* libnvmm I/O-assist callback: perform a guest PIO access via QEMU. */
static void
nvmm_io_callback(struct nvmm_io *io)
{
    MemTxAttrs attrs = { 0 };
    int ret;

    /* io->in is the direction; address_space_rw takes is_write. */
    ret = address_space_rw(&address_space_io, io->port, attrs, io->data,
        io->size, !io->in);
    if (ret != MEMTX_OK) {
        error_report("NVMM: I/O Transaction Failed "
            "[%s, port=%u, size=%zu]", (io->in ? "in" : "out"),
            io->port, io->size);
    }

    /* Needed, otherwise infinite loop. */
    current_cpu->vcpu_dirty = false;
}
/* libnvmm mem-assist callback: perform a guest MMIO access via QEMU. */
static void
nvmm_mem_callback(struct nvmm_mem *mem)
{
    cpu_physical_memory_rw(mem->gpa, mem->data, mem->size, mem->write);

    /* Needed, otherwise infinite loop. */
    current_cpu->vcpu_dirty = false;
}
/* Callbacks handed to libnvmm for its PIO/MMIO assist machinery. */
static struct nvmm_assist_callbacks nvmm_callbacks = {
    .io = nvmm_io_callback,
    .mem = nvmm_mem_callback
};
537 nvmm_handle_mem(struct nvmm_machine
*mach
, struct nvmm_vcpu
*vcpu
)
541 ret
= nvmm_assist_mem(mach
, vcpu
);
543 error_report("NVMM: Mem Assist Failed [gpa=%p]",
544 (void *)vcpu
->exit
->u
.mem
.gpa
);
551 nvmm_handle_io(struct nvmm_machine
*mach
, struct nvmm_vcpu
*vcpu
)
555 ret
= nvmm_assist_io(mach
, vcpu
);
557 error_report("NVMM: I/O Assist Failed [port=%d]",
558 (int)vcpu
->exit
->u
.io
.port
);
/*
 * Emulate a RDMSR exit: compute the MSR value, store it in RDX:RAX,
 * and advance RIP past the instruction (npc). Unknown MSRs read as 0.
 */
static int
nvmm_handle_rdmsr(struct nvmm_machine *mach, CPUState *cpu,
    struct nvmm_vcpu_exit *exit)
{
    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct nvmm_x64_state *state = vcpu->state;
    uint64_t val;
    int ret;

    switch (exit->u.rdmsr.msr) {
    case MSR_IA32_APICBASE:
        val = cpu_get_apic_base(x86_cpu->apic_state);
        break;
    case MSR_MTRRcap:
    case MSR_MTRRdefType:
    case MSR_MCG_CAP:
    case MSR_MCG_STATUS:
        /* Feature MSRs we expose as all-zero. */
        val = 0;
        break;
    default: /* More MSRs to add? */
        val = 0;
        error_report("NVMM: Unexpected RDMSR 0x%x, ignored",
            exit->u.rdmsr.msr);
        break;
    }

    ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_GPRS);
    if (ret == -1) {
        return -1;
    }

    /* RDMSR result is split: low 32 bits in RAX, high 32 in RDX. */
    state->gprs[NVMM_X64_GPR_RAX] = (val & 0xFFFFFFFF);
    state->gprs[NVMM_X64_GPR_RDX] = (val >> 32);
    state->gprs[NVMM_X64_GPR_RIP] = exit->u.rdmsr.npc;

    ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
    if (ret == -1) {
        return -1;
    }

    return 0;
}
610 nvmm_handle_wrmsr(struct nvmm_machine
*mach
, CPUState
*cpu
,
611 struct nvmm_vcpu_exit
*exit
)
613 struct qemu_vcpu
*qcpu
= get_qemu_vcpu(cpu
);
614 struct nvmm_vcpu
*vcpu
= &qcpu
->vcpu
;
615 X86CPU
*x86_cpu
= X86_CPU(cpu
);
616 struct nvmm_x64_state
*state
= vcpu
->state
;
620 val
= exit
->u
.wrmsr
.val
;
622 switch (exit
->u
.wrmsr
.msr
) {
623 case MSR_IA32_APICBASE
:
624 cpu_set_apic_base(x86_cpu
->apic_state
, val
);
626 case MSR_MTRRdefType
:
629 default: /* More MSRs to add? */
630 error_report("NVMM: Unexpected WRMSR 0x%x [val=0x%lx], ignored",
631 exit
->u
.wrmsr
.msr
, val
);
635 ret
= nvmm_vcpu_getstate(mach
, vcpu
, NVMM_X64_STATE_GPRS
);
640 state
->gprs
[NVMM_X64_GPR_RIP
] = exit
->u
.wrmsr
.npc
;
642 ret
= nvmm_vcpu_setstate(mach
, vcpu
, NVMM_X64_STATE_GPRS
);
/*
 * Handle a HLT exit: if no interrupt can wake the vCPU, mark it halted
 * and return 1 to break out of the inner loop; otherwise return 0 to
 * keep running.
 */
static int
nvmm_handle_halted(struct nvmm_machine *mach, CPUState *cpu,
    struct nvmm_vcpu_exit *exit)
{
    CPUX86State *env = cpu->env_ptr;
    int ret = 0;

    qemu_mutex_lock_iothread();

    /* Stay halted unless a deliverable INT (IF set) or an NMI is pending. */
    if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
          (env->eflags & IF_MASK)) &&
        !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        cpu->exception_index = EXCP_HLT;
        cpu->halted = true;
        ret = 1;
    }

    qemu_mutex_unlock_iothread();

    return ret;
}
673 nvmm_inject_ud(struct nvmm_machine
*mach
, struct nvmm_vcpu
*vcpu
)
675 struct nvmm_vcpu_event
*event
= vcpu
->event
;
677 event
->type
= NVMM_VCPU_EVENT_EXCP
;
679 event
->u
.excp
.error
= 0;
681 return nvmm_vcpu_inject(mach
, vcpu
);
/*
 * Run the vCPU until an event requires returning to the main loop.
 * Handles asynchronous requests (INIT, SIPI, TPR, POLL) before entering
 * the inner loop, then dispatches each nvmm_vcpu_run() exit reason.
 * Returns non-zero for a fatal error, 0 otherwise.
 */
static int
nvmm_vcpu_loop(CPUState *cpu)
{
    CPUX86State *env = cpu->env_ptr;
    struct nvmm_machine *mach = get_nvmm_mach();
    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct nvmm_vcpu_exit *exit = vcpu->exit;
    int ret;

    /*
     * Some asynchronous events must be handled outside of the inner
     * VCPU loop. They are handled here.
     */
    if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
        nvmm_cpu_synchronize_state(cpu);
        do_cpu_init(x86_cpu);
        /* set int/nmi windows back to the reset state */
    }
    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
        apic_poll_irq(x86_cpu->apic_state);
    }
    if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
         (env->eflags & IF_MASK)) ||
        (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        /* A wake-up event is pending: leave the halted state. */
        cpu->halted = false;
    }
    if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
        nvmm_cpu_synchronize_state(cpu);
        do_cpu_sipi(x86_cpu);
    }
    if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_TPR;
        nvmm_cpu_synchronize_state(cpu);
        apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip,
            env->tpr_access_type);
    }

    if (cpu->halted) {
        /* Nothing to run: report HLT back to the main loop. */
        cpu->exception_index = EXCP_HLT;
        qatomic_set(&cpu->exit_request, false);
        return 0;
    }

    qemu_mutex_unlock_iothread();
    cpu_exec_start(cpu);

    /*
     * Inner VCPU loop.
     */
    do {
        if (cpu->vcpu_dirty) {
            nvmm_set_registers(cpu);
            cpu->vcpu_dirty = false;
        }

        if (qcpu->stop) {
            cpu->exception_index = EXCP_INTERRUPT;
            qcpu->stop = false;
            ret = 1;
            break;
        }

        nvmm_vcpu_pre_run(cpu);

        if (qatomic_read(&cpu->exit_request)) {
#if NVMM_USER_VERSION >= 2
            nvmm_vcpu_stop(vcpu);
#else
            qemu_cpu_kick_self();
#endif
        }

        /* Read exit_request before the kernel reads the immediate exit flag */
        smp_rmb();
        ret = nvmm_vcpu_run(mach, vcpu);
        if (ret == -1) {
            error_report("NVMM: Failed to exec a virtual processor,"
                " error=%d", errno);
            break;
        }

        nvmm_vcpu_post_run(cpu, exit);

        switch (exit->reason) {
        case NVMM_VCPU_EXIT_NONE:
            break;
#if NVMM_USER_VERSION >= 2
        case NVMM_VCPU_EXIT_STOPPED:
            /*
             * The kernel cleared the immediate exit flag; cpu->exit_request
             * must be cleared after
             */
            smp_wmb();
            qcpu->stop = true;
            break;
#endif
        case NVMM_VCPU_EXIT_MEMORY:
            ret = nvmm_handle_mem(mach, vcpu);
            break;
        case NVMM_VCPU_EXIT_IO:
            ret = nvmm_handle_io(mach, vcpu);
            break;
        case NVMM_VCPU_EXIT_INT_READY:
        case NVMM_VCPU_EXIT_NMI_READY:
        case NVMM_VCPU_EXIT_TPR_CHANGED:
            /* Window/TPR exits need no action: pre_run re-evaluates. */
            break;
        case NVMM_VCPU_EXIT_HALTED:
            ret = nvmm_handle_halted(mach, cpu, exit);
            break;
        case NVMM_VCPU_EXIT_SHUTDOWN:
            /* Triple fault and the like: request a guest reset. */
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            cpu->exception_index = EXCP_INTERRUPT;
            ret = 1;
            break;
        case NVMM_VCPU_EXIT_RDMSR:
            ret = nvmm_handle_rdmsr(mach, cpu, exit);
            break;
        case NVMM_VCPU_EXIT_WRMSR:
            ret = nvmm_handle_wrmsr(mach, cpu, exit);
            break;
        case NVMM_VCPU_EXIT_MONITOR:
        case NVMM_VCPU_EXIT_MWAIT:
            /* MONITOR/MWAIT are not exposed: raise #UD in the guest. */
            ret = nvmm_inject_ud(mach, vcpu);
            break;
        default:
            /*
             * NOTE(review): "%lx" assumes 64-bit long; should be PRIx64
             * for portability — confirm against upstream before changing.
             */
            error_report("NVMM: Unexpected VM exit code 0x%lx [hw=0x%lx]",
                exit->reason, exit->u.inv.hwcode);
            nvmm_get_registers(cpu);
            qemu_mutex_lock_iothread();
            qemu_system_guest_panicked(cpu_get_crash_info(cpu));
            qemu_mutex_unlock_iothread();
            ret = -1;
            break;
        }
    } while (ret == 0);

    cpu_exec_end(cpu);
    qemu_mutex_lock_iothread();

    qatomic_set(&cpu->exit_request, false);

    return ret < 0;
}
831 /* -------------------------------------------------------------------------- */
/* run_on_cpu worker: pull state from NVMM and mark QEMU's copy dirty. */
static void
do_nvmm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    nvmm_get_registers(cpu);
    cpu->vcpu_dirty = true;
}

/* run_on_cpu worker: push QEMU state to NVMM after a reset. */
static void
do_nvmm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
{
    nvmm_set_registers(cpu);
    cpu->vcpu_dirty = false;
}

/* run_on_cpu worker: push QEMU state to NVMM after machine init. */
static void
do_nvmm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
{
    nvmm_set_registers(cpu);
    cpu->vcpu_dirty = false;
}

/* run_on_cpu worker: mark state dirty so loadvm data gets pushed later. */
static void
do_nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
{
    cpu->vcpu_dirty = true;
}
/* Fetch the vCPU state from NVMM unless QEMU's copy is already current. */
void nvmm_cpu_synchronize_state(CPUState *cpu)
{
    if (!cpu->vcpu_dirty) {
        run_on_cpu(cpu, do_nvmm_cpu_synchronize_state, RUN_ON_CPU_NULL);
    }
}

/* Push QEMU's vCPU state into NVMM after a system reset. */
void nvmm_cpu_synchronize_post_reset(CPUState *cpu)
{
    run_on_cpu(cpu, do_nvmm_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}

/* Push QEMU's vCPU state into NVMM after initialization. */
void nvmm_cpu_synchronize_post_init(CPUState *cpu)
{
    run_on_cpu(cpu, do_nvmm_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}

/* Invalidate NVMM's view before loading a VM snapshot. */
void nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu)
{
    run_on_cpu(cpu, do_nvmm_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
}
882 /* -------------------------------------------------------------------------- */
static Error *nvmm_migration_blocker;

/*
 * The nvmm_vcpu_stop() mechanism breaks races between entering the VMM
 * and another thread signaling the vCPU thread to exit.
 */

/* SIG_IPI handler: request the current vCPU to stop running. */
static void
nvmm_ipi_signal(int sigcpu)
{
    if (current_cpu) {
        struct qemu_vcpu *qcpu = get_qemu_vcpu(current_cpu);
#if NVMM_USER_VERSION >= 2
        struct nvmm_vcpu *vcpu = &qcpu->vcpu;
        nvmm_vcpu_stop(vcpu);
#else
        qcpu->stop = true;
#endif
    }
}
/* Install the SIG_IPI handler and unblock it on the calling vCPU thread. */
static void
nvmm_init_cpu_signals(void)
{
    struct sigaction sigact;
    sigset_t set;

    /* Install the IPI handler. */
    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = nvmm_ipi_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    /* Allow IPIs on the current thread. */
    sigprocmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    pthread_sigmask(SIG_SETMASK, &set, NULL);
}
/*
 * Create and configure an NVMM vCPU for the given CPUState: signal
 * setup, migration blocker, vCPU creation, CPUID/callback/TPR
 * configuration. Returns 0 on success, negative errno on failure.
 */
static int
nvmm_init_vcpu(CPUState *cpu)
{
    struct nvmm_machine *mach = get_nvmm_mach();
    struct nvmm_vcpu_conf_cpuid cpuid;
    struct nvmm_vcpu_conf_tpr tpr;
    Error *local_error = NULL;
    struct qemu_vcpu *qcpu;
    int ret, err;

    nvmm_init_cpu_signals();

    if (nvmm_migration_blocker == NULL) {
        error_setg(&nvmm_migration_blocker,
            "NVMM: Migration not supported");

        if (migrate_add_blocker(nvmm_migration_blocker, &local_error) < 0) {
            error_report_err(local_error);
            error_free(nvmm_migration_blocker);
            return -EINVAL;
        }
    }

    qcpu = g_malloc0(sizeof(*qcpu));
    if (qcpu == NULL) {
        error_report("NVMM: Failed to allocate VCPU context.");
        return -ENOMEM;
    }

    ret = nvmm_vcpu_create(mach, cpu->cpu_index, &qcpu->vcpu);
    if (ret == -1) {
        err = errno;
        error_report("NVMM: Failed to create a virtual processor,"
            " error=%d", err);
        g_free(qcpu);
        return -err;
    }

    /* Force-enable MCE/MCA/MTRR bits in CPUID leaf 1 EDX. */
    memset(&cpuid, 0, sizeof(cpuid));
    cpuid.mask = 1;
    cpuid.leaf = 0x00000001;
    cpuid.u.mask.set.edx = CPUID_MCE | CPUID_MCA | CPUID_MTRR;
    ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_CPUID,
        &cpuid);
    if (ret == -1) {
        err = errno;
        error_report("NVMM: Failed to configure a virtual processor,"
            " error=%d", err);
        g_free(qcpu);
        return -err;
    }

    /* Register the PIO/MMIO assist callbacks. */
    ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_CALLBACKS,
        &nvmm_callbacks);
    if (ret == -1) {
        err = errno;
        error_report("NVMM: Failed to configure a virtual processor,"
            " error=%d", err);
        g_free(qcpu);
        return -err;
    }

    /* Exit on TPR changes, when the kernel supports it. */
    if (qemu_mach.cap.arch.vcpu_conf_support & NVMM_CAP_ARCH_VCPU_CONF_TPR) {
        memset(&tpr, 0, sizeof(tpr));
        tpr.exit_changed = 1;
        ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_TPR, &tpr);
        if (ret == -1) {
            err = errno;
            error_report("NVMM: Failed to configure a virtual processor,"
                " error=%d", err);
            g_free(qcpu);
            return -err;
        }
    }

    cpu->vcpu_dirty = true;
    cpu->hax_vcpu = (struct hax_vcpu_state *)qcpu;

    return 0;
}
/*
 * Outer vCPU execution entry point: loop in nvmm_vcpu_loop() until an
 * exception index at or above EXCP_INTERRUPT is posted, then return it.
 * A fatal loop error aborts the process.
 */
int
nvmm_vcpu_exec(CPUState *cpu)
{
    int ret, fatal;

    while (1) {
        if (cpu->exception_index >= EXCP_INTERRUPT) {
            ret = cpu->exception_index;
            cpu->exception_index = -1;
            break;
        }

        fatal = nvmm_vcpu_loop(cpu);

        if (fatal) {
            error_report("NVMM: Failed to execute a VCPU.");
            abort();
        }
    }

    return ret;
}
1027 nvmm_destroy_vcpu(CPUState
*cpu
)
1029 struct nvmm_machine
*mach
= get_nvmm_mach();
1030 struct qemu_vcpu
*qcpu
= get_qemu_vcpu(cpu
);
1032 nvmm_vcpu_destroy(mach
, &qcpu
->vcpu
);
1033 g_free(cpu
->hax_vcpu
);
1036 /* -------------------------------------------------------------------------- */
/*
 * Map or unmap one guest-physical range in the NVMM machine. ROM regions
 * are mapped without PROT_WRITE so guest writes fault into QEMU.
 */
static void
nvmm_update_mapping(hwaddr start_pa, ram_addr_t size, uintptr_t hva,
    bool add, bool rom, const char *name)
{
    struct nvmm_machine *mach = get_nvmm_mach();
    int ret, prot;

    if (add) {
        prot = PROT_READ | PROT_EXEC;
        if (!rom) {
            prot |= PROT_WRITE;
        }
        ret = nvmm_gpa_map(mach, hva, start_pa, size, prot);
    } else {
        ret = nvmm_gpa_unmap(mach, hva, start_pa, size);
    }

    if (ret == -1) {
        error_report("NVMM: Failed to %s GPA range '%s' PA:%p, "
            "Size:%p bytes, HostVA:%p, error=%d",
            (add ? "map" : "unmap"), name, (void *)(uintptr_t)start_pa,
            (void *)size, (void *)hva, errno);
    }
}
/*
 * Translate a memory-listener section into an NVMM GPA map/unmap.
 * Non-RAM regions are skipped, and the range is trimmed to host-page
 * alignment; ranges that cannot be aligned are ignored.
 */
static void
nvmm_process_section(MemoryRegionSection *section, int add)
{
    MemoryRegion *mr = section->mr;
    hwaddr start_pa = section->offset_within_address_space;
    ram_addr_t size = int128_get64(section->size);
    unsigned int delta;
    uintptr_t hva;

    if (!memory_region_is_ram(mr)) {
        return;
    }

    /* Adjust start_pa and size so that they are page-aligned. */
    delta = qemu_real_host_page_size() - (start_pa & ~qemu_real_host_page_mask());
    delta &= ~qemu_real_host_page_mask();
    if (delta > size) {
        return;
    }
    start_pa += delta;
    size -= delta;
    size &= qemu_real_host_page_mask();
    if (!size || (start_pa & ~qemu_real_host_page_mask())) {
        return;
    }

    hva = (uintptr_t)memory_region_get_ram_ptr(mr) +
        section->offset_within_region + delta;

    nvmm_update_mapping(start_pa, size, hva, add,
        memory_region_is_rom(mr), mr->name);
}
/* Memory-listener hook: take a ref on the region, then map it. */
static void
nvmm_region_add(MemoryListener *listener, MemoryRegionSection *section)
{
    memory_region_ref(section->mr);
    nvmm_process_section(section, 1);
}
/* Memory-listener hook: unmap the region, then drop the ref. */
static void
nvmm_region_del(MemoryListener *listener, MemoryRegionSection *section)
{
    nvmm_process_section(section, 0);
    memory_region_unref(section->mr);
}
/* Memory-listener transaction hooks: NVMM needs no batching, so no-ops. */
static void
nvmm_transaction_begin(MemoryListener *listener)
{
    /* nothing */
}

static void
nvmm_transaction_commit(MemoryListener *listener)
{
    /* nothing */
}
/*
 * Dirty-log sync hook: NVMM has no dirty tracking, so conservatively
 * mark the whole RAM section dirty.
 */
static void
nvmm_log_sync(MemoryListener *listener, MemoryRegionSection *section)
{
    MemoryRegion *mr = section->mr;

    if (!memory_region_is_ram(mr)) {
        return;
    }

    memory_region_set_dirty(mr, 0, int128_get64(section->size));
}
/* Listener keeping NVMM's GPA mappings in sync with the memory API. */
static MemoryListener nvmm_memory_listener = {
    .begin = nvmm_transaction_begin,
    .commit = nvmm_transaction_commit,
    .region_add = nvmm_region_add,
    .region_del = nvmm_region_del,
    .log_sync = nvmm_log_sync,
};
/*
 * RAM-block notifier: register the host virtual range with NVMM so it
 * can later be GPA-mapped. Mapped at max_size to cover future resizes.
 */
static void
nvmm_ram_block_added(RAMBlockNotifier *n, void *host, size_t size,
                     size_t max_size)
{
    struct nvmm_machine *mach = get_nvmm_mach();
    uintptr_t hva = (uintptr_t)host;
    int ret;

    ret = nvmm_hva_map(mach, hva, max_size);

    if (ret == -1) {
        error_report("NVMM: Failed to map HVA, HostVA:%p "
            "Size:%p bytes, error=%d",
            (void *)hva, (void *)size, errno);
    }
}
/* Notifier registering new RAM blocks' HVAs with NVMM. */
static struct RAMBlockNotifier nvmm_ram_notifier = {
    .ram_block_added = nvmm_ram_block_added
};
1165 /* -------------------------------------------------------------------------- */
/*
 * Accelerator init: open NVMM, validate kernel version and state-size
 * ABI, create the machine, and hook up the memory listener and RAM
 * notifier. Returns 0 on success, negative errno on failure.
 */
static int
nvmm_accel_init(MachineState *ms)
{
    int ret, err;

    ret = nvmm_init();
    if (ret == -1) {
        err = errno;
        error_report("NVMM: Initialization failed, error=%d", errno);
        return -err;
    }

    ret = nvmm_capability(&qemu_mach.cap);
    if (ret == -1) {
        err = errno;
        error_report("NVMM: Unable to fetch capability, error=%d", errno);
        return -err;
    }
    if (qemu_mach.cap.version < NVMM_KERN_VERSION) {
        error_report("NVMM: Unsupported version %u", qemu_mach.cap.version);
        return -EPROGMISMATCH;
    }
    if (qemu_mach.cap.state_size != sizeof(struct nvmm_x64_state)) {
        /* Kernel and userland disagree on the state layout: bail out. */
        error_report("NVMM: Wrong state size %u", qemu_mach.cap.state_size);
        return -EPROGMISMATCH;
    }

    ret = nvmm_machine_create(&qemu_mach.mach);
    if (ret == -1) {
        err = errno;
        error_report("NVMM: Machine creation failed, error=%d", errno);
        return -err;
    }

    memory_listener_register(&nvmm_memory_listener, &address_space_memory);
    ram_block_notifier_add(&nvmm_ram_notifier);

    printf("NetBSD Virtual Machine Monitor accelerator is operational\n");
    return 0;
}
/* Whether the NVMM accelerator was selected and initialized. */
int
nvmm_enabled(void)
{
    return nvmm_allowed;
}
/* QOM class init: wire the NVMM accelerator into the AccelClass. */
static void
nvmm_accel_class_init(ObjectClass *oc, void *data)
{
    AccelClass *ac = ACCEL_CLASS(oc);
    ac->name = "NVMM";
    ac->init_machine = nvmm_accel_init;
    ac->allowed = &nvmm_allowed;
}
1223 static const TypeInfo nvmm_accel_type
= {
1224 .name
= ACCEL_CLASS_NAME("nvmm"),
1225 .parent
= TYPE_ACCEL
,
1226 .class_init
= nvmm_accel_class_init
,
/* Register the accelerator type at QEMU startup. */
static void
nvmm_type_init(void)
{
    type_register_static(&nvmm_accel_type);
}

type_init(nvmm_type_init);