2 * Copyright (c) 2018-2019 Maxime Villard, All rights reserved.
4 * NetBSD Virtual Machine Monitor (NVMM) accelerator for QEMU.
6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
10 #include "qemu/osdep.h"
12 #include "exec/address-spaces.h"
13 #include "exec/ioport.h"
14 #include "qemu/accel.h"
15 #include "sysemu/nvmm.h"
16 #include "sysemu/cpus.h"
17 #include "sysemu/runstate.h"
18 #include "qemu/main-loop.h"
19 #include "qemu/error-report.h"
20 #include "qapi/error.h"
21 #include "qemu/queue.h"
22 #include "migration/blocker.h"
25 #include "nvmm-accel-ops.h"
29 struct AccelCPUState
{
30 struct nvmm_vcpu vcpu
;
35 /* Window-exiting for INTs/NMIs. */
39 /* The guest is in an interrupt shadow (POP SS, etc). */
44 struct nvmm_capability cap
;
45 struct nvmm_machine mach
;
48 /* -------------------------------------------------------------------------- */
50 static bool nvmm_allowed
;
51 static struct qemu_machine qemu_mach
;
53 static struct nvmm_machine
*
56 return &qemu_mach
.mach
;
59 /* -------------------------------------------------------------------------- */
62 nvmm_set_segment(struct nvmm_x64_state_seg
*nseg
, const SegmentCache
*qseg
)
64 uint32_t attrib
= qseg
->flags
;
66 nseg
->selector
= qseg
->selector
;
67 nseg
->limit
= qseg
->limit
;
68 nseg
->base
= qseg
->base
;
69 nseg
->attrib
.type
= __SHIFTOUT(attrib
, DESC_TYPE_MASK
);
70 nseg
->attrib
.s
= __SHIFTOUT(attrib
, DESC_S_MASK
);
71 nseg
->attrib
.dpl
= __SHIFTOUT(attrib
, DESC_DPL_MASK
);
72 nseg
->attrib
.p
= __SHIFTOUT(attrib
, DESC_P_MASK
);
73 nseg
->attrib
.avl
= __SHIFTOUT(attrib
, DESC_AVL_MASK
);
74 nseg
->attrib
.l
= __SHIFTOUT(attrib
, DESC_L_MASK
);
75 nseg
->attrib
.def
= __SHIFTOUT(attrib
, DESC_B_MASK
);
76 nseg
->attrib
.g
= __SHIFTOUT(attrib
, DESC_G_MASK
);
80 nvmm_set_registers(CPUState
*cpu
)
82 CPUX86State
*env
= cpu_env(cpu
);
83 struct nvmm_machine
*mach
= get_nvmm_mach();
84 AccelCPUState
*qcpu
= cpu
->accel
;
85 struct nvmm_vcpu
*vcpu
= &qcpu
->vcpu
;
86 struct nvmm_x64_state
*state
= vcpu
->state
;
91 assert(cpu_is_stopped(cpu
) || qemu_cpu_is_self(cpu
));
94 state
->gprs
[NVMM_X64_GPR_RAX
] = env
->regs
[R_EAX
];
95 state
->gprs
[NVMM_X64_GPR_RCX
] = env
->regs
[R_ECX
];
96 state
->gprs
[NVMM_X64_GPR_RDX
] = env
->regs
[R_EDX
];
97 state
->gprs
[NVMM_X64_GPR_RBX
] = env
->regs
[R_EBX
];
98 state
->gprs
[NVMM_X64_GPR_RSP
] = env
->regs
[R_ESP
];
99 state
->gprs
[NVMM_X64_GPR_RBP
] = env
->regs
[R_EBP
];
100 state
->gprs
[NVMM_X64_GPR_RSI
] = env
->regs
[R_ESI
];
101 state
->gprs
[NVMM_X64_GPR_RDI
] = env
->regs
[R_EDI
];
103 state
->gprs
[NVMM_X64_GPR_R8
] = env
->regs
[R_R8
];
104 state
->gprs
[NVMM_X64_GPR_R9
] = env
->regs
[R_R9
];
105 state
->gprs
[NVMM_X64_GPR_R10
] = env
->regs
[R_R10
];
106 state
->gprs
[NVMM_X64_GPR_R11
] = env
->regs
[R_R11
];
107 state
->gprs
[NVMM_X64_GPR_R12
] = env
->regs
[R_R12
];
108 state
->gprs
[NVMM_X64_GPR_R13
] = env
->regs
[R_R13
];
109 state
->gprs
[NVMM_X64_GPR_R14
] = env
->regs
[R_R14
];
110 state
->gprs
[NVMM_X64_GPR_R15
] = env
->regs
[R_R15
];
113 /* RIP and RFLAGS. */
114 state
->gprs
[NVMM_X64_GPR_RIP
] = env
->eip
;
115 state
->gprs
[NVMM_X64_GPR_RFLAGS
] = env
->eflags
;
118 nvmm_set_segment(&state
->segs
[NVMM_X64_SEG_CS
], &env
->segs
[R_CS
]);
119 nvmm_set_segment(&state
->segs
[NVMM_X64_SEG_DS
], &env
->segs
[R_DS
]);
120 nvmm_set_segment(&state
->segs
[NVMM_X64_SEG_ES
], &env
->segs
[R_ES
]);
121 nvmm_set_segment(&state
->segs
[NVMM_X64_SEG_FS
], &env
->segs
[R_FS
]);
122 nvmm_set_segment(&state
->segs
[NVMM_X64_SEG_GS
], &env
->segs
[R_GS
]);
123 nvmm_set_segment(&state
->segs
[NVMM_X64_SEG_SS
], &env
->segs
[R_SS
]);
125 /* Special segments. */
126 nvmm_set_segment(&state
->segs
[NVMM_X64_SEG_GDT
], &env
->gdt
);
127 nvmm_set_segment(&state
->segs
[NVMM_X64_SEG_LDT
], &env
->ldt
);
128 nvmm_set_segment(&state
->segs
[NVMM_X64_SEG_TR
], &env
->tr
);
129 nvmm_set_segment(&state
->segs
[NVMM_X64_SEG_IDT
], &env
->idt
);
131 /* Control registers. */
132 state
->crs
[NVMM_X64_CR_CR0
] = env
->cr
[0];
133 state
->crs
[NVMM_X64_CR_CR2
] = env
->cr
[2];
134 state
->crs
[NVMM_X64_CR_CR3
] = env
->cr
[3];
135 state
->crs
[NVMM_X64_CR_CR4
] = env
->cr
[4];
136 state
->crs
[NVMM_X64_CR_CR8
] = qcpu
->tpr
;
137 state
->crs
[NVMM_X64_CR_XCR0
] = env
->xcr0
;
139 /* Debug registers. */
140 state
->drs
[NVMM_X64_DR_DR0
] = env
->dr
[0];
141 state
->drs
[NVMM_X64_DR_DR1
] = env
->dr
[1];
142 state
->drs
[NVMM_X64_DR_DR2
] = env
->dr
[2];
143 state
->drs
[NVMM_X64_DR_DR3
] = env
->dr
[3];
144 state
->drs
[NVMM_X64_DR_DR6
] = env
->dr
[6];
145 state
->drs
[NVMM_X64_DR_DR7
] = env
->dr
[7];
148 state
->fpu
.fx_cw
= env
->fpuc
;
149 state
->fpu
.fx_sw
= (env
->fpus
& ~0x3800) | ((env
->fpstt
& 0x7) << 11);
150 state
->fpu
.fx_tw
= 0;
151 for (i
= 0; i
< 8; i
++) {
152 state
->fpu
.fx_tw
|= (!env
->fptags
[i
]) << i
;
154 state
->fpu
.fx_opcode
= env
->fpop
;
155 state
->fpu
.fx_ip
.fa_64
= env
->fpip
;
156 state
->fpu
.fx_dp
.fa_64
= env
->fpdp
;
157 state
->fpu
.fx_mxcsr
= env
->mxcsr
;
158 state
->fpu
.fx_mxcsr_mask
= 0x0000FFFF;
159 assert(sizeof(state
->fpu
.fx_87_ac
) == sizeof(env
->fpregs
));
160 memcpy(state
->fpu
.fx_87_ac
, env
->fpregs
, sizeof(env
->fpregs
));
161 for (i
= 0; i
< CPU_NB_REGS
; i
++) {
162 memcpy(&state
->fpu
.fx_xmm
[i
].xmm_bytes
[0],
163 &env
->xmm_regs
[i
].ZMM_Q(0), 8);
164 memcpy(&state
->fpu
.fx_xmm
[i
].xmm_bytes
[8],
165 &env
->xmm_regs
[i
].ZMM_Q(1), 8);
169 state
->msrs
[NVMM_X64_MSR_EFER
] = env
->efer
;
170 state
->msrs
[NVMM_X64_MSR_STAR
] = env
->star
;
172 state
->msrs
[NVMM_X64_MSR_LSTAR
] = env
->lstar
;
173 state
->msrs
[NVMM_X64_MSR_CSTAR
] = env
->cstar
;
174 state
->msrs
[NVMM_X64_MSR_SFMASK
] = env
->fmask
;
175 state
->msrs
[NVMM_X64_MSR_KERNELGSBASE
] = env
->kernelgsbase
;
177 state
->msrs
[NVMM_X64_MSR_SYSENTER_CS
] = env
->sysenter_cs
;
178 state
->msrs
[NVMM_X64_MSR_SYSENTER_ESP
] = env
->sysenter_esp
;
179 state
->msrs
[NVMM_X64_MSR_SYSENTER_EIP
] = env
->sysenter_eip
;
180 state
->msrs
[NVMM_X64_MSR_PAT
] = env
->pat
;
181 state
->msrs
[NVMM_X64_MSR_TSC
] = env
->tsc
;
184 NVMM_X64_STATE_SEGS
|
185 NVMM_X64_STATE_GPRS
|
188 NVMM_X64_STATE_MSRS
|
191 ret
= nvmm_vcpu_setstate(mach
, vcpu
, bitmap
);
193 error_report("NVMM: Failed to set virtual processor context,"
199 nvmm_get_segment(SegmentCache
*qseg
, const struct nvmm_x64_state_seg
*nseg
)
201 qseg
->selector
= nseg
->selector
;
202 qseg
->limit
= nseg
->limit
;
203 qseg
->base
= nseg
->base
;
206 __SHIFTIN((uint32_t)nseg
->attrib
.type
, DESC_TYPE_MASK
) |
207 __SHIFTIN((uint32_t)nseg
->attrib
.s
, DESC_S_MASK
) |
208 __SHIFTIN((uint32_t)nseg
->attrib
.dpl
, DESC_DPL_MASK
) |
209 __SHIFTIN((uint32_t)nseg
->attrib
.p
, DESC_P_MASK
) |
210 __SHIFTIN((uint32_t)nseg
->attrib
.avl
, DESC_AVL_MASK
) |
211 __SHIFTIN((uint32_t)nseg
->attrib
.l
, DESC_L_MASK
) |
212 __SHIFTIN((uint32_t)nseg
->attrib
.def
, DESC_B_MASK
) |
213 __SHIFTIN((uint32_t)nseg
->attrib
.g
, DESC_G_MASK
);
217 nvmm_get_registers(CPUState
*cpu
)
219 CPUX86State
*env
= cpu_env(cpu
);
220 struct nvmm_machine
*mach
= get_nvmm_mach();
221 AccelCPUState
*qcpu
= cpu
->accel
;
222 struct nvmm_vcpu
*vcpu
= &qcpu
->vcpu
;
223 X86CPU
*x86_cpu
= X86_CPU(cpu
);
224 struct nvmm_x64_state
*state
= vcpu
->state
;
225 uint64_t bitmap
, tpr
;
229 assert(cpu_is_stopped(cpu
) || qemu_cpu_is_self(cpu
));
232 NVMM_X64_STATE_SEGS
|
233 NVMM_X64_STATE_GPRS
|
236 NVMM_X64_STATE_MSRS
|
239 ret
= nvmm_vcpu_getstate(mach
, vcpu
, bitmap
);
241 error_report("NVMM: Failed to get virtual processor context,"
246 env
->regs
[R_EAX
] = state
->gprs
[NVMM_X64_GPR_RAX
];
247 env
->regs
[R_ECX
] = state
->gprs
[NVMM_X64_GPR_RCX
];
248 env
->regs
[R_EDX
] = state
->gprs
[NVMM_X64_GPR_RDX
];
249 env
->regs
[R_EBX
] = state
->gprs
[NVMM_X64_GPR_RBX
];
250 env
->regs
[R_ESP
] = state
->gprs
[NVMM_X64_GPR_RSP
];
251 env
->regs
[R_EBP
] = state
->gprs
[NVMM_X64_GPR_RBP
];
252 env
->regs
[R_ESI
] = state
->gprs
[NVMM_X64_GPR_RSI
];
253 env
->regs
[R_EDI
] = state
->gprs
[NVMM_X64_GPR_RDI
];
255 env
->regs
[R_R8
] = state
->gprs
[NVMM_X64_GPR_R8
];
256 env
->regs
[R_R9
] = state
->gprs
[NVMM_X64_GPR_R9
];
257 env
->regs
[R_R10
] = state
->gprs
[NVMM_X64_GPR_R10
];
258 env
->regs
[R_R11
] = state
->gprs
[NVMM_X64_GPR_R11
];
259 env
->regs
[R_R12
] = state
->gprs
[NVMM_X64_GPR_R12
];
260 env
->regs
[R_R13
] = state
->gprs
[NVMM_X64_GPR_R13
];
261 env
->regs
[R_R14
] = state
->gprs
[NVMM_X64_GPR_R14
];
262 env
->regs
[R_R15
] = state
->gprs
[NVMM_X64_GPR_R15
];
265 /* RIP and RFLAGS. */
266 env
->eip
= state
->gprs
[NVMM_X64_GPR_RIP
];
267 env
->eflags
= state
->gprs
[NVMM_X64_GPR_RFLAGS
];
270 nvmm_get_segment(&env
->segs
[R_ES
], &state
->segs
[NVMM_X64_SEG_ES
]);
271 nvmm_get_segment(&env
->segs
[R_CS
], &state
->segs
[NVMM_X64_SEG_CS
]);
272 nvmm_get_segment(&env
->segs
[R_SS
], &state
->segs
[NVMM_X64_SEG_SS
]);
273 nvmm_get_segment(&env
->segs
[R_DS
], &state
->segs
[NVMM_X64_SEG_DS
]);
274 nvmm_get_segment(&env
->segs
[R_FS
], &state
->segs
[NVMM_X64_SEG_FS
]);
275 nvmm_get_segment(&env
->segs
[R_GS
], &state
->segs
[NVMM_X64_SEG_GS
]);
277 /* Special segments. */
278 nvmm_get_segment(&env
->gdt
, &state
->segs
[NVMM_X64_SEG_GDT
]);
279 nvmm_get_segment(&env
->ldt
, &state
->segs
[NVMM_X64_SEG_LDT
]);
280 nvmm_get_segment(&env
->tr
, &state
->segs
[NVMM_X64_SEG_TR
]);
281 nvmm_get_segment(&env
->idt
, &state
->segs
[NVMM_X64_SEG_IDT
]);
283 /* Control registers. */
284 env
->cr
[0] = state
->crs
[NVMM_X64_CR_CR0
];
285 env
->cr
[2] = state
->crs
[NVMM_X64_CR_CR2
];
286 env
->cr
[3] = state
->crs
[NVMM_X64_CR_CR3
];
287 env
->cr
[4] = state
->crs
[NVMM_X64_CR_CR4
];
288 tpr
= state
->crs
[NVMM_X64_CR_CR8
];
289 if (tpr
!= qcpu
->tpr
) {
291 cpu_set_apic_tpr(x86_cpu
->apic_state
, tpr
);
293 env
->xcr0
= state
->crs
[NVMM_X64_CR_XCR0
];
295 /* Debug registers. */
296 env
->dr
[0] = state
->drs
[NVMM_X64_DR_DR0
];
297 env
->dr
[1] = state
->drs
[NVMM_X64_DR_DR1
];
298 env
->dr
[2] = state
->drs
[NVMM_X64_DR_DR2
];
299 env
->dr
[3] = state
->drs
[NVMM_X64_DR_DR3
];
300 env
->dr
[6] = state
->drs
[NVMM_X64_DR_DR6
];
301 env
->dr
[7] = state
->drs
[NVMM_X64_DR_DR7
];
304 env
->fpuc
= state
->fpu
.fx_cw
;
305 env
->fpstt
= (state
->fpu
.fx_sw
>> 11) & 0x7;
306 env
->fpus
= state
->fpu
.fx_sw
& ~0x3800;
307 for (i
= 0; i
< 8; i
++) {
308 env
->fptags
[i
] = !((state
->fpu
.fx_tw
>> i
) & 1);
310 env
->fpop
= state
->fpu
.fx_opcode
;
311 env
->fpip
= state
->fpu
.fx_ip
.fa_64
;
312 env
->fpdp
= state
->fpu
.fx_dp
.fa_64
;
313 env
->mxcsr
= state
->fpu
.fx_mxcsr
;
314 assert(sizeof(state
->fpu
.fx_87_ac
) == sizeof(env
->fpregs
));
315 memcpy(env
->fpregs
, state
->fpu
.fx_87_ac
, sizeof(env
->fpregs
));
316 for (i
= 0; i
< CPU_NB_REGS
; i
++) {
317 memcpy(&env
->xmm_regs
[i
].ZMM_Q(0),
318 &state
->fpu
.fx_xmm
[i
].xmm_bytes
[0], 8);
319 memcpy(&env
->xmm_regs
[i
].ZMM_Q(1),
320 &state
->fpu
.fx_xmm
[i
].xmm_bytes
[8], 8);
324 env
->efer
= state
->msrs
[NVMM_X64_MSR_EFER
];
325 env
->star
= state
->msrs
[NVMM_X64_MSR_STAR
];
327 env
->lstar
= state
->msrs
[NVMM_X64_MSR_LSTAR
];
328 env
->cstar
= state
->msrs
[NVMM_X64_MSR_CSTAR
];
329 env
->fmask
= state
->msrs
[NVMM_X64_MSR_SFMASK
];
330 env
->kernelgsbase
= state
->msrs
[NVMM_X64_MSR_KERNELGSBASE
];
332 env
->sysenter_cs
= state
->msrs
[NVMM_X64_MSR_SYSENTER_CS
];
333 env
->sysenter_esp
= state
->msrs
[NVMM_X64_MSR_SYSENTER_ESP
];
334 env
->sysenter_eip
= state
->msrs
[NVMM_X64_MSR_SYSENTER_EIP
];
335 env
->pat
= state
->msrs
[NVMM_X64_MSR_PAT
];
336 env
->tsc
= state
->msrs
[NVMM_X64_MSR_TSC
];
338 x86_update_hflags(env
);
342 nvmm_can_take_int(CPUState
*cpu
)
344 AccelCPUState
*qcpu
= cpu
->accel
;
345 struct nvmm_vcpu
*vcpu
= &qcpu
->vcpu
;
346 struct nvmm_machine
*mach
= get_nvmm_mach();
348 if (qcpu
->int_window_exit
) {
352 if (qcpu
->int_shadow
|| !(cpu_env(cpu
)->eflags
& IF_MASK
)) {
353 struct nvmm_x64_state
*state
= vcpu
->state
;
355 /* Exit on interrupt window. */
356 nvmm_vcpu_getstate(mach
, vcpu
, NVMM_X64_STATE_INTR
);
357 state
->intr
.int_window_exiting
= 1;
358 nvmm_vcpu_setstate(mach
, vcpu
, NVMM_X64_STATE_INTR
);
367 nvmm_can_take_nmi(CPUState
*cpu
)
369 AccelCPUState
*qcpu
= cpu
->accel
;
372 * Contrary to INTs, NMIs always schedule an exit when they are
373 * completed. Therefore, if window-exiting is enabled, it means
376 if (qcpu
->nmi_window_exit
) {
384 * Called before the VCPU is run. We inject events generated by the I/O
385 * thread, and synchronize the guest TPR.
388 nvmm_vcpu_pre_run(CPUState
*cpu
)
390 CPUX86State
*env
= cpu_env(cpu
);
391 struct nvmm_machine
*mach
= get_nvmm_mach();
392 AccelCPUState
*qcpu
= cpu
->accel
;
393 struct nvmm_vcpu
*vcpu
= &qcpu
->vcpu
;
394 X86CPU
*x86_cpu
= X86_CPU(cpu
);
395 struct nvmm_x64_state
*state
= vcpu
->state
;
396 struct nvmm_vcpu_event
*event
= vcpu
->event
;
397 bool has_event
= false;
398 bool sync_tpr
= false;
404 tpr
= cpu_get_apic_tpr(x86_cpu
->apic_state
);
405 if (tpr
!= qcpu
->tpr
) {
411 * Force the VCPU out of its inner loop to process any INIT requests
412 * or commit pending TPR access.
414 if (cpu
->interrupt_request
& (CPU_INTERRUPT_INIT
| CPU_INTERRUPT_TPR
)) {
415 cpu
->exit_request
= 1;
418 if (!has_event
&& (cpu
->interrupt_request
& CPU_INTERRUPT_NMI
)) {
419 if (nvmm_can_take_nmi(cpu
)) {
420 cpu
->interrupt_request
&= ~CPU_INTERRUPT_NMI
;
421 event
->type
= NVMM_VCPU_EVENT_INTR
;
427 if (!has_event
&& (cpu
->interrupt_request
& CPU_INTERRUPT_HARD
)) {
428 if (nvmm_can_take_int(cpu
)) {
429 cpu
->interrupt_request
&= ~CPU_INTERRUPT_HARD
;
430 event
->type
= NVMM_VCPU_EVENT_INTR
;
431 event
->vector
= cpu_get_pic_interrupt(env
);
436 /* Don't want SMIs. */
437 if (cpu
->interrupt_request
& CPU_INTERRUPT_SMI
) {
438 cpu
->interrupt_request
&= ~CPU_INTERRUPT_SMI
;
442 ret
= nvmm_vcpu_getstate(mach
, vcpu
, NVMM_X64_STATE_CRS
);
444 error_report("NVMM: Failed to get CPU state,"
448 state
->crs
[NVMM_X64_CR_CR8
] = qcpu
->tpr
;
450 ret
= nvmm_vcpu_setstate(mach
, vcpu
, NVMM_X64_STATE_CRS
);
452 error_report("NVMM: Failed to set CPU state,"
458 ret
= nvmm_vcpu_inject(mach
, vcpu
);
460 error_report("NVMM: Failed to inject event,"
469 * Called after the VCPU ran. We synchronize the host view of the TPR and
473 nvmm_vcpu_post_run(CPUState
*cpu
, struct nvmm_vcpu_exit
*exit
)
475 AccelCPUState
*qcpu
= cpu
->accel
;
476 X86CPU
*x86_cpu
= X86_CPU(cpu
);
477 CPUX86State
*env
= &x86_cpu
->env
;
480 env
->eflags
= exit
->exitstate
.rflags
;
481 qcpu
->int_shadow
= exit
->exitstate
.int_shadow
;
482 qcpu
->int_window_exit
= exit
->exitstate
.int_window_exiting
;
483 qcpu
->nmi_window_exit
= exit
->exitstate
.nmi_window_exiting
;
485 tpr
= exit
->exitstate
.cr8
;
486 if (qcpu
->tpr
!= tpr
) {
489 cpu_set_apic_tpr(x86_cpu
->apic_state
, qcpu
->tpr
);
494 /* -------------------------------------------------------------------------- */
497 nvmm_io_callback(struct nvmm_io
*io
)
499 MemTxAttrs attrs
= { 0 };
502 ret
= address_space_rw(&address_space_io
, io
->port
, attrs
, io
->data
,
504 if (ret
!= MEMTX_OK
) {
505 error_report("NVMM: I/O Transaction Failed "
506 "[%s, port=%u, size=%zu]", (io
->in
? "in" : "out"),
510 /* Needed, otherwise infinite loop. */
511 current_cpu
->accel
->dirty
= false;
515 nvmm_mem_callback(struct nvmm_mem
*mem
)
517 cpu_physical_memory_rw(mem
->gpa
, mem
->data
, mem
->size
, mem
->write
);
519 /* Needed, otherwise infinite loop. */
520 current_cpu
->accel
->dirty
= false;
523 static struct nvmm_assist_callbacks nvmm_callbacks
= {
524 .io
= nvmm_io_callback
,
525 .mem
= nvmm_mem_callback
528 /* -------------------------------------------------------------------------- */
531 nvmm_handle_mem(struct nvmm_machine
*mach
, struct nvmm_vcpu
*vcpu
)
535 ret
= nvmm_assist_mem(mach
, vcpu
);
537 error_report("NVMM: Mem Assist Failed [gpa=%p]",
538 (void *)vcpu
->exit
->u
.mem
.gpa
);
545 nvmm_handle_io(struct nvmm_machine
*mach
, struct nvmm_vcpu
*vcpu
)
549 ret
= nvmm_assist_io(mach
, vcpu
);
551 error_report("NVMM: I/O Assist Failed [port=%d]",
552 (int)vcpu
->exit
->u
.io
.port
);
559 nvmm_handle_rdmsr(struct nvmm_machine
*mach
, CPUState
*cpu
,
560 struct nvmm_vcpu_exit
*exit
)
562 AccelCPUState
*qcpu
= cpu
->accel
;
563 struct nvmm_vcpu
*vcpu
= &qcpu
->vcpu
;
564 X86CPU
*x86_cpu
= X86_CPU(cpu
);
565 struct nvmm_x64_state
*state
= vcpu
->state
;
569 switch (exit
->u
.rdmsr
.msr
) {
570 case MSR_IA32_APICBASE
:
571 val
= cpu_get_apic_base(x86_cpu
->apic_state
);
574 case MSR_MTRRdefType
:
579 default: /* More MSRs to add? */
581 error_report("NVMM: Unexpected RDMSR 0x%x, ignored",
586 ret
= nvmm_vcpu_getstate(mach
, vcpu
, NVMM_X64_STATE_GPRS
);
591 state
->gprs
[NVMM_X64_GPR_RAX
] = (val
& 0xFFFFFFFF);
592 state
->gprs
[NVMM_X64_GPR_RDX
] = (val
>> 32);
593 state
->gprs
[NVMM_X64_GPR_RIP
] = exit
->u
.rdmsr
.npc
;
595 ret
= nvmm_vcpu_setstate(mach
, vcpu
, NVMM_X64_STATE_GPRS
);
604 nvmm_handle_wrmsr(struct nvmm_machine
*mach
, CPUState
*cpu
,
605 struct nvmm_vcpu_exit
*exit
)
607 AccelCPUState
*qcpu
= cpu
->accel
;
608 struct nvmm_vcpu
*vcpu
= &qcpu
->vcpu
;
609 X86CPU
*x86_cpu
= X86_CPU(cpu
);
610 struct nvmm_x64_state
*state
= vcpu
->state
;
614 val
= exit
->u
.wrmsr
.val
;
616 switch (exit
->u
.wrmsr
.msr
) {
617 case MSR_IA32_APICBASE
:
618 cpu_set_apic_base(x86_cpu
->apic_state
, val
);
620 case MSR_MTRRdefType
:
623 default: /* More MSRs to add? */
624 error_report("NVMM: Unexpected WRMSR 0x%x [val=0x%lx], ignored",
625 exit
->u
.wrmsr
.msr
, val
);
629 ret
= nvmm_vcpu_getstate(mach
, vcpu
, NVMM_X64_STATE_GPRS
);
634 state
->gprs
[NVMM_X64_GPR_RIP
] = exit
->u
.wrmsr
.npc
;
636 ret
= nvmm_vcpu_setstate(mach
, vcpu
, NVMM_X64_STATE_GPRS
);
645 nvmm_handle_halted(struct nvmm_machine
*mach
, CPUState
*cpu
,
646 struct nvmm_vcpu_exit
*exit
)
652 if (!((cpu
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
653 (cpu_env(cpu
)->eflags
& IF_MASK
)) &&
654 !(cpu
->interrupt_request
& CPU_INTERRUPT_NMI
)) {
655 cpu
->exception_index
= EXCP_HLT
;
666 nvmm_inject_ud(struct nvmm_machine
*mach
, struct nvmm_vcpu
*vcpu
)
668 struct nvmm_vcpu_event
*event
= vcpu
->event
;
670 event
->type
= NVMM_VCPU_EVENT_EXCP
;
672 event
->u
.excp
.error
= 0;
674 return nvmm_vcpu_inject(mach
, vcpu
);
678 nvmm_vcpu_loop(CPUState
*cpu
)
680 struct nvmm_machine
*mach
= get_nvmm_mach();
681 AccelCPUState
*qcpu
= cpu
->accel
;
682 struct nvmm_vcpu
*vcpu
= &qcpu
->vcpu
;
683 X86CPU
*x86_cpu
= X86_CPU(cpu
);
684 CPUX86State
*env
= &x86_cpu
->env
;
685 struct nvmm_vcpu_exit
*exit
= vcpu
->exit
;
689 * Some asynchronous events must be handled outside of the inner
690 * VCPU loop. They are handled here.
692 if (cpu
->interrupt_request
& CPU_INTERRUPT_INIT
) {
693 nvmm_cpu_synchronize_state(cpu
);
694 do_cpu_init(x86_cpu
);
695 /* set int/nmi windows back to the reset state */
697 if (cpu
->interrupt_request
& CPU_INTERRUPT_POLL
) {
698 cpu
->interrupt_request
&= ~CPU_INTERRUPT_POLL
;
699 apic_poll_irq(x86_cpu
->apic_state
);
701 if (((cpu
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
702 (env
->eflags
& IF_MASK
)) ||
703 (cpu
->interrupt_request
& CPU_INTERRUPT_NMI
)) {
706 if (cpu
->interrupt_request
& CPU_INTERRUPT_SIPI
) {
707 nvmm_cpu_synchronize_state(cpu
);
708 do_cpu_sipi(x86_cpu
);
710 if (cpu
->interrupt_request
& CPU_INTERRUPT_TPR
) {
711 cpu
->interrupt_request
&= ~CPU_INTERRUPT_TPR
;
712 nvmm_cpu_synchronize_state(cpu
);
713 apic_handle_tpr_access_report(x86_cpu
->apic_state
, env
->eip
,
714 env
->tpr_access_type
);
718 cpu
->exception_index
= EXCP_HLT
;
719 qatomic_set(&cpu
->exit_request
, false);
730 if (cpu
->accel
->dirty
) {
731 nvmm_set_registers(cpu
);
732 cpu
->accel
->dirty
= false;
736 cpu
->exception_index
= EXCP_INTERRUPT
;
742 nvmm_vcpu_pre_run(cpu
);
744 if (qatomic_read(&cpu
->exit_request
)) {
745 #if NVMM_USER_VERSION >= 2
746 nvmm_vcpu_stop(vcpu
);
748 qemu_cpu_kick_self();
752 /* Read exit_request before the kernel reads the immediate exit flag */
754 ret
= nvmm_vcpu_run(mach
, vcpu
);
756 error_report("NVMM: Failed to exec a virtual processor,"
761 nvmm_vcpu_post_run(cpu
, exit
);
763 switch (exit
->reason
) {
764 case NVMM_VCPU_EXIT_NONE
:
766 #if NVMM_USER_VERSION >= 2
767 case NVMM_VCPU_EXIT_STOPPED
:
769 * The kernel cleared the immediate exit flag; cpu->exit_request
770 * must be cleared after
776 case NVMM_VCPU_EXIT_MEMORY
:
777 ret
= nvmm_handle_mem(mach
, vcpu
);
779 case NVMM_VCPU_EXIT_IO
:
780 ret
= nvmm_handle_io(mach
, vcpu
);
782 case NVMM_VCPU_EXIT_INT_READY
:
783 case NVMM_VCPU_EXIT_NMI_READY
:
784 case NVMM_VCPU_EXIT_TPR_CHANGED
:
786 case NVMM_VCPU_EXIT_HALTED
:
787 ret
= nvmm_handle_halted(mach
, cpu
, exit
);
789 case NVMM_VCPU_EXIT_SHUTDOWN
:
790 qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET
);
791 cpu
->exception_index
= EXCP_INTERRUPT
;
794 case NVMM_VCPU_EXIT_RDMSR
:
795 ret
= nvmm_handle_rdmsr(mach
, cpu
, exit
);
797 case NVMM_VCPU_EXIT_WRMSR
:
798 ret
= nvmm_handle_wrmsr(mach
, cpu
, exit
);
800 case NVMM_VCPU_EXIT_MONITOR
:
801 case NVMM_VCPU_EXIT_MWAIT
:
802 ret
= nvmm_inject_ud(mach
, vcpu
);
805 error_report("NVMM: Unexpected VM exit code 0x%lx [hw=0x%lx]",
806 exit
->reason
, exit
->u
.inv
.hwcode
);
807 nvmm_get_registers(cpu
);
809 qemu_system_guest_panicked(cpu_get_crash_info(cpu
));
819 qatomic_set(&cpu
->exit_request
, false);
824 /* -------------------------------------------------------------------------- */
827 do_nvmm_cpu_synchronize_state(CPUState
*cpu
, run_on_cpu_data arg
)
829 nvmm_get_registers(cpu
);
830 cpu
->accel
->dirty
= true;
834 do_nvmm_cpu_synchronize_post_reset(CPUState
*cpu
, run_on_cpu_data arg
)
836 nvmm_set_registers(cpu
);
837 cpu
->accel
->dirty
= false;
841 do_nvmm_cpu_synchronize_post_init(CPUState
*cpu
, run_on_cpu_data arg
)
843 nvmm_set_registers(cpu
);
844 cpu
->accel
->dirty
= false;
848 do_nvmm_cpu_synchronize_pre_loadvm(CPUState
*cpu
, run_on_cpu_data arg
)
850 cpu
->accel
->dirty
= true;
853 void nvmm_cpu_synchronize_state(CPUState
*cpu
)
855 if (!cpu
->accel
->dirty
) {
856 run_on_cpu(cpu
, do_nvmm_cpu_synchronize_state
, RUN_ON_CPU_NULL
);
860 void nvmm_cpu_synchronize_post_reset(CPUState
*cpu
)
862 run_on_cpu(cpu
, do_nvmm_cpu_synchronize_post_reset
, RUN_ON_CPU_NULL
);
865 void nvmm_cpu_synchronize_post_init(CPUState
*cpu
)
867 run_on_cpu(cpu
, do_nvmm_cpu_synchronize_post_init
, RUN_ON_CPU_NULL
);
870 void nvmm_cpu_synchronize_pre_loadvm(CPUState
*cpu
)
872 run_on_cpu(cpu
, do_nvmm_cpu_synchronize_pre_loadvm
, RUN_ON_CPU_NULL
);
875 /* -------------------------------------------------------------------------- */
877 static Error
*nvmm_migration_blocker
;
880 * The nvmm_vcpu_stop() mechanism breaks races between entering the VMM
881 * and another thread signaling the vCPU thread to exit.
885 nvmm_ipi_signal(int sigcpu
)
888 AccelCPUState
*qcpu
= current_cpu
->accel
;
889 #if NVMM_USER_VERSION >= 2
890 struct nvmm_vcpu
*vcpu
= &qcpu
->vcpu
;
891 nvmm_vcpu_stop(vcpu
);
899 nvmm_init_cpu_signals(void)
901 struct sigaction sigact
;
904 /* Install the IPI handler. */
905 memset(&sigact
, 0, sizeof(sigact
));
906 sigact
.sa_handler
= nvmm_ipi_signal
;
907 sigaction(SIG_IPI
, &sigact
, NULL
);
909 /* Allow IPIs on the current thread. */
910 sigprocmask(SIG_BLOCK
, NULL
, &set
);
911 sigdelset(&set
, SIG_IPI
);
912 pthread_sigmask(SIG_SETMASK
, &set
, NULL
);
916 nvmm_init_vcpu(CPUState
*cpu
)
918 struct nvmm_machine
*mach
= get_nvmm_mach();
919 struct nvmm_vcpu_conf_cpuid cpuid
;
920 struct nvmm_vcpu_conf_tpr tpr
;
921 Error
*local_error
= NULL
;
925 nvmm_init_cpu_signals();
927 if (nvmm_migration_blocker
== NULL
) {
928 error_setg(&nvmm_migration_blocker
,
929 "NVMM: Migration not supported");
931 if (migrate_add_blocker(&nvmm_migration_blocker
, &local_error
) < 0) {
932 error_report_err(local_error
);
937 qcpu
= g_new0(AccelCPUState
, 1);
939 ret
= nvmm_vcpu_create(mach
, cpu
->cpu_index
, &qcpu
->vcpu
);
942 error_report("NVMM: Failed to create a virtual processor,"
948 memset(&cpuid
, 0, sizeof(cpuid
));
950 cpuid
.leaf
= 0x00000001;
951 cpuid
.u
.mask
.set
.edx
= CPUID_MCE
| CPUID_MCA
| CPUID_MTRR
;
952 ret
= nvmm_vcpu_configure(mach
, &qcpu
->vcpu
, NVMM_VCPU_CONF_CPUID
,
956 error_report("NVMM: Failed to configure a virtual processor,"
962 ret
= nvmm_vcpu_configure(mach
, &qcpu
->vcpu
, NVMM_VCPU_CONF_CALLBACKS
,
966 error_report("NVMM: Failed to configure a virtual processor,"
972 if (qemu_mach
.cap
.arch
.vcpu_conf_support
& NVMM_CAP_ARCH_VCPU_CONF_TPR
) {
973 memset(&tpr
, 0, sizeof(tpr
));
974 tpr
.exit_changed
= 1;
975 ret
= nvmm_vcpu_configure(mach
, &qcpu
->vcpu
, NVMM_VCPU_CONF_TPR
, &tpr
);
978 error_report("NVMM: Failed to configure a virtual processor,"
992 nvmm_vcpu_exec(CPUState
*cpu
)
997 if (cpu
->exception_index
>= EXCP_INTERRUPT
) {
998 ret
= cpu
->exception_index
;
999 cpu
->exception_index
= -1;
1003 fatal
= nvmm_vcpu_loop(cpu
);
1006 error_report("NVMM: Failed to execute a VCPU.");
1015 nvmm_destroy_vcpu(CPUState
*cpu
)
1017 struct nvmm_machine
*mach
= get_nvmm_mach();
1018 AccelCPUState
*qcpu
= cpu
->accel
;
1020 nvmm_vcpu_destroy(mach
, &qcpu
->vcpu
);
1024 /* -------------------------------------------------------------------------- */
1027 nvmm_update_mapping(hwaddr start_pa
, ram_addr_t size
, uintptr_t hva
,
1028 bool add
, bool rom
, const char *name
)
1030 struct nvmm_machine
*mach
= get_nvmm_mach();
1034 prot
= PROT_READ
| PROT_EXEC
;
1038 ret
= nvmm_gpa_map(mach
, hva
, start_pa
, size
, prot
);
1040 ret
= nvmm_gpa_unmap(mach
, hva
, start_pa
, size
);
1044 error_report("NVMM: Failed to %s GPA range '%s' PA:%p, "
1045 "Size:%p bytes, HostVA:%p, error=%d",
1046 (add
? "map" : "unmap"), name
, (void *)(uintptr_t)start_pa
,
1047 (void *)size
, (void *)hva
, errno
);
1052 nvmm_process_section(MemoryRegionSection
*section
, int add
)
1054 MemoryRegion
*mr
= section
->mr
;
1055 hwaddr start_pa
= section
->offset_within_address_space
;
1056 ram_addr_t size
= int128_get64(section
->size
);
1060 if (!memory_region_is_ram(mr
)) {
1064 /* Adjust start_pa and size so that they are page-aligned. */
1065 delta
= qemu_real_host_page_size() - (start_pa
& ~qemu_real_host_page_mask());
1066 delta
&= ~qemu_real_host_page_mask();
1072 size
&= qemu_real_host_page_mask();
1073 if (!size
|| (start_pa
& ~qemu_real_host_page_mask())) {
1077 hva
= (uintptr_t)memory_region_get_ram_ptr(mr
) +
1078 section
->offset_within_region
+ delta
;
1080 nvmm_update_mapping(start_pa
, size
, hva
, add
,
1081 memory_region_is_rom(mr
), mr
->name
);
1085 nvmm_region_add(MemoryListener
*listener
, MemoryRegionSection
*section
)
1087 memory_region_ref(section
->mr
);
1088 nvmm_process_section(section
, 1);
1092 nvmm_region_del(MemoryListener
*listener
, MemoryRegionSection
*section
)
1094 nvmm_process_section(section
, 0);
1095 memory_region_unref(section
->mr
);
1099 nvmm_transaction_begin(MemoryListener
*listener
)
1105 nvmm_transaction_commit(MemoryListener
*listener
)
1111 nvmm_log_sync(MemoryListener
*listener
, MemoryRegionSection
*section
)
1113 MemoryRegion
*mr
= section
->mr
;
1115 if (!memory_region_is_ram(mr
)) {
1119 memory_region_set_dirty(mr
, 0, int128_get64(section
->size
));
1122 static MemoryListener nvmm_memory_listener
= {
1124 .begin
= nvmm_transaction_begin
,
1125 .commit
= nvmm_transaction_commit
,
1126 .region_add
= nvmm_region_add
,
1127 .region_del
= nvmm_region_del
,
1128 .log_sync
= nvmm_log_sync
,
1129 .priority
= MEMORY_LISTENER_PRIORITY_ACCEL
,
1133 nvmm_ram_block_added(RAMBlockNotifier
*n
, void *host
, size_t size
,
1136 struct nvmm_machine
*mach
= get_nvmm_mach();
1137 uintptr_t hva
= (uintptr_t)host
;
1140 ret
= nvmm_hva_map(mach
, hva
, max_size
);
1143 error_report("NVMM: Failed to map HVA, HostVA:%p "
1144 "Size:%p bytes, error=%d",
1145 (void *)hva
, (void *)size
, errno
);
1149 static struct RAMBlockNotifier nvmm_ram_notifier
= {
1150 .ram_block_added
= nvmm_ram_block_added
1153 /* -------------------------------------------------------------------------- */
1156 nvmm_accel_init(MachineState
*ms
)
1163 error_report("NVMM: Initialization failed, error=%d", errno
);
1167 ret
= nvmm_capability(&qemu_mach
.cap
);
1170 error_report("NVMM: Unable to fetch capability, error=%d", errno
);
1173 if (qemu_mach
.cap
.version
< NVMM_KERN_VERSION
) {
1174 error_report("NVMM: Unsupported version %u", qemu_mach
.cap
.version
);
1175 return -EPROGMISMATCH
;
1177 if (qemu_mach
.cap
.state_size
!= sizeof(struct nvmm_x64_state
)) {
1178 error_report("NVMM: Wrong state size %u", qemu_mach
.cap
.state_size
);
1179 return -EPROGMISMATCH
;
1182 ret
= nvmm_machine_create(&qemu_mach
.mach
);
1185 error_report("NVMM: Machine creation failed, error=%d", errno
);
1189 memory_listener_register(&nvmm_memory_listener
, &address_space_memory
);
1190 ram_block_notifier_add(&nvmm_ram_notifier
);
1192 printf("NetBSD Virtual Machine Monitor accelerator is operational\n");
1199 return nvmm_allowed
;
1203 nvmm_accel_class_init(ObjectClass
*oc
, void *data
)
1205 AccelClass
*ac
= ACCEL_CLASS(oc
);
1207 ac
->init_machine
= nvmm_accel_init
;
1208 ac
->allowed
= &nvmm_allowed
;
1211 static const TypeInfo nvmm_accel_type
= {
1212 .name
= ACCEL_CLASS_NAME("nvmm"),
1213 .parent
= TYPE_ACCEL
,
1214 .class_init
= nvmm_accel_class_init
,
1218 nvmm_type_init(void)
1220 type_register_static(&nvmm_accel_type
);
1223 type_init(nvmm_type_init
);