]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - arch/x86/kvm/vmx/vmenter.S
KVM: VMX: Prevent RSB underflow before vmenter
[mirror_ubuntu-jammy-kernel.git] / arch / x86 / kvm / vmx / vmenter.S
CommitLineData
453eafbe
SC
1/* SPDX-License-Identifier: GPL-2.0 */
2#include <linux/linkage.h>
3#include <asm/asm.h>
85561a53 4#include <asm/asm-offsets.h>
5e0781df
SC
5#include <asm/bitsperlong.h>
6#include <asm/kvm_vcpu_regs.h>
f2fde6a5 7#include <asm/nospec-branch.h>
85561a53 8#include <asm/percpu.h>
535f7ef2 9#include <asm/segment.h>
e5b6b3e7 10#include "run_flags.h"
5e0781df
SC
11
/*
 * WORD_SIZE is the number of bytes in a long (BITS_PER_LONG / 8).
 * Each VCPU_<reg> below is a byte offset: the register index
 * __VCPU_REGS_<reg> multiplied by WORD_SIZE. __vmx_vcpu_run uses these
 * offsets as displacements from its @regs pointer when loading and saving
 * guest registers.
 */
12#define WORD_SIZE (BITS_PER_LONG / 8)
13
14#define VCPU_RAX __VCPU_REGS_RAX * WORD_SIZE
15#define VCPU_RCX __VCPU_REGS_RCX * WORD_SIZE
16#define VCPU_RDX __VCPU_REGS_RDX * WORD_SIZE
17#define VCPU_RBX __VCPU_REGS_RBX * WORD_SIZE
18/* Intentionally omit RSP as it's context switched by hardware */
19#define VCPU_RBP __VCPU_REGS_RBP * WORD_SIZE
20#define VCPU_RSI __VCPU_REGS_RSI * WORD_SIZE
21#define VCPU_RDI __VCPU_REGS_RDI * WORD_SIZE
22
23#ifdef CONFIG_X86_64
24#define VCPU_R8 __VCPU_REGS_R8 * WORD_SIZE
25#define VCPU_R9 __VCPU_REGS_R9 * WORD_SIZE
26#define VCPU_R10 __VCPU_REGS_R10 * WORD_SIZE
27#define VCPU_R11 __VCPU_REGS_R11 * WORD_SIZE
28#define VCPU_R12 __VCPU_REGS_R12 * WORD_SIZE
29#define VCPU_R13 __VCPU_REGS_R13 * WORD_SIZE
30#define VCPU_R14 __VCPU_REGS_R14 * WORD_SIZE
31#define VCPU_R15 __VCPU_REGS_R15 * WORD_SIZE
32#endif
453eafbe 33
3ebccdf3 34.section .noinstr.text, "ax"
453eafbe 35
5e0781df 36/**
ee2fc635 37 * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
68d98cfd 38 * @vmx: struct vcpu_vmx *
5e0781df 39 * @regs: unsigned long * (to guest registers)
68d98cfd
JP
40 * @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH
41 * VMX_RUN_SAVE_SPEC_CTRL: save guest SPEC_CTRL into vmx->spec_ctrl
5e0781df
SC
42 *
43 * Returns:
e75c3c3a 44 * 0 on VM-Exit, 1 on VM-Fail
 *
 * Stack layout once the prologue has pushed the three arguments:
 * @regs at (SP), @flags at WORD_SIZE(SP), @vmx at 2*WORD_SIZE(SP).
 * The callee-saved registers and the frame pointer sit above those slots.
 *
 * The return value is built in RBX (0 at vmx_vmexit, 1 at .Lvmfail) and is
 * copied to AX only after the call to vmx_spec_ctrl_restore_host.
5e0781df 45 */
6dcc5627 46SYM_FUNC_START(__vmx_vcpu_run)
5e0781df
SC
47 push %_ASM_BP
48 mov %_ASM_SP, %_ASM_BP
3b895ef4
SC
49#ifdef CONFIG_X86_64
50 push %r15
51 push %r14
52 push %r13
53 push %r12
54#else
55 push %edi
56 push %esi
57#endif
58 push %_ASM_BX
5e0781df 59
68d98cfd
JP
60 /* Save @vmx for SPEC_CTRL handling */
61 push %_ASM_ARG1
62
63 /* Save @flags for SPEC_CTRL handling */
64 push %_ASM_ARG3
65
5e0781df
SC
66 /*
67 * Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and
68 * @regs is needed after VM-Exit to save the guest's register values.
69 */
70 push %_ASM_ARG2
71
e5b6b3e7 72 /* Copy @flags to BL, _ASM_ARG3 is volatile. */
77df5495
SC
73 mov %_ASM_ARG3B, %bl
74
 /*
  * Second argument for vmx_update_host_rsp is the current stack pointer.
  * _ASM_ARG1 has not been written since entry, so it still holds @vmx.
  */
b241fd3d 75 lea (%_ASM_SP), %_ASM_ARG2
5e0781df
SC
76 call vmx_update_host_rsp
77
85561a53
JP
78 ALTERNATIVE "jmp .Lspec_ctrl_done", "", X86_FEATURE_MSR_SPEC_CTRL
79
80 /*
81 * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the
82 * host's, write the MSR.
83 *
84 * IMPORTANT: To avoid RSB underflow attacks and any other nastiness,
85 * there must not be any returns or indirect branches between this code
86 * and vmentry.
87 */
 /* Reload @vmx from its stack slot, then fetch guest (EDI) and host (ESI) values. */
88 mov 2*WORD_SIZE(%_ASM_SP), %_ASM_DI
89 movl VMX_spec_ctrl(%_ASM_DI), %edi
90 movl PER_CPU_VAR(x86_spec_ctrl_current), %esi
91 cmp %edi, %esi
92 je .Lspec_ctrl_done
 /*
  * WRMSR takes the MSR index in ECX and the value in EDX:EAX. The upper
  * half is written as zero and the lower half is the guest value.
  */
93 mov $MSR_IA32_SPEC_CTRL, %ecx
94 xor %edx, %edx
95 mov %edi, %eax
96 wrmsr
97
98.Lspec_ctrl_done:
99
100 /*
101 * Since vmentry is serializing on affected CPUs, there's no need for
102 * an LFENCE to stop speculation from skipping the wrmsr.
103 */
104
a62fd5a7
SC
105 /* Load @regs to RAX. */
106 mov (%_ASM_SP), %_ASM_AX
5e0781df
SC
107
108 /* Check if vmlaunch or vmresume is needed */
e5b6b3e7 109 testb $VMX_RUN_VMRESUME, %bl
5e0781df
SC
110
111 /* Load guest registers. Don't clobber flags. */
a62fd5a7
SC
112 mov VCPU_RCX(%_ASM_AX), %_ASM_CX
113 mov VCPU_RDX(%_ASM_AX), %_ASM_DX
bb03911f
UB
114 mov VCPU_RBX(%_ASM_AX), %_ASM_BX
115 mov VCPU_RBP(%_ASM_AX), %_ASM_BP
a62fd5a7
SC
116 mov VCPU_RSI(%_ASM_AX), %_ASM_SI
117 mov VCPU_RDI(%_ASM_AX), %_ASM_DI
5e0781df 118#ifdef CONFIG_X86_64
a62fd5a7
SC
119 mov VCPU_R8 (%_ASM_AX), %r8
120 mov VCPU_R9 (%_ASM_AX), %r9
121 mov VCPU_R10(%_ASM_AX), %r10
122 mov VCPU_R11(%_ASM_AX), %r11
123 mov VCPU_R12(%_ASM_AX), %r12
124 mov VCPU_R13(%_ASM_AX), %r13
125 mov VCPU_R14(%_ASM_AX), %r14
126 mov VCPU_R15(%_ASM_AX), %r15
5e0781df 127#endif
b6852ae7 128 /* Load guest RAX. This kills the @regs pointer! */
a62fd5a7 129 mov VCPU_RAX(%_ASM_AX), %_ASM_AX
5e0781df 130
b241fd3d 131 /* Check EFLAGS.ZF from 'testb' above */
e5b6b3e7 132 jz .Lvmlaunch
5e0781df 133
b241fd3d
JP
134 /*
135 * After a successful VMRESUME/VMLAUNCH, control flow "magically"
136 * resumes below at 'vmx_vmexit' due to the VMCS HOST_RIP setting.
137 * So this isn't a typical function and objtool needs to be told to
138 * save the unwind state here and restore it below.
139 */
140 UNWIND_HINT_SAVE
141
142/*
143 * If VMRESUME/VMLAUNCH and corresponding vmexit succeed, execution resumes at
144 * the 'vmx_vmexit' label below.
 * Reaching the jmp that follows either instruction therefore means the
 * guest was not entered, and the VM-Fail path is taken.
145 */
146.Lvmresume:
147 vmresume
148 jmp .Lvmfail
149
150.Lvmlaunch:
151 vmlaunch
152 jmp .Lvmfail
153
 /* A fault on either instruction above is redirected to .Lfixup. */
154 _ASM_EXTABLE(.Lvmresume, .Lfixup)
155 _ASM_EXTABLE(.Lvmlaunch, .Lfixup)
156
157SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
158
159 /* Restore unwind state from before the VMRESUME/VMLAUNCH. */
160 UNWIND_HINT_RESTORE
5e0781df 161
a62fd5a7
SC
162 /* Temporarily save guest's RAX. */
163 push %_ASM_AX
5e0781df 164
a62fd5a7
SC
165 /* Reload @regs to RAX. */
166 mov WORD_SIZE(%_ASM_SP), %_ASM_AX
5e0781df 167
a62fd5a7 168 /* Save all guest registers, including RAX from the stack */
c16312f4
UB
169 pop VCPU_RAX(%_ASM_AX)
170 mov %_ASM_CX, VCPU_RCX(%_ASM_AX)
171 mov %_ASM_DX, VCPU_RDX(%_ASM_AX)
172 mov %_ASM_BX, VCPU_RBX(%_ASM_AX)
173 mov %_ASM_BP, VCPU_RBP(%_ASM_AX)
174 mov %_ASM_SI, VCPU_RSI(%_ASM_AX)
175 mov %_ASM_DI, VCPU_RDI(%_ASM_AX)
5e0781df 176#ifdef CONFIG_X86_64
a62fd5a7
SC
177 mov %r8, VCPU_R8 (%_ASM_AX)
178 mov %r9, VCPU_R9 (%_ASM_AX)
179 mov %r10, VCPU_R10(%_ASM_AX)
180 mov %r11, VCPU_R11(%_ASM_AX)
181 mov %r12, VCPU_R12(%_ASM_AX)
182 mov %r13, VCPU_R13(%_ASM_AX)
183 mov %r14, VCPU_R14(%_ASM_AX)
184 mov %r15, VCPU_R15(%_ASM_AX)
5e0781df
SC
185#endif
186
68d98cfd
JP
187 /* Clear return value to indicate VM-Exit (as opposed to VM-Fail). */
188 xor %ebx, %ebx
5e0781df 189
b241fd3d 190.Lclear_regs:
5e0781df 191 /*
68d98cfd 192 * Clear all general purpose registers except RSP and RBX to prevent
5e0781df
SC
193 * speculative use of the guest's values, even those that are reloaded
194 * via the stack. In theory, an L1 cache miss when restoring registers
195 * could lead to speculative execution with the guest's values.
196 * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially
e75c3c3a 197 * free. RSP and RBX are exempt as RSP is restored by hardware during
68d98cfd
JP
198 * VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return
199 * value.
5e0781df 200 */
68d98cfd 201 xor %eax, %eax
b241fd3d 202 xor %ecx, %ecx
4f44c4ee 203 xor %edx, %edx
bb03911f 204 xor %ebp, %ebp
4f44c4ee
SC
205 xor %esi, %esi
206 xor %edi, %edi
5e0781df
SC
207#ifdef CONFIG_X86_64
208 xor %r8d, %r8d
209 xor %r9d, %r9d
210 xor %r10d, %r10d
211 xor %r11d, %r11d
212 xor %r12d, %r12d
213 xor %r13d, %r13d
214 xor %r14d, %r14d
215 xor %r15d, %r15d
216#endif
5e0781df
SC
217
218 /* "POP" @regs. */
219 add $WORD_SIZE, %_ASM_SP
3b895ef4 220
68d98cfd
JP
221 /*
222 * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before
223 * the first unbalanced RET after vmexit!
224 *
7bb0b4d7
JP
225 * For retpoline or IBRS, RSB filling is needed to prevent poisoned RSB
226 * entries and (in some cases) RSB underflow.
68d98cfd
JP
227 *
228 * eIBRS has its own protection against poisoned RSB, so it doesn't
229 * need the RSB filling sequence. But it does need to be enabled
230 * before the first unbalanced RET.
231 */
232
7bb0b4d7 233 FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT
68d98cfd
JP
234
 /*
  * The saved @flags and @vmx are popped straight into the second and first
  * argument registers for the call below.
  */
235 pop %_ASM_ARG2 /* @flags */
236 pop %_ASM_ARG1 /* @vmx */
237
238 call vmx_spec_ctrl_restore_host
239
240 /* Put return value in AX */
241 mov %_ASM_BX, %_ASM_AX
242
 /* Restore callee-saved registers in the reverse order of the prologue pushes. */
b241fd3d 243 pop %_ASM_BX
3b895ef4
SC
244#ifdef CONFIG_X86_64
245 pop %r12
246 pop %r13
247 pop %r14
248 pop %r15
249#else
250 pop %esi
251 pop %edi
252#endif
5e0781df 253 pop %_ASM_BP
5a8cd547 254 RET
5e0781df 255
b241fd3d
JP
 /*
  * Exception fixup target for VMRESUME/VMLAUNCH (see the _ASM_EXTABLE
  * entries above). If kvm_rebooting is non-zero the fault is reported as a
  * VM-Fail, otherwise execution reaches ud2.
  */
256.Lfixup:
257 cmpb $0, kvm_rebooting
258 jne .Lvmfail
259 ud2
260.Lvmfail:
261 /* VM-Fail: set return value to 1 */
68d98cfd 262 mov $1, %_ASM_BX
b241fd3d
JP
263 jmp .Lclear_regs
264
6dcc5627 265SYM_FUNC_END(__vmx_vcpu_run)
842f4be9 266
3ebccdf3
TG
267
268.section .text, "ax"
269
842f4be9
SC
270/**
271 * vmread_error_trampoline - Trampoline from inline asm to vmread_error()
272 * @field: VMCS field encoding that failed
273 * @fault: %true if the VMREAD faulted, %false if it failed
274 *
275 * Save and restore volatile registers across a call to vmread_error(). Note,
276 * all parameters are passed on the stack.
 *
 * Once the frame pointer is set up, @field is read from 2*WORD_SIZE(BP) and
 * @fault from 3*WORD_SIZE(BP). Before returning, the @fault slot is
 * overwritten with zero.
277 */
278SYM_FUNC_START(vmread_error_trampoline)
279 push %_ASM_BP
280 mov %_ASM_SP, %_ASM_BP
281
 /* Preserve the registers that the call to vmread_error() may clobber. */
282 push %_ASM_AX
283 push %_ASM_CX
284 push %_ASM_DX
285#ifdef CONFIG_X86_64
286 push %rdi
287 push %rsi
288 push %r8
289 push %r9
290 push %r10
291 push %r11
292#endif
293#ifdef CONFIG_X86_64
294 /* Load @field and @fault to arg1 and arg2 respectively. */
295 mov 3*WORD_SIZE(%rbp), %_ASM_ARG2
296 mov 2*WORD_SIZE(%rbp), %_ASM_ARG1
297#else
298 /* Parameters are passed on the stack for 32-bit (see asmlinkage). */
299 push 3*WORD_SIZE(%ebp)
300 push 2*WORD_SIZE(%ebp)
301#endif
302
303 call vmread_error
304
 /* 32-bit only: drop the two argument words pushed for the call above. */
305#ifndef CONFIG_X86_64
306 add $8, %esp
307#endif
308
309 /* Zero out @fault, which will be popped into the result register. */
310 _ASM_MOV $0, 3*WORD_SIZE(%_ASM_BP)
311
 /* Restore the saved registers in the reverse order of the pushes above. */
312#ifdef CONFIG_X86_64
313 pop %r11
314 pop %r10
315 pop %r9
316 pop %r8
317 pop %rsi
318 pop %rdi
319#endif
320 pop %_ASM_DX
321 pop %_ASM_CX
322 pop %_ASM_AX
323 pop %_ASM_BP
324
5a8cd547 325 RET
842f4be9 326SYM_FUNC_END(vmread_error_trampoline)
535f7ef2
SC
327
/*
 * vmx_do_interrupt_nmi_irqoff - call an IRQ/NMI handler through a synthetic
 * interrupt stack frame.
 *
 * The handler address is taken from _ASM_ARG1 and invoked with CALL_NOSPEC.
 * Before the call this pushes, on x86-64 only, __KERNEL_DS and the frame
 * pointer value (after aligning RSP to 16 bytes), and then on all builds the
 * flags and __KERNEL_CS. Per the comment in the body, the callee is expected
 * to return with IRET, which unwinds that frame.
 */
328SYM_FUNC_START(vmx_do_interrupt_nmi_irqoff)
329 /*
330 * Unconditionally create a stack frame, getting the correct RSP on the
331 * stack (for x86-64) would take two instructions anyways, and RBP can
332 * be used to restore RSP to make objtool happy (see below).
333 */
334 push %_ASM_BP
335 mov %_ASM_SP, %_ASM_BP
336
337#ifdef CONFIG_X86_64
338 /*
339 * Align RSP to a 16-byte boundary (to emulate CPU behavior) before
340 * creating the synthetic interrupt stack frame for the IRQ/NMI.
341 */
342 and $-16, %rsp
343 push $__KERNEL_DS
344 push %rbp
345#endif
346 pushf
347 push $__KERNEL_CS
348 CALL_NOSPEC _ASM_ARG1
349
350 /*
351 * "Restore" RSP from RBP, even though IRET has already unwound RSP to
352 * the correct value. objtool doesn't know the callee will IRET and,
353 * without the explicit restore, thinks the stack is getting walloped.
354 * Using an unwind hint is problematic due to x86-64's dynamic alignment.
355 */
356 mov %_ASM_BP, %_ASM_SP
357 pop %_ASM_BP
5a8cd547 358 RET
535f7ef2 359SYM_FUNC_END(vmx_do_interrupt_nmi_irqoff)