// SPDX-License-Identifier: GPL-2.0-only
/*
 * AMD Memory Encryption Support
 *
 * Copyright (C) 2019 SUSE
 *
 * Author: Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt) "SEV-ES: " fmt

#include <linux/sched/debug.h> /* For show_regs() */
#include <linux/percpu-defs.h>
#include <linux/mem_encrypt.h>
#include <linux/lockdep.h>
#include <linux/printk.h>
#include <linux/mm_types.h>
#include <linux/set_memory.h>
#include <linux/memblock.h>
#include <linux/kernel.h>
#include <linux/mm.h>

#include <asm/cpu_entry_area.h>
#include <asm/stacktrace.h>
#include <asm/sev-es.h>
#include <asm/insn-eval.h>
#include <asm/fpu/internal.h>
#include <asm/processor.h>
#include <asm/realmode.h>
#include <asm/traps.h>
#include <asm/svm.h>
#include <asm/smp.h>
#include <asm/cpu.h>

#define DR7_RESET_VALUE 0x400

/* For early boot hypervisor communication in SEV-ES enabled guests */
static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);

/*
 * Needs to be in the .data section because we need it NULL before bss is
 * cleared
 */
static struct ghcb __initdata *boot_ghcb;

/* #VC handler runtime per-CPU data */
struct sev_es_runtime_data {
        struct ghcb ghcb_page;

        /* Physical storage for the per-CPU IST stack of the #VC handler */
        char ist_stack[EXCEPTION_STKSZ] __aligned(PAGE_SIZE);

        /*
         * Physical storage for the per-CPU fall-back stack of the #VC handler.
         * The fall-back stack is used when it is not safe to switch back to the
         * interrupted stack in the #VC entry code.
         */
        char fallback_stack[EXCEPTION_STKSZ] __aligned(PAGE_SIZE);

        /*
         * Reserve one page per CPU as backup storage for the unencrypted GHCB.
         * It is needed when an NMI happens while the #VC handler uses the real
         * GHCB, and the NMI handler itself is causing another #VC exception. In
         * that case the GHCB content of the first handler needs to be backed up
         * and restored.
         */
        struct ghcb backup_ghcb;

        /*
         * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions.
         * There is no need for it to be atomic, because nothing is written to
         * the GHCB between the read and the write of ghcb_active. So it is safe
         * to use it when a nested #VC exception happens before the write.
         *
         * This is necessary for example in the #VC->NMI->#VC case when the NMI
         * happens while the first #VC handler uses the GHCB. When the NMI code
         * raises a second #VC handler it might overwrite the contents of the
         * GHCB written by the first handler. To avoid this the content of the
         * GHCB is saved and restored when the GHCB is detected to be in use
         * already.
         */
        bool ghcb_active;
        bool backup_ghcb_active;

        /*
         * Cached DR7 value - write it on DR7 writes and return it on reads.
         * That value will never make it to the real hardware DR7 as debugging
         * is currently unsupported in SEV-ES guests.
         */
        unsigned long dr7;
};

struct ghcb_state {
        struct ghcb *ghcb;
};

static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
DEFINE_STATIC_KEY_FALSE(sev_es_enable_key);

/* Needed in vc_early_forward_exception */
void do_early_exception(struct pt_regs *regs, int trapnr);

static void __init setup_vc_stacks(int cpu)
{
        struct sev_es_runtime_data *data;
        struct cpu_entry_area *cea;
        unsigned long vaddr;
        phys_addr_t pa;

        data = per_cpu(runtime_data, cpu);
        cea = get_cpu_entry_area(cpu);

        /* Map #VC IST stack */
        vaddr = CEA_ESTACK_BOT(&cea->estacks, VC);
        pa = __pa(data->ist_stack);
        cea_set_pte((void *)vaddr, pa, PAGE_KERNEL);

        /* Map VC fall-back stack */
        vaddr = CEA_ESTACK_BOT(&cea->estacks, VC2);
        pa = __pa(data->fallback_stack);
        cea_set_pte((void *)vaddr, pa, PAGE_KERNEL);
}
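
/*
 * Rough sketch of what setup_vc_stacks() achieves: cea_set_pte() maps the
 * physical pages backing ist_stack and fallback_stack into the per-CPU
 * cpu_entry_area at the VC and VC2 exception-stack slots. The #VC IST entry
 * in the TSS and the fall-back path in the entry code then work with these
 * fixed cpu_entry_area addresses rather than with the direct-map addresses
 * of the sev_es_runtime_data allocation.
 */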

static __always_inline bool on_vc_stack(unsigned long sp)
{
        return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC)));
}

/*
 * This function handles the case when an NMI is raised in the #VC exception
 * handler entry code. In this case, the IST entry for #VC must be adjusted, so
 * that any subsequent #VC exception will not overwrite the stack contents of the
 * interrupted #VC handler.
 *
 * The IST entry is adjusted unconditionally so that it can also be
 * unconditionally adjusted back in sev_es_ist_exit(). Otherwise a nested
 * sev_es_ist_exit() call may adjust back the IST entry too early.
 */
void noinstr __sev_es_ist_enter(struct pt_regs *regs)
{
        unsigned long old_ist, new_ist;

        /* Read old IST entry */
        old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);

        /* Make room on the IST stack */
        if (on_vc_stack(regs->sp))
                new_ist = ALIGN_DOWN(regs->sp, 8) - sizeof(old_ist);
        else
                new_ist = old_ist - sizeof(old_ist);

        /* Store old IST entry */
        *(unsigned long *)new_ist = old_ist;

        /* Set new IST entry */
        this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], new_ist);
}

void noinstr __sev_es_ist_exit(void)
{
        unsigned long ist;

        /* Read IST entry */
        ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);

        if (WARN_ON(ist == __this_cpu_ist_top_va(VC)))
                return;

        /* Read back old IST entry and write it to the TSS */
        this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist);
}
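
/*
 * Rough sketch of the IST adjustment implemented by the two functions above:
 *
 *  - Initially the #VC IST entry points at the top of the per-CPU VC stack.
 *  - __sev_es_ist_enter() lowers the IST entry: to just below the interrupted
 *    stack pointer when the NMI hit code running on the VC stack, or by one
 *    8-byte slot otherwise, and saves the previous IST value in that slot.
 *  - A #VC exception raised from within the NMI handler therefore starts
 *    below the live stack frames instead of on top of them.
 *  - __sev_es_ist_exit() reads the saved value back through the current IST
 *    entry and restores it, undoing exactly one nesting level.
 */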

static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state)
{
        struct sev_es_runtime_data *data;
        struct ghcb *ghcb;

        data = this_cpu_read(runtime_data);
        ghcb = &data->ghcb_page;

        if (unlikely(data->ghcb_active)) {
                /* GHCB is already in use - save its contents */

                if (unlikely(data->backup_ghcb_active))
                        return NULL;

                /* Mark backup_ghcb active before writing to it */
                data->backup_ghcb_active = true;

                state->ghcb = &data->backup_ghcb;

                /* Backup GHCB content */
                *state->ghcb = *ghcb;
        } else {
                state->ghcb = NULL;
                data->ghcb_active = true;
        }

        return ghcb;
}

static __always_inline void sev_es_put_ghcb(struct ghcb_state *state)
{
        struct sev_es_runtime_data *data;
        struct ghcb *ghcb;

        data = this_cpu_read(runtime_data);
        ghcb = &data->ghcb_page;

        if (state->ghcb) {
                /* Restore GHCB from Backup */
                *ghcb = *state->ghcb;
                data->backup_ghcb_active = false;
                state->ghcb = NULL;
        } else {
                data->ghcb_active = false;
        }
}
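
/*
 * Usage sketch for sev_es_get_ghcb()/sev_es_put_ghcb() (hypothetical nesting
 * sequence): an outer #VC handler gets the per-CPU GHCB with state.ghcb set
 * to NULL and ghcb_active set. If an NMI interrupts it and triggers a nested
 * #VC, the nested sev_es_get_ghcb() copies the live GHCB into backup_ghcb,
 * remembers that in state.ghcb and hands out the same per-CPU page. The
 * nested sev_es_put_ghcb() copies the backup back, so the outer handler
 * finds its GHCB contents unchanged. Only a third nesting level, with the
 * backup already active, makes sev_es_get_ghcb() return NULL.
 */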

/* Needed in vc_early_forward_exception */
void do_early_exception(struct pt_regs *regs, int trapnr);

static inline u64 sev_es_rd_ghcb_msr(void)
{
        return __rdmsr(MSR_AMD64_SEV_ES_GHCB);
}

static __always_inline void sev_es_wr_ghcb_msr(u64 val)
{
        u32 low, high;

        low = (u32)(val);
        high = (u32)(val >> 32);

        native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high);
}

static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt,
                                unsigned char *buffer)
{
        return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
}

static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
{
        char buffer[MAX_INSN_SIZE];
        enum es_result ret;
        int res;

        if (user_mode(ctxt->regs)) {
                res = insn_fetch_from_user(ctxt->regs, buffer);
                if (!res) {
                        ctxt->fi.vector = X86_TRAP_PF;
                        ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
                        ctxt->fi.cr2 = ctxt->regs->ip;
                        return ES_EXCEPTION;
                }

                if (!insn_decode(&ctxt->insn, ctxt->regs, buffer, res))
                        return ES_DECODE_FAILED;
        } else {
                res = vc_fetch_insn_kernel(ctxt, buffer);
                if (res) {
                        ctxt->fi.vector = X86_TRAP_PF;
                        ctxt->fi.error_code = X86_PF_INSTR;
                        ctxt->fi.cr2 = ctxt->regs->ip;
                        return ES_EXCEPTION;
                }

                insn_init(&ctxt->insn, buffer, MAX_INSN_SIZE - res, 1);
                insn_get_length(&ctxt->insn);
        }

        ret = ctxt->insn.immediate.got ? ES_OK : ES_DECODE_FAILED;

        return ret;
}

static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
                                   char *dst, char *buf, size_t size)
{
        unsigned long error_code = X86_PF_PROT | X86_PF_WRITE;
        char __user *target = (char __user *)dst;
        u64 d8;
        u32 d4;
        u16 d2;
        u8 d1;

        /* If instruction ran in kernel mode and the I/O buffer is in kernel space */
        if (!user_mode(ctxt->regs) && !access_ok(target, size)) {
                memcpy(dst, buf, size);
                return ES_OK;
        }

        switch (size) {
        case 1:
                memcpy(&d1, buf, 1);
                if (put_user(d1, target))
                        goto fault;
                break;
        case 2:
                memcpy(&d2, buf, 2);
                if (put_user(d2, target))
                        goto fault;
                break;
        case 4:
                memcpy(&d4, buf, 4);
                if (put_user(d4, target))
                        goto fault;
                break;
        case 8:
                memcpy(&d8, buf, 8);
                if (put_user(d8, target))
                        goto fault;
                break;
        default:
                WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
                return ES_UNSUPPORTED;
        }

        return ES_OK;

fault:
        if (user_mode(ctxt->regs))
                error_code |= X86_PF_USER;

        ctxt->fi.vector = X86_TRAP_PF;
        ctxt->fi.error_code = error_code;
        ctxt->fi.cr2 = (unsigned long)dst;

        return ES_EXCEPTION;
}

static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
                                  char *src, char *buf, size_t size)
{
        unsigned long error_code = X86_PF_PROT;
        char __user *s = (char __user *)src;
        u64 d8;
        u32 d4;
        u16 d2;
        u8 d1;

        /* If instruction ran in kernel mode and the I/O buffer is in kernel space */
        if (!user_mode(ctxt->regs) && !access_ok(s, size)) {
                memcpy(buf, src, size);
                return ES_OK;
        }

        switch (size) {
        case 1:
                if (get_user(d1, s))
                        goto fault;
                memcpy(buf, &d1, 1);
                break;
        case 2:
                if (get_user(d2, s))
                        goto fault;
                memcpy(buf, &d2, 2);
                break;
        case 4:
                if (get_user(d4, s))
                        goto fault;
                memcpy(buf, &d4, 4);
                break;
        case 8:
                if (get_user(d8, s))
                        goto fault;
                memcpy(buf, &d8, 8);
                break;
        default:
                WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
                return ES_UNSUPPORTED;
        }

        return ES_OK;

fault:
        if (user_mode(ctxt->regs))
                error_code |= X86_PF_USER;

        ctxt->fi.vector = X86_TRAP_PF;
        ctxt->fi.error_code = error_code;
        ctxt->fi.cr2 = (unsigned long)src;

        return ES_EXCEPTION;
}

static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
                                           unsigned long vaddr, phys_addr_t *paddr)
{
        unsigned long va = (unsigned long)vaddr;
        unsigned int level;
        phys_addr_t pa;
        pgd_t *pgd;
        pte_t *pte;

        pgd = __va(read_cr3_pa());
        pgd = &pgd[pgd_index(va)];
        pte = lookup_address_in_pgd(pgd, va, &level);
        if (!pte) {
                ctxt->fi.vector = X86_TRAP_PF;
                ctxt->fi.cr2 = vaddr;
                ctxt->fi.error_code = 0;

                if (user_mode(ctxt->regs))
                        ctxt->fi.error_code |= X86_PF_USER;

                return ES_EXCEPTION;
        }

        if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC))
                /* Emulated MMIO to/from encrypted memory not supported */
                return ES_UNSUPPORTED;

        pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
        pa |= va & ~page_level_mask(level);

        *paddr = pa;

        return ES_OK;
}

/* Include code shared with pre-decompression boot stage */
#include "sev-es-shared.c"

void noinstr __sev_es_nmi_complete(void)
{
        struct ghcb_state state;
        struct ghcb *ghcb;

        ghcb = sev_es_get_ghcb(&state);

        vc_ghcb_invalidate(ghcb);
        ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE);
        ghcb_set_sw_exit_info_1(ghcb, 0);
        ghcb_set_sw_exit_info_2(ghcb, 0);

        sev_es_wr_ghcb_msr(__pa_nodebug(ghcb));
        VMGEXIT();

        sev_es_put_ghcb(&state);
}

static u64 get_jump_table_addr(void)
{
        struct ghcb_state state;
        unsigned long flags;
        struct ghcb *ghcb;
        u64 ret = 0;

        local_irq_save(flags);

        ghcb = sev_es_get_ghcb(&state);

        vc_ghcb_invalidate(ghcb);
        ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
        ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE);
        ghcb_set_sw_exit_info_2(ghcb, 0);

        sev_es_wr_ghcb_msr(__pa(ghcb));
        VMGEXIT();

        if (ghcb_sw_exit_info_1_is_valid(ghcb) &&
            ghcb_sw_exit_info_2_is_valid(ghcb))
                ret = ghcb->save.sw_exit_info_2;

        sev_es_put_ghcb(&state);

        local_irq_restore(flags);

        return ret;
}

int sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
{
        u16 startup_cs, startup_ip;
        phys_addr_t jump_table_pa;
        u64 jump_table_addr;
        u16 __iomem *jump_table;

        jump_table_addr = get_jump_table_addr();

        /* On UP guests there is no jump table so this is not a failure */
        if (!jump_table_addr)
                return 0;

        /* Check if AP Jump Table is page-aligned */
        if (jump_table_addr & ~PAGE_MASK)
                return -EINVAL;

        jump_table_pa = jump_table_addr & PAGE_MASK;

        startup_cs = (u16)(rmh->trampoline_start >> 4);
        startup_ip = (u16)(rmh->sev_es_trampoline_start -
                           rmh->trampoline_start);

        jump_table = ioremap_encrypted(jump_table_pa, PAGE_SIZE);
        if (!jump_table)
                return -EIO;

        writew(startup_ip, &jump_table[0]);
        writew(startup_cs, &jump_table[1]);

        iounmap(jump_table);

        return 0;
}
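
/*
 * Jump table layout as written above (illustrative): word 0 holds the
 * real-mode IP and word 1 the real-mode CS that a woken-up AP jumps to.
 * startup_cs is the trampoline's real-mode segment (physical address >> 4)
 * and startup_ip is the offset of the SEV-ES trampoline entry within that
 * segment.
 */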

/*
 * This is needed by the OVMF UEFI firmware which will use whatever it finds in
 * the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu
 * runtime GHCBs used by the kernel are also mapped in the EFI page-table.
 */
int __init sev_es_efi_map_ghcbs(pgd_t *pgd)
{
        struct sev_es_runtime_data *data;
        unsigned long address, pflags;
        int cpu;
        u64 pfn;

        if (!sev_es_active())
                return 0;

        pflags = _PAGE_NX | _PAGE_RW;

        for_each_possible_cpu(cpu) {
                data = per_cpu(runtime_data, cpu);

                address = __pa(&data->ghcb_page);
                pfn = address >> PAGE_SHIFT;

                if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags))
                        return 1;
        }

        return 0;
}

static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
        struct pt_regs *regs = ctxt->regs;
        enum es_result ret;
        u64 exit_info_1;

        /* Is it a WRMSR? */
        exit_info_1 = (ctxt->insn.opcode.bytes[1] == 0x30) ? 1 : 0;

        ghcb_set_rcx(ghcb, regs->cx);
        if (exit_info_1) {
                ghcb_set_rax(ghcb, regs->ax);
                ghcb_set_rdx(ghcb, regs->dx);
        }

        ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, exit_info_1, 0);

        if ((ret == ES_OK) && (!exit_info_1)) {
                regs->ax = ghcb->save.rax;
                regs->dx = ghcb->save.rdx;
        }

        return ret;
}

/*
 * This function runs on the first #VC exception after the kernel
 * switched to virtual addresses.
 */
static bool __init sev_es_setup_ghcb(void)
{
        /* First make sure the hypervisor talks a supported protocol. */
        if (!sev_es_negotiate_protocol())
                return false;

        /*
         * Clear the boot_ghcb. The first exception comes in before the bss
         * section is cleared.
         */
        memset(&boot_ghcb_page, 0, PAGE_SIZE);

        /* Alright - Make the boot-ghcb public */
        boot_ghcb = &boot_ghcb_page;

        return true;
}

#ifdef CONFIG_HOTPLUG_CPU
static void sev_es_ap_hlt_loop(void)
{
        struct ghcb_state state;
        struct ghcb *ghcb;

        ghcb = sev_es_get_ghcb(&state);

        while (true) {
                vc_ghcb_invalidate(ghcb);
                ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP);
                ghcb_set_sw_exit_info_1(ghcb, 0);
                ghcb_set_sw_exit_info_2(ghcb, 0);

                sev_es_wr_ghcb_msr(__pa(ghcb));
                VMGEXIT();

                /* Wakeup signal? */
                if (ghcb_sw_exit_info_2_is_valid(ghcb) &&
                    ghcb->save.sw_exit_info_2)
                        break;
        }

        sev_es_put_ghcb(&state);
}

/*
 * Play_dead handler when running under SEV-ES. This is needed because
 * the hypervisor can't deliver an SIPI request to restart the AP.
 * Instead the kernel has to issue a VMGEXIT to halt the VCPU until the
 * hypervisor wakes it up again.
 */
static void sev_es_play_dead(void)
{
        play_dead_common();

        /* IRQs now disabled */

        sev_es_ap_hlt_loop();

        /*
         * If we get here, the VCPU was woken up again. Jump to CPU
         * startup code to get it back online.
         */
        start_cpu0();
}
#else /* CONFIG_HOTPLUG_CPU */
#define sev_es_play_dead native_play_dead
#endif /* CONFIG_HOTPLUG_CPU */

#ifdef CONFIG_SMP
static void __init sev_es_setup_play_dead(void)
{
        smp_ops.play_dead = sev_es_play_dead;
}
#else
static inline void sev_es_setup_play_dead(void) { }
#endif

static void __init alloc_runtime_data(int cpu)
{
        struct sev_es_runtime_data *data;

        data = memblock_alloc(sizeof(*data), PAGE_SIZE);
        if (!data)
                panic("Can't allocate SEV-ES runtime data");

        per_cpu(runtime_data, cpu) = data;
}

static void __init init_ghcb(int cpu)
{
        struct sev_es_runtime_data *data;
        int err;

        data = per_cpu(runtime_data, cpu);

        err = early_set_memory_decrypted((unsigned long)&data->ghcb_page,
                                          sizeof(data->ghcb_page));
        if (err)
                panic("Can't map GHCBs unencrypted");

        memset(&data->ghcb_page, 0, sizeof(data->ghcb_page));

        data->ghcb_active = false;
        data->backup_ghcb_active = false;
}

void __init sev_es_init_vc_handling(void)
{
        int cpu;

        BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE);

        if (!sev_es_active())
                return;

        if (!sev_es_check_cpu_features())
                panic("SEV-ES CPU Features missing");

        /* Enable SEV-ES special handling */
        static_branch_enable(&sev_es_enable_key);

        /* Initialize per-cpu GHCB pages */
        for_each_possible_cpu(cpu) {
                alloc_runtime_data(cpu);
                init_ghcb(cpu);
                setup_vc_stacks(cpu);
        }

        sev_es_setup_play_dead();

        /* Secondary CPUs use the runtime #VC handler */
        initial_vc_handler = (unsigned long)safe_stack_exc_vmm_communication;
}

static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
{
        int trapnr = ctxt->fi.vector;

        if (trapnr == X86_TRAP_PF)
                native_write_cr2(ctxt->fi.cr2);

        ctxt->regs->orig_ax = ctxt->fi.error_code;
        do_early_exception(ctxt->regs, trapnr);
}

static long *vc_insn_get_reg(struct es_em_ctxt *ctxt)
{
        long *reg_array;
        int offset;

        reg_array = (long *)ctxt->regs;
        offset = insn_get_modrm_reg_off(&ctxt->insn, ctxt->regs);

        if (offset < 0)
                return NULL;

        offset /= sizeof(long);

        return reg_array + offset;
}

static long *vc_insn_get_rm(struct es_em_ctxt *ctxt)
{
        long *reg_array;
        int offset;

        reg_array = (long *)ctxt->regs;
        offset = insn_get_modrm_rm_off(&ctxt->insn, ctxt->regs);

        if (offset < 0)
                return NULL;

        offset /= sizeof(long);

        return reg_array + offset;
}

static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
                                 unsigned int bytes, bool read)
{
        u64 exit_code, exit_info_1, exit_info_2;
        unsigned long ghcb_pa = __pa(ghcb);
        enum es_result res;
        phys_addr_t paddr;
        void __user *ref;

        ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs);
        if (ref == (void __user *)-1L)
                return ES_UNSUPPORTED;

        exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE;

        res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr);
        if (res != ES_OK) {
                if (res == ES_EXCEPTION && !read)
                        ctxt->fi.error_code |= X86_PF_WRITE;

                return res;
        }

        exit_info_1 = paddr;
        /* Can never be greater than 8 */
        exit_info_2 = bytes;

        ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer));

        return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2);
}
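
/*
 * Note on the scratch area (illustrative): vc_do_mmio() points the GHCB
 * sw_scratch field at the GHCB's own shared_buffer, so the at most eight
 * bytes of MMIO data are exchanged with the hypervisor through that
 * unencrypted buffer. The callers below copy register or immediate data
 * into shared_buffer before an MMIO write and read the result out of it
 * after an MMIO read.
 */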

static enum es_result vc_handle_mmio_twobyte_ops(struct ghcb *ghcb,
                                                 struct es_em_ctxt *ctxt)
{
        struct insn *insn = &ctxt->insn;
        unsigned int bytes = 0;
        enum es_result ret;
        int sign_byte;
        long *reg_data;

        switch (insn->opcode.bytes[1]) {
        /* MMIO Read w/ zero-extension */
        case 0xb6:
                bytes = 1;
                fallthrough;
        case 0xb7:
                if (!bytes)
                        bytes = 2;

                ret = vc_do_mmio(ghcb, ctxt, bytes, true);
                if (ret)
                        break;

                /* Zero extend based on operand size */
                reg_data = vc_insn_get_reg(ctxt);
                if (!reg_data)
                        return ES_DECODE_FAILED;

                memset(reg_data, 0, insn->opnd_bytes);

                memcpy(reg_data, ghcb->shared_buffer, bytes);
                break;

        /* MMIO Read w/ sign-extension */
        case 0xbe:
                bytes = 1;
                fallthrough;
        case 0xbf:
                if (!bytes)
                        bytes = 2;

                ret = vc_do_mmio(ghcb, ctxt, bytes, true);
                if (ret)
                        break;

                /* Sign extend based on operand size */
                reg_data = vc_insn_get_reg(ctxt);
                if (!reg_data)
                        return ES_DECODE_FAILED;

                if (bytes == 1) {
                        u8 *val = (u8 *)ghcb->shared_buffer;

                        sign_byte = (*val & 0x80) ? 0xff : 0x00;
                } else {
                        u16 *val = (u16 *)ghcb->shared_buffer;

                        sign_byte = (*val & 0x8000) ? 0xff : 0x00;
                }
                memset(reg_data, sign_byte, insn->opnd_bytes);

                memcpy(reg_data, ghcb->shared_buffer, bytes);
                break;

        default:
                ret = ES_UNSUPPORTED;
        }

        return ret;
}

/*
 * The MOVS instruction has two memory operands, which raises the
 * problem that it is not known whether the access to the source or the
 * destination caused the #VC exception (and hence whether an MMIO read
 * or write operation needs to be emulated).
 *
 * Instead of playing games with walking page-tables and trying to guess
 * whether the source or destination is an MMIO range, split the move
 * into two operations, a read and a write with only one memory operand.
 * This will cause a nested #VC exception on the MMIO address which can
 * then be handled.
 *
 * This implementation has the benefit that it also supports MOVS where
 * source _and_ destination are MMIO regions.
 *
 * It will slow MOVS on MMIO down a lot, but in SEV-ES guests it is a
 * rare operation. If it turns out to be a performance problem the split
 * operations can be moved to memcpy_fromio() and memcpy_toio().
 */
static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt,
                                          unsigned int bytes)
{
        unsigned long ds_base, es_base;
        unsigned char *src, *dst;
        unsigned char buffer[8];
        enum es_result ret;
        bool rep;
        int off;

        ds_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_DS);
        es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);

        if (ds_base == -1L || es_base == -1L) {
                ctxt->fi.vector = X86_TRAP_GP;
                ctxt->fi.error_code = 0;
                return ES_EXCEPTION;
        }

        src = ds_base + (unsigned char *)ctxt->regs->si;
        dst = es_base + (unsigned char *)ctxt->regs->di;

        ret = vc_read_mem(ctxt, src, buffer, bytes);
        if (ret != ES_OK)
                return ret;

        ret = vc_write_mem(ctxt, dst, buffer, bytes);
        if (ret != ES_OK)
                return ret;

        if (ctxt->regs->flags & X86_EFLAGS_DF)
                off = -bytes;
        else
                off = bytes;

        ctxt->regs->si += off;
        ctxt->regs->di += off;

        rep = insn_has_rep_prefix(&ctxt->insn);
        if (rep)
                ctxt->regs->cx -= 1;

        if (!rep || ctxt->regs->cx == 0)
                return ES_OK;
        else
                return ES_RETRY;
}
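
/*
 * ES_RETRY sketch for a REP MOVS (illustrative): the #VC handlers only
 * advance the instruction pointer on ES_OK, so returning ES_RETRY above
 * makes the guest re-execute the same MOVS with SI/DI already advanced and
 * CX already decremented; one element is moved per #VC exception until CX
 * reaches zero and ES_OK terminates the loop.
 */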

static enum es_result vc_handle_mmio(struct ghcb *ghcb,
                                     struct es_em_ctxt *ctxt)
{
        struct insn *insn = &ctxt->insn;
        unsigned int bytes = 0;
        enum es_result ret;
        long *reg_data;

        switch (insn->opcode.bytes[0]) {
        /* MMIO Write */
        case 0x88:
                bytes = 1;
                fallthrough;
        case 0x89:
                if (!bytes)
                        bytes = insn->opnd_bytes;

                reg_data = vc_insn_get_reg(ctxt);
                if (!reg_data)
                        return ES_DECODE_FAILED;

                memcpy(ghcb->shared_buffer, reg_data, bytes);

                ret = vc_do_mmio(ghcb, ctxt, bytes, false);
                break;

        case 0xc6:
                bytes = 1;
                fallthrough;
        case 0xc7:
                if (!bytes)
                        bytes = insn->opnd_bytes;

                memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes);

                ret = vc_do_mmio(ghcb, ctxt, bytes, false);
                break;

        /* MMIO Read */
        case 0x8a:
                bytes = 1;
                fallthrough;
        case 0x8b:
                if (!bytes)
                        bytes = insn->opnd_bytes;

                ret = vc_do_mmio(ghcb, ctxt, bytes, true);
                if (ret)
                        break;

                reg_data = vc_insn_get_reg(ctxt);
                if (!reg_data)
                        return ES_DECODE_FAILED;

                /* Zero-extend for 32-bit operation */
                if (bytes == 4)
                        *reg_data = 0;

                memcpy(reg_data, ghcb->shared_buffer, bytes);
                break;

        /* MOVS instruction */
        case 0xa4:
                bytes = 1;
                fallthrough;
        case 0xa5:
                if (!bytes)
                        bytes = insn->opnd_bytes;

                ret = vc_handle_mmio_movs(ctxt, bytes);
                break;
        /* Two-Byte Opcodes */
        case 0x0f:
                ret = vc_handle_mmio_twobyte_ops(ghcb, ctxt);
                break;
        default:
                ret = ES_UNSUPPORTED;
        }

        return ret;
}

static enum es_result vc_handle_dr7_write(struct ghcb *ghcb,
                                          struct es_em_ctxt *ctxt)
{
        struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
        long val, *reg = vc_insn_get_rm(ctxt);
        enum es_result ret;

        if (!reg)
                return ES_DECODE_FAILED;

        val = *reg;

        /* Upper 32 bits must be written as zeroes */
        if (val >> 32) {
                ctxt->fi.vector = X86_TRAP_GP;
                ctxt->fi.error_code = 0;
                return ES_EXCEPTION;
        }

        /* Clear out other reserved bits and set bit 10 */
        val = (val & 0xffff23ffL) | BIT(10);

        /* Early non-zero writes to DR7 are not supported */
        if (!data && (val & ~DR7_RESET_VALUE))
                return ES_UNSUPPORTED;

        /* Using a value of 0 for ExitInfo1 means RAX holds the value */
        ghcb_set_rax(ghcb, val);
        ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
        if (ret != ES_OK)
                return ret;

        if (data)
                data->dr7 = val;

        return ES_OK;
}

static enum es_result vc_handle_dr7_read(struct ghcb *ghcb,
                                         struct es_em_ctxt *ctxt)
{
        struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
        long *reg = vc_insn_get_rm(ctxt);

        if (!reg)
                return ES_DECODE_FAILED;

        if (data)
                *reg = data->dr7;
        else
                *reg = DR7_RESET_VALUE;

        return ES_OK;
}

static enum es_result vc_handle_wbinvd(struct ghcb *ghcb,
                                       struct es_em_ctxt *ctxt)
{
        return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0);
}

static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
        enum es_result ret;

        ghcb_set_rcx(ghcb, ctxt->regs->cx);

        ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0);
        if (ret != ES_OK)
                return ret;

        if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb)))
                return ES_VMM_ERROR;

        ctxt->regs->ax = ghcb->save.rax;
        ctxt->regs->dx = ghcb->save.rdx;

        return ES_OK;
}

static enum es_result vc_handle_monitor(struct ghcb *ghcb,
                                        struct es_em_ctxt *ctxt)
{
        /*
         * Treat it as a NOP and do not leak a physical address to the
         * hypervisor.
         */
        return ES_OK;
}

static enum es_result vc_handle_mwait(struct ghcb *ghcb,
                                      struct es_em_ctxt *ctxt)
{
        /* Treat the same as MONITOR/MONITORX */
        return ES_OK;
}

static enum es_result vc_handle_vmmcall(struct ghcb *ghcb,
                                        struct es_em_ctxt *ctxt)
{
        enum es_result ret;

        ghcb_set_rax(ghcb, ctxt->regs->ax);
        ghcb_set_cpl(ghcb, user_mode(ctxt->regs) ? 3 : 0);

        if (x86_platform.hyper.sev_es_hcall_prepare)
                x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs);

        ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0);
        if (ret != ES_OK)
                return ret;

        if (!ghcb_rax_is_valid(ghcb))
                return ES_VMM_ERROR;

        ctxt->regs->ax = ghcb->save.rax;

        /*
         * Call sev_es_hcall_finish() after regs->ax is already set.
         * This allows the hypervisor handler to overwrite it again if
         * necessary.
         */
        if (x86_platform.hyper.sev_es_hcall_finish &&
            !x86_platform.hyper.sev_es_hcall_finish(ghcb, ctxt->regs))
                return ES_VMM_ERROR;

        return ES_OK;
}

static enum es_result vc_handle_trap_ac(struct ghcb *ghcb,
                                        struct es_em_ctxt *ctxt)
{
        /*
         * Calling exc_alignment_check() directly does not work, because it
         * enables IRQs and the GHCB is active. Forward the exception and call
         * it later from vc_forward_exception().
         */
        ctxt->fi.vector = X86_TRAP_AC;
        ctxt->fi.error_code = 0;
        return ES_EXCEPTION;
}

static __always_inline void vc_handle_trap_db(struct pt_regs *regs)
{
        if (user_mode(regs))
                noist_exc_debug(regs);
        else
                exc_debug(regs);
}

static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
                                         struct ghcb *ghcb,
                                         unsigned long exit_code)
{
        enum es_result result;

        switch (exit_code) {
        case SVM_EXIT_READ_DR7:
                result = vc_handle_dr7_read(ghcb, ctxt);
                break;
        case SVM_EXIT_WRITE_DR7:
                result = vc_handle_dr7_write(ghcb, ctxt);
                break;
        case SVM_EXIT_EXCP_BASE + X86_TRAP_AC:
                result = vc_handle_trap_ac(ghcb, ctxt);
                break;
        case SVM_EXIT_RDTSC:
        case SVM_EXIT_RDTSCP:
                result = vc_handle_rdtsc(ghcb, ctxt, exit_code);
                break;
        case SVM_EXIT_RDPMC:
                result = vc_handle_rdpmc(ghcb, ctxt);
                break;
        case SVM_EXIT_INVD:
                pr_err_ratelimited("#VC exception for INVD??? Seriously???\n");
                result = ES_UNSUPPORTED;
                break;
        case SVM_EXIT_CPUID:
                result = vc_handle_cpuid(ghcb, ctxt);
                break;
        case SVM_EXIT_IOIO:
                result = vc_handle_ioio(ghcb, ctxt);
                break;
        case SVM_EXIT_MSR:
                result = vc_handle_msr(ghcb, ctxt);
                break;
        case SVM_EXIT_VMMCALL:
                result = vc_handle_vmmcall(ghcb, ctxt);
                break;
        case SVM_EXIT_WBINVD:
                result = vc_handle_wbinvd(ghcb, ctxt);
                break;
        case SVM_EXIT_MONITOR:
                result = vc_handle_monitor(ghcb, ctxt);
                break;
        case SVM_EXIT_MWAIT:
                result = vc_handle_mwait(ghcb, ctxt);
                break;
        case SVM_EXIT_NPF:
                result = vc_handle_mmio(ghcb, ctxt);
                break;
        default:
                /*
                 * Unexpected #VC exception
                 */
                result = ES_UNSUPPORTED;
        }

        return result;
}

static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
{
        long error_code = ctxt->fi.error_code;
        int trapnr = ctxt->fi.vector;

        ctxt->regs->orig_ax = ctxt->fi.error_code;

        switch (trapnr) {
        case X86_TRAP_GP:
                exc_general_protection(ctxt->regs, error_code);
                break;
        case X86_TRAP_UD:
                exc_invalid_op(ctxt->regs);
                break;
        case X86_TRAP_AC:
                exc_alignment_check(ctxt->regs, error_code);
                break;
        default:
                pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n");
                BUG();
        }
}

static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs)
{
        unsigned long sp = (unsigned long)regs;

        return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
}

/*
 * Main #VC exception handler. It is called when the entry code was able to
 * switch off the IST to a safe kernel stack.
 *
 * With the current implementation it is always possible to switch to a safe
 * stack because #VC exceptions only happen at known places, like intercepted
 * instructions or accesses to MMIO areas/IO ports. They can also happen with
 * code instrumentation when the hypervisor intercepts #DB, but the critical
 * paths are forbidden to be instrumented, so #DB exceptions currently also
 * only happen in safe places.
 */
DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
{
        struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
        struct ghcb_state state;
        struct es_em_ctxt ctxt;
        enum es_result result;
        struct ghcb *ghcb;

        lockdep_assert_irqs_disabled();

        /*
         * Handle #DB before calling into !noinstr code to avoid recursive #DB.
         */
        if (error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB) {
                vc_handle_trap_db(regs);
                return;
        }

        instrumentation_begin();

        /*
         * This is invoked through an interrupt gate, so IRQs are disabled. The
         * code below might walk page-tables for user or kernel addresses, so
         * keep the IRQs disabled to protect us against concurrent TLB flushes.
         */

        ghcb = sev_es_get_ghcb(&state);
        if (!ghcb) {
                /*
                 * Mark GHCBs inactive so that panic() is able to print the
                 * message.
                 */
                data->ghcb_active = false;
                data->backup_ghcb_active = false;

                panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
        }

        vc_ghcb_invalidate(ghcb);
        result = vc_init_em_ctxt(&ctxt, regs, error_code);

        if (result == ES_OK)
                result = vc_handle_exitcode(&ctxt, ghcb, error_code);

        sev_es_put_ghcb(&state);

        /* Done - now check the result */
        switch (result) {
        case ES_OK:
                vc_finish_insn(&ctxt);
                break;
        case ES_UNSUPPORTED:
1300 | pr_err_ratelimited("Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n", | |
1301 | error_code, regs->ip); | |
                goto fail;
        case ES_VMM_ERROR:
                pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
                                   error_code, regs->ip);
                goto fail;
        case ES_DECODE_FAILED:
                pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
                                   error_code, regs->ip);
                goto fail;
        case ES_EXCEPTION:
                vc_forward_exception(&ctxt);
                break;
        case ES_RETRY:
                /* Nothing to do */
                break;
        default:
                pr_emerg("Unknown result in %s():%d\n", __func__, result);
                /*
                 * Emulating the instruction which caused the #VC exception
                 * failed - can't continue so print debug information
                 */
                BUG();
        }

out:
        instrumentation_end();

        return;

fail:
        if (user_mode(regs)) {
                /*
                 * Do not kill the machine if user-space triggered the
                 * exception. Send SIGBUS instead and let user-space deal with
                 * it.
                 */
                force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
        } else {
                pr_emerg("PANIC: Unhandled #VC exception in kernel space (result=%d)\n",
                         result);

                /* Show some debug info */
                show_regs(regs);

                /* Ask hypervisor to sev_es_terminate */
                sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);

                /* If that fails and we get here - just panic */
                panic("Returned from Terminate-Request to Hypervisor\n");
        }

        goto out;
}

/* This handler runs on the #VC fall-back stack. It can cause further #VC exceptions */
DEFINE_IDTENTRY_VC_IST(exc_vmm_communication)
{
        instrumentation_begin();
        panic("Can't handle #VC exception from unsupported context\n");
        instrumentation_end();
}

DEFINE_IDTENTRY_VC(exc_vmm_communication)
{
        if (likely(!on_vc_fallback_stack(regs)))
                safe_stack_exc_vmm_communication(regs, error_code);
        else
                ist_exc_vmm_communication(regs, error_code);
}

bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
{
        unsigned long exit_code = regs->orig_ax;
        struct es_em_ctxt ctxt;
        enum es_result result;

        /* Do initial setup or terminate the guest */
        if (unlikely(boot_ghcb == NULL && !sev_es_setup_ghcb()))
                sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);

        vc_ghcb_invalidate(boot_ghcb);

        result = vc_init_em_ctxt(&ctxt, regs, exit_code);
        if (result == ES_OK)
                result = vc_handle_exitcode(&ctxt, boot_ghcb, exit_code);

        /* Done - now check the result */
        switch (result) {
        case ES_OK:
                vc_finish_insn(&ctxt);
                break;
        case ES_UNSUPPORTED:
                early_printk("PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
                             exit_code, regs->ip);
                goto fail;
        case ES_VMM_ERROR:
                early_printk("PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
                             exit_code, regs->ip);
                goto fail;
        case ES_DECODE_FAILED:
                early_printk("PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
                             exit_code, regs->ip);
                goto fail;
        case ES_EXCEPTION:
                vc_early_forward_exception(&ctxt);
                break;
        case ES_RETRY:
                /* Nothing to do */
                break;
        default:
                BUG();
        }

        return true;

fail:
        show_regs(regs);

        while (true)
                halt();
}