// SPDX-License-Identifier: GPL-2.0-only
/*
 * AMD Memory Encryption Support
 *
 * Copyright (C) 2019 SUSE
 *
 * Author: Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt)	"SEV-ES: " fmt

#include <linux/sched/debug.h>	/* For show_regs() */
#include <linux/percpu-defs.h>
#include <linux/mem_encrypt.h>
#include <linux/lockdep.h>
#include <linux/printk.h>
#include <linux/mm_types.h>
#include <linux/set_memory.h>
#include <linux/memblock.h>
#include <linux/kernel.h>
#include <linux/mm.h>

#include <asm/cpu_entry_area.h>
#include <asm/sev-es.h>
#include <asm/insn-eval.h>
#include <asm/fpu/internal.h>
#include <asm/processor.h>
#include <asm/realmode.h>
#include <asm/traps.h>
#include <asm/svm.h>

#define DR7_RESET_VALUE	0x400

/* For early boot hypervisor communication in SEV-ES enabled guests */
static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);

/*
 * Needs to be in the .data section because we need it NULL before bss is
 * cleared
 */
static struct ghcb __initdata *boot_ghcb;

/* #VC handler runtime per-CPU data */
struct sev_es_runtime_data {
	struct ghcb ghcb_page;

	/* Physical storage for the per-CPU IST stack of the #VC handler */
	char ist_stack[EXCEPTION_STKSZ] __aligned(PAGE_SIZE);

	/*
	 * Physical storage for the per-CPU fall-back stack of the #VC handler.
	 * The fall-back stack is used when it is not safe to switch back to the
	 * interrupted stack in the #VC entry code.
	 */
	char fallback_stack[EXCEPTION_STKSZ] __aligned(PAGE_SIZE);

	/*
	 * Reserve one page per CPU as backup storage for the unencrypted GHCB.
	 * It is needed when an NMI happens while the #VC handler uses the real
	 * GHCB, and the NMI handler itself is causing another #VC exception. In
	 * that case the GHCB content of the first handler needs to be backed up
	 * and restored.
	 */
	struct ghcb backup_ghcb;

	/*
	 * Mark the per-CPU GHCBs as in-use to detect nested #VC exceptions.
	 * There is no need for it to be atomic, because nothing is written to
	 * the GHCB between the read and the write of ghcb_active. So it is safe
	 * to use it when a nested #VC exception happens before the write.
	 *
	 * This is necessary for example in the #VC->NMI->#VC case when the NMI
	 * happens while the first #VC handler uses the GHCB. When the NMI code
	 * raises a second #VC exception, its handler might overwrite the
	 * contents of the GHCB written by the first handler. To avoid this the
	 * content of the GHCB is saved and restored when the GHCB is detected
	 * to be in use already.
	 */
	bool ghcb_active;
	bool backup_ghcb_active;

	/*
	 * Cached DR7 value - write it on DR7 writes and return it on reads.
	 * That value will never make it to the real hardware DR7 as debugging
	 * is currently unsupported in SEV-ES guests.
	 */
	unsigned long dr7;
};

struct ghcb_state {
	struct ghcb *ghcb;
};

static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
DEFINE_STATIC_KEY_FALSE(sev_es_enable_key);

/* Needed in vc_early_forward_exception */
void do_early_exception(struct pt_regs *regs, int trapnr);

static void __init setup_vc_stacks(int cpu)
{
	struct sev_es_runtime_data *data;
	struct cpu_entry_area *cea;
	unsigned long vaddr;
	phys_addr_t pa;

	data = per_cpu(runtime_data, cpu);
	cea = get_cpu_entry_area(cpu);

	/* Map #VC IST stack */
	vaddr = CEA_ESTACK_BOT(&cea->estacks, VC);
	pa = __pa(data->ist_stack);
	cea_set_pte((void *)vaddr, pa, PAGE_KERNEL);

	/* Map VC fall-back stack */
	vaddr = CEA_ESTACK_BOT(&cea->estacks, VC2);
	pa = __pa(data->fallback_stack);
	cea_set_pte((void *)vaddr, pa, PAGE_KERNEL);
}

static __always_inline bool on_vc_stack(unsigned long sp)
{
	return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC)));
}

/*
 * This function handles the case when an NMI is raised in the #VC exception
 * handler entry code. In this case, the IST entry for #VC must be adjusted, so
 * that any subsequent #VC exception will not overwrite the stack contents of
 * the interrupted #VC handler.
 *
 * The IST entry is adjusted unconditionally so that it can also be
 * unconditionally adjusted back in sev_es_ist_exit(). Otherwise a nested
 * sev_es_ist_exit() call may adjust back the IST entry too early.
 */
void noinstr __sev_es_ist_enter(struct pt_regs *regs)
{
	unsigned long old_ist, new_ist;

	/* Read old IST entry */
	old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);

	/* Make room on the IST stack */
	if (on_vc_stack(regs->sp))
		new_ist = ALIGN_DOWN(regs->sp, 8) - sizeof(old_ist);
	else
		new_ist = old_ist - sizeof(old_ist);

	/* Store old IST entry */
	*(unsigned long *)new_ist = old_ist;

	/* Set new IST entry */
	this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], new_ist);
}

void noinstr __sev_es_ist_exit(void)
{
	unsigned long ist;

	/* Read IST entry */
	ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);

	if (WARN_ON(ist == __this_cpu_ist_top_va(VC)))
		return;

	/* Read back old IST entry and write it to the TSS */
	this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist);
}

static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state)
{
	struct sev_es_runtime_data *data;
	struct ghcb *ghcb;

	data = this_cpu_read(runtime_data);
	ghcb = &data->ghcb_page;

	if (unlikely(data->ghcb_active)) {
		/* GHCB is already in use - save its contents */

		if (unlikely(data->backup_ghcb_active))
			return NULL;

		/* Mark backup_ghcb active before writing to it */
		data->backup_ghcb_active = true;

		state->ghcb = &data->backup_ghcb;

		/* Backup GHCB content */
		*state->ghcb = *ghcb;
	} else {
		state->ghcb = NULL;
		data->ghcb_active = true;
	}

	return ghcb;
}

static __always_inline void sev_es_put_ghcb(struct ghcb_state *state)
{
	struct sev_es_runtime_data *data;
	struct ghcb *ghcb;

	data = this_cpu_read(runtime_data);
	ghcb = &data->ghcb_page;

	if (state->ghcb) {
		/* Restore GHCB from Backup */
		*ghcb = *state->ghcb;
		data->backup_ghcb_active = false;
		state->ghcb = NULL;
	} else {
		data->ghcb_active = false;
	}
}

static inline u64 sev_es_rd_ghcb_msr(void)
{
	return __rdmsr(MSR_AMD64_SEV_ES_GHCB);
}

static inline void sev_es_wr_ghcb_msr(u64 val)
{
	u32 low, high;

	low = (u32)(val);
	high = (u32)(val >> 32);

	native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high);
}

static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt,
				unsigned char *buffer)
{
	return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
}

static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
{
	char buffer[MAX_INSN_SIZE];
	enum es_result ret;
	int res;

	if (user_mode(ctxt->regs)) {
		res = insn_fetch_from_user(ctxt->regs, buffer);
		if (!res) {
			ctxt->fi.vector = X86_TRAP_PF;
			ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
			ctxt->fi.cr2 = ctxt->regs->ip;
			return ES_EXCEPTION;
		}

		if (!insn_decode(&ctxt->insn, ctxt->regs, buffer, res))
			return ES_DECODE_FAILED;
	} else {
		res = vc_fetch_insn_kernel(ctxt, buffer);
		if (res) {
			ctxt->fi.vector = X86_TRAP_PF;
			ctxt->fi.error_code = X86_PF_INSTR;
			ctxt->fi.cr2 = ctxt->regs->ip;
			return ES_EXCEPTION;
		}

		insn_init(&ctxt->insn, buffer, MAX_INSN_SIZE - res, 1);
		insn_get_length(&ctxt->insn);
	}

	ret = ctxt->insn.immediate.got ? ES_OK : ES_DECODE_FAILED;

	return ret;
}

static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
				   char *dst, char *buf, size_t size)
{
	unsigned long error_code = X86_PF_PROT | X86_PF_WRITE;
	u64 d8;
	u32 d4;
	u16 d2;
	u8  d1;

	/*
	 * put_user() derives the access width from the pointer type, so the
	 * destination has to be cast to a pointer of the requested size.
	 */
	switch (size) {
	case 1:
		memcpy(&d1, buf, 1);
		if (put_user(d1, (u8 __user *)dst))
			goto fault;
		break;
	case 2:
		memcpy(&d2, buf, 2);
		if (put_user(d2, (u16 __user *)dst))
			goto fault;
		break;
	case 4:
		memcpy(&d4, buf, 4);
		if (put_user(d4, (u32 __user *)dst))
			goto fault;
		break;
	case 8:
		memcpy(&d8, buf, 8);
		if (put_user(d8, (u64 __user *)dst))
			goto fault;
		break;
	default:
		WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
		return ES_UNSUPPORTED;
	}

	return ES_OK;

fault:
	if (user_mode(ctxt->regs))
		error_code |= X86_PF_USER;

	ctxt->fi.vector = X86_TRAP_PF;
	ctxt->fi.error_code = error_code;
	ctxt->fi.cr2 = (unsigned long)dst;

	return ES_EXCEPTION;
}

static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
				  char *src, char *buf, size_t size)
{
	unsigned long error_code = X86_PF_PROT;
	u64 d8;
	u32 d4;
	u16 d2;
	u8  d1;

	/*
	 * get_user() derives the access width from the pointer type, so the
	 * source has to be cast to a pointer of the requested size.
	 */
	switch (size) {
	case 1:
		if (get_user(d1, (u8 __user *)src))
			goto fault;
		memcpy(buf, &d1, 1);
		break;
	case 2:
		if (get_user(d2, (u16 __user *)src))
			goto fault;
		memcpy(buf, &d2, 2);
		break;
	case 4:
		if (get_user(d4, (u32 __user *)src))
			goto fault;
		memcpy(buf, &d4, 4);
		break;
	case 8:
		if (get_user(d8, (u64 __user *)src))
			goto fault;
		memcpy(buf, &d8, 8);
		break;
	default:
		WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
		return ES_UNSUPPORTED;
	}

	return ES_OK;

fault:
	if (user_mode(ctxt->regs))
		error_code |= X86_PF_USER;

	ctxt->fi.vector = X86_TRAP_PF;
	ctxt->fi.error_code = error_code;
	ctxt->fi.cr2 = (unsigned long)src;

	return ES_EXCEPTION;
}

static bool vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
				 unsigned long vaddr, phys_addr_t *paddr)
{
	unsigned long va = (unsigned long)vaddr;
	unsigned int level;
	phys_addr_t pa;
	pgd_t *pgd;
	pte_t *pte;

	pgd = __va(read_cr3_pa());
	pgd = &pgd[pgd_index(va)];
	pte = lookup_address_in_pgd(pgd, va, &level);
	if (!pte) {
		ctxt->fi.vector = X86_TRAP_PF;
		ctxt->fi.cr2 = vaddr;
		ctxt->fi.error_code = 0;

		if (user_mode(ctxt->regs))
			ctxt->fi.error_code |= X86_PF_USER;

		return false;
	}

	pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
	pa |= va & ~page_level_mask(level);

	*paddr = pa;

	return true;
}

/* Include code shared with pre-decompression boot stage */
#include "sev-es-shared.c"

static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	struct pt_regs *regs = ctxt->regs;
	enum es_result ret;
	u64 exit_info_1;

	/* Is it a WRMSR? */
	exit_info_1 = (ctxt->insn.opcode.bytes[1] == 0x30) ? 1 : 0;

	ghcb_set_rcx(ghcb, regs->cx);
	if (exit_info_1) {
		ghcb_set_rax(ghcb, regs->ax);
		ghcb_set_rdx(ghcb, regs->dx);
	}

	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, exit_info_1, 0);

	if ((ret == ES_OK) && (!exit_info_1)) {
		regs->ax = ghcb->save.rax;
		regs->dx = ghcb->save.rdx;
	}

	return ret;
}

/*
 * This function runs on the first #VC exception after the kernel
 * switched to virtual addresses.
 */
static bool __init sev_es_setup_ghcb(void)
{
	/* First make sure the hypervisor talks a supported protocol. */
	if (!sev_es_negotiate_protocol())
		return false;

	/*
	 * Clear the boot_ghcb. The first exception comes in before the bss
	 * section is cleared.
	 */
	memset(&boot_ghcb_page, 0, PAGE_SIZE);

	/* Alright - Make the boot-ghcb public */
	boot_ghcb = &boot_ghcb_page;

	return true;
}

static void __init alloc_runtime_data(int cpu)
{
	struct sev_es_runtime_data *data;

	data = memblock_alloc(sizeof(*data), PAGE_SIZE);
	if (!data)
		panic("Can't allocate SEV-ES runtime data");

	per_cpu(runtime_data, cpu) = data;
}

static void __init init_ghcb(int cpu)
{
	struct sev_es_runtime_data *data;
	int err;

	data = per_cpu(runtime_data, cpu);

	err = early_set_memory_decrypted((unsigned long)&data->ghcb_page,
					 sizeof(data->ghcb_page));
	if (err)
		panic("Can't map GHCBs unencrypted");

	memset(&data->ghcb_page, 0, sizeof(data->ghcb_page));

	data->ghcb_active = false;
	data->backup_ghcb_active = false;
}

void __init sev_es_init_vc_handling(void)
{
	int cpu;

	BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE);

	if (!sev_es_active())
		return;

	/* Enable SEV-ES special handling */
	static_branch_enable(&sev_es_enable_key);

	/* Initialize per-cpu GHCB pages */
	for_each_possible_cpu(cpu) {
		alloc_runtime_data(cpu);
		init_ghcb(cpu);
		setup_vc_stacks(cpu);
	}

	/* Secondary CPUs use the runtime #VC handler */
	initial_vc_handler = (unsigned long)safe_stack_exc_vmm_communication;
}

static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
{
	int trapnr = ctxt->fi.vector;

	if (trapnr == X86_TRAP_PF)
		native_write_cr2(ctxt->fi.cr2);

	ctxt->regs->orig_ax = ctxt->fi.error_code;
	do_early_exception(ctxt->regs, trapnr);
}

static long *vc_insn_get_reg(struct es_em_ctxt *ctxt)
{
	long *reg_array;
	int offset;

	reg_array = (long *)ctxt->regs;
	offset = insn_get_modrm_reg_off(&ctxt->insn, ctxt->regs);

	if (offset < 0)
		return NULL;

	offset /= sizeof(long);

	return reg_array + offset;
}

static long *vc_insn_get_rm(struct es_em_ctxt *ctxt)
{
	long *reg_array;
	int offset;

	reg_array = (long *)ctxt->regs;
	offset = insn_get_modrm_rm_off(&ctxt->insn, ctxt->regs);

	if (offset < 0)
		return NULL;

	offset /= sizeof(long);

	return reg_array + offset;
}

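/*
 * Perform a single MMIO read or write of 'bytes' bytes. The data is
 * transferred through the GHCB shared buffer, and the target physical address
 * is derived from the memory operand of the faulting instruction.
 */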
static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
				 unsigned int bytes, bool read)
{
	u64 exit_code, exit_info_1, exit_info_2;
	unsigned long ghcb_pa = __pa(ghcb);
	phys_addr_t paddr;
	void __user *ref;

	ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs);
	if (ref == (void __user *)-1L)
		return ES_UNSUPPORTED;

	exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE;

	if (!vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr)) {
		if (!read)
			ctxt->fi.error_code |= X86_PF_WRITE;

		return ES_EXCEPTION;
	}

	exit_info_1 = paddr;
	/* Can never be greater than 8 */
	exit_info_2 = bytes;

	ghcb->save.sw_scratch = ghcb_pa + offsetof(struct ghcb, shared_buffer);

	return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2);
}

static enum es_result vc_handle_mmio_twobyte_ops(struct ghcb *ghcb,
						 struct es_em_ctxt *ctxt)
{
	struct insn *insn = &ctxt->insn;
	unsigned int bytes = 0;
	enum es_result ret;
	int sign_byte;
	long *reg_data;

	switch (insn->opcode.bytes[1]) {
	/* MMIO Read w/ zero-extension */
	case 0xb6:
		bytes = 1;
		fallthrough;
	case 0xb7:
		if (!bytes)
			bytes = 2;

		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
		if (ret)
			break;

		/* Zero extend based on operand size */
		reg_data = vc_insn_get_reg(ctxt);
		if (!reg_data)
			return ES_DECODE_FAILED;

		memset(reg_data, 0, insn->opnd_bytes);

		memcpy(reg_data, ghcb->shared_buffer, bytes);
		break;

	/* MMIO Read w/ sign-extension */
	case 0xbe:
		bytes = 1;
		fallthrough;
	case 0xbf:
		if (!bytes)
			bytes = 2;

		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
		if (ret)
			break;

		/* Sign extend based on operand size */
		reg_data = vc_insn_get_reg(ctxt);
		if (!reg_data)
			return ES_DECODE_FAILED;

		if (bytes == 1) {
			u8 *val = (u8 *)ghcb->shared_buffer;

			sign_byte = (*val & 0x80) ? 0xff : 0x00;
		} else {
			u16 *val = (u16 *)ghcb->shared_buffer;

			sign_byte = (*val & 0x8000) ? 0xff : 0x00;
		}
		memset(reg_data, sign_byte, insn->opnd_bytes);

		memcpy(reg_data, ghcb->shared_buffer, bytes);
		break;

	default:
		ret = ES_UNSUPPORTED;
	}

	return ret;
}

/*
 * The MOVS instruction has two memory operands, which raises the
 * problem that it is not known whether the access to the source or the
 * destination caused the #VC exception (and hence whether an MMIO read
 * or write operation needs to be emulated).
 *
 * Instead of playing games with walking page-tables and trying to guess
 * whether the source or destination is an MMIO range, split the move
 * into two operations, a read and a write with only one memory operand.
 * This will cause a nested #VC exception on the MMIO address which can
 * then be handled.
 *
 * This implementation has the benefit that it also supports MOVS where
 * source _and_ destination are MMIO regions.
 *
 * It will slow MOVS on MMIO down a lot, but in SEV-ES guests it is a
 * rare operation. If it turns out to be a performance problem the split
 * operations can be moved to memcpy_fromio() and memcpy_toio().
 */
static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt,
					  unsigned int bytes)
{
	unsigned long ds_base, es_base;
	unsigned char *src, *dst;
	unsigned char buffer[8];
	enum es_result ret;
	bool rep;
	int off;

	ds_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_DS);
	es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);

	if (ds_base == -1L || es_base == -1L) {
		ctxt->fi.vector = X86_TRAP_GP;
		ctxt->fi.error_code = 0;
		return ES_EXCEPTION;
	}

	src = ds_base + (unsigned char *)ctxt->regs->si;
	dst = es_base + (unsigned char *)ctxt->regs->di;

	ret = vc_read_mem(ctxt, src, buffer, bytes);
	if (ret != ES_OK)
		return ret;

	ret = vc_write_mem(ctxt, dst, buffer, bytes);
	if (ret != ES_OK)
		return ret;

	if (ctxt->regs->flags & X86_EFLAGS_DF)
		off = -bytes;
	else
		off = bytes;

	ctxt->regs->si += off;
	ctxt->regs->di += off;

	rep = insn_has_rep_prefix(&ctxt->insn);
	if (rep)
		ctxt->regs->cx -= 1;

	if (!rep || ctxt->regs->cx == 0)
		return ES_OK;
	else
		return ES_RETRY;
}

static enum es_result vc_handle_mmio(struct ghcb *ghcb,
				     struct es_em_ctxt *ctxt)
{
	struct insn *insn = &ctxt->insn;
	unsigned int bytes = 0;
	enum es_result ret;
	long *reg_data;

	switch (insn->opcode.bytes[0]) {
	/* MMIO Write */
	case 0x88:
		bytes = 1;
		fallthrough;
	case 0x89:
		if (!bytes)
			bytes = insn->opnd_bytes;

		reg_data = vc_insn_get_reg(ctxt);
		if (!reg_data)
			return ES_DECODE_FAILED;

		memcpy(ghcb->shared_buffer, reg_data, bytes);

		ret = vc_do_mmio(ghcb, ctxt, bytes, false);
		break;

	case 0xc6:
		bytes = 1;
		fallthrough;
	case 0xc7:
		if (!bytes)
			bytes = insn->opnd_bytes;

		memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes);

		ret = vc_do_mmio(ghcb, ctxt, bytes, false);
		break;

	/* MMIO Read */
	case 0x8a:
		bytes = 1;
		fallthrough;
	case 0x8b:
		if (!bytes)
			bytes = insn->opnd_bytes;

		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
		if (ret)
			break;

		reg_data = vc_insn_get_reg(ctxt);
		if (!reg_data)
			return ES_DECODE_FAILED;

		/* Zero-extend for 32-bit operation */
		if (bytes == 4)
			*reg_data = 0;

		memcpy(reg_data, ghcb->shared_buffer, bytes);
		break;

	/* MOVS instruction */
	case 0xa4:
		bytes = 1;
		fallthrough;
	case 0xa5:
		if (!bytes)
			bytes = insn->opnd_bytes;

		ret = vc_handle_mmio_movs(ctxt, bytes);
		break;
	/* Two-Byte Opcodes */
	case 0x0f:
		ret = vc_handle_mmio_twobyte_ops(ghcb, ctxt);
		break;
	default:
		ret = ES_UNSUPPORTED;
	}

	return ret;
}

static enum es_result vc_handle_dr7_write(struct ghcb *ghcb,
					  struct es_em_ctxt *ctxt)
{
	struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
	long val, *reg = vc_insn_get_rm(ctxt);
	enum es_result ret;

	if (!reg)
		return ES_DECODE_FAILED;

	val = *reg;

	/* Upper 32 bits must be written as zeroes */
	if (val >> 32) {
		ctxt->fi.vector = X86_TRAP_GP;
		ctxt->fi.error_code = 0;
		return ES_EXCEPTION;
	}

	/* Clear out other reserved bits and set bit 10 */
	val = (val & 0xffff23ffL) | BIT(10);

	/* Early non-zero writes to DR7 are not supported */
	if (!data && (val & ~DR7_RESET_VALUE))
		return ES_UNSUPPORTED;

	/* Using a value of 0 for ExitInfo1 means RAX holds the value */
	ghcb_set_rax(ghcb, val);
	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (data)
		data->dr7 = val;

	return ES_OK;
}

static enum es_result vc_handle_dr7_read(struct ghcb *ghcb,
					 struct es_em_ctxt *ctxt)
{
	struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
	long *reg = vc_insn_get_rm(ctxt);

	if (!reg)
		return ES_DECODE_FAILED;

	if (data)
		*reg = data->dr7;
	else
		*reg = DR7_RESET_VALUE;

	return ES_OK;
}

static enum es_result vc_handle_wbinvd(struct ghcb *ghcb,
				       struct es_em_ctxt *ctxt)
{
	return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0);
}

static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
					 struct ghcb *ghcb,
					 unsigned long exit_code)
{
	enum es_result result;

	switch (exit_code) {
	case SVM_EXIT_READ_DR7:
		result = vc_handle_dr7_read(ghcb, ctxt);
		break;
	case SVM_EXIT_WRITE_DR7:
		result = vc_handle_dr7_write(ghcb, ctxt);
		break;
	case SVM_EXIT_CPUID:
		result = vc_handle_cpuid(ghcb, ctxt);
		break;
	case SVM_EXIT_IOIO:
		result = vc_handle_ioio(ghcb, ctxt);
		break;
	case SVM_EXIT_MSR:
		result = vc_handle_msr(ghcb, ctxt);
		break;
	case SVM_EXIT_WBINVD:
		result = vc_handle_wbinvd(ghcb, ctxt);
		break;
	case SVM_EXIT_NPF:
		result = vc_handle_mmio(ghcb, ctxt);
		break;
	default:
		/*
		 * Unexpected #VC exception
		 */
		result = ES_UNSUPPORTED;
	}

	return result;
}

static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
{
	long error_code = ctxt->fi.error_code;
	int trapnr = ctxt->fi.vector;

	ctxt->regs->orig_ax = ctxt->fi.error_code;

	switch (trapnr) {
	case X86_TRAP_GP:
		exc_general_protection(ctxt->regs, error_code);
		break;
	case X86_TRAP_UD:
		exc_invalid_op(ctxt->regs);
		break;
	default:
		pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n");
		BUG();
	}
}

static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs)
{
	unsigned long sp = (unsigned long)regs;

	return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
}

/*
 * Main #VC exception handler. It is called when the entry code was able to
 * switch off the IST to a safe kernel stack.
 *
 * With the current implementation it is always possible to switch to a safe
 * stack because #VC exceptions only happen at known places, like intercepted
 * instructions or accesses to MMIO areas/IO ports. They can also happen with
 * code instrumentation when the hypervisor intercepts #DB, but the critical
 * paths are forbidden to be instrumented, so #DB exceptions currently also
 * only happen in safe places.
 */
DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
{
	struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
	struct ghcb_state state;
	struct es_em_ctxt ctxt;
	enum es_result result;
	struct ghcb *ghcb;

	lockdep_assert_irqs_disabled();
	instrumentation_begin();

	/*
	 * This is invoked through an interrupt gate, so IRQs are disabled. The
	 * code below might walk page-tables for user or kernel addresses, so
	 * keep the IRQs disabled to protect us against concurrent TLB flushes.
	 */

	ghcb = sev_es_get_ghcb(&state);
	if (!ghcb) {
		/*
		 * Mark GHCBs inactive so that panic() is able to print the
		 * message.
		 */
		data->ghcb_active = false;
		data->backup_ghcb_active = false;

		panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
	}

	vc_ghcb_invalidate(ghcb);
	result = vc_init_em_ctxt(&ctxt, regs, error_code);

	if (result == ES_OK)
		result = vc_handle_exitcode(&ctxt, ghcb, error_code);

	sev_es_put_ghcb(&state);

	/* Done - now check the result */
	switch (result) {
	case ES_OK:
		vc_finish_insn(&ctxt);
		break;
	case ES_UNSUPPORTED:
		pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n",
				   error_code, regs->ip);
		goto fail;
	case ES_VMM_ERROR:
		pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
				   error_code, regs->ip);
		goto fail;
	case ES_DECODE_FAILED:
		pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
				   error_code, regs->ip);
		goto fail;
	case ES_EXCEPTION:
		vc_forward_exception(&ctxt);
		break;
	case ES_RETRY:
		/* Nothing to do */
		break;
	default:
		pr_emerg("Unknown result in %s():%d\n", __func__, result);
		/*
		 * Emulating the instruction which caused the #VC exception
		 * failed - can't continue so print debug information
		 */
		BUG();
	}

out:
	instrumentation_end();

	return;

fail:
	if (user_mode(regs)) {
		/*
		 * Do not kill the machine if user-space triggered the
		 * exception. Send SIGBUS instead and let user-space deal with
		 * it.
		 */
		force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
	} else {
		pr_emerg("PANIC: Unhandled #VC exception in kernel space (result=%d)\n",
			 result);

		/* Show some debug info */
		show_regs(regs);

		/* Ask the hypervisor to terminate the guest */
		sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);

		/* If that fails and we get here - just panic */
		panic("Returned from Terminate-Request to Hypervisor\n");
	}

	goto out;
}

/* This handler runs on the #VC fall-back stack. It can cause further #VC exceptions */
DEFINE_IDTENTRY_VC_IST(exc_vmm_communication)
{
	instrumentation_begin();
	panic("Can't handle #VC exception from unsupported context\n");
	instrumentation_end();
}

DEFINE_IDTENTRY_VC(exc_vmm_communication)
{
	if (likely(!on_vc_fallback_stack(regs)))
		safe_stack_exc_vmm_communication(regs, error_code);
	else
		ist_exc_vmm_communication(regs, error_code);
}

bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
{
	unsigned long exit_code = regs->orig_ax;
	struct es_em_ctxt ctxt;
	enum es_result result;

	/* Do initial setup or terminate the guest */
	if (unlikely(boot_ghcb == NULL && !sev_es_setup_ghcb()))
		sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);

	vc_ghcb_invalidate(boot_ghcb);

	result = vc_init_em_ctxt(&ctxt, regs, exit_code);
	if (result == ES_OK)
		result = vc_handle_exitcode(&ctxt, boot_ghcb, exit_code);

	/* Done - now check the result */
	switch (result) {
	case ES_OK:
		vc_finish_insn(&ctxt);
		break;
	case ES_UNSUPPORTED:
		early_printk("PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
			     exit_code, regs->ip);
		goto fail;
	case ES_VMM_ERROR:
		early_printk("PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
			     exit_code, regs->ip);
		goto fail;
	case ES_DECODE_FAILED:
		early_printk("PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
			     exit_code, regs->ip);
		goto fail;
	case ES_EXCEPTION:
		vc_early_forward_exception(&ctxt);
		break;
	case ES_RETRY:
		/* Nothing to do */
		break;
	default:
		BUG();
	}

	return true;

fail:
	show_regs(regs);

	while (true)
		halt();
}