// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * AMD SVM-SEV support
 *
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 */

#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
#include <linux/kernel.h>
#include <linux/highmem.h>
#include <linux/psp-sev.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/processor.h>
#include <linux/trace_events.h>

#include "x86.h"
#include "svm.h"
#include "cpuid.h"
#include "trace.h"

static u8 sev_enc_bit;
static int sev_flush_asids(void);
static DECLARE_RWSEM(sev_deactivate_lock);
static DEFINE_MUTEX(sev_bitmap_lock);
unsigned int max_sev_asid;
static unsigned int min_sev_asid;
static unsigned long *sev_asid_bitmap;
static unsigned long *sev_reclaim_asid_bitmap;
#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)

struct enc_region {
	struct list_head list;
	unsigned long npages;
	struct page **pages;
	unsigned long uaddr;
	unsigned long size;
};

static int sev_flush_asids(void)
{
	int ret, error = 0;

	/*
	 * DEACTIVATE will clear the WBINVD indicator causing DF_FLUSH to fail,
	 * so it must be guarded.
	 */
	down_write(&sev_deactivate_lock);

	wbinvd_on_all_cpus();
	ret = sev_guest_df_flush(&error);

	up_write(&sev_deactivate_lock);

	if (ret)
		pr_err("SEV: DF_FLUSH failed, ret=%d, error=%#x\n", ret, error);

	return ret;
}

/* Must be called with the sev_bitmap_lock held */
static bool __sev_recycle_asids(void)
{
	int pos;

	/* Check if there are any ASIDs to reclaim before performing a flush */
	pos = find_next_bit(sev_reclaim_asid_bitmap,
			    max_sev_asid, min_sev_asid - 1);
	if (pos >= max_sev_asid)
		return false;

	if (sev_flush_asids())
		return false;

	bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap,
		   max_sev_asid);
	bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid);

	return true;
}

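/*
 * Allocate a new ASID for a SEV guest.  If the SEV ASID range is exhausted,
 * try to recycle ASIDs that have been returned to the reclaim bitmap before
 * giving up.
 */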
static int sev_asid_new(void)
{
	bool retry = true;
	int pos;

	mutex_lock(&sev_bitmap_lock);

	/*
	 * A SEV-enabled guest must use an ASID from min_sev_asid to max_sev_asid.
	 */
again:
	pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_sev_asid - 1);
	if (pos >= max_sev_asid) {
		if (retry && __sev_recycle_asids()) {
			retry = false;
			goto again;
		}
		mutex_unlock(&sev_bitmap_lock);
		return -EBUSY;
	}

	__set_bit(pos, sev_asid_bitmap);

	mutex_unlock(&sev_bitmap_lock);

	return pos + 1;
}

static int sev_get_asid(struct kvm *kvm)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;

	return sev->asid;
}

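/*
 * Move the ASID to the reclaim bitmap and drop any per-CPU cached VMCB for
 * it; the ASID cannot be reused until the next DF_FLUSH-based recycle.
 */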
static void sev_asid_free(int asid)
{
	struct svm_cpu_data *sd;
	int cpu, pos;

	mutex_lock(&sev_bitmap_lock);

	pos = asid - 1;
	__set_bit(pos, sev_reclaim_asid_bitmap);

	for_each_possible_cpu(cpu) {
		sd = per_cpu(svm_data, cpu);
		sd->sev_vmcbs[pos] = NULL;
	}

	mutex_unlock(&sev_bitmap_lock);
}

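/*
 * DEACTIVATE the firmware handle to detach it from its ASID, then
 * DECOMMISSION it so the firmware releases the guest context.
 */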
static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
{
	struct sev_data_decommission *decommission;
	struct sev_data_deactivate *data;

	if (!handle)
		return;

	data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return;

	/* deactivate handle */
	data->handle = handle;

	/* Guard DEACTIVATE against WBINVD/DF_FLUSH used in ASID recycling */
	down_read(&sev_deactivate_lock);
	sev_guest_deactivate(data, NULL);
	up_read(&sev_deactivate_lock);

	kfree(data);

	decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
	if (!decommission)
		return;

	/* decommission handle */
	decommission->handle = handle;
	sev_guest_decommission(decommission, NULL);

	kfree(decommission);
}

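/*
 * KVM_SEV_INIT: allocate an ASID, initialize the SEV platform and mark the
 * VM as an active SEV guest.
 */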
static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	int asid, ret;

	ret = -EBUSY;
	if (unlikely(sev->active))
		return ret;

	asid = sev_asid_new();
	if (asid < 0)
		return ret;

	ret = sev_platform_init(&argp->error);
	if (ret)
		goto e_free;

	sev->active = true;
	sev->asid = asid;
	INIT_LIST_HEAD(&sev->regions_list);

	return 0;

e_free:
	sev_asid_free(asid);
	return ret;
}

static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
{
	struct sev_data_activate *data;
	int asid = sev_get_asid(kvm);
	int ret;

	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
	if (!data)
		return -ENOMEM;

	/* activate ASID on the given handle */
	data->handle = handle;
	data->asid = asid;
	ret = sev_guest_activate(data, error);
	kfree(data);

	return ret;
}

static int __sev_issue_cmd(int fd, int id, void *data, int *error)
{
	struct fd f;
	int ret;

	f = fdget(fd);
	if (!f.file)
		return -EBADF;

	ret = sev_issue_cmd_external_user(f.file, id, data, error);

	fdput(f);
	return ret;
}

static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;

	return __sev_issue_cmd(sev->fd, id, data, error);
}

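/*
 * KVM_SEV_LAUNCH_START: create the guest memory encryption context with the
 * optional DH certificate and session blobs, bind the resulting firmware
 * handle to this guest's ASID and return the handle to userspace.
 */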
static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_launch_start *start;
	struct kvm_sev_launch_start params;
	void *dh_blob, *session_blob;
	int *error = &argp->error;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
		return -EFAULT;

	start = kzalloc(sizeof(*start), GFP_KERNEL_ACCOUNT);
	if (!start)
		return -ENOMEM;

	dh_blob = NULL;
	if (params.dh_uaddr) {
		dh_blob = psp_copy_user_blob(params.dh_uaddr, params.dh_len);
		if (IS_ERR(dh_blob)) {
			ret = PTR_ERR(dh_blob);
			goto e_free;
		}

		start->dh_cert_address = __sme_set(__pa(dh_blob));
		start->dh_cert_len = params.dh_len;
	}

	session_blob = NULL;
	if (params.session_uaddr) {
		session_blob = psp_copy_user_blob(params.session_uaddr, params.session_len);
		if (IS_ERR(session_blob)) {
			ret = PTR_ERR(session_blob);
			goto e_free_dh;
		}

		start->session_address = __sme_set(__pa(session_blob));
		start->session_len = params.session_len;
	}

	start->handle = params.handle;
	start->policy = params.policy;

	/* create memory encryption context */
	ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, start, error);
	if (ret)
		goto e_free_session;

	/* Bind ASID to this guest */
	ret = sev_bind_asid(kvm, start->handle, error);
	if (ret)
		goto e_free_session;

	/* return handle to userspace */
	params.handle = start->handle;
	if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params))) {
		sev_unbind_asid(kvm, start->handle);
		ret = -EFAULT;
		goto e_free_session;
	}

	sev->handle = start->handle;
	sev->fd = argp->sev_fd;

e_free_session:
	kfree(session_blob);
e_free_dh:
	kfree(dh_blob);
e_free:
	kfree(start);
	return ret;
}

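/*
 * Pin the userspace range [uaddr, uaddr + ulen) and account the pages
 * against RLIMIT_MEMLOCK.  Returns the array of pinned pages (with the page
 * count in *n) or an ERR_PTR() on failure.
 */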
static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
				    unsigned long ulen, unsigned long *n,
				    int write)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	unsigned long npages, size;
	int npinned;
	unsigned long locked, lock_limit;
	struct page **pages;
	unsigned long first, last;
	int ret;

	if (ulen == 0 || uaddr + ulen < uaddr)
		return ERR_PTR(-EINVAL);

	/* Calculate number of pages. */
	first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
	last = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT;
	npages = (last - first + 1);

	locked = sev->pages_locked + npages;
	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
		pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n", locked, lock_limit);
		return ERR_PTR(-ENOMEM);
	}

	if (WARN_ON_ONCE(npages > INT_MAX))
		return ERR_PTR(-EINVAL);

	/* Avoid using vmalloc for smaller buffers. */
	size = npages * sizeof(struct page *);
	if (size > PAGE_SIZE)
		pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	else
		pages = kmalloc(size, GFP_KERNEL_ACCOUNT);

	if (!pages)
		return ERR_PTR(-ENOMEM);

	/* Pin the user virtual address. */
	npinned = pin_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
	if (npinned != npages) {
		pr_err("SEV: Failure locking %lu pages.\n", npages);
		ret = -ENOMEM;
		goto err;
	}

	*n = npages;
	sev->pages_locked = locked;

	return pages;

err:
	if (npinned > 0)
		unpin_user_pages(pages, npinned);

	kvfree(pages);
	return ERR_PTR(ret);
}

static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
			     unsigned long npages)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;

	unpin_user_pages(pages, npages);
	kvfree(pages);
	sev->pages_locked -= npages;
}

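/*
 * Flush the given pages from the CPU caches; skipped when the hardware
 * enforces cache coherency for encrypted mappings (X86_FEATURE_SME_COHERENT).
 */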
static void sev_clflush_pages(struct page *pages[], unsigned long npages)
{
	uint8_t *page_virtual;
	unsigned long i;

	if (this_cpu_has(X86_FEATURE_SME_COHERENT) || npages == 0 ||
	    pages == NULL)
		return;

	for (i = 0; i < npages; i++) {
		page_virtual = kmap_atomic(pages[i]);
		clflush_cache_range(page_virtual, PAGE_SIZE);
		kunmap_atomic(page_virtual);
	}
}

static unsigned long get_num_contig_pages(unsigned long idx,
					  struct page **inpages, unsigned long npages)
{
	unsigned long paddr, next_paddr;
	unsigned long i = idx + 1, pages = 1;

	/* find the number of contiguous pages starting from idx */
	paddr = __sme_page_pa(inpages[idx]);
	while (i < npages) {
		next_paddr = __sme_page_pa(inpages[i++]);
		if ((paddr + PAGE_SIZE) == next_paddr) {
			pages++;
			paddr = next_paddr;
			continue;
		}
		break;
	}

	return pages;
}

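/*
 * KVM_SEV_LAUNCH_UPDATE_DATA: pin and flush the userspace buffer, then feed
 * it to the firmware in physically contiguous chunks so the data is
 * encrypted in place and measured into the launch digest.
 */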
static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i;
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct kvm_sev_launch_update_data params;
	struct sev_data_launch_update_data *data;
	struct page **inpages;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
		return -EFAULT;

	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
	if (!data)
		return -ENOMEM;

	vaddr = params.uaddr;
	size = params.len;
	vaddr_end = vaddr + size;

	/* Lock the user memory. */
	inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
	if (IS_ERR(inpages)) {
		ret = PTR_ERR(inpages);
		goto e_free;
	}

	/*
	 * Flush (on non-coherent CPUs) before LAUNCH_UPDATE encrypts pages in
	 * place; the cache may contain the data that was written unencrypted.
	 */
	sev_clflush_pages(inpages, npages);

	for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) {
		int offset, len;

		/*
		 * If the user buffer is not page-aligned, calculate the offset
		 * within the page.
		 */
		offset = vaddr & (PAGE_SIZE - 1);

		/* Calculate the number of pages that can be encrypted in one go. */
		pages = get_num_contig_pages(i, inpages, npages);

		len = min_t(size_t, ((pages * PAGE_SIZE) - offset), size);

		data->handle = sev->handle;
		data->len = len;
		data->address = __sme_page_pa(inpages[i]) + offset;
		ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, data, &argp->error);
		if (ret)
			goto e_unpin;

		size -= len;
		next_vaddr = vaddr + len;
	}

e_unpin:
	/* content of memory is updated, mark pages dirty */
	for (i = 0; i < npages; i++) {
		set_page_dirty_lock(inpages[i]);
		mark_page_accessed(inpages[i]);
	}
	/* unlock the user pages */
	sev_unpin_memory(kvm, inpages, npages);
e_free:
	kfree(data);
	return ret;
}

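/*
 * KVM_SEV_LAUNCH_MEASURE: retrieve the launch measurement from the firmware.
 * If params.len is zero, only the required blob length is returned so that
 * userspace can size its buffer.
 */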
static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	void __user *measure = (void __user *)(uintptr_t)argp->data;
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_launch_measure *data;
	struct kvm_sev_launch_measure params;
	void __user *p = NULL;
	void *blob = NULL;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&params, measure, sizeof(params)))
		return -EFAULT;

	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
	if (!data)
		return -ENOMEM;

	/* User wants to query the blob length */
	if (!params.len)
		goto cmd;

	p = (void __user *)(uintptr_t)params.uaddr;
	if (p) {
		if (params.len > SEV_FW_BLOB_MAX_SIZE) {
			ret = -EINVAL;
			goto e_free;
		}

		ret = -ENOMEM;
		blob = kmalloc(params.len, GFP_KERNEL);
		if (!blob)
			goto e_free;

		data->address = __psp_pa(blob);
		data->len = params.len;
	}

cmd:
	data->handle = sev->handle;
	ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, data, &argp->error);

	/*
	 * If we were only querying the blob length, the firmware has already
	 * responded with the expected data.
	 */
	if (!params.len)
		goto done;

	if (ret)
		goto e_free_blob;

	if (blob) {
		if (copy_to_user(p, blob, params.len))
			ret = -EFAULT;
	}

done:
	params.len = data->len;
	if (copy_to_user(measure, &params, sizeof(params)))
		ret = -EFAULT;
e_free_blob:
	kfree(blob);
e_free:
	kfree(data);
	return ret;
}

static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_launch_finish *data;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
	if (!data)
		return -ENOMEM;

	data->handle = sev->handle;
	ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, data, &argp->error);

	kfree(data);
	return ret;
}

static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct kvm_sev_guest_status params;
	struct sev_data_guest_status *data;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
	if (!data)
		return -ENOMEM;

	data->handle = sev->handle;
	ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, data, &argp->error);
	if (ret)
		goto e_free;

	params.policy = data->policy;
	params.state = data->state;
	params.handle = data->handle;

	if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
		ret = -EFAULT;
e_free:
	kfree(data);
	return ret;
}

static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src,
			       unsigned long dst, int size,
			       int *error, bool enc)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_dbg *data;
	int ret;

	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
	if (!data)
		return -ENOMEM;

	data->handle = sev->handle;
	data->dst_addr = dst;
	data->src_addr = src;
	data->len = size;

	ret = sev_issue_cmd(kvm,
			    enc ? SEV_CMD_DBG_ENCRYPT : SEV_CMD_DBG_DECRYPT,
			    data, error);
	kfree(data);
	return ret;
}

static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
			     unsigned long dst_paddr, int sz, int *err)
{
	int offset;

	/*
	 * It's safe to read more than we are asked; the caller must ensure
	 * that the destination has enough space.
	 */
	src_paddr = round_down(src_paddr, 16);
	offset = src_paddr & 15;
	sz = round_up(sz + offset, 16);

	return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false);
}

static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
				  unsigned long __user dst_uaddr,
				  unsigned long dst_paddr,
				  int size, int *err)
{
	struct page *tpage = NULL;
	int ret, offset;

	/* if inputs are not 16-byte aligned then use an intermediate buffer */
	if (!IS_ALIGNED(dst_paddr, 16) ||
	    !IS_ALIGNED(paddr, 16) ||
	    !IS_ALIGNED(size, 16)) {
		tpage = (void *)alloc_page(GFP_KERNEL);
		if (!tpage)
			return -ENOMEM;

		dst_paddr = __sme_page_pa(tpage);
	}

	ret = __sev_dbg_decrypt(kvm, paddr, dst_paddr, size, err);
	if (ret)
		goto e_free;

	if (tpage) {
		offset = paddr & 15;
		if (copy_to_user((void __user *)(uintptr_t)dst_uaddr,
				 page_address(tpage) + offset, size))
			ret = -EFAULT;
	}

e_free:
	if (tpage)
		__free_page(tpage);

	return ret;
}

static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
				  unsigned long __user vaddr,
				  unsigned long dst_paddr,
				  unsigned long __user dst_vaddr,
				  int size, int *error)
{
	struct page *src_tpage = NULL;
	struct page *dst_tpage = NULL;
	int ret, len = size;

	/* If source buffer is not aligned then use an intermediate buffer */
	if (!IS_ALIGNED(vaddr, 16)) {
		src_tpage = alloc_page(GFP_KERNEL);
		if (!src_tpage)
			return -ENOMEM;

		if (copy_from_user(page_address(src_tpage),
				   (void __user *)(uintptr_t)vaddr, size)) {
			__free_page(src_tpage);
			return -EFAULT;
		}

		paddr = __sme_page_pa(src_tpage);
	}

	/*
	 * If destination buffer or length is not aligned then do read-modify-write:
	 * - decrypt destination in an intermediate buffer
	 * - copy the source buffer in an intermediate buffer
	 * - use the intermediate buffer as source buffer
	 */
	if (!IS_ALIGNED(dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
		int dst_offset;

		dst_tpage = alloc_page(GFP_KERNEL);
		if (!dst_tpage) {
			ret = -ENOMEM;
			goto e_free;
		}

		ret = __sev_dbg_decrypt(kvm, dst_paddr,
					__sme_page_pa(dst_tpage), size, error);
		if (ret)
			goto e_free;

		/*
		 * If source is kernel buffer then use memcpy() otherwise
		 * copy_from_user().
		 */
		dst_offset = dst_paddr & 15;

		if (src_tpage)
			memcpy(page_address(dst_tpage) + dst_offset,
			       page_address(src_tpage), size);
		else {
			if (copy_from_user(page_address(dst_tpage) + dst_offset,
					   (void __user *)(uintptr_t)vaddr, size)) {
				ret = -EFAULT;
				goto e_free;
			}
		}

		paddr = __sme_page_pa(dst_tpage);
		dst_paddr = round_down(dst_paddr, 16);
		len = round_up(size, 16);
	}

	ret = __sev_issue_dbg_cmd(kvm, paddr, dst_paddr, len, error, true);

e_free:
	if (src_tpage)
		__free_page(src_tpage);
	if (dst_tpage)
		__free_page(dst_tpage);
	return ret;
}

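/*
 * KVM_SEV_DBG_{DE,EN}CRYPT: walk the source range one page at a time, pin the
 * source and destination pages and issue the debug command for each chunk,
 * handling buffers that are not page or 16-byte aligned.
 */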
static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
{
	unsigned long vaddr, vaddr_end, next_vaddr;
	unsigned long dst_vaddr;
	struct page **src_p, **dst_p;
	struct kvm_sev_dbg debug;
	unsigned long n;
	unsigned int size;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug)))
		return -EFAULT;

	if (!debug.len || debug.src_uaddr + debug.len < debug.src_uaddr)
		return -EINVAL;
	if (!debug.dst_uaddr)
		return -EINVAL;

	vaddr = debug.src_uaddr;
	size = debug.len;
	vaddr_end = vaddr + size;
	dst_vaddr = debug.dst_uaddr;

	for (; vaddr < vaddr_end; vaddr = next_vaddr) {
		int len, s_off, d_off;

		/* lock userspace source and destination page */
		src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0);
		if (IS_ERR(src_p))
			return PTR_ERR(src_p);

		dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1);
		if (IS_ERR(dst_p)) {
			sev_unpin_memory(kvm, src_p, n);
			return PTR_ERR(dst_p);
		}

		/*
		 * Flush (on non-coherent CPUs) before DBG_{DE,EN}CRYPT read or modify
		 * the pages; flush the destination too so that future accesses do not
		 * see stale data.
		 */
		sev_clflush_pages(src_p, 1);
		sev_clflush_pages(dst_p, 1);

		/*
		 * Since user buffer may not be page aligned, calculate the
		 * offset within the page.
		 */
		s_off = vaddr & ~PAGE_MASK;
		d_off = dst_vaddr & ~PAGE_MASK;
		len = min_t(size_t, (PAGE_SIZE - s_off), size);

		if (dec)
			ret = __sev_dbg_decrypt_user(kvm,
						     __sme_page_pa(src_p[0]) + s_off,
						     dst_vaddr,
						     __sme_page_pa(dst_p[0]) + d_off,
						     len, &argp->error);
		else
			ret = __sev_dbg_encrypt_user(kvm,
						     __sme_page_pa(src_p[0]) + s_off,
						     vaddr,
						     __sme_page_pa(dst_p[0]) + d_off,
						     dst_vaddr,
						     len, &argp->error);

		sev_unpin_memory(kvm, src_p, n);
		sev_unpin_memory(kvm, dst_p, n);

		if (ret)
			goto err;

		next_vaddr = vaddr + len;
		dst_vaddr = dst_vaddr + len;
		size -= len;
	}
err:
	return ret;
}

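/*
 * KVM_SEV_LAUNCH_SECRET: inject a secret into the guest.  The guest pages
 * must be physically contiguous since the firmware writes the secret as one
 * contiguous block.
 */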
static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_launch_secret *data;
	struct kvm_sev_launch_secret params;
	struct page **pages;
	void *blob, *hdr;
	unsigned long n, i;
	int ret, offset;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
		return -EFAULT;

	pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1);
	if (IS_ERR(pages))
		return PTR_ERR(pages);

	/*
	 * Flush (on non-coherent CPUs) before LAUNCH_SECRET encrypts pages in
	 * place; the cache may contain the data that was written unencrypted.
	 */
	sev_clflush_pages(pages, n);

	/*
	 * The secret must be copied into a contiguous memory region, so verify
	 * that the userspace memory pages are contiguous before issuing the
	 * command.
	 */
	if (get_num_contig_pages(0, pages, n) != n) {
		ret = -EINVAL;
		goto e_unpin_memory;
	}

	ret = -ENOMEM;
	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
	if (!data)
		goto e_unpin_memory;

	offset = params.guest_uaddr & (PAGE_SIZE - 1);
	data->guest_address = __sme_page_pa(pages[0]) + offset;
	data->guest_len = params.guest_len;

	blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
	if (IS_ERR(blob)) {
		ret = PTR_ERR(blob);
		goto e_free;
	}

	data->trans_address = __psp_pa(blob);
	data->trans_len = params.trans_len;

	hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
	if (IS_ERR(hdr)) {
		ret = PTR_ERR(hdr);
		goto e_free_blob;
	}
	data->hdr_address = __psp_pa(hdr);
	data->hdr_len = params.hdr_len;

	data->handle = sev->handle;
	ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error);

	kfree(hdr);

e_free_blob:
	kfree(blob);
e_free:
	kfree(data);
e_unpin_memory:
	/* content of memory is updated, mark pages dirty */
	for (i = 0; i < n; i++) {
		set_page_dirty_lock(pages[i]);
		mark_page_accessed(pages[i]);
	}
	sev_unpin_memory(kvm, pages, n);
	return ret;
}

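/*
 * Dispatch a KVM_MEMORY_ENCRYPT_OP ioctl to the matching SEV command handler.
 */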
int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
{
	struct kvm_sev_cmd sev_cmd;
	int r;

	if (!svm_sev_enabled() || !sev)
		return -ENOTTY;

	if (!argp)
		return 0;

	if (copy_from_user(&sev_cmd, argp, sizeof(struct kvm_sev_cmd)))
		return -EFAULT;

	mutex_lock(&kvm->lock);

	switch (sev_cmd.id) {
	case KVM_SEV_INIT:
		r = sev_guest_init(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_START:
		r = sev_launch_start(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_UPDATE_DATA:
		r = sev_launch_update_data(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_MEASURE:
		r = sev_launch_measure(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_FINISH:
		r = sev_launch_finish(kvm, &sev_cmd);
		break;
	case KVM_SEV_GUEST_STATUS:
		r = sev_guest_status(kvm, &sev_cmd);
		break;
	case KVM_SEV_DBG_DECRYPT:
		r = sev_dbg_crypt(kvm, &sev_cmd, true);
		break;
	case KVM_SEV_DBG_ENCRYPT:
		r = sev_dbg_crypt(kvm, &sev_cmd, false);
		break;
	case KVM_SEV_LAUNCH_SECRET:
		r = sev_launch_secret(kvm, &sev_cmd);
		break;
	default:
		r = -EINVAL;
		goto out;
	}

	if (copy_to_user(argp, &sev_cmd, sizeof(struct kvm_sev_cmd)))
		r = -EFAULT;

out:
	mutex_unlock(&kvm->lock);
	return r;
}

int svm_register_enc_region(struct kvm *kvm,
			    struct kvm_enc_region *range)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct enc_region *region;
	int ret = 0;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (range->addr > ULONG_MAX || range->size > ULONG_MAX)
		return -EINVAL;

	region = kzalloc(sizeof(*region), GFP_KERNEL_ACCOUNT);
	if (!region)
		return -ENOMEM;

	region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages, 1);
	if (IS_ERR(region->pages)) {
		ret = PTR_ERR(region->pages);
		goto e_free;
	}

	/*
	 * The guest may change the memory encryption attribute from C=0 -> C=1
	 * or vice versa for this memory range.  Let's make sure caches are
	 * flushed so that guest data gets written into memory with the correct
	 * C-bit.
	 */
	sev_clflush_pages(region->pages, region->npages);

	region->uaddr = range->addr;
	region->size = range->size;

	mutex_lock(&kvm->lock);
	list_add_tail(&region->list, &sev->regions_list);
	mutex_unlock(&kvm->lock);

	return ret;

e_free:
	kfree(region);
	return ret;
}

static struct enc_region *
find_enc_region(struct kvm *kvm, struct kvm_enc_region *range)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct list_head *head = &sev->regions_list;
	struct enc_region *i;

	list_for_each_entry(i, head, list) {
		if (i->uaddr == range->addr &&
		    i->size == range->size)
			return i;
	}

	return NULL;
}

static void __unregister_enc_region_locked(struct kvm *kvm,
					   struct enc_region *region)
{
	sev_unpin_memory(kvm, region->pages, region->npages);
	list_del(&region->list);
	kfree(region);
}

int svm_unregister_enc_region(struct kvm *kvm,
			      struct kvm_enc_region *range)
{
	struct enc_region *region;
	int ret;

	mutex_lock(&kvm->lock);

	if (!sev_guest(kvm)) {
		ret = -ENOTTY;
		goto failed;
	}

	region = find_enc_region(kvm, range);
	if (!region) {
		ret = -EINVAL;
		goto failed;
	}

	/*
	 * Ensure that all guest tagged cache entries are flushed before
	 * releasing the pages back to the system for use. CLFLUSH will
	 * not do this, so issue a WBINVD.
	 */
	wbinvd_on_all_cpus();

	__unregister_enc_region_locked(kvm, region);

	mutex_unlock(&kvm->lock);
	return 0;

failed:
	mutex_unlock(&kvm->lock);
	return ret;
}

void sev_vm_destroy(struct kvm *kvm)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct list_head *head = &sev->regions_list;
	struct list_head *pos, *q;

	if (!sev_guest(kvm))
		return;

	mutex_lock(&kvm->lock);

	/*
	 * Ensure that all guest tagged cache entries are flushed before
	 * releasing the pages back to the system for use. CLFLUSH will
	 * not do this, so issue a WBINVD.
	 */
	wbinvd_on_all_cpus();

	/*
	 * If userspace was terminated before unregistering the memory regions,
	 * then unpin all the registered memory.
	 */
	if (!list_empty(head)) {
		list_for_each_safe(pos, q, head) {
			__unregister_enc_region_locked(kvm,
				list_entry(pos, struct enc_region, list));
			cond_resched();
		}
	}

	mutex_unlock(&kvm->lock);

	sev_unbind_asid(kvm, sev->handle);
	sev_asid_free(sev->asid);
}

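/*
 * Probe SEV/SEV-ES support from CPUID leaf 0x8000001F, record the usable
 * ASID ranges and allocate the ASID tracking bitmaps.
 */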
void __init sev_hardware_setup(void)
{
	unsigned int eax, ebx, ecx, edx;
	bool sev_es_supported = false;
	bool sev_supported = false;

	/* Does the CPU support SEV? */
	if (!boot_cpu_has(X86_FEATURE_SEV))
		goto out;

	/* Retrieve SEV CPUID information */
	cpuid(0x8000001f, &eax, &ebx, &ecx, &edx);

	/* Set encryption bit location for SEV-ES guests */
	sev_enc_bit = ebx & 0x3f;

	/* Maximum number of encrypted guests supported simultaneously */
	max_sev_asid = ecx;

	if (!svm_sev_enabled())
		goto out;

	/* Minimum ASID value that should be used for SEV guest */
	min_sev_asid = edx;

	/* Initialize SEV ASID bitmaps */
	sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
	if (!sev_asid_bitmap)
		goto out;

	sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
	if (!sev_reclaim_asid_bitmap)
		goto out;

	pr_info("SEV supported: %u ASIDs\n", max_sev_asid - min_sev_asid + 1);
	sev_supported = true;

	/* SEV-ES support requested? */
	if (!sev_es)
		goto out;

	/* Does the CPU support SEV-ES? */
	if (!boot_cpu_has(X86_FEATURE_SEV_ES))
		goto out;

	/* Has the system been allocated ASIDs for SEV-ES? */
	if (min_sev_asid == 1)
		goto out;

	pr_info("SEV-ES supported: %u ASIDs\n", min_sev_asid - 1);
	sev_es_supported = true;

out:
	sev = sev_supported;
	sev_es = sev_es_supported;
}

void sev_hardware_teardown(void)
{
	if (!svm_sev_enabled())
		return;

	bitmap_free(sev_asid_bitmap);
	bitmap_free(sev_reclaim_asid_bitmap);

	sev_flush_asids();
}

/*
 * Pages used by hardware to hold guest encrypted state must be flushed before
 * returning them to the system.
 */
static void sev_flush_guest_memory(struct vcpu_svm *svm, void *va,
				   unsigned long len)
{
	/*
	 * If hardware enforced cache coherency for encrypted mappings of the
	 * same physical page is supported, nothing to do.
	 */
	if (boot_cpu_has(X86_FEATURE_SME_COHERENT))
		return;

	/*
	 * If the VM Page Flush MSR is supported, use it to flush the page
	 * (using the page virtual address and the guest ASID).
	 */
	if (boot_cpu_has(X86_FEATURE_VM_PAGE_FLUSH)) {
		struct kvm_sev_info *sev;
		unsigned long va_start;
		u64 start, stop;

		/* Align start and stop to page boundaries. */
		va_start = (unsigned long)va;
		start = (u64)va_start & PAGE_MASK;
		stop = PAGE_ALIGN((u64)va_start + len);

		if (start < stop) {
			sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;

			while (start < stop) {
				wrmsrl(MSR_AMD64_VM_PAGE_FLUSH,
				       start | sev->asid);

				start += PAGE_SIZE;
			}

			return;
		}

		WARN(1, "Address overflow, using WBINVD\n");
	}

	/*
	 * Hardware should always have one of the above features,
	 * but if not, use WBINVD and issue a warning.
	 */
	WARN_ONCE(1, "Using WBINVD to flush guest memory\n");
	wbinvd_on_all_cpus();
}

void sev_free_vcpu(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm;

	if (!sev_es_guest(vcpu->kvm))
		return;

	svm = to_svm(vcpu);

	if (vcpu->arch.guest_state_protected)
		sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE);
	__free_page(virt_to_page(svm->vmsa));
}

static void dump_ghcb(struct vcpu_svm *svm)
{
	struct ghcb *ghcb = svm->ghcb;
	unsigned int nbits;

	/* Re-use the dump_invalid_vmcb module parameter */
	if (!dump_invalid_vmcb) {
		pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n");
		return;
	}

	nbits = sizeof(ghcb->save.valid_bitmap) * 8;

	pr_err("GHCB (GPA=%016llx):\n", svm->vmcb->control.ghcb_gpa);
	pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code",
	       ghcb->save.sw_exit_code, ghcb_sw_exit_code_is_valid(ghcb));
	pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1",
	       ghcb->save.sw_exit_info_1, ghcb_sw_exit_info_1_is_valid(ghcb));
	pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2",
	       ghcb->save.sw_exit_info_2, ghcb_sw_exit_info_2_is_valid(ghcb));
	pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch",
	       ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb));
	pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap);
}

static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
{
	struct kvm_vcpu *vcpu = &svm->vcpu;
	struct ghcb *ghcb = svm->ghcb;

	/*
	 * The GHCB protocol so far allows for the following data
	 * to be returned:
	 *   GPRs RAX, RBX, RCX, RDX
	 *
	 * Copy their values to the GHCB if they are dirty.
	 */
	if (kvm_register_is_dirty(vcpu, VCPU_REGS_RAX))
		ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]);
	if (kvm_register_is_dirty(vcpu, VCPU_REGS_RBX))
		ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]);
	if (kvm_register_is_dirty(vcpu, VCPU_REGS_RCX))
		ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]);
	if (kvm_register_is_dirty(vcpu, VCPU_REGS_RDX))
		ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]);
}

static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
{
	struct vmcb_control_area *control = &svm->vmcb->control;
	struct kvm_vcpu *vcpu = &svm->vcpu;
	struct ghcb *ghcb = svm->ghcb;
	u64 exit_code;

	/*
	 * The GHCB protocol so far allows for the following data
	 * to be supplied:
	 *   GPRs RAX, RBX, RCX, RDX
	 *   XCR0
	 *   CPL
	 *
	 * VMMCALL allows the guest to provide extra registers. KVM also
	 * expects RSI for hypercalls, so include that, too.
	 *
	 * Copy their values to the appropriate location if supplied.
	 */
	memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));

	vcpu->arch.regs[VCPU_REGS_RAX] = ghcb_get_rax_if_valid(ghcb);
	vcpu->arch.regs[VCPU_REGS_RBX] = ghcb_get_rbx_if_valid(ghcb);
	vcpu->arch.regs[VCPU_REGS_RCX] = ghcb_get_rcx_if_valid(ghcb);
	vcpu->arch.regs[VCPU_REGS_RDX] = ghcb_get_rdx_if_valid(ghcb);
	vcpu->arch.regs[VCPU_REGS_RSI] = ghcb_get_rsi_if_valid(ghcb);

	svm->vmcb->save.cpl = ghcb_get_cpl_if_valid(ghcb);

	if (ghcb_xcr0_is_valid(ghcb)) {
		vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb);
		kvm_update_cpuid_runtime(vcpu);
	}

	/* Copy the GHCB exit information into the VMCB fields */
	exit_code = ghcb_get_sw_exit_code(ghcb);
	control->exit_code = lower_32_bits(exit_code);
	control->exit_code_hi = upper_32_bits(exit_code);
	control->exit_info_1 = ghcb_get_sw_exit_info_1(ghcb);
	control->exit_info_2 = ghcb_get_sw_exit_info_2(ghcb);

	/* Clear the valid entries fields */
	memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
}

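/*
 * Verify that the guest populated every GHCB field required by the requested
 * exit code before the VMGEXIT is forwarded to the normal exit handlers.
 */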
static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
{
	struct kvm_vcpu *vcpu;
	struct ghcb *ghcb;
	u64 exit_code = 0;

	ghcb = svm->ghcb;

	/* Only GHCB Usage code 0 is supported */
	if (ghcb->ghcb_usage)
		goto vmgexit_err;

	/*
	 * Retrieve the exit code now even though it may not be marked valid
	 * as it could help with debugging.
	 */
	exit_code = ghcb_get_sw_exit_code(ghcb);

	if (!ghcb_sw_exit_code_is_valid(ghcb) ||
	    !ghcb_sw_exit_info_1_is_valid(ghcb) ||
	    !ghcb_sw_exit_info_2_is_valid(ghcb))
		goto vmgexit_err;

	switch (ghcb_get_sw_exit_code(ghcb)) {
	case SVM_EXIT_READ_DR7:
		break;
	case SVM_EXIT_WRITE_DR7:
		if (!ghcb_rax_is_valid(ghcb))
			goto vmgexit_err;
		break;
	case SVM_EXIT_RDTSC:
		break;
	case SVM_EXIT_RDPMC:
		if (!ghcb_rcx_is_valid(ghcb))
			goto vmgexit_err;
		break;
	case SVM_EXIT_CPUID:
		if (!ghcb_rax_is_valid(ghcb) ||
		    !ghcb_rcx_is_valid(ghcb))
			goto vmgexit_err;
		if (ghcb_get_rax(ghcb) == 0xd)
			if (!ghcb_xcr0_is_valid(ghcb))
				goto vmgexit_err;
		break;
	case SVM_EXIT_INVD:
		break;
	case SVM_EXIT_IOIO:
		if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK))
			if (!ghcb_rax_is_valid(ghcb))
				goto vmgexit_err;
		break;
	case SVM_EXIT_MSR:
		if (!ghcb_rcx_is_valid(ghcb))
			goto vmgexit_err;
		if (ghcb_get_sw_exit_info_1(ghcb)) {
			if (!ghcb_rax_is_valid(ghcb) ||
			    !ghcb_rdx_is_valid(ghcb))
				goto vmgexit_err;
		}
		break;
	case SVM_EXIT_VMMCALL:
		if (!ghcb_rax_is_valid(ghcb) ||
		    !ghcb_cpl_is_valid(ghcb))
			goto vmgexit_err;
		break;
	case SVM_EXIT_RDTSCP:
		break;
	case SVM_EXIT_WBINVD:
		break;
	case SVM_EXIT_MONITOR:
		if (!ghcb_rax_is_valid(ghcb) ||
		    !ghcb_rcx_is_valid(ghcb) ||
		    !ghcb_rdx_is_valid(ghcb))
			goto vmgexit_err;
		break;
	case SVM_EXIT_MWAIT:
		if (!ghcb_rax_is_valid(ghcb) ||
		    !ghcb_rcx_is_valid(ghcb))
			goto vmgexit_err;
		break;
	case SVM_VMGEXIT_UNSUPPORTED_EVENT:
		break;
	default:
		goto vmgexit_err;
	}

	return 0;

vmgexit_err:
	vcpu = &svm->vcpu;

	if (ghcb->ghcb_usage) {
		vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n",
			    ghcb->ghcb_usage);
	} else {
		vcpu_unimpl(vcpu, "vmgexit: exit reason %#llx is not valid\n",
			    exit_code);
		dump_ghcb(svm);
	}

	vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
	vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
	vcpu->run->internal.ndata = 2;
	vcpu->run->internal.data[0] = exit_code;
	vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;

	return -EINVAL;
}

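/*
 * Before the vCPU is run again, copy any dirty state back to the GHCB and
 * unmap it; the GHCB is re-mapped on the next VMGEXIT.
 */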
static void pre_sev_es_run(struct vcpu_svm *svm)
{
	if (!svm->ghcb)
		return;

	trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->ghcb);

	sev_es_sync_to_ghcb(svm);

	kvm_vcpu_unmap(&svm->vcpu, &svm->ghcb_map, true);
	svm->ghcb = NULL;
}

void pre_sev_run(struct vcpu_svm *svm, int cpu)
{
	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
	int asid = sev_get_asid(svm->vcpu.kvm);

	/* Perform any SEV-ES pre-run actions */
	pre_sev_es_run(svm);

	/* Assign the asid allocated with this SEV guest */
	svm->asid = asid;

	/*
	 * Flush guest TLB:
	 *
	 * 1) when a different VMCB for the same ASID is to be run on the same
	 *    host CPU, or
	 * 2) when this VMCB was executed on a different host CPU in previous
	 *    VMRUNs.
	 */
	if (sd->sev_vmcbs[asid] == svm->vmcb &&
	    svm->vcpu.arch.last_vmentry_cpu == cpu)
		return;

	sd->sev_vmcbs[asid] = svm->vmcb;
	svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
	vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
}

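/*
 * Helpers for the GHCB MSR protocol: the GHCB GPA field of the VMCB doubles
 * as the request/response value when the guest communicates via the GHCB MSR.
 */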
static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask,
			      unsigned int pos)
{
	svm->vmcb->control.ghcb_gpa &= ~(mask << pos);
	svm->vmcb->control.ghcb_gpa |= (value & mask) << pos;
}

static u64 get_ghcb_msr_bits(struct vcpu_svm *svm, u64 mask, unsigned int pos)
{
	return (svm->vmcb->control.ghcb_gpa >> pos) & mask;
}

static void set_ghcb_msr(struct vcpu_svm *svm, u64 value)
{
	svm->vmcb->control.ghcb_gpa = value;
}

static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
{
	struct vmcb_control_area *control = &svm->vmcb->control;
	struct kvm_vcpu *vcpu = &svm->vcpu;
	u64 ghcb_info;
	int ret = 1;

	ghcb_info = control->ghcb_gpa & GHCB_MSR_INFO_MASK;

	switch (ghcb_info) {
	case GHCB_MSR_SEV_INFO_REQ:
		set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
						    GHCB_VERSION_MIN,
						    sev_enc_bit));
		break;
	case GHCB_MSR_CPUID_REQ: {
		u64 cpuid_fn, cpuid_reg, cpuid_value;

		cpuid_fn = get_ghcb_msr_bits(svm,
					     GHCB_MSR_CPUID_FUNC_MASK,
					     GHCB_MSR_CPUID_FUNC_POS);

		/* Initialize the registers needed by the CPUID intercept */
		vcpu->arch.regs[VCPU_REGS_RAX] = cpuid_fn;
		vcpu->arch.regs[VCPU_REGS_RCX] = 0;

		ret = svm_invoke_exit_handler(svm, SVM_EXIT_CPUID);
		if (!ret) {
			ret = -EINVAL;
			break;
		}

		cpuid_reg = get_ghcb_msr_bits(svm,
					      GHCB_MSR_CPUID_REG_MASK,
					      GHCB_MSR_CPUID_REG_POS);
		if (cpuid_reg == 0)
			cpuid_value = vcpu->arch.regs[VCPU_REGS_RAX];
		else if (cpuid_reg == 1)
			cpuid_value = vcpu->arch.regs[VCPU_REGS_RBX];
		else if (cpuid_reg == 2)
			cpuid_value = vcpu->arch.regs[VCPU_REGS_RCX];
		else
			cpuid_value = vcpu->arch.regs[VCPU_REGS_RDX];

		set_ghcb_msr_bits(svm, cpuid_value,
				  GHCB_MSR_CPUID_VALUE_MASK,
				  GHCB_MSR_CPUID_VALUE_POS);

		set_ghcb_msr_bits(svm, GHCB_MSR_CPUID_RESP,
				  GHCB_MSR_INFO_MASK,
				  GHCB_MSR_INFO_POS);
		break;
	}
	case GHCB_MSR_TERM_REQ: {
		u64 reason_set, reason_code;

		reason_set = get_ghcb_msr_bits(svm,
					       GHCB_MSR_TERM_REASON_SET_MASK,
					       GHCB_MSR_TERM_REASON_SET_POS);
		reason_code = get_ghcb_msr_bits(svm,
						GHCB_MSR_TERM_REASON_MASK,
						GHCB_MSR_TERM_REASON_POS);
		pr_info("SEV-ES guest requested termination: %#llx:%#llx\n",
			reason_set, reason_code);
		fallthrough;
	}
	default:
		ret = -EINVAL;
	}

	return ret;
}

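/*
 * Top-level VMGEXIT handler: dispatch MSR-protocol requests, otherwise map
 * and validate the guest's GHCB, sync its contents into the VMCB/vCPU state
 * and invoke the corresponding exit handler.
 */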
int sev_handle_vmgexit(struct vcpu_svm *svm)
{
	struct vmcb_control_area *control = &svm->vmcb->control;
	u64 ghcb_gpa, exit_code;
	struct ghcb *ghcb;
	int ret;

	/* Validate the GHCB */
	ghcb_gpa = control->ghcb_gpa;
	if (ghcb_gpa & GHCB_MSR_INFO_MASK)
		return sev_handle_vmgexit_msr_protocol(svm);

	if (!ghcb_gpa) {
		vcpu_unimpl(&svm->vcpu, "vmgexit: GHCB gpa is not set\n");
		return -EINVAL;
	}

	if (kvm_vcpu_map(&svm->vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->ghcb_map)) {
		/* Unable to map GHCB from guest */
		vcpu_unimpl(&svm->vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n",
			    ghcb_gpa);
		return -EINVAL;
	}

	svm->ghcb = svm->ghcb_map.hva;
	ghcb = svm->ghcb_map.hva;

	trace_kvm_vmgexit_enter(svm->vcpu.vcpu_id, ghcb);

	exit_code = ghcb_get_sw_exit_code(ghcb);

	ret = sev_es_validate_vmgexit(svm);
	if (ret)
		return ret;

	sev_es_sync_from_ghcb(svm);
	ghcb_set_sw_exit_info_1(ghcb, 0);
	ghcb_set_sw_exit_info_2(ghcb, 0);

	ret = -EINVAL;
	switch (exit_code) {
	case SVM_VMGEXIT_UNSUPPORTED_EVENT:
		vcpu_unimpl(&svm->vcpu,
			    "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
			    control->exit_info_1, control->exit_info_2);
		break;
	default:
		ret = svm_invoke_exit_handler(svm, exit_code);
	}

	return ret;
}