// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * AMD SVM-SEV support
 *
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 */
#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
#include <linux/kernel.h>
#include <linux/highmem.h>
#include <linux/psp-sev.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/processor.h>
#include <linux/trace_events.h>
#include <asm/fpu/internal.h>

#include <asm/trapnr.h>

#include "x86.h"
#include "svm.h"
#include "cpuid.h"
#include "trace.h"

#define __ex(x) __kvm_handle_fault_on_reboot(x)
static u8 sev_enc_bit;
static int sev_flush_asids(void);
static DECLARE_RWSEM(sev_deactivate_lock);
static DEFINE_MUTEX(sev_bitmap_lock);
unsigned int max_sev_asid;
static unsigned int min_sev_asid;
static unsigned long *sev_asid_bitmap;
static unsigned long *sev_reclaim_asid_bitmap;
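
/*
 * Userspace-registered encrypted memory ranges (KVM_MEMORY_ENCRYPT_REG_REGION)
 * are pinned and tracked with one enc_region per range on the per-VM
 * regions_list, so the pages can be unpinned on unregister or at VM
 * destruction.
 */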
struct enc_region {
	struct list_head list;
	unsigned long npages;
	struct page **pages;
	unsigned long uaddr;
	unsigned long size;
};
static int sev_flush_asids(void)
{
	int ret, error = 0;

	/*
	 * DEACTIVATE will clear the WBINVD indicator causing DF_FLUSH to fail,
	 * so it must be guarded.
	 */
	down_write(&sev_deactivate_lock);

	wbinvd_on_all_cpus();
	ret = sev_guest_df_flush(&error);

	up_write(&sev_deactivate_lock);

	if (ret)
		pr_err("SEV: DF_FLUSH failed, ret=%d, error=%#x\n", ret, error);

	return ret;
}
/* Must be called with the sev_bitmap_lock held */
static bool __sev_recycle_asids(int min_asid, int max_asid)
{
	int pos;

	/* Check if there are any ASIDs to reclaim before performing a flush */
	pos = find_next_bit(sev_reclaim_asid_bitmap, max_sev_asid, min_asid);
	if (pos >= max_asid)
		return false;

	if (sev_flush_asids())
		return false;

	/* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */
	bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap,
		   max_sev_asid);
	bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid);

	return true;
}
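
/*
 * Every reclaimable ASID is, by construction, still marked allocated in
 * sev_asid_bitmap, so the bitmap_xor() above clears exactly the reclaimed
 * bits, returning those ASIDs to the free pool in one step.
 */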
static int sev_asid_new(bool es_active)
{
	int pos, min_asid, max_asid;
	bool retry = true;

	mutex_lock(&sev_bitmap_lock);

	/*
	 * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
	 * SEV-ES-enabled guests can use from 1 to min_sev_asid - 1.
	 */
	min_asid = es_active ? 0 : min_sev_asid - 1;
	max_asid = es_active ? min_sev_asid - 1 : max_sev_asid;
again:
	pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_asid);
	if (pos >= max_asid) {
		if (retry && __sev_recycle_asids(min_asid, max_asid)) {
			retry = false;
			goto again;
		}
		mutex_unlock(&sev_bitmap_lock);
		return -EBUSY;
	}

	__set_bit(pos, sev_asid_bitmap);

	mutex_unlock(&sev_bitmap_lock);

	return pos + 1;
}
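
/*
 * The bitmaps are zero-indexed while ASIDs start at 1, hence the "pos + 1"
 * above: bit 0 represents ASID 1. For example, with min_sev_asid == 100,
 * an SEV-ES guest searches bits 0-98 (ASIDs 1-99) while a plain SEV guest
 * starts at bit 99 (ASID 100).
 */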
static int sev_get_asid(struct kvm *kvm)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;

	return sev->asid;
}
static void sev_asid_free(int asid)
{
	struct svm_cpu_data *sd;
	int cpu, pos;

	mutex_lock(&sev_bitmap_lock);

	pos = asid - 1;
	__set_bit(pos, sev_reclaim_asid_bitmap);

	for_each_possible_cpu(cpu) {
		sd = per_cpu(svm_data, cpu);
		sd->sev_vmcbs[pos] = NULL;
	}

	mutex_unlock(&sev_bitmap_lock);
}
static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
{
	struct sev_data_decommission *decommission;
	struct sev_data_deactivate *data;

	if (!handle)
		return;

	data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return;

	/* deactivate handle */
	data->handle = handle;

	/* Guard DEACTIVATE against WBINVD/DF_FLUSH used in ASID recycling */
	down_read(&sev_deactivate_lock);
	sev_guest_deactivate(data, NULL);
	up_read(&sev_deactivate_lock);

	kfree(data);

	decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
	if (!decommission)
		return;

	/* decommission handle */
	decommission->handle = handle;
	sev_guest_decommission(decommission, NULL);

	kfree(decommission);
}
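
/*
 * The SEV firmware requires this two-step teardown: DEACTIVATE detaches the
 * ASID from the firmware context, and only then may DECOMMISSION destroy
 * the context identified by the handle.
 */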
static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	bool es_active = argp->id == KVM_SEV_ES_INIT;
	int asid, ret;

	if (kvm->created_vcpus)
		return -EINVAL;

	ret = -EBUSY;
	if (unlikely(sev->active))
		return ret;

	asid = sev_asid_new(es_active);
	if (asid < 0)
		return ret;

	ret = sev_platform_init(&argp->error);
	if (ret)
		goto e_free;

	sev->active = true;
	sev->asid = asid;
	sev->es_active = es_active;
	INIT_LIST_HEAD(&sev->regions_list);

	return 0;

e_free:
	sev_asid_free(asid);
	return ret;
}
static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
{
	struct sev_data_activate *data;
	int asid = sev_get_asid(kvm);
	int ret;

	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
	if (!data)
		return -ENOMEM;

	/* activate ASID on the given handle */
	data->handle = handle;
	data->asid = asid;
	ret = sev_guest_activate(data, error);
	kfree(data);

	return ret;
}
static int __sev_issue_cmd(int fd, int id, void *data, int *error)
{
	struct fd f;
	int ret;

	f = fdget(fd);
	if (!f.file)
		return -EBADF;

	ret = sev_issue_cmd_external_user(f.file, id, data, error);

	fdput(f);
	return ret;
}
static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;

	return __sev_issue_cmd(sev->fd, id, data, error);
}
static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_launch_start *start;
	struct kvm_sev_launch_start params;
	void *dh_blob, *session_blob;
	int *error = &argp->error;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
		return -EFAULT;

	start = kzalloc(sizeof(*start), GFP_KERNEL_ACCOUNT);
	if (!start)
		return -ENOMEM;

	dh_blob = NULL;
	if (params.dh_uaddr) {
		dh_blob = psp_copy_user_blob(params.dh_uaddr, params.dh_len);
		if (IS_ERR(dh_blob)) {
			ret = PTR_ERR(dh_blob);
			goto e_free;
		}

		start->dh_cert_address = __sme_set(__pa(dh_blob));
		start->dh_cert_len = params.dh_len;
	}

	session_blob = NULL;
	if (params.session_uaddr) {
		session_blob = psp_copy_user_blob(params.session_uaddr, params.session_len);
		if (IS_ERR(session_blob)) {
			ret = PTR_ERR(session_blob);
			goto e_free_dh;
		}

		start->session_address = __sme_set(__pa(session_blob));
		start->session_len = params.session_len;
	}

	start->handle = params.handle;
	start->policy = params.policy;

	/* create memory encryption context */
	ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, start, error);
	if (ret)
		goto e_free_session;

	/* Bind ASID to this guest */
	ret = sev_bind_asid(kvm, start->handle, error);
	if (ret)
		goto e_free_session;

	/* return handle to userspace */
	params.handle = start->handle;
	if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params))) {
		sev_unbind_asid(kvm, start->handle);
		ret = -EFAULT;
		goto e_free_session;
	}

	sev->handle = start->handle;
	sev->fd = argp->sev_fd;

e_free_session:
	kfree(session_blob);
e_free_dh:
	kfree(dh_blob);
e_free:
	kfree(start);
	return ret;
}
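
/*
 * For reference, userspace drives the launch flow above through the
 * KVM_MEMORY_ENCRYPT_OP ioctl on the VM fd. A minimal sketch (assumed
 * setup: vm_fd is the VM file descriptor, sev_fd an open /dev/sev; error
 * handling omitted):
 *
 *	struct kvm_sev_launch_start start = { .policy = 0 };
 *	struct kvm_sev_cmd cmd = {
 *		.id     = KVM_SEV_LAUNCH_START,
 *		.data   = (__u64)(uintptr_t)&start,
 *		.sev_fd = sev_fd,
 *	};
 *	ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
 */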
static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
				    unsigned long ulen, unsigned long *n,
				    int write)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	unsigned long npages, size;
	int npinned;
	unsigned long locked, lock_limit;
	struct page **pages;
	unsigned long first, last;
	int ret;

	lockdep_assert_held(&kvm->lock);

	if (ulen == 0 || uaddr + ulen < uaddr)
		return ERR_PTR(-EINVAL);

	/* Calculate number of pages. */
	first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
	last = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT;
	npages = (last - first + 1);
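	/*
	 * Worked example: uaddr = 0x1234 and ulen = 0x2000 end at 0x3233, so
	 * first = 1, last = 3 and npages = 3 -- an unaligned range spans one
	 * more page than ulen alone would suggest.
	 */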
	locked = sev->pages_locked + npages;
	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
		pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n", locked, lock_limit);
		return ERR_PTR(-ENOMEM);
	}

	if (WARN_ON_ONCE(npages > INT_MAX))
		return ERR_PTR(-EINVAL);

	/* Avoid using vmalloc for smaller buffers. */
	size = npages * sizeof(struct page *);
	if (size > PAGE_SIZE)
		pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	else
		pages = kmalloc(size, GFP_KERNEL_ACCOUNT);

	if (!pages)
		return ERR_PTR(-ENOMEM);

	/* Pin the user virtual address. */
	npinned = pin_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
	if (npinned != npages) {
		pr_err("SEV: Failure locking %lu pages.\n", npages);
		ret = -ENOMEM;
		goto err;
	}

	*n = npages;
	sev->pages_locked = locked;

	return pages;

err:
	if (npinned > 0)
		unpin_user_pages(pages, npinned);

	kvfree(pages);
	return ERR_PTR(ret);
}
static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
			     unsigned long npages)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;

	unpin_user_pages(pages, npages);
	kvfree(pages);
	sev->pages_locked -= npages;
}
static void sev_clflush_pages(struct page *pages[], unsigned long npages)
{
	uint8_t *page_virtual;
	unsigned long i;

	if (this_cpu_has(X86_FEATURE_SME_COHERENT) || npages == 0 ||
	    pages == NULL)
		return;

	for (i = 0; i < npages; i++) {
		page_virtual = kmap_atomic(pages[i]);
		clflush_cache_range(page_virtual, PAGE_SIZE);
		kunmap_atomic(page_virtual);
	}
}
static unsigned long get_num_contig_pages(unsigned long idx,
					  struct page **inpages, unsigned long npages)
{
	unsigned long paddr, next_paddr;
	unsigned long i = idx + 1, pages = 1;

	/* find the number of contiguous pages starting from idx */
	paddr = __sme_page_pa(inpages[idx]);
	while (i < npages) {
		next_paddr = __sme_page_pa(inpages[i++]);
		if ((paddr + PAGE_SIZE) == next_paddr) {
			pages++;
			paddr = next_paddr;
			continue;
		}
		break;
	}

	return pages;
}
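
/*
 * get_num_contig_pages() exists because SEV firmware commands such as
 * LAUNCH_UPDATE_DATA take a single physical address plus a length: a pinned
 * user buffer can only be handed to the firmware in runs of physically
 * contiguous pages, so callers batch one run per command.
 */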
static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i;
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct kvm_sev_launch_update_data params;
	struct sev_data_launch_update_data *data;
	struct page **inpages;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
		return -EFAULT;

	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
	if (!data)
		return -ENOMEM;

	vaddr = params.uaddr;
	size = params.len;
	vaddr_end = vaddr + size;

	/* Lock the user memory. */
	inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
	if (IS_ERR(inpages)) {
		ret = PTR_ERR(inpages);
		goto e_free;
	}

	/*
	 * Flush (on non-coherent CPUs) before LAUNCH_UPDATE encrypts pages in
	 * place; the cache may contain the data that was written unencrypted.
	 */
	sev_clflush_pages(inpages, npages);

	for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) {
		int offset, len;

		/*
		 * If the user buffer is not page-aligned, calculate the offset
		 * within the page.
		 */
		offset = vaddr & (PAGE_SIZE - 1);

		/* Calculate the number of pages that can be encrypted in one go. */
		pages = get_num_contig_pages(i, inpages, npages);

		len = min_t(size_t, ((pages * PAGE_SIZE) - offset), size);

		data->handle = sev->handle;
		data->len = len;
		data->address = __sme_page_pa(inpages[i]) + offset;
		ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, data, &argp->error);
		if (ret)
			goto e_unpin;

		size -= len;
		next_vaddr = vaddr + len;
	}

e_unpin:
	/* content of memory is updated, mark pages dirty */
	for (i = 0; i < npages; i++) {
		set_page_dirty_lock(inpages[i]);
		mark_page_accessed(inpages[i]);
	}
	/* unlock the user pages */
	sev_unpin_memory(kvm, inpages, npages);
e_free:
	kfree(data);
	return ret;
}
static int sev_es_sync_vmsa(struct vcpu_svm *svm)
{
	struct vmcb_save_area *save = &svm->vmcb->save;

	/* Check some debug related fields before encrypting the VMSA */
	if (svm->vcpu.guest_debug || (save->dr7 & ~DR7_FIXED_1))
		return -EINVAL;

	/* Sync registers */
	save->rax = svm->vcpu.arch.regs[VCPU_REGS_RAX];
	save->rbx = svm->vcpu.arch.regs[VCPU_REGS_RBX];
	save->rcx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
	save->rdx = svm->vcpu.arch.regs[VCPU_REGS_RDX];
	save->rsp = svm->vcpu.arch.regs[VCPU_REGS_RSP];
	save->rbp = svm->vcpu.arch.regs[VCPU_REGS_RBP];
	save->rsi = svm->vcpu.arch.regs[VCPU_REGS_RSI];
	save->rdi = svm->vcpu.arch.regs[VCPU_REGS_RDI];

	save->r8 = svm->vcpu.arch.regs[VCPU_REGS_R8];
	save->r9 = svm->vcpu.arch.regs[VCPU_REGS_R9];
	save->r10 = svm->vcpu.arch.regs[VCPU_REGS_R10];
	save->r11 = svm->vcpu.arch.regs[VCPU_REGS_R11];
	save->r12 = svm->vcpu.arch.regs[VCPU_REGS_R12];
	save->r13 = svm->vcpu.arch.regs[VCPU_REGS_R13];
	save->r14 = svm->vcpu.arch.regs[VCPU_REGS_R14];
	save->r15 = svm->vcpu.arch.regs[VCPU_REGS_R15];

	save->rip = svm->vcpu.arch.regs[VCPU_REGS_RIP];

	/* Sync some non-GPR registers before encrypting */
	save->xcr0 = svm->vcpu.arch.xcr0;
	save->pkru = svm->vcpu.arch.pkru;
	save->xss = svm->vcpu.arch.ia32_xss;

	/*
	 * SEV-ES will use a VMSA that is pointed to by the VMCB, not
	 * the traditional VMSA that is part of the VMCB. Copy the
	 * traditional VMSA as it has been built so far (in prep
	 * for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state.
	 */
	memcpy(svm->vmsa, save, sizeof(*save));

	return 0;
}
static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_launch_update_vmsa *vmsa;
	struct kvm_vcpu *vcpu;
	int i, ret;

	if (!sev_es_guest(kvm))
		return -ENOTTY;

	vmsa = kzalloc(sizeof(*vmsa), GFP_KERNEL);
	if (!vmsa)
		return -ENOMEM;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct vcpu_svm *svm = to_svm(vcpu);

		/* Perform some pre-encryption checks against the VMSA */
		ret = sev_es_sync_vmsa(svm);
		if (ret)
			goto e_free;

		/*
		 * The LAUNCH_UPDATE_VMSA command will perform in-place
		 * encryption of the VMSA memory content (i.e it will write
		 * the same memory region with the guest's key), so invalidate
		 * it first.
		 */
		clflush_cache_range(svm->vmsa, PAGE_SIZE);

		vmsa->handle = sev->handle;
		vmsa->address = __sme_pa(svm->vmsa);
		vmsa->len = PAGE_SIZE;
		ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, vmsa,
				    &argp->error);
		if (ret)
			goto e_free;

		svm->vcpu.arch.guest_state_protected = true;
	}

e_free:
	kfree(vmsa);
	return ret;
}
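
/*
 * Once LAUNCH_UPDATE_VMSA succeeds, the VMSA page holds the vCPU state
 * encrypted with the guest's key; guest_state_protected is set because
 * KVM can no longer directly read or modify those registers.
 */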
static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	void __user *measure = (void __user *)(uintptr_t)argp->data;
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_launch_measure *data;
	struct kvm_sev_launch_measure params;
	void __user *p = NULL;
	void *blob = NULL;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&params, measure, sizeof(params)))
		return -EFAULT;

	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
	if (!data)
		return -ENOMEM;

	/* User wants to query the blob length */
	if (!params.len)
		goto cmd;

	p = (void __user *)(uintptr_t)params.uaddr;
	if (p) {
		if (params.len > SEV_FW_BLOB_MAX_SIZE) {
			ret = -EINVAL;
			goto e_free;
		}

		ret = -ENOMEM;
		blob = kmalloc(params.len, GFP_KERNEL);
		if (!blob)
			goto e_free;

		data->address = __psp_pa(blob);
		data->len = params.len;
	}

cmd:
	data->handle = sev->handle;
	ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, data, &argp->error);

	/*
	 * If we query the session length, FW responded with expected data.
	 */
	if (!params.len)
		goto done;

	if (ret)
		goto e_free_blob;

	if (blob) {
		if (copy_to_user(p, blob, params.len))
			ret = -EFAULT;
	}

done:
	params.len = data->len;
	if (copy_to_user(measure, &params, sizeof(params)))
		ret = -EFAULT;
e_free_blob:
	kfree(blob);
e_free:
	kfree(data);
	return ret;
}
static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_launch_finish *data;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
	if (!data)
		return -ENOMEM;

	data->handle = sev->handle;
	ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, data, &argp->error);

	kfree(data);
	return ret;
}
static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct kvm_sev_guest_status params;
	struct sev_data_guest_status *data;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
	if (!data)
		return -ENOMEM;

	data->handle = sev->handle;
	ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, data, &argp->error);
	if (ret)
		goto e_free;

	params.policy = data->policy;
	params.state = data->state;
	params.handle = data->handle;

	if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
		ret = -EFAULT;

e_free:
	kfree(data);
	return ret;
}
static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src,
			       unsigned long dst, int size,
			       int *error, bool enc)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_dbg *data;
	int ret;

	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
	if (!data)
		return -ENOMEM;

	data->handle = sev->handle;
	data->dst_addr = dst;
	data->src_addr = src;
	data->len = size;

	ret = sev_issue_cmd(kvm,
			    enc ? SEV_CMD_DBG_ENCRYPT : SEV_CMD_DBG_DECRYPT,
			    data, error);
	kfree(data);
	return ret;
}
static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
			     unsigned long dst_paddr, int sz, int *err)
{
	int offset;

	/*
	 * It's safe to read more than we are asked, caller should ensure that
	 * destination has enough space.
	 */
	offset = src_paddr & 15;
	src_paddr = round_down(src_paddr, 16);
	sz = round_up(sz + offset, 16);

	return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false);
}
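
/*
 * DBG_DECRYPT operates on 16-byte aligned blocks, hence the rounding above.
 * Worked example: src_paddr = 0x1007 and sz = 0x20 yield offset = 7, an
 * aligned source of 0x1000 and a size of round_up(0x27, 16) = 0x30, which
 * covers the whole requested range.
 */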
static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
				  unsigned long __user dst_uaddr,
				  unsigned long dst_paddr,
				  int size, int *err)
{
	struct page *tpage = NULL;
	int ret, offset;

	/* if inputs are not 16-byte aligned then use an intermediate buffer */
	if (!IS_ALIGNED(dst_paddr, 16) ||
	    !IS_ALIGNED(paddr,     16) ||
	    !IS_ALIGNED(size,      16)) {
		tpage = (void *)alloc_page(GFP_KERNEL);
		if (!tpage)
			return -ENOMEM;

		dst_paddr = __sme_page_pa(tpage);
	}

	ret = __sev_dbg_decrypt(kvm, paddr, dst_paddr, size, err);
	if (ret)
		goto e_free;

	if (tpage) {
		offset = paddr & 15;
		if (copy_to_user((void __user *)(uintptr_t)dst_uaddr,
				 page_address(tpage) + offset, size))
			ret = -EFAULT;
	}

e_free:
	if (tpage)
		__free_page(tpage);

	return ret;
}
*kvm
, unsigned long paddr
,
801 unsigned long __user vaddr
,
802 unsigned long dst_paddr
,
803 unsigned long __user dst_vaddr
,
804 int size
, int *error
)
806 struct page
*src_tpage
= NULL
;
807 struct page
*dst_tpage
= NULL
;
810 /* If source buffer is not aligned then use an intermediate buffer */
811 if (!IS_ALIGNED(vaddr
, 16)) {
812 src_tpage
= alloc_page(GFP_KERNEL
);
816 if (copy_from_user(page_address(src_tpage
),
817 (void __user
*)(uintptr_t)vaddr
, size
)) {
818 __free_page(src_tpage
);
822 paddr
= __sme_page_pa(src_tpage
);
826 * If destination buffer or length is not aligned then do read-modify-write:
827 * - decrypt destination in an intermediate buffer
828 * - copy the source buffer in an intermediate buffer
829 * - use the intermediate buffer as source buffer
831 if (!IS_ALIGNED(dst_vaddr
, 16) || !IS_ALIGNED(size
, 16)) {
834 dst_tpage
= alloc_page(GFP_KERNEL
);
840 ret
= __sev_dbg_decrypt(kvm
, dst_paddr
,
841 __sme_page_pa(dst_tpage
), size
, error
);
846 * If source is kernel buffer then use memcpy() otherwise
849 dst_offset
= dst_paddr
& 15;
852 memcpy(page_address(dst_tpage
) + dst_offset
,
853 page_address(src_tpage
), size
);
855 if (copy_from_user(page_address(dst_tpage
) + dst_offset
,
856 (void __user
*)(uintptr_t)vaddr
, size
)) {
862 paddr
= __sme_page_pa(dst_tpage
);
863 dst_paddr
= round_down(dst_paddr
, 16);
864 len
= round_up(size
, 16);
867 ret
= __sev_issue_dbg_cmd(kvm
, paddr
, dst_paddr
, len
, error
, true);
871 __free_page(src_tpage
);
873 __free_page(dst_tpage
);
static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
{
	unsigned long vaddr, vaddr_end, next_vaddr;
	unsigned long dst_vaddr;
	struct page **src_p, **dst_p;
	struct kvm_sev_dbg debug;
	unsigned long n;
	unsigned int size;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug)))
		return -EFAULT;

	if (!debug.len || debug.src_uaddr + debug.len < debug.src_uaddr)
		return -EINVAL;
	if (!debug.dst_uaddr)
		return -EINVAL;

	vaddr = debug.src_uaddr;
	size = debug.len;
	vaddr_end = vaddr + size;
	dst_vaddr = debug.dst_uaddr;

	for (; vaddr < vaddr_end; vaddr = next_vaddr) {
		int len, s_off, d_off;

		/* lock userspace source and destination page */
		src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0);
		if (IS_ERR(src_p))
			return PTR_ERR(src_p);

		dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1);
		if (IS_ERR(dst_p)) {
			sev_unpin_memory(kvm, src_p, n);
			return PTR_ERR(dst_p);
		}

		/*
		 * Flush (on non-coherent CPUs) before DBG_{DE,EN}CRYPT read or modify
		 * the pages; flush the destination too so that future accesses do not
		 * see stale data.
		 */
		sev_clflush_pages(src_p, 1);
		sev_clflush_pages(dst_p, 1);

		/*
		 * Since user buffer may not be page aligned, calculate the
		 * offset within the page.
		 */
		s_off = vaddr & ~PAGE_MASK;
		d_off = dst_vaddr & ~PAGE_MASK;
		len = min_t(size_t, (PAGE_SIZE - s_off), size);

		if (dec)
			ret = __sev_dbg_decrypt_user(kvm,
						     __sme_page_pa(src_p[0]) + s_off,
						     dst_vaddr,
						     __sme_page_pa(dst_p[0]) + d_off,
						     len, &argp->error);
		else
			ret = __sev_dbg_encrypt_user(kvm,
						     __sme_page_pa(src_p[0]) + s_off,
						     vaddr,
						     __sme_page_pa(dst_p[0]) + d_off,
						     dst_vaddr,
						     len, &argp->error);

		sev_unpin_memory(kvm, src_p, n);
		sev_unpin_memory(kvm, dst_p, n);

		if (ret)
			goto err;

		next_vaddr = vaddr + len;
		dst_vaddr = dst_vaddr + len;
		size -= len;
	}
err:
	return ret;
}
static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_launch_secret *data;
	struct kvm_sev_launch_secret params;
	struct page **pages;
	void *blob, *hdr;
	unsigned long n, i;
	int ret, offset;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
		return -EFAULT;

	pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1);
	if (IS_ERR(pages))
		return PTR_ERR(pages);

	/*
	 * Flush (on non-coherent CPUs) before LAUNCH_SECRET encrypts pages in
	 * place; the cache may contain the data that was written unencrypted.
	 */
	sev_clflush_pages(pages, n);

	/*
	 * The secret must be copied into a contiguous memory region; let's
	 * verify that userspace memory pages are contiguous before we issue
	 * the command.
	 */
	if (get_num_contig_pages(0, pages, n) != n) {
		ret = -EINVAL;
		goto e_unpin_memory;
	}

	ret = -ENOMEM;
	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
	if (!data)
		goto e_unpin_memory;

	offset = params.guest_uaddr & (PAGE_SIZE - 1);
	data->guest_address = __sme_page_pa(pages[0]) + offset;
	data->guest_len = params.guest_len;

	blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
	if (IS_ERR(blob)) {
		ret = PTR_ERR(blob);
		goto e_free;
	}

	data->trans_address = __psp_pa(blob);
	data->trans_len = params.trans_len;

	hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
	if (IS_ERR(hdr)) {
		ret = PTR_ERR(hdr);
		goto e_free_blob;
	}
	data->hdr_address = __psp_pa(hdr);
	data->hdr_len = params.hdr_len;

	data->handle = sev->handle;
	ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error);

	kfree(hdr);

e_free_blob:
	kfree(blob);
e_free:
	kfree(data);
e_unpin_memory:
	/* content of memory is updated, mark pages dirty */
	for (i = 0; i < n; i++) {
		set_page_dirty_lock(pages[i]);
		mark_page_accessed(pages[i]);
	}
	sev_unpin_memory(kvm, pages, n);
	return ret;
}
static int sev_get_attestation_report(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	void __user *report = (void __user *)(uintptr_t)argp->data;
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_attestation_report *data;
	struct kvm_sev_attestation_report params;
	void __user *p;
	void *blob = NULL;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
		return -EFAULT;

	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
	if (!data)
		return -ENOMEM;

	/* User wants to query the blob length */
	if (!params.len)
		goto cmd;

	p = (void __user *)(uintptr_t)params.uaddr;
	if (p) {
		if (params.len > SEV_FW_BLOB_MAX_SIZE) {
			ret = -EINVAL;
			goto e_free;
		}

		ret = -ENOMEM;
		blob = kmalloc(params.len, GFP_KERNEL);
		if (!blob)
			goto e_free;

		data->address = __psp_pa(blob);
		data->len = params.len;
		memcpy(data->mnonce, params.mnonce, sizeof(params.mnonce));
	}
cmd:
	data->handle = sev->handle;
	ret = sev_issue_cmd(kvm, SEV_CMD_ATTESTATION_REPORT, data, &argp->error);
	/*
	 * If we query the session length, FW responded with expected data.
	 */
	if (!params.len)
		goto done;

	if (ret)
		goto e_free_blob;

	if (blob) {
		if (copy_to_user(p, blob, params.len))
			ret = -EFAULT;
	}

done:
	params.len = data->len;
	if (copy_to_user(report, &params, sizeof(params)))
		ret = -EFAULT;
e_free_blob:
	kfree(blob);
e_free:
	kfree(data);
	return ret;
}
int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
{
	struct kvm_sev_cmd sev_cmd;
	int r;

	if (!svm_sev_enabled() || !sev)
		return -ENOTTY;

	if (!argp)
		return 0;

	if (copy_from_user(&sev_cmd, argp, sizeof(struct kvm_sev_cmd)))
		return -EFAULT;

	mutex_lock(&kvm->lock);

	switch (sev_cmd.id) {
	case KVM_SEV_ES_INIT:
		if (!sev_es) {
			r = -ENOTTY;
			goto out;
		}
		fallthrough;
	case KVM_SEV_INIT:
		r = sev_guest_init(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_START:
		r = sev_launch_start(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_UPDATE_DATA:
		r = sev_launch_update_data(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_UPDATE_VMSA:
		r = sev_launch_update_vmsa(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_MEASURE:
		r = sev_launch_measure(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_FINISH:
		r = sev_launch_finish(kvm, &sev_cmd);
		break;
	case KVM_SEV_GUEST_STATUS:
		r = sev_guest_status(kvm, &sev_cmd);
		break;
	case KVM_SEV_DBG_DECRYPT:
		r = sev_dbg_crypt(kvm, &sev_cmd, true);
		break;
	case KVM_SEV_DBG_ENCRYPT:
		r = sev_dbg_crypt(kvm, &sev_cmd, false);
		break;
	case KVM_SEV_LAUNCH_SECRET:
		r = sev_launch_secret(kvm, &sev_cmd);
		break;
	case KVM_SEV_GET_ATTESTATION_REPORT:
		r = sev_get_attestation_report(kvm, &sev_cmd);
		break;
	default:
		r = -EINVAL;
		goto out;
	}

	if (copy_to_user(argp, &sev_cmd, sizeof(struct kvm_sev_cmd)))
		r = -EFAULT;

out:
	mutex_unlock(&kvm->lock);
	return r;
}
int svm_register_enc_region(struct kvm *kvm,
			    struct kvm_enc_region *range)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct enc_region *region;
	int ret = 0;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (range->addr > ULONG_MAX || range->size > ULONG_MAX)
		return -EINVAL;

	region = kzalloc(sizeof(*region), GFP_KERNEL_ACCOUNT);
	if (!region)
		return -ENOMEM;

	mutex_lock(&kvm->lock);
	region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages, 1);
	if (IS_ERR(region->pages)) {
		ret = PTR_ERR(region->pages);
		mutex_unlock(&kvm->lock);
		goto e_free;
	}

	region->uaddr = range->addr;
	region->size = range->size;

	list_add_tail(&region->list, &sev->regions_list);
	mutex_unlock(&kvm->lock);

	/*
	 * The guest may change the memory encryption attribute from C=0 -> C=1
	 * or vice versa for this memory range. Let's make sure caches are
	 * flushed to ensure that guest data gets written into memory with
	 * the correct C-bit.
	 */
	sev_clflush_pages(region->pages, region->npages);

	return ret;

e_free:
	kfree(region);
	return ret;
}
static struct enc_region *
find_enc_region(struct kvm *kvm, struct kvm_enc_region *range)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct list_head *head = &sev->regions_list;
	struct enc_region *i;

	list_for_each_entry(i, head, list) {
		if (i->uaddr == range->addr &&
		    i->size == range->size)
			return i;
	}

	return NULL;
}
static void __unregister_enc_region_locked(struct kvm *kvm,
					   struct enc_region *region)
{
	sev_unpin_memory(kvm, region->pages, region->npages);
	list_del(&region->list);
	kfree(region);
}
int svm_unregister_enc_region(struct kvm *kvm,
			      struct kvm_enc_region *range)
{
	struct enc_region *region;
	int ret;

	mutex_lock(&kvm->lock);

	if (!sev_guest(kvm)) {
		ret = -ENOTTY;
		goto failed;
	}

	region = find_enc_region(kvm, range);
	if (!region) {
		ret = -EINVAL;
		goto failed;
	}

	/*
	 * Ensure that all guest tagged cache entries are flushed before
	 * releasing the pages back to the system for use. CLFLUSH will
	 * not do this, so issue a WBINVD.
	 */
	wbinvd_on_all_cpus();

	__unregister_enc_region_locked(kvm, region);

	mutex_unlock(&kvm->lock);
	return 0;

failed:
	mutex_unlock(&kvm->lock);
	return ret;
}
void sev_vm_destroy(struct kvm *kvm)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct list_head *head = &sev->regions_list;
	struct list_head *pos, *q;

	if (!sev_guest(kvm))
		return;

	mutex_lock(&kvm->lock);

	/*
	 * Ensure that all guest tagged cache entries are flushed before
	 * releasing the pages back to the system for use. CLFLUSH will
	 * not do this, so issue a WBINVD.
	 */
	wbinvd_on_all_cpus();

	/*
	 * If userspace was terminated before unregistering the memory regions
	 * then let's unpin all the registered memory.
	 */
	if (!list_empty(head)) {
		list_for_each_safe(pos, q, head) {
			__unregister_enc_region_locked(kvm,
				list_entry(pos, struct enc_region, list));
		}
	}

	mutex_unlock(&kvm->lock);

	sev_unbind_asid(kvm, sev->handle);
	sev_asid_free(sev->asid);
}
void __init sev_hardware_setup(void)
{
	unsigned int eax, ebx, ecx, edx;
	bool sev_es_supported = false;
	bool sev_supported = false;

	/* Does the CPU support SEV? */
	if (!boot_cpu_has(X86_FEATURE_SEV))
		goto out;

	/* Retrieve SEV CPUID information */
	cpuid(0x8000001f, &eax, &ebx, &ecx, &edx);

	/* Set encryption bit location for SEV-ES guests */
	sev_enc_bit = ebx & 0x3f;

	/* Maximum number of encrypted guests supported simultaneously */
	max_sev_asid = ecx;

	if (!svm_sev_enabled())
		goto out;

	/* Minimum ASID value that should be used for SEV guest */
	min_sev_asid = edx;

	/* Initialize SEV ASID bitmaps */
	sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
	if (!sev_asid_bitmap)
		goto out;

	sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
	if (!sev_reclaim_asid_bitmap)
		goto out;

	pr_info("SEV supported: %u ASIDs\n", max_sev_asid - min_sev_asid + 1);
	sev_supported = true;

	/* SEV-ES support requested? */
	if (!sev_es)
		goto out;

	/* Does the CPU support SEV-ES? */
	if (!boot_cpu_has(X86_FEATURE_SEV_ES))
		goto out;

	/* Has the system been allocated ASIDs for SEV-ES? */
	if (min_sev_asid == 1)
		goto out;

	pr_info("SEV-ES supported: %u ASIDs\n", min_sev_asid - 1);
	sev_es_supported = true;

out:
	sev = sev_supported;
	sev_es = sev_es_supported;
}
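
/*
 * Note the resulting ASID partitioning: firmware reports min_sev_asid in
 * EDX, reserving ASIDs 1 through min_sev_asid - 1 for SEV-ES guests and
 * min_sev_asid through max_sev_asid for plain SEV guests, which matches
 * the search ranges used in sev_asid_new().
 */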
void sev_hardware_teardown(void)
{
	if (!svm_sev_enabled())
		return;

	bitmap_free(sev_asid_bitmap);
	bitmap_free(sev_reclaim_asid_bitmap);

	sev_flush_asids();
}
/*
 * Pages used by hardware to hold guest encrypted state must be flushed before
 * returning them to the system.
 */
static void sev_flush_guest_memory(struct vcpu_svm *svm, void *va,
				   unsigned long len)
{
	/*
	 * If hardware enforced cache coherency for encrypted mappings of the
	 * same physical page is supported, nothing to do.
	 */
	if (boot_cpu_has(X86_FEATURE_SME_COHERENT))
		return;

	/*
	 * If the VM Page Flush MSR is supported, use it to flush the page
	 * (using the page virtual address and the guest ASID).
	 */
	if (boot_cpu_has(X86_FEATURE_VM_PAGE_FLUSH)) {
		struct kvm_sev_info *sev;
		unsigned long va_start;
		u64 start, stop;

		/* Align start and stop to page boundaries. */
		va_start = (unsigned long)va;
		start = (u64)va_start & PAGE_MASK;
		stop = PAGE_ALIGN((u64)va_start + len);

		if (start < stop) {
			sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;

			while (start < stop) {
				wrmsrl(MSR_AMD64_VM_PAGE_FLUSH,
				       start | sev->asid);

				start += PAGE_SIZE;
			}

			return;
		}

		WARN(1, "Address overflow, using WBINVD\n");
	}

	/*
	 * Hardware should always have one of the above features,
	 * but if not, use WBINVD and issue a warning.
	 */
	WARN_ONCE(1, "Using WBINVD to flush guest memory\n");
	wbinvd_on_all_cpus();
}
void sev_free_vcpu(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm;

	if (!sev_es_guest(vcpu->kvm))
		return;

	svm = to_svm(vcpu);

	if (vcpu->arch.guest_state_protected)
		sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE);
	__free_page(virt_to_page(svm->vmsa));

	if (svm->ghcb_sa_free)
		kfree(svm->ghcb_sa);
}
static void dump_ghcb(struct vcpu_svm *svm)
{
	struct ghcb *ghcb = svm->ghcb;
	unsigned int nbits;

	/* Re-use the dump_invalid_vmcb module parameter */
	if (!dump_invalid_vmcb) {
		pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n");
		return;
	}

	nbits = sizeof(ghcb->save.valid_bitmap) * 8;

	pr_err("GHCB (GPA=%016llx):\n", svm->vmcb->control.ghcb_gpa);
	pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code",
	       ghcb->save.sw_exit_code, ghcb_sw_exit_code_is_valid(ghcb));
	pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1",
	       ghcb->save.sw_exit_info_1, ghcb_sw_exit_info_1_is_valid(ghcb));
	pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2",
	       ghcb->save.sw_exit_info_2, ghcb_sw_exit_info_2_is_valid(ghcb));
	pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch",
	       ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb));
	pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap);
}
static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
{
	struct kvm_vcpu *vcpu = &svm->vcpu;
	struct ghcb *ghcb = svm->ghcb;

	/*
	 * The GHCB protocol so far allows for the following data
	 * to be returned:
	 *   GPRs RAX, RBX, RCX, RDX
	 *
	 * Copy their values, even if they may not have been written during the
	 * VM-Exit. It's the guest's responsibility to not consume random data.
	 */
	ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]);
	ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]);
	ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]);
	ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]);
}
static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
{
	struct vmcb_control_area *control = &svm->vmcb->control;
	struct kvm_vcpu *vcpu = &svm->vcpu;
	struct ghcb *ghcb = svm->ghcb;
	u64 exit_code;

	/*
	 * The GHCB protocol so far allows for the following data
	 * to be supplied:
	 *   GPRs RAX, RBX, RCX, RDX
	 *   XCR0
	 *   CPL
	 *
	 * VMMCALL allows the guest to provide extra registers. KVM also
	 * expects RSI for hypercalls, so include that, too.
	 *
	 * Copy their values to the appropriate location if supplied.
	 */
	memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));

	vcpu->arch.regs[VCPU_REGS_RAX] = ghcb_get_rax_if_valid(ghcb);
	vcpu->arch.regs[VCPU_REGS_RBX] = ghcb_get_rbx_if_valid(ghcb);
	vcpu->arch.regs[VCPU_REGS_RCX] = ghcb_get_rcx_if_valid(ghcb);
	vcpu->arch.regs[VCPU_REGS_RDX] = ghcb_get_rdx_if_valid(ghcb);
	vcpu->arch.regs[VCPU_REGS_RSI] = ghcb_get_rsi_if_valid(ghcb);

	svm->vmcb->save.cpl = ghcb_get_cpl_if_valid(ghcb);

	if (ghcb_xcr0_is_valid(ghcb)) {
		vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb);
		kvm_update_cpuid_runtime(vcpu);
	}

	/* Copy the GHCB exit information into the VMCB fields */
	exit_code = ghcb_get_sw_exit_code(ghcb);
	control->exit_code = lower_32_bits(exit_code);
	control->exit_code_hi = upper_32_bits(exit_code);
	control->exit_info_1 = ghcb_get_sw_exit_info_1(ghcb);
	control->exit_info_2 = ghcb_get_sw_exit_info_2(ghcb);

	/* Clear the valid entries fields */
	memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
}
static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
{
	struct kvm_vcpu *vcpu;
	struct ghcb *ghcb;
	u64 exit_code = 0;

	ghcb = svm->ghcb;

	/* Only GHCB Usage code 0 is supported */
	if (ghcb->ghcb_usage)
		goto vmgexit_err;

	/*
	 * Retrieve the exit code now even though it may not be marked valid
	 * as it could help with debugging.
	 */
	exit_code = ghcb_get_sw_exit_code(ghcb);

	if (!ghcb_sw_exit_code_is_valid(ghcb) ||
	    !ghcb_sw_exit_info_1_is_valid(ghcb) ||
	    !ghcb_sw_exit_info_2_is_valid(ghcb))
		goto vmgexit_err;

	switch (ghcb_get_sw_exit_code(ghcb)) {
	case SVM_EXIT_READ_DR7:
		break;
	case SVM_EXIT_WRITE_DR7:
		if (!ghcb_rax_is_valid(ghcb))
			goto vmgexit_err;
		break;
	case SVM_EXIT_RDTSC:
		break;
	case SVM_EXIT_RDPMC:
		if (!ghcb_rcx_is_valid(ghcb))
			goto vmgexit_err;
		break;
	case SVM_EXIT_CPUID:
		if (!ghcb_rax_is_valid(ghcb) ||
		    !ghcb_rcx_is_valid(ghcb))
			goto vmgexit_err;
		if (ghcb_get_rax(ghcb) == 0xd)
			if (!ghcb_xcr0_is_valid(ghcb))
				goto vmgexit_err;
		break;
	case SVM_EXIT_INVD:
		break;
	case SVM_EXIT_IOIO:
		if (ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_STR_MASK) {
			if (!ghcb_sw_scratch_is_valid(ghcb))
				goto vmgexit_err;
		} else {
			if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK))
				if (!ghcb_rax_is_valid(ghcb))
					goto vmgexit_err;
		}
		break;
	case SVM_EXIT_MSR:
		if (!ghcb_rcx_is_valid(ghcb))
			goto vmgexit_err;
		if (ghcb_get_sw_exit_info_1(ghcb)) {
			if (!ghcb_rax_is_valid(ghcb) ||
			    !ghcb_rdx_is_valid(ghcb))
				goto vmgexit_err;
		}
		break;
	case SVM_EXIT_VMMCALL:
		if (!ghcb_rax_is_valid(ghcb) ||
		    !ghcb_cpl_is_valid(ghcb))
			goto vmgexit_err;
		break;
	case SVM_EXIT_RDTSCP:
		break;
	case SVM_EXIT_WBINVD:
		break;
	case SVM_EXIT_MONITOR:
		if (!ghcb_rax_is_valid(ghcb) ||
		    !ghcb_rcx_is_valid(ghcb) ||
		    !ghcb_rdx_is_valid(ghcb))
			goto vmgexit_err;
		break;
	case SVM_EXIT_MWAIT:
		if (!ghcb_rax_is_valid(ghcb) ||
		    !ghcb_rcx_is_valid(ghcb))
			goto vmgexit_err;
		break;
	case SVM_VMGEXIT_MMIO_READ:
	case SVM_VMGEXIT_MMIO_WRITE:
		if (!ghcb_sw_scratch_is_valid(ghcb))
			goto vmgexit_err;
		break;
	case SVM_VMGEXIT_NMI_COMPLETE:
	case SVM_VMGEXIT_AP_HLT_LOOP:
	case SVM_VMGEXIT_AP_JUMP_TABLE:
	case SVM_VMGEXIT_UNSUPPORTED_EVENT:
		break;
	default:
		goto vmgexit_err;
	}

	return 0;

vmgexit_err:
	vcpu = &svm->vcpu;

	if (ghcb->ghcb_usage) {
		vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n",
			    ghcb->ghcb_usage);
	} else {
		vcpu_unimpl(vcpu, "vmgexit: exit reason %#llx is not valid\n",
			    exit_code);
		dump_ghcb(svm);
	}

	vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
	vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
	vcpu->run->internal.ndata = 2;
	vcpu->run->internal.data[0] = exit_code;
	vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;

	return -EINVAL;
}
static void pre_sev_es_run(struct vcpu_svm *svm)
{
	if (!svm->ghcb)
		return;

	if (svm->ghcb_sa_free) {
		/*
		 * The scratch area lives outside the GHCB, so there is a
		 * buffer that, depending on the operation performed, may
		 * need to be synced, then freed.
		 */
		if (svm->ghcb_sa_sync) {
			kvm_write_guest(svm->vcpu.kvm,
					ghcb_get_sw_scratch(svm->ghcb),
					svm->ghcb_sa, svm->ghcb_sa_len);
			svm->ghcb_sa_sync = false;
		}

		kfree(svm->ghcb_sa);
		svm->ghcb_sa = NULL;
		svm->ghcb_sa_free = false;
	}

	trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->ghcb);

	sev_es_sync_to_ghcb(svm);

	kvm_vcpu_unmap(&svm->vcpu, &svm->ghcb_map, true);
	svm->ghcb = NULL;
}
void pre_sev_run(struct vcpu_svm *svm, int cpu)
{
	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
	int asid = sev_get_asid(svm->vcpu.kvm);

	/* Perform any SEV-ES pre-run actions */
	pre_sev_es_run(svm);

	/* Assign the asid allocated with this SEV guest */
	svm->vmcb->control.asid = asid;

	/*
	 * Flush guest TLB:
	 *
	 * 1) when different VMCB for the same ASID is to be run on the same host CPU.
	 * 2) or this VMCB was executed on different host CPU in previous VMRUNs.
	 */
	if (sd->sev_vmcbs[asid] == svm->vmcb &&
	    svm->vcpu.arch.last_vmentry_cpu == cpu)
		return;

	sd->sev_vmcbs[asid] = svm->vmcb;
	svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
	vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
}
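
/*
 * TLB entries created while running an SEV guest are tagged with the
 * guest's ASID, so a flush only needs to be requested when a different
 * VMCB has used this ASID on this CPU or this VMCB last ran elsewhere;
 * otherwise the tagged translations are still valid and the early return
 * above skips the flush.
 */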
#define GHCB_SCRATCH_AREA_LIMIT		(16ULL * PAGE_SIZE)
static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
{
	struct vmcb_control_area *control = &svm->vmcb->control;
	struct ghcb *ghcb = svm->ghcb;
	u64 ghcb_scratch_beg, ghcb_scratch_end;
	u64 scratch_gpa_beg, scratch_gpa_end;
	void *scratch_va;

	scratch_gpa_beg = ghcb_get_sw_scratch(ghcb);
	if (!scratch_gpa_beg) {
		pr_err("vmgexit: scratch gpa not provided\n");
		return false;
	}

	scratch_gpa_end = scratch_gpa_beg + len;
	if (scratch_gpa_end < scratch_gpa_beg) {
		pr_err("vmgexit: scratch length (%#llx) not valid for scratch address (%#llx)\n",
		       len, scratch_gpa_beg);
		return false;
	}

	if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) {
		/* Scratch area begins within GHCB */
		ghcb_scratch_beg = control->ghcb_gpa +
				   offsetof(struct ghcb, shared_buffer);
		ghcb_scratch_end = control->ghcb_gpa +
				   offsetof(struct ghcb, reserved_1);

		/*
		 * If the scratch area begins within the GHCB, it must be
		 * completely contained in the GHCB shared buffer area.
		 */
		if (scratch_gpa_beg < ghcb_scratch_beg ||
		    scratch_gpa_end > ghcb_scratch_end) {
			pr_err("vmgexit: scratch area is outside of GHCB shared buffer area (%#llx - %#llx)\n",
			       scratch_gpa_beg, scratch_gpa_end);
			return false;
		}

		scratch_va = (void *)svm->ghcb;
		scratch_va += (scratch_gpa_beg - control->ghcb_gpa);
	} else {
		/*
		 * The guest memory must be read into a kernel buffer, so
		 * limit the size
		 */
		if (len > GHCB_SCRATCH_AREA_LIMIT) {
			pr_err("vmgexit: scratch area exceeds KVM limits (%#llx requested, %#llx limit)\n",
			       len, GHCB_SCRATCH_AREA_LIMIT);
			return false;
		}
		scratch_va = kzalloc(len, GFP_KERNEL);
		if (!scratch_va)
			return false;

		if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, scratch_va, len)) {
			/* Unable to copy scratch area from guest */
			pr_err("vmgexit: kvm_read_guest for scratch area failed\n");
			kfree(scratch_va);
			return false;
		}

		/*
		 * The scratch area is outside the GHCB. The operation will
		 * dictate whether the buffer needs to be synced before running
		 * the vCPU next time (i.e. a read was requested so the data
		 * must be written back to the guest memory).
		 */
		svm->ghcb_sa_sync = sync;
		svm->ghcb_sa_free = true;
	}

	svm->ghcb_sa = scratch_va;
	svm->ghcb_sa_len = len;

	return true;
}
static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask,
			      unsigned int pos)
{
	svm->vmcb->control.ghcb_gpa &= ~(mask << pos);
	svm->vmcb->control.ghcb_gpa |= (value & mask) << pos;
}

static u64 get_ghcb_msr_bits(struct vcpu_svm *svm, u64 mask, unsigned int pos)
{
	return (svm->vmcb->control.ghcb_gpa >> pos) & mask;
}
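
/*
 * Layout note (per the GHCB specification): the MSR-based protocol packs
 * an info/request code into bits 11:0 of the GHCB MSR and request-specific
 * data into the upper bits. A CPUID request, for example, carries the
 * function in bits 63:32 and the register selector in bits 31:30, which is
 * exactly what the mask/position helpers above extract.
 */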
static void set_ghcb_msr(struct vcpu_svm *svm, u64 value)
{
	svm->vmcb->control.ghcb_gpa = value;
}
static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
{
	struct vmcb_control_area *control = &svm->vmcb->control;
	struct kvm_vcpu *vcpu = &svm->vcpu;
	u64 ghcb_info;
	int ret = 1;

	ghcb_info = control->ghcb_gpa & GHCB_MSR_INFO_MASK;

	trace_kvm_vmgexit_msr_protocol_enter(svm->vcpu.vcpu_id,
					     control->ghcb_gpa);

	switch (ghcb_info) {
	case GHCB_MSR_SEV_INFO_REQ:
		set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
						    GHCB_VERSION_MIN,
						    sev_enc_bit));
		break;
	case GHCB_MSR_CPUID_REQ: {
		u64 cpuid_fn, cpuid_reg, cpuid_value;

		cpuid_fn = get_ghcb_msr_bits(svm,
					     GHCB_MSR_CPUID_FUNC_MASK,
					     GHCB_MSR_CPUID_FUNC_POS);

		/* Initialize the registers needed by the CPUID intercept */
		vcpu->arch.regs[VCPU_REGS_RAX] = cpuid_fn;
		vcpu->arch.regs[VCPU_REGS_RCX] = 0;

		ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_CPUID);
		if (!ret) {
			ret = -EINVAL;
			break;
		}

		cpuid_reg = get_ghcb_msr_bits(svm,
					      GHCB_MSR_CPUID_REG_MASK,
					      GHCB_MSR_CPUID_REG_POS);
		if (cpuid_reg == 0)
			cpuid_value = vcpu->arch.regs[VCPU_REGS_RAX];
		else if (cpuid_reg == 1)
			cpuid_value = vcpu->arch.regs[VCPU_REGS_RBX];
		else if (cpuid_reg == 2)
			cpuid_value = vcpu->arch.regs[VCPU_REGS_RCX];
		else
			cpuid_value = vcpu->arch.regs[VCPU_REGS_RDX];

		set_ghcb_msr_bits(svm, cpuid_value,
				  GHCB_MSR_CPUID_VALUE_MASK,
				  GHCB_MSR_CPUID_VALUE_POS);

		set_ghcb_msr_bits(svm, GHCB_MSR_CPUID_RESP,
				  GHCB_MSR_INFO_MASK,
				  GHCB_MSR_INFO_POS);
		break;
	}
	case GHCB_MSR_TERM_REQ: {
		u64 reason_set, reason_code;

		reason_set = get_ghcb_msr_bits(svm,
					       GHCB_MSR_TERM_REASON_SET_MASK,
					       GHCB_MSR_TERM_REASON_SET_POS);
		reason_code = get_ghcb_msr_bits(svm,
						GHCB_MSR_TERM_REASON_MASK,
						GHCB_MSR_TERM_REASON_POS);
		pr_info("SEV-ES guest requested termination: %#llx:%#llx\n",
			reason_set, reason_code);

		ret = -EINVAL;
		break;
	}
	default:
		ret = -EINVAL;
	}

	trace_kvm_vmgexit_msr_protocol_exit(svm->vcpu.vcpu_id,
					    control->ghcb_gpa, ret);

	return ret;
}
int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vmcb_control_area *control = &svm->vmcb->control;
	u64 ghcb_gpa, exit_code;
	struct ghcb *ghcb;
	int ret;

	/* Validate the GHCB */
	ghcb_gpa = control->ghcb_gpa;
	if (ghcb_gpa & GHCB_MSR_INFO_MASK)
		return sev_handle_vmgexit_msr_protocol(svm);

	if (!ghcb_gpa) {
		vcpu_unimpl(vcpu, "vmgexit: GHCB gpa is not set\n");
		return -EINVAL;
	}

	if (kvm_vcpu_map(vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->ghcb_map)) {
		/* Unable to map GHCB from guest */
		vcpu_unimpl(vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n",
			    ghcb_gpa);
		return -EINVAL;
	}

	svm->ghcb = svm->ghcb_map.hva;
	ghcb = svm->ghcb_map.hva;

	trace_kvm_vmgexit_enter(vcpu->vcpu_id, ghcb);

	exit_code = ghcb_get_sw_exit_code(ghcb);

	ret = sev_es_validate_vmgexit(svm);
	if (ret)
		return ret;

	sev_es_sync_from_ghcb(svm);
	ghcb_set_sw_exit_info_1(ghcb, 0);
	ghcb_set_sw_exit_info_2(ghcb, 0);

	ret = -EINVAL;
	switch (exit_code) {
	case SVM_VMGEXIT_MMIO_READ:
		if (!setup_vmgexit_scratch(svm, true, control->exit_info_2))
			break;

		ret = kvm_sev_es_mmio_read(vcpu,
					   control->exit_info_1,
					   control->exit_info_2,
					   svm->ghcb_sa);
		break;
	case SVM_VMGEXIT_MMIO_WRITE:
		if (!setup_vmgexit_scratch(svm, false, control->exit_info_2))
			break;

		ret = kvm_sev_es_mmio_write(vcpu,
					    control->exit_info_1,
					    control->exit_info_2,
					    svm->ghcb_sa);
		break;
	case SVM_VMGEXIT_NMI_COMPLETE:
		ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET);
		break;
	case SVM_VMGEXIT_AP_HLT_LOOP:
		ret = kvm_emulate_ap_reset_hold(vcpu);
		break;
	case SVM_VMGEXIT_AP_JUMP_TABLE: {
		struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;

		switch (control->exit_info_1) {
		case 0:
			/* Set AP jump table address */
			sev->ap_jump_table = control->exit_info_2;
			break;
		case 1:
			/* Get AP jump table address */
			ghcb_set_sw_exit_info_2(ghcb, sev->ap_jump_table);
			break;
		default:
			pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n",
			       control->exit_info_1);
			ghcb_set_sw_exit_info_1(ghcb, 1);
			ghcb_set_sw_exit_info_2(ghcb,
						X86_TRAP_UD |
						SVM_EVTINJ_TYPE_EXEPT |
						SVM_EVTINJ_VALID);
		}

		ret = 1;
		break;
	}
	case SVM_VMGEXIT_UNSUPPORTED_EVENT:
		vcpu_unimpl(vcpu,
			    "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
			    control->exit_info_1, control->exit_info_2);
		break;
	default:
		ret = svm_invoke_exit_handler(vcpu, exit_code);
	}

	return ret;
}
int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
{
	if (!setup_vmgexit_scratch(svm, in, svm->vmcb->control.exit_info_2))
		return -EINVAL;

	return kvm_sev_es_string_io(&svm->vcpu, size, port,
				    svm->ghcb_sa, svm->ghcb_sa_len, in);
}
void sev_es_init_vmcb(struct vcpu_svm *svm)
{
	struct kvm_vcpu *vcpu = &svm->vcpu;

	svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
	svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;

	/*
	 * An SEV-ES guest requires a VMSA area that is separate from the
	 * VMCB page. Do not include the encryption mask on the VMSA physical
	 * address since hardware will access it using the guest key.
	 */
	svm->vmcb->control.vmsa_pa = __pa(svm->vmsa);

	/* Can't intercept CR register access, HV can't modify CR registers */
	svm_clr_intercept(svm, INTERCEPT_CR0_READ);
	svm_clr_intercept(svm, INTERCEPT_CR4_READ);
	svm_clr_intercept(svm, INTERCEPT_CR8_READ);
	svm_clr_intercept(svm, INTERCEPT_CR0_WRITE);
	svm_clr_intercept(svm, INTERCEPT_CR4_WRITE);
	svm_clr_intercept(svm, INTERCEPT_CR8_WRITE);

	svm_clr_intercept(svm, INTERCEPT_SELECTIVE_CR0);

	/* Track EFER/CR register changes */
	svm_set_intercept(svm, TRAP_EFER_WRITE);
	svm_set_intercept(svm, TRAP_CR0_WRITE);
	svm_set_intercept(svm, TRAP_CR4_WRITE);
	svm_set_intercept(svm, TRAP_CR8_WRITE);

	/* No support for enable_vmware_backdoor */
	clr_exception_intercept(svm, GP_VECTOR);

	/* Can't intercept XSETBV, HV can't modify XCR0 directly */
	svm_clr_intercept(svm, INTERCEPT_XSETBV);

	/* Clear intercepts on selected MSRs */
	set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1);
	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1);
	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
}
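
/*
 * Note on the pass-throughs above: EFER and CR writes are still tracked,
 * but via the post-write TRAP_* intercepts that are usable for SEV-ES,
 * and the LBR MSRs are virtualized by hardware once LBR_CTL_ENABLE_MASK
 * is set, so intercepting these MSRs would serve no purpose for a guest
 * whose register state KVM cannot modify.
 */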
void sev_es_create_vcpu(struct vcpu_svm *svm)
{
	/*
	 * Set the GHCB MSR value as per the GHCB specification when creating
	 * a vCPU for an SEV-ES guest.
	 */
	set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
					    GHCB_VERSION_MIN,
					    sev_enc_bit));
}
void sev_es_prepare_guest_switch(struct vcpu_svm *svm, unsigned int cpu)
{
	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
	struct vmcb_save_area *hostsa;

	/*
	 * As an SEV-ES guest, hardware will restore the host state on VMEXIT,
	 * of which one step is to perform a VMLOAD. Since hardware does not
	 * perform a VMSAVE on VMRUN, the host savearea must be updated.
	 */
	vmsave(__sme_page_pa(sd->save_area));

	/* XCR0 is restored on VMEXIT, save the current host value */
	hostsa = (struct vmcb_save_area *)(page_address(sd->save_area) + 0x400);
	hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);

	/* PKRU is restored on VMEXIT, save the current host value */
	hostsa->pkru = read_pkru();

	/* MSR_IA32_XSS is restored on VMEXIT, save the current host value */
	hostsa->xss = host_xss;
}
void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	/* First SIPI: Use the values as initially set by the VMM */
	if (!svm->received_first_sipi) {
		svm->received_first_sipi = true;
		return;
	}

	/*
	 * Subsequent SIPI: Return from an AP Reset Hold VMGEXIT, where
	 * the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a
	 * non-zero value.
	 */
	ghcb_set_sw_exit_info_2(svm->ghcb, 1);
}