arch/x86/kvm/svm/sev.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * Kernel-based Virtual Machine driver for Linux
   4  *
   5  * AMD SVM-SEV support
   6  *
   7  * Copyright 2010 Red Hat, Inc. and/or its affiliates.
   8  */
   9
  10 #include <linux/kvm_types.h>
  11 #include <linux/kvm_host.h>
  12 #include <linux/kernel.h>
  13 #include <linux/highmem.h>
  14 #include <linux/psp-sev.h>
  15 #include <linux/pagemap.h>
  16 #include <linux/swap.h>
  17 #include <linux/processor.h>
  18 #include <linux/trace_events.h>
  19 #include <asm/fpu/internal.h>
  20
  21 #include <asm/trapnr.h>
  22
  23 #include "x86.h"
  24 #include "svm.h"
  25 #include "svm_ops.h"
  26 #include "cpuid.h"
  27 #include "trace.h"
  28
  29 #define __ex(x) __kvm_handle_fault_on_reboot(x)
  30
  31 static u8 sev_enc_bit;
  32 static int sev_flush_asids(void);
  33 static DECLARE_RWSEM(sev_deactivate_lock);
  34 static DEFINE_MUTEX(sev_bitmap_lock);
  35 unsigned int max_sev_asid;
  36 static unsigned int min_sev_asid;
  37 static unsigned long *sev_asid_bitmap;
  38 static unsigned long *sev_reclaim_asid_bitmap;
  39
  40 struct enc_region {
  41         struct list_head list;
  42         unsigned long npages;
  43         struct page **pages;
  44         unsigned long uaddr;
  45         unsigned long size;
  46 };
  47
  48 static int sev_flush_asids(void)
  49 {
  50         int ret, error = 0;
  51
  52         /*
  53          * DEACTIVATE will clear the WBINVD indicator causing DF_FLUSH to fail,
  54          * so it must be guarded.
  55          */
  56         down_write(&sev_deactivate_lock);
  57
  58         wbinvd_on_all_cpus();
  59         ret = sev_guest_df_flush(&error);
  60
  61         up_write(&sev_deactivate_lock);
  62
  63         if (ret)
  64                 pr_err("SEV: DF_FLUSH failed, ret=%d, error=%#x\n", ret, error);
  65
  66         return ret;
  67 }
  68
  69 /* Must be called with the sev_bitmap_lock held */
  70 static bool __sev_recycle_asids(int min_asid, int max_asid)
  71 {
  72         int pos;
  73
  74         /* Check if there are any ASIDs to reclaim before performing a flush */
  75         pos = find_next_bit(sev_reclaim_asid_bitmap, max_sev_asid, min_asid);
  76         if (pos >= max_asid)
  77                 return false;
  78
  79         if (sev_flush_asids())
  80                 return false;
  81
  82         /* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */
  83         bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap,
  84                    max_sev_asid);
  85         bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid);
  86
  87         return true;
  88 }
  89
  90 static int sev_asid_new(bool es_active)
  91 {
  92         int pos, min_asid, max_asid;
  93         bool retry = true;
  94
  95         mutex_lock(&sev_bitmap_lock);
  96
  97         /*
  98          * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
  99          * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
 100          */
 101         min_asid = es_active ? 0 : min_sev_asid - 1;
 102         max_asid = es_active ? min_sev_asid - 1 : max_sev_asid;
 103 again:
 104         pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_asid);
 105         if (pos >= max_asid) {
 106                 if (retry && __sev_recycle_asids(min_asid, max_asid)) {
 107                         retry = false;
 108                         goto again;
 109                 }
 110                 mutex_unlock(&sev_bitmap_lock);
 111                 return -EBUSY;
 112         }
 113
 114         __set_bit(pos, sev_asid_bitmap);
 115
 116         mutex_unlock(&sev_bitmap_lock);
 117
 118         return pos + 1;
 119 }
 120
 121 static int sev_get_asid(struct kvm *kvm)
 122 {
 123         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 124
 125         return sev->asid;
 126 }
 127
 128 static void sev_asid_free(int asid)
 129 {
 130         struct svm_cpu_data *sd;
 131         int cpu, pos;
 132
 133         mutex_lock(&sev_bitmap_lock);
 134
 135         pos = asid - 1;
 136         __set_bit(pos, sev_reclaim_asid_bitmap);
 137
 138         for_each_possible_cpu(cpu) {
 139                 sd = per_cpu(svm_data, cpu);
 140                 sd->sev_vmcbs[pos] = NULL;
 141         }
 142
 143         mutex_unlock(&sev_bitmap_lock);
 144 }
 145
 146 static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
 147 {
 148         struct sev_data_decommission *decommission;
 149         struct sev_data_deactivate *data;
 150
 151         if (!handle)
 152                 return;
 153
 154         data = kzalloc(sizeof(*data), GFP_KERNEL);
 155         if (!data)
 156                 return;
 157
 158         /* deactivate handle */
 159         data->handle = handle;
 160
 161         /* Guard DEACTIVATE against WBINVD/DF_FLUSH used in ASID recycling */
 162         down_read(&sev_deactivate_lock);
 163         sev_guest_deactivate(data, NULL);
 164         up_read(&sev_deactivate_lock);
 165
 166         kfree(data);
 167
 168         decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
 169         if (!decommission)
 170                 return;
 171
 172         /* decommission handle */
 173         decommission->handle = handle;
 174         sev_guest_decommission(decommission, NULL);
 175
 176         kfree(decommission);
 177 }
 178
 179 static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
 180 {
 181         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 182         bool es_active = argp->id == KVM_SEV_ES_INIT;
 183         int asid, ret;
 184
 185         if (kvm->created_vcpus)
 186                 return -EINVAL;
 187
 188         ret = -EBUSY;
 189         if (unlikely(sev->active))
 190                 return ret;
 191
 192         asid = sev_asid_new(es_active);
 193         if (asid < 0)
 194                 return ret;
 195
 196         ret = sev_platform_init(&argp->error);
 197         if (ret)
 198                 goto e_free;
 199
 200         sev->active = true;
 201         sev->es_active = es_active;
 202         sev->asid = asid;
 203         INIT_LIST_HEAD(&sev->regions_list);
 204
 205         return 0;
 206
 207 e_free:
 208         sev_asid_free(asid);
 209         return ret;
 210 }
 211
 212 static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
 213 {
 214         struct sev_data_activate *data;
 215         int asid = sev_get_asid(kvm);
 216         int ret;
 217
 218         data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
 219         if (!data)
 220                 return -ENOMEM;
 221
 222         /* activate ASID on the given handle */
 223         data->handle = handle;
 224         data->asid   = asid;
 225         ret = sev_guest_activate(data, error);
 226         kfree(data);
 227
 228         return ret;
 229 }
 230
 231 static int __sev_issue_cmd(int fd, int id, void *data, int *error)
 232 {
 233         struct fd f;
 234         int ret;
 235
 236         f = fdget(fd);
 237         if (!f.file)
 238                 return -EBADF;
 239
 240         ret = sev_issue_cmd_external_user(f.file, id, data, error);
 241
 242         fdput(f);
 243         return ret;
 244 }
 245
 246 static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error)
 247 {
 248         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 249
 250         return __sev_issue_cmd(sev->fd, id, data, error);
 251 }
 252
 253 static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
 254 {
 255         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 256         struct sev_data_launch_start *start;
 257         struct kvm_sev_launch_start params;
 258         void *dh_blob, *session_blob;
 259         int *error = &argp->error;
 260         int ret;
 261
 262         if (!sev_guest(kvm))
 263                 return -ENOTTY;
 264
 265         if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
 266                 return -EFAULT;
 267
 268         start = kzalloc(sizeof(*start), GFP_KERNEL_ACCOUNT);
 269         if (!start)
 270                 return -ENOMEM;
 271
 272         dh_blob = NULL;
 273         if (params.dh_uaddr) {
 274                 dh_blob = psp_copy_user_blob(params.dh_uaddr, params.dh_len);
 275                 if (IS_ERR(dh_blob)) {
 276                         ret = PTR_ERR(dh_blob);
 277                         goto e_free;
 278                 }
 279
 280                 start->dh_cert_address = __sme_set(__pa(dh_blob));
 281                 start->dh_cert_len = params.dh_len;
 282         }
 283
 284         session_blob = NULL;
 285         if (params.session_uaddr) {
 286                 session_blob = psp_copy_user_blob(params.session_uaddr, params.session_len);
 287                 if (IS_ERR(session_blob)) {
 288                         ret = PTR_ERR(session_blob);
 289                         goto e_free_dh;
 290                 }
 291
 292                 start->session_address = __sme_set(__pa(session_blob));
 293                 start->session_len = params.session_len;
 294         }
 295
 296         start->handle = params.handle;
 297         start->policy = params.policy;
 298
 299         /* create memory encryption context */
 300         ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, start, error);
 301         if (ret)
 302                 goto e_free_session;
 303
 304         /* Bind ASID to this guest */
 305         ret = sev_bind_asid(kvm, start->handle, error);
 306         if (ret)
 307                 goto e_free_session;
 308
 309         /* return handle to userspace */
 310         params.handle = start->handle;
 311         if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params))) {
 312                 sev_unbind_asid(kvm, start->handle);
 313                 ret = -EFAULT;
 314                 goto e_free_session;
 315         }
 316
 317         sev->handle = start->handle;
 318         sev->fd = argp->sev_fd;
 319
 320 e_free_session:
 321         kfree(session_blob);
 322 e_free_dh:
 323         kfree(dh_blob);
 324 e_free:
 325         kfree(start);
 326         return ret;
 327 }
 328
 329 static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
 330                                     unsigned long ulen, unsigned long *n,
 331                                     int write)
 332 {
 333         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 334         unsigned long npages, size;
 335         int npinned;
 336         unsigned long locked, lock_limit;
 337         struct page **pages;
 338         unsigned long first, last;
 339         int ret;
 340
 341         lockdep_assert_held(&kvm->lock);
 342
 343         if (ulen == 0 || uaddr + ulen < uaddr)
 344                 return ERR_PTR(-EINVAL);
 345
 346         /* Calculate number of pages. */
 347         first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
 348         last = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT;
 349         npages = (last - first + 1);
 350
 351         locked = sev->pages_locked + npages;
 352         lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 353         if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
 354                 pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n", locked, lock_limit);
 355                 return ERR_PTR(-ENOMEM);
 356         }
 357
 358         if (WARN_ON_ONCE(npages > INT_MAX))
 359                 return ERR_PTR(-EINVAL);
 360
 361         /* Avoid using vmalloc for smaller buffers. */
 362         size = npages * sizeof(struct page *);
 363         if (size > PAGE_SIZE)
 364                 pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
 365         else
 366                 pages = kmalloc(size, GFP_KERNEL_ACCOUNT);
 367
 368         if (!pages)
 369                 return ERR_PTR(-ENOMEM);
 370
 371         /* Pin the user virtual address. */
 372         npinned = pin_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
 373         if (npinned != npages) {
 374                 pr_err("SEV: Failure locking %lu pages.\n", npages);
 375                 ret = -ENOMEM;
 376                 goto err;
 377         }
 378
 379         *n = npages;
 380         sev->pages_locked = locked;
 381
 382         return pages;
 383
 384 err:
 385         if (npinned > 0)
 386                 unpin_user_pages(pages, npinned);
 387
 388         kvfree(pages);
 389         return ERR_PTR(ret);
 390 }
 391
 392 static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
 393                              unsigned long npages)
 394 {
 395         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 396
 397         unpin_user_pages(pages, npages);
 398         kvfree(pages);
 399         sev->pages_locked -= npages;
 400 }
 401
 402 static void sev_clflush_pages(struct page *pages[], unsigned long npages)
 403 {
 404         uint8_t *page_virtual;
 405         unsigned long i;
 406
 407         if (this_cpu_has(X86_FEATURE_SME_COHERENT) || npages == 0 ||
 408             pages == NULL)
 409                 return;
 410
 411         for (i = 0; i < npages; i++) {
 412                 page_virtual = kmap_atomic(pages[i]);
 413                 clflush_cache_range(page_virtual, PAGE_SIZE);
 414                 kunmap_atomic(page_virtual);
 415         }
 416 }
 417
 418 static unsigned long get_num_contig_pages(unsigned long idx,
 419                                 struct page **inpages, unsigned long npages)
 420 {
 421         unsigned long paddr, next_paddr;
 422         unsigned long i = idx + 1, pages = 1;
 423
 424         /* find the number of contiguous pages starting from idx */
 425         paddr = __sme_page_pa(inpages[idx]);
 426         while (i < npages) {
 427                 next_paddr = __sme_page_pa(inpages[i++]);
 428                 if ((paddr + PAGE_SIZE) == next_paddr) {
 429                         pages++;
 430                         paddr = next_paddr;
 431                         continue;
 432                 }
 433                 break;
 434         }
 435
 436         return pages;
 437 }
 438
 439 static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
 440 {
 441         unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i;
 442         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 443         struct kvm_sev_launch_update_data params;
 444         struct sev_data_launch_update_data *data;
 445         struct page **inpages;
 446         int ret;
 447
 448         if (!sev_guest(kvm))
 449                 return -ENOTTY;
 450
 451         if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
 452                 return -EFAULT;
 453
 454         data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
 455         if (!data)
 456                 return -ENOMEM;
 457
 458         vaddr = params.uaddr;
 459         size = params.len;
 460         vaddr_end = vaddr + size;
 461
 462         /* Lock the user memory. */
 463         inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
 464         if (IS_ERR(inpages)) {
 465                 ret = PTR_ERR(inpages);
 466                 goto e_free;
 467         }
 468
 469         /*
 470          * Flush (on non-coherent CPUs) before LAUNCH_UPDATE encrypts pages in
 471          * place; the cache may contain the data that was written unencrypted.
 472          */
 473         sev_clflush_pages(inpages, npages);
 474
 475         for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) {
 476                 int offset, len;
 477
 478                 /*
 479                  * If the user buffer is not page-aligned, calculate the offset
 480                  * within the page.
 481                  */
 482                 offset = vaddr & (PAGE_SIZE - 1);
 483
 484                 /* Calculate the number of pages that can be encrypted in one go. */
 485                 pages = get_num_contig_pages(i, inpages, npages);
 486
 487                 len = min_t(size_t, ((pages * PAGE_SIZE) - offset), size);
 488
 489                 data->handle = sev->handle;
 490                 data->len = len;
 491                 data->address = __sme_page_pa(inpages[i]) + offset;
 492                 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, data, &argp->error);
 493                 if (ret)
 494                         goto e_unpin;
 495
 496                 size -= len;
 497                 next_vaddr = vaddr + len;
 498         }
 499
 500 e_unpin:
 501         /* content of memory is updated, mark pages dirty */
 502         for (i = 0; i < npages; i++) {
 503                 set_page_dirty_lock(inpages[i]);
 504                 mark_page_accessed(inpages[i]);
 505         }
 506         /* unlock the user pages */
 507         sev_unpin_memory(kvm, inpages, npages);
 508 e_free:
 509         kfree(data);
 510         return ret;
 511 }
 512
 513 static int sev_es_sync_vmsa(struct vcpu_svm *svm)
 514 {
 515         struct vmcb_save_area *save = &svm->vmcb->save;
 516
 517         /* Check some debug related fields before encrypting the VMSA */
 518         if (svm->vcpu.guest_debug || (save->dr7 & ~DR7_FIXED_1))
 519                 return -EINVAL;
 520
 521         /* Sync registgers */
 522         save->rax = svm->vcpu.arch.regs[VCPU_REGS_RAX];
 523         save->rbx = svm->vcpu.arch.regs[VCPU_REGS_RBX];
 524         save->rcx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
 525         save->rdx = svm->vcpu.arch.regs[VCPU_REGS_RDX];
 526         save->rsp = svm->vcpu.arch.regs[VCPU_REGS_RSP];
 527         save->rbp = svm->vcpu.arch.regs[VCPU_REGS_RBP];
 528         save->rsi = svm->vcpu.arch.regs[VCPU_REGS_RSI];
 529         save->rdi = svm->vcpu.arch.regs[VCPU_REGS_RDI];
 530 #ifdef CONFIG_X86_64
 531         save->r8  = svm->vcpu.arch.regs[VCPU_REGS_R8];
 532         save->r9  = svm->vcpu.arch.regs[VCPU_REGS_R9];
 533         save->r10 = svm->vcpu.arch.regs[VCPU_REGS_R10];
 534         save->r11 = svm->vcpu.arch.regs[VCPU_REGS_R11];
 535         save->r12 = svm->vcpu.arch.regs[VCPU_REGS_R12];
 536         save->r13 = svm->vcpu.arch.regs[VCPU_REGS_R13];
 537         save->r14 = svm->vcpu.arch.regs[VCPU_REGS_R14];
 538         save->r15 = svm->vcpu.arch.regs[VCPU_REGS_R15];
 539 #endif
 540         save->rip = svm->vcpu.arch.regs[VCPU_REGS_RIP];
 541
 542         /* Sync some non-GPR registers before encrypting */
 543         save->xcr0 = svm->vcpu.arch.xcr0;
 544         save->pkru = svm->vcpu.arch.pkru;
 545         save->xss  = svm->vcpu.arch.ia32_xss;
 546
 547         /*
 548          * SEV-ES will use a VMSA that is pointed to by the VMCB, not
 549          * the traditional VMSA that is part of the VMCB. Copy the
 550          * traditional VMSA as it has been built so far (in prep
 551          * for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state.
 552          */
 553         memcpy(svm->vmsa, save, sizeof(*save));
 554
 555         return 0;
 556 }
 557
 558 static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
 559 {
 560         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 561         struct sev_data_launch_update_vmsa *vmsa;
 562         struct kvm_vcpu *vcpu;
 563         int i, ret;
 564
 565         if (!sev_es_guest(kvm))
 566                 return -ENOTTY;
 567
 568         vmsa = kzalloc(sizeof(*vmsa), GFP_KERNEL);
 569         if (!vmsa)
 570                 return -ENOMEM;
 571
 572         kvm_for_each_vcpu(i, vcpu, kvm) {
 573                 struct vcpu_svm *svm = to_svm(vcpu);
 574
 575                 /* Perform some pre-encryption checks against the VMSA */
 576                 ret = sev_es_sync_vmsa(svm);
 577                 if (ret)
 578                         goto e_free;
 579
 580                 /*
 581                  * The LAUNCH_UPDATE_VMSA command will perform in-place
 582                  * encryption of the VMSA memory content (i.e it will write
 583                  * the same memory region with the guest's key), so invalidate
 584                  * it first.
 585                  */
 586                 clflush_cache_range(svm->vmsa, PAGE_SIZE);
 587
 588                 vmsa->handle = sev->handle;
 589                 vmsa->address = __sme_pa(svm->vmsa);
 590                 vmsa->len = PAGE_SIZE;
 591                 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, vmsa,
 592                                     &argp->error);
 593                 if (ret)
 594                         goto e_free;
 595
 596                 svm->vcpu.arch.guest_state_protected = true;
 597         }
 598
 599 e_free:
 600         kfree(vmsa);
 601         return ret;
 602 }
 603
 604 static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
 605 {
 606         void __user *measure = (void __user *)(uintptr_t)argp->data;
 607         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 608         struct sev_data_launch_measure *data;
 609         struct kvm_sev_launch_measure params;
 610         void __user *p = NULL;
 611         void *blob = NULL;
 612         int ret;
 613
 614         if (!sev_guest(kvm))
 615                 return -ENOTTY;
 616
 617         if (copy_from_user(&params, measure, sizeof(params)))
 618                 return -EFAULT;
 619
 620         data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
 621         if (!data)
 622                 return -ENOMEM;
 623
 624         /* User wants to query the blob length */
 625         if (!params.len)
 626                 goto cmd;
 627
 628         p = (void __user *)(uintptr_t)params.uaddr;
 629         if (p) {
 630                 if (params.len > SEV_FW_BLOB_MAX_SIZE) {
 631                         ret = -EINVAL;
 632                         goto e_free;
 633                 }
 634
 635                 ret = -ENOMEM;
 636                 blob = kmalloc(params.len, GFP_KERNEL);
 637                 if (!blob)
 638                         goto e_free;
 639
 640                 data->address = __psp_pa(blob);
 641                 data->len = params.len;
 642         }
 643
 644 cmd:
 645         data->handle = sev->handle;
 646         ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, data, &argp->error);
 647
 648         /*
 649          * If we query the session length, FW responded with expected data.
 650          */
 651         if (!params.len)
 652                 goto done;
 653
 654         if (ret)
 655                 goto e_free_blob;
 656
 657         if (blob) {
 658                 if (copy_to_user(p, blob, params.len))
 659                         ret = -EFAULT;
 660         }
 661
 662 done:
 663         params.len = data->len;
 664         if (copy_to_user(measure, &params, sizeof(params)))
 665                 ret = -EFAULT;
 666 e_free_blob:
 667         kfree(blob);
 668 e_free:
 669         kfree(data);
 670         return ret;
 671 }
 672
 673 static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
 674 {
 675         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 676         struct sev_data_launch_finish *data;
 677         int ret;
 678
 679         if (!sev_guest(kvm))
 680                 return -ENOTTY;
 681
 682         data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
 683         if (!data)
 684                 return -ENOMEM;
 685
 686         data->handle = sev->handle;
 687         ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, data, &argp->error);
 688
 689         kfree(data);
 690         return ret;
 691 }
 692
 693 static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
 694 {
 695         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 696         struct kvm_sev_guest_status params;
 697         struct sev_data_guest_status *data;
 698         int ret;
 699
 700         if (!sev_guest(kvm))
 701                 return -ENOTTY;
 702
 703         data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
 704         if (!data)
 705                 return -ENOMEM;
 706
 707         data->handle = sev->handle;
 708         ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, data, &argp->error);
 709         if (ret)
 710                 goto e_free;
 711
 712         params.policy = data->policy;
 713         params.state = data->state;
 714         params.handle = data->handle;
 715
 716         if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
 717                 ret = -EFAULT;
 718 e_free:
 719         kfree(data);
 720         return ret;
 721 }
 722
 723 static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src,
 724                                unsigned long dst, int size,
 725                                int *error, bool enc)
 726 {
 727         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 728         struct sev_data_dbg *data;
 729         int ret;
 730
 731         data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
 732         if (!data)
 733                 return -ENOMEM;
 734
 735         data->handle = sev->handle;
 736         data->dst_addr = dst;
 737         data->src_addr = src;
 738         data->len = size;
 739
 740         ret = sev_issue_cmd(kvm,
 741                             enc ? SEV_CMD_DBG_ENCRYPT : SEV_CMD_DBG_DECRYPT,
 742                             data, error);
 743         kfree(data);
 744         return ret;
 745 }
 746
 747 static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
 748                              unsigned long dst_paddr, int sz, int *err)
 749 {
 750         int offset;
 751
 752         /*
 753          * Its safe to read more than we are asked, caller should ensure that
 754          * destination has enough space.
 755          */
 756         offset = src_paddr & 15;
 757         src_paddr = round_down(src_paddr, 16);
 758         sz = round_up(sz + offset, 16);
 759
 760         return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false);
 761 }
 762
 763 static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
 764                                   unsigned long __user dst_uaddr,
 765                                   unsigned long dst_paddr,
 766                                   int size, int *err)
 767 {
 768         struct page *tpage = NULL;
 769         int ret, offset;
 770
 771         /* if inputs are not 16-byte then use intermediate buffer */
 772         if (!IS_ALIGNED(dst_paddr, 16) ||
 773             !IS_ALIGNED(paddr,     16) ||
 774             !IS_ALIGNED(size,      16)) {
 775                 tpage = (void *)alloc_page(GFP_KERNEL);
 776                 if (!tpage)
 777                         return -ENOMEM;
 778
 779                 dst_paddr = __sme_page_pa(tpage);
 780         }
 781
 782         ret = __sev_dbg_decrypt(kvm, paddr, dst_paddr, size, err);
 783         if (ret)
 784                 goto e_free;
 785
 786         if (tpage) {
 787                 offset = paddr & 15;
 788                 if (copy_to_user((void __user *)(uintptr_t)dst_uaddr,
 789                                  page_address(tpage) + offset, size))
 790                         ret = -EFAULT;
 791         }
 792
 793 e_free:
 794         if (tpage)
 795                 __free_page(tpage);
 796
 797         return ret;
 798 }
 799
 800 static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
 801                                   unsigned long __user vaddr,
 802                                   unsigned long dst_paddr,
 803                                   unsigned long __user dst_vaddr,
 804                                   int size, int *error)
 805 {
 806         struct page *src_tpage = NULL;
 807         struct page *dst_tpage = NULL;
 808         int ret, len = size;
 809
 810         /* If source buffer is not aligned then use an intermediate buffer */
 811         if (!IS_ALIGNED(vaddr, 16)) {
 812                 src_tpage = alloc_page(GFP_KERNEL);
 813                 if (!src_tpage)
 814                         return -ENOMEM;
 815
 816                 if (copy_from_user(page_address(src_tpage),
 817                                 (void __user *)(uintptr_t)vaddr, size)) {
 818                         __free_page(src_tpage);
 819                         return -EFAULT;
 820                 }
 821
 822                 paddr = __sme_page_pa(src_tpage);
 823         }
 824
 825         /*
 826          *  If destination buffer or length is not aligned then do read-modify-write:
 827          *   - decrypt destination in an intermediate buffer
 828          *   - copy the source buffer in an intermediate buffer
 829          *   - use the intermediate buffer as source buffer
 830          */
 831         if (!IS_ALIGNED(dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
 832                 int dst_offset;
 833
 834                 dst_tpage = alloc_page(GFP_KERNEL);
 835                 if (!dst_tpage) {
 836                         ret = -ENOMEM;
 837                         goto e_free;
 838                 }
 839
 840                 ret = __sev_dbg_decrypt(kvm, dst_paddr,
 841                                         __sme_page_pa(dst_tpage), size, error);
 842                 if (ret)
 843                         goto e_free;
 844
 845                 /*
 846                  *  If source is kernel buffer then use memcpy() otherwise
 847                  *  copy_from_user().
 848                  */
 849                 dst_offset = dst_paddr & 15;
 850
 851                 if (src_tpage)
 852                         memcpy(page_address(dst_tpage) + dst_offset,
 853                                page_address(src_tpage), size);
 854                 else {
 855                         if (copy_from_user(page_address(dst_tpage) + dst_offset,
 856                                            (void __user *)(uintptr_t)vaddr, size)) {
 857                                 ret = -EFAULT;
 858                                 goto e_free;
 859                         }
 860                 }
 861
 862                 paddr = __sme_page_pa(dst_tpage);
 863                 dst_paddr = round_down(dst_paddr, 16);
 864                 len = round_up(size, 16);
 865         }
 866
 867         ret = __sev_issue_dbg_cmd(kvm, paddr, dst_paddr, len, error, true);
 868
 869 e_free:
 870         if (src_tpage)
 871                 __free_page(src_tpage);
 872         if (dst_tpage)
 873                 __free_page(dst_tpage);
 874         return ret;
 875 }
 876
 877 static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
 878 {
 879         unsigned long vaddr, vaddr_end, next_vaddr;
 880         unsigned long dst_vaddr;
 881         struct page **src_p, **dst_p;
 882         struct kvm_sev_dbg debug;
 883         unsigned long n;
 884         unsigned int size;
 885         int ret;
 886
 887         if (!sev_guest(kvm))
 888                 return -ENOTTY;
 889
 890         if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug)))
 891                 return -EFAULT;
 892
 893         if (!debug.len || debug.src_uaddr + debug.len < debug.src_uaddr)
 894                 return -EINVAL;
 895         if (!debug.dst_uaddr)
 896                 return -EINVAL;
 897
 898         vaddr = debug.src_uaddr;
 899         size = debug.len;
 900         vaddr_end = vaddr + size;
 901         dst_vaddr = debug.dst_uaddr;
 902
 903         for (; vaddr < vaddr_end; vaddr = next_vaddr) {
 904                 int len, s_off, d_off;
 905
 906                 /* lock userspace source and destination page */
 907                 src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0);
 908                 if (IS_ERR(src_p))
 909                         return PTR_ERR(src_p);
 910
 911                 dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1);
 912                 if (IS_ERR(dst_p)) {
 913                         sev_unpin_memory(kvm, src_p, n);
 914                         return PTR_ERR(dst_p);
 915                 }
 916
 917                 /*
 918                  * Flush (on non-coherent CPUs) before DBG_{DE,EN}CRYPT read or modify
 919                  * the pages; flush the destination too so that future accesses do not
 920                  * see stale data.
 921                  */
 922                 sev_clflush_pages(src_p, 1);
 923                 sev_clflush_pages(dst_p, 1);
 924
 925                 /*
 926                  * Since user buffer may not be page aligned, calculate the
 927                  * offset within the page.
 928                  */
 929                 s_off = vaddr & ~PAGE_MASK;
 930                 d_off = dst_vaddr & ~PAGE_MASK;
 931                 len = min_t(size_t, (PAGE_SIZE - s_off), size);
 932
 933                 if (dec)
 934                         ret = __sev_dbg_decrypt_user(kvm,
 935                                                      __sme_page_pa(src_p[0]) + s_off,
 936                                                      dst_vaddr,
 937                                                      __sme_page_pa(dst_p[0]) + d_off,
 938                                                      len, &argp->error);
 939                 else
 940                         ret = __sev_dbg_encrypt_user(kvm,
 941                                                      __sme_page_pa(src_p[0]) + s_off,
 942                                                      vaddr,
 943                                                      __sme_page_pa(dst_p[0]) + d_off,
 944                                                      dst_vaddr,
 945                                                      len, &argp->error);
 946
 947                 sev_unpin_memory(kvm, src_p, n);
 948                 sev_unpin_memory(kvm, dst_p, n);
 949
 950                 if (ret)
 951                         goto err;
 952
 953                 next_vaddr = vaddr + len;
 954                 dst_vaddr = dst_vaddr + len;
 955                 size -= len;
 956         }
 957 err:
 958         return ret;
 959 }
 960
 961 static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
 962 {
 963         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 964         struct sev_data_launch_secret *data;
 965         struct kvm_sev_launch_secret params;
 966         struct page **pages;
 967         void *blob, *hdr;
 968         unsigned long n, i;
 969         int ret, offset;
 970
 971         if (!sev_guest(kvm))
 972                 return -ENOTTY;
 973
 974         if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
 975                 return -EFAULT;
 976
 977         pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1);
 978         if (IS_ERR(pages))
 979                 return PTR_ERR(pages);
 980
 981         /*
 982          * Flush (on non-coherent CPUs) before LAUNCH_SECRET encrypts pages in
 983          * place; the cache may contain the data that was written unencrypted.
 984          */
 985         sev_clflush_pages(pages, n);
 986
 987         /*
 988          * The secret must be copied into contiguous memory region, lets verify
 989          * that userspace memory pages are contiguous before we issue command.
 990          */
 991         if (get_num_contig_pages(0, pages, n) != n) {
 992                 ret = -EINVAL;
 993                 goto e_unpin_memory;
 994         }
 995
 996         ret = -ENOMEM;
 997         data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
 998         if (!data)
 999                 goto e_unpin_memory;
1000
1001         offset = params.guest_uaddr & (PAGE_SIZE - 1);
1002         data->guest_address = __sme_page_pa(pages[0]) + offset;
1003         data->guest_len = params.guest_len;
1004
1005         blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
1006         if (IS_ERR(blob)) {
1007                 ret = PTR_ERR(blob);
1008                 goto e_free;
1009         }
1010
1011         data->trans_address = __psp_pa(blob);
1012         data->trans_len = params.trans_len;
1013
1014         hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
1015         if (IS_ERR(hdr)) {
1016                 ret = PTR_ERR(hdr);
1017                 goto e_free_blob;
1018         }
1019         data->hdr_address = __psp_pa(hdr);
1020         data->hdr_len = params.hdr_len;
1021
1022         data->handle = sev->handle;
1023         ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error);
1024
1025         kfree(hdr);
1026
1027 e_free_blob:
1028         kfree(blob);
1029 e_free:
1030         kfree(data);
1031 e_unpin_memory:
1032         /* content of memory is updated, mark pages dirty */
1033         for (i = 0; i < n; i++) {
1034                 set_page_dirty_lock(pages[i]);
1035                 mark_page_accessed(pages[i]);
1036         }
1037         sev_unpin_memory(kvm, pages, n);
1038         return ret;
1039 }
1040
1041 static int sev_get_attestation_report(struct kvm *kvm, struct kvm_sev_cmd *argp)
1042 {
1043         void __user *report = (void __user *)(uintptr_t)argp->data;
1044         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
1045         struct sev_data_attestation_report *data;
1046         struct kvm_sev_attestation_report params;
1047         void __user *p;
1048         void *blob = NULL;
1049         int ret;
1050
1051         if (!sev_guest(kvm))
1052                 return -ENOTTY;
1053
1054         if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
1055                 return -EFAULT;
1056
1057         data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
1058         if (!data)
1059                 return -ENOMEM;
1060
1061         /* User wants to query the blob length */
1062         if (!params.len)
1063                 goto cmd;
1064
1065         p = (void __user *)(uintptr_t)params.uaddr;
1066         if (p) {
1067                 if (params.len > SEV_FW_BLOB_MAX_SIZE) {
1068                         ret = -EINVAL;
1069                         goto e_free;
1070                 }
1071
1072                 ret = -ENOMEM;
1073                 blob = kmalloc(params.len, GFP_KERNEL);
1074                 if (!blob)
1075                         goto e_free;
1076
1077                 data->address = __psp_pa(blob);
1078                 data->len = params.len;
1079                 memcpy(data->mnonce, params.mnonce, sizeof(params.mnonce));
1080         }
1081 cmd:
1082         data->handle = sev->handle;
1083         ret = sev_issue_cmd(kvm, SEV_CMD_ATTESTATION_REPORT, data, &argp->error);
1084         /*
1085          * If we query the session length, FW responded with expected data.
1086          */
1087         if (!params.len)
1088                 goto done;
1089
1090         if (ret)
1091                 goto e_free_blob;
1092
1093         if (blob) {
1094                 if (copy_to_user(p, blob, params.len))
1095                         ret = -EFAULT;
1096         }
1097
1098 done:
1099         params.len = data->len;
1100         if (copy_to_user(report, &params, sizeof(params)))
1101                 ret = -EFAULT;
1102 e_free_blob:
1103         kfree(blob);
1104 e_free:
1105         kfree(data);
1106         return ret;
1107 }
1108
1109 int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
1110 {
1111         struct kvm_sev_cmd sev_cmd;
1112         int r;
1113
1114         if (!svm_sev_enabled() || !sev)
1115                 return -ENOTTY;
1116
1117         if (!argp)
1118                 return 0;
1119
1120         if (copy_from_user(&sev_cmd, argp, sizeof(struct kvm_sev_cmd)))
1121                 return -EFAULT;
1122
1123         mutex_lock(&kvm->lock);
1124
1125         switch (sev_cmd.id) {
1126         case KVM_SEV_ES_INIT:
1127                 if (!sev_es) {
1128                         r = -ENOTTY;
1129                         goto out;
1130                 }
1131                 fallthrough;
1132         case KVM_SEV_INIT:
1133                 r = sev_guest_init(kvm, &sev_cmd);
1134                 break;
1135         case KVM_SEV_LAUNCH_START:
1136                 r = sev_launch_start(kvm, &sev_cmd);
1137                 break;
1138         case KVM_SEV_LAUNCH_UPDATE_DATA:
1139                 r = sev_launch_update_data(kvm, &sev_cmd);
1140                 break;
1141         case KVM_SEV_LAUNCH_UPDATE_VMSA:
1142                 r = sev_launch_update_vmsa(kvm, &sev_cmd);
1143                 break;
1144         case KVM_SEV_LAUNCH_MEASURE:
1145                 r = sev_launch_measure(kvm, &sev_cmd);
1146                 break;
1147         case KVM_SEV_LAUNCH_FINISH:
1148                 r = sev_launch_finish(kvm, &sev_cmd);
1149                 break;
1150         case KVM_SEV_GUEST_STATUS:
1151                 r = sev_guest_status(kvm, &sev_cmd);
1152                 break;
1153         case KVM_SEV_DBG_DECRYPT:
1154                 r = sev_dbg_crypt(kvm, &sev_cmd, true);
1155                 break;
1156         case KVM_SEV_DBG_ENCRYPT:
1157                 r = sev_dbg_crypt(kvm, &sev_cmd, false);
1158                 break;
1159         case KVM_SEV_LAUNCH_SECRET:
1160                 r = sev_launch_secret(kvm, &sev_cmd);
1161                 break;
1162         case KVM_SEV_GET_ATTESTATION_REPORT:
1163                 r = sev_get_attestation_report(kvm, &sev_cmd);
1164                 break;
1165         default:
1166                 r = -EINVAL;
1167                 goto out;
1168         }
1169
1170         if (copy_to_user(argp, &sev_cmd, sizeof(struct kvm_sev_cmd)))
1171                 r = -EFAULT;
1172
1173 out:
1174         mutex_unlock(&kvm->lock);
1175         return r;
1176 }
1177
1178 int svm_register_enc_region(struct kvm *kvm,
1179                             struct kvm_enc_region *range)
1180 {
1181         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
1182         struct enc_region *region;
1183         int ret = 0;
1184
1185         if (!sev_guest(kvm))
1186                 return -ENOTTY;
1187
1188         if (range->addr > ULONG_MAX || range->size > ULONG_MAX)
1189                 return -EINVAL;
1190
1191         region = kzalloc(sizeof(*region), GFP_KERNEL_ACCOUNT);
1192         if (!region)
1193                 return -ENOMEM;
1194
1195         mutex_lock(&kvm->lock);
1196         region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages, 1);
1197         if (IS_ERR(region->pages)) {
1198                 ret = PTR_ERR(region->pages);
1199                 mutex_unlock(&kvm->lock);
1200                 goto e_free;
1201         }
1202
1203         region->uaddr = range->addr;
1204         region->size = range->size;
1205
1206         list_add_tail(&region->list, &sev->regions_list);
1207         mutex_unlock(&kvm->lock);
1208
1209         /*
1210          * The guest may change the memory encryption attribute from C=0 -> C=1
1211          * or vice versa for this memory range. Lets make sure caches are
1212          * flushed to ensure that guest data gets written into memory with
1213          * correct C-bit.
1214          */
1215         sev_clflush_pages(region->pages, region->npages);
1216
1217         return ret;
1218
1219 e_free:
1220         kfree(region);
1221         return ret;
1222 }
1223
1224 static struct enc_region *
1225 find_enc_region(struct kvm *kvm, struct kvm_enc_region *range)
1226 {
1227         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
1228         struct list_head *head = &sev->regions_list;
1229         struct enc_region *i;
1230
1231         list_for_each_entry(i, head, list) {
1232                 if (i->uaddr == range->addr &&
1233                     i->size == range->size)
1234                         return i;
1235         }
1236
1237         return NULL;
1238 }
1239
1240 static void __unregister_enc_region_locked(struct kvm *kvm,
1241                                            struct enc_region *region)
1242 {
1243         sev_unpin_memory(kvm, region->pages, region->npages);
1244         list_del(&region->list);
1245         kfree(region);
1246 }
1247
1248 int svm_unregister_enc_region(struct kvm *kvm,
1249                               struct kvm_enc_region *range)
1250 {
1251         struct enc_region *region;
1252         int ret;
1253
1254         mutex_lock(&kvm->lock);
1255
1256         if (!sev_guest(kvm)) {
1257                 ret = -ENOTTY;
1258                 goto failed;
1259         }
1260
1261         region = find_enc_region(kvm, range);
1262         if (!region) {
1263                 ret = -EINVAL;
1264                 goto failed;
1265         }
1266
1267         /*
1268          * Ensure that all guest tagged cache entries are flushed before
1269          * releasing the pages back to the system for use. CLFLUSH will
1270          * not do this, so issue a WBINVD.
1271          */
1272         wbinvd_on_all_cpus();
1273
1274         __unregister_enc_region_locked(kvm, region);
1275
1276         mutex_unlock(&kvm->lock);
1277         return 0;
1278
1279 failed:
1280         mutex_unlock(&kvm->lock);
1281         return ret;
1282 }
1283
1284 void sev_vm_destroy(struct kvm *kvm)
1285 {
1286         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
1287         struct list_head *head = &sev->regions_list;
1288         struct list_head *pos, *q;
1289
1290         if (!sev_guest(kvm))
1291                 return;
1292
1293         mutex_lock(&kvm->lock);
1294
1295         /*
1296          * Ensure that all guest tagged cache entries are flushed before
1297          * releasing the pages back to the system for use. CLFLUSH will
1298          * not do this, so issue a WBINVD.
1299          */
1300         wbinvd_on_all_cpus();
1301
1302         /*
1303          * if userspace was terminated before unregistering the memory regions
1304          * then lets unpin all the registered memory.
1305          */
1306         if (!list_empty(head)) {
1307                 list_for_each_safe(pos, q, head) {
1308                         __unregister_enc_region_locked(kvm,
1309                                 list_entry(pos, struct enc_region, list));
1310                         cond_resched();
1311                 }
1312         }
1313
1314         mutex_unlock(&kvm->lock);
1315
1316         sev_unbind_asid(kvm, sev->handle);
1317         sev_asid_free(sev->asid);
1318 }
1319
1320 void __init sev_hardware_setup(void)
1321 {
1322         unsigned int eax, ebx, ecx, edx;
1323         bool sev_es_supported = false;
1324         bool sev_supported = false;
1325
1326         /* Does the CPU support SEV? */
1327         if (!boot_cpu_has(X86_FEATURE_SEV))
1328                 goto out;
1329
1330         /* Retrieve SEV CPUID information */
1331         cpuid(0x8000001f, &eax, &ebx, &ecx, &edx);
1332
1333         /* Set encryption bit location for SEV-ES guests */
1334         sev_enc_bit = ebx & 0x3f;
1335
1336         /* Maximum number of encrypted guests supported simultaneously */
1337         max_sev_asid = ecx;
1338
1339         if (!svm_sev_enabled())
1340                 goto out;
1341
1342         /* Minimum ASID value that should be used for SEV guest */
1343         min_sev_asid = edx;
1344
1345         /* Initialize SEV ASID bitmaps */
1346         sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
1347         if (!sev_asid_bitmap)
1348                 goto out;
1349
1350         sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
1351         if (!sev_reclaim_asid_bitmap)
1352                 goto out;
1353
1354         pr_info("SEV supported: %u ASIDs\n", max_sev_asid - min_sev_asid + 1);
1355         sev_supported = true;
1356
1357         /* SEV-ES support requested? */
1358         if (!sev_es)
1359                 goto out;
1360
1361         /* Does the CPU support SEV-ES? */
1362         if (!boot_cpu_has(X86_FEATURE_SEV_ES))
1363                 goto out;
1364
1365         /* Has the system been allocated ASIDs for SEV-ES? */
1366         if (min_sev_asid == 1)
1367                 goto out;
1368
1369         pr_info("SEV-ES supported: %u ASIDs\n", min_sev_asid - 1);
1370         sev_es_supported = true;
1371
1372 out:
1373         sev = sev_supported;
1374         sev_es = sev_es_supported;
1375 }
1376
1377 void sev_hardware_teardown(void)
1378 {
1379         if (!svm_sev_enabled())
1380                 return;
1381
1382         bitmap_free(sev_asid_bitmap);
1383         bitmap_free(sev_reclaim_asid_bitmap);
1384
1385         sev_flush_asids();
1386 }
1387
1388 /*
1389  * Pages used by hardware to hold guest encrypted state must be flushed before
1390  * returning them to the system.
1391  */
1392 static void sev_flush_guest_memory(struct vcpu_svm *svm, void *va,
1393                                    unsigned long len)
1394 {
1395         /*
1396          * If hardware enforced cache coherency for encrypted mappings of the
1397          * same physical page is supported, nothing to do.
1398          */
1399         if (boot_cpu_has(X86_FEATURE_SME_COHERENT))
1400                 return;
1401
1402         /*
1403          * If the VM Page Flush MSR is supported, use it to flush the page
1404          * (using the page virtual address and the guest ASID).
1405          */
1406         if (boot_cpu_has(X86_FEATURE_VM_PAGE_FLUSH)) {
1407                 struct kvm_sev_info *sev;
1408                 unsigned long va_start;
1409                 u64 start, stop;
1410
1411                 /* Align start and stop to page boundaries. */
1412                 va_start = (unsigned long)va;
1413                 start = (u64)va_start & PAGE_MASK;
1414                 stop = PAGE_ALIGN((u64)va_start + len);
1415
1416                 if (start < stop) {
1417                         sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
1418
1419                         while (start < stop) {
1420                                 wrmsrl(MSR_AMD64_VM_PAGE_FLUSH,
1421                                        start | sev->asid);
1422
1423                                 start += PAGE_SIZE;
1424                         }
1425
1426                         return;
1427                 }
1428
1429                 WARN(1, "Address overflow, using WBINVD\n");
1430         }
1431
1432         /*
1433          * Hardware should always have one of the above features,
1434          * but if not, use WBINVD and issue a warning.
1435          */
1436         WARN_ONCE(1, "Using WBINVD to flush guest memory\n");
1437         wbinvd_on_all_cpus();
1438 }
1439
1440 void sev_free_vcpu(struct kvm_vcpu *vcpu)
1441 {
1442         struct vcpu_svm *svm;
1443
1444         if (!sev_es_guest(vcpu->kvm))
1445                 return;
1446
1447         svm = to_svm(vcpu);
1448
1449         if (vcpu->arch.guest_state_protected)
1450                 sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE);
1451         __free_page(virt_to_page(svm->vmsa));
1452
1453         if (svm->ghcb_sa_free)
1454                 kfree(svm->ghcb_sa);
1455 }
1456
1457 static void dump_ghcb(struct vcpu_svm *svm)
1458 {
1459         struct ghcb *ghcb = svm->ghcb;
1460         unsigned int nbits;
1461
1462         /* Re-use the dump_invalid_vmcb module parameter */
1463         if (!dump_invalid_vmcb) {
1464                 pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n");
1465                 return;
1466         }
1467
1468         nbits = sizeof(ghcb->save.valid_bitmap) * 8;
1469
1470         pr_err("GHCB (GPA=%016llx):\n", svm->vmcb->control.ghcb_gpa);
1471         pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code",
1472                ghcb->save.sw_exit_code, ghcb_sw_exit_code_is_valid(ghcb));
1473         pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1",
1474                ghcb->save.sw_exit_info_1, ghcb_sw_exit_info_1_is_valid(ghcb));
1475         pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2",
1476                ghcb->save.sw_exit_info_2, ghcb_sw_exit_info_2_is_valid(ghcb));
1477         pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch",
1478                ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb));
1479         pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap);
1480 }
1481
1482 static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
1483 {
1484         struct kvm_vcpu *vcpu = &svm->vcpu;
1485         struct ghcb *ghcb = svm->ghcb;
1486
1487         /*
1488          * The GHCB protocol so far allows for the following data
1489          * to be returned:
1490          *   GPRs RAX, RBX, RCX, RDX
1491          *
1492          * Copy their values, even if they may not have been written during the
1493          * VM-Exit.  It's the guest's responsibility to not consume random data.
1494          */
1495         ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]);
1496         ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]);
1497         ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]);
1498         ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]);
1499 }
1500
1501 static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
1502 {
1503         struct vmcb_control_area *control = &svm->vmcb->control;
1504         struct kvm_vcpu *vcpu = &svm->vcpu;
1505         struct ghcb *ghcb = svm->ghcb;
1506         u64 exit_code;
1507
1508         /*
1509          * The GHCB protocol so far allows for the following data
1510          * to be supplied:
1511          *   GPRs RAX, RBX, RCX, RDX
1512          *   XCR0
1513          *   CPL
1514          *
1515          * VMMCALL allows the guest to provide extra registers. KVM also
1516          * expects RSI for hypercalls, so include that, too.
1517          *
1518          * Copy their values to the appropriate location if supplied.
1519          */
1520         memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
1521
1522         vcpu->arch.regs[VCPU_REGS_RAX] = ghcb_get_rax_if_valid(ghcb);
1523         vcpu->arch.regs[VCPU_REGS_RBX] = ghcb_get_rbx_if_valid(ghcb);
1524         vcpu->arch.regs[VCPU_REGS_RCX] = ghcb_get_rcx_if_valid(ghcb);
1525         vcpu->arch.regs[VCPU_REGS_RDX] = ghcb_get_rdx_if_valid(ghcb);
1526         vcpu->arch.regs[VCPU_REGS_RSI] = ghcb_get_rsi_if_valid(ghcb);
1527
1528         svm->vmcb->save.cpl = ghcb_get_cpl_if_valid(ghcb);
1529
1530         if (ghcb_xcr0_is_valid(ghcb)) {
1531                 vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb);
1532                 kvm_update_cpuid_runtime(vcpu);
1533         }
1534
1535         /* Copy the GHCB exit information into the VMCB fields */
1536         exit_code = ghcb_get_sw_exit_code(ghcb);
1537         control->exit_code = lower_32_bits(exit_code);
1538         control->exit_code_hi = upper_32_bits(exit_code);
1539         control->exit_info_1 = ghcb_get_sw_exit_info_1(ghcb);
1540         control->exit_info_2 = ghcb_get_sw_exit_info_2(ghcb);
1541
1542         /* Clear the valid entries fields */
1543         memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
1544 }
1545
1546 static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
1547 {
1548         struct kvm_vcpu *vcpu;
1549         struct ghcb *ghcb;
1550         u64 exit_code = 0;
1551
1552         ghcb = svm->ghcb;
1553
1554         /* Only GHCB Usage code 0 is supported */
1555         if (ghcb->ghcb_usage)
1556                 goto vmgexit_err;
1557
1558         /*
1559          * Retrieve the exit code now even though is may not be marked valid
1560          * as it could help with debugging.
1561          */
1562         exit_code = ghcb_get_sw_exit_code(ghcb);
1563
1564         if (!ghcb_sw_exit_code_is_valid(ghcb) ||
1565             !ghcb_sw_exit_info_1_is_valid(ghcb) ||
1566             !ghcb_sw_exit_info_2_is_valid(ghcb))
1567                 goto vmgexit_err;
1568
1569         switch (ghcb_get_sw_exit_code(ghcb)) {
1570         case SVM_EXIT_READ_DR7:
1571                 break;
1572         case SVM_EXIT_WRITE_DR7:
1573                 if (!ghcb_rax_is_valid(ghcb))
1574                         goto vmgexit_err;
1575                 break;
1576         case SVM_EXIT_RDTSC:
1577                 break;
1578         case SVM_EXIT_RDPMC:
1579                 if (!ghcb_rcx_is_valid(ghcb))
1580                         goto vmgexit_err;
1581                 break;
1582         case SVM_EXIT_CPUID:
1583                 if (!ghcb_rax_is_valid(ghcb) ||
1584                     !ghcb_rcx_is_valid(ghcb))
1585                         goto vmgexit_err;
1586                 if (ghcb_get_rax(ghcb) == 0xd)
1587                         if (!ghcb_xcr0_is_valid(ghcb))
1588                                 goto vmgexit_err;
1589                 break;
1590         case SVM_EXIT_INVD:
1591                 break;
1592         case SVM_EXIT_IOIO:
1593                 if (ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_STR_MASK) {
1594                         if (!ghcb_sw_scratch_is_valid(ghcb))
1595                                 goto vmgexit_err;
1596                 } else {
1597                         if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK))
1598                                 if (!ghcb_rax_is_valid(ghcb))
1599                                         goto vmgexit_err;
1600                 }
1601                 break;
1602         case SVM_EXIT_MSR:
1603                 if (!ghcb_rcx_is_valid(ghcb))
1604                         goto vmgexit_err;
1605                 if (ghcb_get_sw_exit_info_1(ghcb)) {
1606                         if (!ghcb_rax_is_valid(ghcb) ||
1607                             !ghcb_rdx_is_valid(ghcb))
1608                                 goto vmgexit_err;
1609                 }
1610                 break;
1611         case SVM_EXIT_VMMCALL:
1612                 if (!ghcb_rax_is_valid(ghcb) ||
1613                     !ghcb_cpl_is_valid(ghcb))
1614                         goto vmgexit_err;
1615                 break;
1616         case SVM_EXIT_RDTSCP:
1617                 break;
1618         case SVM_EXIT_WBINVD:
1619                 break;
1620         case SVM_EXIT_MONITOR:
1621                 if (!ghcb_rax_is_valid(ghcb) ||
1622                     !ghcb_rcx_is_valid(ghcb) ||
1623                     !ghcb_rdx_is_valid(ghcb))
1624                         goto vmgexit_err;
1625                 break;
1626         case SVM_EXIT_MWAIT:
1627                 if (!ghcb_rax_is_valid(ghcb) ||
1628                     !ghcb_rcx_is_valid(ghcb))
1629                         goto vmgexit_err;
1630                 break;
1631         case SVM_VMGEXIT_MMIO_READ:
1632         case SVM_VMGEXIT_MMIO_WRITE:
1633                 if (!ghcb_sw_scratch_is_valid(ghcb))
1634                         goto vmgexit_err;
1635                 break;
1636         case SVM_VMGEXIT_NMI_COMPLETE:
1637         case SVM_VMGEXIT_AP_HLT_LOOP:
1638         case SVM_VMGEXIT_AP_JUMP_TABLE:
1639         case SVM_VMGEXIT_UNSUPPORTED_EVENT:
1640                 break;
1641         default:
1642                 goto vmgexit_err;
1643         }
1644
1645         return 0;
1646
1647 vmgexit_err:
1648         vcpu = &svm->vcpu;
1649
1650         if (ghcb->ghcb_usage) {
1651                 vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n",
1652                             ghcb->ghcb_usage);
1653         } else {
1654                 vcpu_unimpl(vcpu, "vmgexit: exit reason %#llx is not valid\n",
1655                             exit_code);
1656                 dump_ghcb(svm);
1657         }
1658
1659         vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
1660         vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
1661         vcpu->run->internal.ndata = 2;
1662         vcpu->run->internal.data[0] = exit_code;
1663         vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
1664
1665         return -EINVAL;
1666 }
1667
1668 static void pre_sev_es_run(struct vcpu_svm *svm)
1669 {
1670         if (!svm->ghcb)
1671                 return;
1672
1673         if (svm->ghcb_sa_free) {
1674                 /*
1675                  * The scratch area lives outside the GHCB, so there is a
1676                  * buffer that, depending on the operation performed, may
1677                  * need to be synced, then freed.
1678                  */
1679                 if (svm->ghcb_sa_sync) {
1680                         kvm_write_guest(svm->vcpu.kvm,
1681                                         ghcb_get_sw_scratch(svm->ghcb),
1682                                         svm->ghcb_sa, svm->ghcb_sa_len);
1683                         svm->ghcb_sa_sync = false;
1684                 }
1685
1686                 kfree(svm->ghcb_sa);
1687                 svm->ghcb_sa = NULL;
1688                 svm->ghcb_sa_free = false;
1689         }
1690
1691         trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->ghcb);
1692
1693         sev_es_sync_to_ghcb(svm);
1694
1695         kvm_vcpu_unmap(&svm->vcpu, &svm->ghcb_map, true);
1696         svm->ghcb = NULL;
1697 }
1698
1699 void pre_sev_run(struct vcpu_svm *svm, int cpu)
1700 {
1701         struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
1702         int asid = sev_get_asid(svm->vcpu.kvm);
1703
1704         /* Perform any SEV-ES pre-run actions */
1705         pre_sev_es_run(svm);
1706
1707         /* Assign the asid allocated with this SEV guest */
1708         svm->asid = asid;
1709
1710         /*
1711          * Flush guest TLB:
1712          *
1713          * 1) when different VMCB for the same ASID is to be run on the same host CPU.
1714          * 2) or this VMCB was executed on different host CPU in previous VMRUNs.
1715          */
1716         if (sd->sev_vmcbs[asid] == svm->vmcb &&
1717             svm->vcpu.arch.last_vmentry_cpu == cpu)
1718                 return;
1719
1720         sd->sev_vmcbs[asid] = svm->vmcb;
1721         svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
1722         vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
1723 }
1724
1725 #define GHCB_SCRATCH_AREA_LIMIT         (16ULL * PAGE_SIZE)
1726 static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
1727 {
1728         struct vmcb_control_area *control = &svm->vmcb->control;
1729         struct ghcb *ghcb = svm->ghcb;
1730         u64 ghcb_scratch_beg, ghcb_scratch_end;
1731         u64 scratch_gpa_beg, scratch_gpa_end;
1732         void *scratch_va;
1733
1734         scratch_gpa_beg = ghcb_get_sw_scratch(ghcb);
1735         if (!scratch_gpa_beg) {
1736                 pr_err("vmgexit: scratch gpa not provided\n");
1737                 return false;
1738         }
1739
1740         scratch_gpa_end = scratch_gpa_beg + len;
1741         if (scratch_gpa_end < scratch_gpa_beg) {
1742                 pr_err("vmgexit: scratch length (%#llx) not valid for scratch address (%#llx)\n",
1743                        len, scratch_gpa_beg);
1744                 return false;
1745         }
1746
1747         if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) {
1748                 /* Scratch area begins within GHCB */
1749                 ghcb_scratch_beg = control->ghcb_gpa +
1750                                    offsetof(struct ghcb, shared_buffer);
1751                 ghcb_scratch_end = control->ghcb_gpa +
1752                                    offsetof(struct ghcb, reserved_1);
1753
1754                 /*
1755                  * If the scratch area begins within the GHCB, it must be
1756                  * completely contained in the GHCB shared buffer area.
1757                  */
1758                 if (scratch_gpa_beg < ghcb_scratch_beg ||
1759                     scratch_gpa_end > ghcb_scratch_end) {
1760                         pr_err("vmgexit: scratch area is outside of GHCB shared buffer area (%#llx - %#llx)\n",
1761                                scratch_gpa_beg, scratch_gpa_end);
1762                         return false;
1763                 }
1764
1765                 scratch_va = (void *)svm->ghcb;
1766                 scratch_va += (scratch_gpa_beg - control->ghcb_gpa);
1767         } else {
1768                 /*
1769                  * The guest memory must be read into a kernel buffer, so
1770                  * limit the size
1771                  */
1772                 if (len > GHCB_SCRATCH_AREA_LIMIT) {
1773                         pr_err("vmgexit: scratch area exceeds KVM limits (%#llx requested, %#llx limit)\n",
1774                                len, GHCB_SCRATCH_AREA_LIMIT);
1775                         return false;
1776                 }
1777                 scratch_va = kzalloc(len, GFP_KERNEL);
1778                 if (!scratch_va)
1779                         return false;
1780
1781                 if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, scratch_va, len)) {
1782                         /* Unable to copy scratch area from guest */
1783                         pr_err("vmgexit: kvm_read_guest for scratch area failed\n");
1784
1785                         kfree(scratch_va);
1786                         return false;
1787                 }
1788
1789                 /*
1790                  * The scratch area is outside the GHCB. The operation will
1791                  * dictate whether the buffer needs to be synced before running
1792                  * the vCPU next time (i.e. a read was requested so the data
1793                  * must be written back to the guest memory).
1794                  */
1795                 svm->ghcb_sa_sync = sync;
1796                 svm->ghcb_sa_free = true;
1797         }
1798
1799         svm->ghcb_sa = scratch_va;
1800         svm->ghcb_sa_len = len;
1801
1802         return true;
1803 }
1804
1805 static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask,
1806                               unsigned int pos)
1807 {
1808         svm->vmcb->control.ghcb_gpa &= ~(mask << pos);
1809         svm->vmcb->control.ghcb_gpa |= (value & mask) << pos;
1810 }
1811
1812 static u64 get_ghcb_msr_bits(struct vcpu_svm *svm, u64 mask, unsigned int pos)
1813 {
1814         return (svm->vmcb->control.ghcb_gpa >> pos) & mask;
1815 }
1816
1817 static void set_ghcb_msr(struct vcpu_svm *svm, u64 value)
1818 {
1819         svm->vmcb->control.ghcb_gpa = value;
1820 }
1821
1822 static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
1823 {
1824         struct vmcb_control_area *control = &svm->vmcb->control;
1825         struct kvm_vcpu *vcpu = &svm->vcpu;
1826         u64 ghcb_info;
1827         int ret = 1;
1828
1829         ghcb_info = control->ghcb_gpa & GHCB_MSR_INFO_MASK;
1830
1831         trace_kvm_vmgexit_msr_protocol_enter(svm->vcpu.vcpu_id,
1832                                              control->ghcb_gpa);
1833
1834         switch (ghcb_info) {
1835         case GHCB_MSR_SEV_INFO_REQ:
1836                 set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
1837                                                     GHCB_VERSION_MIN,
1838                                                     sev_enc_bit));
1839                 break;
1840         case GHCB_MSR_CPUID_REQ: {
1841                 u64 cpuid_fn, cpuid_reg, cpuid_value;
1842
1843                 cpuid_fn = get_ghcb_msr_bits(svm,
1844                                              GHCB_MSR_CPUID_FUNC_MASK,
1845                                              GHCB_MSR_CPUID_FUNC_POS);
1846
1847                 /* Initialize the registers needed by the CPUID intercept */
1848                 vcpu->arch.regs[VCPU_REGS_RAX] = cpuid_fn;
1849                 vcpu->arch.regs[VCPU_REGS_RCX] = 0;
1850
1851                 ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_CPUID);
1852                 if (!ret) {
1853                         ret = -EINVAL;
1854                         break;
1855                 }
1856
1857                 cpuid_reg = get_ghcb_msr_bits(svm,
1858                                               GHCB_MSR_CPUID_REG_MASK,
1859                                               GHCB_MSR_CPUID_REG_POS);
1860                 if (cpuid_reg == 0)
1861                         cpuid_value = vcpu->arch.regs[VCPU_REGS_RAX];
1862                 else if (cpuid_reg == 1)
1863                         cpuid_value = vcpu->arch.regs[VCPU_REGS_RBX];
1864                 else if (cpuid_reg == 2)
1865                         cpuid_value = vcpu->arch.regs[VCPU_REGS_RCX];
1866                 else
1867                         cpuid_value = vcpu->arch.regs[VCPU_REGS_RDX];
1868
1869                 set_ghcb_msr_bits(svm, cpuid_value,
1870                                   GHCB_MSR_CPUID_VALUE_MASK,
1871                                   GHCB_MSR_CPUID_VALUE_POS);
1872
1873                 set_ghcb_msr_bits(svm, GHCB_MSR_CPUID_RESP,
1874                                   GHCB_MSR_INFO_MASK,
1875                                   GHCB_MSR_INFO_POS);
1876                 break;
1877         }
1878         case GHCB_MSR_TERM_REQ: {
1879                 u64 reason_set, reason_code;
1880
1881                 reason_set = get_ghcb_msr_bits(svm,
1882                                                GHCB_MSR_TERM_REASON_SET_MASK,
1883                                                GHCB_MSR_TERM_REASON_SET_POS);
1884                 reason_code = get_ghcb_msr_bits(svm,
1885                                                 GHCB_MSR_TERM_REASON_MASK,
1886                                                 GHCB_MSR_TERM_REASON_POS);
1887                 pr_info("SEV-ES guest requested termination: %#llx:%#llx\n",
1888                         reason_set, reason_code);
1889                 fallthrough;
1890         }
1891         default:
1892                 ret = -EINVAL;
1893         }
1894
1895         trace_kvm_vmgexit_msr_protocol_exit(svm->vcpu.vcpu_id,
1896                                             control->ghcb_gpa, ret);
1897
1898         return ret;
1899 }
1900
1901 int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
1902 {
1903         struct vcpu_svm *svm = to_svm(vcpu);
1904         struct vmcb_control_area *control = &svm->vmcb->control;
1905         u64 ghcb_gpa, exit_code;
1906         struct ghcb *ghcb;
1907         int ret;
1908
1909         /* Validate the GHCB */
1910         ghcb_gpa = control->ghcb_gpa;
1911         if (ghcb_gpa & GHCB_MSR_INFO_MASK)
1912                 return sev_handle_vmgexit_msr_protocol(svm);
1913
1914         if (!ghcb_gpa) {
1915                 vcpu_unimpl(vcpu, "vmgexit: GHCB gpa is not set\n");
1916                 return -EINVAL;
1917         }
1918
1919         if (kvm_vcpu_map(vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->ghcb_map)) {
1920                 /* Unable to map GHCB from guest */
1921                 vcpu_unimpl(vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n",
1922                             ghcb_gpa);
1923                 return -EINVAL;
1924         }
1925
1926         svm->ghcb = svm->ghcb_map.hva;
1927         ghcb = svm->ghcb_map.hva;
1928
1929         trace_kvm_vmgexit_enter(vcpu->vcpu_id, ghcb);
1930
1931         exit_code = ghcb_get_sw_exit_code(ghcb);
1932
1933         ret = sev_es_validate_vmgexit(svm);
1934         if (ret)
1935                 return ret;
1936
1937         sev_es_sync_from_ghcb(svm);
1938         ghcb_set_sw_exit_info_1(ghcb, 0);
1939         ghcb_set_sw_exit_info_2(ghcb, 0);
1940
1941         ret = -EINVAL;
1942         switch (exit_code) {
1943         case SVM_VMGEXIT_MMIO_READ:
1944                 if (!setup_vmgexit_scratch(svm, true, control->exit_info_2))
1945                         break;
1946
1947                 ret = kvm_sev_es_mmio_read(vcpu,
1948                                            control->exit_info_1,
1949                                            control->exit_info_2,
1950                                            svm->ghcb_sa);
1951                 break;
1952         case SVM_VMGEXIT_MMIO_WRITE:
1953                 if (!setup_vmgexit_scratch(svm, false, control->exit_info_2))
1954                         break;
1955
1956                 ret = kvm_sev_es_mmio_write(vcpu,
1957                                             control->exit_info_1,
1958                                             control->exit_info_2,
1959                                             svm->ghcb_sa);
1960                 break;
1961         case SVM_VMGEXIT_NMI_COMPLETE:
1962                 ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET);
1963                 break;
1964         case SVM_VMGEXIT_AP_HLT_LOOP:
1965                 ret = kvm_emulate_ap_reset_hold(vcpu);
1966                 break;
1967         case SVM_VMGEXIT_AP_JUMP_TABLE: {
1968                 struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;
1969
1970                 switch (control->exit_info_1) {
1971                 case 0:
1972                         /* Set AP jump table address */
1973                         sev->ap_jump_table = control->exit_info_2;
1974                         break;
1975                 case 1:
1976                         /* Get AP jump table address */
1977                         ghcb_set_sw_exit_info_2(ghcb, sev->ap_jump_table);
1978                         break;
1979                 default:
1980                         pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n",
1981                                control->exit_info_1);
1982                         ghcb_set_sw_exit_info_1(ghcb, 1);
1983                         ghcb_set_sw_exit_info_2(ghcb,
1984                                                 X86_TRAP_UD |
1985                                                 SVM_EVTINJ_TYPE_EXEPT |
1986                                                 SVM_EVTINJ_VALID);
1987                 }
1988
1989                 ret = 1;
1990                 break;
1991         }
1992         case SVM_VMGEXIT_UNSUPPORTED_EVENT:
1993                 vcpu_unimpl(vcpu,
1994                             "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
1995                             control->exit_info_1, control->exit_info_2);
1996                 break;
1997         default:
1998                 ret = svm_invoke_exit_handler(vcpu, exit_code);
1999         }
2000
2001         return ret;
2002 }
2003
2004 int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
2005 {
2006         if (!setup_vmgexit_scratch(svm, in, svm->vmcb->control.exit_info_2))
2007                 return -EINVAL;
2008
2009         return kvm_sev_es_string_io(&svm->vcpu, size, port,
2010                                     svm->ghcb_sa, svm->ghcb_sa_len, in);
2011 }
2012
2013 void sev_es_init_vmcb(struct vcpu_svm *svm)
2014 {
2015         struct kvm_vcpu *vcpu = &svm->vcpu;
2016
2017         svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
2018         svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
2019
2020         /*
2021          * An SEV-ES guest requires a VMSA area that is a separate from the
2022          * VMCB page. Do not include the encryption mask on the VMSA physical
2023          * address since hardware will access it using the guest key.
2024          */
2025         svm->vmcb->control.vmsa_pa = __pa(svm->vmsa);
2026
2027         /* Can't intercept CR register access, HV can't modify CR registers */
2028         svm_clr_intercept(svm, INTERCEPT_CR0_READ);
2029         svm_clr_intercept(svm, INTERCEPT_CR4_READ);
2030         svm_clr_intercept(svm, INTERCEPT_CR8_READ);
2031         svm_clr_intercept(svm, INTERCEPT_CR0_WRITE);
2032         svm_clr_intercept(svm, INTERCEPT_CR4_WRITE);
2033         svm_clr_intercept(svm, INTERCEPT_CR8_WRITE);
2034
2035         svm_clr_intercept(svm, INTERCEPT_SELECTIVE_CR0);
2036
2037         /* Track EFER/CR register changes */
2038         svm_set_intercept(svm, TRAP_EFER_WRITE);
2039         svm_set_intercept(svm, TRAP_CR0_WRITE);
2040         svm_set_intercept(svm, TRAP_CR4_WRITE);
2041         svm_set_intercept(svm, TRAP_CR8_WRITE);
2042
2043         /* No support for enable_vmware_backdoor */
2044         clr_exception_intercept(svm, GP_VECTOR);
2045
2046         /* Can't intercept XSETBV, HV can't modify XCR0 directly */
2047         svm_clr_intercept(svm, INTERCEPT_XSETBV);
2048
2049         /* Clear intercepts on selected MSRs */
2050         set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1);
2051         set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1);
2052         set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
2053         set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
2054         set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
2055         set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
2056 }
2057
2058 void sev_es_create_vcpu(struct vcpu_svm *svm)
2059 {
2060         /*
2061          * Set the GHCB MSR value as per the GHCB specification when creating
2062          * a vCPU for an SEV-ES guest.
2063          */
2064         set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
2065                                             GHCB_VERSION_MIN,
2066                                             sev_enc_bit));
2067 }
2068
2069 void sev_es_prepare_guest_switch(struct vcpu_svm *svm, unsigned int cpu)
2070 {
2071         struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
2072         struct vmcb_save_area *hostsa;
2073
2074         /*
2075          * As an SEV-ES guest, hardware will restore the host state on VMEXIT,
2076          * of which one step is to perform a VMLOAD. Since hardware does not
2077          * perform a VMSAVE on VMRUN, the host savearea must be updated.
2078          */
2079         vmsave(__sme_page_pa(sd->save_area));
2080
2081         /* XCR0 is restored on VMEXIT, save the current host value */
2082         hostsa = (struct vmcb_save_area *)(page_address(sd->save_area) + 0x400);
2083         hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
2084
2085         /* PKRU is restored on VMEXIT, save the curent host value */
2086         hostsa->pkru = read_pkru();
2087
2088         /* MSR_IA32_XSS is restored on VMEXIT, save the currnet host value */
2089         hostsa->xss = host_xss;
2090 }
2091
2092 void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
2093 {
2094         struct vcpu_svm *svm = to_svm(vcpu);
2095
2096         /* First SIPI: Use the values as initially set by the VMM */
2097         if (!svm->received_first_sipi) {
2098                 svm->received_first_sipi = true;
2099                 return;
2100         }
2101
2102         /*
2103          * Subsequent SIPI: Return from an AP Reset Hold VMGEXIT, where
2104          * the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a
2105          * non-zero value.
2106          */
2107         ghcb_set_sw_exit_info_2(svm->ghcb, 1);
2108 }