/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * AMD SVM support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#define pr_fmt(fmt) "SVM: " fmt

#include <linux/kvm_host.h>

#include "irq.h"
#include "mmu.h"
#include "kvm_cache_regs.h"
#include "x86.h"
#include "cpuid.h"
#include "pmu.h"

#include <linux/module.h>
#include <linux/mod_devicetable.h>
#include <linux/kernel.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/trace_events.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>
#include <linux/hashtable.h>
#include <linux/frame.h>
#include <linux/psp-sev.h>
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/swap.h>

#include <asm/apic.h>
#include <asm/perf_event.h>
#include <asm/tlbflush.h>
#include <asm/desc.h>
#include <asm/debugreg.h>
#include <asm/kvm_para.h>
#include <asm/irq_remapping.h>
#include <asm/microcode.h>
#include <asm/nospec-branch.h>

#include <asm/virtext.h>
#include "trace.h"

#define __ex(x) __kvm_handle_fault_on_reboot(x)

MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");

static const struct x86_cpu_id svm_cpu_id[] = {
	X86_FEATURE_MATCH(X86_FEATURE_SVM),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);

#define IOPM_ALLOC_ORDER 2
#define MSRPM_ALLOC_ORDER 1

#define SEG_TYPE_LDT 2
#define SEG_TYPE_BUSY_TSS16 3

#define SVM_FEATURE_NPT            (1 <<  0)
#define SVM_FEATURE_LBRV           (1 <<  1)
#define SVM_FEATURE_SVML           (1 <<  2)
#define SVM_FEATURE_NRIP           (1 <<  3)
#define SVM_FEATURE_TSC_RATE       (1 <<  4)
#define SVM_FEATURE_VMCB_CLEAN     (1 <<  5)
#define SVM_FEATURE_FLUSH_ASID     (1 <<  6)
#define SVM_FEATURE_DECODE_ASSIST  (1 <<  7)
#define SVM_FEATURE_PAUSE_FILTER   (1 << 10)

#define SVM_AVIC_DOORBELL	0xc001011b

#define NESTED_EXIT_HOST	0	/* Exit handled on host level */
#define NESTED_EXIT_DONE	1	/* Exit caused nested vmexit  */
#define NESTED_EXIT_CONTINUE	2	/* Further checks needed      */

#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))

#define TSC_RATIO_RSVD          0xffffff0000000000ULL
#define TSC_RATIO_MIN		0x0000000000000001ULL
#define TSC_RATIO_MAX		0x000000ffffffffffULL

#define AVIC_HPA_MASK	~((0xFFFULL << 52) | 0xFFF)

/*
 * 0xff is broadcast, so the max index allowed for physical APIC ID
 * table is 0xfe.  APIC IDs above 0xff are reserved.
 */
#define AVIC_MAX_PHYSICAL_ID_COUNT	255

#define AVIC_UNACCEL_ACCESS_WRITE_MASK		1
#define AVIC_UNACCEL_ACCESS_OFFSET_MASK		0xFF0
#define AVIC_UNACCEL_ACCESS_VECTOR_MASK		0xFFFFFFFF

/* AVIC GATAG is encoded using VM and VCPU IDs */
#define AVIC_VCPU_ID_BITS		8
#define AVIC_VCPU_ID_MASK		((1 << AVIC_VCPU_ID_BITS) - 1)

#define AVIC_VM_ID_BITS			24
#define AVIC_VM_ID_NR			(1 << AVIC_VM_ID_BITS)
#define AVIC_VM_ID_MASK			((1 << AVIC_VM_ID_BITS) - 1)

#define AVIC_GATAG(x, y)		(((x & AVIC_VM_ID_MASK) << AVIC_VCPU_ID_BITS) | \
						(y & AVIC_VCPU_ID_MASK))
#define AVIC_GATAG_TO_VMID(x)		((x >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK)
#define AVIC_GATAG_TO_VCPUID(x)		(x & AVIC_VCPU_ID_MASK)

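/*
 * Illustrative usage sketch (not part of the original source): encoding
 * and decoding a GA tag round-trips the VM and vCPU IDs, e.g.:
 *
 *	u32 tag = AVIC_GATAG(0x12345, 0x07);	// == 0x1234507
 *	AVIC_GATAG_TO_VMID(tag);		// == 0x12345
 *	AVIC_GATAG_TO_VCPUID(tag);		// == 0x07
 *
 * avic_ga_log_notifier() below relies on exactly this decode.
 */
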
static bool erratum_383_found __read_mostly;

static const u32 host_save_user_msrs[] = {
#ifdef CONFIG_X86_64
	MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
	MSR_FS_BASE,
#endif
	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
	MSR_TSC_AUX,
};

#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)

struct kvm_vcpu;

struct nested_state {
	struct vmcb *hsave;
	u64 hsave_msr;
	u64 vm_cr_msr;
	u64 vmcb;

	/* These are the merged vectors */
	u32 *msrpm;

	/* gpa pointers to the real vectors */
	u64 vmcb_msrpm;
	u64 vmcb_iopm;

	/* A VMEXIT is required but not yet emulated */
	bool exit_required;

	/* cache for intercepts of the guest */
	u32 intercept_cr;
	u32 intercept_dr;
	u32 intercept_exceptions;
	u64 intercept;

	/* Nested Paging related state */
	u64 nested_cr3;
};

#define MSRPM_OFFSETS 16
static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;

/*
 * Set osvw_len to a higher value when updated Revision Guides
 * are published and we know what the new status bits are
 */
static uint64_t osvw_len = 4, osvw_status;

struct vcpu_svm {
	struct kvm_vcpu vcpu;
	struct vmcb *vmcb;
	unsigned long vmcb_pa;
	struct svm_cpu_data *svm_data;
	uint64_t asid_generation;
	uint64_t sysenter_esp;
	uint64_t sysenter_eip;
	uint64_t tsc_aux;

	u64 msr_decfg;

	u64 next_rip;

	u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
	struct {
		u16 fs;
		u16 gs;
		u16 ldt;
		u64 gs_base;
	} host;

	u64 spec_ctrl;

	u32 *msrpm;

	ulong nmi_iret_rip;

	struct nested_state nested;

	bool nmi_singlestep;
	u64 nmi_singlestep_guest_rflags;

	unsigned int3_injected;
	unsigned long int3_rip;

	/* cached guest cpuid flags for faster access */
	bool nrips_enabled	: 1;

	u32 ldr_reg;
	struct page *avic_backing_page;
	u64 *avic_physical_id_cache;
	bool avic_is_running;

	/*
	 * Per-vcpu list of struct amd_svm_iommu_ir:
	 * This is used mainly to store interrupt remapping information used
	 * when updating the vCPU affinity.  This avoids the need to scan for
	 * IRTE and try to match ga_tag in the IOMMU driver.
	 */
	struct list_head ir_list;
	spinlock_t ir_list_lock;

	/* which host CPU was used for running this vcpu */
	unsigned int last_cpu;
};

/*
 * This is a wrapper of struct amd_iommu_ir_data.
 */
struct amd_svm_iommu_ir {
	struct list_head node;	/* Used by SVM for per-vcpu ir_list */
	void *data;		/* Storing pointer to struct amd_ir_data */
};

#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK	(0xFF)
#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK		(1 << 31)

#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK	(0xFFULL)
#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK	(0xFFFFFFFFFFULL << 12)
#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK		(1ULL << 62)
#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK		(1ULL << 63)

static DEFINE_PER_CPU(u64, current_tsc_ratio);
#define TSC_RATIO_DEFAULT	0x0100000000ULL

#define MSR_INVALID			0xffffffffU

static const struct svm_direct_access_msrs {
	u32 index;   /* Index of the MSR */
	bool always; /* True if intercept is always on */
} direct_access_msrs[] = {
	{ .index = MSR_STAR,				.always = true  },
	{ .index = MSR_IA32_SYSENTER_CS,		.always = true  },
#ifdef CONFIG_X86_64
	{ .index = MSR_GS_BASE,				.always = true  },
	{ .index = MSR_FS_BASE,				.always = true  },
	{ .index = MSR_KERNEL_GS_BASE,			.always = true  },
	{ .index = MSR_LSTAR,				.always = true  },
	{ .index = MSR_CSTAR,				.always = true  },
	{ .index = MSR_SYSCALL_MASK,			.always = true  },
#endif
	{ .index = MSR_IA32_SPEC_CTRL,			.always = false },
	{ .index = MSR_IA32_PRED_CMD,			.always = false },
	{ .index = MSR_IA32_LASTBRANCHFROMIP,		.always = false },
	{ .index = MSR_IA32_LASTBRANCHTOIP,		.always = false },
	{ .index = MSR_IA32_LASTINTFROMIP,		.always = false },
	{ .index = MSR_IA32_LASTINTTOIP,		.always = false },
	{ .index = MSR_INVALID,				.always = false },
};

/* enable NPT for AMD64 and X86 with PAE */
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
static bool npt_enabled = true;
#else
static bool npt_enabled;
#endif

/* allow nested paging (virtualized MMU) for all guests */
static int npt = true;
module_param(npt, int, S_IRUGO);

/* allow nested virtualization in KVM/SVM */
static int nested = true;
module_param(nested, int, S_IRUGO);

/* enable / disable AVIC */
static int avic;
#ifdef CONFIG_X86_LOCAL_APIC
module_param(avic, int, S_IRUGO);
#endif

/* enable/disable Virtual VMLOAD VMSAVE */
static int vls = true;
module_param(vls, int, 0444);

/* enable/disable Virtual GIF */
static int vgif = true;
module_param(vgif, int, 0444);

/* enable/disable SEV support */
static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
module_param(sev, int, 0444);

static u8 rsm_ins_bytes[] = "\x0f\xaa";

static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);
static void svm_complete_interrupts(struct vcpu_svm *svm);

static int nested_svm_exit_handled(struct vcpu_svm *svm);
static int nested_svm_intercept(struct vcpu_svm *svm);
static int nested_svm_vmexit(struct vcpu_svm *svm);
static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
				      bool has_error_code, u32 error_code);

enum {
	VMCB_INTERCEPTS, /* Intercept vectors, TSC offset,
			    pause filter count */
	VMCB_PERM_MAP,   /* IOPM Base and MSRPM Base */
	VMCB_ASID,	 /* ASID */
	VMCB_INTR,	 /* int_ctl, int_vector */
	VMCB_NPT,        /* npt_en, nCR3, gPAT */
	VMCB_CR,	 /* CR0, CR3, CR4, EFER */
	VMCB_DR,         /* DR6, DR7 */
	VMCB_DT,         /* GDT, IDT */
	VMCB_SEG,        /* CS, DS, SS, ES, CPL */
	VMCB_CR2,        /* CR2 only */
	VMCB_LBR,        /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */
	VMCB_AVIC,       /* AVIC APIC_BAR, AVIC APIC_BACKING_PAGE,
			  * AVIC PHYSICAL_TABLE pointer,
			  * AVIC LOGICAL_TABLE pointer
			  */
	VMCB_DIRTY_MAX,
};

/* TPR and CR2 are always written before VMRUN */
#define VMCB_ALWAYS_DIRTY_MASK	((1U << VMCB_INTR) | (1U << VMCB_CR2))

#define VMCB_AVIC_APIC_BAR_MASK		0xFFFFFFFFFF000ULL

static unsigned int max_sev_asid;
static unsigned int min_sev_asid;
static unsigned long *sev_asid_bitmap;
#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)

struct enc_region {
	struct list_head list;
	unsigned long npages;
	struct page **pages;
	unsigned long uaddr;
	unsigned long size;
};

static inline bool svm_sev_enabled(void)
{
	return max_sev_asid;
}

static inline bool sev_guest(struct kvm *kvm)
{
	struct kvm_sev_info *sev = &kvm->arch.sev_info;

	return sev->active;
}

static inline int sev_get_asid(struct kvm *kvm)
{
	struct kvm_sev_info *sev = &kvm->arch.sev_info;

	return sev->asid;
}

static inline void mark_all_dirty(struct vmcb *vmcb)
{
	vmcb->control.clean = 0;
}

static inline void mark_all_clean(struct vmcb *vmcb)
{
	vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1)
			       & ~VMCB_ALWAYS_DIRTY_MASK;
}

static inline void mark_dirty(struct vmcb *vmcb, int bit)
{
	vmcb->control.clean &= ~(1 << bit);
}

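/*
 * Illustrative note (not in the original source): a set clean bit tells
 * the CPU it may skip reloading that VMCB state group on VMRUN.  The
 * typical pattern is mark_all_clean(vmcb) once a VMRUN has consumed the
 * state, then e.g. mark_dirty(vmcb, VMCB_CR) whenever CR0/CR3/CR4/EFER
 * are rewritten, so only that group is reloaded on the next entry.
 * VMCB_INTR and VMCB_CR2 stay dirty because they are rewritten before
 * every VMRUN.
 */
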
a2fa3e9f
GH
391static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
392{
fb3f0f51 393 return container_of(vcpu, struct vcpu_svm, vcpu);
a2fa3e9f
GH
394}
395
44a95dae
SS
396static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data)
397{
398 svm->vmcb->control.avic_vapic_bar = data & VMCB_AVIC_APIC_BAR_MASK;
399 mark_dirty(svm->vmcb, VMCB_AVIC);
400}
401
340d3bc3
SS
402static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu)
403{
404 struct vcpu_svm *svm = to_svm(vcpu);
405 u64 *entry = svm->avic_physical_id_cache;
406
407 if (!entry)
408 return false;
409
410 return (READ_ONCE(*entry) & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
411}
412
384c6368
JR
413static void recalc_intercepts(struct vcpu_svm *svm)
414{
415 struct vmcb_control_area *c, *h;
416 struct nested_state *g;
417
116a0a23
JR
418 mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
419
384c6368
JR
420 if (!is_guest_mode(&svm->vcpu))
421 return;
422
423 c = &svm->vmcb->control;
424 h = &svm->nested.hsave->control;
425 g = &svm->nested;
426
4ee546b4 427 c->intercept_cr = h->intercept_cr | g->intercept_cr;
3aed041a 428 c->intercept_dr = h->intercept_dr | g->intercept_dr;
bd89525a 429 c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
384c6368
JR
430 c->intercept = h->intercept | g->intercept;
431}
432
4ee546b4
RJ
433static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm)
434{
435 if (is_guest_mode(&svm->vcpu))
436 return svm->nested.hsave;
437 else
438 return svm->vmcb;
439}
440
441static inline void set_cr_intercept(struct vcpu_svm *svm, int bit)
442{
443 struct vmcb *vmcb = get_host_vmcb(svm);
444
445 vmcb->control.intercept_cr |= (1U << bit);
446
447 recalc_intercepts(svm);
448}
449
450static inline void clr_cr_intercept(struct vcpu_svm *svm, int bit)
451{
452 struct vmcb *vmcb = get_host_vmcb(svm);
453
454 vmcb->control.intercept_cr &= ~(1U << bit);
455
456 recalc_intercepts(svm);
457}
458
459static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit)
460{
461 struct vmcb *vmcb = get_host_vmcb(svm);
462
463 return vmcb->control.intercept_cr & (1U << bit);
464}
465
5315c716 466static inline void set_dr_intercepts(struct vcpu_svm *svm)
3aed041a
JR
467{
468 struct vmcb *vmcb = get_host_vmcb(svm);
469
5315c716
PB
470 vmcb->control.intercept_dr = (1 << INTERCEPT_DR0_READ)
471 | (1 << INTERCEPT_DR1_READ)
472 | (1 << INTERCEPT_DR2_READ)
473 | (1 << INTERCEPT_DR3_READ)
474 | (1 << INTERCEPT_DR4_READ)
475 | (1 << INTERCEPT_DR5_READ)
476 | (1 << INTERCEPT_DR6_READ)
477 | (1 << INTERCEPT_DR7_READ)
478 | (1 << INTERCEPT_DR0_WRITE)
479 | (1 << INTERCEPT_DR1_WRITE)
480 | (1 << INTERCEPT_DR2_WRITE)
481 | (1 << INTERCEPT_DR3_WRITE)
482 | (1 << INTERCEPT_DR4_WRITE)
483 | (1 << INTERCEPT_DR5_WRITE)
484 | (1 << INTERCEPT_DR6_WRITE)
485 | (1 << INTERCEPT_DR7_WRITE);
3aed041a
JR
486
487 recalc_intercepts(svm);
488}
489
5315c716 490static inline void clr_dr_intercepts(struct vcpu_svm *svm)
3aed041a
JR
491{
492 struct vmcb *vmcb = get_host_vmcb(svm);
493
5315c716 494 vmcb->control.intercept_dr = 0;
3aed041a
JR
495
496 recalc_intercepts(svm);
497}
498
18c918c5
JR
499static inline void set_exception_intercept(struct vcpu_svm *svm, int bit)
500{
501 struct vmcb *vmcb = get_host_vmcb(svm);
502
503 vmcb->control.intercept_exceptions |= (1U << bit);
504
505 recalc_intercepts(svm);
506}
507
508static inline void clr_exception_intercept(struct vcpu_svm *svm, int bit)
509{
510 struct vmcb *vmcb = get_host_vmcb(svm);
511
512 vmcb->control.intercept_exceptions &= ~(1U << bit);
513
514 recalc_intercepts(svm);
515}
516
8a05a1b8
JR
517static inline void set_intercept(struct vcpu_svm *svm, int bit)
518{
519 struct vmcb *vmcb = get_host_vmcb(svm);
520
521 vmcb->control.intercept |= (1ULL << bit);
522
523 recalc_intercepts(svm);
524}
525
526static inline void clr_intercept(struct vcpu_svm *svm, int bit)
527{
528 struct vmcb *vmcb = get_host_vmcb(svm);
529
530 vmcb->control.intercept &= ~(1ULL << bit);
531
532 recalc_intercepts(svm);
533}
534
640bd6e5
JN
535static inline bool vgif_enabled(struct vcpu_svm *svm)
536{
537 return !!(svm->vmcb->control.int_ctl & V_GIF_ENABLE_MASK);
538}
539
2af9194d
JR
540static inline void enable_gif(struct vcpu_svm *svm)
541{
640bd6e5
JN
542 if (vgif_enabled(svm))
543 svm->vmcb->control.int_ctl |= V_GIF_MASK;
544 else
545 svm->vcpu.arch.hflags |= HF_GIF_MASK;
2af9194d
JR
546}
547
548static inline void disable_gif(struct vcpu_svm *svm)
549{
640bd6e5
JN
550 if (vgif_enabled(svm))
551 svm->vmcb->control.int_ctl &= ~V_GIF_MASK;
552 else
553 svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
2af9194d
JR
554}
555
556static inline bool gif_set(struct vcpu_svm *svm)
557{
640bd6e5
JN
558 if (vgif_enabled(svm))
559 return !!(svm->vmcb->control.int_ctl & V_GIF_MASK);
560 else
561 return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
2af9194d
JR
562}
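
/*
 * Illustrative note (not in the original source): with vGIF the guest's
 * global interrupt flag lives in the V_GIF bit of int_ctl, which the
 * helpers above read and write directly (and which hardware updates on
 * guest STGI/CLGI once those intercepts are cleared).  Without vGIF the
 * flag is tracked purely in software via HF_GIF_MASK in
 * svm->vcpu.arch.hflags.
 */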
563
4866d5e3 564static unsigned long iopm_base;
6aa8b732
AK
565
566struct kvm_ldttss_desc {
567 u16 limit0;
568 u16 base0;
e0231715
JR
569 unsigned base1:8, type:5, dpl:2, p:1;
570 unsigned limit1:4, zero0:3, g:1, base2:8;
6aa8b732
AK
571 u32 base3;
572 u32 zero1;
573} __attribute__((packed));
574
575struct svm_cpu_data {
576 int cpu;
577
5008fdf5
AK
578 u64 asid_generation;
579 u32 max_asid;
580 u32 next_asid;
4faefff3 581 u32 min_asid;
6aa8b732
AK
582 struct kvm_ldttss_desc *tss_desc;
583
584 struct page *save_area;
15d45071 585 struct vmcb *current_vmcb;
70cd94e6
BS
586
587 /* index = sev_asid, value = vmcb pointer */
588 struct vmcb **sev_vmcbs;
6aa8b732
AK
589};
590
591static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
592
593struct svm_init_data {
594 int cpu;
595 int r;
596};
597
09941fbb 598static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
6aa8b732 599
9d8f549d 600#define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
6aa8b732
AK
601#define MSRS_RANGE_SIZE 2048
602#define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2)
603
455716fa
JR
604static u32 svm_msrpm_offset(u32 msr)
605{
606 u32 offset;
607 int i;
608
609 for (i = 0; i < NUM_MSR_MAPS; i++) {
610 if (msr < msrpm_ranges[i] ||
611 msr >= msrpm_ranges[i] + MSRS_IN_RANGE)
612 continue;
613
614 offset = (msr - msrpm_ranges[i]) / 4; /* 4 msrs per u8 */
615 offset += (i * MSRS_RANGE_SIZE); /* add range offset */
616
617 /* Now we have the u8 offset - but need the u32 offset */
618 return offset / 4;
619 }
620
621 /* MSR not in any range */
622 return MSR_INVALID;
623}
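
/*
 * Worked example (illustrative, not from the original source):
 * MSR_STAR (0xc0000081) falls in the range starting at 0xc0000000
 * (i == 1).  Two permission bits per MSR means four MSRs per byte, so:
 *
 *	offset  = (0xc0000081 - 0xc0000000) / 4;   // 0x20 bytes
 *	offset += 1 * MSRS_RANGE_SIZE;             // + 2048 = 0x820
 *	return offset / 4;                         // u32 index 0x208
 *
 * which is the msrpm[] slot the interception helpers below operate on.
 */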
624
6aa8b732
AK
625#define MAX_INST_SIZE 15
626
6aa8b732
AK
627static inline void clgi(void)
628{
4ecac3fd 629 asm volatile (__ex(SVM_CLGI));
6aa8b732
AK
630}
631
632static inline void stgi(void)
633{
4ecac3fd 634 asm volatile (__ex(SVM_STGI));
6aa8b732
AK
635}
636
637static inline void invlpga(unsigned long addr, u32 asid)
638{
e0231715 639 asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid));
6aa8b732
AK
640}
641
855feb67 642static int get_npt_level(struct kvm_vcpu *vcpu)
4b16184c
JR
643{
644#ifdef CONFIG_X86_64
2a7266a8 645 return PT64_ROOT_4LEVEL;
4b16184c
JR
646#else
647 return PT32E_ROOT_LEVEL;
648#endif
649}
650
6aa8b732
AK
651static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
652{
6dc696d4 653 vcpu->arch.efer = efer;
709ddebf 654 if (!npt_enabled && !(efer & EFER_LMA))
2b5203ee 655 efer &= ~EFER_LME;
6aa8b732 656
9962d032 657 to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
dcca1a65 658 mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
6aa8b732
AK
659}
660
6aa8b732
AK
661static int is_external_interrupt(u32 info)
662{
663 info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
664 return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
665}
666
37ccdcbe 667static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu)
2809f5d2
GC
668{
669 struct vcpu_svm *svm = to_svm(vcpu);
670 u32 ret = 0;
671
672 if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
37ccdcbe
PB
673 ret = KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS;
674 return ret;
2809f5d2
GC
675}
676
677static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
678{
679 struct vcpu_svm *svm = to_svm(vcpu);
680
681 if (mask == 0)
682 svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
683 else
684 svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
685
686}
687
6aa8b732
AK
688static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
689{
a2fa3e9f
GH
690 struct vcpu_svm *svm = to_svm(vcpu);
691
f104765b 692 if (svm->vmcb->control.next_rip != 0) {
d2922422 693 WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
6bc31bdc 694 svm->next_rip = svm->vmcb->control.next_rip;
f104765b 695 }
6bc31bdc 696
a2fa3e9f 697 if (!svm->next_rip) {
51d8b661 698 if (emulate_instruction(vcpu, EMULTYPE_SKIP) !=
f629cf84
GN
699 EMULATE_DONE)
700 printk(KERN_DEBUG "%s: NOP\n", __func__);
6aa8b732
AK
701 return;
702 }
5fdbf976
MT
703 if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
704 printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n",
705 __func__, kvm_rip_read(vcpu), svm->next_rip);
6aa8b732 706
5fdbf976 707 kvm_rip_write(vcpu, svm->next_rip);
2809f5d2 708 svm_set_interrupt_shadow(vcpu, 0);
6aa8b732
AK
709}
710
cfcd20e5 711static void svm_queue_exception(struct kvm_vcpu *vcpu)
116a4752
JK
712{
713 struct vcpu_svm *svm = to_svm(vcpu);
cfcd20e5
WL
714 unsigned nr = vcpu->arch.exception.nr;
715 bool has_error_code = vcpu->arch.exception.has_error_code;
664f8e26 716 bool reinject = vcpu->arch.exception.injected;
cfcd20e5 717 u32 error_code = vcpu->arch.exception.error_code;
116a4752 718
e0231715
JR
719 /*
720 * If we are within a nested VM we'd better #VMEXIT and let the guest
721 * handle the exception
722 */
ce7ddec4
JR
723 if (!reinject &&
724 nested_svm_check_exception(svm, nr, has_error_code, error_code))
116a4752
JK
725 return;
726
2a6b20b8 727 if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) {
66b7138f
JK
728 unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
729
730 /*
731 * For guest debugging where we have to reinject #BP if some
732 * INT3 is guest-owned:
733 * Emulate nRIP by moving RIP forward. Will fail if injection
734 * raises a fault that is not intercepted. Still better than
735 * failing in all cases.
736 */
737 skip_emulated_instruction(&svm->vcpu);
738 rip = kvm_rip_read(&svm->vcpu);
739 svm->int3_rip = rip + svm->vmcb->save.cs.base;
740 svm->int3_injected = rip - old_rip;
741 }
742
116a4752
JK
743 svm->vmcb->control.event_inj = nr
744 | SVM_EVTINJ_VALID
745 | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
746 | SVM_EVTINJ_TYPE_EXEPT;
747 svm->vmcb->control.event_inj_err = error_code;
748}
749
67ec6607
JR
750static void svm_init_erratum_383(void)
751{
752 u32 low, high;
753 int err;
754 u64 val;
755
e6ee94d5 756 if (!static_cpu_has_bug(X86_BUG_AMD_TLB_MMATCH))
67ec6607
JR
757 return;
758
759 /* Use _safe variants to not break nested virtualization */
760 val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err);
761 if (err)
762 return;
763
764 val |= (1ULL << 47);
765
766 low = lower_32_bits(val);
767 high = upper_32_bits(val);
768
769 native_write_msr_safe(MSR_AMD64_DC_CFG, low, high);
770
771 erratum_383_found = true;
772}
773
2b036c6b
BO
774static void svm_init_osvw(struct kvm_vcpu *vcpu)
775{
776 /*
777 * Guests should see errata 400 and 415 as fixed (assuming that
778 * HLT and IO instructions are intercepted).
779 */
780 vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3;
781 vcpu->arch.osvw.status = osvw_status & ~(6ULL);
782
783 /*
784 * By increasing VCPU's osvw.length to 3 we are telling the guest that
785 * all osvw.status bits inside that length, including bit 0 (which is
786 * reserved for erratum 298), are valid. However, if host processor's
787 * osvw_len is 0 then osvw_status[0] carries no information. We need to
788 * be conservative here and therefore we tell the guest that erratum 298
789 * is present (because we really don't know).
790 */
791 if (osvw_len == 0 && boot_cpu_data.x86 == 0x10)
792 vcpu->arch.osvw.status |= 1;
793}
794
6aa8b732
AK
795static int has_svm(void)
796{
63d1142f 797 const char *msg;
6aa8b732 798
63d1142f 799 if (!cpu_has_svm(&msg)) {
ff81ff10 800 printk(KERN_INFO "has_svm: %s\n", msg);
6aa8b732
AK
801 return 0;
802 }
803
6aa8b732
AK
804 return 1;
805}
806
13a34e06 807static void svm_hardware_disable(void)
6aa8b732 808{
fbc0db76
JR
809 /* Make sure we clean up behind us */
810 if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
811 wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
812
2c8dceeb 813 cpu_svm_disable();
1018faa6
JR
814
815 amd_pmu_disable_virt();
6aa8b732
AK
816}
817
13a34e06 818static int svm_hardware_enable(void)
6aa8b732
AK
819{
820
0fe1e009 821 struct svm_cpu_data *sd;
6aa8b732 822 uint64_t efer;
6aa8b732
AK
823 struct desc_struct *gdt;
824 int me = raw_smp_processor_id();
825
10474ae8
AG
826 rdmsrl(MSR_EFER, efer);
827 if (efer & EFER_SVME)
828 return -EBUSY;
829
6aa8b732 830 if (!has_svm()) {
1f5b77f5 831 pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me);
10474ae8 832 return -EINVAL;
6aa8b732 833 }
0fe1e009 834 sd = per_cpu(svm_data, me);
0fe1e009 835 if (!sd) {
1f5b77f5 836 pr_err("%s: svm_data is NULL on %d\n", __func__, me);
10474ae8 837 return -EINVAL;
6aa8b732
AK
838 }
839
0fe1e009
TH
840 sd->asid_generation = 1;
841 sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
842 sd->next_asid = sd->max_asid + 1;
ed3cd233 843 sd->min_asid = max_sev_asid + 1;
6aa8b732 844
45fc8757 845 gdt = get_current_gdt_rw();
0fe1e009 846 sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
6aa8b732 847
9962d032 848 wrmsrl(MSR_EFER, efer | EFER_SVME);
6aa8b732 849
d0316554 850 wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
10474ae8 851
fbc0db76
JR
852 if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
853 wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
89cbc767 854 __this_cpu_write(current_tsc_ratio, TSC_RATIO_DEFAULT);
fbc0db76
JR
855 }
856
2b036c6b
BO
857
858 /*
859 * Get OSVW bits.
860 *
861 * Note that it is possible to have a system with mixed processor
862 * revisions and therefore different OSVW bits. If bits are not the same
863 * on different processors then choose the worst case (i.e. if erratum
864 * is present on one processor and not on another then assume that the
865 * erratum is present everywhere).
866 */
867 if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) {
868 uint64_t len, status = 0;
869 int err;
870
871 len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err);
872 if (!err)
873 status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS,
874 &err);
875
876 if (err)
877 osvw_status = osvw_len = 0;
878 else {
879 if (len < osvw_len)
880 osvw_len = len;
881 osvw_status |= status;
882 osvw_status &= (1ULL << osvw_len) - 1;
883 }
884 } else
885 osvw_status = osvw_len = 0;
886
67ec6607
JR
887 svm_init_erratum_383();
888
1018faa6
JR
889 amd_pmu_enable_virt();
890
10474ae8 891 return 0;
6aa8b732
AK
892}
893
0da1db75
JR
894static void svm_cpu_uninit(int cpu)
895{
0fe1e009 896 struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id());
0da1db75 897
0fe1e009 898 if (!sd)
0da1db75
JR
899 return;
900
901 per_cpu(svm_data, raw_smp_processor_id()) = NULL;
70cd94e6 902 kfree(sd->sev_vmcbs);
0fe1e009
TH
903 __free_page(sd->save_area);
904 kfree(sd);
0da1db75
JR
905}
906
6aa8b732
AK
907static int svm_cpu_init(int cpu)
908{
0fe1e009 909 struct svm_cpu_data *sd;
6aa8b732
AK
910 int r;
911
0fe1e009
TH
912 sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
913 if (!sd)
6aa8b732 914 return -ENOMEM;
0fe1e009 915 sd->cpu = cpu;
6aa8b732 916 r = -ENOMEM;
70cd94e6 917 sd->save_area = alloc_page(GFP_KERNEL);
0fe1e009 918 if (!sd->save_area)
6aa8b732
AK
919 goto err_1;
920
70cd94e6
BS
921 if (svm_sev_enabled()) {
922 r = -ENOMEM;
923 sd->sev_vmcbs = kmalloc((max_sev_asid + 1) * sizeof(void *), GFP_KERNEL);
924 if (!sd->sev_vmcbs)
925 goto err_1;
926 }
927
0fe1e009 928 per_cpu(svm_data, cpu) = sd;
6aa8b732
AK
929
930 return 0;
931
932err_1:
0fe1e009 933 kfree(sd);
6aa8b732
AK
934 return r;
935
936}
937
ac72a9b7
JR
938static bool valid_msr_intercept(u32 index)
939{
940 int i;
941
942 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++)
943 if (direct_access_msrs[i].index == index)
944 return true;
945
946 return false;
947}
948
b2ac58f9
KA
949static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr)
950{
951 u8 bit_write;
952 unsigned long tmp;
953 u32 offset;
954 u32 *msrpm;
955
956 msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm:
957 to_svm(vcpu)->msrpm;
958
959 offset = svm_msrpm_offset(msr);
960 bit_write = 2 * (msr & 0x0f) + 1;
961 tmp = msrpm[offset];
962
963 BUG_ON(offset == MSR_INVALID);
964
965 return !!test_bit(bit_write, &tmp);
966}
967
bfc733a7
RR
968static void set_msr_interception(u32 *msrpm, unsigned msr,
969 int read, int write)
6aa8b732 970{
455716fa
JR
971 u8 bit_read, bit_write;
972 unsigned long tmp;
973 u32 offset;
6aa8b732 974
ac72a9b7
JR
975 /*
976 * If this warning triggers extend the direct_access_msrs list at the
977 * beginning of the file
978 */
979 WARN_ON(!valid_msr_intercept(msr));
980
455716fa
JR
981 offset = svm_msrpm_offset(msr);
982 bit_read = 2 * (msr & 0x0f);
983 bit_write = 2 * (msr & 0x0f) + 1;
984 tmp = msrpm[offset];
985
986 BUG_ON(offset == MSR_INVALID);
987
988 read ? clear_bit(bit_read, &tmp) : set_bit(bit_read, &tmp);
989 write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);
990
991 msrpm[offset] = tmp;
6aa8b732
AK
992}
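
/*
 * Illustrative example (not from the original source): continuing the
 * MSR_STAR case, msr & 0x0f == 1, so bit_read == 2 and bit_write == 3
 * within msrpm[0x208].  A cleared bit means "no intercept", so
 *
 *	set_msr_interception(msrpm, MSR_STAR, 1, 1);
 *
 * clears both bits and gives the guest direct read/write access, while
 * passing 0, 0 sets them again and re-enables the #VMEXIT.
 */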
993
f65c229c 994static void svm_vcpu_init_msrpm(u32 *msrpm)
6aa8b732
AK
995{
996 int i;
997
f65c229c
JR
998 memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
999
ac72a9b7
JR
1000 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
1001 if (!direct_access_msrs[i].always)
1002 continue;
1003
1004 set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
1005 }
f65c229c
JR
1006}
1007
323c3d80
JR
1008static void add_msr_offset(u32 offset)
1009{
1010 int i;
1011
1012 for (i = 0; i < MSRPM_OFFSETS; ++i) {
1013
1014 /* Offset already in list? */
1015 if (msrpm_offsets[i] == offset)
bfc733a7 1016 return;
323c3d80
JR
1017
1018 /* Slot used by another offset? */
1019 if (msrpm_offsets[i] != MSR_INVALID)
1020 continue;
1021
1022 /* Add offset to list */
1023 msrpm_offsets[i] = offset;
1024
1025 return;
6aa8b732 1026 }
323c3d80
JR
1027
1028 /*
1029 * If this BUG triggers the msrpm_offsets table has an overflow. Just
1030 * increase MSRPM_OFFSETS in this case.
1031 */
bfc733a7 1032 BUG();
6aa8b732
AK
1033}
1034
323c3d80 1035static void init_msrpm_offsets(void)
f65c229c 1036{
323c3d80 1037 int i;
f65c229c 1038
323c3d80
JR
1039 memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets));
1040
1041 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
1042 u32 offset;
1043
1044 offset = svm_msrpm_offset(direct_access_msrs[i].index);
1045 BUG_ON(offset == MSR_INVALID);
1046
1047 add_msr_offset(offset);
1048 }
f65c229c
JR
1049}
1050
24e09cbf
JR
1051static void svm_enable_lbrv(struct vcpu_svm *svm)
1052{
1053 u32 *msrpm = svm->msrpm;
1054
0dc92119 1055 svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
24e09cbf
JR
1056 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
1057 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
1058 set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
1059 set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
1060}
1061
1062static void svm_disable_lbrv(struct vcpu_svm *svm)
1063{
1064 u32 *msrpm = svm->msrpm;
1065
0dc92119 1066 svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
24e09cbf
JR
1067 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
1068 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
1069 set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
1070 set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
1071}
1072
4aebd0e9
LP
1073static void disable_nmi_singlestep(struct vcpu_svm *svm)
1074{
1075 svm->nmi_singlestep = false;
640bd6e5 1076
ab2f4d73
LP
1077 if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) {
1078 /* Clear our flags if they were not set by the guest */
1079 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
1080 svm->vmcb->save.rflags &= ~X86_EFLAGS_TF;
1081 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
1082 svm->vmcb->save.rflags &= ~X86_EFLAGS_RF;
1083 }
4aebd0e9
LP
1084}
1085
5881f737
SS
1086/* Note:
1087 * This hash table is used to map VM_ID to a struct kvm_arch
1088 * when handling an AMD IOMMU GALOG notification, in order to
1089 * schedule in a particular vCPU.
1090 */
1091#define SVM_VM_DATA_HASH_BITS 8
681bcea8 1092static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
3f0d4db7
DV
1093static u32 next_vm_id = 0;
1094static bool next_vm_id_wrapped = 0;
681bcea8 1095static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
5881f737
SS
1096
1097/* Note:
1098 * This function is called from IOMMU driver to notify
1099 * SVM to schedule in a particular vCPU of a particular VM.
1100 */
1101static int avic_ga_log_notifier(u32 ga_tag)
1102{
1103 unsigned long flags;
1104 struct kvm_arch *ka = NULL;
1105 struct kvm_vcpu *vcpu = NULL;
1106 u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag);
1107 u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag);
1108
1109 pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id);
1110
1111 spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
1112 hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) {
1113 struct kvm *kvm = container_of(ka, struct kvm, arch);
1114 struct kvm_arch *vm_data = &kvm->arch;
1115
1116 if (vm_data->avic_vm_id != vm_id)
1117 continue;
1118 vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
1119 break;
1120 }
1121 spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
1122
5881f737
SS
1123 /* Note:
1124 * At this point, the IOMMU should have already set the pending
1125 * bit in the vAPIC backing page. So, we just need to schedule
1126 * in the vcpu.
1127 */
1cf53587 1128 if (vcpu)
5881f737
SS
1129 kvm_vcpu_wake_up(vcpu);
1130
1131 return 0;
1132}
1133
e9df0942
BS
1134static __init int sev_hardware_setup(void)
1135{
1136 struct sev_user_data_status *status;
1137 int rc;
1138
1139 /* Maximum number of encrypted guests supported simultaneously */
1140 max_sev_asid = cpuid_ecx(0x8000001F);
1141
1142 if (!max_sev_asid)
1143 return 1;
1144
1654efcb
BS
1145 /* Minimum ASID value that should be used for SEV guest */
1146 min_sev_asid = cpuid_edx(0x8000001F);
1147
1148 /* Initialize SEV ASID bitmap */
1149 sev_asid_bitmap = kcalloc(BITS_TO_LONGS(max_sev_asid),
1150 sizeof(unsigned long), GFP_KERNEL);
1151 if (!sev_asid_bitmap)
1152 return 1;
1153
e9df0942
BS
1154 status = kmalloc(sizeof(*status), GFP_KERNEL);
1155 if (!status)
1156 return 1;
1157
1158 /*
1159 * Check SEV platform status.
1160 *
1161 * PLATFORM_STATUS can be called in any state; if we fail to query
1162 * the platform status, then either the PSP firmware does not support
1163 * the SEV feature or the SEV firmware is dead.
1164 */
1165 rc = sev_platform_status(status, NULL);
1166 if (rc)
1167 goto err;
1168
1169 pr_info("SEV supported\n");
1170
1171err:
1172 kfree(status);
1173 return rc;
1174}
1175
6aa8b732
AK
1176static __init int svm_hardware_setup(void)
1177{
1178 int cpu;
1179 struct page *iopm_pages;
f65c229c 1180 void *iopm_va;
6aa8b732
AK
1181 int r;
1182
6aa8b732
AK
1183 iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);
1184
1185 if (!iopm_pages)
1186 return -ENOMEM;
c8681339
AL
1187
1188 iopm_va = page_address(iopm_pages);
1189 memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
6aa8b732
AK
1190 iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
1191
323c3d80
JR
1192 init_msrpm_offsets();
1193
50a37eb4
JR
1194 if (boot_cpu_has(X86_FEATURE_NX))
1195 kvm_enable_efer_bits(EFER_NX);
1196
1b2fd70c
AG
1197 if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
1198 kvm_enable_efer_bits(EFER_FFXSR);
1199
92a1f12d 1200 if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
92a1f12d 1201 kvm_has_tsc_control = true;
bc9b961b
HZ
1202 kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX;
1203 kvm_tsc_scaling_ratio_frac_bits = 32;
92a1f12d
JR
1204 }
1205
236de055
AG
1206 if (nested) {
1207 printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
eec4b140 1208 kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
236de055
AG
1209 }
1210
e9df0942
BS
1211 if (sev) {
1212 if (boot_cpu_has(X86_FEATURE_SEV) &&
1213 IS_ENABLED(CONFIG_KVM_AMD_SEV)) {
1214 r = sev_hardware_setup();
1215 if (r)
1216 sev = false;
1217 } else {
1218 sev = false;
1219 }
1220 }
1221
3230bb47 1222 for_each_possible_cpu(cpu) {
6aa8b732
AK
1223 r = svm_cpu_init(cpu);
1224 if (r)
f65c229c 1225 goto err;
6aa8b732 1226 }
33bd6a0b 1227
2a6b20b8 1228 if (!boot_cpu_has(X86_FEATURE_NPT))
e3da3acd
JR
1229 npt_enabled = false;
1230
6c7dac72
JR
1231 if (npt_enabled && !npt) {
1232 printk(KERN_INFO "kvm: Nested Paging disabled\n");
1233 npt_enabled = false;
1234 }
1235
18552672 1236 if (npt_enabled) {
e3da3acd 1237 printk(KERN_INFO "kvm: Nested Paging enabled\n");
18552672 1238 kvm_enable_tdp();
5f4cb662
JR
1239 } else
1240 kvm_disable_tdp();
e3da3acd 1241
5b8abf1f
SS
1242 if (avic) {
1243 if (!npt_enabled ||
1244 !boot_cpu_has(X86_FEATURE_AVIC) ||
5881f737 1245 !IS_ENABLED(CONFIG_X86_LOCAL_APIC)) {
5b8abf1f 1246 avic = false;
5881f737 1247 } else {
5b8abf1f 1248 pr_info("AVIC enabled\n");
5881f737 1249
5881f737
SS
1250 amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
1251 }
5b8abf1f 1252 }
44a95dae 1253
89c8a498
JN
1254 if (vls) {
1255 if (!npt_enabled ||
5442c269 1256 !boot_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD) ||
89c8a498
JN
1257 !IS_ENABLED(CONFIG_X86_64)) {
1258 vls = false;
1259 } else {
1260 pr_info("Virtual VMLOAD VMSAVE supported\n");
1261 }
1262 }
1263
640bd6e5
JN
1264 if (vgif) {
1265 if (!boot_cpu_has(X86_FEATURE_VGIF))
1266 vgif = false;
1267 else
1268 pr_info("Virtual GIF supported\n");
1269 }
1270
6aa8b732
AK
1271 return 0;
1272
f65c229c 1273err:
6aa8b732
AK
1274 __free_pages(iopm_pages, IOPM_ALLOC_ORDER);
1275 iopm_base = 0;
1276 return r;
1277}
1278
1279static __exit void svm_hardware_unsetup(void)
1280{
0da1db75
JR
1281 int cpu;
1282
1654efcb
BS
1283 if (svm_sev_enabled())
1284 kfree(sev_asid_bitmap);
1285
3230bb47 1286 for_each_possible_cpu(cpu)
0da1db75
JR
1287 svm_cpu_uninit(cpu);
1288
6aa8b732 1289 __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
f65c229c 1290 iopm_base = 0;
6aa8b732
AK
1291}
1292
1293static void init_seg(struct vmcb_seg *seg)
1294{
1295 seg->selector = 0;
1296 seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK |
e0231715 1297 SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */
6aa8b732
AK
1298 seg->limit = 0xffff;
1299 seg->base = 0;
1300}
1301
1302static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
1303{
1304 seg->selector = 0;
1305 seg->attrib = SVM_SELECTOR_P_MASK | type;
1306 seg->limit = 0xffff;
1307 seg->base = 0;
1308}
1309
f4e1b3c8
ZA
1310static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
1311{
1312 struct vcpu_svm *svm = to_svm(vcpu);
1313 u64 g_tsc_offset = 0;
1314
2030753d 1315 if (is_guest_mode(vcpu)) {
f4e1b3c8
ZA
1316 g_tsc_offset = svm->vmcb->control.tsc_offset -
1317 svm->nested.hsave->control.tsc_offset;
1318 svm->nested.hsave->control.tsc_offset = offset;
489223ed
YY
1319 } else
1320 trace_kvm_write_tsc_offset(vcpu->vcpu_id,
1321 svm->vmcb->control.tsc_offset,
1322 offset);
f4e1b3c8
ZA
1323
1324 svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
116a0a23
JR
1325
1326 mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
f4e1b3c8
ZA
1327}
1328
44a95dae
SS
1329static void avic_init_vmcb(struct vcpu_svm *svm)
1330{
1331 struct vmcb *vmcb = svm->vmcb;
1332 struct kvm_arch *vm_data = &svm->vcpu.kvm->arch;
d0ec49d4
TL
1333 phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page));
1334 phys_addr_t lpa = __sme_set(page_to_phys(vm_data->avic_logical_id_table_page));
1335 phys_addr_t ppa = __sme_set(page_to_phys(vm_data->avic_physical_id_table_page));
44a95dae
SS
1336
1337 vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
1338 vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
1339 vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
1340 vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT;
1341 vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
44a95dae
SS
1342}
1343
5690891b 1344static void init_vmcb(struct vcpu_svm *svm)
6aa8b732 1345{
e6101a96
JR
1346 struct vmcb_control_area *control = &svm->vmcb->control;
1347 struct vmcb_save_area *save = &svm->vmcb->save;
6aa8b732 1348
4ee546b4 1349 svm->vcpu.arch.hflags = 0;
bff78274 1350
4ee546b4
RJ
1351 set_cr_intercept(svm, INTERCEPT_CR0_READ);
1352 set_cr_intercept(svm, INTERCEPT_CR3_READ);
1353 set_cr_intercept(svm, INTERCEPT_CR4_READ);
1354 set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
1355 set_cr_intercept(svm, INTERCEPT_CR3_WRITE);
1356 set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
3bbf3565
SS
1357 if (!kvm_vcpu_apicv_active(&svm->vcpu))
1358 set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
6aa8b732 1359
5315c716 1360 set_dr_intercepts(svm);
6aa8b732 1361
18c918c5
JR
1362 set_exception_intercept(svm, PF_VECTOR);
1363 set_exception_intercept(svm, UD_VECTOR);
1364 set_exception_intercept(svm, MC_VECTOR);
54a20552 1365 set_exception_intercept(svm, AC_VECTOR);
cbdb967a 1366 set_exception_intercept(svm, DB_VECTOR);
9718420e
LA
1367 /*
1368 * Guest access to VMware backdoor ports could legitimately
1369 * trigger #GP because of TSS I/O permission bitmap.
1370 * We intercept those #GP and allow access to them anyway
1371 * as VMware does.
1372 */
1373 if (enable_vmware_backdoor)
1374 set_exception_intercept(svm, GP_VECTOR);
6aa8b732 1375
8a05a1b8
JR
1376 set_intercept(svm, INTERCEPT_INTR);
1377 set_intercept(svm, INTERCEPT_NMI);
1378 set_intercept(svm, INTERCEPT_SMI);
1379 set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
332b56e4 1380 set_intercept(svm, INTERCEPT_RDPMC);
8a05a1b8
JR
1381 set_intercept(svm, INTERCEPT_CPUID);
1382 set_intercept(svm, INTERCEPT_INVD);
1383 set_intercept(svm, INTERCEPT_HLT);
1384 set_intercept(svm, INTERCEPT_INVLPG);
1385 set_intercept(svm, INTERCEPT_INVLPGA);
1386 set_intercept(svm, INTERCEPT_IOIO_PROT);
1387 set_intercept(svm, INTERCEPT_MSR_PROT);
1388 set_intercept(svm, INTERCEPT_TASK_SWITCH);
1389 set_intercept(svm, INTERCEPT_SHUTDOWN);
1390 set_intercept(svm, INTERCEPT_VMRUN);
1391 set_intercept(svm, INTERCEPT_VMMCALL);
1392 set_intercept(svm, INTERCEPT_VMLOAD);
1393 set_intercept(svm, INTERCEPT_VMSAVE);
1394 set_intercept(svm, INTERCEPT_STGI);
1395 set_intercept(svm, INTERCEPT_CLGI);
1396 set_intercept(svm, INTERCEPT_SKINIT);
1397 set_intercept(svm, INTERCEPT_WBINVD);
81dd35d4 1398 set_intercept(svm, INTERCEPT_XSETBV);
7607b717 1399 set_intercept(svm, INTERCEPT_RSM);
6aa8b732 1400
668fffa3
MT
1401 if (!kvm_mwait_in_guest()) {
1402 set_intercept(svm, INTERCEPT_MONITOR);
1403 set_intercept(svm, INTERCEPT_MWAIT);
1404 }
1405
d0ec49d4
TL
1406 control->iopm_base_pa = __sme_set(iopm_base);
1407 control->msrpm_base_pa = __sme_set(__pa(svm->msrpm));
6aa8b732
AK
1408 control->int_ctl = V_INTR_MASKING_MASK;
1409
1410 init_seg(&save->es);
1411 init_seg(&save->ss);
1412 init_seg(&save->ds);
1413 init_seg(&save->fs);
1414 init_seg(&save->gs);
1415
1416 save->cs.selector = 0xf000;
04b66839 1417 save->cs.base = 0xffff0000;
6aa8b732
AK
1418 /* Executable/Readable Code Segment */
1419 save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK |
1420 SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
1421 save->cs.limit = 0xffff;
6aa8b732
AK
1422
1423 save->gdtr.limit = 0xffff;
1424 save->idtr.limit = 0xffff;
1425
1426 init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
1427 init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
1428
5690891b 1429 svm_set_efer(&svm->vcpu, 0);
d77c26fc 1430 save->dr6 = 0xffff0ff0;
f6e78475 1431 kvm_set_rflags(&svm->vcpu, 2);
6aa8b732 1432 save->rip = 0x0000fff0;
5fdbf976 1433 svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
6aa8b732 1434
e0231715 1435 /*
18fa000a 1436 * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
d28bc9dd 1437 * It also updates the guest-visible cr0 value.
6aa8b732 1438 */
79a8059d 1439 svm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
ebae871a 1440 kvm_mmu_reset_context(&svm->vcpu);
18fa000a 1441
66aee91a 1442 save->cr4 = X86_CR4_PAE;
6aa8b732 1443 /* rdx = ?? */
709ddebf
JR
1444
1445 if (npt_enabled) {
1446 /* Setup VMCB for Nested Paging */
cea3a19b 1447 control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE;
8a05a1b8 1448 clr_intercept(svm, INTERCEPT_INVLPG);
18c918c5 1449 clr_exception_intercept(svm, PF_VECTOR);
4ee546b4
RJ
1450 clr_cr_intercept(svm, INTERCEPT_CR3_READ);
1451 clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
74545705 1452 save->g_pat = svm->vcpu.arch.pat;
709ddebf
JR
1453 save->cr3 = 0;
1454 save->cr4 = 0;
1455 }
f40f6a45 1456 svm->asid_generation = 0;
1371d904 1457
e6aa9abd 1458 svm->nested.vmcb = 0;
2af9194d
JR
1459 svm->vcpu.arch.hflags = 0;
1460
2a6b20b8 1461 if (boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
565d0998 1462 control->pause_filter_count = 3000;
8a05a1b8 1463 set_intercept(svm, INTERCEPT_PAUSE);
565d0998
ML
1464 }
1465
67034bb9 1466 if (kvm_vcpu_apicv_active(&svm->vcpu))
44a95dae
SS
1467 avic_init_vmcb(svm);
1468
89c8a498
JN
1469 /*
1470 * If hardware supports Virtual VMLOAD VMSAVE then enable it
1471 * in VMCB and clear intercepts to avoid #VMEXIT.
1472 */
1473 if (vls) {
1474 clr_intercept(svm, INTERCEPT_VMLOAD);
1475 clr_intercept(svm, INTERCEPT_VMSAVE);
1476 svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
1477 }
1478
640bd6e5
JN
1479 if (vgif) {
1480 clr_intercept(svm, INTERCEPT_STGI);
1481 clr_intercept(svm, INTERCEPT_CLGI);
1482 svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK;
1483 }
1484
35c6f649 1485 if (sev_guest(svm->vcpu.kvm)) {
1654efcb 1486 svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
35c6f649
BS
1487 clr_exception_intercept(svm, UD_VECTOR);
1488 }
1654efcb 1489
8d28fec4
RJ
1490 mark_all_dirty(svm->vmcb);
1491
2af9194d 1492 enable_gif(svm);
44a95dae
SS
1493
1494}
1495
d3e7dec0
DC
1496static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
1497 unsigned int index)
44a95dae
SS
1498{
1499 u64 *avic_physical_id_table;
1500 struct kvm_arch *vm_data = &vcpu->kvm->arch;
1501
1502 if (index >= AVIC_MAX_PHYSICAL_ID_COUNT)
1503 return NULL;
1504
1505 avic_physical_id_table = page_address(vm_data->avic_physical_id_table_page);
1506
1507 return &avic_physical_id_table[index];
1508}
1509
1510/**
1511 * Note:
1512 * AVIC hardware walks the nested page table to check permissions,
1513 * but does not use the SPA address specified in the leaf page
1514 * table entry, since it uses the address in the AVIC_BACKING_PAGE pointer
1515 * field of the VMCB. Therefore, we set up the
1516 * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here.
1517 */
1518static int avic_init_access_page(struct kvm_vcpu *vcpu)
1519{
1520 struct kvm *kvm = vcpu->kvm;
1521 int ret;
1522
1523 if (kvm->arch.apic_access_page_done)
1524 return 0;
1525
1526 ret = x86_set_memory_region(kvm,
1527 APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
1528 APIC_DEFAULT_PHYS_BASE,
1529 PAGE_SIZE);
1530 if (ret)
1531 return ret;
1532
1533 kvm->arch.apic_access_page_done = true;
1534 return 0;
1535}
1536
1537static int avic_init_backing_page(struct kvm_vcpu *vcpu)
1538{
1539 int ret;
1540 u64 *entry, new_entry;
1541 int id = vcpu->vcpu_id;
1542 struct vcpu_svm *svm = to_svm(vcpu);
1543
1544 ret = avic_init_access_page(vcpu);
1545 if (ret)
1546 return ret;
1547
1548 if (id >= AVIC_MAX_PHYSICAL_ID_COUNT)
1549 return -EINVAL;
1550
1551 if (!svm->vcpu.arch.apic->regs)
1552 return -EINVAL;
1553
1554 svm->avic_backing_page = virt_to_page(svm->vcpu.arch.apic->regs);
1555
1556 /* Setting AVIC backing page address in the phy APIC ID table */
1557 entry = avic_get_physical_id_entry(vcpu, id);
1558 if (!entry)
1559 return -EINVAL;
1560
1561 new_entry = READ_ONCE(*entry);
d0ec49d4
TL
1562 new_entry = __sme_set((page_to_phys(svm->avic_backing_page) &
1563 AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
1564 AVIC_PHYSICAL_ID_ENTRY_VALID_MASK);
44a95dae
SS
1565 WRITE_ONCE(*entry, new_entry);
1566
1567 svm->avic_physical_id_cache = entry;
1568
1569 return 0;
1570}
1571
1654efcb
BS
1572static void __sev_asid_free(int asid)
1573{
70cd94e6
BS
1574 struct svm_cpu_data *sd;
1575 int cpu, pos;
1654efcb
BS
1576
1577 pos = asid - 1;
1578 clear_bit(pos, sev_asid_bitmap);
70cd94e6
BS
1579
1580 for_each_possible_cpu(cpu) {
1581 sd = per_cpu(svm_data, cpu);
1582 sd->sev_vmcbs[pos] = NULL;
1583 }
1654efcb
BS
1584}
1585
1586static void sev_asid_free(struct kvm *kvm)
1587{
1588 struct kvm_sev_info *sev = &kvm->arch.sev_info;
1589
1590 __sev_asid_free(sev->asid);
1591}
1592
59414c98
BS
1593static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
1594{
1595 struct sev_data_decommission *decommission;
1596 struct sev_data_deactivate *data;
1597
1598 if (!handle)
1599 return;
1600
1601 data = kzalloc(sizeof(*data), GFP_KERNEL);
1602 if (!data)
1603 return;
1604
1605 /* deactivate handle */
1606 data->handle = handle;
1607 sev_guest_deactivate(data, NULL);
1608
1609 wbinvd_on_all_cpus();
1610 sev_guest_df_flush(NULL);
1611 kfree(data);
1612
1613 decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
1614 if (!decommission)
1615 return;
1616
1617 /* decommission handle */
1618 decommission->handle = handle;
1619 sev_guest_decommission(decommission, NULL);
1620
1621 kfree(decommission);
1622}
1623
89c50580
BS
1624static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
1625 unsigned long ulen, unsigned long *n,
1626 int write)
1627{
1628 struct kvm_sev_info *sev = &kvm->arch.sev_info;
1629 unsigned long npages, npinned, size;
1630 unsigned long locked, lock_limit;
1631 struct page **pages;
1632 int first, last;
1633
1634 /* Calculate number of pages. */
1635 first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
1636 last = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT;
1637 npages = (last - first + 1);
1638
1639 locked = sev->pages_locked + npages;
1640 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
1641 if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
1642 pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n", locked, lock_limit);
1643 return NULL;
1644 }
1645
1646 /* Avoid using vmalloc for smaller buffers. */
1647 size = npages * sizeof(struct page *);
1648 if (size > PAGE_SIZE)
1649 pages = vmalloc(size);
1650 else
1651 pages = kmalloc(size, GFP_KERNEL);
1652
1653 if (!pages)
1654 return NULL;
1655
1656 /* Pin the user virtual address. */
1657 npinned = get_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
1658 if (npinned != npages) {
1659 pr_err("SEV: Failure locking %lu pages.\n", npages);
1660 goto err;
1661 }
1662
1663 *n = npages;
1664 sev->pages_locked = locked;
1665
1666 return pages;
1667
1668err:
1669 if (npinned > 0)
1670 release_pages(pages, npinned);
1671
1672 kvfree(pages);
1673 return NULL;
1674}
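
/*
 * Illustrative example (not from the original source): the first/last
 * computation counts every page frame the range touches.  For
 * uaddr == 0x1000ff0 and ulen == 0x20 (with 4K pages):
 *
 *	first  == 0x1000;
 *	last   == 0x1001;
 *	npages == 2;
 *
 * even though only 32 bytes were requested, because the range crosses
 * a page boundary.
 */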
1675
1676static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
1677 unsigned long npages)
1678{
1679 struct kvm_sev_info *sev = &kvm->arch.sev_info;
1680
1681 release_pages(pages, npages);
1682 kvfree(pages);
1683 sev->pages_locked -= npages;
1684}
1685
1686static void sev_clflush_pages(struct page *pages[], unsigned long npages)
1687{
1688 uint8_t *page_virtual;
1689 unsigned long i;
1690
1691 if (npages == 0 || pages == NULL)
1692 return;
1693
1694 for (i = 0; i < npages; i++) {
1695 page_virtual = kmap_atomic(pages[i]);
1696 clflush_cache_range(page_virtual, PAGE_SIZE);
1697 kunmap_atomic(page_virtual);
1698 }
1699}
1700
1e80fdc0
BS
1701static void __unregister_enc_region_locked(struct kvm *kvm,
1702 struct enc_region *region)
1703{
1704 /*
1705 * The guest may change the memory encryption attribute from C=0 -> C=1
1706 * or vice versa for this memory range.  Let's make sure caches are
1707 * flushed to ensure that guest data gets written into memory with
1708 * correct C-bit.
1709 */
1710 sev_clflush_pages(region->pages, region->npages);
1711
1712 sev_unpin_memory(kvm, region->pages, region->npages);
1713 list_del(&region->list);
1714 kfree(region);
1715}
1716
1654efcb
BS
1717static void sev_vm_destroy(struct kvm *kvm)
1718{
59414c98 1719 struct kvm_sev_info *sev = &kvm->arch.sev_info;
1e80fdc0
BS
1720 struct list_head *head = &sev->regions_list;
1721 struct list_head *pos, *q;
59414c98 1722
1654efcb
BS
1723 if (!sev_guest(kvm))
1724 return;
1725
1e80fdc0
BS
1726 mutex_lock(&kvm->lock);
1727
1728 /*
1729 * If userspace was terminated before unregistering the memory regions,
1730 * then let's unpin all the registered memory.
1731 */
1732 if (!list_empty(head)) {
1733 list_for_each_safe(pos, q, head) {
1734 __unregister_enc_region_locked(kvm,
1735 list_entry(pos, struct enc_region, list));
1736 }
1737 }
1738
1739 mutex_unlock(&kvm->lock);
1740
59414c98 1741 sev_unbind_asid(kvm, sev->handle);
1654efcb
BS
1742 sev_asid_free(kvm);
1743}
1744
44a95dae
SS
1745static void avic_vm_destroy(struct kvm *kvm)
1746{
5881f737 1747 unsigned long flags;
44a95dae
SS
1748 struct kvm_arch *vm_data = &kvm->arch;
1749
3863dff0
DV
1750 if (!avic)
1751 return;
1752
44a95dae
SS
1753 if (vm_data->avic_logical_id_table_page)
1754 __free_page(vm_data->avic_logical_id_table_page);
1755 if (vm_data->avic_physical_id_table_page)
1756 __free_page(vm_data->avic_physical_id_table_page);
5881f737
SS
1757
1758 spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
1759 hash_del(&vm_data->hnode);
1760 spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
44a95dae
SS
1761}
1762
1654efcb
BS
1763static void svm_vm_destroy(struct kvm *kvm)
1764{
1765 avic_vm_destroy(kvm);
1766 sev_vm_destroy(kvm);
1767}
1768
44a95dae
SS
1769static int avic_vm_init(struct kvm *kvm)
1770{
5881f737 1771 unsigned long flags;
3f0d4db7 1772 int err = -ENOMEM;
44a95dae
SS
1773 struct kvm_arch *vm_data = &kvm->arch;
1774 struct page *p_page;
1775 struct page *l_page;
3f0d4db7
DV
1776 struct kvm_arch *ka;
1777 u32 vm_id;
44a95dae
SS
1778
1779 if (!avic)
1780 return 0;
1781
1782 /* Allocating physical APIC ID table (4KB) */
1783 p_page = alloc_page(GFP_KERNEL);
1784 if (!p_page)
1785 goto free_avic;
1786
1787 vm_data->avic_physical_id_table_page = p_page;
1788 clear_page(page_address(p_page));
1789
1790 /* Allocating logical APIC ID table (4KB) */
1791 l_page = alloc_page(GFP_KERNEL);
1792 if (!l_page)
1793 goto free_avic;
1794
1795 vm_data->avic_logical_id_table_page = l_page;
1796 clear_page(page_address(l_page));
1797
5881f737 1798 spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
3f0d4db7
DV
1799 again:
1800 vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
1801 if (vm_id == 0) { /* id is 1-based, zero is not okay */
1802 next_vm_id_wrapped = 1;
1803 goto again;
1804 }
1805 /* Is it still in use? Only possible if wrapped at least once */
1806 if (next_vm_id_wrapped) {
1807 hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) {
1808 struct kvm *k2 = container_of(ka, struct kvm, arch);
1809 struct kvm_arch *vd2 = &k2->arch;
1810 if (vd2->avic_vm_id == vm_id)
1811 goto again;
1812 }
1813 }
1814 vm_data->avic_vm_id = vm_id;
5881f737
SS
1815 hash_add(svm_vm_data_hash, &vm_data->hnode, vm_data->avic_vm_id);
1816 spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
1817
44a95dae
SS
1818 return 0;
1819
1820free_avic:
1821 avic_vm_destroy(kvm);
1822 return err;
6aa8b732
AK
1823}
1824
411b44ba
SS
1825static inline int
1826avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
8221c137 1827{
411b44ba
SS
1828 int ret = 0;
1829 unsigned long flags;
1830 struct amd_svm_iommu_ir *ir;
8221c137
SS
1831 struct vcpu_svm *svm = to_svm(vcpu);
1832
411b44ba
SS
1833 if (!kvm_arch_has_assigned_device(vcpu->kvm))
1834 return 0;
8221c137 1835
411b44ba
SS
1836 /*
1837 * Here, we go through the per-vcpu ir_list to update all existing
1838 * interrupt remapping table entry targeting this vcpu.
1839 */
1840 spin_lock_irqsave(&svm->ir_list_lock, flags);
8221c137 1841
411b44ba
SS
1842 if (list_empty(&svm->ir_list))
1843 goto out;
8221c137 1844
411b44ba
SS
1845 list_for_each_entry(ir, &svm->ir_list, node) {
1846 ret = amd_iommu_update_ga(cpu, r, ir->data);
1847 if (ret)
1848 break;
1849 }
1850out:
1851 spin_unlock_irqrestore(&svm->ir_list_lock, flags);
1852 return ret;
8221c137
SS
1853}
1854
1855static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1856{
1857 u64 entry;
1858 /* ID = 0xff (broadcast), ID > 0xff (reserved) */
7d669f50 1859 int h_physical_id = kvm_cpu_get_apicid(cpu);
8221c137
SS
1860 struct vcpu_svm *svm = to_svm(vcpu);
1861
1862 if (!kvm_vcpu_apicv_active(vcpu))
1863 return;
1864
1865 if (WARN_ON(h_physical_id >= AVIC_MAX_PHYSICAL_ID_COUNT))
1866 return;
1867
1868 entry = READ_ONCE(*(svm->avic_physical_id_cache));
1869 WARN_ON(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
1870
1871 entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
1872 entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
1873
1874 entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
1875 if (svm->avic_is_running)
1876 entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
1877
1878 WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
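	/*
	 * The entry updated above holds the host physical APIC ID in the
	 * bits covered by AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK and
	 * an IS_RUNNING flag that mirrors svm->avic_is_running.
	 */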
411b44ba
SS
1879 avic_update_iommu_vcpu_affinity(vcpu, h_physical_id,
1880 svm->avic_is_running);
8221c137
SS
1881}
1882
1883static void avic_vcpu_put(struct kvm_vcpu *vcpu)
1884{
1885 u64 entry;
1886 struct vcpu_svm *svm = to_svm(vcpu);
1887
1888 if (!kvm_vcpu_apicv_active(vcpu))
1889 return;
1890
1891 entry = READ_ONCE(*(svm->avic_physical_id_cache));
411b44ba
SS
1892 if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
1893 avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
1894
8221c137
SS
1895 entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
1896 WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
6aa8b732
AK
1897}
1898
411b44ba
SS
1899/**
1900 * This function is called during VCPU halt/unhalt.
1901 */
1902static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
1903{
1904 struct vcpu_svm *svm = to_svm(vcpu);
1905
1906 svm->avic_is_running = is_run;
1907 if (is_run)
1908 avic_vcpu_load(vcpu, vcpu->cpu);
1909 else
1910 avic_vcpu_put(vcpu);
1911}
1912
d28bc9dd 1913static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
04d2cc77
AK
1914{
1915 struct vcpu_svm *svm = to_svm(vcpu);
66f7b72e
JS
1916 u32 dummy;
1917 u32 eax = 1;
04d2cc77 1918
518e7b94 1919 vcpu->arch.microcode_version = 0x01000065;
b2ac58f9
KA
1920 svm->spec_ctrl = 0;
1921
d28bc9dd
NA
1922 if (!init_event) {
1923 svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
1924 MSR_IA32_APICBASE_ENABLE;
1925 if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
1926 svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
1927 }
5690891b 1928 init_vmcb(svm);
70433389 1929
e911eb3b 1930 kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true);
66f7b72e 1931 kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
44a95dae
SS
1932
1933 if (kvm_vcpu_apicv_active(vcpu) && !init_event)
1934 avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE);
04d2cc77
AK
1935}
1936
dfa20099
SS
1937static int avic_init_vcpu(struct vcpu_svm *svm)
1938{
1939 int ret;
1940
67034bb9 1941 if (!kvm_vcpu_apicv_active(&svm->vcpu))
dfa20099
SS
1942 return 0;
1943
1944 ret = avic_init_backing_page(&svm->vcpu);
1945 if (ret)
1946 return ret;
1947
1948 INIT_LIST_HEAD(&svm->ir_list);
1949 spin_lock_init(&svm->ir_list_lock);
1950
1951 return ret;
1952}
1953
fb3f0f51 1954static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
6aa8b732 1955{
a2fa3e9f 1956 struct vcpu_svm *svm;
6aa8b732 1957 struct page *page;
f65c229c 1958 struct page *msrpm_pages;
b286d5d8 1959 struct page *hsave_page;
3d6368ef 1960 struct page *nested_msrpm_pages;
fb3f0f51 1961 int err;
6aa8b732 1962
c16f862d 1963 svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
fb3f0f51
RR
1964 if (!svm) {
1965 err = -ENOMEM;
1966 goto out;
1967 }
1968
1969 err = kvm_vcpu_init(&svm->vcpu, kvm, id);
1970 if (err)
1971 goto free_svm;
1972
b7af4043 1973 err = -ENOMEM;
6aa8b732 1974 page = alloc_page(GFP_KERNEL);
b7af4043 1975 if (!page)
fb3f0f51 1976 goto uninit;
6aa8b732 1977
f65c229c
JR
1978 msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
1979 if (!msrpm_pages)
b7af4043 1980 goto free_page1;
3d6368ef
AG
1981
1982 nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
1983 if (!nested_msrpm_pages)
b7af4043 1984 goto free_page2;
f65c229c 1985
b286d5d8
AG
1986 hsave_page = alloc_page(GFP_KERNEL);
1987 if (!hsave_page)
b7af4043
TY
1988 goto free_page3;
1989
dfa20099
SS
1990 err = avic_init_vcpu(svm);
1991 if (err)
1992 goto free_page4;
44a95dae 1993
8221c137
SS
 1994 /* We initialize this flag to true so that the is_running
 1995 * bit is set the first time the vcpu is loaded.
1996 */
1997 svm->avic_is_running = true;
1998
e6aa9abd 1999 svm->nested.hsave = page_address(hsave_page);
b286d5d8 2000
b7af4043
TY
2001 svm->msrpm = page_address(msrpm_pages);
2002 svm_vcpu_init_msrpm(svm->msrpm);
2003
e6aa9abd 2004 svm->nested.msrpm = page_address(nested_msrpm_pages);
323c3d80 2005 svm_vcpu_init_msrpm(svm->nested.msrpm);
3d6368ef 2006
a2fa3e9f
GH
2007 svm->vmcb = page_address(page);
2008 clear_page(svm->vmcb);
d0ec49d4 2009 svm->vmcb_pa = __sme_set(page_to_pfn(page) << PAGE_SHIFT);
a2fa3e9f 2010 svm->asid_generation = 0;
5690891b 2011 init_vmcb(svm);
6aa8b732 2012
2b036c6b
BO
2013 svm_init_osvw(&svm->vcpu);
2014
fb3f0f51 2015 return &svm->vcpu;
36241b8c 2016
44a95dae
SS
2017free_page4:
2018 __free_page(hsave_page);
b7af4043
TY
2019free_page3:
2020 __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
2021free_page2:
2022 __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
2023free_page1:
2024 __free_page(page);
fb3f0f51
RR
2025uninit:
2026 kvm_vcpu_uninit(&svm->vcpu);
2027free_svm:
a4770347 2028 kmem_cache_free(kvm_vcpu_cache, svm);
fb3f0f51
RR
2029out:
2030 return ERR_PTR(err);
6aa8b732
AK
2031}
2032
2033static void svm_free_vcpu(struct kvm_vcpu *vcpu)
2034{
a2fa3e9f
GH
2035 struct vcpu_svm *svm = to_svm(vcpu);
2036
d0ec49d4 2037 __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
f65c229c 2038 __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
e6aa9abd
JR
2039 __free_page(virt_to_page(svm->nested.hsave));
2040 __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
fb3f0f51 2041 kvm_vcpu_uninit(vcpu);
a4770347 2042 kmem_cache_free(kvm_vcpu_cache, svm);
15d45071
AR
2043 /*
2044 * The vmcb page can be recycled, causing a false negative in
2045 * svm_vcpu_load(). So do a full IBPB now.
2046 */
2047 indirect_branch_prediction_barrier();
6aa8b732
AK
2048}
2049
15ad7146 2050static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
6aa8b732 2051{
a2fa3e9f 2052 struct vcpu_svm *svm = to_svm(vcpu);
15d45071 2053 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
15ad7146 2054 int i;
0cc5064d 2055
0cc5064d 2056 if (unlikely(cpu != vcpu->cpu)) {
4b656b12 2057 svm->asid_generation = 0;
8d28fec4 2058 mark_all_dirty(svm->vmcb);
0cc5064d 2059 }
94dfbdb3 2060
82ca2d10
AK
2061#ifdef CONFIG_X86_64
2062 rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base);
2063#endif
dacccfdd
AK
2064 savesegment(fs, svm->host.fs);
2065 savesegment(gs, svm->host.gs);
2066 svm->host.ldt = kvm_read_ldt();
2067
94dfbdb3 2068 for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
a2fa3e9f 2069 rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
fbc0db76 2070
ad721883
HZ
2071 if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
2072 u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio;
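		/*
		 * Only touch MSR_AMD64_TSC_RATIO when the desired ratio
		 * differs from the per-cpu cached value.
		 */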
2073 if (tsc_ratio != __this_cpu_read(current_tsc_ratio)) {
2074 __this_cpu_write(current_tsc_ratio, tsc_ratio);
2075 wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio);
2076 }
fbc0db76 2077 }
46896c73
PB
2078 /* This assumes that the kernel never uses MSR_TSC_AUX */
2079 if (static_cpu_has(X86_FEATURE_RDTSCP))
2080 wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
8221c137 2081
15d45071
AR
2082 if (sd->current_vmcb != svm->vmcb) {
2083 sd->current_vmcb = svm->vmcb;
2084 indirect_branch_prediction_barrier();
2085 }
8221c137 2086 avic_vcpu_load(vcpu, cpu);
6aa8b732
AK
2087}
2088
2089static void svm_vcpu_put(struct kvm_vcpu *vcpu)
2090{
a2fa3e9f 2091 struct vcpu_svm *svm = to_svm(vcpu);
94dfbdb3
AL
2092 int i;
2093
8221c137
SS
2094 avic_vcpu_put(vcpu);
2095
e1beb1d3 2096 ++vcpu->stat.host_state_reload;
dacccfdd
AK
2097 kvm_load_ldt(svm->host.ldt);
2098#ifdef CONFIG_X86_64
2099 loadsegment(fs, svm->host.fs);
296f781a 2100 wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase);
893a5ab6 2101 load_gs_index(svm->host.gs);
dacccfdd 2102#else
831ca609 2103#ifdef CONFIG_X86_32_LAZY_GS
dacccfdd 2104 loadsegment(gs, svm->host.gs);
831ca609 2105#endif
dacccfdd 2106#endif
94dfbdb3 2107 for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
a2fa3e9f 2108 wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
6aa8b732
AK
2109}
2110
8221c137
SS
2111static void svm_vcpu_blocking(struct kvm_vcpu *vcpu)
2112{
2113 avic_set_running(vcpu, false);
2114}
2115
2116static void svm_vcpu_unblocking(struct kvm_vcpu *vcpu)
2117{
2118 avic_set_running(vcpu, true);
2119}
2120
6aa8b732
AK
2121static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
2122{
9b611747
LP
2123 struct vcpu_svm *svm = to_svm(vcpu);
2124 unsigned long rflags = svm->vmcb->save.rflags;
2125
2126 if (svm->nmi_singlestep) {
2127 /* Hide our flags if they were not set by the guest */
2128 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
2129 rflags &= ~X86_EFLAGS_TF;
2130 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
2131 rflags &= ~X86_EFLAGS_RF;
2132 }
2133 return rflags;
6aa8b732
AK
2134}
2135
2136static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
2137{
9b611747
LP
2138 if (to_svm(vcpu)->nmi_singlestep)
2139 rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
2140
ae9fedc7 2141 /*
bb3541f1 2142 * Any change of EFLAGS.VM is accompanied by a reload of SS
ae9fedc7
PB
2143 * (caused by either a task switch or an inter-privilege IRET),
2144 * so we do not need to update the CPL here.
2145 */
a2fa3e9f 2146 to_svm(vcpu)->vmcb->save.rflags = rflags;
6aa8b732
AK
2147}
2148
6de4f3ad
AK
2149static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
2150{
2151 switch (reg) {
2152 case VCPU_EXREG_PDPTR:
2153 BUG_ON(!npt_enabled);
9f8fe504 2154 load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
6de4f3ad
AK
2155 break;
2156 default:
2157 BUG();
2158 }
2159}
2160
f0b85051
AG
2161static void svm_set_vintr(struct vcpu_svm *svm)
2162{
8a05a1b8 2163 set_intercept(svm, INTERCEPT_VINTR);
f0b85051
AG
2164}
2165
2166static void svm_clear_vintr(struct vcpu_svm *svm)
2167{
8a05a1b8 2168 clr_intercept(svm, INTERCEPT_VINTR);
f0b85051
AG
2169}
2170
6aa8b732
AK
2171static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
2172{
a2fa3e9f 2173 struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
6aa8b732
AK
2174
2175 switch (seg) {
2176 case VCPU_SREG_CS: return &save->cs;
2177 case VCPU_SREG_DS: return &save->ds;
2178 case VCPU_SREG_ES: return &save->es;
2179 case VCPU_SREG_FS: return &save->fs;
2180 case VCPU_SREG_GS: return &save->gs;
2181 case VCPU_SREG_SS: return &save->ss;
2182 case VCPU_SREG_TR: return &save->tr;
2183 case VCPU_SREG_LDTR: return &save->ldtr;
2184 }
2185 BUG();
8b6d44c7 2186 return NULL;
6aa8b732
AK
2187}
2188
2189static u64 svm_get_segment_base(struct kvm_vcpu *vcpu, int seg)
2190{
2191 struct vmcb_seg *s = svm_seg(vcpu, seg);
2192
2193 return s->base;
2194}
2195
2196static void svm_get_segment(struct kvm_vcpu *vcpu,
2197 struct kvm_segment *var, int seg)
2198{
2199 struct vmcb_seg *s = svm_seg(vcpu, seg);
2200
2201 var->base = s->base;
2202 var->limit = s->limit;
2203 var->selector = s->selector;
2204 var->type = s->attrib & SVM_SELECTOR_TYPE_MASK;
2205 var->s = (s->attrib >> SVM_SELECTOR_S_SHIFT) & 1;
2206 var->dpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
2207 var->present = (s->attrib >> SVM_SELECTOR_P_SHIFT) & 1;
2208 var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1;
2209 var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
2210 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
80112c89
JM
2211
2212 /*
2213 * AMD CPUs circa 2014 track the G bit for all segments except CS.
2214 * However, the SVM spec states that the G bit is not observed by the
2215 * CPU, and some VMware virtual CPUs drop the G bit for all segments.
 2216 * So let's synthesize a legal G bit for all segments; this helps
2217 * running KVM nested. It also helps cross-vendor migration, because
2218 * Intel's vmentry has a check on the 'G' bit.
2219 */
2220 var->g = s->limit > 0xfffff;
25022acc 2221
e0231715
JR
2222 /*
2223 * AMD's VMCB does not have an explicit unusable field, so emulate it
19bca6ab
AP
 2224 * for cross-vendor migration purposes by deriving it from "not present"
2225 */
8eae9570 2226 var->unusable = !var->present;
19bca6ab 2227
1fbdc7a5 2228 switch (seg) {
1fbdc7a5
AP
2229 case VCPU_SREG_TR:
2230 /*
2231 * Work around a bug where the busy flag in the tr selector
2232 * isn't exposed
2233 */
c0d09828 2234 var->type |= 0x2;
1fbdc7a5
AP
2235 break;
2236 case VCPU_SREG_DS:
2237 case VCPU_SREG_ES:
2238 case VCPU_SREG_FS:
2239 case VCPU_SREG_GS:
2240 /*
 2241 * The accessed bit must always be set in the segment
 2242 * descriptor cache: even if it is cleared in the in-memory
 2243 * descriptor, the cached bit remains 1. Since
2244 * Intel has a check on this, set it here to support
2245 * cross-vendor migration.
2246 */
2247 if (!var->unusable)
2248 var->type |= 0x1;
2249 break;
b586eb02 2250 case VCPU_SREG_SS:
e0231715
JR
2251 /*
2252 * On AMD CPUs sometimes the DB bit in the segment
b586eb02
AP
2253 * descriptor is left as 1, although the whole segment has
2254 * been made unusable. Clear it here to pass an Intel VMX
2255 * entry check when cross vendor migrating.
2256 */
2257 if (var->unusable)
2258 var->db = 0;
d9c1b543 2259 /* This is symmetric with svm_set_segment() */
33b458d2 2260 var->dpl = to_svm(vcpu)->vmcb->save.cpl;
b586eb02 2261 break;
1fbdc7a5 2262 }
6aa8b732
AK
2263}
2264
2e4d2653
IE
2265static int svm_get_cpl(struct kvm_vcpu *vcpu)
2266{
2267 struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
2268
2269 return save->cpl;
2270}
2271
89a27f4d 2272static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
6aa8b732 2273{
a2fa3e9f
GH
2274 struct vcpu_svm *svm = to_svm(vcpu);
2275
89a27f4d
GN
2276 dt->size = svm->vmcb->save.idtr.limit;
2277 dt->address = svm->vmcb->save.idtr.base;
6aa8b732
AK
2278}
2279
89a27f4d 2280static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
6aa8b732 2281{
a2fa3e9f
GH
2282 struct vcpu_svm *svm = to_svm(vcpu);
2283
89a27f4d
GN
2284 svm->vmcb->save.idtr.limit = dt->size;
 2285 svm->vmcb->save.idtr.base = dt->address;
17a703cb 2286 mark_dirty(svm->vmcb, VMCB_DT);
6aa8b732
AK
2287}
2288
89a27f4d 2289static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
6aa8b732 2290{
a2fa3e9f
GH
2291 struct vcpu_svm *svm = to_svm(vcpu);
2292
89a27f4d
GN
2293 dt->size = svm->vmcb->save.gdtr.limit;
2294 dt->address = svm->vmcb->save.gdtr.base;
6aa8b732
AK
2295}
2296
89a27f4d 2297static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
6aa8b732 2298{
a2fa3e9f
GH
2299 struct vcpu_svm *svm = to_svm(vcpu);
2300
89a27f4d
GN
2301 svm->vmcb->save.gdtr.limit = dt->size;
 2302 svm->vmcb->save.gdtr.base = dt->address;
17a703cb 2303 mark_dirty(svm->vmcb, VMCB_DT);
6aa8b732
AK
2304}
2305
e8467fda
AK
2306static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
2307{
2308}
2309
aff48baa
AK
2310static void svm_decache_cr3(struct kvm_vcpu *vcpu)
2311{
2312}
2313
25c4c276 2314static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
399badf3
AK
2315{
2316}
2317
d225157b
AK
2318static void update_cr0_intercept(struct vcpu_svm *svm)
2319{
2320 ulong gcr0 = svm->vcpu.arch.cr0;
2321 u64 *hcr0 = &svm->vmcb->save.cr0;
2322
bd7e5b08
PB
2323 *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
2324 | (gcr0 & SVM_CR0_SELECTIVE_MASK);
d225157b 2325
dcca1a65 2326 mark_dirty(svm->vmcb, VMCB_CR);
d225157b 2327
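	/*
	 * When the guest-visible CR0 matches the CR0 actually loaded into
	 * the VMCB, no bits are being shadowed and the selective CR0
	 * read/write intercepts can be dropped; otherwise keep them so
	 * accesses to the differing bits are trapped and emulated.
	 */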
bd7e5b08 2328 if (gcr0 == *hcr0) {
4ee546b4
RJ
2329 clr_cr_intercept(svm, INTERCEPT_CR0_READ);
2330 clr_cr_intercept(svm, INTERCEPT_CR0_WRITE);
d225157b 2331 } else {
4ee546b4
RJ
2332 set_cr_intercept(svm, INTERCEPT_CR0_READ);
2333 set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
d225157b
AK
2334 }
2335}
2336
6aa8b732
AK
2337static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
2338{
a2fa3e9f
GH
2339 struct vcpu_svm *svm = to_svm(vcpu);
2340
05b3e0c2 2341#ifdef CONFIG_X86_64
f6801dff 2342 if (vcpu->arch.efer & EFER_LME) {
707d92fa 2343 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
f6801dff 2344 vcpu->arch.efer |= EFER_LMA;
2b5203ee 2345 svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
6aa8b732
AK
2346 }
2347
d77c26fc 2348 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
f6801dff 2349 vcpu->arch.efer &= ~EFER_LMA;
2b5203ee 2350 svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
6aa8b732
AK
2351 }
2352 }
2353#endif
ad312c7c 2354 vcpu->arch.cr0 = cr0;
888f9f3e
AK
2355
2356 if (!npt_enabled)
2357 cr0 |= X86_CR0_PG | X86_CR0_WP;
02daab21 2358
bcf166a9
PB
2359 /*
 2360 * Re-enable caching here because the QEMU BIOS
 2361 * does not do it - leaving CD/NW set results in
 2362 * some delay at reboot.
2363 */
2364 if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
2365 cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
a2fa3e9f 2366 svm->vmcb->save.cr0 = cr0;
dcca1a65 2367 mark_dirty(svm->vmcb, VMCB_CR);
d225157b 2368 update_cr0_intercept(svm);
6aa8b732
AK
2369}
2370
5e1746d6 2371static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
6aa8b732 2372{
1e02ce4c 2373 unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE;
e5eab0ce
JR
2374 unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;
2375
5e1746d6
NHE
2376 if (cr4 & X86_CR4_VMXE)
2377 return 1;
2378
e5eab0ce 2379 if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
c2ba05cc 2380 svm_flush_tlb(vcpu, true);
6394b649 2381
ec077263
JR
2382 vcpu->arch.cr4 = cr4;
2383 if (!npt_enabled)
2384 cr4 |= X86_CR4_PAE;
6394b649 2385 cr4 |= host_cr4_mce;
ec077263 2386 to_svm(vcpu)->vmcb->save.cr4 = cr4;
dcca1a65 2387 mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
5e1746d6 2388 return 0;
6aa8b732
AK
2389}
2390
2391static void svm_set_segment(struct kvm_vcpu *vcpu,
2392 struct kvm_segment *var, int seg)
2393{
a2fa3e9f 2394 struct vcpu_svm *svm = to_svm(vcpu);
6aa8b732
AK
2395 struct vmcb_seg *s = svm_seg(vcpu, seg);
2396
2397 s->base = var->base;
2398 s->limit = var->limit;
2399 s->selector = var->selector;
d9c1b543
RP
2400 s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
2401 s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
2402 s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
2403 s->attrib |= ((var->present & 1) && !var->unusable) << SVM_SELECTOR_P_SHIFT;
2404 s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
2405 s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
2406 s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
2407 s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
ae9fedc7
PB
2408
2409 /*
2410 * This is always accurate, except if SYSRET returned to a segment
2411 * with SS.DPL != 3. Intel does not have this quirk, and always
2412 * forces SS.DPL to 3 on sysret, so we ignore that case; fixing it
2413 * would entail passing the CPL to userspace and back.
2414 */
2415 if (seg == VCPU_SREG_SS)
d9c1b543
RP
2416 /* This is symmetric with svm_get_segment() */
2417 svm->vmcb->save.cpl = (var->dpl & 3);
6aa8b732 2418
060d0c9a 2419 mark_dirty(svm->vmcb, VMCB_SEG);
6aa8b732
AK
2420}
2421
cbdb967a 2422static void update_bp_intercept(struct kvm_vcpu *vcpu)
6aa8b732 2423{
d0bfb940
JK
2424 struct vcpu_svm *svm = to_svm(vcpu);
2425
18c918c5 2426 clr_exception_intercept(svm, BP_VECTOR);
44c11430 2427
d0bfb940 2428 if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
d0bfb940 2429 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
18c918c5 2430 set_exception_intercept(svm, BP_VECTOR);
d0bfb940
JK
2431 } else
2432 vcpu->guest_debug = 0;
44c11430
GN
2433}
2434
0fe1e009 2435static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
6aa8b732 2436{
0fe1e009
TH
2437 if (sd->next_asid > sd->max_asid) {
2438 ++sd->asid_generation;
4faefff3 2439 sd->next_asid = sd->min_asid;
a2fa3e9f 2440 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
6aa8b732
AK
2441 }
2442
0fe1e009
TH
2443 svm->asid_generation = sd->asid_generation;
2444 svm->vmcb->control.asid = sd->next_asid++;
d48086d1
JR
2445
2446 mark_dirty(svm->vmcb, VMCB_ASID);
6aa8b732
AK
2447}
2448
73aaf249
JK
2449static u64 svm_get_dr6(struct kvm_vcpu *vcpu)
2450{
2451 return to_svm(vcpu)->vmcb->save.dr6;
2452}
2453
2454static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value)
2455{
2456 struct vcpu_svm *svm = to_svm(vcpu);
2457
2458 svm->vmcb->save.dr6 = value;
2459 mark_dirty(svm->vmcb, VMCB_DR);
2460}
2461
facb0139
PB
2462static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
2463{
2464 struct vcpu_svm *svm = to_svm(vcpu);
2465
2466 get_debugreg(vcpu->arch.db[0], 0);
2467 get_debugreg(vcpu->arch.db[1], 1);
2468 get_debugreg(vcpu->arch.db[2], 2);
2469 get_debugreg(vcpu->arch.db[3], 3);
2470 vcpu->arch.dr6 = svm_get_dr6(vcpu);
2471 vcpu->arch.dr7 = svm->vmcb->save.dr7;
2472
2473 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
2474 set_dr_intercepts(svm);
2475}
2476
020df079 2477static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
6aa8b732 2478{
42dbaa5a 2479 struct vcpu_svm *svm = to_svm(vcpu);
42dbaa5a 2480
020df079 2481 svm->vmcb->save.dr7 = value;
72214b96 2482 mark_dirty(svm->vmcb, VMCB_DR);
6aa8b732
AK
2483}
2484
851ba692 2485static int pf_interception(struct vcpu_svm *svm)
6aa8b732 2486{
0ede79e1 2487 u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
1261bfa3 2488 u64 error_code = svm->vmcb->control.exit_info_1;
6aa8b732 2489
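	/*
	 * When decode assists are available the CPU has already copied the
	 * faulting instruction bytes into the VMCB, so pass them along;
	 * otherwise pass NULL and let the fault handler fetch the
	 * instruction itself if it needs it.
	 */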
1261bfa3 2490 return kvm_handle_page_fault(&svm->vcpu, error_code, fault_address,
00b10fe1
BS
2491 static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
2492 svm->vmcb->control.insn_bytes : NULL,
d0006530
PB
2493 svm->vmcb->control.insn_len);
2494}
2495
2496static int npf_interception(struct vcpu_svm *svm)
2497{
0ede79e1 2498 u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
d0006530
PB
2499 u64 error_code = svm->vmcb->control.exit_info_1;
2500
2501 trace_kvm_page_fault(fault_address, error_code);
2502 return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
00b10fe1
BS
2503 static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
2504 svm->vmcb->control.insn_bytes : NULL,
d0006530 2505 svm->vmcb->control.insn_len);
6aa8b732
AK
2506}
2507
851ba692 2508static int db_interception(struct vcpu_svm *svm)
d0bfb940 2509{
851ba692
AK
2510 struct kvm_run *kvm_run = svm->vcpu.run;
2511
d0bfb940 2512 if (!(svm->vcpu.guest_debug &
44c11430 2513 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
6be7d306 2514 !svm->nmi_singlestep) {
d0bfb940
JK
2515 kvm_queue_exception(&svm->vcpu, DB_VECTOR);
2516 return 1;
2517 }
44c11430 2518
6be7d306 2519 if (svm->nmi_singlestep) {
4aebd0e9 2520 disable_nmi_singlestep(svm);
44c11430
GN
2521 }
2522
2523 if (svm->vcpu.guest_debug &
e0231715 2524 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) {
44c11430
GN
2525 kvm_run->exit_reason = KVM_EXIT_DEBUG;
2526 kvm_run->debug.arch.pc =
2527 svm->vmcb->save.cs.base + svm->vmcb->save.rip;
2528 kvm_run->debug.arch.exception = DB_VECTOR;
2529 return 0;
2530 }
2531
2532 return 1;
d0bfb940
JK
2533}
2534
851ba692 2535static int bp_interception(struct vcpu_svm *svm)
d0bfb940 2536{
851ba692
AK
2537 struct kvm_run *kvm_run = svm->vcpu.run;
2538
d0bfb940
JK
2539 kvm_run->exit_reason = KVM_EXIT_DEBUG;
2540 kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
2541 kvm_run->debug.arch.exception = BP_VECTOR;
2542 return 0;
2543}
2544
851ba692 2545static int ud_interception(struct vcpu_svm *svm)
7aa81cc0
AL
2546{
2547 int er;
2548
51d8b661 2549 er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
61cb57c9
LA
2550 if (er == EMULATE_USER_EXIT)
2551 return 0;
7aa81cc0 2552 if (er != EMULATE_DONE)
7ee5d940 2553 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
7aa81cc0
AL
2554 return 1;
2555}
2556
54a20552
EN
2557static int ac_interception(struct vcpu_svm *svm)
2558{
2559 kvm_queue_exception_e(&svm->vcpu, AC_VECTOR, 0);
2560 return 1;
2561}
2562
9718420e
LA
2563static int gp_interception(struct vcpu_svm *svm)
2564{
2565 struct kvm_vcpu *vcpu = &svm->vcpu;
2566 u32 error_code = svm->vmcb->control.exit_info_1;
2567 int er;
2568
2569 WARN_ON_ONCE(!enable_vmware_backdoor);
2570
2571 er = emulate_instruction(vcpu,
2572 EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
2573 if (er == EMULATE_USER_EXIT)
2574 return 0;
2575 else if (er != EMULATE_DONE)
2576 kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
2577 return 1;
2578}
2579
67ec6607
JR
2580static bool is_erratum_383(void)
2581{
2582 int err, i;
2583 u64 value;
2584
2585 if (!erratum_383_found)
2586 return false;
2587
2588 value = native_read_msr_safe(MSR_IA32_MC0_STATUS, &err);
2589 if (err)
2590 return false;
2591
2592 /* Bit 62 may or may not be set for this mce */
2593 value &= ~(1ULL << 62);
2594
2595 if (value != 0xb600000000010015ULL)
2596 return false;
2597
2598 /* Clear MCi_STATUS registers */
2599 for (i = 0; i < 6; ++i)
2600 native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0);
2601
2602 value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err);
2603 if (!err) {
2604 u32 low, high;
2605
2606 value &= ~(1ULL << 2);
2607 low = lower_32_bits(value);
2608 high = upper_32_bits(value);
2609
2610 native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high);
2611 }
2612
2613 /* Flush tlb to evict multi-match entries */
2614 __flush_tlb_all();
2615
2616 return true;
2617}
2618
fe5913e4 2619static void svm_handle_mce(struct vcpu_svm *svm)
53371b50 2620{
67ec6607
JR
2621 if (is_erratum_383()) {
2622 /*
2623 * Erratum 383 triggered. Guest state is corrupt so kill the
2624 * guest.
2625 */
2626 pr_err("KVM: Guest triggered AMD Erratum 383\n");
2627
a8eeb04a 2628 kvm_make_request(KVM_REQ_TRIPLE_FAULT, &svm->vcpu);
67ec6607
JR
2629
2630 return;
2631 }
2632
53371b50
JR
2633 /*
2634 * On an #MC intercept the MCE handler is not called automatically in
2635 * the host. So do it by hand here.
2636 */
2637 asm volatile (
2638 "int $0x12\n");
2639 /* not sure if we ever come back to this point */
2640
fe5913e4
JR
2641 return;
2642}
2643
2644static int mc_interception(struct vcpu_svm *svm)
2645{
53371b50
JR
2646 return 1;
2647}
2648
851ba692 2649static int shutdown_interception(struct vcpu_svm *svm)
46fe4ddd 2650{
851ba692
AK
2651 struct kvm_run *kvm_run = svm->vcpu.run;
2652
46fe4ddd
JR
2653 /*
2654 * VMCB is undefined after a SHUTDOWN intercept
2655 * so reinitialize it.
2656 */
a2fa3e9f 2657 clear_page(svm->vmcb);
5690891b 2658 init_vmcb(svm);
46fe4ddd
JR
2659
2660 kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
2661 return 0;
2662}
2663
851ba692 2664static int io_interception(struct vcpu_svm *svm)
6aa8b732 2665{
cf8f70bf 2666 struct kvm_vcpu *vcpu = &svm->vcpu;
d77c26fc 2667 u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
dca7f128 2668 int size, in, string;
039576c0 2669 unsigned port;
6aa8b732 2670
e756fc62 2671 ++svm->vcpu.stat.io_exits;
e70669ab 2672 string = (io_info & SVM_IOIO_STR_MASK) != 0;
039576c0 2673 in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
8370c3d0 2674 if (string)
51d8b661 2675 return emulate_instruction(vcpu, 0) == EMULATE_DONE;
cf8f70bf 2676
039576c0
AK
2677 port = io_info >> 16;
2678 size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
cf8f70bf 2679 svm->next_rip = svm->vmcb->control.exit_info_2;
cf8f70bf 2680
dca7f128 2681 return kvm_fast_pio(&svm->vcpu, size, port, in);
6aa8b732
AK
2682}
2683
851ba692 2684static int nmi_interception(struct vcpu_svm *svm)
c47f098d
JR
2685{
2686 return 1;
2687}
2688
851ba692 2689static int intr_interception(struct vcpu_svm *svm)
a0698055
JR
2690{
2691 ++svm->vcpu.stat.irq_exits;
2692 return 1;
2693}
2694
851ba692 2695static int nop_on_interception(struct vcpu_svm *svm)
6aa8b732
AK
2696{
2697 return 1;
2698}
2699
851ba692 2700static int halt_interception(struct vcpu_svm *svm)
6aa8b732 2701{
5fdbf976 2702 svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
e756fc62 2703 return kvm_emulate_halt(&svm->vcpu);
6aa8b732
AK
2704}
2705
851ba692 2706static int vmmcall_interception(struct vcpu_svm *svm)
02e235bc 2707{
5fdbf976 2708 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
0d9c055e 2709 return kvm_emulate_hypercall(&svm->vcpu);
02e235bc
AK
2710}
2711
5bd2edc3
JR
2712static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
2713{
2714 struct vcpu_svm *svm = to_svm(vcpu);
2715
2716 return svm->nested.nested_cr3;
2717}
2718
e4e517b4
AK
2719static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
2720{
2721 struct vcpu_svm *svm = to_svm(vcpu);
2722 u64 cr3 = svm->nested.nested_cr3;
2723 u64 pdpte;
2724 int ret;
2725
d0ec49d4 2726 ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(__sme_clr(cr3)), &pdpte,
54bf36aa 2727 offset_in_page(cr3) + index * 8, 8);
e4e517b4
AK
2728 if (ret)
2729 return 0;
2730 return pdpte;
2731}
2732
5bd2edc3
JR
2733static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
2734 unsigned long root)
2735{
2736 struct vcpu_svm *svm = to_svm(vcpu);
2737
d0ec49d4 2738 svm->vmcb->control.nested_cr3 = __sme_set(root);
b2747166 2739 mark_dirty(svm->vmcb, VMCB_NPT);
c2ba05cc 2740 svm_flush_tlb(vcpu, true);
5bd2edc3
JR
2741}
2742
6389ee94
AK
2743static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
2744 struct x86_exception *fault)
5bd2edc3
JR
2745{
2746 struct vcpu_svm *svm = to_svm(vcpu);
2747
5e352519
PB
2748 if (svm->vmcb->control.exit_code != SVM_EXIT_NPF) {
2749 /*
2750 * TODO: track the cause of the nested page fault, and
2751 * correctly fill in the high bits of exit_info_1.
2752 */
2753 svm->vmcb->control.exit_code = SVM_EXIT_NPF;
2754 svm->vmcb->control.exit_code_hi = 0;
2755 svm->vmcb->control.exit_info_1 = (1ULL << 32);
2756 svm->vmcb->control.exit_info_2 = fault->address;
2757 }
2758
2759 svm->vmcb->control.exit_info_1 &= ~0xffffffffULL;
2760 svm->vmcb->control.exit_info_1 |= fault->error_code;
2761
2762 /*
2763 * The present bit is always zero for page structure faults on real
2764 * hardware.
2765 */
2766 if (svm->vmcb->control.exit_info_1 & (2ULL << 32))
2767 svm->vmcb->control.exit_info_1 &= ~1;
5bd2edc3
JR
2768
2769 nested_svm_vmexit(svm);
2770}
2771
8a3c1a33 2772static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
4b16184c 2773{
ad896af0
PB
2774 WARN_ON(mmu_is_nested(vcpu));
2775 kvm_init_shadow_mmu(vcpu);
4b16184c
JR
2776 vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3;
2777 vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3;
e4e517b4 2778 vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr;
4b16184c 2779 vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
855feb67 2780 vcpu->arch.mmu.shadow_root_level = get_npt_level(vcpu);
c258b62b 2781 reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu);
4b16184c 2782 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
4b16184c
JR
2783}
2784
2785static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
2786{
2787 vcpu->arch.walk_mmu = &vcpu->arch.mmu;
2788}
2789
c0725420
AG
2790static int nested_svm_check_permissions(struct vcpu_svm *svm)
2791{
e9196ceb
DC
2792 if (!(svm->vcpu.arch.efer & EFER_SVME) ||
2793 !is_paging(&svm->vcpu)) {
c0725420
AG
2794 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2795 return 1;
2796 }
2797
2798 if (svm->vmcb->save.cpl) {
2799 kvm_inject_gp(&svm->vcpu, 0);
2800 return 1;
2801 }
2802
e9196ceb 2803 return 0;
c0725420
AG
2804}
2805
cf74a78b
AG
2806static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
2807 bool has_error_code, u32 error_code)
2808{
b8e88bc8
JR
2809 int vmexit;
2810
2030753d 2811 if (!is_guest_mode(&svm->vcpu))
0295ad7d 2812 return 0;
cf74a78b 2813
adfe20fb
WL
2814 vmexit = nested_svm_intercept(svm);
2815 if (vmexit != NESTED_EXIT_DONE)
2816 return 0;
2817
0295ad7d
JR
2818 svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
2819 svm->vmcb->control.exit_code_hi = 0;
2820 svm->vmcb->control.exit_info_1 = error_code;
b96fb439
PB
2821
2822 /*
2823 * FIXME: we should not write CR2 when L1 intercepts an L2 #PF exception.
2824 * The fix is to add the ancillary datum (CR2 or DR6) to structs
2825 * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 can be
 2826 * written only when inject_pending_event runs (DR6 would be written here
2827 * too). This should be conditional on a new capability---if the
2828 * capability is disabled, kvm_multiple_exception would write the
2829 * ancillary information to CR2 or DR6, for backwards ABI-compatibility.
2830 */
adfe20fb
WL
2831 if (svm->vcpu.arch.exception.nested_apf)
2832 svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
2833 else
2834 svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
b8e88bc8 2835
adfe20fb 2836 svm->nested.exit_required = true;
b8e88bc8 2837 return vmexit;
cf74a78b
AG
2838}
2839
8fe54654
JR
2840/* This function returns true if it is safe to enable the irq window */
2841static inline bool nested_svm_intr(struct vcpu_svm *svm)
cf74a78b 2842{
2030753d 2843 if (!is_guest_mode(&svm->vcpu))
8fe54654 2844 return true;
cf74a78b 2845
26666957 2846 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
8fe54654 2847 return true;
cf74a78b 2848
26666957 2849 if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
8fe54654 2850 return false;
cf74a78b 2851
a0a07cd2
GN
2852 /*
2853 * if vmexit was already requested (by intercepted exception
2854 * for instance) do not overwrite it with "external interrupt"
2855 * vmexit.
2856 */
2857 if (svm->nested.exit_required)
2858 return false;
2859
197717d5
JR
2860 svm->vmcb->control.exit_code = SVM_EXIT_INTR;
2861 svm->vmcb->control.exit_info_1 = 0;
2862 svm->vmcb->control.exit_info_2 = 0;
26666957 2863
cd3ff653
JR
2864 if (svm->nested.intercept & 1ULL) {
2865 /*
2866 * The #vmexit can't be emulated here directly because this
c5ec2e56 2867 * code path runs with irqs and preemption disabled. A
cd3ff653
JR
2868 * #vmexit emulation might sleep. Only signal request for
2869 * the #vmexit here.
2870 */
2871 svm->nested.exit_required = true;
236649de 2872 trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
8fe54654 2873 return false;
cf74a78b
AG
2874 }
2875
8fe54654 2876 return true;
cf74a78b
AG
2877}
2878
887f500c
JR
2879/* This function returns true if it is safe to enable the nmi window */
2880static inline bool nested_svm_nmi(struct vcpu_svm *svm)
2881{
2030753d 2882 if (!is_guest_mode(&svm->vcpu))
887f500c
JR
2883 return true;
2884
2885 if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI)))
2886 return true;
2887
2888 svm->vmcb->control.exit_code = SVM_EXIT_NMI;
2889 svm->nested.exit_required = true;
2890
2891 return false;
cf74a78b
AG
2892}
2893
7597f129 2894static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
34f80cfa
JR
2895{
2896 struct page *page;
2897
6c3bd3d7
JR
2898 might_sleep();
2899
54bf36aa 2900 page = kvm_vcpu_gfn_to_page(&svm->vcpu, gpa >> PAGE_SHIFT);
34f80cfa
JR
2901 if (is_error_page(page))
2902 goto error;
2903
7597f129
JR
2904 *_page = page;
2905
2906 return kmap(page);
34f80cfa
JR
2907
2908error:
34f80cfa
JR
2909 kvm_inject_gp(&svm->vcpu, 0);
2910
2911 return NULL;
2912}
2913
7597f129 2914static void nested_svm_unmap(struct page *page)
34f80cfa 2915{
7597f129 2916 kunmap(page);
34f80cfa
JR
2917 kvm_release_page_dirty(page);
2918}
34f80cfa 2919
ce2ac085
JR
2920static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
2921{
9bf41833
JK
2922 unsigned port, size, iopm_len;
2923 u16 val, mask;
2924 u8 start_bit;
ce2ac085 2925 u64 gpa;
34f80cfa 2926
ce2ac085
JR
2927 if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
2928 return NESTED_EXIT_HOST;
34f80cfa 2929
ce2ac085 2930 port = svm->vmcb->control.exit_info_1 >> 16;
9bf41833
JK
2931 size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >>
2932 SVM_IOIO_SIZE_SHIFT;
ce2ac085 2933 gpa = svm->nested.vmcb_iopm + (port / 8);
9bf41833
JK
2934 start_bit = port % 8;
2935 iopm_len = (start_bit + size > 8) ? 2 : 1;
2936 mask = (0xf >> (4 - size)) << start_bit;
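	/*
	 * For example, a 2-byte access to port 0x3f9 yields
	 * gpa = vmcb_iopm + 127, start_bit = 1, iopm_len = 1 and
	 * mask = 0x6, i.e. the permission bits for ports 0x3f9 and 0x3fa
	 * in L1's I/O permission map.
	 */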
2937 val = 0;
ce2ac085 2938
54bf36aa 2939 if (kvm_vcpu_read_guest(&svm->vcpu, gpa, &val, iopm_len))
9bf41833 2940 return NESTED_EXIT_DONE;
ce2ac085 2941
9bf41833 2942 return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
34f80cfa
JR
2943}
2944
d2477826 2945static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
4c2161ae 2946{
0d6b3537
JR
2947 u32 offset, msr, value;
2948 int write, mask;
4c2161ae 2949
3d62d9aa 2950 if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
d2477826 2951 return NESTED_EXIT_HOST;
3d62d9aa 2952
0d6b3537
JR
2953 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
2954 offset = svm_msrpm_offset(msr);
2955 write = svm->vmcb->control.exit_info_1 & 1;
2956 mask = 1 << ((2 * (msr & 0xf)) + write);
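	/*
	 * Each MSR occupies two adjacent bits (read, then write) in the
	 * 32-bit word that is read from L1's MSRPM below; e.g. a write
	 * (write == 1) to an MSR with low nibble 0x2 tests bit
	 * (2 * 2 + 1) = 5 of that word.
	 */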
3d62d9aa 2957
0d6b3537
JR
2958 if (offset == MSR_INVALID)
2959 return NESTED_EXIT_DONE;
4c2161ae 2960
0d6b3537
JR
 2961 /* Offset is in 32-bit units but we need a byte offset */
2962 offset *= 4;
4c2161ae 2963
54bf36aa 2964 if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.vmcb_msrpm + offset, &value, 4))
0d6b3537 2965 return NESTED_EXIT_DONE;
3d62d9aa 2966
0d6b3537 2967 return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
4c2161ae
JR
2968}
2969
ab2f4d73
LP
2970/* DB exceptions for our internal use must not cause vmexit */
2971static int nested_svm_intercept_db(struct vcpu_svm *svm)
2972{
2973 unsigned long dr6;
2974
2975 /* if we're not singlestepping, it's not ours */
2976 if (!svm->nmi_singlestep)
2977 return NESTED_EXIT_DONE;
2978
2979 /* if it's not a singlestep exception, it's not ours */
2980 if (kvm_get_dr(&svm->vcpu, 6, &dr6))
2981 return NESTED_EXIT_DONE;
2982 if (!(dr6 & DR6_BS))
2983 return NESTED_EXIT_DONE;
2984
2985 /* if the guest is singlestepping, it should get the vmexit */
2986 if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) {
2987 disable_nmi_singlestep(svm);
2988 return NESTED_EXIT_DONE;
2989 }
2990
2991 /* it's ours, the nested hypervisor must not see this one */
2992 return NESTED_EXIT_HOST;
2993}
2994
410e4d57 2995static int nested_svm_exit_special(struct vcpu_svm *svm)
cf74a78b 2996{
cf74a78b 2997 u32 exit_code = svm->vmcb->control.exit_code;
4c2161ae 2998
410e4d57
JR
2999 switch (exit_code) {
3000 case SVM_EXIT_INTR:
3001 case SVM_EXIT_NMI:
ff47a49b 3002 case SVM_EXIT_EXCP_BASE + MC_VECTOR:
410e4d57 3003 return NESTED_EXIT_HOST;
410e4d57 3004 case SVM_EXIT_NPF:
e0231715 3005 /* For now we are always handling NPFs when using them */
410e4d57
JR
3006 if (npt_enabled)
3007 return NESTED_EXIT_HOST;
3008 break;
410e4d57 3009 case SVM_EXIT_EXCP_BASE + PF_VECTOR:
631bc487 3010 /* When we're shadowing, trap PFs, but not async PF */
1261bfa3 3011 if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0)
410e4d57
JR
3012 return NESTED_EXIT_HOST;
3013 break;
3014 default:
3015 break;
cf74a78b
AG
3016 }
3017
410e4d57
JR
3018 return NESTED_EXIT_CONTINUE;
3019}
3020
3021/*
3022 * If this function returns true, this #vmexit was already handled
3023 */
b8e88bc8 3024static int nested_svm_intercept(struct vcpu_svm *svm)
410e4d57
JR
3025{
3026 u32 exit_code = svm->vmcb->control.exit_code;
3027 int vmexit = NESTED_EXIT_HOST;
3028
cf74a78b 3029 switch (exit_code) {
9c4e40b9 3030 case SVM_EXIT_MSR:
3d62d9aa 3031 vmexit = nested_svm_exit_handled_msr(svm);
9c4e40b9 3032 break;
ce2ac085
JR
3033 case SVM_EXIT_IOIO:
3034 vmexit = nested_svm_intercept_ioio(svm);
3035 break;
4ee546b4
RJ
3036 case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
3037 u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0);
3038 if (svm->nested.intercept_cr & bit)
410e4d57 3039 vmexit = NESTED_EXIT_DONE;
cf74a78b
AG
3040 break;
3041 }
3aed041a
JR
3042 case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
3043 u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0);
3044 if (svm->nested.intercept_dr & bit)
410e4d57 3045 vmexit = NESTED_EXIT_DONE;
cf74a78b
AG
3046 break;
3047 }
3048 case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
3049 u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
ab2f4d73
LP
3050 if (svm->nested.intercept_exceptions & excp_bits) {
3051 if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR)
3052 vmexit = nested_svm_intercept_db(svm);
3053 else
3054 vmexit = NESTED_EXIT_DONE;
3055 }
631bc487
GN
 3056 /* an async page fault always causes a vmexit */
3057 else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
adfe20fb 3058 svm->vcpu.arch.exception.nested_apf != 0)
631bc487 3059 vmexit = NESTED_EXIT_DONE;
cf74a78b
AG
3060 break;
3061 }
228070b1
JR
3062 case SVM_EXIT_ERR: {
3063 vmexit = NESTED_EXIT_DONE;
3064 break;
3065 }
cf74a78b
AG
3066 default: {
3067 u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
aad42c64 3068 if (svm->nested.intercept & exit_bits)
410e4d57 3069 vmexit = NESTED_EXIT_DONE;
cf74a78b
AG
3070 }
3071 }
3072
b8e88bc8
JR
3073 return vmexit;
3074}
3075
3076static int nested_svm_exit_handled(struct vcpu_svm *svm)
3077{
3078 int vmexit;
3079
3080 vmexit = nested_svm_intercept(svm);
3081
3082 if (vmexit == NESTED_EXIT_DONE)
9c4e40b9 3083 nested_svm_vmexit(svm);
9c4e40b9
JR
3084
3085 return vmexit;
cf74a78b
AG
3086}
3087
0460a979
JR
3088static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb)
3089{
3090 struct vmcb_control_area *dst = &dst_vmcb->control;
3091 struct vmcb_control_area *from = &from_vmcb->control;
3092
4ee546b4 3093 dst->intercept_cr = from->intercept_cr;
3aed041a 3094 dst->intercept_dr = from->intercept_dr;
0460a979
JR
3095 dst->intercept_exceptions = from->intercept_exceptions;
3096 dst->intercept = from->intercept;
3097 dst->iopm_base_pa = from->iopm_base_pa;
3098 dst->msrpm_base_pa = from->msrpm_base_pa;
3099 dst->tsc_offset = from->tsc_offset;
3100 dst->asid = from->asid;
3101 dst->tlb_ctl = from->tlb_ctl;
3102 dst->int_ctl = from->int_ctl;
3103 dst->int_vector = from->int_vector;
3104 dst->int_state = from->int_state;
3105 dst->exit_code = from->exit_code;
3106 dst->exit_code_hi = from->exit_code_hi;
3107 dst->exit_info_1 = from->exit_info_1;
3108 dst->exit_info_2 = from->exit_info_2;
3109 dst->exit_int_info = from->exit_int_info;
3110 dst->exit_int_info_err = from->exit_int_info_err;
3111 dst->nested_ctl = from->nested_ctl;
3112 dst->event_inj = from->event_inj;
3113 dst->event_inj_err = from->event_inj_err;
3114 dst->nested_cr3 = from->nested_cr3;
0dc92119 3115 dst->virt_ext = from->virt_ext;
0460a979
JR
3116}
3117
34f80cfa 3118static int nested_svm_vmexit(struct vcpu_svm *svm)
cf74a78b 3119{
34f80cfa 3120 struct vmcb *nested_vmcb;
e6aa9abd 3121 struct vmcb *hsave = svm->nested.hsave;
33740e40 3122 struct vmcb *vmcb = svm->vmcb;
7597f129 3123 struct page *page;
cf74a78b 3124
17897f36
JR
3125 trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
3126 vmcb->control.exit_info_1,
3127 vmcb->control.exit_info_2,
3128 vmcb->control.exit_int_info,
e097e5ff
SH
3129 vmcb->control.exit_int_info_err,
3130 KVM_ISA_SVM);
17897f36 3131
7597f129 3132 nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page);
34f80cfa
JR
3133 if (!nested_vmcb)
3134 return 1;
3135
2030753d
JR
3136 /* Exit Guest-Mode */
3137 leave_guest_mode(&svm->vcpu);
06fc7772
JR
3138 svm->nested.vmcb = 0;
3139
cf74a78b 3140 /* Give the current vmcb to the guest */
33740e40
JR
3141 disable_gif(svm);
3142
3143 nested_vmcb->save.es = vmcb->save.es;
3144 nested_vmcb->save.cs = vmcb->save.cs;
3145 nested_vmcb->save.ss = vmcb->save.ss;
3146 nested_vmcb->save.ds = vmcb->save.ds;
3147 nested_vmcb->save.gdtr = vmcb->save.gdtr;
3148 nested_vmcb->save.idtr = vmcb->save.idtr;
3f6a9d16 3149 nested_vmcb->save.efer = svm->vcpu.arch.efer;
cdbbdc12 3150 nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu);
9f8fe504 3151 nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu);
33740e40 3152 nested_vmcb->save.cr2 = vmcb->save.cr2;
cdbbdc12 3153 nested_vmcb->save.cr4 = svm->vcpu.arch.cr4;
f6e78475 3154 nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu);
33740e40
JR
3155 nested_vmcb->save.rip = vmcb->save.rip;
3156 nested_vmcb->save.rsp = vmcb->save.rsp;
3157 nested_vmcb->save.rax = vmcb->save.rax;
3158 nested_vmcb->save.dr7 = vmcb->save.dr7;
3159 nested_vmcb->save.dr6 = vmcb->save.dr6;
3160 nested_vmcb->save.cpl = vmcb->save.cpl;
3161
3162 nested_vmcb->control.int_ctl = vmcb->control.int_ctl;
3163 nested_vmcb->control.int_vector = vmcb->control.int_vector;
3164 nested_vmcb->control.int_state = vmcb->control.int_state;
3165 nested_vmcb->control.exit_code = vmcb->control.exit_code;
3166 nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi;
3167 nested_vmcb->control.exit_info_1 = vmcb->control.exit_info_1;
3168 nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
3169 nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info;
3170 nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
6092d3d3
JR
3171
3172 if (svm->nrips_enabled)
3173 nested_vmcb->control.next_rip = vmcb->control.next_rip;
8d23c466
AG
3174
3175 /*
3176 * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
3177 * to make sure that we do not lose injected events. So check event_inj
3178 * here and copy it to exit_int_info if it is valid.
3179 * Exit_int_info and event_inj can't be both valid because the case
3180 * below only happens on a VMRUN instruction intercept which has
3181 * no valid exit_int_info set.
3182 */
3183 if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
3184 struct vmcb_control_area *nc = &nested_vmcb->control;
3185
3186 nc->exit_int_info = vmcb->control.event_inj;
3187 nc->exit_int_info_err = vmcb->control.event_inj_err;
3188 }
3189
33740e40
JR
3190 nested_vmcb->control.tlb_ctl = 0;
3191 nested_vmcb->control.event_inj = 0;
3192 nested_vmcb->control.event_inj_err = 0;
cf74a78b
AG
3193
3194 /* We always set V_INTR_MASKING and remember the old value in hflags */
3195 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
3196 nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
3197
cf74a78b 3198 /* Restore the original control entries */
0460a979 3199 copy_vmcb_control_area(vmcb, hsave);
cf74a78b 3200
219b65dc
AG
3201 kvm_clear_exception_queue(&svm->vcpu);
3202 kvm_clear_interrupt_queue(&svm->vcpu);
cf74a78b 3203
4b16184c
JR
3204 svm->nested.nested_cr3 = 0;
3205
cf74a78b
AG
3206 /* Restore selected save entries */
3207 svm->vmcb->save.es = hsave->save.es;
3208 svm->vmcb->save.cs = hsave->save.cs;
3209 svm->vmcb->save.ss = hsave->save.ss;
3210 svm->vmcb->save.ds = hsave->save.ds;
3211 svm->vmcb->save.gdtr = hsave->save.gdtr;
3212 svm->vmcb->save.idtr = hsave->save.idtr;
f6e78475 3213 kvm_set_rflags(&svm->vcpu, hsave->save.rflags);
cf74a78b
AG
3214 svm_set_efer(&svm->vcpu, hsave->save.efer);
3215 svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
3216 svm_set_cr4(&svm->vcpu, hsave->save.cr4);
3217 if (npt_enabled) {
3218 svm->vmcb->save.cr3 = hsave->save.cr3;
3219 svm->vcpu.arch.cr3 = hsave->save.cr3;
3220 } else {
2390218b 3221 (void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
cf74a78b
AG
3222 }
3223 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);
3224 kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp);
3225 kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip);
3226 svm->vmcb->save.dr7 = 0;
3227 svm->vmcb->save.cpl = 0;
3228 svm->vmcb->control.exit_int_info = 0;
3229
8d28fec4
RJ
3230 mark_all_dirty(svm->vmcb);
3231
7597f129 3232 nested_svm_unmap(page);
cf74a78b 3233
4b16184c 3234 nested_svm_uninit_mmu_context(&svm->vcpu);
cf74a78b
AG
3235 kvm_mmu_reset_context(&svm->vcpu);
3236 kvm_mmu_load(&svm->vcpu);
3237
3238 return 0;
3239}
3d6368ef 3240
9738b2c9 3241static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
3d6368ef 3242{
323c3d80
JR
3243 /*
3244 * This function merges the msr permission bitmaps of kvm and the
c5ec2e56 3245 * nested vmcb. It is optimized in that it only merges the parts where
323c3d80
JR
3246 * the kvm msr permission bitmap may contain zero bits
3247 */
3d6368ef 3248 int i;
9738b2c9 3249
323c3d80
JR
3250 if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
3251 return true;
9738b2c9 3252
323c3d80
JR
3253 for (i = 0; i < MSRPM_OFFSETS; i++) {
3254 u32 value, p;
3255 u64 offset;
9738b2c9 3256
323c3d80
JR
3257 if (msrpm_offsets[i] == 0xffffffff)
3258 break;
3d6368ef 3259
0d6b3537
JR
3260 p = msrpm_offsets[i];
3261 offset = svm->nested.vmcb_msrpm + (p * 4);
323c3d80 3262
54bf36aa 3263 if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4))
323c3d80
JR
3264 return false;
3265
3266 svm->nested.msrpm[p] = svm->msrpm[p] | value;
3267 }
3d6368ef 3268
d0ec49d4 3269 svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm));
9738b2c9
JR
3270
3271 return true;
3d6368ef
AG
3272}
3273
52c65a30
JR
3274static bool nested_vmcb_checks(struct vmcb *vmcb)
3275{
3276 if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0)
3277 return false;
3278
dbe77584
JR
3279 if (vmcb->control.asid == 0)
3280 return false;
3281
cea3a19b
TL
3282 if ((vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) &&
3283 !npt_enabled)
4b16184c
JR
3284 return false;
3285
52c65a30
JR
3286 return true;
3287}
3288
c2634065
LP
3289static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
3290 struct vmcb *nested_vmcb, struct page *page)
3d6368ef 3291{
f6e78475 3292 if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
3d6368ef
AG
3293 svm->vcpu.arch.hflags |= HF_HIF_MASK;
3294 else
3295 svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
3296
cea3a19b 3297 if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) {
4b16184c
JR
3298 kvm_mmu_unload(&svm->vcpu);
3299 svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
3300 nested_svm_init_mmu_context(&svm->vcpu);
3301 }
3302
3d6368ef
AG
3303 /* Load the nested guest state */
3304 svm->vmcb->save.es = nested_vmcb->save.es;
3305 svm->vmcb->save.cs = nested_vmcb->save.cs;
3306 svm->vmcb->save.ss = nested_vmcb->save.ss;
3307 svm->vmcb->save.ds = nested_vmcb->save.ds;
3308 svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
3309 svm->vmcb->save.idtr = nested_vmcb->save.idtr;
f6e78475 3310 kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags);
3d6368ef
AG
3311 svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
3312 svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
3313 svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
3314 if (npt_enabled) {
3315 svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
3316 svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
0e5cbe36 3317 } else
2390218b 3318 (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
0e5cbe36
JR
3319
3320 /* Guest paging mode is active - reset mmu */
3321 kvm_mmu_reset_context(&svm->vcpu);
3322
defbba56 3323 svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
3d6368ef
AG
3324 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
3325 kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
3326 kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
e0231715 3327
3d6368ef
AG
3328 /* In case we don't even reach vcpu_run, the fields are not updated */
3329 svm->vmcb->save.rax = nested_vmcb->save.rax;
3330 svm->vmcb->save.rsp = nested_vmcb->save.rsp;
3331 svm->vmcb->save.rip = nested_vmcb->save.rip;
3332 svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
3333 svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
3334 svm->vmcb->save.cpl = nested_vmcb->save.cpl;
3335
f7138538 3336 svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
ce2ac085 3337 svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL;
3d6368ef 3338
aad42c64 3339 /* cache intercepts */
4ee546b4 3340 svm->nested.intercept_cr = nested_vmcb->control.intercept_cr;
3aed041a 3341 svm->nested.intercept_dr = nested_vmcb->control.intercept_dr;
aad42c64
JR
3342 svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
3343 svm->nested.intercept = nested_vmcb->control.intercept;
3344
c2ba05cc 3345 svm_flush_tlb(&svm->vcpu, true);
3d6368ef 3346 svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
3d6368ef
AG
3347 if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
3348 svm->vcpu.arch.hflags |= HF_VINTR_MASK;
3349 else
3350 svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
3351
88ab24ad
JR
3352 if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
3353 /* We only want the cr8 intercept bits of the guest */
4ee546b4
RJ
3354 clr_cr_intercept(svm, INTERCEPT_CR8_READ);
3355 clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
88ab24ad
JR
3356 }
3357
0d945bd9 3358 /* We don't want to see VMMCALLs from a nested guest */
8a05a1b8 3359 clr_intercept(svm, INTERCEPT_VMMCALL);
0d945bd9 3360
0dc92119 3361 svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
3d6368ef
AG
3362 svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
3363 svm->vmcb->control.int_state = nested_vmcb->control.int_state;
3364 svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
3d6368ef
AG
3365 svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
3366 svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
3367
7597f129 3368 nested_svm_unmap(page);
9738b2c9 3369
2030753d
JR
3370 /* Enter Guest-Mode */
3371 enter_guest_mode(&svm->vcpu);
3372
384c6368
JR
3373 /*
3374 * Merge guest and host intercepts - must be called with vcpu in
 3375 * guest-mode to take effect here
3376 */
3377 recalc_intercepts(svm);
3378
06fc7772 3379 svm->nested.vmcb = vmcb_gpa;
9738b2c9 3380
2af9194d 3381 enable_gif(svm);
3d6368ef 3382
8d28fec4 3383 mark_all_dirty(svm->vmcb);
c2634065
LP
3384}
3385
3386static bool nested_svm_vmrun(struct vcpu_svm *svm)
3387{
3388 struct vmcb *nested_vmcb;
3389 struct vmcb *hsave = svm->nested.hsave;
3390 struct vmcb *vmcb = svm->vmcb;
3391 struct page *page;
3392 u64 vmcb_gpa;
3393
3394 vmcb_gpa = svm->vmcb->save.rax;
3395
3396 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
3397 if (!nested_vmcb)
3398 return false;
3399
3400 if (!nested_vmcb_checks(nested_vmcb)) {
3401 nested_vmcb->control.exit_code = SVM_EXIT_ERR;
3402 nested_vmcb->control.exit_code_hi = 0;
3403 nested_vmcb->control.exit_info_1 = 0;
3404 nested_vmcb->control.exit_info_2 = 0;
3405
3406 nested_svm_unmap(page);
3407
3408 return false;
3409 }
3410
3411 trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
3412 nested_vmcb->save.rip,
3413 nested_vmcb->control.int_ctl,
3414 nested_vmcb->control.event_inj,
3415 nested_vmcb->control.nested_ctl);
3416
3417 trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff,
3418 nested_vmcb->control.intercept_cr >> 16,
3419 nested_vmcb->control.intercept_exceptions,
3420 nested_vmcb->control.intercept);
3421
3422 /* Clear internal status */
3423 kvm_clear_exception_queue(&svm->vcpu);
3424 kvm_clear_interrupt_queue(&svm->vcpu);
3425
3426 /*
 3427 * Save the old vmcb so we don't need to pick what to save; we can
3428 * restore everything when a VMEXIT occurs
3429 */
3430 hsave->save.es = vmcb->save.es;
3431 hsave->save.cs = vmcb->save.cs;
3432 hsave->save.ss = vmcb->save.ss;
3433 hsave->save.ds = vmcb->save.ds;
3434 hsave->save.gdtr = vmcb->save.gdtr;
3435 hsave->save.idtr = vmcb->save.idtr;
3436 hsave->save.efer = svm->vcpu.arch.efer;
3437 hsave->save.cr0 = kvm_read_cr0(&svm->vcpu);
3438 hsave->save.cr4 = svm->vcpu.arch.cr4;
3439 hsave->save.rflags = kvm_get_rflags(&svm->vcpu);
3440 hsave->save.rip = kvm_rip_read(&svm->vcpu);
3441 hsave->save.rsp = vmcb->save.rsp;
3442 hsave->save.rax = vmcb->save.rax;
3443 if (npt_enabled)
3444 hsave->save.cr3 = vmcb->save.cr3;
3445 else
3446 hsave->save.cr3 = kvm_read_cr3(&svm->vcpu);
3447
3448 copy_vmcb_control_area(hsave, vmcb);
3449
3450 enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, page);
8d28fec4 3451
9738b2c9 3452 return true;
3d6368ef
AG
3453}
3454
9966bf68 3455static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
5542675b
AG
3456{
3457 to_vmcb->save.fs = from_vmcb->save.fs;
3458 to_vmcb->save.gs = from_vmcb->save.gs;
3459 to_vmcb->save.tr = from_vmcb->save.tr;
3460 to_vmcb->save.ldtr = from_vmcb->save.ldtr;
3461 to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
3462 to_vmcb->save.star = from_vmcb->save.star;
3463 to_vmcb->save.lstar = from_vmcb->save.lstar;
3464 to_vmcb->save.cstar = from_vmcb->save.cstar;
3465 to_vmcb->save.sfmask = from_vmcb->save.sfmask;
3466 to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
3467 to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
3468 to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
5542675b
AG
3469}
3470
851ba692 3471static int vmload_interception(struct vcpu_svm *svm)
5542675b 3472{
9966bf68 3473 struct vmcb *nested_vmcb;
7597f129 3474 struct page *page;
b742c1e6 3475 int ret;
9966bf68 3476
5542675b
AG
3477 if (nested_svm_check_permissions(svm))
3478 return 1;
3479
7597f129 3480 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
9966bf68
JR
3481 if (!nested_vmcb)
3482 return 1;
3483
e3e9ed3d 3484 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
b742c1e6 3485 ret = kvm_skip_emulated_instruction(&svm->vcpu);
e3e9ed3d 3486
9966bf68 3487 nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
7597f129 3488 nested_svm_unmap(page);
5542675b 3489
b742c1e6 3490 return ret;
5542675b
AG
3491}
3492
851ba692 3493static int vmsave_interception(struct vcpu_svm *svm)
5542675b 3494{
9966bf68 3495 struct vmcb *nested_vmcb;
7597f129 3496 struct page *page;
b742c1e6 3497 int ret;
9966bf68 3498
5542675b
AG
3499 if (nested_svm_check_permissions(svm))
3500 return 1;
3501
7597f129 3502 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
9966bf68
JR
3503 if (!nested_vmcb)
3504 return 1;
3505
e3e9ed3d 3506 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
b742c1e6 3507 ret = kvm_skip_emulated_instruction(&svm->vcpu);
e3e9ed3d 3508
9966bf68 3509 nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
7597f129 3510 nested_svm_unmap(page);
5542675b 3511
b742c1e6 3512 return ret;
5542675b
AG
3513}
3514
851ba692 3515static int vmrun_interception(struct vcpu_svm *svm)
3d6368ef 3516{
3d6368ef
AG
3517 if (nested_svm_check_permissions(svm))
3518 return 1;
3519
b75f4eb3
RJ
3520 /* Save rip after vmrun instruction */
3521 kvm_rip_write(&svm->vcpu, kvm_rip_read(&svm->vcpu) + 3);
3d6368ef 3522
9738b2c9 3523 if (!nested_svm_vmrun(svm))
3d6368ef
AG
3524 return 1;
3525
9738b2c9 3526 if (!nested_svm_vmrun_msrpm(svm))
1f8da478
JR
3527 goto failed;
3528
3529 return 1;
3530
3531failed:
3532
3533 svm->vmcb->control.exit_code = SVM_EXIT_ERR;
3534 svm->vmcb->control.exit_code_hi = 0;
3535 svm->vmcb->control.exit_info_1 = 0;
3536 svm->vmcb->control.exit_info_2 = 0;
3537
3538 nested_svm_vmexit(svm);
3d6368ef
AG
3539
3540 return 1;
3541}
3542
851ba692 3543static int stgi_interception(struct vcpu_svm *svm)
1371d904 3544{
b742c1e6
LP
3545 int ret;
3546
1371d904
AG
3547 if (nested_svm_check_permissions(svm))
3548 return 1;
3549
640bd6e5
JN
3550 /*
3551 * If VGIF is enabled, the STGI intercept is only added to
cc3d967f 3552 * detect the opening of the SMI/NMI window; remove it now.
640bd6e5
JN
3553 */
3554 if (vgif_enabled(svm))
3555 clr_intercept(svm, INTERCEPT_STGI);
3556
1371d904 3557 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
b742c1e6 3558 ret = kvm_skip_emulated_instruction(&svm->vcpu);
3842d135 3559 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
1371d904 3560
2af9194d 3561 enable_gif(svm);
1371d904 3562
b742c1e6 3563 return ret;
1371d904
AG
3564}
3565
851ba692 3566static int clgi_interception(struct vcpu_svm *svm)
1371d904 3567{
b742c1e6
LP
3568 int ret;
3569
1371d904
AG
3570 if (nested_svm_check_permissions(svm))
3571 return 1;
3572
3573 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
b742c1e6 3574 ret = kvm_skip_emulated_instruction(&svm->vcpu);
1371d904 3575
2af9194d 3576 disable_gif(svm);
1371d904
AG
3577
3578 /* After a CLGI no interrupts should come */
340d3bc3
SS
3579 if (!kvm_vcpu_apicv_active(&svm->vcpu)) {
3580 svm_clear_vintr(svm);
3581 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
3582 mark_dirty(svm->vmcb, VMCB_INTR);
3583 }
decdbf6a 3584
b742c1e6 3585 return ret;
1371d904
AG
3586}
3587
851ba692 3588static int invlpga_interception(struct vcpu_svm *svm)
ff092385
AG
3589{
3590 struct kvm_vcpu *vcpu = &svm->vcpu;
ff092385 3591
668f198f
DK
3592 trace_kvm_invlpga(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RCX),
3593 kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
ec1ff790 3594
ff092385 3595 /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
668f198f 3596 kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
ff092385
AG
3597
3598 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
b742c1e6 3599 return kvm_skip_emulated_instruction(&svm->vcpu);
ff092385
AG
3600}
3601
532a46b9
JR
3602static int skinit_interception(struct vcpu_svm *svm)
3603{
668f198f 3604 trace_kvm_skinit(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
532a46b9
JR
3605
3606 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
3607 return 1;
3608}
3609
dab429a7
DK
3610static int wbinvd_interception(struct vcpu_svm *svm)
3611{
6affcbed 3612 return kvm_emulate_wbinvd(&svm->vcpu);
dab429a7
DK
3613}
3614
81dd35d4
JR
3615static int xsetbv_interception(struct vcpu_svm *svm)
3616{
3617 u64 new_bv = kvm_read_edx_eax(&svm->vcpu);
3618 u32 index = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
3619
3620 if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) {
3621 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
b742c1e6 3622 return kvm_skip_emulated_instruction(&svm->vcpu);
81dd35d4
JR
3623 }
3624
3625 return 1;
3626}
3627
851ba692 3628static int task_switch_interception(struct vcpu_svm *svm)
6aa8b732 3629{
37817f29 3630 u16 tss_selector;
64a7ec06
GN
3631 int reason;
3632 int int_type = svm->vmcb->control.exit_int_info &
3633 SVM_EXITINTINFO_TYPE_MASK;
8317c298 3634 int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK;
fe8e7f83
GN
3635 uint32_t type =
3636 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK;
3637 uint32_t idt_v =
3638 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID;
e269fb21
JK
3639 bool has_error_code = false;
3640 u32 error_code = 0;
37817f29
IE
3641
3642 tss_selector = (u16)svm->vmcb->control.exit_info_1;
64a7ec06 3643
37817f29
IE
3644 if (svm->vmcb->control.exit_info_2 &
3645 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
64a7ec06
GN
3646 reason = TASK_SWITCH_IRET;
3647 else if (svm->vmcb->control.exit_info_2 &
3648 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
3649 reason = TASK_SWITCH_JMP;
fe8e7f83 3650 else if (idt_v)
64a7ec06
GN
3651 reason = TASK_SWITCH_GATE;
3652 else
3653 reason = TASK_SWITCH_CALL;
3654
fe8e7f83
GN
3655 if (reason == TASK_SWITCH_GATE) {
3656 switch (type) {
3657 case SVM_EXITINTINFO_TYPE_NMI:
3658 svm->vcpu.arch.nmi_injected = false;
3659 break;
3660 case SVM_EXITINTINFO_TYPE_EXEPT:
e269fb21
JK
3661 if (svm->vmcb->control.exit_info_2 &
3662 (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) {
3663 has_error_code = true;
3664 error_code =
3665 (u32)svm->vmcb->control.exit_info_2;
3666 }
fe8e7f83
GN
3667 kvm_clear_exception_queue(&svm->vcpu);
3668 break;
3669 case SVM_EXITINTINFO_TYPE_INTR:
3670 kvm_clear_interrupt_queue(&svm->vcpu);
3671 break;
3672 default:
3673 break;
3674 }
3675 }
64a7ec06 3676
8317c298
GN
3677 if (reason != TASK_SWITCH_GATE ||
3678 int_type == SVM_EXITINTINFO_TYPE_SOFT ||
3679 (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
f629cf84
GN
3680 (int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
3681 skip_emulated_instruction(&svm->vcpu);
64a7ec06 3682
7f3d35fd
KW
3683 if (int_type != SVM_EXITINTINFO_TYPE_SOFT)
3684 int_vec = -1;
3685
3686 if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
acb54517
GN
3687 has_error_code, error_code) == EMULATE_FAIL) {
3688 svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
3689 svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
3690 svm->vcpu.run->internal.ndata = 0;
3691 return 0;
3692 }
3693 return 1;
6aa8b732
AK
3694}
3695
851ba692 3696static int cpuid_interception(struct vcpu_svm *svm)
6aa8b732 3697{
5fdbf976 3698 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
6a908b62 3699 return kvm_emulate_cpuid(&svm->vcpu);
6aa8b732
AK
3700}
3701
851ba692 3702static int iret_interception(struct vcpu_svm *svm)
95ba8273
GN
3703{
3704 ++svm->vcpu.stat.nmi_window_exits;
8a05a1b8 3705 clr_intercept(svm, INTERCEPT_IRET);
44c11430 3706 svm->vcpu.arch.hflags |= HF_IRET_MASK;
bd3d1ec3 3707 svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
f303b4ce 3708 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
95ba8273
GN
3709 return 1;
3710}
3711
851ba692 3712static int invlpg_interception(struct vcpu_svm *svm)
a7052897 3713{
df4f3108
AP
3714 if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
3715 return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
3716
3717 kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
b742c1e6 3718 return kvm_skip_emulated_instruction(&svm->vcpu);
a7052897
MT
3719}
3720
851ba692 3721static int emulate_on_interception(struct vcpu_svm *svm)
6aa8b732 3722{
51d8b661 3723 return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
6aa8b732
AK
3724}
3725
7607b717
BS
3726static int rsm_interception(struct vcpu_svm *svm)
3727{
3728 return x86_emulate_instruction(&svm->vcpu, 0, 0,
3729 rsm_ins_bytes, 2) == EMULATE_DONE;
3730}
3731
332b56e4
AK
3732static int rdpmc_interception(struct vcpu_svm *svm)
3733{
3734 int err;
3735
3736 if (!static_cpu_has(X86_FEATURE_NRIPS))
3737 return emulate_on_interception(svm);
3738
3739 err = kvm_rdpmc(&svm->vcpu);
6affcbed 3740 return kvm_complete_insn_gp(&svm->vcpu, err);
332b56e4
AK
3741}
3742
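/*
 * Decide whether a CR0 write by the L2 guest has to be reflected to the L1
 * hypervisor as an SVM_EXIT_CR0_SEL_WRITE exit: this is the case only if L1
 * enabled the selective CR0 intercept and a bit outside
 * SVM_CR0_SELECTIVE_MASK actually changes.
 */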
52eb5a6d
XL
3743static bool check_selective_cr0_intercepted(struct vcpu_svm *svm,
3744 unsigned long val)
628afd2a
JR
3745{
3746 unsigned long cr0 = svm->vcpu.arch.cr0;
3747 bool ret = false;
3748 u64 intercept;
3749
3750 intercept = svm->nested.intercept;
3751
3752 if (!is_guest_mode(&svm->vcpu) ||
3753 (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))))
3754 return false;
3755
3756 cr0 &= ~SVM_CR0_SELECTIVE_MASK;
3757 val &= ~SVM_CR0_SELECTIVE_MASK;
3758
3759 if (cr0 ^ val) {
3760 svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
3761 ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE);
3762 }
3763
3764 return ret;
3765}
3766
7ff76d58
AP
3767#define CR_VALID (1ULL << 63)
3768
3769static int cr_interception(struct vcpu_svm *svm)
3770{
3771 int reg, cr;
3772 unsigned long val;
3773 int err;
3774
3775 if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
3776 return emulate_on_interception(svm);
3777
3778 if (unlikely((svm->vmcb->control.exit_info_1 & CR_VALID) == 0))
3779 return emulate_on_interception(svm);
3780
3781 reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
5e57518d
DK
3782 if (svm->vmcb->control.exit_code == SVM_EXIT_CR0_SEL_WRITE)
3783 cr = SVM_EXIT_WRITE_CR0 - SVM_EXIT_READ_CR0;
3784 else
3785 cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
7ff76d58
AP
3786
3787 err = 0;
3788 if (cr >= 16) { /* mov to cr */
3789 cr -= 16;
3790 val = kvm_register_read(&svm->vcpu, reg);
3791 switch (cr) {
3792 case 0:
628afd2a
JR
3793 if (!check_selective_cr0_intercepted(svm, val))
3794 err = kvm_set_cr0(&svm->vcpu, val);
977b2d03
JR
3795 else
3796 return 1;
3797
7ff76d58
AP
3798 break;
3799 case 3:
3800 err = kvm_set_cr3(&svm->vcpu, val);
3801 break;
3802 case 4:
3803 err = kvm_set_cr4(&svm->vcpu, val);
3804 break;
3805 case 8:
3806 err = kvm_set_cr8(&svm->vcpu, val);
3807 break;
3808 default:
3809 WARN(1, "unhandled write to CR%d", cr);
3810 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
3811 return 1;
3812 }
3813 } else { /* mov from cr */
3814 switch (cr) {
3815 case 0:
3816 val = kvm_read_cr0(&svm->vcpu);
3817 break;
3818 case 2:
3819 val = svm->vcpu.arch.cr2;
3820 break;
3821 case 3:
9f8fe504 3822 val = kvm_read_cr3(&svm->vcpu);
7ff76d58
AP
3823 break;
3824 case 4:
3825 val = kvm_read_cr4(&svm->vcpu);
3826 break;
3827 case 8:
3828 val = kvm_get_cr8(&svm->vcpu);
3829 break;
3830 default:
3831 WARN(1, "unhandled read from CR%d", cr);
3832 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
3833 return 1;
3834 }
3835 kvm_register_write(&svm->vcpu, reg, val);
3836 }
6affcbed 3837 return kvm_complete_insn_gp(&svm->vcpu, err);
7ff76d58
AP
3838}
3839
cae3797a
AP
3840static int dr_interception(struct vcpu_svm *svm)
3841{
3842 int reg, dr;
3843 unsigned long val;
cae3797a 3844
facb0139
PB
3845 if (svm->vcpu.guest_debug == 0) {
3846 /*
3847 * No more DR vmexits; force a reload of the debug registers
3848 * and reenter on this instruction. The next vmexit will
3849 * retrieve the full state of the debug registers.
3850 */
3851 clr_dr_intercepts(svm);
3852 svm->vcpu.arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
3853 return 1;
3854 }
3855
cae3797a
AP
3856 if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS))
3857 return emulate_on_interception(svm);
3858
3859 reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
3860 dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0;
3861
3862 if (dr >= 16) { /* mov to DRn */
16f8a6f9
NA
3863 if (!kvm_require_dr(&svm->vcpu, dr - 16))
3864 return 1;
cae3797a
AP
3865 val = kvm_register_read(&svm->vcpu, reg);
3866 kvm_set_dr(&svm->vcpu, dr - 16, val);
3867 } else {
16f8a6f9
NA
3868 if (!kvm_require_dr(&svm->vcpu, dr))
3869 return 1;
3870 kvm_get_dr(&svm->vcpu, dr, &val);
3871 kvm_register_write(&svm->vcpu, reg, val);
cae3797a
AP
3872 }
3873
b742c1e6 3874 return kvm_skip_emulated_instruction(&svm->vcpu);
cae3797a
AP
3875}
3876
851ba692 3877static int cr8_write_interception(struct vcpu_svm *svm)
1d075434 3878{
851ba692 3879 struct kvm_run *kvm_run = svm->vcpu.run;
eea1cff9 3880 int r;
851ba692 3881
0a5fff19
GN
3882 u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
3883 /* instruction emulation calls kvm_set_cr8() */
7ff76d58 3884 r = cr_interception(svm);
35754c98 3885 if (lapic_in_kernel(&svm->vcpu))
7ff76d58 3886 return r;
0a5fff19 3887 if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
7ff76d58 3888 return r;
1d075434
JR
3889 kvm_run->exit_reason = KVM_EXIT_SET_TPR;
3890 return 0;
3891}
3892
801e459a
TL
3893static int svm_get_msr_feature(struct kvm_msr_entry *msr)
3894{
d1d93fa9
TL
3895 msr->data = 0;
3896
3897 switch (msr->index) {
3898 case MSR_F10H_DECFG:
3899 if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC))
3900 msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE;
3901 break;
3902 default:
3903 return 1;
3904 }
3905
3906 return 0;
801e459a
TL
3907}
3908
609e36d3 3909static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
6aa8b732 3910{
a2fa3e9f
GH
3911 struct vcpu_svm *svm = to_svm(vcpu);
3912
609e36d3 3913 switch (msr_info->index) {
af24a4e4 3914 case MSR_IA32_TSC: {
609e36d3 3915 msr_info->data = svm->vmcb->control.tsc_offset +
35181e86 3916 kvm_scale_tsc(vcpu, rdtsc());
fbc0db76 3917
6aa8b732
AK
3918 break;
3919 }
8c06585d 3920 case MSR_STAR:
609e36d3 3921 msr_info->data = svm->vmcb->save.star;
6aa8b732 3922 break;
0e859cac 3923#ifdef CONFIG_X86_64
6aa8b732 3924 case MSR_LSTAR:
609e36d3 3925 msr_info->data = svm->vmcb->save.lstar;
6aa8b732
AK
3926 break;
3927 case MSR_CSTAR:
609e36d3 3928 msr_info->data = svm->vmcb->save.cstar;
6aa8b732
AK
3929 break;
3930 case MSR_KERNEL_GS_BASE:
609e36d3 3931 msr_info->data = svm->vmcb->save.kernel_gs_base;
6aa8b732
AK
3932 break;
3933 case MSR_SYSCALL_MASK:
609e36d3 3934 msr_info->data = svm->vmcb->save.sfmask;
6aa8b732
AK
3935 break;
3936#endif
3937 case MSR_IA32_SYSENTER_CS:
609e36d3 3938 msr_info->data = svm->vmcb->save.sysenter_cs;
6aa8b732
AK
3939 break;
3940 case MSR_IA32_SYSENTER_EIP:
609e36d3 3941 msr_info->data = svm->sysenter_eip;
6aa8b732
AK
3942 break;
3943 case MSR_IA32_SYSENTER_ESP:
609e36d3 3944 msr_info->data = svm->sysenter_esp;
6aa8b732 3945 break;
46896c73
PB
3946 case MSR_TSC_AUX:
3947 if (!boot_cpu_has(X86_FEATURE_RDTSCP))
3948 return 1;
3949 msr_info->data = svm->tsc_aux;
3950 break;
e0231715
JR
3951 /*
3952 * Nobody will change the following 5 values in the VMCB so we can
3953 * safely return them on rdmsr. They will always be 0 until LBRV is
3954 * implemented.
3955 */
a2938c80 3956 case MSR_IA32_DEBUGCTLMSR:
609e36d3 3957 msr_info->data = svm->vmcb->save.dbgctl;
a2938c80
JR
3958 break;
3959 case MSR_IA32_LASTBRANCHFROMIP:
609e36d3 3960 msr_info->data = svm->vmcb->save.br_from;
a2938c80
JR
3961 break;
3962 case MSR_IA32_LASTBRANCHTOIP:
609e36d3 3963 msr_info->data = svm->vmcb->save.br_to;
a2938c80
JR
3964 break;
3965 case MSR_IA32_LASTINTFROMIP:
609e36d3 3966 msr_info->data = svm->vmcb->save.last_excp_from;
a2938c80
JR
3967 break;
3968 case MSR_IA32_LASTINTTOIP:
609e36d3 3969 msr_info->data = svm->vmcb->save.last_excp_to;
a2938c80 3970 break;
b286d5d8 3971 case MSR_VM_HSAVE_PA:
609e36d3 3972 msr_info->data = svm->nested.hsave_msr;
b286d5d8 3973 break;
eb6f302e 3974 case MSR_VM_CR:
609e36d3 3975 msr_info->data = svm->nested.vm_cr_msr;
eb6f302e 3976 break;
b2ac58f9
KA
3977 case MSR_IA32_SPEC_CTRL:
3978 if (!msr_info->host_initiated &&
3979 !guest_cpuid_has(vcpu, X86_FEATURE_IBRS))
3980 return 1;
3981
3982 msr_info->data = svm->spec_ctrl;
3983 break;
ae8b7875
BP
3984 case MSR_F15H_IC_CFG: {
3985
3986 int family, model;
3987
3988 family = guest_cpuid_family(vcpu);
3989 model = guest_cpuid_model(vcpu);
3990
3991 if (family < 0 || model < 0)
3992 return kvm_get_msr_common(vcpu, msr_info);
3993
3994 msr_info->data = 0;
3995
3996 if (family == 0x15 &&
3997 (model >= 0x2 && model < 0x20))
3998 msr_info->data = 0x1E;
3999 }
4000 break;
d1d93fa9
TL
4001 case MSR_F10H_DECFG:
4002 msr_info->data = svm->msr_decfg;
4003 break;
6aa8b732 4004 default:
609e36d3 4005 return kvm_get_msr_common(vcpu, msr_info);
6aa8b732
AK
4006 }
4007 return 0;
4008}
4009
851ba692 4010static int rdmsr_interception(struct vcpu_svm *svm)
6aa8b732 4011{
668f198f 4012 u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
609e36d3 4013 struct msr_data msr_info;
6aa8b732 4014
609e36d3
PB
4015 msr_info.index = ecx;
4016 msr_info.host_initiated = false;
4017 if (svm_get_msr(&svm->vcpu, &msr_info)) {
59200273 4018 trace_kvm_msr_read_ex(ecx);
c1a5d4f9 4019 kvm_inject_gp(&svm->vcpu, 0);
b742c1e6 4020 return 1;
59200273 4021 } else {
609e36d3 4022 trace_kvm_msr_read(ecx, msr_info.data);
af9ca2d7 4023
609e36d3
PB
4024 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX,
4025 msr_info.data & 0xffffffff);
4026 kvm_register_write(&svm->vcpu, VCPU_REGS_RDX,
4027 msr_info.data >> 32);
5fdbf976 4028 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
b742c1e6 4029 return kvm_skip_emulated_instruction(&svm->vcpu);
6aa8b732 4030 }
6aa8b732
AK
4031}
4032
4a810181
JR
4033static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
4034{
4035 struct vcpu_svm *svm = to_svm(vcpu);
4036 int svm_dis, chg_mask;
4037
4038 if (data & ~SVM_VM_CR_VALID_MASK)
4039 return 1;
4040
4041 chg_mask = SVM_VM_CR_VALID_MASK;
4042
4043 if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK)
4044 chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK);
4045
4046 svm->nested.vm_cr_msr &= ~chg_mask;
4047 svm->nested.vm_cr_msr |= (data & chg_mask);
4048
4049 svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK;
4050
4051 /* check for svm_disable while efer.svme is set */
4052 if (svm_dis && (vcpu->arch.efer & EFER_SVME))
4053 return 1;
4054
4055 return 0;
4056}
4057
8fe8ab46 4058static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
6aa8b732 4059{
a2fa3e9f
GH
4060 struct vcpu_svm *svm = to_svm(vcpu);
4061
8fe8ab46
WA
4062 u32 ecx = msr->index;
4063 u64 data = msr->data;
6aa8b732 4064 switch (ecx) {
15038e14
PB
4065 case MSR_IA32_CR_PAT:
4066 if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
4067 return 1;
4068 vcpu->arch.pat = data;
4069 svm->vmcb->save.g_pat = data;
4070 mark_dirty(svm->vmcb, VMCB_NPT);
4071 break;
f4e1b3c8 4072 case MSR_IA32_TSC:
8fe8ab46 4073 kvm_write_tsc(vcpu, msr);
6aa8b732 4074 break;
b2ac58f9
KA
4075 case MSR_IA32_SPEC_CTRL:
4076 if (!msr->host_initiated &&
4077 !guest_cpuid_has(vcpu, X86_FEATURE_IBRS))
4078 return 1;
4079
4080 /* The STIBP bit doesn't fault even if it's not advertised */
4081 if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
4082 return 1;
4083
4084 svm->spec_ctrl = data;
4085
4086 if (!data)
4087 break;
4088
4089 /*
4090 * For non-nested:
4091 * When it's written (to non-zero) for the first time, pass
4092 * it through.
4093 *
4094 * For nested:
4095 * The handling of the MSR bitmap for L2 guests is done in
4096 * nested_svm_vmrun_msrpm.
4097 * We update the L1 MSR bit as well since it will end up
4098 * touching the MSR anyway now.
4099 */
4100 set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
4101 break;
15d45071
AR
4102 case MSR_IA32_PRED_CMD:
4103 if (!msr->host_initiated &&
4104 !guest_cpuid_has(vcpu, X86_FEATURE_IBPB))
4105 return 1;
4106
4107 if (data & ~PRED_CMD_IBPB)
4108 return 1;
4109
4110 if (!data)
4111 break;
4112
4113 wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
4114 if (is_guest_mode(vcpu))
4115 break;
4116 set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
4117 break;
8c06585d 4118 case MSR_STAR:
a2fa3e9f 4119 svm->vmcb->save.star = data;
6aa8b732 4120 break;
49b14f24 4121#ifdef CONFIG_X86_64
6aa8b732 4122 case MSR_LSTAR:
a2fa3e9f 4123 svm->vmcb->save.lstar = data;
6aa8b732
AK
4124 break;
4125 case MSR_CSTAR:
a2fa3e9f 4126 svm->vmcb->save.cstar = data;
6aa8b732
AK
4127 break;
4128 case MSR_KERNEL_GS_BASE:
a2fa3e9f 4129 svm->vmcb->save.kernel_gs_base = data;
6aa8b732
AK
4130 break;
4131 case MSR_SYSCALL_MASK:
a2fa3e9f 4132 svm->vmcb->save.sfmask = data;
6aa8b732
AK
4133 break;
4134#endif
4135 case MSR_IA32_SYSENTER_CS:
a2fa3e9f 4136 svm->vmcb->save.sysenter_cs = data;
6aa8b732
AK
4137 break;
4138 case MSR_IA32_SYSENTER_EIP:
017cb99e 4139 svm->sysenter_eip = data;
a2fa3e9f 4140 svm->vmcb->save.sysenter_eip = data;
6aa8b732
AK
4141 break;
4142 case MSR_IA32_SYSENTER_ESP:
017cb99e 4143 svm->sysenter_esp = data;
a2fa3e9f 4144 svm->vmcb->save.sysenter_esp = data;
6aa8b732 4145 break;
46896c73
PB
4146 case MSR_TSC_AUX:
4147 if (!boot_cpu_has(X86_FEATURE_RDTSCP))
4148 return 1;
4149
4150 /*
4151 * This is rare, so we update the MSR here instead of using
4152 * direct_access_msrs. Doing that would require a rdmsr in
4153 * svm_vcpu_put.
4154 */
4155 svm->tsc_aux = data;
4156 wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
4157 break;
a2938c80 4158 case MSR_IA32_DEBUGCTLMSR:
2a6b20b8 4159 if (!boot_cpu_has(X86_FEATURE_LBRV)) {
a737f256
CD
4160 vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
4161 __func__, data);
24e09cbf
JR
4162 break;
4163 }
4164 if (data & DEBUGCTL_RESERVED_BITS)
4165 return 1;
4166
4167 svm->vmcb->save.dbgctl = data;
b53ba3f9 4168 mark_dirty(svm->vmcb, VMCB_LBR);
24e09cbf
JR
4169 if (data & (1ULL<<0))
4170 svm_enable_lbrv(svm);
4171 else
4172 svm_disable_lbrv(svm);
a2938c80 4173 break;
b286d5d8 4174 case MSR_VM_HSAVE_PA:
e6aa9abd 4175 svm->nested.hsave_msr = data;
62b9abaa 4176 break;
3c5d0a44 4177 case MSR_VM_CR:
4a810181 4178 return svm_set_vm_cr(vcpu, data);
3c5d0a44 4179 case MSR_VM_IGNNE:
a737f256 4180 vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
3c5d0a44 4181 break;
d1d93fa9
TL
4182 case MSR_F10H_DECFG: {
4183 struct kvm_msr_entry msr_entry;
4184
4185 msr_entry.index = msr->index;
4186 if (svm_get_msr_feature(&msr_entry))
4187 return 1;
4188
4189 /* Check the supported bits */
4190 if (data & ~msr_entry.data)
4191 return 1;
4192
4193 /* Don't allow the guest to change a bit, #GP */
4194 if (!msr->host_initiated && (data ^ msr_entry.data))
4195 return 1;
4196
4197 svm->msr_decfg = data;
4198 break;
4199 }
44a95dae
SS
4200 case MSR_IA32_APICBASE:
4201 if (kvm_vcpu_apicv_active(vcpu))
4202 avic_update_vapic_bar(to_svm(vcpu), data);
 4203 /* Fall through */
6aa8b732 4204 default:
8fe8ab46 4205 return kvm_set_msr_common(vcpu, msr);
6aa8b732
AK
4206 }
4207 return 0;
4208}
4209
851ba692 4210static int wrmsr_interception(struct vcpu_svm *svm)
6aa8b732 4211{
8fe8ab46 4212 struct msr_data msr;
668f198f
DK
4213 u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
4214 u64 data = kvm_read_edx_eax(&svm->vcpu);
af9ca2d7 4215
8fe8ab46
WA
4216 msr.data = data;
4217 msr.index = ecx;
4218 msr.host_initiated = false;
af9ca2d7 4219
5fdbf976 4220 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
854e8bb1 4221 if (kvm_set_msr(&svm->vcpu, &msr)) {
59200273 4222 trace_kvm_msr_write_ex(ecx, data);
c1a5d4f9 4223 kvm_inject_gp(&svm->vcpu, 0);
b742c1e6 4224 return 1;
59200273
AK
4225 } else {
4226 trace_kvm_msr_write(ecx, data);
b742c1e6 4227 return kvm_skip_emulated_instruction(&svm->vcpu);
59200273 4228 }
6aa8b732
AK
4229}
4230
851ba692 4231static int msr_interception(struct vcpu_svm *svm)
6aa8b732 4232{
e756fc62 4233 if (svm->vmcb->control.exit_info_1)
851ba692 4234 return wrmsr_interception(svm);
6aa8b732 4235 else
851ba692 4236 return rdmsr_interception(svm);
6aa8b732
AK
4237}
4238
851ba692 4239static int interrupt_window_interception(struct vcpu_svm *svm)
c1150d8c 4240{
3842d135 4241 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
f0b85051 4242 svm_clear_vintr(svm);
85f455f7 4243 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
decdbf6a 4244 mark_dirty(svm->vmcb, VMCB_INTR);
675acb75 4245 ++svm->vcpu.stat.irq_window_exits;
c1150d8c
DL
4246 return 1;
4247}
4248
565d0998
ML
4249static int pause_interception(struct vcpu_svm *svm)
4250{
de63ad4c
LM
4251 struct kvm_vcpu *vcpu = &svm->vcpu;
4252 bool in_kernel = (svm_get_cpl(vcpu) == 0);
4253
4254 kvm_vcpu_on_spin(vcpu, in_kernel);
565d0998
ML
4255 return 1;
4256}
4257
87c00572
GS
4258static int nop_interception(struct vcpu_svm *svm)
4259{
b742c1e6 4260 return kvm_skip_emulated_instruction(&(svm->vcpu));
87c00572
GS
4261}
4262
4263static int monitor_interception(struct vcpu_svm *svm)
4264{
4265 printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
4266 return nop_interception(svm);
4267}
4268
4269static int mwait_interception(struct vcpu_svm *svm)
4270{
4271 printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
4272 return nop_interception(svm);
4273}
4274
18f40c53
SS
4275enum avic_ipi_failure_cause {
4276 AVIC_IPI_FAILURE_INVALID_INT_TYPE,
4277 AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
4278 AVIC_IPI_FAILURE_INVALID_TARGET,
4279 AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
4280};
4281
4282static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
4283{
4284 u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
4285 u32 icrl = svm->vmcb->control.exit_info_1;
4286 u32 id = svm->vmcb->control.exit_info_2 >> 32;
5446a979 4287 u32 index = svm->vmcb->control.exit_info_2 & 0xFF;
18f40c53
SS
4288 struct kvm_lapic *apic = svm->vcpu.arch.apic;
4289
4290 trace_kvm_avic_incomplete_ipi(svm->vcpu.vcpu_id, icrh, icrl, id, index);
4291
4292 switch (id) {
4293 case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
4294 /*
4295 * AVIC hardware handles the generation of
4296 * IPIs when the specified Message Type is Fixed
4297 * (also known as fixed delivery mode) and
4298 * the Trigger Mode is edge-triggered. The hardware
4299 * also supports self and broadcast delivery modes
 4300 * specified via the Destination Shorthand (DSH)
 4301 * field of the ICRL. Logical and physical APIC ID
 4302 * formats are supported. All other IPI types cause
 4303 * a #VMEXIT, which needs to be emulated.
4304 */
4305 kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
4306 kvm_lapic_reg_write(apic, APIC_ICR, icrl);
4307 break;
4308 case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: {
4309 int i;
4310 struct kvm_vcpu *vcpu;
4311 struct kvm *kvm = svm->vcpu.kvm;
4312 struct kvm_lapic *apic = svm->vcpu.arch.apic;
4313
4314 /*
4315 * At this point, we expect that the AVIC HW has already
4316 * set the appropriate IRR bits on the valid target
4317 * vcpus. So, we just need to kick the appropriate vcpu.
4318 */
4319 kvm_for_each_vcpu(i, vcpu, kvm) {
4320 bool m = kvm_apic_match_dest(vcpu, apic,
4321 icrl & KVM_APIC_SHORT_MASK,
4322 GET_APIC_DEST_FIELD(icrh),
4323 icrl & KVM_APIC_DEST_MASK);
4324
4325 if (m && !avic_vcpu_is_running(vcpu))
4326 kvm_vcpu_wake_up(vcpu);
4327 }
4328 break;
4329 }
4330 case AVIC_IPI_FAILURE_INVALID_TARGET:
4331 break;
4332 case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
4333 WARN_ONCE(1, "Invalid backing page\n");
4334 break;
4335 default:
4336 pr_err("Unknown IPI interception\n");
4337 }
4338
4339 return 1;
4340}
4341
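/*
 * Return a pointer to the entry in the per-VM AVIC logical APIC ID table
 * that corresponds to the logical ID in @ldr: in flat mode the index is the
 * bit position of the destination bit (0-7), in cluster mode it is built
 * from the cluster number and the APIC number within the cluster.
 */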
4342static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
4343{
4344 struct kvm_arch *vm_data = &vcpu->kvm->arch;
4345 int index;
4346 u32 *logical_apic_id_table;
4347 int dlid = GET_APIC_LOGICAL_ID(ldr);
4348
4349 if (!dlid)
4350 return NULL;
4351
4352 if (flat) { /* flat */
4353 index = ffs(dlid) - 1;
4354 if (index > 7)
4355 return NULL;
4356 } else { /* cluster */
4357 int cluster = (dlid & 0xf0) >> 4;
4358 int apic = ffs(dlid & 0x0f) - 1;
4359
4360 if ((apic < 0) || (apic > 7) ||
4361 (cluster >= 0xf))
4362 return NULL;
4363 index = (cluster << 2) + apic;
4364 }
4365
4366 logical_apic_id_table = (u32 *) page_address(vm_data->avic_logical_id_table_page);
4367
4368 return &logical_apic_id_table[index];
4369}
4370
4371static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr,
4372 bool valid)
4373{
4374 bool flat;
4375 u32 *entry, new_entry;
4376
4377 flat = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR) == APIC_DFR_FLAT;
4378 entry = avic_get_logical_id_entry(vcpu, ldr, flat);
4379 if (!entry)
4380 return -EINVAL;
4381
4382 new_entry = READ_ONCE(*entry);
4383 new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
4384 new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK);
4385 if (valid)
4386 new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
4387 else
4388 new_entry &= ~AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
4389 WRITE_ONCE(*entry, new_entry);
4390
4391 return 0;
4392}
4393
4394static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
4395{
4396 int ret;
4397 struct vcpu_svm *svm = to_svm(vcpu);
4398 u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR);
4399
4400 if (!ldr)
4401 return 1;
4402
4403 ret = avic_ldr_write(vcpu, vcpu->vcpu_id, ldr, true);
4404 if (ret && svm->ldr_reg) {
4405 avic_ldr_write(vcpu, 0, svm->ldr_reg, false);
4406 svm->ldr_reg = 0;
4407 } else {
4408 svm->ldr_reg = ldr;
4409 }
4410 return ret;
4411}
4412
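/*
 * The guest changed its APIC ID: move the AVIC physical ID table entry from
 * the old index (vcpu_id) to the new one, update the cached pointer, and
 * refresh the logical ID table if an LDR has already been programmed.
 */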
4413static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
4414{
4415 u64 *old, *new;
4416 struct vcpu_svm *svm = to_svm(vcpu);
4417 u32 apic_id_reg = kvm_lapic_get_reg(vcpu->arch.apic, APIC_ID);
4418 u32 id = (apic_id_reg >> 24) & 0xff;
4419
4420 if (vcpu->vcpu_id == id)
4421 return 0;
4422
4423 old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id);
4424 new = avic_get_physical_id_entry(vcpu, id);
4425 if (!new || !old)
4426 return 1;
4427
 4428 /* We need to move the physical_id_entry to the new offset */
4429 *new = *old;
4430 *old = 0ULL;
4431 to_svm(vcpu)->avic_physical_id_cache = new;
4432
4433 /*
4434 * Also update the guest physical APIC ID in the logical
 4435 * APIC ID table entry if the LDR has already been set up.
4436 */
4437 if (svm->ldr_reg)
4438 avic_handle_ldr_update(vcpu);
4439
4440 return 0;
4441}
4442
4443static int avic_handle_dfr_update(struct kvm_vcpu *vcpu)
4444{
4445 struct vcpu_svm *svm = to_svm(vcpu);
4446 struct kvm_arch *vm_data = &vcpu->kvm->arch;
4447 u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR);
4448 u32 mod = (dfr >> 28) & 0xf;
4449
4450 /*
4451 * We assume that all local APICs are using the same type.
4452 * If this changes, we need to flush the AVIC logical
 4453 * APIC ID table.
4454 */
4455 if (vm_data->ldr_mode == mod)
4456 return 0;
4457
4458 clear_page(page_address(vm_data->avic_logical_id_table_page));
4459 vm_data->ldr_mode = mod;
4460
4461 if (svm->ldr_reg)
4462 avic_handle_ldr_update(vcpu);
4463 return 0;
4464}
4465
4466static int avic_unaccel_trap_write(struct vcpu_svm *svm)
4467{
4468 struct kvm_lapic *apic = svm->vcpu.arch.apic;
4469 u32 offset = svm->vmcb->control.exit_info_1 &
4470 AVIC_UNACCEL_ACCESS_OFFSET_MASK;
4471
4472 switch (offset) {
4473 case APIC_ID:
4474 if (avic_handle_apic_id_update(&svm->vcpu))
4475 return 0;
4476 break;
4477 case APIC_LDR:
4478 if (avic_handle_ldr_update(&svm->vcpu))
4479 return 0;
4480 break;
4481 case APIC_DFR:
4482 avic_handle_dfr_update(&svm->vcpu);
4483 break;
4484 default:
4485 break;
4486 }
4487
4488 kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));
4489
4490 return 1;
4491}
4492
4493static bool is_avic_unaccelerated_access_trap(u32 offset)
4494{
4495 bool ret = false;
4496
4497 switch (offset) {
4498 case APIC_ID:
4499 case APIC_EOI:
4500 case APIC_RRR:
4501 case APIC_LDR:
4502 case APIC_DFR:
4503 case APIC_SPIV:
4504 case APIC_ESR:
4505 case APIC_ICR:
4506 case APIC_LVTT:
4507 case APIC_LVTTHMR:
4508 case APIC_LVTPC:
4509 case APIC_LVT0:
4510 case APIC_LVT1:
4511 case APIC_LVTERR:
4512 case APIC_TMICT:
4513 case APIC_TDCR:
4514 ret = true;
4515 break;
4516 default:
4517 break;
4518 }
4519 return ret;
4520}
4521
4522static int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
4523{
4524 int ret = 0;
4525 u32 offset = svm->vmcb->control.exit_info_1 &
4526 AVIC_UNACCEL_ACCESS_OFFSET_MASK;
4527 u32 vector = svm->vmcb->control.exit_info_2 &
4528 AVIC_UNACCEL_ACCESS_VECTOR_MASK;
4529 bool write = (svm->vmcb->control.exit_info_1 >> 32) &
4530 AVIC_UNACCEL_ACCESS_WRITE_MASK;
4531 bool trap = is_avic_unaccelerated_access_trap(offset);
4532
4533 trace_kvm_avic_unaccelerated_access(svm->vcpu.vcpu_id, offset,
4534 trap, write, vector);
4535 if (trap) {
4536 /* Handling Trap */
4537 WARN_ONCE(!write, "svm: Handling trap read.\n");
4538 ret = avic_unaccel_trap_write(svm);
4539 } else {
4540 /* Handling Fault */
4541 ret = (emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE);
4542 }
4543
4544 return ret;
4545}
4546
09941fbb 4547static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
7ff76d58
AP
4548 [SVM_EXIT_READ_CR0] = cr_interception,
4549 [SVM_EXIT_READ_CR3] = cr_interception,
4550 [SVM_EXIT_READ_CR4] = cr_interception,
4551 [SVM_EXIT_READ_CR8] = cr_interception,
5e57518d 4552 [SVM_EXIT_CR0_SEL_WRITE] = cr_interception,
628afd2a 4553 [SVM_EXIT_WRITE_CR0] = cr_interception,
7ff76d58
AP
4554 [SVM_EXIT_WRITE_CR3] = cr_interception,
4555 [SVM_EXIT_WRITE_CR4] = cr_interception,
e0231715 4556 [SVM_EXIT_WRITE_CR8] = cr8_write_interception,
cae3797a
AP
4557 [SVM_EXIT_READ_DR0] = dr_interception,
4558 [SVM_EXIT_READ_DR1] = dr_interception,
4559 [SVM_EXIT_READ_DR2] = dr_interception,
4560 [SVM_EXIT_READ_DR3] = dr_interception,
4561 [SVM_EXIT_READ_DR4] = dr_interception,
4562 [SVM_EXIT_READ_DR5] = dr_interception,
4563 [SVM_EXIT_READ_DR6] = dr_interception,
4564 [SVM_EXIT_READ_DR7] = dr_interception,
4565 [SVM_EXIT_WRITE_DR0] = dr_interception,
4566 [SVM_EXIT_WRITE_DR1] = dr_interception,
4567 [SVM_EXIT_WRITE_DR2] = dr_interception,
4568 [SVM_EXIT_WRITE_DR3] = dr_interception,
4569 [SVM_EXIT_WRITE_DR4] = dr_interception,
4570 [SVM_EXIT_WRITE_DR5] = dr_interception,
4571 [SVM_EXIT_WRITE_DR6] = dr_interception,
4572 [SVM_EXIT_WRITE_DR7] = dr_interception,
d0bfb940
JK
4573 [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception,
4574 [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception,
7aa81cc0 4575 [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception,
e0231715 4576 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
e0231715 4577 [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception,
54a20552 4578 [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception,
9718420e 4579 [SVM_EXIT_EXCP_BASE + GP_VECTOR] = gp_interception,
e0231715 4580 [SVM_EXIT_INTR] = intr_interception,
c47f098d 4581 [SVM_EXIT_NMI] = nmi_interception,
6aa8b732
AK
4582 [SVM_EXIT_SMI] = nop_on_interception,
4583 [SVM_EXIT_INIT] = nop_on_interception,
c1150d8c 4584 [SVM_EXIT_VINTR] = interrupt_window_interception,
332b56e4 4585 [SVM_EXIT_RDPMC] = rdpmc_interception,
6aa8b732 4586 [SVM_EXIT_CPUID] = cpuid_interception,
95ba8273 4587 [SVM_EXIT_IRET] = iret_interception,
cf5a94d1 4588 [SVM_EXIT_INVD] = emulate_on_interception,
565d0998 4589 [SVM_EXIT_PAUSE] = pause_interception,
6aa8b732 4590 [SVM_EXIT_HLT] = halt_interception,
a7052897 4591 [SVM_EXIT_INVLPG] = invlpg_interception,
ff092385 4592 [SVM_EXIT_INVLPGA] = invlpga_interception,
e0231715 4593 [SVM_EXIT_IOIO] = io_interception,
6aa8b732
AK
4594 [SVM_EXIT_MSR] = msr_interception,
4595 [SVM_EXIT_TASK_SWITCH] = task_switch_interception,
46fe4ddd 4596 [SVM_EXIT_SHUTDOWN] = shutdown_interception,
3d6368ef 4597 [SVM_EXIT_VMRUN] = vmrun_interception,
02e235bc 4598 [SVM_EXIT_VMMCALL] = vmmcall_interception,
5542675b
AG
4599 [SVM_EXIT_VMLOAD] = vmload_interception,
4600 [SVM_EXIT_VMSAVE] = vmsave_interception,
1371d904
AG
4601 [SVM_EXIT_STGI] = stgi_interception,
4602 [SVM_EXIT_CLGI] = clgi_interception,
532a46b9 4603 [SVM_EXIT_SKINIT] = skinit_interception,
dab429a7 4604 [SVM_EXIT_WBINVD] = wbinvd_interception,
87c00572
GS
4605 [SVM_EXIT_MONITOR] = monitor_interception,
4606 [SVM_EXIT_MWAIT] = mwait_interception,
81dd35d4 4607 [SVM_EXIT_XSETBV] = xsetbv_interception,
d0006530 4608 [SVM_EXIT_NPF] = npf_interception,
7607b717 4609 [SVM_EXIT_RSM] = rsm_interception,
18f40c53
SS
4610 [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception,
4611 [SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception,
6aa8b732
AK
4612};
4613
ae8cc059 4614static void dump_vmcb(struct kvm_vcpu *vcpu)
3f10c846
JR
4615{
4616 struct vcpu_svm *svm = to_svm(vcpu);
4617 struct vmcb_control_area *control = &svm->vmcb->control;
4618 struct vmcb_save_area *save = &svm->vmcb->save;
4619
4620 pr_err("VMCB Control Area:\n");
ae8cc059
JP
4621 pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff);
4622 pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16);
4623 pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff);
4624 pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16);
4625 pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions);
4626 pr_err("%-20s%016llx\n", "intercepts:", control->intercept);
4627 pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count);
4628 pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa);
4629 pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa);
4630 pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset);
4631 pr_err("%-20s%d\n", "asid:", control->asid);
4632 pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl);
4633 pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl);
4634 pr_err("%-20s%08x\n", "int_vector:", control->int_vector);
4635 pr_err("%-20s%08x\n", "int_state:", control->int_state);
4636 pr_err("%-20s%08x\n", "exit_code:", control->exit_code);
4637 pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1);
4638 pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2);
4639 pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info);
4640 pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err);
4641 pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
4642 pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
44a95dae 4643 pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar);
ae8cc059
JP
4644 pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
4645 pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
0dc92119 4646 pr_err("%-20s%lld\n", "virt_ext:", control->virt_ext);
ae8cc059 4647 pr_err("%-20s%016llx\n", "next_rip:", control->next_rip);
44a95dae
SS
4648 pr_err("%-20s%016llx\n", "avic_backing_page:", control->avic_backing_page);
4649 pr_err("%-20s%016llx\n", "avic_logical_id:", control->avic_logical_id);
4650 pr_err("%-20s%016llx\n", "avic_physical_id:", control->avic_physical_id);
3f10c846 4651 pr_err("VMCB State Save Area:\n");
ae8cc059
JP
4652 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4653 "es:",
4654 save->es.selector, save->es.attrib,
4655 save->es.limit, save->es.base);
4656 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4657 "cs:",
4658 save->cs.selector, save->cs.attrib,
4659 save->cs.limit, save->cs.base);
4660 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4661 "ss:",
4662 save->ss.selector, save->ss.attrib,
4663 save->ss.limit, save->ss.base);
4664 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4665 "ds:",
4666 save->ds.selector, save->ds.attrib,
4667 save->ds.limit, save->ds.base);
4668 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4669 "fs:",
4670 save->fs.selector, save->fs.attrib,
4671 save->fs.limit, save->fs.base);
4672 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4673 "gs:",
4674 save->gs.selector, save->gs.attrib,
4675 save->gs.limit, save->gs.base);
4676 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4677 "gdtr:",
4678 save->gdtr.selector, save->gdtr.attrib,
4679 save->gdtr.limit, save->gdtr.base);
4680 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4681 "ldtr:",
4682 save->ldtr.selector, save->ldtr.attrib,
4683 save->ldtr.limit, save->ldtr.base);
4684 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4685 "idtr:",
4686 save->idtr.selector, save->idtr.attrib,
4687 save->idtr.limit, save->idtr.base);
4688 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4689 "tr:",
4690 save->tr.selector, save->tr.attrib,
4691 save->tr.limit, save->tr.base);
3f10c846
JR
4692 pr_err("cpl: %d efer: %016llx\n",
4693 save->cpl, save->efer);
ae8cc059
JP
4694 pr_err("%-15s %016llx %-13s %016llx\n",
4695 "cr0:", save->cr0, "cr2:", save->cr2);
4696 pr_err("%-15s %016llx %-13s %016llx\n",
4697 "cr3:", save->cr3, "cr4:", save->cr4);
4698 pr_err("%-15s %016llx %-13s %016llx\n",
4699 "dr6:", save->dr6, "dr7:", save->dr7);
4700 pr_err("%-15s %016llx %-13s %016llx\n",
4701 "rip:", save->rip, "rflags:", save->rflags);
4702 pr_err("%-15s %016llx %-13s %016llx\n",
4703 "rsp:", save->rsp, "rax:", save->rax);
4704 pr_err("%-15s %016llx %-13s %016llx\n",
4705 "star:", save->star, "lstar:", save->lstar);
4706 pr_err("%-15s %016llx %-13s %016llx\n",
4707 "cstar:", save->cstar, "sfmask:", save->sfmask);
4708 pr_err("%-15s %016llx %-13s %016llx\n",
4709 "kernel_gs_base:", save->kernel_gs_base,
4710 "sysenter_cs:", save->sysenter_cs);
4711 pr_err("%-15s %016llx %-13s %016llx\n",
4712 "sysenter_esp:", save->sysenter_esp,
4713 "sysenter_eip:", save->sysenter_eip);
4714 pr_err("%-15s %016llx %-13s %016llx\n",
4715 "gpat:", save->g_pat, "dbgctl:", save->dbgctl);
4716 pr_err("%-15s %016llx %-13s %016llx\n",
4717 "br_from:", save->br_from, "br_to:", save->br_to);
4718 pr_err("%-15s %016llx %-13s %016llx\n",
4719 "excp_from:", save->last_excp_from,
4720 "excp_to:", save->last_excp_to);
3f10c846
JR
4721}
4722
586f9607
AK
4723static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
4724{
4725 struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
4726
4727 *info1 = control->exit_info_1;
4728 *info2 = control->exit_info_2;
4729}
4730
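/*
 * Top-level #VMEXIT dispatcher: give a nested (L1) hypervisor the chance to
 * handle the exit first, complete any pending event injection, and then hand
 * the exit code to the matching entry in svm_exit_handlers[].
 */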
851ba692 4731static int handle_exit(struct kvm_vcpu *vcpu)
6aa8b732 4732{
04d2cc77 4733 struct vcpu_svm *svm = to_svm(vcpu);
851ba692 4734 struct kvm_run *kvm_run = vcpu->run;
a2fa3e9f 4735 u32 exit_code = svm->vmcb->control.exit_code;
6aa8b732 4736
8b89fe1f
PB
4737 trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
4738
4ee546b4 4739 if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
2be4fc7a
JR
4740 vcpu->arch.cr0 = svm->vmcb->save.cr0;
4741 if (npt_enabled)
4742 vcpu->arch.cr3 = svm->vmcb->save.cr3;
af9ca2d7 4743
cd3ff653
JR
4744 if (unlikely(svm->nested.exit_required)) {
4745 nested_svm_vmexit(svm);
4746 svm->nested.exit_required = false;
4747
4748 return 1;
4749 }
4750
2030753d 4751 if (is_guest_mode(vcpu)) {
410e4d57
JR
4752 int vmexit;
4753
d8cabddf
JR
4754 trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
4755 svm->vmcb->control.exit_info_1,
4756 svm->vmcb->control.exit_info_2,
4757 svm->vmcb->control.exit_int_info,
e097e5ff
SH
4758 svm->vmcb->control.exit_int_info_err,
4759 KVM_ISA_SVM);
d8cabddf 4760
410e4d57
JR
4761 vmexit = nested_svm_exit_special(svm);
4762
4763 if (vmexit == NESTED_EXIT_CONTINUE)
4764 vmexit = nested_svm_exit_handled(svm);
4765
4766 if (vmexit == NESTED_EXIT_DONE)
cf74a78b 4767 return 1;
cf74a78b
AG
4768 }
4769
a5c3832d
JR
4770 svm_complete_interrupts(svm);
4771
04d2cc77
AK
4772 if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
4773 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
4774 kvm_run->fail_entry.hardware_entry_failure_reason
4775 = svm->vmcb->control.exit_code;
3f10c846
JR
4776 pr_err("KVM: FAILED VMRUN WITH VMCB:\n");
4777 dump_vmcb(vcpu);
04d2cc77
AK
4778 return 0;
4779 }
4780
a2fa3e9f 4781 if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
709ddebf 4782 exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
55c5e464
JR
4783 exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH &&
4784 exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI)
6614c7d0 4785 printk(KERN_ERR "%s: unexpected exit_int_info 0x%x "
6aa8b732 4786 "exit_code 0x%x\n",
b8688d51 4787 __func__, svm->vmcb->control.exit_int_info,
6aa8b732
AK
4788 exit_code);
4789
9d8f549d 4790 if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
56919c5c 4791 || !svm_exit_handlers[exit_code]) {
faac2458 4792 WARN_ONCE(1, "svm: unexpected exit reason 0x%x\n", exit_code);
2bc19dc3
MT
4793 kvm_queue_exception(vcpu, UD_VECTOR);
4794 return 1;
6aa8b732
AK
4795 }
4796
851ba692 4797 return svm_exit_handlers[exit_code](svm);
6aa8b732
AK
4798}
4799
4800static void reload_tss(struct kvm_vcpu *vcpu)
4801{
4802 int cpu = raw_smp_processor_id();
4803
0fe1e009
TH
4804 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
4805 sd->tss_desc->type = 9; /* available 32/64-bit TSS */
6aa8b732
AK
4806 load_TR_desc();
4807}
4808
70cd94e6
BS
4809static void pre_sev_run(struct vcpu_svm *svm, int cpu)
4810{
4811 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
4812 int asid = sev_get_asid(svm->vcpu.kvm);
4813
 4814 /* Assign the ASID allocated for this SEV guest */
4815 svm->vmcb->control.asid = asid;
4816
4817 /*
4818 * Flush guest TLB:
4819 *
 4820 * 1) when a different VMCB for the same ASID is to be run on the same host CPU, or
 4821 * 2) this VMCB was executed on a different host CPU in previous VMRUNs.
4822 */
4823 if (sd->sev_vmcbs[asid] == svm->vmcb &&
4824 svm->last_cpu == cpu)
4825 return;
4826
4827 svm->last_cpu = cpu;
4828 sd->sev_vmcbs[asid] = svm->vmcb;
4829 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
4830 mark_dirty(svm->vmcb, VMCB_ASID);
4831}
4832
e756fc62 4833static void pre_svm_run(struct vcpu_svm *svm)
6aa8b732
AK
4834{
4835 int cpu = raw_smp_processor_id();
4836
0fe1e009 4837 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
6aa8b732 4838
70cd94e6
BS
4839 if (sev_guest(svm->vcpu.kvm))
4840 return pre_sev_run(svm, cpu);
4841
4b656b12 4842 /* FIXME: handle wraparound of asid_generation */
0fe1e009
TH
4843 if (svm->asid_generation != sd->asid_generation)
4844 new_asid(svm, sd);
6aa8b732
AK
4845}
4846
95ba8273
GN
4847static void svm_inject_nmi(struct kvm_vcpu *vcpu)
4848{
4849 struct vcpu_svm *svm = to_svm(vcpu);
4850
4851 svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
4852 vcpu->arch.hflags |= HF_NMI_MASK;
8a05a1b8 4853 set_intercept(svm, INTERCEPT_IRET);
95ba8273
GN
4854 ++vcpu->stat.nmi_injections;
4855}
6aa8b732 4856
85f455f7 4857static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
6aa8b732
AK
4858{
4859 struct vmcb_control_area *control;
4860
340d3bc3 4861 /* The following fields are ignored when AVIC is enabled */
e756fc62 4862 control = &svm->vmcb->control;
85f455f7 4863 control->int_vector = irq;
6aa8b732
AK
4864 control->int_ctl &= ~V_INTR_PRIO_MASK;
4865 control->int_ctl |= V_IRQ_MASK |
4866 ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
decdbf6a 4867 mark_dirty(svm->vmcb, VMCB_INTR);
6aa8b732
AK
4868}
4869
66fd3f7f 4870static void svm_set_irq(struct kvm_vcpu *vcpu)
2a8067f1
ED
4871{
4872 struct vcpu_svm *svm = to_svm(vcpu);
4873
2af9194d 4874 BUG_ON(!(gif_set(svm)));
cf74a78b 4875
9fb2d2b4
GN
4876 trace_kvm_inj_virq(vcpu->arch.interrupt.nr);
4877 ++vcpu->stat.irq_injections;
4878
219b65dc
AG
4879 svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr |
4880 SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
2a8067f1
ED
4881}
4882
3bbf3565
SS
4883static inline bool svm_nested_virtualize_tpr(struct kvm_vcpu *vcpu)
4884{
4885 return is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK);
4886}
4887
95ba8273 4888static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
aaacfc9a
JR
4889{
4890 struct vcpu_svm *svm = to_svm(vcpu);
aaacfc9a 4891
3bbf3565
SS
4892 if (svm_nested_virtualize_tpr(vcpu) ||
4893 kvm_vcpu_apicv_active(vcpu))
88ab24ad
JR
4894 return;
4895
596f3142
RK
4896 clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
4897
95ba8273 4898 if (irr == -1)
aaacfc9a
JR
4899 return;
4900
95ba8273 4901 if (tpr >= irr)
4ee546b4 4902 set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
95ba8273 4903}
aaacfc9a 4904
8d14695f
YZ
4905static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
4906{
4907 return;
4908}
4909
b2a05fef 4910static bool svm_get_enable_apicv(struct kvm_vcpu *vcpu)
d62caabb 4911{
67034bb9 4912 return avic && irqchip_split(vcpu->kvm);
44a95dae
SS
4913}
4914
4915static void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
4916{
d62caabb
AS
4917}
4918
67c9dddc 4919static void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
44a95dae 4920{
d62caabb
AS
4921}
4922
44a95dae 4923/* Note: Currently only used by Hyper-V. */
d62caabb 4924static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
c7c9c56c 4925{
44a95dae
SS
4926 struct vcpu_svm *svm = to_svm(vcpu);
4927 struct vmcb *vmcb = svm->vmcb;
4928
67034bb9 4929 if (!kvm_vcpu_apicv_active(&svm->vcpu))
44a95dae
SS
4930 return;
4931
4932 vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
4933 mark_dirty(vmcb, VMCB_INTR);
c7c9c56c
YZ
4934}
4935
6308630b 4936static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
c7c9c56c
YZ
4937{
4938 return;
4939}
4940
340d3bc3
SS
4941static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
4942{
4943 kvm_lapic_set_irr(vec, vcpu->arch.apic);
4944 smp_mb__after_atomic();
4945
4946 if (avic_vcpu_is_running(vcpu))
4947 wrmsrl(SVM_AVIC_DOORBELL,
7d669f50 4948 kvm_cpu_get_apicid(vcpu->cpu));
340d3bc3
SS
4949 else
4950 kvm_vcpu_wake_up(vcpu);
4951}
4952
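/*
 * Remove the IOMMU interrupt-remapping entry described by @pi from this
 * vCPU's ir_list, if it is present.
 */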
411b44ba
SS
4953static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
4954{
4955 unsigned long flags;
4956 struct amd_svm_iommu_ir *cur;
4957
4958 spin_lock_irqsave(&svm->ir_list_lock, flags);
4959 list_for_each_entry(cur, &svm->ir_list, node) {
4960 if (cur->data != pi->ir_data)
4961 continue;
4962 list_del(&cur->node);
4963 kfree(cur);
4964 break;
4965 }
4966 spin_unlock_irqrestore(&svm->ir_list_lock, flags);
4967}
4968
4969static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
4970{
4971 int ret = 0;
4972 unsigned long flags;
4973 struct amd_svm_iommu_ir *ir;
4974
4975 /**
 4976 * In some cases, the existing irte is updated and re-set,
 4977 * so we need to check here if it's already been added
4978 * to the ir_list.
4979 */
4980 if (pi->ir_data && (pi->prev_ga_tag != 0)) {
4981 struct kvm *kvm = svm->vcpu.kvm;
4982 u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag);
4983 struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
4984 struct vcpu_svm *prev_svm;
4985
4986 if (!prev_vcpu) {
4987 ret = -EINVAL;
4988 goto out;
4989 }
4990
4991 prev_svm = to_svm(prev_vcpu);
4992 svm_ir_list_del(prev_svm, pi);
4993 }
4994
4995 /**
 4996 * Allocate a new amd_iommu_pi_data, which will get
 4997 * added to the per-vcpu ir_list.
4998 */
4999 ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL);
5000 if (!ir) {
5001 ret = -ENOMEM;
5002 goto out;
5003 }
5004 ir->data = pi->ir_data;
5005
5006 spin_lock_irqsave(&svm->ir_list_lock, flags);
5007 list_add(&ir->node, &svm->ir_list);
5008 spin_unlock_irqrestore(&svm->ir_list_lock, flags);
5009out:
5010 return ret;
5011}
5012
5013/**
5014 * Note:
5015 * The HW cannot support posting multicast/broadcast
5016 * interrupts to a vCPU. So, we still use legacy interrupt
5017 * remapping for these kind of interrupts.
5018 *
5019 * For lowest-priority interrupts, we only support
5020 * those with single CPU as the destination, e.g. user
5021 * configures the interrupts via /proc/irq or uses
5022 * irqbalance to make the interrupts single-CPU.
5023 */
5024static int
5025get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
5026 struct vcpu_data *vcpu_info, struct vcpu_svm **svm)
5027{
5028 struct kvm_lapic_irq irq;
5029 struct kvm_vcpu *vcpu = NULL;
5030
5031 kvm_set_msi_irq(kvm, e, &irq);
5032
5033 if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
5034 pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
5035 __func__, irq.vector);
5036 return -1;
5037 }
5038
5039 pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
5040 irq.vector);
5041 *svm = to_svm(vcpu);
d0ec49d4 5042 vcpu_info->pi_desc_addr = __sme_set(page_to_phys((*svm)->avic_backing_page));
411b44ba
SS
5043 vcpu_info->vector = irq.vector;
5044
5045 return 0;
5046}
5047
5048/*
5049 * svm_update_pi_irte - set IRTE for Posted-Interrupts
5050 *
5051 * @kvm: kvm
5052 * @host_irq: host irq of the interrupt
5053 * @guest_irq: gsi of the interrupt
5054 * @set: set or unset PI
5055 * returns 0 on success, < 0 on failure
5056 */
5057static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
5058 uint32_t guest_irq, bool set)
5059{
5060 struct kvm_kernel_irq_routing_entry *e;
5061 struct kvm_irq_routing_table *irq_rt;
5062 int idx, ret = -EINVAL;
5063
5064 if (!kvm_arch_has_assigned_device(kvm) ||
5065 !irq_remapping_cap(IRQ_POSTING_CAP))
5066 return 0;
5067
5068 pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
5069 __func__, host_irq, guest_irq, set);
5070
5071 idx = srcu_read_lock(&kvm->irq_srcu);
5072 irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
5073 WARN_ON(guest_irq >= irq_rt->nr_rt_entries);
5074
5075 hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
5076 struct vcpu_data vcpu_info;
5077 struct vcpu_svm *svm = NULL;
5078
5079 if (e->type != KVM_IRQ_ROUTING_MSI)
5080 continue;
5081
5082 /**
 5083 * Here, we set up legacy mode in the following cases:
 5084 * 1. When the interrupt cannot be targeted to a specific vcpu.
 5085 * 2. When unsetting a posted interrupt.
 5086 * 3. When APIC virtualization is disabled for the vcpu.
5087 */
5088 if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
5089 kvm_vcpu_apicv_active(&svm->vcpu)) {
5090 struct amd_iommu_pi_data pi;
5091
5092 /* Try to enable guest_mode in IRTE */
d0ec49d4
TL
5093 pi.base = __sme_set(page_to_phys(svm->avic_backing_page) &
5094 AVIC_HPA_MASK);
411b44ba
SS
5095 pi.ga_tag = AVIC_GATAG(kvm->arch.avic_vm_id,
5096 svm->vcpu.vcpu_id);
5097 pi.is_guest_mode = true;
5098 pi.vcpu_data = &vcpu_info;
5099 ret = irq_set_vcpu_affinity(host_irq, &pi);
5100
5101 /**
 5102 * Here, we have successfully set up vcpu affinity in
 5103 * IOMMU guest mode. Now, we need to store the posted
 5104 * interrupt information in a per-vcpu ir_list so that
 5105 * we can reference it directly when we update the vcpu
 5106 * scheduling information in the IOMMU irte.
5107 */
5108 if (!ret && pi.is_guest_mode)
5109 svm_ir_list_add(svm, &pi);
5110 } else {
5111 /* Use legacy mode in IRTE */
5112 struct amd_iommu_pi_data pi;
5113
5114 /**
5115 * Here, pi is used to:
5116 * - Tell IOMMU to use legacy mode for this interrupt.
5117 * - Retrieve ga_tag of prior interrupt remapping data.
5118 */
5119 pi.is_guest_mode = false;
5120 ret = irq_set_vcpu_affinity(host_irq, &pi);
5121
5122 /**
5123 * Check if the posted interrupt was previously
 5124 * set up in guest_mode by checking whether the ga_tag
5125 * was cached. If so, we need to clean up the per-vcpu
5126 * ir_list.
5127 */
5128 if (!ret && pi.prev_ga_tag) {
5129 int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
5130 struct kvm_vcpu *vcpu;
5131
5132 vcpu = kvm_get_vcpu_by_id(kvm, id);
5133 if (vcpu)
5134 svm_ir_list_del(to_svm(vcpu), &pi);
5135 }
5136 }
5137
5138 if (!ret && svm) {
5139 trace_kvm_pi_irte_update(svm->vcpu.vcpu_id,
5140 host_irq, e->gsi,
5141 vcpu_info.vector,
5142 vcpu_info.pi_desc_addr, set);
5143 }
5144
5145 if (ret < 0) {
5146 pr_err("%s: failed to update PI IRTE\n", __func__);
5147 goto out;
5148 }
5149 }
5150
5151 ret = 0;
5152out:
5153 srcu_read_unlock(&kvm->irq_srcu, idx);
5154 return ret;
5155}
5156
95ba8273
GN
5157static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
5158{
5159 struct vcpu_svm *svm = to_svm(vcpu);
5160 struct vmcb *vmcb = svm->vmcb;
924584cc
JR
5161 int ret;
5162 ret = !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
5163 !(svm->vcpu.arch.hflags & HF_NMI_MASK);
5164 ret = ret && gif_set(svm) && nested_svm_nmi(svm);
5165
5166 return ret;
aaacfc9a
JR
5167}
5168
3cfc3092
JK
5169static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
5170{
5171 struct vcpu_svm *svm = to_svm(vcpu);
5172
5173 return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
5174}
5175
5176static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
5177{
5178 struct vcpu_svm *svm = to_svm(vcpu);
5179
5180 if (masked) {
5181 svm->vcpu.arch.hflags |= HF_NMI_MASK;
8a05a1b8 5182 set_intercept(svm, INTERCEPT_IRET);
3cfc3092
JK
5183 } else {
5184 svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
8a05a1b8 5185 clr_intercept(svm, INTERCEPT_IRET);
3cfc3092
JK
5186 }
5187}
5188
78646121
GN
5189static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
5190{
5191 struct vcpu_svm *svm = to_svm(vcpu);
5192 struct vmcb *vmcb = svm->vmcb;
7fcdb510
JR
5193 int ret;
5194
5195 if (!gif_set(svm) ||
5196 (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
5197 return 0;
5198
f6e78475 5199 ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF);
7fcdb510 5200
2030753d 5201 if (is_guest_mode(vcpu))
7fcdb510
JR
5202 return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);
5203
5204 return ret;
78646121
GN
5205}
5206
c9a7953f 5207static void enable_irq_window(struct kvm_vcpu *vcpu)
6aa8b732 5208{
219b65dc 5209 struct vcpu_svm *svm = to_svm(vcpu);
219b65dc 5210
340d3bc3
SS
5211 if (kvm_vcpu_apicv_active(vcpu))
5212 return;
5213
e0231715
JR
5214 /*
5215 * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes
5216 * 1, because that's a separate STGI/VMRUN intercept. The next time we
5217 * get that intercept, this function will be called again though and
640bd6e5
JN
5218 * we'll get the vintr intercept. However, if the vGIF feature is
5219 * enabled, the STGI interception will not occur. Enable the irq
5220 * window under the assumption that the hardware will set the GIF.
e0231715 5221 */
640bd6e5 5222 if ((vgif_enabled(svm) || gif_set(svm)) && nested_svm_intr(svm)) {
219b65dc
AG
5223 svm_set_vintr(svm);
5224 svm_inject_irq(svm, 0x0);
5225 }
85f455f7
ED
5226}
5227
c9a7953f 5228static void enable_nmi_window(struct kvm_vcpu *vcpu)
c1150d8c 5229{
04d2cc77 5230 struct vcpu_svm *svm = to_svm(vcpu);
c1150d8c 5231
44c11430
GN
5232 if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
5233 == HF_NMI_MASK)
c9a7953f 5234 return; /* IRET will cause a vm exit */
44c11430 5235
640bd6e5
JN
5236 if (!gif_set(svm)) {
5237 if (vgif_enabled(svm))
5238 set_intercept(svm, INTERCEPT_STGI);
1a5e1852 5239 return; /* STGI will cause a vm exit */
640bd6e5 5240 }
1a5e1852
LP
5241
5242 if (svm->nested.exit_required)
5243 return; /* we're not going to run the guest yet */
5244
e0231715
JR
5245 /*
5246 * Something prevents the NMI from being injected. Single-step over
5247 * the possible problem (IRET, exception injection or interrupt shadow).
5248 */
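	/*
	 * Save the guest's rflags so they can be restored once the single
	 * step is done, then set TF (and RF, so a pending instruction
	 * breakpoint is not immediately re-triggered).
	 */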
ab2f4d73 5249 svm->nmi_singlestep_guest_rflags = svm_get_rflags(vcpu);
6be7d306 5250 svm->nmi_singlestep = true;
44c11430 5251 svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
c1150d8c
DL
5252}
5253
cbc94022
IE
5254static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
5255{
5256 return 0;
5257}
5258
c2ba05cc 5259static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
d9e368d6 5260{
38e5e92f
JR
5261 struct vcpu_svm *svm = to_svm(vcpu);
5262
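	/*
	 * Prefer a targeted flush of this guest's ASID when the CPU
	 * supports it; otherwise retire the current ASID so a fresh one
	 * (and thus a clean TLB context) is picked up before the next VMRUN.
	 */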
5263 if (static_cpu_has(X86_FEATURE_FLUSHBYASID))
5264 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
5265 else
5266 svm->asid_generation--;
d9e368d6
AK
5267}
5268
04d2cc77
AK
5269static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
5270{
5271}
5272
d7bf8221
JR
5273static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
5274{
5275 struct vcpu_svm *svm = to_svm(vcpu);
5276
3bbf3565 5277 if (svm_nested_virtualize_tpr(vcpu))
88ab24ad
JR
5278 return;
5279
4ee546b4 5280 if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) {
d7bf8221 5281 int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
615d5193 5282 kvm_set_cr8(vcpu, cr8);
d7bf8221
JR
5283 }
5284}
5285
649d6864
JR
5286static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
5287{
5288 struct vcpu_svm *svm = to_svm(vcpu);
5289 u64 cr8;
5290
3bbf3565
SS
5291 if (svm_nested_virtualize_tpr(vcpu) ||
5292 kvm_vcpu_apicv_active(vcpu))
88ab24ad
JR
5293 return;
5294
649d6864
JR
5295 cr8 = kvm_get_cr8(vcpu);
5296 svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
5297 svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
5298}
5299
9222be18
GN
5300static void svm_complete_interrupts(struct vcpu_svm *svm)
5301{
5302 u8 vector;
5303 int type;
5304 u32 exitintinfo = svm->vmcb->control.exit_int_info;
66b7138f
JK
5305 unsigned int3_injected = svm->int3_injected;
5306
5307 svm->int3_injected = 0;
9222be18 5308
bd3d1ec3
AK
5309 /*
5310 * If we've made progress since setting HF_IRET_MASK, we've
5311 * executed an IRET and can allow NMI injection.
5312 */
5313 if ((svm->vcpu.arch.hflags & HF_IRET_MASK)
5314 && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) {
44c11430 5315 svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
3842d135
AK
5316 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
5317 }
44c11430 5318
9222be18
GN
5319 svm->vcpu.arch.nmi_injected = false;
5320 kvm_clear_exception_queue(&svm->vcpu);
5321 kvm_clear_interrupt_queue(&svm->vcpu);
5322
5323 if (!(exitintinfo & SVM_EXITINTINFO_VALID))
5324 return;
5325
3842d135
AK
5326 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
5327
9222be18
GN
5328 vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
5329 type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
5330
5331 switch (type) {
5332 case SVM_EXITINTINFO_TYPE_NMI:
5333 svm->vcpu.arch.nmi_injected = true;
5334 break;
5335 case SVM_EXITINTINFO_TYPE_EXEPT:
66b7138f
JK
5336 /*
5337 * In case of software exceptions, do not reinject the vector,
5338 * but re-execute the instruction instead. Rewind RIP first
5339 * if we emulated INT3 before.
5340 */
5341 if (kvm_exception_is_soft(vector)) {
5342 if (vector == BP_VECTOR && int3_injected &&
5343 kvm_is_linear_rip(&svm->vcpu, svm->int3_rip))
5344 kvm_rip_write(&svm->vcpu,
5345 kvm_rip_read(&svm->vcpu) -
5346 int3_injected);
9222be18 5347 break;
66b7138f 5348 }
9222be18
GN
5349 if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
5350 u32 err = svm->vmcb->control.exit_int_info_err;
ce7ddec4 5351 kvm_requeue_exception_e(&svm->vcpu, vector, err);
9222be18
GN
5352
5353 } else
ce7ddec4 5354 kvm_requeue_exception(&svm->vcpu, vector);
9222be18
GN
5355 break;
5356 case SVM_EXITINTINFO_TYPE_INTR:
66fd3f7f 5357 kvm_queue_interrupt(&svm->vcpu, vector, false);
9222be18
GN
5358 break;
5359 default:
5360 break;
5361 }
5362}
5363
b463a6f7
AK
5364static void svm_cancel_injection(struct kvm_vcpu *vcpu)
5365{
5366 struct vcpu_svm *svm = to_svm(vcpu);
5367 struct vmcb_control_area *control = &svm->vmcb->control;
5368
5369 control->exit_int_info = control->event_inj;
5370 control->exit_int_info_err = control->event_inj_err;
5371 control->event_inj = 0;
5372 svm_complete_interrupts(svm);
5373}
5374
851ba692 5375static void svm_vcpu_run(struct kvm_vcpu *vcpu)
6aa8b732 5376{
a2fa3e9f 5377 struct vcpu_svm *svm = to_svm(vcpu);
d9e368d6 5378
2041a06a
JR
5379 svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
5380 svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
5381 svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
5382
cd3ff653
JR
5383 /*
5384 * A vmexit emulation is required before the vcpu can be executed
5385 * again.
5386 */
5387 if (unlikely(svm->nested.exit_required))
5388 return;
5389
a12713c2
LP
5390 /*
5391 * Disable singlestep if we're injecting an interrupt/exception.
5392 * We don't want our modified rflags to be pushed on the stack where
5393 * we might not be able to easily reset them if we disabled NMI
5394 * singlestep later.
5395 */
5396 if (svm->nmi_singlestep && svm->vmcb->control.event_inj) {
5397 /*
5398 * Event injection happens before external interrupts cause a
5399 * vmexit and interrupts are disabled here, so smp_send_reschedule
5400 * is enough to force an immediate vmexit.
5401 */
5402 disable_nmi_singlestep(svm);
5403 smp_send_reschedule(vcpu->cpu);
5404 }
5405
e756fc62 5406 pre_svm_run(svm);
6aa8b732 5407
649d6864
JR
5408 sync_lapic_to_cr8(vcpu);
5409
cda0ffdd 5410 svm->vmcb->save.cr2 = vcpu->arch.cr2;
6aa8b732 5411
04d2cc77
AK
5412 clgi();
5413
5414 local_irq_enable();
36241b8c 5415
b2ac58f9
KA
5416 /*
5417 * If this vCPU has touched SPEC_CTRL, restore the guest's value if
5418 * it's non-zero. Since vmentry is serialising on affected CPUs, there
5419 * is no need to worry about the conditional branch over the wrmsr
5420 * being speculatively taken.
5421 */
5422 if (svm->spec_ctrl)
ecb586bd 5423 native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
b2ac58f9 5424
6aa8b732 5425 asm volatile (
7454766f
AK
5426 "push %%" _ASM_BP "; \n\t"
5427 "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
5428 "mov %c[rcx](%[svm]), %%" _ASM_CX " \n\t"
5429 "mov %c[rdx](%[svm]), %%" _ASM_DX " \n\t"
5430 "mov %c[rsi](%[svm]), %%" _ASM_SI " \n\t"
5431 "mov %c[rdi](%[svm]), %%" _ASM_DI " \n\t"
5432 "mov %c[rbp](%[svm]), %%" _ASM_BP " \n\t"
05b3e0c2 5433#ifdef CONFIG_X86_64
fb3f0f51
RR
5434 "mov %c[r8](%[svm]), %%r8 \n\t"
5435 "mov %c[r9](%[svm]), %%r9 \n\t"
5436 "mov %c[r10](%[svm]), %%r10 \n\t"
5437 "mov %c[r11](%[svm]), %%r11 \n\t"
5438 "mov %c[r12](%[svm]), %%r12 \n\t"
5439 "mov %c[r13](%[svm]), %%r13 \n\t"
5440 "mov %c[r14](%[svm]), %%r14 \n\t"
5441 "mov %c[r15](%[svm]), %%r15 \n\t"
6aa8b732
AK
5442#endif
5443
6aa8b732 5444 /* Enter guest mode */
7454766f
AK
5445 "push %%" _ASM_AX " \n\t"
5446 "mov %c[vmcb](%[svm]), %%" _ASM_AX " \n\t"
4ecac3fd
AK
5447 __ex(SVM_VMLOAD) "\n\t"
5448 __ex(SVM_VMRUN) "\n\t"
5449 __ex(SVM_VMSAVE) "\n\t"
7454766f 5450 "pop %%" _ASM_AX " \n\t"
6aa8b732
AK
5451
5452 /* Save guest registers, load host registers */
7454766f
AK
5453 "mov %%" _ASM_BX ", %c[rbx](%[svm]) \n\t"
5454 "mov %%" _ASM_CX ", %c[rcx](%[svm]) \n\t"
5455 "mov %%" _ASM_DX ", %c[rdx](%[svm]) \n\t"
5456 "mov %%" _ASM_SI ", %c[rsi](%[svm]) \n\t"
5457 "mov %%" _ASM_DI ", %c[rdi](%[svm]) \n\t"
5458 "mov %%" _ASM_BP ", %c[rbp](%[svm]) \n\t"
05b3e0c2 5459#ifdef CONFIG_X86_64
fb3f0f51
RR
5460 "mov %%r8, %c[r8](%[svm]) \n\t"
5461 "mov %%r9, %c[r9](%[svm]) \n\t"
5462 "mov %%r10, %c[r10](%[svm]) \n\t"
5463 "mov %%r11, %c[r11](%[svm]) \n\t"
5464 "mov %%r12, %c[r12](%[svm]) \n\t"
5465 "mov %%r13, %c[r13](%[svm]) \n\t"
5466 "mov %%r14, %c[r14](%[svm]) \n\t"
5467 "mov %%r15, %c[r15](%[svm]) \n\t"
0cb5b306
JM
5468#endif
5469 /*
5470 * Clear host registers marked as clobbered to prevent
5471 * speculative use.
5472 */
5473 "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t"
5474 "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t"
5475 "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t"
5476 "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t"
5477 "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t"
5478#ifdef CONFIG_X86_64
5479 "xor %%r8, %%r8 \n\t"
5480 "xor %%r9, %%r9 \n\t"
5481 "xor %%r10, %%r10 \n\t"
5482 "xor %%r11, %%r11 \n\t"
5483 "xor %%r12, %%r12 \n\t"
5484 "xor %%r13, %%r13 \n\t"
5485 "xor %%r14, %%r14 \n\t"
5486 "xor %%r15, %%r15 \n\t"
6aa8b732 5487#endif
7454766f 5488 "pop %%" _ASM_BP
6aa8b732 5489 :
fb3f0f51 5490 : [svm]"a"(svm),
6aa8b732 5491 [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
ad312c7c
ZX
5492 [rbx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBX])),
5493 [rcx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RCX])),
5494 [rdx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDX])),
5495 [rsi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RSI])),
5496 [rdi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDI])),
5497 [rbp]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBP]))
05b3e0c2 5498#ifdef CONFIG_X86_64
ad312c7c
ZX
5499 , [r8]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R8])),
5500 [r9]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R9])),
5501 [r10]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R10])),
5502 [r11]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R11])),
5503 [r12]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R12])),
5504 [r13]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R13])),
5505 [r14]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R14])),
5506 [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15]))
6aa8b732 5507#endif
54a08c04
LV
5508 : "cc", "memory"
5509#ifdef CONFIG_X86_64
7454766f 5510 , "rbx", "rcx", "rdx", "rsi", "rdi"
54a08c04 5511 , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
7454766f
AK
5512#else
5513 , "ebx", "ecx", "edx", "esi", "edi"
54a08c04
LV
5514#endif
5515 );
6aa8b732 5516
b2ac58f9
KA
5517 /*
5518 * We do not use IBRS in the kernel. If this vCPU has used the
5519 * SPEC_CTRL MSR it may have left it on; save the value and
5520 * turn it off. This is much more efficient than blindly adding
5521 * it to the atomic save/restore list. Especially as the former
5522 * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
5523 *
5524 * For non-nested case:
5525 * If the L01 MSR bitmap does not intercept the MSR, then we need to
5526 * save it.
5527 *
5528 * For nested case:
5529 * If the L02 MSR bitmap does not intercept the MSR, then we need to
5530 * save it.
5531 */
946fbbc1 5532 if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
ecb586bd 5533 svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
b2ac58f9
KA
5534
5535 if (svm->spec_ctrl)
ecb586bd 5536 native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
b2ac58f9 5537
117cc7a9
DW
5538 /* Eliminate branch target predictions from guest mode */
5539 vmexit_fill_RSB();
5540
82ca2d10
AK
5541#ifdef CONFIG_X86_64
5542 wrmsrl(MSR_GS_BASE, svm->host.gs_base);
5543#else
dacccfdd 5544 loadsegment(fs, svm->host.fs);
831ca609
AK
5545#ifndef CONFIG_X86_32_LAZY_GS
5546 loadsegment(gs, svm->host.gs);
5547#endif
9581d442 5548#endif
6aa8b732
AK
5549
5550 reload_tss(vcpu);
5551
56ba47dd
AK
5552 local_irq_disable();
5553
13c34e07
AK
5554 vcpu->arch.cr2 = svm->vmcb->save.cr2;
5555 vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
5556 vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
5557 vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
5558
3781c01c
JR
5559 if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
5560 kvm_before_handle_nmi(&svm->vcpu);
5561
5562 stgi();
5563
5564 /* Any pending NMI will happen here */
5565
5566 if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
5567 kvm_after_handle_nmi(&svm->vcpu);
5568
d7bf8221
JR
5569 sync_cr8_to_lapic(vcpu);
5570
a2fa3e9f 5571 svm->next_rip = 0;
9222be18 5572
38e5e92f
JR
5573 svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
5574
631bc487
GN
5575 /* if exit due to PF check for async PF */
5576 if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
1261bfa3 5577 svm->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
631bc487 5578
6de4f3ad
AK
5579 if (npt_enabled) {
5580 vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
5581 vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR);
5582 }
fe5913e4
JR
5583
5584 /*
5585 * We need to handle MC intercepts here before the vcpu has a chance to
5586 * change the physical cpu
5587 */
5588 if (unlikely(svm->vmcb->control.exit_code ==
5589 SVM_EXIT_EXCP_BASE + MC_VECTOR))
5590 svm_handle_mce(svm);
8d28fec4
RJ
5591
5592 mark_all_clean(svm->vmcb);
6aa8b732 5593}
c207aee4 5594STACK_FRAME_NON_STANDARD(svm_vcpu_run);
6aa8b732 5595
6aa8b732
AK
5596static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
5597{
a2fa3e9f
GH
5598 struct vcpu_svm *svm = to_svm(vcpu);
5599
d0ec49d4 5600 svm->vmcb->save.cr3 = __sme_set(root);
dcca1a65 5601 mark_dirty(svm->vmcb, VMCB_CR);
c2ba05cc 5602 svm_flush_tlb(vcpu, true);
6aa8b732
AK
5603}
5604
1c97f0a0
JR
5605static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
5606{
5607 struct vcpu_svm *svm = to_svm(vcpu);
5608
d0ec49d4 5609 svm->vmcb->control.nested_cr3 = __sme_set(root);
b2747166 5610 mark_dirty(svm->vmcb, VMCB_NPT);
1c97f0a0
JR
5611
5612 /* Also sync guest cr3 here in case we live migrate */
9f8fe504 5613 svm->vmcb->save.cr3 = kvm_read_cr3(vcpu);
dcca1a65 5614 mark_dirty(svm->vmcb, VMCB_CR);
1c97f0a0 5615
c2ba05cc 5616 svm_flush_tlb(vcpu, true);
1c97f0a0
JR
5617}
5618
6aa8b732
AK
5619static int is_disabled(void)
5620{
6031a61c
JR
5621 u64 vm_cr;
5622
5623 rdmsrl(MSR_VM_CR, vm_cr);
5624 if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE))
5625 return 1;
5626
6aa8b732
AK
5627 return 0;
5628}
5629
102d8325
IM
5630static void
5631svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
5632{
5633 /*
5634 * Patch in the VMMCALL instruction:
5635 */
5636 hypercall[0] = 0x0f;
5637 hypercall[1] = 0x01;
5638 hypercall[2] = 0xd9;
102d8325
IM
5639}
5640
002c7f7c
YS
5641static void svm_check_processor_compat(void *rtn)
5642{
5643 *(int *)rtn = 0;
5644}
5645
774ead3a
AK
5646static bool svm_cpu_has_accelerated_tpr(void)
5647{
5648 return false;
5649}
5650
6d396b55
PB
5651static bool svm_has_high_real_mode_segbase(void)
5652{
5653 return true;
5654}
5655
fc07e76a
PB
5656static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
5657{
5658 return 0;
5659}
5660
0e851880
SY
5661static void svm_cpuid_update(struct kvm_vcpu *vcpu)
5662{
6092d3d3
JR
5663 struct vcpu_svm *svm = to_svm(vcpu);
5664
5665 /* Update nrips enabled cache */
d6321d49 5666 svm->nrips_enabled = !!guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS);
46781eae
SS
5667
5668 if (!kvm_vcpu_apicv_active(vcpu))
5669 return;
5670
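	/*
	 * AVIC does not work in x2APIC mode, so hide x2APIC from the guest
	 * while APICv is active (svm_set_supported_cpuid() filters the
	 * CPUID 0x1 leaf the same way when avic is enabled).
	 */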
1b4d56b8 5671 guest_cpuid_clear(vcpu, X86_FEATURE_X2APIC);
0e851880
SY
5672}
5673
d4330ef2
JR
5674static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
5675{
c2c63a49 5676 switch (func) {
46781eae
SS
5677 case 0x1:
5678 if (avic)
5679 entry->ecx &= ~bit(X86_FEATURE_X2APIC);
5680 break;
4c62a2dc
JR
5681 case 0x80000001:
5682 if (nested)
5683 entry->ecx |= (1 << 2); /* Set SVM bit */
5684 break;
c2c63a49
JR
5685 case 0x8000000A:
5686 entry->eax = 1; /* SVM revision 1 */
5687 entry->ebx = 8; /* Let's support 8 ASIDs in case we add proper
5688 ASID emulation to nested SVM */
5689 entry->ecx = 0; /* Reserved */
7a190667
JR
5690 entry->edx = 0; /* By default do not support any
5691 additional features */
5692
5693 /* Support next_rip if host supports it */
2a6b20b8 5694 if (boot_cpu_has(X86_FEATURE_NRIPS))
7a190667 5695 entry->edx |= SVM_FEATURE_NRIP;
c2c63a49 5696
3d4aeaad
JR
5697 /* Support NPT for the guest if enabled */
5698 if (npt_enabled)
5699 entry->edx |= SVM_FEATURE_NPT;
5700
c2c63a49 5701 break;
8765d753
BS
5702 case 0x8000001F:
5703 /* Support memory encryption cpuid if host supports it */
5704 if (boot_cpu_has(X86_FEATURE_SEV))
5705 cpuid(0x8000001f, &entry->eax, &entry->ebx,
5706 &entry->ecx, &entry->edx);
5707
c2c63a49 5708 }
d4330ef2
JR
5709}
5710
17cc3935 5711static int svm_get_lpage_level(void)
344f414f 5712{
17cc3935 5713 return PT_PDPE_LEVEL;
344f414f
JR
5714}
5715
4e47c7a6
SY
5716static bool svm_rdtscp_supported(void)
5717{
46896c73 5718 return boot_cpu_has(X86_FEATURE_RDTSCP);
4e47c7a6
SY
5719}
5720
ad756a16
MJ
5721static bool svm_invpcid_supported(void)
5722{
5723 return false;
5724}
5725
93c4adc7
PB
5726static bool svm_mpx_supported(void)
5727{
5728 return false;
5729}
5730
55412b2e
WL
5731static bool svm_xsaves_supported(void)
5732{
5733 return false;
5734}
5735
66336cab
PB
5736static bool svm_umip_emulated(void)
5737{
5738 return false;
5739}
5740
f5f48ee1
SY
5741static bool svm_has_wbinvd_exit(void)
5742{
5743 return true;
5744}
5745
8061252e 5746#define PRE_EX(exit) { .exit_code = (exit), \
40e19b51 5747 .stage = X86_ICPT_PRE_EXCEPT, }
cfec82cb 5748#define POST_EX(exit) { .exit_code = (exit), \
40e19b51 5749 .stage = X86_ICPT_POST_EXCEPT, }
d7eb8203 5750#define POST_MEM(exit) { .exit_code = (exit), \
40e19b51 5751 .stage = X86_ICPT_POST_MEMACCESS, }
cfec82cb 5752
09941fbb 5753static const struct __x86_intercept {
cfec82cb
JR
5754 u32 exit_code;
5755 enum x86_intercept_stage stage;
cfec82cb
JR
5756} x86_intercept_map[] = {
5757 [x86_intercept_cr_read] = POST_EX(SVM_EXIT_READ_CR0),
5758 [x86_intercept_cr_write] = POST_EX(SVM_EXIT_WRITE_CR0),
5759 [x86_intercept_clts] = POST_EX(SVM_EXIT_WRITE_CR0),
5760 [x86_intercept_lmsw] = POST_EX(SVM_EXIT_WRITE_CR0),
5761 [x86_intercept_smsw] = POST_EX(SVM_EXIT_READ_CR0),
3b88e41a
JR
5762 [x86_intercept_dr_read] = POST_EX(SVM_EXIT_READ_DR0),
5763 [x86_intercept_dr_write] = POST_EX(SVM_EXIT_WRITE_DR0),
dee6bb70
JR
5764 [x86_intercept_sldt] = POST_EX(SVM_EXIT_LDTR_READ),
5765 [x86_intercept_str] = POST_EX(SVM_EXIT_TR_READ),
5766 [x86_intercept_lldt] = POST_EX(SVM_EXIT_LDTR_WRITE),
5767 [x86_intercept_ltr] = POST_EX(SVM_EXIT_TR_WRITE),
5768 [x86_intercept_sgdt] = POST_EX(SVM_EXIT_GDTR_READ),
5769 [x86_intercept_sidt] = POST_EX(SVM_EXIT_IDTR_READ),
5770 [x86_intercept_lgdt] = POST_EX(SVM_EXIT_GDTR_WRITE),
5771 [x86_intercept_lidt] = POST_EX(SVM_EXIT_IDTR_WRITE),
01de8b09
JR
5772 [x86_intercept_vmrun] = POST_EX(SVM_EXIT_VMRUN),
5773 [x86_intercept_vmmcall] = POST_EX(SVM_EXIT_VMMCALL),
5774 [x86_intercept_vmload] = POST_EX(SVM_EXIT_VMLOAD),
5775 [x86_intercept_vmsave] = POST_EX(SVM_EXIT_VMSAVE),
5776 [x86_intercept_stgi] = POST_EX(SVM_EXIT_STGI),
5777 [x86_intercept_clgi] = POST_EX(SVM_EXIT_CLGI),
5778 [x86_intercept_skinit] = POST_EX(SVM_EXIT_SKINIT),
5779 [x86_intercept_invlpga] = POST_EX(SVM_EXIT_INVLPGA),
d7eb8203
JR
5780 [x86_intercept_rdtscp] = POST_EX(SVM_EXIT_RDTSCP),
5781 [x86_intercept_monitor] = POST_MEM(SVM_EXIT_MONITOR),
5782 [x86_intercept_mwait] = POST_EX(SVM_EXIT_MWAIT),
8061252e
JR
5783 [x86_intercept_invlpg] = POST_EX(SVM_EXIT_INVLPG),
5784 [x86_intercept_invd] = POST_EX(SVM_EXIT_INVD),
5785 [x86_intercept_wbinvd] = POST_EX(SVM_EXIT_WBINVD),
5786 [x86_intercept_wrmsr] = POST_EX(SVM_EXIT_MSR),
5787 [x86_intercept_rdtsc] = POST_EX(SVM_EXIT_RDTSC),
5788 [x86_intercept_rdmsr] = POST_EX(SVM_EXIT_MSR),
5789 [x86_intercept_rdpmc] = POST_EX(SVM_EXIT_RDPMC),
5790 [x86_intercept_cpuid] = PRE_EX(SVM_EXIT_CPUID),
5791 [x86_intercept_rsm] = PRE_EX(SVM_EXIT_RSM),
bf608f88
JR
5792 [x86_intercept_pause] = PRE_EX(SVM_EXIT_PAUSE),
5793 [x86_intercept_pushf] = PRE_EX(SVM_EXIT_PUSHF),
5794 [x86_intercept_popf] = PRE_EX(SVM_EXIT_POPF),
5795 [x86_intercept_intn] = PRE_EX(SVM_EXIT_SWINT),
5796 [x86_intercept_iret] = PRE_EX(SVM_EXIT_IRET),
5797 [x86_intercept_icebp] = PRE_EX(SVM_EXIT_ICEBP),
5798 [x86_intercept_hlt] = POST_EX(SVM_EXIT_HLT),
f6511935
JR
5799 [x86_intercept_in] = POST_EX(SVM_EXIT_IOIO),
5800 [x86_intercept_ins] = POST_EX(SVM_EXIT_IOIO),
5801 [x86_intercept_out] = POST_EX(SVM_EXIT_IOIO),
5802 [x86_intercept_outs] = POST_EX(SVM_EXIT_IOIO),
cfec82cb
JR
5803};
5804
8061252e 5805#undef PRE_EX
cfec82cb 5806#undef POST_EX
d7eb8203 5807#undef POST_MEM
cfec82cb 5808
8a76d7f2
JR
5809static int svm_check_intercept(struct kvm_vcpu *vcpu,
5810 struct x86_instruction_info *info,
5811 enum x86_intercept_stage stage)
5812{
cfec82cb
JR
5813 struct vcpu_svm *svm = to_svm(vcpu);
5814 int vmexit, ret = X86EMUL_CONTINUE;
5815 struct __x86_intercept icpt_info;
5816 struct vmcb *vmcb = svm->vmcb;
5817
5818 if (info->intercept >= ARRAY_SIZE(x86_intercept_map))
5819 goto out;
5820
5821 icpt_info = x86_intercept_map[info->intercept];
5822
40e19b51 5823 if (stage != icpt_info.stage)
cfec82cb
JR
5824 goto out;
5825
5826 switch (icpt_info.exit_code) {
5827 case SVM_EXIT_READ_CR0:
5828 if (info->intercept == x86_intercept_cr_read)
5829 icpt_info.exit_code += info->modrm_reg;
5830 break;
5831 case SVM_EXIT_WRITE_CR0: {
5832 unsigned long cr0, val;
5833 u64 intercept;
5834
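		/*
		 * Decide whether this write should be reported as a selective
		 * CR0 write: ignore the bits in SVM_CR0_SELECTIVE_MASK and
		 * check below whether any of the remaining CR0 bits actually
		 * change.
		 */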
5835 if (info->intercept == x86_intercept_cr_write)
5836 icpt_info.exit_code += info->modrm_reg;
5837
62baf44c
JK
5838 if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0 ||
5839 info->intercept == x86_intercept_clts)
cfec82cb
JR
5840 break;
5841
5842 intercept = svm->nested.intercept;
5843
5844 if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))
5845 break;
5846
5847 cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK;
5848 val = info->src_val & ~SVM_CR0_SELECTIVE_MASK;
5849
5850 if (info->intercept == x86_intercept_lmsw) {
5851 cr0 &= 0xfUL;
5852 val &= 0xfUL;
5853 /* lmsw can't clear PE - catch this here */
5854 if (cr0 & X86_CR0_PE)
5855 val |= X86_CR0_PE;
5856 }
5857
5858 if (cr0 ^ val)
5859 icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE;
5860
5861 break;
5862 }
3b88e41a
JR
5863 case SVM_EXIT_READ_DR0:
5864 case SVM_EXIT_WRITE_DR0:
5865 icpt_info.exit_code += info->modrm_reg;
5866 break;
8061252e
JR
5867 case SVM_EXIT_MSR:
5868 if (info->intercept == x86_intercept_wrmsr)
5869 vmcb->control.exit_info_1 = 1;
5870 else
5871 vmcb->control.exit_info_1 = 0;
5872 break;
bf608f88
JR
5873 case SVM_EXIT_PAUSE:
5874 /*
5875 * The emulator reports this for the NOP opcode, but PAUSE is
5876 * REP NOP, so check the REP prefix here.
5877 */
5878 if (info->rep_prefix != REPE_PREFIX)
5879 goto out;
49a8afca 5880 break;
f6511935
JR
5881 case SVM_EXIT_IOIO: {
5882 u64 exit_info;
5883 u32 bytes;
5884
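		/*
		 * Reconstruct the IOIO exit_info_1 the way hardware would:
		 * port number in bits 31:16, plus the TYPE (IN), STR, REP,
		 * operand-size and address-size fields built below.
		 */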
f6511935
JR
5885 if (info->intercept == x86_intercept_in ||
5886 info->intercept == x86_intercept_ins) {
6cbc5f5a
JK
5887 exit_info = ((info->src_val & 0xffff) << 16) |
5888 SVM_IOIO_TYPE_MASK;
f6511935 5889 bytes = info->dst_bytes;
6493f157 5890 } else {
6cbc5f5a 5891 exit_info = (info->dst_val & 0xffff) << 16;
6493f157 5892 bytes = info->src_bytes;
f6511935
JR
5893 }
5894
5895 if (info->intercept == x86_intercept_outs ||
5896 info->intercept == x86_intercept_ins)
5897 exit_info |= SVM_IOIO_STR_MASK;
5898
5899 if (info->rep_prefix)
5900 exit_info |= SVM_IOIO_REP_MASK;
5901
5902 bytes = min(bytes, 4u);
5903
5904 exit_info |= bytes << SVM_IOIO_SIZE_SHIFT;
5905
5906 exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1);
5907
5908 vmcb->control.exit_info_1 = exit_info;
5909 vmcb->control.exit_info_2 = info->next_rip;
5910
5911 break;
5912 }
cfec82cb
JR
5913 default:
5914 break;
5915 }
5916
f104765b
BD
5917 /* TODO: Advertise NRIPS to guest hypervisor unconditionally */
5918 if (static_cpu_has(X86_FEATURE_NRIPS))
5919 vmcb->control.next_rip = info->next_rip;
cfec82cb
JR
5920 vmcb->control.exit_code = icpt_info.exit_code;
5921 vmexit = nested_svm_exit_handled(svm);
5922
5923 ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED
5924 : X86EMUL_CONTINUE;
5925
5926out:
5927 return ret;
8a76d7f2
JR
5928}
5929
a547c6db
YZ
5930static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
5931{
5932 local_irq_enable();
f2485b3e
PB
5933 /*
5934 * We must have an instruction with interrupts enabled, so
5935 * the timer interrupt isn't delayed by the interrupt shadow.
5936 */
5937 asm("nop");
5938 local_irq_disable();
a547c6db
YZ
5939}
5940
ae97a3b8
RK
5941static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
5942{
5943}
5944
be8ca170
SS
5945static inline void avic_post_state_restore(struct kvm_vcpu *vcpu)
5946{
5947 if (avic_handle_apic_id_update(vcpu) != 0)
5948 return;
5949 if (avic_handle_dfr_update(vcpu) != 0)
5950 return;
5951 avic_handle_ldr_update(vcpu);
5952}
5953
74f16909
BP
5954static void svm_setup_mce(struct kvm_vcpu *vcpu)
5955{
5956 /* [63:9] are reserved. */
5957 vcpu->arch.mcg_cap &= 0x1ff;
5958}
5959
72d7b374
LP
5960static int svm_smi_allowed(struct kvm_vcpu *vcpu)
5961{
05cade71
LP
5962 struct vcpu_svm *svm = to_svm(vcpu);
5963
5964 /* Per APM Vol.2 15.22.2 "Response to SMI" */
5965 if (!gif_set(svm))
5966 return 0;
5967
5968 if (is_guest_mode(&svm->vcpu) &&
5969 svm->nested.intercept & (1ULL << INTERCEPT_SMI)) {
5970 /* TODO: Might need to set exit_info_1 and exit_info_2 here */
5971 svm->vmcb->control.exit_code = SVM_EXIT_SMI;
5972 svm->nested.exit_required = true;
5973 return 0;
5974 }
5975
72d7b374
LP
5976 return 1;
5977}
5978
0234bf88
LP
5979static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
5980{
05cade71
LP
5981 struct vcpu_svm *svm = to_svm(vcpu);
5982 int ret;
5983
5984 if (is_guest_mode(vcpu)) {
5985 /* FED8h - SVM Guest */
5986 put_smstate(u64, smstate, 0x7ed8, 1);
5987 /* FEE0h - SVM Guest VMCB Physical Address */
5988 put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb);
5989
5990 svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
5991 svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
5992 svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
5993
5994 ret = nested_svm_vmexit(svm);
5995 if (ret)
5996 return ret;
5997 }
0234bf88
LP
5998 return 0;
5999}
6000
6001static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
6002{
05cade71
LP
6003 struct vcpu_svm *svm = to_svm(vcpu);
6004 struct vmcb *nested_vmcb;
6005 struct page *page;
6006 struct {
6007 u64 guest;
6008 u64 vmcb;
6009 } svm_state_save;
6010 int ret;
6011
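	/*
	 * Read back the "SVM Guest" flag and VMCB address that
	 * svm_pre_enter_smm() stored at state-save offsets FED8h/FEE0h, and
	 * re-enter the nested guest if we were in guest mode when the SMI
	 * arrived.
	 */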
6012 ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfed8, &svm_state_save,
6013 sizeof(svm_state_save));
6014 if (ret)
6015 return ret;
6016
6017 if (svm_state_save.guest) {
6018 vcpu->arch.hflags &= ~HF_SMM_MASK;
6019 nested_vmcb = nested_svm_map(svm, svm_state_save.vmcb, &page);
6020 if (nested_vmcb)
6021 enter_svm_guest_mode(svm, svm_state_save.vmcb, nested_vmcb, page);
6022 else
6023 ret = 1;
6024 vcpu->arch.hflags |= HF_SMM_MASK;
6025 }
6026 return ret;
0234bf88
LP
6027}
6028
cc3d967f
LP
6029static int enable_smi_window(struct kvm_vcpu *vcpu)
6030{
6031 struct vcpu_svm *svm = to_svm(vcpu);
6032
6033 if (!gif_set(svm)) {
6034 if (vgif_enabled(svm))
6035 set_intercept(svm, INTERCEPT_STGI);
6036 /* STGI will cause a vm exit */
6037 return 1;
6038 }
6039 return 0;
6040}
6041
1654efcb
BS
6042static int sev_asid_new(void)
6043{
6044 int pos;
6045
6046 /*
6047 * SEV-enabled guests must use an ASID in the range min_sev_asid to max_sev_asid.
6048 */
6049 pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_sev_asid - 1);
6050 if (pos >= max_sev_asid)
6051 return -EBUSY;
6052
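	/* Bitmap positions are zero-based while SEV ASIDs start at 1, hence pos + 1. */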
6053 set_bit(pos, sev_asid_bitmap);
6054 return pos + 1;
6055}
6056
6057static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
6058{
6059 struct kvm_sev_info *sev = &kvm->arch.sev_info;
6060 int asid, ret;
6061
6062 ret = -EBUSY;
6063 asid = sev_asid_new();
6064 if (asid < 0)
6065 return ret;
6066
6067 ret = sev_platform_init(&argp->error);
6068 if (ret)
6069 goto e_free;
6070
6071 sev->active = true;
6072 sev->asid = asid;
1e80fdc0 6073 INIT_LIST_HEAD(&sev->regions_list);
1654efcb
BS
6074
6075 return 0;
6076
6077e_free:
6078 __sev_asid_free(asid);
6079 return ret;
6080}
6081
59414c98
BS
6082static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
6083{
6084 struct sev_data_activate *data;
6085 int asid = sev_get_asid(kvm);
6086 int ret;
6087
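	/*
	 * Before (re)activating the ASID, make sure no stale cache lines
	 * tagged with it survive: write back and invalidate caches on all
	 * CPUs, then have the firmware do its own flush via
	 * sev_guest_df_flush().
	 */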
6088 wbinvd_on_all_cpus();
6089
6090 ret = sev_guest_df_flush(error);
6091 if (ret)
6092 return ret;
6093
6094 data = kzalloc(sizeof(*data), GFP_KERNEL);
6095 if (!data)
6096 return -ENOMEM;
6097
6098 /* activate ASID on the given handle */
6099 data->handle = handle;
6100 data->asid = asid;
6101 ret = sev_guest_activate(data, error);
6102 kfree(data);
6103
6104 return ret;
6105}
6106
89c50580 6107static int __sev_issue_cmd(int fd, int id, void *data, int *error)
59414c98
BS
6108{
6109 struct fd f;
6110 int ret;
6111
6112 f = fdget(fd);
6113 if (!f.file)
6114 return -EBADF;
6115
6116 ret = sev_issue_cmd_external_user(f.file, id, data, error);
6117
6118 fdput(f);
6119 return ret;
6120}
6121
89c50580
BS
6122static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error)
6123{
6124 struct kvm_sev_info *sev = &kvm->arch.sev_info;
6125
6126 return __sev_issue_cmd(sev->fd, id, data, error);
6127}
6128
59414c98
BS
6129static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
6130{
6131 struct kvm_sev_info *sev = &kvm->arch.sev_info;
6132 struct sev_data_launch_start *start;
6133 struct kvm_sev_launch_start params;
6134 void *dh_blob, *session_blob;
6135 int *error = &argp->error;
6136 int ret;
6137
6138 if (!sev_guest(kvm))
6139 return -ENOTTY;
6140
6141 if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
6142 return -EFAULT;
6143
6144 start = kzalloc(sizeof(*start), GFP_KERNEL);
6145 if (!start)
6146 return -ENOMEM;
6147
6148 dh_blob = NULL;
6149 if (params.dh_uaddr) {
6150 dh_blob = psp_copy_user_blob(params.dh_uaddr, params.dh_len);
6151 if (IS_ERR(dh_blob)) {
6152 ret = PTR_ERR(dh_blob);
6153 goto e_free;
6154 }
6155
6156 start->dh_cert_address = __sme_set(__pa(dh_blob));
6157 start->dh_cert_len = params.dh_len;
6158 }
6159
6160 session_blob = NULL;
6161 if (params.session_uaddr) {
6162 session_blob = psp_copy_user_blob(params.session_uaddr, params.session_len);
6163 if (IS_ERR(session_blob)) {
6164 ret = PTR_ERR(session_blob);
6165 goto e_free_dh;
6166 }
6167
6168 start->session_address = __sme_set(__pa(session_blob));
6169 start->session_len = params.session_len;
6170 }
6171
6172 start->handle = params.handle;
6173 start->policy = params.policy;
6174
6175 /* create memory encryption context */
89c50580 6176 ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, start, error);
59414c98
BS
6177 if (ret)
6178 goto e_free_session;
6179
6180 /* Bind ASID to this guest */
6181 ret = sev_bind_asid(kvm, start->handle, error);
6182 if (ret)
6183 goto e_free_session;
6184
6185 /* return handle to userspace */
6186 params.handle = start->handle;
6187 if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params))) {
6188 sev_unbind_asid(kvm, start->handle);
6189 ret = -EFAULT;
6190 goto e_free_session;
6191 }
6192
6193 sev->handle = start->handle;
6194 sev->fd = argp->sev_fd;
6195
6196e_free_session:
6197 kfree(session_blob);
6198e_free_dh:
6199 kfree(dh_blob);
6200e_free:
6201 kfree(start);
6202 return ret;
6203}
6204
89c50580
BS
6205static int get_num_contig_pages(int idx, struct page **inpages,
6206 unsigned long npages)
6207{
6208 unsigned long paddr, next_paddr;
6209 int i = idx + 1, pages = 1;
6210
6211 /* find the number of contiguous pages starting from idx */
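	/*
	 * Example (assuming 4K pages): with page frames at 0x1000, 0x2000
	 * and 0x4000 and idx == 0, the first two pages are adjacent, so
	 * this returns 2.
	 */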
6212 paddr = __sme_page_pa(inpages[idx]);
6213 while (i < npages) {
6214 next_paddr = __sme_page_pa(inpages[i++]);
6215 if ((paddr + PAGE_SIZE) == next_paddr) {
6216 pages++;
6217 paddr = next_paddr;
6218 continue;
6219 }
6220 break;
6221 }
6222
6223 return pages;
6224}
6225
6226static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
6227{
6228 unsigned long vaddr, vaddr_end, next_vaddr, npages, size;
6229 struct kvm_sev_info *sev = &kvm->arch.sev_info;
6230 struct kvm_sev_launch_update_data params;
6231 struct sev_data_launch_update_data *data;
6232 struct page **inpages;
6233 int i, ret, pages;
6234
6235 if (!sev_guest(kvm))
6236 return -ENOTTY;
6237
6238 if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
6239 return -EFAULT;
6240
6241 data = kzalloc(sizeof(*data), GFP_KERNEL);
6242 if (!data)
6243 return -ENOMEM;
6244
6245 vaddr = params.uaddr;
6246 size = params.len;
6247 vaddr_end = vaddr + size;
6248
6249 /* Lock the user memory. */
6250 inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
6251 if (!inpages) {
6252 ret = -ENOMEM;
6253 goto e_free;
6254 }
6255
6256 /*
6257 * The LAUNCH_UPDATE command will perform in-place encryption of the
6258 * memory content (i.e. it will write the same memory region with C=1).
6259 * It's possible that the cache may contain the data with C=0, i.e.,
6260 * unencrypted, so invalidate it first.
6261 */
6262 sev_clflush_pages(inpages, npages);
6263
6264 for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) {
6265 int offset, len;
6266
6267 /*
6268 * If the user buffer is not page-aligned, calculate the offset
6269 * within the page.
6270 */
6271 offset = vaddr & (PAGE_SIZE - 1);
6272
6273 /* Calculate the number of pages that can be encrypted in one go. */
6274 pages = get_num_contig_pages(i, inpages, npages);
6275
6276 len = min_t(size_t, ((pages * PAGE_SIZE) - offset), size);
6277
6278 data->handle = sev->handle;
6279 data->len = len;
6280 data->address = __sme_page_pa(inpages[i]) + offset;
6281 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, data, &argp->error);
6282 if (ret)
6283 goto e_unpin;
6284
6285 size -= len;
6286 next_vaddr = vaddr + len;
6287 }
6288
6289e_unpin:
6290 /* content of memory is updated, mark pages dirty */
6291 for (i = 0; i < npages; i++) {
6292 set_page_dirty_lock(inpages[i]);
6293 mark_page_accessed(inpages[i]);
6294 }
6295 /* unlock the user pages */
6296 sev_unpin_memory(kvm, inpages, npages);
6297e_free:
6298 kfree(data);
6299 return ret;
6300}
6301
0d0736f7
BS
6302static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
6303{
3e233385 6304 void __user *measure = (void __user *)(uintptr_t)argp->data;
0d0736f7
BS
6305 struct kvm_sev_info *sev = &kvm->arch.sev_info;
6306 struct sev_data_launch_measure *data;
6307 struct kvm_sev_launch_measure params;
3e233385 6308 void __user *p = NULL;
0d0736f7
BS
6309 void *blob = NULL;
6310 int ret;
6311
6312 if (!sev_guest(kvm))
6313 return -ENOTTY;
6314
3e233385 6315 if (copy_from_user(&params, measure, sizeof(params)))
0d0736f7
BS
6316 return -EFAULT;
6317
6318 data = kzalloc(sizeof(*data), GFP_KERNEL);
6319 if (!data)
6320 return -ENOMEM;
6321
6322 /* User wants to query the blob length */
6323 if (!params.len)
6324 goto cmd;
6325
3e233385
BS
6326 p = (void __user *)(uintptr_t)params.uaddr;
6327 if (p) {
0d0736f7
BS
6328 if (params.len > SEV_FW_BLOB_MAX_SIZE) {
6329 ret = -EINVAL;
6330 goto e_free;
6331 }
6332
0d0736f7
BS
6333 ret = -ENOMEM;
6334 blob = kmalloc(params.len, GFP_KERNEL);
6335 if (!blob)
6336 goto e_free;
6337
6338 data->address = __psp_pa(blob);
6339 data->len = params.len;
6340 }
6341
6342cmd:
6343 data->handle = sev->handle;
6344 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, data, &argp->error);
6345
6346 /*
6347 * If userspace only queried the blob length, the FW reply holds the expected length.
6348 */
6349 if (!params.len)
6350 goto done;
6351
6352 if (ret)
6353 goto e_free_blob;
6354
6355 if (blob) {
3e233385 6356 if (copy_to_user(p, blob, params.len))
0d0736f7
BS
6357 ret = -EFAULT;
6358 }
6359
6360done:
6361 params.len = data->len;
3e233385 6362 if (copy_to_user(measure, &params, sizeof(params)))
0d0736f7
BS
6363 ret = -EFAULT;
6364e_free_blob:
6365 kfree(blob);
6366e_free:
6367 kfree(data);
6368 return ret;
6369}
6370
5bdb0e2f
BS
6371static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
6372{
6373 struct kvm_sev_info *sev = &kvm->arch.sev_info;
6374 struct sev_data_launch_finish *data;
6375 int ret;
6376
6377 if (!sev_guest(kvm))
6378 return -ENOTTY;
6379
6380 data = kzalloc(sizeof(*data), GFP_KERNEL);
6381 if (!data)
6382 return -ENOMEM;
6383
6384 data->handle = sev->handle;
6385 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, data, &argp->error);
6386
6387 kfree(data);
6388 return ret;
6389}
6390
255d9e75
BS
6391static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
6392{
6393 struct kvm_sev_info *sev = &kvm->arch.sev_info;
6394 struct kvm_sev_guest_status params;
6395 struct sev_data_guest_status *data;
6396 int ret;
6397
6398 if (!sev_guest(kvm))
6399 return -ENOTTY;
6400
6401 data = kzalloc(sizeof(*data), GFP_KERNEL);
6402 if (!data)
6403 return -ENOMEM;
6404
6405 data->handle = sev->handle;
6406 ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, data, &argp->error);
6407 if (ret)
6408 goto e_free;
6409
6410 params.policy = data->policy;
6411 params.state = data->state;
6412 params.handle = data->handle;
6413
6414 if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
6415 ret = -EFAULT;
6416e_free:
6417 kfree(data);
6418 return ret;
6419}
6420
24f41fb2
BS
6421static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src,
6422 unsigned long dst, int size,
6423 int *error, bool enc)
6424{
6425 struct kvm_sev_info *sev = &kvm->arch.sev_info;
6426 struct sev_data_dbg *data;
6427 int ret;
6428
6429 data = kzalloc(sizeof(*data), GFP_KERNEL);
6430 if (!data)
6431 return -ENOMEM;
6432
6433 data->handle = sev->handle;
6434 data->dst_addr = dst;
6435 data->src_addr = src;
6436 data->len = size;
6437
6438 ret = sev_issue_cmd(kvm,
6439 enc ? SEV_CMD_DBG_ENCRYPT : SEV_CMD_DBG_DECRYPT,
6440 data, error);
6441 kfree(data);
6442 return ret;
6443}
6444
6445static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
6446 unsigned long dst_paddr, int sz, int *err)
6447{
6448 int offset;
6449
6450 /*
6451 * It's safe to read more than was asked for; the caller should ensure
6452 * that the destination has enough space.
6453 */
6454 src_paddr = round_down(src_paddr, 16);
6455 offset = src_paddr & 15;
6456 sz = round_up(sz + offset, 16);
6457
6458 return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false);
6459}
6460
6461static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
6462 unsigned long __user dst_uaddr,
6463 unsigned long dst_paddr,
6464 int size, int *err)
6465{
6466 struct page *tpage = NULL;
6467 int ret, offset;
6468
6469 /* if inputs are not 16-byte aligned, use an intermediate buffer */
6470 if (!IS_ALIGNED(dst_paddr, 16) ||
6471 !IS_ALIGNED(paddr, 16) ||
6472 !IS_ALIGNED(size, 16)) {
6473 tpage = (void *)alloc_page(GFP_KERNEL);
6474 if (!tpage)
6475 return -ENOMEM;
6476
6477 dst_paddr = __sme_page_pa(tpage);
6478 }
6479
6480 ret = __sev_dbg_decrypt(kvm, paddr, dst_paddr, size, err);
6481 if (ret)
6482 goto e_free;
6483
6484 if (tpage) {
6485 offset = paddr & 15;
6486 if (copy_to_user((void __user *)(uintptr_t)dst_uaddr,
6487 page_address(tpage) + offset, size))
6488 ret = -EFAULT;
6489 }
6490
6491e_free:
6492 if (tpage)
6493 __free_page(tpage);
6494
6495 return ret;
6496}
6497
7d1594f5
BS
6498static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
6499 unsigned long __user vaddr,
6500 unsigned long dst_paddr,
6501 unsigned long __user dst_vaddr,
6502 int size, int *error)
6503{
6504 struct page *src_tpage = NULL;
6505 struct page *dst_tpage = NULL;
6506 int ret, len = size;
6507
6508 /* If source buffer is not aligned then use an intermediate buffer */
6509 if (!IS_ALIGNED(vaddr, 16)) {
6510 src_tpage = alloc_page(GFP_KERNEL);
6511 if (!src_tpage)
6512 return -ENOMEM;
6513
6514 if (copy_from_user(page_address(src_tpage),
6515 (void __user *)(uintptr_t)vaddr, size)) {
6516 __free_page(src_tpage);
6517 return -EFAULT;
6518 }
6519
6520 paddr = __sme_page_pa(src_tpage);
6521 }
6522
6523 /*
6524 * If destination buffer or length is not aligned then do read-modify-write:
6525 * - decrypt destination in an intermediate buffer
6526 * - copy the source buffer in an intermediate buffer
6527 * - use the intermediate buffer as source buffer
6528 */
6529 if (!IS_ALIGNED(dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
6530 int dst_offset;
6531
6532 dst_tpage = alloc_page(GFP_KERNEL);
6533 if (!dst_tpage) {
6534 ret = -ENOMEM;
6535 goto e_free;
6536 }
6537
6538 ret = __sev_dbg_decrypt(kvm, dst_paddr,
6539 __sme_page_pa(dst_tpage), size, error);
6540 if (ret)
6541 goto e_free;
6542
6543 /*
6544 * If the source is a kernel buffer then use memcpy(); otherwise
6545 * copy_from_user().
6546 */
6547 dst_offset = dst_paddr & 15;
6548
6549 if (src_tpage)
6550 memcpy(page_address(dst_tpage) + dst_offset,
6551 page_address(src_tpage), size);
6552 else {
6553 if (copy_from_user(page_address(dst_tpage) + dst_offset,
6554 (void __user *)(uintptr_t)vaddr, size)) {
6555 ret = -EFAULT;
6556 goto e_free;
6557 }
6558 }
6559
6560 paddr = __sme_page_pa(dst_tpage);
6561 dst_paddr = round_down(dst_paddr, 16);
6562 len = round_up(size, 16);
6563 }
6564
6565 ret = __sev_issue_dbg_cmd(kvm, paddr, dst_paddr, len, error, true);
6566
6567e_free:
6568 if (src_tpage)
6569 __free_page(src_tpage);
6570 if (dst_tpage)
6571 __free_page(dst_tpage);
6572 return ret;
6573}
6574
24f41fb2
BS
6575static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
6576{
6577 unsigned long vaddr, vaddr_end, next_vaddr;
6578 unsigned long dst_vaddr, dst_vaddr_end;
6579 struct page **src_p, **dst_p;
6580 struct kvm_sev_dbg debug;
6581 unsigned long n;
6582 int ret, size;
6583
6584 if (!sev_guest(kvm))
6585 return -ENOTTY;
6586
6587 if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug)))
6588 return -EFAULT;
6589
6590 vaddr = debug.src_uaddr;
6591 size = debug.len;
6592 vaddr_end = vaddr + size;
6593 dst_vaddr = debug.dst_uaddr;
6594 dst_vaddr_end = dst_vaddr + size;
6595
6596 for (; vaddr < vaddr_end; vaddr = next_vaddr) {
6597 int len, s_off, d_off;
6598
6599 /* lock userspace source and destination page */
6600 src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0);
6601 if (!src_p)
6602 return -EFAULT;
6603
6604 dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1);
6605 if (!dst_p) {
6606 sev_unpin_memory(kvm, src_p, n);
6607 return -EFAULT;
6608 }
6609
6610 /*
6611 * The DBG_{DE,EN}CRYPT commands will perform {dec,en}cryption of the
6612 * memory content (i.e. it will write the same memory region with C=1).
6613 * It's possible that the cache may contain the data with C=0, i.e.,
6614 * unencrypted, so invalidate it first.
6615 */
6616 sev_clflush_pages(src_p, 1);
6617 sev_clflush_pages(dst_p, 1);
6618
6619 /*
6620 * Since the user buffer may not be page-aligned, calculate the
6621 * offset within the page.
6622 */
6623 s_off = vaddr & ~PAGE_MASK;
6624 d_off = dst_vaddr & ~PAGE_MASK;
6625 len = min_t(size_t, (PAGE_SIZE - s_off), size);
6626
7d1594f5
BS
6627 if (dec)
6628 ret = __sev_dbg_decrypt_user(kvm,
6629 __sme_page_pa(src_p[0]) + s_off,
6630 dst_vaddr,
6631 __sme_page_pa(dst_p[0]) + d_off,
6632 len, &argp->error);
6633 else
6634 ret = __sev_dbg_encrypt_user(kvm,
6635 __sme_page_pa(src_p[0]) + s_off,
6636 vaddr,
6637 __sme_page_pa(dst_p[0]) + d_off,
6638 dst_vaddr,
6639 len, &argp->error);
24f41fb2
BS
6640
6641 sev_unpin_memory(kvm, src_p, 1);
6642 sev_unpin_memory(kvm, dst_p, 1);
6643
6644 if (ret)
6645 goto err;
6646
6647 next_vaddr = vaddr + len;
6648 dst_vaddr = dst_vaddr + len;
6649 size -= len;
6650 }
6651err:
6652 return ret;
6653}
6654
9f5b5b95
BS
6655static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
6656{
6657 struct kvm_sev_info *sev = &kvm->arch.sev_info;
6658 struct sev_data_launch_secret *data;
6659 struct kvm_sev_launch_secret params;
6660 struct page **pages;
6661 void *blob, *hdr;
6662 unsigned long n;
9c5e0afa 6663 int ret, offset;
9f5b5b95
BS
6664
6665 if (!sev_guest(kvm))
6666 return -ENOTTY;
6667
6668 if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
6669 return -EFAULT;
6670
6671 pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1);
6672 if (!pages)
6673 return -ENOMEM;
6674
6675 /*
6676 * The secret must be copied into a contiguous memory region, so verify
6677 * that the userspace memory pages are contiguous before issuing the command.
6678 */
6679 if (get_num_contig_pages(0, pages, n) != n) {
6680 ret = -EINVAL;
6681 goto e_unpin_memory;
6682 }
6683
6684 ret = -ENOMEM;
6685 data = kzalloc(sizeof(*data), GFP_KERNEL);
6686 if (!data)
6687 goto e_unpin_memory;
6688
9c5e0afa
BS
6689 offset = params.guest_uaddr & (PAGE_SIZE - 1);
6690 data->guest_address = __sme_page_pa(pages[0]) + offset;
6691 data->guest_len = params.guest_len;
6692
9f5b5b95
BS
6693 blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
6694 if (IS_ERR(blob)) {
6695 ret = PTR_ERR(blob);
6696 goto e_free;
6697 }
6698
6699 data->trans_address = __psp_pa(blob);
6700 data->trans_len = params.trans_len;
6701
6702 hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
6703 if (IS_ERR(hdr)) {
6704 ret = PTR_ERR(hdr);
6705 goto e_free_blob;
6706 }
9c5e0afa
BS
6707 data->hdr_address = __psp_pa(hdr);
6708 data->hdr_len = params.hdr_len;
9f5b5b95
BS
6709
6710 data->handle = sev->handle;
6711 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error);
6712
6713 kfree(hdr);
6714
6715e_free_blob:
6716 kfree(blob);
6717e_free:
6718 kfree(data);
6719e_unpin_memory:
6720 sev_unpin_memory(kvm, pages, n);
6721 return ret;
6722}
6723
1654efcb
BS
6724static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
6725{
6726 struct kvm_sev_cmd sev_cmd;
6727 int r;
6728
6729 if (!svm_sev_enabled())
6730 return -ENOTTY;
6731
6732 if (copy_from_user(&sev_cmd, argp, sizeof(struct kvm_sev_cmd)))
6733 return -EFAULT;
6734
6735 mutex_lock(&kvm->lock);
6736
6737 switch (sev_cmd.id) {
6738 case KVM_SEV_INIT:
6739 r = sev_guest_init(kvm, &sev_cmd);
6740 break;
59414c98
BS
6741 case KVM_SEV_LAUNCH_START:
6742 r = sev_launch_start(kvm, &sev_cmd);
6743 break;
89c50580
BS
6744 case KVM_SEV_LAUNCH_UPDATE_DATA:
6745 r = sev_launch_update_data(kvm, &sev_cmd);
6746 break;
0d0736f7
BS
6747 case KVM_SEV_LAUNCH_MEASURE:
6748 r = sev_launch_measure(kvm, &sev_cmd);
6749 break;
5bdb0e2f
BS
6750 case KVM_SEV_LAUNCH_FINISH:
6751 r = sev_launch_finish(kvm, &sev_cmd);
6752 break;
255d9e75
BS
6753 case KVM_SEV_GUEST_STATUS:
6754 r = sev_guest_status(kvm, &sev_cmd);
6755 break;
24f41fb2
BS
6756 case KVM_SEV_DBG_DECRYPT:
6757 r = sev_dbg_crypt(kvm, &sev_cmd, true);
6758 break;
7d1594f5
BS
6759 case KVM_SEV_DBG_ENCRYPT:
6760 r = sev_dbg_crypt(kvm, &sev_cmd, false);
6761 break;
9f5b5b95
BS
6762 case KVM_SEV_LAUNCH_SECRET:
6763 r = sev_launch_secret(kvm, &sev_cmd);
6764 break;
1654efcb
BS
6765 default:
6766 r = -EINVAL;
6767 goto out;
6768 }
6769
6770 if (copy_to_user(argp, &sev_cmd, sizeof(struct kvm_sev_cmd)))
6771 r = -EFAULT;
6772
6773out:
6774 mutex_unlock(&kvm->lock);
6775 return r;
6776}
6777
1e80fdc0
BS
6778static int svm_register_enc_region(struct kvm *kvm,
6779 struct kvm_enc_region *range)
6780{
6781 struct kvm_sev_info *sev = &kvm->arch.sev_info;
6782 struct enc_region *region;
6783 int ret = 0;
6784
6785 if (!sev_guest(kvm))
6786 return -ENOTTY;
6787
6788 region = kzalloc(sizeof(*region), GFP_KERNEL);
6789 if (!region)
6790 return -ENOMEM;
6791
6792 region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages, 1);
6793 if (!region->pages) {
6794 ret = -ENOMEM;
6795 goto e_free;
6796 }
6797
6798 /*
6799 * The guest may change the memory encryption attribute from C=0 -> C=1
6800 * or vice versa for this memory range. Make sure caches are
6801 * flushed to ensure that guest data gets written into memory with
6802 * correct C-bit.
6803 */
6804 sev_clflush_pages(region->pages, region->npages);
6805
6806 region->uaddr = range->addr;
6807 region->size = range->size;
6808
6809 mutex_lock(&kvm->lock);
6810 list_add_tail(&region->list, &sev->regions_list);
6811 mutex_unlock(&kvm->lock);
6812
6813 return ret;
6814
6815e_free:
6816 kfree(region);
6817 return ret;
6818}
6819
6820static struct enc_region *
6821find_enc_region(struct kvm *kvm, struct kvm_enc_region *range)
6822{
6823 struct kvm_sev_info *sev = &kvm->arch.sev_info;
6824 struct list_head *head = &sev->regions_list;
6825 struct enc_region *i;
6826
6827 list_for_each_entry(i, head, list) {
6828 if (i->uaddr == range->addr &&
6829 i->size == range->size)
6830 return i;
6831 }
6832
6833 return NULL;
6834}
6835
6836
6837static int svm_unregister_enc_region(struct kvm *kvm,
6838 struct kvm_enc_region *range)
6839{
6840 struct enc_region *region;
6841 int ret;
6842
6843 mutex_lock(&kvm->lock);
6844
6845 if (!sev_guest(kvm)) {
6846 ret = -ENOTTY;
6847 goto failed;
6848 }
6849
6850 region = find_enc_region(kvm, range);
6851 if (!region) {
6852 ret = -EINVAL;
6853 goto failed;
6854 }
6855
6856 __unregister_enc_region_locked(kvm, region);
6857
6858 mutex_unlock(&kvm->lock);
6859 return 0;
6860
6861failed:
6862 mutex_unlock(&kvm->lock);
6863 return ret;
6864}
6865
404f6aac 6866static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
6aa8b732
AK
6867 .cpu_has_kvm_support = has_svm,
6868 .disabled_by_bios = is_disabled,
6869 .hardware_setup = svm_hardware_setup,
6870 .hardware_unsetup = svm_hardware_unsetup,
002c7f7c 6871 .check_processor_compatibility = svm_check_processor_compat,
6aa8b732
AK
6872 .hardware_enable = svm_hardware_enable,
6873 .hardware_disable = svm_hardware_disable,
774ead3a 6874 .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,
6d396b55 6875 .cpu_has_high_real_mode_segbase = svm_has_high_real_mode_segbase,
6aa8b732
AK
6876
6877 .vcpu_create = svm_create_vcpu,
6878 .vcpu_free = svm_free_vcpu,
04d2cc77 6879 .vcpu_reset = svm_vcpu_reset,
6aa8b732 6880
44a95dae 6881 .vm_init = avic_vm_init,
1654efcb 6882 .vm_destroy = svm_vm_destroy,
44a95dae 6883
04d2cc77 6884 .prepare_guest_switch = svm_prepare_guest_switch,
6aa8b732
AK
6885 .vcpu_load = svm_vcpu_load,
6886 .vcpu_put = svm_vcpu_put,
8221c137
SS
6887 .vcpu_blocking = svm_vcpu_blocking,
6888 .vcpu_unblocking = svm_vcpu_unblocking,
6aa8b732 6889
a96036b8 6890 .update_bp_intercept = update_bp_intercept,
801e459a 6891 .get_msr_feature = svm_get_msr_feature,
6aa8b732
AK
6892 .get_msr = svm_get_msr,
6893 .set_msr = svm_set_msr,
6894 .get_segment_base = svm_get_segment_base,
6895 .get_segment = svm_get_segment,
6896 .set_segment = svm_set_segment,
2e4d2653 6897 .get_cpl = svm_get_cpl,
1747fb71 6898 .get_cs_db_l_bits = kvm_get_cs_db_l_bits,
e8467fda 6899 .decache_cr0_guest_bits = svm_decache_cr0_guest_bits,
aff48baa 6900 .decache_cr3 = svm_decache_cr3,
25c4c276 6901 .decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
6aa8b732 6902 .set_cr0 = svm_set_cr0,
6aa8b732
AK
6903 .set_cr3 = svm_set_cr3,
6904 .set_cr4 = svm_set_cr4,
6905 .set_efer = svm_set_efer,
6906 .get_idt = svm_get_idt,
6907 .set_idt = svm_set_idt,
6908 .get_gdt = svm_get_gdt,
6909 .set_gdt = svm_set_gdt,
73aaf249
JK
6910 .get_dr6 = svm_get_dr6,
6911 .set_dr6 = svm_set_dr6,
020df079 6912 .set_dr7 = svm_set_dr7,
facb0139 6913 .sync_dirty_debug_regs = svm_sync_dirty_debug_regs,
6de4f3ad 6914 .cache_reg = svm_cache_reg,
6aa8b732
AK
6915 .get_rflags = svm_get_rflags,
6916 .set_rflags = svm_set_rflags,
be94f6b7 6917
6aa8b732 6918 .tlb_flush = svm_flush_tlb,
6aa8b732 6919
6aa8b732 6920 .run = svm_vcpu_run,
04d2cc77 6921 .handle_exit = handle_exit,
6aa8b732 6922 .skip_emulated_instruction = skip_emulated_instruction,
2809f5d2
GC
6923 .set_interrupt_shadow = svm_set_interrupt_shadow,
6924 .get_interrupt_shadow = svm_get_interrupt_shadow,
102d8325 6925 .patch_hypercall = svm_patch_hypercall,
2a8067f1 6926 .set_irq = svm_set_irq,
95ba8273 6927 .set_nmi = svm_inject_nmi,
298101da 6928 .queue_exception = svm_queue_exception,
b463a6f7 6929 .cancel_injection = svm_cancel_injection,
78646121 6930 .interrupt_allowed = svm_interrupt_allowed,
95ba8273 6931 .nmi_allowed = svm_nmi_allowed,
3cfc3092
JK
6932 .get_nmi_mask = svm_get_nmi_mask,
6933 .set_nmi_mask = svm_set_nmi_mask,
95ba8273
GN
6934 .enable_nmi_window = enable_nmi_window,
6935 .enable_irq_window = enable_irq_window,
6936 .update_cr8_intercept = update_cr8_intercept,
8d14695f 6937 .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
d62caabb
AS
6938 .get_enable_apicv = svm_get_enable_apicv,
6939 .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl,
c7c9c56c 6940 .load_eoi_exitmap = svm_load_eoi_exitmap,
44a95dae
SS
6941 .hwapic_irr_update = svm_hwapic_irr_update,
6942 .hwapic_isr_update = svm_hwapic_isr_update,
fa59cc00 6943 .sync_pir_to_irr = kvm_lapic_find_highest_irr,
be8ca170 6944 .apicv_post_state_restore = avic_post_state_restore,
cbc94022
IE
6945
6946 .set_tss_addr = svm_set_tss_addr,
67253af5 6947 .get_tdp_level = get_npt_level,
4b12f0de 6948 .get_mt_mask = svm_get_mt_mask,
229456fc 6949
586f9607 6950 .get_exit_info = svm_get_exit_info,
586f9607 6951
17cc3935 6952 .get_lpage_level = svm_get_lpage_level,
0e851880
SY
6953
6954 .cpuid_update = svm_cpuid_update,
4e47c7a6
SY
6955
6956 .rdtscp_supported = svm_rdtscp_supported,
ad756a16 6957 .invpcid_supported = svm_invpcid_supported,
93c4adc7 6958 .mpx_supported = svm_mpx_supported,
55412b2e 6959 .xsaves_supported = svm_xsaves_supported,
66336cab 6960 .umip_emulated = svm_umip_emulated,
d4330ef2
JR
6961
6962 .set_supported_cpuid = svm_set_supported_cpuid,
f5f48ee1
SY
6963
6964 .has_wbinvd_exit = svm_has_wbinvd_exit,
99e3e30a
ZA
6965
6966 .write_tsc_offset = svm_write_tsc_offset,
1c97f0a0
JR
6967
6968 .set_tdp_cr3 = set_tdp_cr3,
8a76d7f2
JR
6969
6970 .check_intercept = svm_check_intercept,
a547c6db 6971 .handle_external_intr = svm_handle_external_intr,
ae97a3b8
RK
6972
6973 .sched_in = svm_sched_in,
25462f7f
WH
6974
6975 .pmu_ops = &amd_pmu_ops,
340d3bc3 6976 .deliver_posted_interrupt = svm_deliver_avic_intr,
411b44ba 6977 .update_pi_irte = svm_update_pi_irte,
74f16909 6978 .setup_mce = svm_setup_mce,
0234bf88 6979
72d7b374 6980 .smi_allowed = svm_smi_allowed,
0234bf88
LP
6981 .pre_enter_smm = svm_pre_enter_smm,
6982 .pre_leave_smm = svm_pre_leave_smm,
cc3d967f 6983 .enable_smi_window = enable_smi_window,
1654efcb
BS
6984
6985 .mem_enc_op = svm_mem_enc_op,
1e80fdc0
BS
6986 .mem_enc_reg_region = svm_register_enc_region,
6987 .mem_enc_unreg_region = svm_unregister_enc_region,
6aa8b732
AK
6988};
6989
6990static int __init svm_init(void)
6991{
cb498ea2 6992 return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm),
0ee75bea 6993 __alignof__(struct vcpu_svm), THIS_MODULE);
6aa8b732
AK
6994}
6995
6996static void __exit svm_exit(void)
6997{
cb498ea2 6998 kvm_exit();
6aa8b732
AK
6999}
7000
7001module_init(svm_init)
7002module_exit(svm_exit)