]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - arch/x86/kvm/svm.c
KVM: race-free exit from KVM_RUN without POSIX signals
[mirror_ubuntu-artful-kernel.git] / arch / x86 / kvm / svm.c
CommitLineData
6aa8b732
AK
1/*
2 * Kernel-based Virtual Machine driver for Linux
3 *
4 * AMD SVM support
5 *
6 * Copyright (C) 2006 Qumranet, Inc.
9611c187 7 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
6aa8b732
AK
8 *
9 * Authors:
10 * Yaniv Kamay <yaniv@qumranet.com>
11 * Avi Kivity <avi@qumranet.com>
12 *
13 * This work is licensed under the terms of the GNU GPL, version 2. See
14 * the COPYING file in the top-level directory.
15 *
16 */
44a95dae
SS
17
18#define pr_fmt(fmt) "SVM: " fmt
19
edf88417
AK
20#include <linux/kvm_host.h>
21
85f455f7 22#include "irq.h"
1d737c8a 23#include "mmu.h"
5fdbf976 24#include "kvm_cache_regs.h"
fe4c7b19 25#include "x86.h"
66f7b72e 26#include "cpuid.h"
25462f7f 27#include "pmu.h"
e495606d 28
6aa8b732 29#include <linux/module.h>
ae759544 30#include <linux/mod_devicetable.h>
9d8f549d 31#include <linux/kernel.h>
6aa8b732
AK
32#include <linux/vmalloc.h>
33#include <linux/highmem.h>
e8edc6e0 34#include <linux/sched.h>
af658dca 35#include <linux/trace_events.h>
5a0e3ad6 36#include <linux/slab.h>
5881f737
SS
37#include <linux/amd-iommu.h>
38#include <linux/hashtable.h>
6aa8b732 39
8221c137 40#include <asm/apic.h>
1018faa6 41#include <asm/perf_event.h>
67ec6607 42#include <asm/tlbflush.h>
e495606d 43#include <asm/desc.h>
facb0139 44#include <asm/debugreg.h>
631bc487 45#include <asm/kvm_para.h>
411b44ba 46#include <asm/irq_remapping.h>
6aa8b732 47
63d1142f 48#include <asm/virtext.h>
229456fc 49#include "trace.h"
63d1142f 50
4ecac3fd
AK
51#define __ex(x) __kvm_handle_fault_on_reboot(x)
52
6aa8b732
AK
53MODULE_AUTHOR("Qumranet");
54MODULE_LICENSE("GPL");
55
ae759544
JT
56static const struct x86_cpu_id svm_cpu_id[] = {
57 X86_FEATURE_MATCH(X86_FEATURE_SVM),
58 {}
59};
60MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
61
6aa8b732
AK
62#define IOPM_ALLOC_ORDER 2
63#define MSRPM_ALLOC_ORDER 1
64
6aa8b732
AK
65#define SEG_TYPE_LDT 2
66#define SEG_TYPE_BUSY_TSS16 3
67
6bc31bdc
AP
68#define SVM_FEATURE_NPT (1 << 0)
69#define SVM_FEATURE_LBRV (1 << 1)
70#define SVM_FEATURE_SVML (1 << 2)
71#define SVM_FEATURE_NRIP (1 << 3)
ddce97aa
AP
72#define SVM_FEATURE_TSC_RATE (1 << 4)
73#define SVM_FEATURE_VMCB_CLEAN (1 << 5)
74#define SVM_FEATURE_FLUSH_ASID (1 << 6)
75#define SVM_FEATURE_DECODE_ASSIST (1 << 7)
6bc31bdc 76#define SVM_FEATURE_PAUSE_FILTER (1 << 10)
80b7706e 77
340d3bc3
SS
78#define SVM_AVIC_DOORBELL 0xc001011b
79
410e4d57
JR
80#define NESTED_EXIT_HOST 0 /* Exit handled on host level */
81#define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */
82#define NESTED_EXIT_CONTINUE 2 /* Further checks needed */
83
24e09cbf
JR
84#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
85
fbc0db76 86#define TSC_RATIO_RSVD 0xffffff0000000000ULL
92a1f12d
JR
87#define TSC_RATIO_MIN 0x0000000000000001ULL
88#define TSC_RATIO_MAX 0x000000ffffffffffULL
fbc0db76 89
5446a979 90#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
44a95dae
SS
91
92/*
93 * 0xff is broadcast, so the max index allowed for physical APIC ID
94 * table is 0xfe. APIC IDs above 0xff are reserved.
95 */
96#define AVIC_MAX_PHYSICAL_ID_COUNT 255
97
18f40c53
SS
98#define AVIC_UNACCEL_ACCESS_WRITE_MASK 1
99#define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0
100#define AVIC_UNACCEL_ACCESS_VECTOR_MASK 0xFFFFFFFF
101
5ea11f2b
SS
102/* AVIC GATAG is encoded using VM and VCPU IDs */
103#define AVIC_VCPU_ID_BITS 8
104#define AVIC_VCPU_ID_MASK ((1 << AVIC_VCPU_ID_BITS) - 1)
105
106#define AVIC_VM_ID_BITS 24
107#define AVIC_VM_ID_NR (1 << AVIC_VM_ID_BITS)
108#define AVIC_VM_ID_MASK ((1 << AVIC_VM_ID_BITS) - 1)
109
110#define AVIC_GATAG(x, y) (((x & AVIC_VM_ID_MASK) << AVIC_VCPU_ID_BITS) | \
111 (y & AVIC_VCPU_ID_MASK))
112#define AVIC_GATAG_TO_VMID(x) ((x >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK)
113#define AVIC_GATAG_TO_VCPUID(x) (x & AVIC_VCPU_ID_MASK)
114
67ec6607
JR
115static bool erratum_383_found __read_mostly;
116
6c8166a7
AK
117static const u32 host_save_user_msrs[] = {
118#ifdef CONFIG_X86_64
119 MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
120 MSR_FS_BASE,
121#endif
122 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
46896c73 123 MSR_TSC_AUX,
6c8166a7
AK
124};
125
126#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
127
128struct kvm_vcpu;
129
e6aa9abd
JR
130struct nested_state {
131 struct vmcb *hsave;
132 u64 hsave_msr;
4a810181 133 u64 vm_cr_msr;
e6aa9abd
JR
134 u64 vmcb;
135
136 /* These are the merged vectors */
137 u32 *msrpm;
138
139 /* gpa pointers to the real vectors */
140 u64 vmcb_msrpm;
ce2ac085 141 u64 vmcb_iopm;
aad42c64 142
cd3ff653
JR
143 /* A VMEXIT is required but not yet emulated */
144 bool exit_required;
145
aad42c64 146 /* cache for intercepts of the guest */
4ee546b4 147 u32 intercept_cr;
3aed041a 148 u32 intercept_dr;
aad42c64
JR
149 u32 intercept_exceptions;
150 u64 intercept;
151
5bd2edc3
JR
152 /* Nested Paging related state */
153 u64 nested_cr3;
e6aa9abd
JR
154};
155
323c3d80
JR
156#define MSRPM_OFFSETS 16
157static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
158
2b036c6b
BO
159/*
160 * Set osvw_len to higher value when updated Revision Guides
161 * are published and we know what the new status bits are
162 */
163static uint64_t osvw_len = 4, osvw_status;
164
6c8166a7
AK
165struct vcpu_svm {
166 struct kvm_vcpu vcpu;
167 struct vmcb *vmcb;
168 unsigned long vmcb_pa;
169 struct svm_cpu_data *svm_data;
170 uint64_t asid_generation;
171 uint64_t sysenter_esp;
172 uint64_t sysenter_eip;
46896c73 173 uint64_t tsc_aux;
6c8166a7
AK
174
175 u64 next_rip;
176
177 u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
afe9e66f 178 struct {
dacccfdd
AK
179 u16 fs;
180 u16 gs;
181 u16 ldt;
afe9e66f
AK
182 u64 gs_base;
183 } host;
6c8166a7
AK
184
185 u32 *msrpm;
6c8166a7 186
bd3d1ec3
AK
187 ulong nmi_iret_rip;
188
e6aa9abd 189 struct nested_state nested;
6be7d306
JK
190
191 bool nmi_singlestep;
66b7138f
JK
192
193 unsigned int3_injected;
194 unsigned long int3_rip;
631bc487 195 u32 apf_reason;
fbc0db76 196
6092d3d3
JR
197 /* cached guest cpuid flags for faster access */
198 bool nrips_enabled : 1;
44a95dae 199
18f40c53 200 u32 ldr_reg;
44a95dae
SS
201 struct page *avic_backing_page;
202 u64 *avic_physical_id_cache;
8221c137 203 bool avic_is_running;
411b44ba
SS
204
205 /*
206 * Per-vcpu list of struct amd_svm_iommu_ir:
207 * This is used mainly to store interrupt remapping information used
208 * when update the vcpu affinity. This avoids the need to scan for
209 * IRTE and try to match ga_tag in the IOMMU driver.
210 */
211 struct list_head ir_list;
212 spinlock_t ir_list_lock;
213};
214
215/*
216 * This is a wrapper of struct amd_iommu_ir_data.
217 */
218struct amd_svm_iommu_ir {
219 struct list_head node; /* Used by SVM for per-vcpu ir_list */
220 void *data; /* Storing pointer to struct amd_ir_data */
6c8166a7
AK
221};
222
44a95dae
SS
223#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
224#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
225
226#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL)
227#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
228#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
229#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
230
fbc0db76
JR
231static DEFINE_PER_CPU(u64, current_tsc_ratio);
232#define TSC_RATIO_DEFAULT 0x0100000000ULL
233
455716fa
JR
234#define MSR_INVALID 0xffffffffU
235
09941fbb 236static const struct svm_direct_access_msrs {
ac72a9b7
JR
237 u32 index; /* Index of the MSR */
238 bool always; /* True if intercept is always on */
239} direct_access_msrs[] = {
8c06585d 240 { .index = MSR_STAR, .always = true },
ac72a9b7
JR
241 { .index = MSR_IA32_SYSENTER_CS, .always = true },
242#ifdef CONFIG_X86_64
243 { .index = MSR_GS_BASE, .always = true },
244 { .index = MSR_FS_BASE, .always = true },
245 { .index = MSR_KERNEL_GS_BASE, .always = true },
246 { .index = MSR_LSTAR, .always = true },
247 { .index = MSR_CSTAR, .always = true },
248 { .index = MSR_SYSCALL_MASK, .always = true },
249#endif
250 { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false },
251 { .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
252 { .index = MSR_IA32_LASTINTFROMIP, .always = false },
253 { .index = MSR_IA32_LASTINTTOIP, .always = false },
254 { .index = MSR_INVALID, .always = false },
6c8166a7
AK
255};
256
709ddebf
JR
257/* enable NPT for AMD64 and X86 with PAE */
258#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
259static bool npt_enabled = true;
260#else
e0231715 261static bool npt_enabled;
709ddebf 262#endif
6c7dac72 263
e2358851
DB
264/* allow nested paging (virtualized MMU) for all guests */
265static int npt = true;
6c7dac72 266module_param(npt, int, S_IRUGO);
e3da3acd 267
e2358851
DB
268/* allow nested virtualization in KVM/SVM */
269static int nested = true;
236de055
AG
270module_param(nested, int, S_IRUGO);
271
44a95dae
SS
272/* enable / disable AVIC */
273static int avic;
5b8abf1f 274#ifdef CONFIG_X86_LOCAL_APIC
44a95dae 275module_param(avic, int, S_IRUGO);
5b8abf1f 276#endif
44a95dae 277
5ea11f2b
SS
278/* AVIC VM ID bit masks and lock */
279static DECLARE_BITMAP(avic_vm_id_bitmap, AVIC_VM_ID_NR);
280static DEFINE_SPINLOCK(avic_vm_id_lock);
281
79a8059d 282static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
44874f84 283static void svm_flush_tlb(struct kvm_vcpu *vcpu);
a5c3832d 284static void svm_complete_interrupts(struct vcpu_svm *svm);
04d2cc77 285
410e4d57 286static int nested_svm_exit_handled(struct vcpu_svm *svm);
b8e88bc8 287static int nested_svm_intercept(struct vcpu_svm *svm);
cf74a78b 288static int nested_svm_vmexit(struct vcpu_svm *svm);
cf74a78b
AG
289static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
290 bool has_error_code, u32 error_code);
291
8d28fec4 292enum {
116a0a23
JR
293 VMCB_INTERCEPTS, /* Intercept vectors, TSC offset,
294 pause filter count */
f56838e4 295 VMCB_PERM_MAP, /* IOPM Base and MSRPM Base */
d48086d1 296 VMCB_ASID, /* ASID */
decdbf6a 297 VMCB_INTR, /* int_ctl, int_vector */
b2747166 298 VMCB_NPT, /* npt_en, nCR3, gPAT */
dcca1a65 299 VMCB_CR, /* CR0, CR3, CR4, EFER */
72214b96 300 VMCB_DR, /* DR6, DR7 */
17a703cb 301 VMCB_DT, /* GDT, IDT */
060d0c9a 302 VMCB_SEG, /* CS, DS, SS, ES, CPL */
0574dec0 303 VMCB_CR2, /* CR2 only */
b53ba3f9 304 VMCB_LBR, /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */
44a95dae
SS
305 VMCB_AVIC, /* AVIC APIC_BAR, AVIC APIC_BACKING_PAGE,
306 * AVIC PHYSICAL_TABLE pointer,
307 * AVIC LOGICAL_TABLE pointer
308 */
8d28fec4
RJ
309 VMCB_DIRTY_MAX,
310};
311
0574dec0
JR
312/* TPR and CR2 are always written before VMRUN */
313#define VMCB_ALWAYS_DIRTY_MASK ((1U << VMCB_INTR) | (1U << VMCB_CR2))
8d28fec4 314
44a95dae
SS
315#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
316
8d28fec4
RJ
317static inline void mark_all_dirty(struct vmcb *vmcb)
318{
319 vmcb->control.clean = 0;
320}
321
322static inline void mark_all_clean(struct vmcb *vmcb)
323{
324 vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1)
325 & ~VMCB_ALWAYS_DIRTY_MASK;
326}
327
328static inline void mark_dirty(struct vmcb *vmcb, int bit)
329{
330 vmcb->control.clean &= ~(1 << bit);
331}
332
a2fa3e9f
GH
333static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
334{
fb3f0f51 335 return container_of(vcpu, struct vcpu_svm, vcpu);
a2fa3e9f
GH
336}
337
44a95dae
SS
338static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data)
339{
340 svm->vmcb->control.avic_vapic_bar = data & VMCB_AVIC_APIC_BAR_MASK;
341 mark_dirty(svm->vmcb, VMCB_AVIC);
342}
343
340d3bc3
SS
344static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu)
345{
346 struct vcpu_svm *svm = to_svm(vcpu);
347 u64 *entry = svm->avic_physical_id_cache;
348
349 if (!entry)
350 return false;
351
352 return (READ_ONCE(*entry) & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
353}
354
384c6368
JR
355static void recalc_intercepts(struct vcpu_svm *svm)
356{
357 struct vmcb_control_area *c, *h;
358 struct nested_state *g;
359
116a0a23
JR
360 mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
361
384c6368
JR
362 if (!is_guest_mode(&svm->vcpu))
363 return;
364
365 c = &svm->vmcb->control;
366 h = &svm->nested.hsave->control;
367 g = &svm->nested;
368
4ee546b4 369 c->intercept_cr = h->intercept_cr | g->intercept_cr;
3aed041a 370 c->intercept_dr = h->intercept_dr | g->intercept_dr;
384c6368
JR
371 c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
372 c->intercept = h->intercept | g->intercept;
373}
374
4ee546b4
RJ
375static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm)
376{
377 if (is_guest_mode(&svm->vcpu))
378 return svm->nested.hsave;
379 else
380 return svm->vmcb;
381}
382
383static inline void set_cr_intercept(struct vcpu_svm *svm, int bit)
384{
385 struct vmcb *vmcb = get_host_vmcb(svm);
386
387 vmcb->control.intercept_cr |= (1U << bit);
388
389 recalc_intercepts(svm);
390}
391
392static inline void clr_cr_intercept(struct vcpu_svm *svm, int bit)
393{
394 struct vmcb *vmcb = get_host_vmcb(svm);
395
396 vmcb->control.intercept_cr &= ~(1U << bit);
397
398 recalc_intercepts(svm);
399}
400
401static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit)
402{
403 struct vmcb *vmcb = get_host_vmcb(svm);
404
405 return vmcb->control.intercept_cr & (1U << bit);
406}
407
5315c716 408static inline void set_dr_intercepts(struct vcpu_svm *svm)
3aed041a
JR
409{
410 struct vmcb *vmcb = get_host_vmcb(svm);
411
5315c716
PB
412 vmcb->control.intercept_dr = (1 << INTERCEPT_DR0_READ)
413 | (1 << INTERCEPT_DR1_READ)
414 | (1 << INTERCEPT_DR2_READ)
415 | (1 << INTERCEPT_DR3_READ)
416 | (1 << INTERCEPT_DR4_READ)
417 | (1 << INTERCEPT_DR5_READ)
418 | (1 << INTERCEPT_DR6_READ)
419 | (1 << INTERCEPT_DR7_READ)
420 | (1 << INTERCEPT_DR0_WRITE)
421 | (1 << INTERCEPT_DR1_WRITE)
422 | (1 << INTERCEPT_DR2_WRITE)
423 | (1 << INTERCEPT_DR3_WRITE)
424 | (1 << INTERCEPT_DR4_WRITE)
425 | (1 << INTERCEPT_DR5_WRITE)
426 | (1 << INTERCEPT_DR6_WRITE)
427 | (1 << INTERCEPT_DR7_WRITE);
3aed041a
JR
428
429 recalc_intercepts(svm);
430}
431
5315c716 432static inline void clr_dr_intercepts(struct vcpu_svm *svm)
3aed041a
JR
433{
434 struct vmcb *vmcb = get_host_vmcb(svm);
435
5315c716 436 vmcb->control.intercept_dr = 0;
3aed041a
JR
437
438 recalc_intercepts(svm);
439}
440
18c918c5
JR
441static inline void set_exception_intercept(struct vcpu_svm *svm, int bit)
442{
443 struct vmcb *vmcb = get_host_vmcb(svm);
444
445 vmcb->control.intercept_exceptions |= (1U << bit);
446
447 recalc_intercepts(svm);
448}
449
450static inline void clr_exception_intercept(struct vcpu_svm *svm, int bit)
451{
452 struct vmcb *vmcb = get_host_vmcb(svm);
453
454 vmcb->control.intercept_exceptions &= ~(1U << bit);
455
456 recalc_intercepts(svm);
457}
458
8a05a1b8
JR
459static inline void set_intercept(struct vcpu_svm *svm, int bit)
460{
461 struct vmcb *vmcb = get_host_vmcb(svm);
462
463 vmcb->control.intercept |= (1ULL << bit);
464
465 recalc_intercepts(svm);
466}
467
468static inline void clr_intercept(struct vcpu_svm *svm, int bit)
469{
470 struct vmcb *vmcb = get_host_vmcb(svm);
471
472 vmcb->control.intercept &= ~(1ULL << bit);
473
474 recalc_intercepts(svm);
475}
476
2af9194d
JR
477static inline void enable_gif(struct vcpu_svm *svm)
478{
479 svm->vcpu.arch.hflags |= HF_GIF_MASK;
480}
481
482static inline void disable_gif(struct vcpu_svm *svm)
483{
484 svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
485}
486
487static inline bool gif_set(struct vcpu_svm *svm)
488{
489 return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
490}
491
4866d5e3 492static unsigned long iopm_base;
6aa8b732
AK
493
494struct kvm_ldttss_desc {
495 u16 limit0;
496 u16 base0;
e0231715
JR
497 unsigned base1:8, type:5, dpl:2, p:1;
498 unsigned limit1:4, zero0:3, g:1, base2:8;
6aa8b732
AK
499 u32 base3;
500 u32 zero1;
501} __attribute__((packed));
502
503struct svm_cpu_data {
504 int cpu;
505
5008fdf5
AK
506 u64 asid_generation;
507 u32 max_asid;
508 u32 next_asid;
6aa8b732
AK
509 struct kvm_ldttss_desc *tss_desc;
510
511 struct page *save_area;
512};
513
514static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
515
516struct svm_init_data {
517 int cpu;
518 int r;
519};
520
09941fbb 521static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
6aa8b732 522
9d8f549d 523#define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
6aa8b732
AK
524#define MSRS_RANGE_SIZE 2048
525#define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2)
526
455716fa
JR
527static u32 svm_msrpm_offset(u32 msr)
528{
529 u32 offset;
530 int i;
531
532 for (i = 0; i < NUM_MSR_MAPS; i++) {
533 if (msr < msrpm_ranges[i] ||
534 msr >= msrpm_ranges[i] + MSRS_IN_RANGE)
535 continue;
536
537 offset = (msr - msrpm_ranges[i]) / 4; /* 4 msrs per u8 */
538 offset += (i * MSRS_RANGE_SIZE); /* add range offset */
539
540 /* Now we have the u8 offset - but need the u32 offset */
541 return offset / 4;
542 }
543
544 /* MSR not in any range */
545 return MSR_INVALID;
546}
547
6aa8b732
AK
548#define MAX_INST_SIZE 15
549
6aa8b732
AK
550static inline void clgi(void)
551{
4ecac3fd 552 asm volatile (__ex(SVM_CLGI));
6aa8b732
AK
553}
554
555static inline void stgi(void)
556{
4ecac3fd 557 asm volatile (__ex(SVM_STGI));
6aa8b732
AK
558}
559
560static inline void invlpga(unsigned long addr, u32 asid)
561{
e0231715 562 asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid));
6aa8b732
AK
563}
564
4b16184c
JR
565static int get_npt_level(void)
566{
567#ifdef CONFIG_X86_64
568 return PT64_ROOT_LEVEL;
569#else
570 return PT32E_ROOT_LEVEL;
571#endif
572}
573
6aa8b732
AK
574static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
575{
6dc696d4 576 vcpu->arch.efer = efer;
709ddebf 577 if (!npt_enabled && !(efer & EFER_LMA))
2b5203ee 578 efer &= ~EFER_LME;
6aa8b732 579
9962d032 580 to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
dcca1a65 581 mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
6aa8b732
AK
582}
583
6aa8b732
AK
584static int is_external_interrupt(u32 info)
585{
586 info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
587 return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
588}
589
37ccdcbe 590static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu)
2809f5d2
GC
591{
592 struct vcpu_svm *svm = to_svm(vcpu);
593 u32 ret = 0;
594
595 if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
37ccdcbe
PB
596 ret = KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS;
597 return ret;
2809f5d2
GC
598}
599
600static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
601{
602 struct vcpu_svm *svm = to_svm(vcpu);
603
604 if (mask == 0)
605 svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
606 else
607 svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
608
609}
610
6aa8b732
AK
611static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
612{
a2fa3e9f
GH
613 struct vcpu_svm *svm = to_svm(vcpu);
614
f104765b 615 if (svm->vmcb->control.next_rip != 0) {
d2922422 616 WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
6bc31bdc 617 svm->next_rip = svm->vmcb->control.next_rip;
f104765b 618 }
6bc31bdc 619
a2fa3e9f 620 if (!svm->next_rip) {
51d8b661 621 if (emulate_instruction(vcpu, EMULTYPE_SKIP) !=
f629cf84
GN
622 EMULATE_DONE)
623 printk(KERN_DEBUG "%s: NOP\n", __func__);
6aa8b732
AK
624 return;
625 }
5fdbf976
MT
626 if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
627 printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n",
628 __func__, kvm_rip_read(vcpu), svm->next_rip);
6aa8b732 629
5fdbf976 630 kvm_rip_write(vcpu, svm->next_rip);
2809f5d2 631 svm_set_interrupt_shadow(vcpu, 0);
6aa8b732
AK
632}
633
116a4752 634static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
ce7ddec4
JR
635 bool has_error_code, u32 error_code,
636 bool reinject)
116a4752
JK
637{
638 struct vcpu_svm *svm = to_svm(vcpu);
639
e0231715
JR
640 /*
641 * If we are within a nested VM we'd better #VMEXIT and let the guest
642 * handle the exception
643 */
ce7ddec4
JR
644 if (!reinject &&
645 nested_svm_check_exception(svm, nr, has_error_code, error_code))
116a4752
JK
646 return;
647
2a6b20b8 648 if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) {
66b7138f
JK
649 unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
650
651 /*
652 * For guest debugging where we have to reinject #BP if some
653 * INT3 is guest-owned:
654 * Emulate nRIP by moving RIP forward. Will fail if injection
655 * raises a fault that is not intercepted. Still better than
656 * failing in all cases.
657 */
658 skip_emulated_instruction(&svm->vcpu);
659 rip = kvm_rip_read(&svm->vcpu);
660 svm->int3_rip = rip + svm->vmcb->save.cs.base;
661 svm->int3_injected = rip - old_rip;
662 }
663
116a4752
JK
664 svm->vmcb->control.event_inj = nr
665 | SVM_EVTINJ_VALID
666 | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
667 | SVM_EVTINJ_TYPE_EXEPT;
668 svm->vmcb->control.event_inj_err = error_code;
669}
670
67ec6607
JR
671static void svm_init_erratum_383(void)
672{
673 u32 low, high;
674 int err;
675 u64 val;
676
e6ee94d5 677 if (!static_cpu_has_bug(X86_BUG_AMD_TLB_MMATCH))
67ec6607
JR
678 return;
679
680 /* Use _safe variants to not break nested virtualization */
681 val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err);
682 if (err)
683 return;
684
685 val |= (1ULL << 47);
686
687 low = lower_32_bits(val);
688 high = upper_32_bits(val);
689
690 native_write_msr_safe(MSR_AMD64_DC_CFG, low, high);
691
692 erratum_383_found = true;
693}
694
2b036c6b
BO
695static void svm_init_osvw(struct kvm_vcpu *vcpu)
696{
697 /*
698 * Guests should see errata 400 and 415 as fixed (assuming that
699 * HLT and IO instructions are intercepted).
700 */
701 vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3;
702 vcpu->arch.osvw.status = osvw_status & ~(6ULL);
703
704 /*
705 * By increasing VCPU's osvw.length to 3 we are telling the guest that
706 * all osvw.status bits inside that length, including bit 0 (which is
707 * reserved for erratum 298), are valid. However, if host processor's
708 * osvw_len is 0 then osvw_status[0] carries no information. We need to
709 * be conservative here and therefore we tell the guest that erratum 298
710 * is present (because we really don't know).
711 */
712 if (osvw_len == 0 && boot_cpu_data.x86 == 0x10)
713 vcpu->arch.osvw.status |= 1;
714}
715
6aa8b732
AK
716static int has_svm(void)
717{
63d1142f 718 const char *msg;
6aa8b732 719
63d1142f 720 if (!cpu_has_svm(&msg)) {
ff81ff10 721 printk(KERN_INFO "has_svm: %s\n", msg);
6aa8b732
AK
722 return 0;
723 }
724
6aa8b732
AK
725 return 1;
726}
727
13a34e06 728static void svm_hardware_disable(void)
6aa8b732 729{
fbc0db76
JR
730 /* Make sure we clean up behind us */
731 if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
732 wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
733
2c8dceeb 734 cpu_svm_disable();
1018faa6
JR
735
736 amd_pmu_disable_virt();
6aa8b732
AK
737}
738
13a34e06 739static int svm_hardware_enable(void)
6aa8b732
AK
740{
741
0fe1e009 742 struct svm_cpu_data *sd;
6aa8b732 743 uint64_t efer;
89a27f4d 744 struct desc_ptr gdt_descr;
6aa8b732
AK
745 struct desc_struct *gdt;
746 int me = raw_smp_processor_id();
747
10474ae8
AG
748 rdmsrl(MSR_EFER, efer);
749 if (efer & EFER_SVME)
750 return -EBUSY;
751
6aa8b732 752 if (!has_svm()) {
1f5b77f5 753 pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me);
10474ae8 754 return -EINVAL;
6aa8b732 755 }
0fe1e009 756 sd = per_cpu(svm_data, me);
0fe1e009 757 if (!sd) {
1f5b77f5 758 pr_err("%s: svm_data is NULL on %d\n", __func__, me);
10474ae8 759 return -EINVAL;
6aa8b732
AK
760 }
761
0fe1e009
TH
762 sd->asid_generation = 1;
763 sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
764 sd->next_asid = sd->max_asid + 1;
6aa8b732 765
d6ab1ed4 766 native_store_gdt(&gdt_descr);
89a27f4d 767 gdt = (struct desc_struct *)gdt_descr.address;
0fe1e009 768 sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
6aa8b732 769
9962d032 770 wrmsrl(MSR_EFER, efer | EFER_SVME);
6aa8b732 771
d0316554 772 wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
10474ae8 773
fbc0db76
JR
774 if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
775 wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
89cbc767 776 __this_cpu_write(current_tsc_ratio, TSC_RATIO_DEFAULT);
fbc0db76
JR
777 }
778
2b036c6b
BO
779
780 /*
781 * Get OSVW bits.
782 *
783 * Note that it is possible to have a system with mixed processor
784 * revisions and therefore different OSVW bits. If bits are not the same
785 * on different processors then choose the worst case (i.e. if erratum
786 * is present on one processor and not on another then assume that the
787 * erratum is present everywhere).
788 */
789 if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) {
790 uint64_t len, status = 0;
791 int err;
792
793 len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err);
794 if (!err)
795 status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS,
796 &err);
797
798 if (err)
799 osvw_status = osvw_len = 0;
800 else {
801 if (len < osvw_len)
802 osvw_len = len;
803 osvw_status |= status;
804 osvw_status &= (1ULL << osvw_len) - 1;
805 }
806 } else
807 osvw_status = osvw_len = 0;
808
67ec6607
JR
809 svm_init_erratum_383();
810
1018faa6
JR
811 amd_pmu_enable_virt();
812
10474ae8 813 return 0;
6aa8b732
AK
814}
815
0da1db75
JR
816static void svm_cpu_uninit(int cpu)
817{
0fe1e009 818 struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id());
0da1db75 819
0fe1e009 820 if (!sd)
0da1db75
JR
821 return;
822
823 per_cpu(svm_data, raw_smp_processor_id()) = NULL;
0fe1e009
TH
824 __free_page(sd->save_area);
825 kfree(sd);
0da1db75
JR
826}
827
6aa8b732
AK
828static int svm_cpu_init(int cpu)
829{
0fe1e009 830 struct svm_cpu_data *sd;
6aa8b732
AK
831 int r;
832
0fe1e009
TH
833 sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
834 if (!sd)
6aa8b732 835 return -ENOMEM;
0fe1e009
TH
836 sd->cpu = cpu;
837 sd->save_area = alloc_page(GFP_KERNEL);
6aa8b732 838 r = -ENOMEM;
0fe1e009 839 if (!sd->save_area)
6aa8b732
AK
840 goto err_1;
841
0fe1e009 842 per_cpu(svm_data, cpu) = sd;
6aa8b732
AK
843
844 return 0;
845
846err_1:
0fe1e009 847 kfree(sd);
6aa8b732
AK
848 return r;
849
850}
851
ac72a9b7
JR
852static bool valid_msr_intercept(u32 index)
853{
854 int i;
855
856 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++)
857 if (direct_access_msrs[i].index == index)
858 return true;
859
860 return false;
861}
862
bfc733a7
RR
863static void set_msr_interception(u32 *msrpm, unsigned msr,
864 int read, int write)
6aa8b732 865{
455716fa
JR
866 u8 bit_read, bit_write;
867 unsigned long tmp;
868 u32 offset;
6aa8b732 869
ac72a9b7
JR
870 /*
871 * If this warning triggers extend the direct_access_msrs list at the
872 * beginning of the file
873 */
874 WARN_ON(!valid_msr_intercept(msr));
875
455716fa
JR
876 offset = svm_msrpm_offset(msr);
877 bit_read = 2 * (msr & 0x0f);
878 bit_write = 2 * (msr & 0x0f) + 1;
879 tmp = msrpm[offset];
880
881 BUG_ON(offset == MSR_INVALID);
882
883 read ? clear_bit(bit_read, &tmp) : set_bit(bit_read, &tmp);
884 write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);
885
886 msrpm[offset] = tmp;
6aa8b732
AK
887}
888
f65c229c 889static void svm_vcpu_init_msrpm(u32 *msrpm)
6aa8b732
AK
890{
891 int i;
892
f65c229c
JR
893 memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
894
ac72a9b7
JR
895 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
896 if (!direct_access_msrs[i].always)
897 continue;
898
899 set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
900 }
f65c229c
JR
901}
902
323c3d80
JR
903static void add_msr_offset(u32 offset)
904{
905 int i;
906
907 for (i = 0; i < MSRPM_OFFSETS; ++i) {
908
909 /* Offset already in list? */
910 if (msrpm_offsets[i] == offset)
bfc733a7 911 return;
323c3d80
JR
912
913 /* Slot used by another offset? */
914 if (msrpm_offsets[i] != MSR_INVALID)
915 continue;
916
917 /* Add offset to list */
918 msrpm_offsets[i] = offset;
919
920 return;
6aa8b732 921 }
323c3d80
JR
922
923 /*
924 * If this BUG triggers the msrpm_offsets table has an overflow. Just
925 * increase MSRPM_OFFSETS in this case.
926 */
bfc733a7 927 BUG();
6aa8b732
AK
928}
929
323c3d80 930static void init_msrpm_offsets(void)
f65c229c 931{
323c3d80 932 int i;
f65c229c 933
323c3d80
JR
934 memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets));
935
936 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
937 u32 offset;
938
939 offset = svm_msrpm_offset(direct_access_msrs[i].index);
940 BUG_ON(offset == MSR_INVALID);
941
942 add_msr_offset(offset);
943 }
f65c229c
JR
944}
945
24e09cbf
JR
946static void svm_enable_lbrv(struct vcpu_svm *svm)
947{
948 u32 *msrpm = svm->msrpm;
949
950 svm->vmcb->control.lbr_ctl = 1;
951 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
952 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
953 set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
954 set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
955}
956
957static void svm_disable_lbrv(struct vcpu_svm *svm)
958{
959 u32 *msrpm = svm->msrpm;
960
961 svm->vmcb->control.lbr_ctl = 0;
962 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
963 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
964 set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
965 set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
966}
967
5881f737
SS
968/* Note:
969 * This hash table is used to map VM_ID to a struct kvm_arch,
970 * when handling AMD IOMMU GALOG notification to schedule in
971 * a particular vCPU.
972 */
973#define SVM_VM_DATA_HASH_BITS 8
681bcea8
DH
974static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
975static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
5881f737
SS
976
977/* Note:
978 * This function is called from IOMMU driver to notify
979 * SVM to schedule in a particular vCPU of a particular VM.
980 */
981static int avic_ga_log_notifier(u32 ga_tag)
982{
983 unsigned long flags;
984 struct kvm_arch *ka = NULL;
985 struct kvm_vcpu *vcpu = NULL;
986 u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag);
987 u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag);
988
989 pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id);
990
991 spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
992 hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) {
993 struct kvm *kvm = container_of(ka, struct kvm, arch);
994 struct kvm_arch *vm_data = &kvm->arch;
995
996 if (vm_data->avic_vm_id != vm_id)
997 continue;
998 vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
999 break;
1000 }
1001 spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
1002
1003 if (!vcpu)
1004 return 0;
1005
1006 /* Note:
1007 * At this point, the IOMMU should have already set the pending
1008 * bit in the vAPIC backing page. So, we just need to schedule
1009 * in the vcpu.
1010 */
1011 if (vcpu->mode == OUTSIDE_GUEST_MODE)
1012 kvm_vcpu_wake_up(vcpu);
1013
1014 return 0;
1015}
1016
6aa8b732
AK
1017static __init int svm_hardware_setup(void)
1018{
1019 int cpu;
1020 struct page *iopm_pages;
f65c229c 1021 void *iopm_va;
6aa8b732
AK
1022 int r;
1023
6aa8b732
AK
1024 iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);
1025
1026 if (!iopm_pages)
1027 return -ENOMEM;
c8681339
AL
1028
1029 iopm_va = page_address(iopm_pages);
1030 memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
6aa8b732
AK
1031 iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
1032
323c3d80
JR
1033 init_msrpm_offsets();
1034
50a37eb4
JR
1035 if (boot_cpu_has(X86_FEATURE_NX))
1036 kvm_enable_efer_bits(EFER_NX);
1037
1b2fd70c
AG
1038 if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
1039 kvm_enable_efer_bits(EFER_FFXSR);
1040
92a1f12d 1041 if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
92a1f12d 1042 kvm_has_tsc_control = true;
bc9b961b
HZ
1043 kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX;
1044 kvm_tsc_scaling_ratio_frac_bits = 32;
92a1f12d
JR
1045 }
1046
236de055
AG
1047 if (nested) {
1048 printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
eec4b140 1049 kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
236de055
AG
1050 }
1051
3230bb47 1052 for_each_possible_cpu(cpu) {
6aa8b732
AK
1053 r = svm_cpu_init(cpu);
1054 if (r)
f65c229c 1055 goto err;
6aa8b732 1056 }
33bd6a0b 1057
2a6b20b8 1058 if (!boot_cpu_has(X86_FEATURE_NPT))
e3da3acd
JR
1059 npt_enabled = false;
1060
6c7dac72
JR
1061 if (npt_enabled && !npt) {
1062 printk(KERN_INFO "kvm: Nested Paging disabled\n");
1063 npt_enabled = false;
1064 }
1065
18552672 1066 if (npt_enabled) {
e3da3acd 1067 printk(KERN_INFO "kvm: Nested Paging enabled\n");
18552672 1068 kvm_enable_tdp();
5f4cb662
JR
1069 } else
1070 kvm_disable_tdp();
e3da3acd 1071
5b8abf1f
SS
1072 if (avic) {
1073 if (!npt_enabled ||
1074 !boot_cpu_has(X86_FEATURE_AVIC) ||
5881f737 1075 !IS_ENABLED(CONFIG_X86_LOCAL_APIC)) {
5b8abf1f 1076 avic = false;
5881f737 1077 } else {
5b8abf1f 1078 pr_info("AVIC enabled\n");
5881f737 1079
5881f737
SS
1080 amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
1081 }
5b8abf1f 1082 }
44a95dae 1083
6aa8b732
AK
1084 return 0;
1085
f65c229c 1086err:
6aa8b732
AK
1087 __free_pages(iopm_pages, IOPM_ALLOC_ORDER);
1088 iopm_base = 0;
1089 return r;
1090}
1091
1092static __exit void svm_hardware_unsetup(void)
1093{
0da1db75
JR
1094 int cpu;
1095
3230bb47 1096 for_each_possible_cpu(cpu)
0da1db75
JR
1097 svm_cpu_uninit(cpu);
1098
6aa8b732 1099 __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
f65c229c 1100 iopm_base = 0;
6aa8b732
AK
1101}
1102
1103static void init_seg(struct vmcb_seg *seg)
1104{
1105 seg->selector = 0;
1106 seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK |
e0231715 1107 SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */
6aa8b732
AK
1108 seg->limit = 0xffff;
1109 seg->base = 0;
1110}
1111
1112static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
1113{
1114 seg->selector = 0;
1115 seg->attrib = SVM_SELECTOR_P_MASK | type;
1116 seg->limit = 0xffff;
1117 seg->base = 0;
1118}
1119
f4e1b3c8
ZA
1120static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
1121{
1122 struct vcpu_svm *svm = to_svm(vcpu);
1123 u64 g_tsc_offset = 0;
1124
2030753d 1125 if (is_guest_mode(vcpu)) {
f4e1b3c8
ZA
1126 g_tsc_offset = svm->vmcb->control.tsc_offset -
1127 svm->nested.hsave->control.tsc_offset;
1128 svm->nested.hsave->control.tsc_offset = offset;
489223ed
YY
1129 } else
1130 trace_kvm_write_tsc_offset(vcpu->vcpu_id,
1131 svm->vmcb->control.tsc_offset,
1132 offset);
f4e1b3c8
ZA
1133
1134 svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
116a0a23
JR
1135
1136 mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
f4e1b3c8
ZA
1137}
1138
44a95dae
SS
1139static void avic_init_vmcb(struct vcpu_svm *svm)
1140{
1141 struct vmcb *vmcb = svm->vmcb;
1142 struct kvm_arch *vm_data = &svm->vcpu.kvm->arch;
1143 phys_addr_t bpa = page_to_phys(svm->avic_backing_page);
1144 phys_addr_t lpa = page_to_phys(vm_data->avic_logical_id_table_page);
1145 phys_addr_t ppa = page_to_phys(vm_data->avic_physical_id_table_page);
1146
1147 vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
1148 vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
1149 vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
1150 vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT;
1151 vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
1152 svm->vcpu.arch.apicv_active = true;
1153}
1154
5690891b 1155static void init_vmcb(struct vcpu_svm *svm)
6aa8b732 1156{
e6101a96
JR
1157 struct vmcb_control_area *control = &svm->vmcb->control;
1158 struct vmcb_save_area *save = &svm->vmcb->save;
6aa8b732 1159
bff78274 1160 svm->vcpu.fpu_active = 1;
4ee546b4 1161 svm->vcpu.arch.hflags = 0;
bff78274 1162
4ee546b4
RJ
1163 set_cr_intercept(svm, INTERCEPT_CR0_READ);
1164 set_cr_intercept(svm, INTERCEPT_CR3_READ);
1165 set_cr_intercept(svm, INTERCEPT_CR4_READ);
1166 set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
1167 set_cr_intercept(svm, INTERCEPT_CR3_WRITE);
1168 set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
3bbf3565
SS
1169 if (!kvm_vcpu_apicv_active(&svm->vcpu))
1170 set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
6aa8b732 1171
5315c716 1172 set_dr_intercepts(svm);
6aa8b732 1173
18c918c5
JR
1174 set_exception_intercept(svm, PF_VECTOR);
1175 set_exception_intercept(svm, UD_VECTOR);
1176 set_exception_intercept(svm, MC_VECTOR);
54a20552 1177 set_exception_intercept(svm, AC_VECTOR);
cbdb967a 1178 set_exception_intercept(svm, DB_VECTOR);
6aa8b732 1179
8a05a1b8
JR
1180 set_intercept(svm, INTERCEPT_INTR);
1181 set_intercept(svm, INTERCEPT_NMI);
1182 set_intercept(svm, INTERCEPT_SMI);
1183 set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
332b56e4 1184 set_intercept(svm, INTERCEPT_RDPMC);
8a05a1b8
JR
1185 set_intercept(svm, INTERCEPT_CPUID);
1186 set_intercept(svm, INTERCEPT_INVD);
1187 set_intercept(svm, INTERCEPT_HLT);
1188 set_intercept(svm, INTERCEPT_INVLPG);
1189 set_intercept(svm, INTERCEPT_INVLPGA);
1190 set_intercept(svm, INTERCEPT_IOIO_PROT);
1191 set_intercept(svm, INTERCEPT_MSR_PROT);
1192 set_intercept(svm, INTERCEPT_TASK_SWITCH);
1193 set_intercept(svm, INTERCEPT_SHUTDOWN);
1194 set_intercept(svm, INTERCEPT_VMRUN);
1195 set_intercept(svm, INTERCEPT_VMMCALL);
1196 set_intercept(svm, INTERCEPT_VMLOAD);
1197 set_intercept(svm, INTERCEPT_VMSAVE);
1198 set_intercept(svm, INTERCEPT_STGI);
1199 set_intercept(svm, INTERCEPT_CLGI);
1200 set_intercept(svm, INTERCEPT_SKINIT);
1201 set_intercept(svm, INTERCEPT_WBINVD);
1202 set_intercept(svm, INTERCEPT_MONITOR);
1203 set_intercept(svm, INTERCEPT_MWAIT);
81dd35d4 1204 set_intercept(svm, INTERCEPT_XSETBV);
6aa8b732
AK
1205
1206 control->iopm_base_pa = iopm_base;
f65c229c 1207 control->msrpm_base_pa = __pa(svm->msrpm);
6aa8b732
AK
1208 control->int_ctl = V_INTR_MASKING_MASK;
1209
1210 init_seg(&save->es);
1211 init_seg(&save->ss);
1212 init_seg(&save->ds);
1213 init_seg(&save->fs);
1214 init_seg(&save->gs);
1215
1216 save->cs.selector = 0xf000;
04b66839 1217 save->cs.base = 0xffff0000;
6aa8b732
AK
1218 /* Executable/Readable Code Segment */
1219 save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK |
1220 SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
1221 save->cs.limit = 0xffff;
6aa8b732
AK
1222
1223 save->gdtr.limit = 0xffff;
1224 save->idtr.limit = 0xffff;
1225
1226 init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
1227 init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
1228
5690891b 1229 svm_set_efer(&svm->vcpu, 0);
d77c26fc 1230 save->dr6 = 0xffff0ff0;
f6e78475 1231 kvm_set_rflags(&svm->vcpu, 2);
6aa8b732 1232 save->rip = 0x0000fff0;
5fdbf976 1233 svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
6aa8b732 1234
e0231715 1235 /*
18fa000a 1236 * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
d28bc9dd 1237 * It also updates the guest-visible cr0 value.
6aa8b732 1238 */
79a8059d 1239 svm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
ebae871a 1240 kvm_mmu_reset_context(&svm->vcpu);
18fa000a 1241
66aee91a 1242 save->cr4 = X86_CR4_PAE;
6aa8b732 1243 /* rdx = ?? */
709ddebf
JR
1244
1245 if (npt_enabled) {
1246 /* Setup VMCB for Nested Paging */
1247 control->nested_ctl = 1;
8a05a1b8 1248 clr_intercept(svm, INTERCEPT_INVLPG);
18c918c5 1249 clr_exception_intercept(svm, PF_VECTOR);
4ee546b4
RJ
1250 clr_cr_intercept(svm, INTERCEPT_CR3_READ);
1251 clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
74545705 1252 save->g_pat = svm->vcpu.arch.pat;
709ddebf
JR
1253 save->cr3 = 0;
1254 save->cr4 = 0;
1255 }
f40f6a45 1256 svm->asid_generation = 0;
1371d904 1257
e6aa9abd 1258 svm->nested.vmcb = 0;
2af9194d
JR
1259 svm->vcpu.arch.hflags = 0;
1260
2a6b20b8 1261 if (boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
565d0998 1262 control->pause_filter_count = 3000;
8a05a1b8 1263 set_intercept(svm, INTERCEPT_PAUSE);
565d0998
ML
1264 }
1265
44a95dae
SS
1266 if (avic)
1267 avic_init_vmcb(svm);
1268
8d28fec4
RJ
1269 mark_all_dirty(svm->vmcb);
1270
2af9194d 1271 enable_gif(svm);
44a95dae
SS
1272
1273}
1274
1275static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, int index)
1276{
1277 u64 *avic_physical_id_table;
1278 struct kvm_arch *vm_data = &vcpu->kvm->arch;
1279
1280 if (index >= AVIC_MAX_PHYSICAL_ID_COUNT)
1281 return NULL;
1282
1283 avic_physical_id_table = page_address(vm_data->avic_physical_id_table_page);
1284
1285 return &avic_physical_id_table[index];
1286}
1287
1288/**
1289 * Note:
1290 * AVIC hardware walks the nested page table to check permissions,
1291 * but does not use the SPA address specified in the leaf page
1292 * table entry since it uses address in the AVIC_BACKING_PAGE pointer
1293 * field of the VMCB. Therefore, we set up the
1294 * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here.
1295 */
1296static int avic_init_access_page(struct kvm_vcpu *vcpu)
1297{
1298 struct kvm *kvm = vcpu->kvm;
1299 int ret;
1300
1301 if (kvm->arch.apic_access_page_done)
1302 return 0;
1303
1304 ret = x86_set_memory_region(kvm,
1305 APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
1306 APIC_DEFAULT_PHYS_BASE,
1307 PAGE_SIZE);
1308 if (ret)
1309 return ret;
1310
1311 kvm->arch.apic_access_page_done = true;
1312 return 0;
1313}
1314
1315static int avic_init_backing_page(struct kvm_vcpu *vcpu)
1316{
1317 int ret;
1318 u64 *entry, new_entry;
1319 int id = vcpu->vcpu_id;
1320 struct vcpu_svm *svm = to_svm(vcpu);
1321
1322 ret = avic_init_access_page(vcpu);
1323 if (ret)
1324 return ret;
1325
1326 if (id >= AVIC_MAX_PHYSICAL_ID_COUNT)
1327 return -EINVAL;
1328
1329 if (!svm->vcpu.arch.apic->regs)
1330 return -EINVAL;
1331
1332 svm->avic_backing_page = virt_to_page(svm->vcpu.arch.apic->regs);
1333
1334 /* Setting AVIC backing page address in the phy APIC ID table */
1335 entry = avic_get_physical_id_entry(vcpu, id);
1336 if (!entry)
1337 return -EINVAL;
1338
1339 new_entry = READ_ONCE(*entry);
1340 new_entry = (page_to_phys(svm->avic_backing_page) &
1341 AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
1342 AVIC_PHYSICAL_ID_ENTRY_VALID_MASK;
1343 WRITE_ONCE(*entry, new_entry);
1344
1345 svm->avic_physical_id_cache = entry;
1346
1347 return 0;
1348}
1349
5ea11f2b
SS
1350static inline int avic_get_next_vm_id(void)
1351{
1352 int id;
1353
1354 spin_lock(&avic_vm_id_lock);
1355
1356 /* AVIC VM ID is one-based. */
1357 id = find_next_zero_bit(avic_vm_id_bitmap, AVIC_VM_ID_NR, 1);
1358 if (id <= AVIC_VM_ID_MASK)
1359 __set_bit(id, avic_vm_id_bitmap);
1360 else
1361 id = -EAGAIN;
1362
1363 spin_unlock(&avic_vm_id_lock);
1364 return id;
1365}
1366
1367static inline int avic_free_vm_id(int id)
1368{
1369 if (id <= 0 || id > AVIC_VM_ID_MASK)
1370 return -EINVAL;
1371
1372 spin_lock(&avic_vm_id_lock);
1373 __clear_bit(id, avic_vm_id_bitmap);
1374 spin_unlock(&avic_vm_id_lock);
1375 return 0;
1376}
1377
44a95dae
SS
1378static void avic_vm_destroy(struct kvm *kvm)
1379{
5881f737 1380 unsigned long flags;
44a95dae
SS
1381 struct kvm_arch *vm_data = &kvm->arch;
1382
5ea11f2b
SS
1383 avic_free_vm_id(vm_data->avic_vm_id);
1384
44a95dae
SS
1385 if (vm_data->avic_logical_id_table_page)
1386 __free_page(vm_data->avic_logical_id_table_page);
1387 if (vm_data->avic_physical_id_table_page)
1388 __free_page(vm_data->avic_physical_id_table_page);
5881f737
SS
1389
1390 spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
1391 hash_del(&vm_data->hnode);
1392 spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
44a95dae
SS
1393}
1394
1395static int avic_vm_init(struct kvm *kvm)
1396{
5881f737 1397 unsigned long flags;
adad0d02 1398 int vm_id, err = -ENOMEM;
44a95dae
SS
1399 struct kvm_arch *vm_data = &kvm->arch;
1400 struct page *p_page;
1401 struct page *l_page;
1402
1403 if (!avic)
1404 return 0;
1405
adad0d02
CIK
1406 vm_id = avic_get_next_vm_id();
1407 if (vm_id < 0)
1408 return vm_id;
1409 vm_data->avic_vm_id = (u32)vm_id;
5ea11f2b 1410
44a95dae
SS
1411 /* Allocating physical APIC ID table (4KB) */
1412 p_page = alloc_page(GFP_KERNEL);
1413 if (!p_page)
1414 goto free_avic;
1415
1416 vm_data->avic_physical_id_table_page = p_page;
1417 clear_page(page_address(p_page));
1418
1419 /* Allocating logical APIC ID table (4KB) */
1420 l_page = alloc_page(GFP_KERNEL);
1421 if (!l_page)
1422 goto free_avic;
1423
1424 vm_data->avic_logical_id_table_page = l_page;
1425 clear_page(page_address(l_page));
1426
5881f737
SS
1427 spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
1428 hash_add(svm_vm_data_hash, &vm_data->hnode, vm_data->avic_vm_id);
1429 spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
1430
44a95dae
SS
1431 return 0;
1432
1433free_avic:
1434 avic_vm_destroy(kvm);
1435 return err;
6aa8b732
AK
1436}
1437
411b44ba
SS
1438static inline int
1439avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
8221c137 1440{
411b44ba
SS
1441 int ret = 0;
1442 unsigned long flags;
1443 struct amd_svm_iommu_ir *ir;
8221c137
SS
1444 struct vcpu_svm *svm = to_svm(vcpu);
1445
411b44ba
SS
1446 if (!kvm_arch_has_assigned_device(vcpu->kvm))
1447 return 0;
8221c137 1448
411b44ba
SS
1449 /*
1450 * Here, we go through the per-vcpu ir_list to update all existing
1451 * interrupt remapping table entry targeting this vcpu.
1452 */
1453 spin_lock_irqsave(&svm->ir_list_lock, flags);
8221c137 1454
411b44ba
SS
1455 if (list_empty(&svm->ir_list))
1456 goto out;
8221c137 1457
411b44ba
SS
1458 list_for_each_entry(ir, &svm->ir_list, node) {
1459 ret = amd_iommu_update_ga(cpu, r, ir->data);
1460 if (ret)
1461 break;
1462 }
1463out:
1464 spin_unlock_irqrestore(&svm->ir_list_lock, flags);
1465 return ret;
8221c137
SS
1466}
1467
1468static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1469{
1470 u64 entry;
1471 /* ID = 0xff (broadcast), ID > 0xff (reserved) */
7d669f50 1472 int h_physical_id = kvm_cpu_get_apicid(cpu);
8221c137
SS
1473 struct vcpu_svm *svm = to_svm(vcpu);
1474
1475 if (!kvm_vcpu_apicv_active(vcpu))
1476 return;
1477
1478 if (WARN_ON(h_physical_id >= AVIC_MAX_PHYSICAL_ID_COUNT))
1479 return;
1480
1481 entry = READ_ONCE(*(svm->avic_physical_id_cache));
1482 WARN_ON(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
1483
1484 entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
1485 entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
1486
1487 entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
1488 if (svm->avic_is_running)
1489 entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
1490
1491 WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
411b44ba
SS
1492 avic_update_iommu_vcpu_affinity(vcpu, h_physical_id,
1493 svm->avic_is_running);
8221c137
SS
1494}
1495
1496static void avic_vcpu_put(struct kvm_vcpu *vcpu)
1497{
1498 u64 entry;
1499 struct vcpu_svm *svm = to_svm(vcpu);
1500
1501 if (!kvm_vcpu_apicv_active(vcpu))
1502 return;
1503
1504 entry = READ_ONCE(*(svm->avic_physical_id_cache));
411b44ba
SS
1505 if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
1506 avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
1507
8221c137
SS
1508 entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
1509 WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
6aa8b732
AK
1510}
1511
411b44ba
SS
1512/**
1513 * This function is called during VCPU halt/unhalt.
1514 */
1515static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
1516{
1517 struct vcpu_svm *svm = to_svm(vcpu);
1518
1519 svm->avic_is_running = is_run;
1520 if (is_run)
1521 avic_vcpu_load(vcpu, vcpu->cpu);
1522 else
1523 avic_vcpu_put(vcpu);
1524}
1525
d28bc9dd 1526static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
04d2cc77
AK
1527{
1528 struct vcpu_svm *svm = to_svm(vcpu);
66f7b72e
JS
1529 u32 dummy;
1530 u32 eax = 1;
04d2cc77 1531
d28bc9dd
NA
1532 if (!init_event) {
1533 svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
1534 MSR_IA32_APICBASE_ENABLE;
1535 if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
1536 svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
1537 }
5690891b 1538 init_vmcb(svm);
70433389 1539
66f7b72e
JS
1540 kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
1541 kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
44a95dae
SS
1542
1543 if (kvm_vcpu_apicv_active(vcpu) && !init_event)
1544 avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE);
04d2cc77
AK
1545}
1546
fb3f0f51 1547static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
6aa8b732 1548{
a2fa3e9f 1549 struct vcpu_svm *svm;
6aa8b732 1550 struct page *page;
f65c229c 1551 struct page *msrpm_pages;
b286d5d8 1552 struct page *hsave_page;
3d6368ef 1553 struct page *nested_msrpm_pages;
fb3f0f51 1554 int err;
6aa8b732 1555
c16f862d 1556 svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
fb3f0f51
RR
1557 if (!svm) {
1558 err = -ENOMEM;
1559 goto out;
1560 }
1561
1562 err = kvm_vcpu_init(&svm->vcpu, kvm, id);
1563 if (err)
1564 goto free_svm;
1565
b7af4043 1566 err = -ENOMEM;
6aa8b732 1567 page = alloc_page(GFP_KERNEL);
b7af4043 1568 if (!page)
fb3f0f51 1569 goto uninit;
6aa8b732 1570
f65c229c
JR
1571 msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
1572 if (!msrpm_pages)
b7af4043 1573 goto free_page1;
3d6368ef
AG
1574
1575 nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
1576 if (!nested_msrpm_pages)
b7af4043 1577 goto free_page2;
f65c229c 1578
b286d5d8
AG
1579 hsave_page = alloc_page(GFP_KERNEL);
1580 if (!hsave_page)
b7af4043
TY
1581 goto free_page3;
1582
44a95dae
SS
1583 if (avic) {
1584 err = avic_init_backing_page(&svm->vcpu);
1585 if (err)
1586 goto free_page4;
411b44ba
SS
1587
1588 INIT_LIST_HEAD(&svm->ir_list);
1589 spin_lock_init(&svm->ir_list_lock);
44a95dae
SS
1590 }
1591
8221c137
SS
1592 /* We initialize this flag to true to make sure that the is_running
1593 * bit would be set the first time the vcpu is loaded.
1594 */
1595 svm->avic_is_running = true;
1596
e6aa9abd 1597 svm->nested.hsave = page_address(hsave_page);
b286d5d8 1598
b7af4043
TY
1599 svm->msrpm = page_address(msrpm_pages);
1600 svm_vcpu_init_msrpm(svm->msrpm);
1601
e6aa9abd 1602 svm->nested.msrpm = page_address(nested_msrpm_pages);
323c3d80 1603 svm_vcpu_init_msrpm(svm->nested.msrpm);
3d6368ef 1604
a2fa3e9f
GH
1605 svm->vmcb = page_address(page);
1606 clear_page(svm->vmcb);
1607 svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
1608 svm->asid_generation = 0;
5690891b 1609 init_vmcb(svm);
6aa8b732 1610
2b036c6b
BO
1611 svm_init_osvw(&svm->vcpu);
1612
fb3f0f51 1613 return &svm->vcpu;
36241b8c 1614
44a95dae
SS
1615free_page4:
1616 __free_page(hsave_page);
b7af4043
TY
1617free_page3:
1618 __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
1619free_page2:
1620 __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
1621free_page1:
1622 __free_page(page);
fb3f0f51
RR
1623uninit:
1624 kvm_vcpu_uninit(&svm->vcpu);
1625free_svm:
a4770347 1626 kmem_cache_free(kvm_vcpu_cache, svm);
fb3f0f51
RR
1627out:
1628 return ERR_PTR(err);
6aa8b732
AK
1629}
1630
1631static void svm_free_vcpu(struct kvm_vcpu *vcpu)
1632{
a2fa3e9f
GH
1633 struct vcpu_svm *svm = to_svm(vcpu);
1634
fb3f0f51 1635 __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
f65c229c 1636 __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
e6aa9abd
JR
1637 __free_page(virt_to_page(svm->nested.hsave));
1638 __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
fb3f0f51 1639 kvm_vcpu_uninit(vcpu);
a4770347 1640 kmem_cache_free(kvm_vcpu_cache, svm);
6aa8b732
AK
1641}
1642
15ad7146 1643static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
6aa8b732 1644{
a2fa3e9f 1645 struct vcpu_svm *svm = to_svm(vcpu);
15ad7146 1646 int i;
0cc5064d 1647
0cc5064d 1648 if (unlikely(cpu != vcpu->cpu)) {
4b656b12 1649 svm->asid_generation = 0;
8d28fec4 1650 mark_all_dirty(svm->vmcb);
0cc5064d 1651 }
94dfbdb3 1652
82ca2d10
AK
1653#ifdef CONFIG_X86_64
1654 rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base);
1655#endif
dacccfdd
AK
1656 savesegment(fs, svm->host.fs);
1657 savesegment(gs, svm->host.gs);
1658 svm->host.ldt = kvm_read_ldt();
1659
94dfbdb3 1660 for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
a2fa3e9f 1661 rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
fbc0db76 1662
ad721883
HZ
1663 if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
1664 u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio;
1665 if (tsc_ratio != __this_cpu_read(current_tsc_ratio)) {
1666 __this_cpu_write(current_tsc_ratio, tsc_ratio);
1667 wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio);
1668 }
fbc0db76 1669 }
46896c73
PB
1670 /* This assumes that the kernel never uses MSR_TSC_AUX */
1671 if (static_cpu_has(X86_FEATURE_RDTSCP))
1672 wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
8221c137
SS
1673
1674 avic_vcpu_load(vcpu, cpu);
6aa8b732
AK
1675}
1676
1677static void svm_vcpu_put(struct kvm_vcpu *vcpu)
1678{
a2fa3e9f 1679 struct vcpu_svm *svm = to_svm(vcpu);
94dfbdb3
AL
1680 int i;
1681
8221c137
SS
1682 avic_vcpu_put(vcpu);
1683
e1beb1d3 1684 ++vcpu->stat.host_state_reload;
dacccfdd
AK
1685 kvm_load_ldt(svm->host.ldt);
1686#ifdef CONFIG_X86_64
1687 loadsegment(fs, svm->host.fs);
296f781a 1688 wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase);
893a5ab6 1689 load_gs_index(svm->host.gs);
dacccfdd 1690#else
831ca609 1691#ifdef CONFIG_X86_32_LAZY_GS
dacccfdd 1692 loadsegment(gs, svm->host.gs);
831ca609 1693#endif
dacccfdd 1694#endif
94dfbdb3 1695 for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
a2fa3e9f 1696 wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
6aa8b732
AK
1697}
1698
8221c137
SS
1699static void svm_vcpu_blocking(struct kvm_vcpu *vcpu)
1700{
1701 avic_set_running(vcpu, false);
1702}
1703
1704static void svm_vcpu_unblocking(struct kvm_vcpu *vcpu)
1705{
1706 avic_set_running(vcpu, true);
1707}
1708
6aa8b732
AK
1709static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
1710{
a2fa3e9f 1711 return to_svm(vcpu)->vmcb->save.rflags;
6aa8b732
AK
1712}
1713
1714static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
1715{
ae9fedc7 1716 /*
bb3541f1 1717 * Any change of EFLAGS.VM is accompanied by a reload of SS
ae9fedc7
PB
1718 * (caused by either a task switch or an inter-privilege IRET),
1719 * so we do not need to update the CPL here.
1720 */
a2fa3e9f 1721 to_svm(vcpu)->vmcb->save.rflags = rflags;
6aa8b732
AK
1722}
1723
be94f6b7
HH
1724static u32 svm_get_pkru(struct kvm_vcpu *vcpu)
1725{
1726 return 0;
1727}
1728
6de4f3ad
AK
1729static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
1730{
1731 switch (reg) {
1732 case VCPU_EXREG_PDPTR:
1733 BUG_ON(!npt_enabled);
9f8fe504 1734 load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
6de4f3ad
AK
1735 break;
1736 default:
1737 BUG();
1738 }
1739}
1740
f0b85051
AG
1741static void svm_set_vintr(struct vcpu_svm *svm)
1742{
8a05a1b8 1743 set_intercept(svm, INTERCEPT_VINTR);
f0b85051
AG
1744}
1745
1746static void svm_clear_vintr(struct vcpu_svm *svm)
1747{
8a05a1b8 1748 clr_intercept(svm, INTERCEPT_VINTR);
f0b85051
AG
1749}
1750
6aa8b732
AK
1751static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
1752{
a2fa3e9f 1753 struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
6aa8b732
AK
1754
1755 switch (seg) {
1756 case VCPU_SREG_CS: return &save->cs;
1757 case VCPU_SREG_DS: return &save->ds;
1758 case VCPU_SREG_ES: return &save->es;
1759 case VCPU_SREG_FS: return &save->fs;
1760 case VCPU_SREG_GS: return &save->gs;
1761 case VCPU_SREG_SS: return &save->ss;
1762 case VCPU_SREG_TR: return &save->tr;
1763 case VCPU_SREG_LDTR: return &save->ldtr;
1764 }
1765 BUG();
8b6d44c7 1766 return NULL;
6aa8b732
AK
1767}
1768
1769static u64 svm_get_segment_base(struct kvm_vcpu *vcpu, int seg)
1770{
1771 struct vmcb_seg *s = svm_seg(vcpu, seg);
1772
1773 return s->base;
1774}
1775
1776static void svm_get_segment(struct kvm_vcpu *vcpu,
1777 struct kvm_segment *var, int seg)
1778{
1779 struct vmcb_seg *s = svm_seg(vcpu, seg);
1780
1781 var->base = s->base;
1782 var->limit = s->limit;
1783 var->selector = s->selector;
1784 var->type = s->attrib & SVM_SELECTOR_TYPE_MASK;
1785 var->s = (s->attrib >> SVM_SELECTOR_S_SHIFT) & 1;
1786 var->dpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
1787 var->present = (s->attrib >> SVM_SELECTOR_P_SHIFT) & 1;
1788 var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1;
1789 var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
1790 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
80112c89
JM
1791
1792 /*
1793 * AMD CPUs circa 2014 track the G bit for all segments except CS.
1794 * However, the SVM spec states that the G bit is not observed by the
1795 * CPU, and some VMware virtual CPUs drop the G bit for all segments.
1796 * So let's synthesize a legal G bit for all segments, this helps
1797 * running KVM nested. It also helps cross-vendor migration, because
1798 * Intel's vmentry has a check on the 'G' bit.
1799 */
1800 var->g = s->limit > 0xfffff;
25022acc 1801
e0231715
JR
1802 /*
1803 * AMD's VMCB does not have an explicit unusable field, so emulate it
19bca6ab
AP
1804 * for cross vendor migration purposes by "not present"
1805 */
1806 var->unusable = !var->present || (var->type == 0);
1807
1fbdc7a5 1808 switch (seg) {
1fbdc7a5
AP
1809 case VCPU_SREG_TR:
1810 /*
1811 * Work around a bug where the busy flag in the tr selector
1812 * isn't exposed
1813 */
c0d09828 1814 var->type |= 0x2;
1fbdc7a5
AP
1815 break;
1816 case VCPU_SREG_DS:
1817 case VCPU_SREG_ES:
1818 case VCPU_SREG_FS:
1819 case VCPU_SREG_GS:
1820 /*
1821 * The accessed bit must always be set in the segment
1822 * descriptor cache, although it can be cleared in the
1823 * descriptor, the cached bit always remains at 1. Since
1824 * Intel has a check on this, set it here to support
1825 * cross-vendor migration.
1826 */
1827 if (!var->unusable)
1828 var->type |= 0x1;
1829 break;
b586eb02 1830 case VCPU_SREG_SS:
e0231715
JR
1831 /*
1832 * On AMD CPUs sometimes the DB bit in the segment
b586eb02
AP
1833 * descriptor is left as 1, although the whole segment has
1834 * been made unusable. Clear it here to pass an Intel VMX
1835 * entry check when cross vendor migrating.
1836 */
1837 if (var->unusable)
1838 var->db = 0;
33b458d2 1839 var->dpl = to_svm(vcpu)->vmcb->save.cpl;
b586eb02 1840 break;
1fbdc7a5 1841 }
6aa8b732
AK
1842}
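/*
 * Illustrative sketch, not part of the kernel source: svm_get_segment()
 * above unpacks the packed VMCB attribute word and then synthesizes the
 * G and "unusable" flags as the comments describe. A minimal standalone
 * model of that unpacking, assuming the usual VMCB layout behind the
 * SVM_SELECTOR_*_SHIFT values (type bits 0-3, S bit 4, DPL bits 5-6,
 * P bit 7, AVL bit 8, L bit 9, DB bit 10); struct and function names
 * here are made up for illustration only.
 */
#include <stdbool.h>
#include <stdint.h>

struct seg_demo {
	unsigned type;		/* descriptor type, 4 bits */
	unsigned dpl;		/* descriptor privilege level, 2 bits */
	bool s, present, avl, l, db, g, unusable;
};

static void seg_demo_unpack(uint16_t attrib, uint32_t limit, struct seg_demo *v)
{
	v->type    = attrib & 0xf;
	v->s       = (attrib >> 4) & 1;
	v->dpl     = (attrib >> 5) & 3;
	v->present = (attrib >> 7) & 1;
	v->avl     = (attrib >> 8) & 1;
	v->l       = (attrib >> 9) & 1;
	v->db      = (attrib >> 10) & 1;
	/* Synthesize a legal G bit from the limit, as the comment above explains. */
	v->g = limit > 0xfffff;
	/* Emulate the missing "unusable" flag from P and a zero type. */
	v->unusable = !v->present || v->type == 0;
}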
1843
2e4d2653
IE
1844static int svm_get_cpl(struct kvm_vcpu *vcpu)
1845{
1846 struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
1847
1848 return save->cpl;
1849}
1850
89a27f4d 1851static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
6aa8b732 1852{
a2fa3e9f
GH
1853 struct vcpu_svm *svm = to_svm(vcpu);
1854
89a27f4d
GN
1855 dt->size = svm->vmcb->save.idtr.limit;
1856 dt->address = svm->vmcb->save.idtr.base;
6aa8b732
AK
1857}
1858
89a27f4d 1859static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
6aa8b732 1860{
a2fa3e9f
GH
1861 struct vcpu_svm *svm = to_svm(vcpu);
1862
89a27f4d
GN
1863 svm->vmcb->save.idtr.limit = dt->size;
1864 svm->vmcb->save.idtr.base = dt->address ;
17a703cb 1865 mark_dirty(svm->vmcb, VMCB_DT);
6aa8b732
AK
1866}
1867
89a27f4d 1868static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
6aa8b732 1869{
a2fa3e9f
GH
1870 struct vcpu_svm *svm = to_svm(vcpu);
1871
89a27f4d
GN
1872 dt->size = svm->vmcb->save.gdtr.limit;
1873 dt->address = svm->vmcb->save.gdtr.base;
6aa8b732
AK
1874}
1875
89a27f4d 1876static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
6aa8b732 1877{
a2fa3e9f
GH
1878 struct vcpu_svm *svm = to_svm(vcpu);
1879
89a27f4d
GN
1880 svm->vmcb->save.gdtr.limit = dt->size;
1881 svm->vmcb->save.gdtr.base = dt->address ;
17a703cb 1882 mark_dirty(svm->vmcb, VMCB_DT);
6aa8b732
AK
1883}
1884
e8467fda
AK
1885static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
1886{
1887}
1888
aff48baa
AK
1889static void svm_decache_cr3(struct kvm_vcpu *vcpu)
1890{
1891}
1892
25c4c276 1893static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
399badf3
AK
1894{
1895}
1896
d225157b
AK
1897static void update_cr0_intercept(struct vcpu_svm *svm)
1898{
1899 ulong gcr0 = svm->vcpu.arch.cr0;
1900 u64 *hcr0 = &svm->vmcb->save.cr0;
1901
1902 if (!svm->vcpu.fpu_active)
1903 *hcr0 |= SVM_CR0_SELECTIVE_MASK;
1904 else
1905 *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
1906 | (gcr0 & SVM_CR0_SELECTIVE_MASK);
1907
dcca1a65 1908 mark_dirty(svm->vmcb, VMCB_CR);
d225157b
AK
1909
1910 if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
4ee546b4
RJ
1911 clr_cr_intercept(svm, INTERCEPT_CR0_READ);
1912 clr_cr_intercept(svm, INTERCEPT_CR0_WRITE);
d225157b 1913 } else {
4ee546b4
RJ
1914 set_cr_intercept(svm, INTERCEPT_CR0_READ);
1915 set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
d225157b
AK
1916 }
1917}
1918
6aa8b732
AK
1919static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1920{
a2fa3e9f
GH
1921 struct vcpu_svm *svm = to_svm(vcpu);
1922
05b3e0c2 1923#ifdef CONFIG_X86_64
f6801dff 1924 if (vcpu->arch.efer & EFER_LME) {
707d92fa 1925 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
f6801dff 1926 vcpu->arch.efer |= EFER_LMA;
2b5203ee 1927 svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
6aa8b732
AK
1928 }
1929
d77c26fc 1930 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
f6801dff 1931 vcpu->arch.efer &= ~EFER_LMA;
2b5203ee 1932 svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
6aa8b732
AK
1933 }
1934 }
1935#endif
ad312c7c 1936 vcpu->arch.cr0 = cr0;
888f9f3e
AK
1937
1938 if (!npt_enabled)
1939 cr0 |= X86_CR0_PG | X86_CR0_WP;
02daab21
AK
1940
1941 if (!vcpu->fpu_active)
334df50a 1942 cr0 |= X86_CR0_TS;
bcf166a9
PB
1943 /*
1944 * re-enable caching here because the QEMU bios
1945 * does not do it - this results in some delay at
1946 * reboot
1947 */
1948 if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
1949 cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
a2fa3e9f 1950 svm->vmcb->save.cr0 = cr0;
dcca1a65 1951 mark_dirty(svm->vmcb, VMCB_CR);
d225157b 1952 update_cr0_intercept(svm);
6aa8b732
AK
1953}
1954
5e1746d6 1955static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
6aa8b732 1956{
1e02ce4c 1957 unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE;
e5eab0ce
JR
1958 unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;
1959
5e1746d6
NHE
1960 if (cr4 & X86_CR4_VMXE)
1961 return 1;
1962
e5eab0ce 1963 if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
f40f6a45 1964 svm_flush_tlb(vcpu);
6394b649 1965
ec077263
JR
1966 vcpu->arch.cr4 = cr4;
1967 if (!npt_enabled)
1968 cr4 |= X86_CR4_PAE;
6394b649 1969 cr4 |= host_cr4_mce;
ec077263 1970 to_svm(vcpu)->vmcb->save.cr4 = cr4;
dcca1a65 1971 mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
5e1746d6 1972 return 0;
6aa8b732
AK
1973}
1974
1975static void svm_set_segment(struct kvm_vcpu *vcpu,
1976 struct kvm_segment *var, int seg)
1977{
a2fa3e9f 1978 struct vcpu_svm *svm = to_svm(vcpu);
6aa8b732
AK
1979 struct vmcb_seg *s = svm_seg(vcpu, seg);
1980
1981 s->base = var->base;
1982 s->limit = var->limit;
1983 s->selector = var->selector;
1984 if (var->unusable)
1985 s->attrib = 0;
1986 else {
1987 s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
1988 s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
1989 s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
1990 s->attrib |= (var->present & 1) << SVM_SELECTOR_P_SHIFT;
1991 s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
1992 s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
1993 s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
1994 s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
1995 }
ae9fedc7
PB
1996
1997 /*
1998 * This is always accurate, except if SYSRET returned to a segment
1999 * with SS.DPL != 3. Intel does not have this quirk, and always
2000 * forces SS.DPL to 3 on sysret, so we ignore that case; fixing it
2001 * would entail passing the CPL to userspace and back.
2002 */
2003 if (seg == VCPU_SREG_SS)
2004 svm->vmcb->save.cpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
6aa8b732 2005
060d0c9a 2006 mark_dirty(svm->vmcb, VMCB_SEG);
6aa8b732
AK
2007}
2008
cbdb967a 2009static void update_bp_intercept(struct kvm_vcpu *vcpu)
6aa8b732 2010{
d0bfb940
JK
2011 struct vcpu_svm *svm = to_svm(vcpu);
2012
18c918c5 2013 clr_exception_intercept(svm, BP_VECTOR);
44c11430 2014
d0bfb940 2015 if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
d0bfb940 2016 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
18c918c5 2017 set_exception_intercept(svm, BP_VECTOR);
d0bfb940
JK
2018 } else
2019 vcpu->guest_debug = 0;
44c11430
GN
2020}
2021
0fe1e009 2022static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
6aa8b732 2023{
0fe1e009
TH
2024 if (sd->next_asid > sd->max_asid) {
2025 ++sd->asid_generation;
2026 sd->next_asid = 1;
a2fa3e9f 2027 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
6aa8b732
AK
2028 }
2029
0fe1e009
TH
2030 svm->asid_generation = sd->asid_generation;
2031 svm->vmcb->control.asid = sd->next_asid++;
d48086d1
JR
2032
2033 mark_dirty(svm->vmcb, VMCB_ASID);
6aa8b732
AK
2034}
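/*
 * Illustrative sketch, not part of the kernel source: new_asid() above
 * hands out hardware ASIDs from a simple per-CPU counter. Once the
 * counter runs past the highest ASID the CPU supports, the generation is
 * bumped, the counter wraps back to 1 and a full TLB flush is requested
 * so that stale translations tagged with recycled ASIDs cannot survive.
 * A minimal standalone model of that allocator; all names are made up
 * for illustration.
 */
#include <stdbool.h>
#include <stdint.h>

struct asid_pool_demo {
	uint32_t next_asid;	/* next ASID to hand out */
	uint32_t max_asid;	/* highest ASID supported by the CPU */
	uint32_t generation;	/* bumped on every wrap-around */
};

/* Returns the allocated ASID; *flush_all tells the caller to flush the TLB. */
static uint32_t asid_demo_alloc(struct asid_pool_demo *p, bool *flush_all)
{
	*flush_all = false;
	if (p->next_asid > p->max_asid) {
		p->generation++;
		p->next_asid = 1;	/* ASID 0 stays reserved for the host */
		*flush_all = true;
	}
	return p->next_asid++;
}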
2035
73aaf249
JK
2036static u64 svm_get_dr6(struct kvm_vcpu *vcpu)
2037{
2038 return to_svm(vcpu)->vmcb->save.dr6;
2039}
2040
2041static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value)
2042{
2043 struct vcpu_svm *svm = to_svm(vcpu);
2044
2045 svm->vmcb->save.dr6 = value;
2046 mark_dirty(svm->vmcb, VMCB_DR);
2047}
2048
facb0139
PB
2049static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
2050{
2051 struct vcpu_svm *svm = to_svm(vcpu);
2052
2053 get_debugreg(vcpu->arch.db[0], 0);
2054 get_debugreg(vcpu->arch.db[1], 1);
2055 get_debugreg(vcpu->arch.db[2], 2);
2056 get_debugreg(vcpu->arch.db[3], 3);
2057 vcpu->arch.dr6 = svm_get_dr6(vcpu);
2058 vcpu->arch.dr7 = svm->vmcb->save.dr7;
2059
2060 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
2061 set_dr_intercepts(svm);
2062}
2063
020df079 2064static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
6aa8b732 2065{
42dbaa5a 2066 struct vcpu_svm *svm = to_svm(vcpu);
42dbaa5a 2067
020df079 2068 svm->vmcb->save.dr7 = value;
72214b96 2069 mark_dirty(svm->vmcb, VMCB_DR);
6aa8b732
AK
2070}
2071
851ba692 2072static int pf_interception(struct vcpu_svm *svm)
6aa8b732 2073{
631bc487 2074 u64 fault_address = svm->vmcb->control.exit_info_2;
14727754 2075 u64 error_code;
631bc487 2076 int r = 1;
6aa8b732 2077
631bc487
GN
2078 switch (svm->apf_reason) {
2079 default:
2080 error_code = svm->vmcb->control.exit_info_1;
af9ca2d7 2081
631bc487
GN
2082 trace_kvm_page_fault(fault_address, error_code);
2083 if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu))
2084 kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
dc25e89e
AP
2085 r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
2086 svm->vmcb->control.insn_bytes,
2087 svm->vmcb->control.insn_len);
631bc487
GN
2088 break;
2089 case KVM_PV_REASON_PAGE_NOT_PRESENT:
2090 svm->apf_reason = 0;
2091 local_irq_disable();
2092 kvm_async_pf_task_wait(fault_address);
2093 local_irq_enable();
2094 break;
2095 case KVM_PV_REASON_PAGE_READY:
2096 svm->apf_reason = 0;
2097 local_irq_disable();
2098 kvm_async_pf_task_wake(fault_address);
2099 local_irq_enable();
2100 break;
2101 }
2102 return r;
6aa8b732
AK
2103}
2104
851ba692 2105static int db_interception(struct vcpu_svm *svm)
d0bfb940 2106{
851ba692
AK
2107 struct kvm_run *kvm_run = svm->vcpu.run;
2108
d0bfb940 2109 if (!(svm->vcpu.guest_debug &
44c11430 2110 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
6be7d306 2111 !svm->nmi_singlestep) {
d0bfb940
JK
2112 kvm_queue_exception(&svm->vcpu, DB_VECTOR);
2113 return 1;
2114 }
44c11430 2115
6be7d306
JK
2116 if (svm->nmi_singlestep) {
2117 svm->nmi_singlestep = false;
44c11430
GN
2118 if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
2119 svm->vmcb->save.rflags &=
2120 ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
44c11430
GN
2121 }
2122
2123 if (svm->vcpu.guest_debug &
e0231715 2124 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) {
44c11430
GN
2125 kvm_run->exit_reason = KVM_EXIT_DEBUG;
2126 kvm_run->debug.arch.pc =
2127 svm->vmcb->save.cs.base + svm->vmcb->save.rip;
2128 kvm_run->debug.arch.exception = DB_VECTOR;
2129 return 0;
2130 }
2131
2132 return 1;
d0bfb940
JK
2133}
2134
851ba692 2135static int bp_interception(struct vcpu_svm *svm)
d0bfb940 2136{
851ba692
AK
2137 struct kvm_run *kvm_run = svm->vcpu.run;
2138
d0bfb940
JK
2139 kvm_run->exit_reason = KVM_EXIT_DEBUG;
2140 kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
2141 kvm_run->debug.arch.exception = BP_VECTOR;
2142 return 0;
2143}
2144
851ba692 2145static int ud_interception(struct vcpu_svm *svm)
7aa81cc0
AL
2146{
2147 int er;
2148
51d8b661 2149 er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
7aa81cc0 2150 if (er != EMULATE_DONE)
7ee5d940 2151 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
7aa81cc0
AL
2152 return 1;
2153}
2154
54a20552
EN
2155static int ac_interception(struct vcpu_svm *svm)
2156{
2157 kvm_queue_exception_e(&svm->vcpu, AC_VECTOR, 0);
2158 return 1;
2159}
2160
6b52d186 2161static void svm_fpu_activate(struct kvm_vcpu *vcpu)
7807fa6c 2162{
6b52d186 2163 struct vcpu_svm *svm = to_svm(vcpu);
66a562f7 2164
18c918c5 2165 clr_exception_intercept(svm, NM_VECTOR);
66a562f7 2166
e756fc62 2167 svm->vcpu.fpu_active = 1;
d225157b 2168 update_cr0_intercept(svm);
6b52d186 2169}
a2fa3e9f 2170
6b52d186
AK
2171static int nm_interception(struct vcpu_svm *svm)
2172{
2173 svm_fpu_activate(&svm->vcpu);
a2fa3e9f 2174 return 1;
7807fa6c
AL
2175}
2176
67ec6607
JR
2177static bool is_erratum_383(void)
2178{
2179 int err, i;
2180 u64 value;
2181
2182 if (!erratum_383_found)
2183 return false;
2184
2185 value = native_read_msr_safe(MSR_IA32_MC0_STATUS, &err);
2186 if (err)
2187 return false;
2188
2189 /* Bit 62 may or may not be set for this mce */
2190 value &= ~(1ULL << 62);
2191
2192 if (value != 0xb600000000010015ULL)
2193 return false;
2194
2195 /* Clear MCi_STATUS registers */
2196 for (i = 0; i < 6; ++i)
2197 native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0);
2198
2199 value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err);
2200 if (!err) {
2201 u32 low, high;
2202
2203 value &= ~(1ULL << 2);
2204 low = lower_32_bits(value);
2205 high = upper_32_bits(value);
2206
2207 native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high);
2208 }
2209
2210 /* Flush tlb to evict multi-match entries */
2211 __flush_tlb_all();
2212
2213 return true;
2214}
2215
fe5913e4 2216static void svm_handle_mce(struct vcpu_svm *svm)
53371b50 2217{
67ec6607
JR
2218 if (is_erratum_383()) {
2219 /*
2220 * Erratum 383 triggered. Guest state is corrupt so kill the
2221 * guest.
2222 */
2223 pr_err("KVM: Guest triggered AMD Erratum 383\n");
2224
a8eeb04a 2225 kvm_make_request(KVM_REQ_TRIPLE_FAULT, &svm->vcpu);
67ec6607
JR
2226
2227 return;
2228 }
2229
53371b50
JR
2230 /*
2231 * On an #MC intercept the MCE handler is not called automatically in
2232 * the host. So do it by hand here.
2233 */
2234 asm volatile (
2235 "int $0x12\n");
2236 /* not sure if we ever come back to this point */
2237
fe5913e4
JR
2238 return;
2239}
2240
2241static int mc_interception(struct vcpu_svm *svm)
2242{
53371b50
JR
2243 return 1;
2244}
2245
851ba692 2246static int shutdown_interception(struct vcpu_svm *svm)
46fe4ddd 2247{
851ba692
AK
2248 struct kvm_run *kvm_run = svm->vcpu.run;
2249
46fe4ddd
JR
2250 /*
2251 * VMCB is undefined after a SHUTDOWN intercept
2252 * so reinitialize it.
2253 */
a2fa3e9f 2254 clear_page(svm->vmcb);
5690891b 2255 init_vmcb(svm);
46fe4ddd
JR
2256
2257 kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
2258 return 0;
2259}
2260
851ba692 2261static int io_interception(struct vcpu_svm *svm)
6aa8b732 2262{
cf8f70bf 2263 struct kvm_vcpu *vcpu = &svm->vcpu;
d77c26fc 2264 u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
34c33d16 2265 int size, in, string;
039576c0 2266 unsigned port;
6aa8b732 2267
e756fc62 2268 ++svm->vcpu.stat.io_exits;
e70669ab 2269 string = (io_info & SVM_IOIO_STR_MASK) != 0;
039576c0 2270 in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
8370c3d0 2271 if (string)
51d8b661 2272 return emulate_instruction(vcpu, 0) == EMULATE_DONE;
cf8f70bf 2273
039576c0
AK
2274 port = io_info >> 16;
2275 size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
cf8f70bf 2276 svm->next_rip = svm->vmcb->control.exit_info_2;
e93f36bc 2277 skip_emulated_instruction(&svm->vcpu);
cf8f70bf 2278
8370c3d0
TL
2279 return in ? kvm_fast_pio_in(vcpu, size, port)
2280 : kvm_fast_pio_out(vcpu, size, port);
6aa8b732
AK
2281}
2282
851ba692 2283static int nmi_interception(struct vcpu_svm *svm)
c47f098d
JR
2284{
2285 return 1;
2286}
2287
851ba692 2288static int intr_interception(struct vcpu_svm *svm)
a0698055
JR
2289{
2290 ++svm->vcpu.stat.irq_exits;
2291 return 1;
2292}
2293
851ba692 2294static int nop_on_interception(struct vcpu_svm *svm)
6aa8b732
AK
2295{
2296 return 1;
2297}
2298
851ba692 2299static int halt_interception(struct vcpu_svm *svm)
6aa8b732 2300{
5fdbf976 2301 svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
e756fc62 2302 return kvm_emulate_halt(&svm->vcpu);
6aa8b732
AK
2303}
2304
851ba692 2305static int vmmcall_interception(struct vcpu_svm *svm)
02e235bc 2306{
5fdbf976 2307 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
0d9c055e 2308 return kvm_emulate_hypercall(&svm->vcpu);
02e235bc
AK
2309}
2310
5bd2edc3
JR
2311static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
2312{
2313 struct vcpu_svm *svm = to_svm(vcpu);
2314
2315 return svm->nested.nested_cr3;
2316}
2317
e4e517b4
AK
2318static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
2319{
2320 struct vcpu_svm *svm = to_svm(vcpu);
2321 u64 cr3 = svm->nested.nested_cr3;
2322 u64 pdpte;
2323 int ret;
2324
54bf36aa
PB
2325 ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(cr3), &pdpte,
2326 offset_in_page(cr3) + index * 8, 8);
e4e517b4
AK
2327 if (ret)
2328 return 0;
2329 return pdpte;
2330}
2331
5bd2edc3
JR
2332static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
2333 unsigned long root)
2334{
2335 struct vcpu_svm *svm = to_svm(vcpu);
2336
2337 svm->vmcb->control.nested_cr3 = root;
b2747166 2338 mark_dirty(svm->vmcb, VMCB_NPT);
f40f6a45 2339 svm_flush_tlb(vcpu);
5bd2edc3
JR
2340}
2341
6389ee94
AK
2342static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
2343 struct x86_exception *fault)
5bd2edc3
JR
2344{
2345 struct vcpu_svm *svm = to_svm(vcpu);
2346
5e352519
PB
2347 if (svm->vmcb->control.exit_code != SVM_EXIT_NPF) {
2348 /*
2349 * TODO: track the cause of the nested page fault, and
2350 * correctly fill in the high bits of exit_info_1.
2351 */
2352 svm->vmcb->control.exit_code = SVM_EXIT_NPF;
2353 svm->vmcb->control.exit_code_hi = 0;
2354 svm->vmcb->control.exit_info_1 = (1ULL << 32);
2355 svm->vmcb->control.exit_info_2 = fault->address;
2356 }
2357
2358 svm->vmcb->control.exit_info_1 &= ~0xffffffffULL;
2359 svm->vmcb->control.exit_info_1 |= fault->error_code;
2360
2361 /*
2362 * The present bit is always zero for page structure faults on real
2363 * hardware.
2364 */
2365 if (svm->vmcb->control.exit_info_1 & (2ULL << 32))
2366 svm->vmcb->control.exit_info_1 &= ~1;
5bd2edc3
JR
2367
2368 nested_svm_vmexit(svm);
2369}
2370
8a3c1a33 2371static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
4b16184c 2372{
ad896af0
PB
2373 WARN_ON(mmu_is_nested(vcpu));
2374 kvm_init_shadow_mmu(vcpu);
4b16184c
JR
2375 vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3;
2376 vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3;
e4e517b4 2377 vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr;
4b16184c
JR
2378 vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
2379 vcpu->arch.mmu.shadow_root_level = get_npt_level();
c258b62b 2380 reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu);
4b16184c 2381 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
4b16184c
JR
2382}
2383
2384static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
2385{
2386 vcpu->arch.walk_mmu = &vcpu->arch.mmu;
2387}
2388
c0725420
AG
2389static int nested_svm_check_permissions(struct vcpu_svm *svm)
2390{
f6801dff 2391 if (!(svm->vcpu.arch.efer & EFER_SVME)
c0725420
AG
2392 || !is_paging(&svm->vcpu)) {
2393 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2394 return 1;
2395 }
2396
2397 if (svm->vmcb->save.cpl) {
2398 kvm_inject_gp(&svm->vcpu, 0);
2399 return 1;
2400 }
2401
2402 return 0;
2403}
2404
cf74a78b
AG
2405static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
2406 bool has_error_code, u32 error_code)
2407{
b8e88bc8
JR
2408 int vmexit;
2409
2030753d 2410 if (!is_guest_mode(&svm->vcpu))
0295ad7d 2411 return 0;
cf74a78b 2412
0295ad7d
JR
2413 svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
2414 svm->vmcb->control.exit_code_hi = 0;
2415 svm->vmcb->control.exit_info_1 = error_code;
2416 svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
2417
b8e88bc8
JR
2418 vmexit = nested_svm_intercept(svm);
2419 if (vmexit == NESTED_EXIT_DONE)
2420 svm->nested.exit_required = true;
2421
2422 return vmexit;
cf74a78b
AG
2423}
2424
8fe54654
JR
 2425/* This function returns true if it is safe to enable the irq window */
2426static inline bool nested_svm_intr(struct vcpu_svm *svm)
cf74a78b 2427{
2030753d 2428 if (!is_guest_mode(&svm->vcpu))
8fe54654 2429 return true;
cf74a78b 2430
26666957 2431 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
8fe54654 2432 return true;
cf74a78b 2433
26666957 2434 if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
8fe54654 2435 return false;
cf74a78b 2436
a0a07cd2
GN
2437 /*
 2438 * If a vmexit was already requested (by an intercepted exception,
 2439 * for instance), do not overwrite it with an "external interrupt"
2440 * vmexit.
2441 */
2442 if (svm->nested.exit_required)
2443 return false;
2444
197717d5
JR
2445 svm->vmcb->control.exit_code = SVM_EXIT_INTR;
2446 svm->vmcb->control.exit_info_1 = 0;
2447 svm->vmcb->control.exit_info_2 = 0;
26666957 2448
cd3ff653
JR
2449 if (svm->nested.intercept & 1ULL) {
2450 /*
2451 * The #vmexit can't be emulated here directly because this
c5ec2e56 2452 * code path runs with irqs and preemption disabled. A
cd3ff653
JR
2453 * #vmexit emulation might sleep. Only signal request for
2454 * the #vmexit here.
2455 */
2456 svm->nested.exit_required = true;
236649de 2457 trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
8fe54654 2458 return false;
cf74a78b
AG
2459 }
2460
8fe54654 2461 return true;
cf74a78b
AG
2462}
2463
887f500c
JR
 2464/* This function returns true if it is safe to enable the nmi window */
2465static inline bool nested_svm_nmi(struct vcpu_svm *svm)
2466{
2030753d 2467 if (!is_guest_mode(&svm->vcpu))
887f500c
JR
2468 return true;
2469
2470 if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI)))
2471 return true;
2472
2473 svm->vmcb->control.exit_code = SVM_EXIT_NMI;
2474 svm->nested.exit_required = true;
2475
2476 return false;
cf74a78b
AG
2477}
2478
7597f129 2479static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
34f80cfa
JR
2480{
2481 struct page *page;
2482
6c3bd3d7
JR
2483 might_sleep();
2484
54bf36aa 2485 page = kvm_vcpu_gfn_to_page(&svm->vcpu, gpa >> PAGE_SHIFT);
34f80cfa
JR
2486 if (is_error_page(page))
2487 goto error;
2488
7597f129
JR
2489 *_page = page;
2490
2491 return kmap(page);
34f80cfa
JR
2492
2493error:
34f80cfa
JR
2494 kvm_inject_gp(&svm->vcpu, 0);
2495
2496 return NULL;
2497}
2498
7597f129 2499static void nested_svm_unmap(struct page *page)
34f80cfa 2500{
7597f129 2501 kunmap(page);
34f80cfa
JR
2502 kvm_release_page_dirty(page);
2503}
34f80cfa 2504
ce2ac085
JR
2505static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
2506{
9bf41833
JK
2507 unsigned port, size, iopm_len;
2508 u16 val, mask;
2509 u8 start_bit;
ce2ac085 2510 u64 gpa;
34f80cfa 2511
ce2ac085
JR
2512 if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
2513 return NESTED_EXIT_HOST;
34f80cfa 2514
ce2ac085 2515 port = svm->vmcb->control.exit_info_1 >> 16;
9bf41833
JK
2516 size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >>
2517 SVM_IOIO_SIZE_SHIFT;
ce2ac085 2518 gpa = svm->nested.vmcb_iopm + (port / 8);
9bf41833
JK
2519 start_bit = port % 8;
2520 iopm_len = (start_bit + size > 8) ? 2 : 1;
2521 mask = (0xf >> (4 - size)) << start_bit;
2522 val = 0;
ce2ac085 2523
54bf36aa 2524 if (kvm_vcpu_read_guest(&svm->vcpu, gpa, &val, iopm_len))
9bf41833 2525 return NESTED_EXIT_DONE;
ce2ac085 2526
9bf41833 2527 return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
34f80cfa
JR
2528}
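/*
 * Illustrative sketch, not part of the kernel source: the IOPM consulted
 * by nested_svm_intercept_ioio() above is a bitmap with one bit per I/O
 * port byte, so an access of 'size' bytes (1, 2 or 4) starting at 'port'
 * is intercepted if any of the covered bits is set, and the lookup may
 * straddle a byte boundary. A standalone model of the same test against
 * an in-memory bitmap; names are made up for illustration.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static bool iopm_demo_intercepted(const uint8_t *iopm, size_t iopm_len,
				  uint16_t port, unsigned size)
{
	size_t byte = port / 8;			/* byte holding the first bit */
	unsigned start_bit = port % 8;		/* first bit within that byte */
	unsigned nbytes = (start_bit + size > 8) ? 2 : 1;
	uint16_t mask = (0xf >> (4 - size)) << start_bit;
	uint16_t val = 0;
	unsigned i;

	for (i = 0; i < nbytes && byte + i < iopm_len; i++)
		val |= (uint16_t)iopm[byte + i] << (8 * i);

	return val & mask;	/* any covered bit set => forward to L1 */
}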
2529
d2477826 2530static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
4c2161ae 2531{
0d6b3537
JR
2532 u32 offset, msr, value;
2533 int write, mask;
4c2161ae 2534
3d62d9aa 2535 if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
d2477826 2536 return NESTED_EXIT_HOST;
3d62d9aa 2537
0d6b3537
JR
2538 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
2539 offset = svm_msrpm_offset(msr);
2540 write = svm->vmcb->control.exit_info_1 & 1;
2541 mask = 1 << ((2 * (msr & 0xf)) + write);
3d62d9aa 2542
0d6b3537
JR
2543 if (offset == MSR_INVALID)
2544 return NESTED_EXIT_DONE;
4c2161ae 2545
0d6b3537
JR
 2546 /* Offset is in 32-bit units but we need it in 8-bit units */
2547 offset *= 4;
4c2161ae 2548
54bf36aa 2549 if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.vmcb_msrpm + offset, &value, 4))
0d6b3537 2550 return NESTED_EXIT_DONE;
3d62d9aa 2551
0d6b3537 2552 return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
4c2161ae
JR
2553}
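/*
 * Illustrative sketch, not part of the kernel source: in the MSR
 * permission map consulted above, each MSR owns two adjacent bits - an
 * even bit for reads and the following odd bit for writes - packed 16
 * MSRs per 32-bit word, with svm_msrpm_offset() mapping the MSR number
 * to that word. A standalone model of the final bit test, given the word
 * already read from L1's bitmap; names are made up for illustration.
 */
#include <stdbool.h>
#include <stdint.h>

static bool msrpm_demo_intercepted(uint32_t msrpm_word, uint32_t msr, bool write)
{
	unsigned bit = 2 * (msr & 0xf) + (write ? 1 : 0);

	return msrpm_word & (1u << bit);	/* set bit => forward to L1 */
}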
2554
410e4d57 2555static int nested_svm_exit_special(struct vcpu_svm *svm)
cf74a78b 2556{
cf74a78b 2557 u32 exit_code = svm->vmcb->control.exit_code;
4c2161ae 2558
410e4d57
JR
2559 switch (exit_code) {
2560 case SVM_EXIT_INTR:
2561 case SVM_EXIT_NMI:
ff47a49b 2562 case SVM_EXIT_EXCP_BASE + MC_VECTOR:
410e4d57 2563 return NESTED_EXIT_HOST;
410e4d57 2564 case SVM_EXIT_NPF:
e0231715 2565 /* For now we always handle NPFs in the host when NPT is in use */
410e4d57
JR
2566 if (npt_enabled)
2567 return NESTED_EXIT_HOST;
2568 break;
410e4d57 2569 case SVM_EXIT_EXCP_BASE + PF_VECTOR:
631bc487
GN
2570 /* When we're shadowing, trap PFs, but not async PF */
2571 if (!npt_enabled && svm->apf_reason == 0)
410e4d57
JR
2572 return NESTED_EXIT_HOST;
2573 break;
66a562f7
JR
2574 case SVM_EXIT_EXCP_BASE + NM_VECTOR:
2575 nm_interception(svm);
2576 break;
410e4d57
JR
2577 default:
2578 break;
cf74a78b
AG
2579 }
2580
410e4d57
JR
2581 return NESTED_EXIT_CONTINUE;
2582}
2583
2584/*
2585 * If this function returns true, this #vmexit was already handled
2586 */
b8e88bc8 2587static int nested_svm_intercept(struct vcpu_svm *svm)
410e4d57
JR
2588{
2589 u32 exit_code = svm->vmcb->control.exit_code;
2590 int vmexit = NESTED_EXIT_HOST;
2591
cf74a78b 2592 switch (exit_code) {
9c4e40b9 2593 case SVM_EXIT_MSR:
3d62d9aa 2594 vmexit = nested_svm_exit_handled_msr(svm);
9c4e40b9 2595 break;
ce2ac085
JR
2596 case SVM_EXIT_IOIO:
2597 vmexit = nested_svm_intercept_ioio(svm);
2598 break;
4ee546b4
RJ
2599 case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
2600 u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0);
2601 if (svm->nested.intercept_cr & bit)
410e4d57 2602 vmexit = NESTED_EXIT_DONE;
cf74a78b
AG
2603 break;
2604 }
3aed041a
JR
2605 case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
2606 u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0);
2607 if (svm->nested.intercept_dr & bit)
410e4d57 2608 vmexit = NESTED_EXIT_DONE;
cf74a78b
AG
2609 break;
2610 }
2611 case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
2612 u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
aad42c64 2613 if (svm->nested.intercept_exceptions & excp_bits)
410e4d57 2614 vmexit = NESTED_EXIT_DONE;
631bc487
GN
 2615 /* async page faults always cause a vmexit */
2616 else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
2617 svm->apf_reason != 0)
2618 vmexit = NESTED_EXIT_DONE;
cf74a78b
AG
2619 break;
2620 }
228070b1
JR
2621 case SVM_EXIT_ERR: {
2622 vmexit = NESTED_EXIT_DONE;
2623 break;
2624 }
cf74a78b
AG
2625 default: {
2626 u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
aad42c64 2627 if (svm->nested.intercept & exit_bits)
410e4d57 2628 vmexit = NESTED_EXIT_DONE;
cf74a78b
AG
2629 }
2630 }
2631
b8e88bc8
JR
2632 return vmexit;
2633}
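/*
 * Illustrative sketch, not part of the kernel source:
 * nested_svm_intercept() above decides whether an exit belongs to the L1
 * hypervisor by mapping the exit code onto a bit in one of the cached
 * intercept masks (CR accesses, DR accesses, exceptions, and everything
 * else relative to the INTR exit). A standalone model of that dispatch;
 * the mask struct and the DEMO_EXIT_* bases (0x00 CR, 0x20 DR, 0x40
 * exceptions, 0x60 INTR, mirroring the SVM_EXIT_* layout assumed here)
 * are made up for illustration.
 */
#include <stdbool.h>
#include <stdint.h>

enum { DEMO_EXIT_CR0 = 0x000, DEMO_EXIT_DR0 = 0x020,
       DEMO_EXIT_EXCP = 0x040, DEMO_EXIT_INTR = 0x060 };

struct nested_masks_demo {
	uint32_t cr;		/* CR read/write intercepts */
	uint32_t dr;		/* DR read/write intercepts */
	uint32_t exceptions;	/* exception vector intercepts */
	uint64_t general;	/* all other intercepts, bit 0 == INTR */
};

static bool nested_demo_wants_exit(const struct nested_masks_demo *m,
				   uint32_t exit_code)
{
	if (exit_code < DEMO_EXIT_DR0)
		return m->cr & (1u << (exit_code - DEMO_EXIT_CR0));
	if (exit_code < DEMO_EXIT_EXCP)
		return m->dr & (1u << (exit_code - DEMO_EXIT_DR0));
	if (exit_code < DEMO_EXIT_INTR)
		return m->exceptions & (1u << (exit_code - DEMO_EXIT_EXCP));
	return m->general & (1ull << (exit_code - DEMO_EXIT_INTR));
}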
2634
2635static int nested_svm_exit_handled(struct vcpu_svm *svm)
2636{
2637 int vmexit;
2638
2639 vmexit = nested_svm_intercept(svm);
2640
2641 if (vmexit == NESTED_EXIT_DONE)
9c4e40b9 2642 nested_svm_vmexit(svm);
9c4e40b9
JR
2643
2644 return vmexit;
cf74a78b
AG
2645}
2646
0460a979
JR
2647static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb)
2648{
2649 struct vmcb_control_area *dst = &dst_vmcb->control;
2650 struct vmcb_control_area *from = &from_vmcb->control;
2651
4ee546b4 2652 dst->intercept_cr = from->intercept_cr;
3aed041a 2653 dst->intercept_dr = from->intercept_dr;
0460a979
JR
2654 dst->intercept_exceptions = from->intercept_exceptions;
2655 dst->intercept = from->intercept;
2656 dst->iopm_base_pa = from->iopm_base_pa;
2657 dst->msrpm_base_pa = from->msrpm_base_pa;
2658 dst->tsc_offset = from->tsc_offset;
2659 dst->asid = from->asid;
2660 dst->tlb_ctl = from->tlb_ctl;
2661 dst->int_ctl = from->int_ctl;
2662 dst->int_vector = from->int_vector;
2663 dst->int_state = from->int_state;
2664 dst->exit_code = from->exit_code;
2665 dst->exit_code_hi = from->exit_code_hi;
2666 dst->exit_info_1 = from->exit_info_1;
2667 dst->exit_info_2 = from->exit_info_2;
2668 dst->exit_int_info = from->exit_int_info;
2669 dst->exit_int_info_err = from->exit_int_info_err;
2670 dst->nested_ctl = from->nested_ctl;
2671 dst->event_inj = from->event_inj;
2672 dst->event_inj_err = from->event_inj_err;
2673 dst->nested_cr3 = from->nested_cr3;
2674 dst->lbr_ctl = from->lbr_ctl;
2675}
2676
34f80cfa 2677static int nested_svm_vmexit(struct vcpu_svm *svm)
cf74a78b 2678{
34f80cfa 2679 struct vmcb *nested_vmcb;
e6aa9abd 2680 struct vmcb *hsave = svm->nested.hsave;
33740e40 2681 struct vmcb *vmcb = svm->vmcb;
7597f129 2682 struct page *page;
cf74a78b 2683
17897f36
JR
2684 trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
2685 vmcb->control.exit_info_1,
2686 vmcb->control.exit_info_2,
2687 vmcb->control.exit_int_info,
e097e5ff
SH
2688 vmcb->control.exit_int_info_err,
2689 KVM_ISA_SVM);
17897f36 2690
7597f129 2691 nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page);
34f80cfa
JR
2692 if (!nested_vmcb)
2693 return 1;
2694
2030753d
JR
2695 /* Exit Guest-Mode */
2696 leave_guest_mode(&svm->vcpu);
06fc7772
JR
2697 svm->nested.vmcb = 0;
2698
cf74a78b 2699 /* Give the current vmcb to the guest */
33740e40
JR
2700 disable_gif(svm);
2701
2702 nested_vmcb->save.es = vmcb->save.es;
2703 nested_vmcb->save.cs = vmcb->save.cs;
2704 nested_vmcb->save.ss = vmcb->save.ss;
2705 nested_vmcb->save.ds = vmcb->save.ds;
2706 nested_vmcb->save.gdtr = vmcb->save.gdtr;
2707 nested_vmcb->save.idtr = vmcb->save.idtr;
3f6a9d16 2708 nested_vmcb->save.efer = svm->vcpu.arch.efer;
cdbbdc12 2709 nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu);
9f8fe504 2710 nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu);
33740e40 2711 nested_vmcb->save.cr2 = vmcb->save.cr2;
cdbbdc12 2712 nested_vmcb->save.cr4 = svm->vcpu.arch.cr4;
f6e78475 2713 nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu);
33740e40
JR
2714 nested_vmcb->save.rip = vmcb->save.rip;
2715 nested_vmcb->save.rsp = vmcb->save.rsp;
2716 nested_vmcb->save.rax = vmcb->save.rax;
2717 nested_vmcb->save.dr7 = vmcb->save.dr7;
2718 nested_vmcb->save.dr6 = vmcb->save.dr6;
2719 nested_vmcb->save.cpl = vmcb->save.cpl;
2720
2721 nested_vmcb->control.int_ctl = vmcb->control.int_ctl;
2722 nested_vmcb->control.int_vector = vmcb->control.int_vector;
2723 nested_vmcb->control.int_state = vmcb->control.int_state;
2724 nested_vmcb->control.exit_code = vmcb->control.exit_code;
2725 nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi;
2726 nested_vmcb->control.exit_info_1 = vmcb->control.exit_info_1;
2727 nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
2728 nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info;
2729 nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
6092d3d3
JR
2730
2731 if (svm->nrips_enabled)
2732 nested_vmcb->control.next_rip = vmcb->control.next_rip;
8d23c466
AG
2733
2734 /*
2735 * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
2736 * to make sure that we do not lose injected events. So check event_inj
2737 * here and copy it to exit_int_info if it is valid.
2738 * Exit_int_info and event_inj can't be both valid because the case
2739 * below only happens on a VMRUN instruction intercept which has
2740 * no valid exit_int_info set.
2741 */
2742 if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
2743 struct vmcb_control_area *nc = &nested_vmcb->control;
2744
2745 nc->exit_int_info = vmcb->control.event_inj;
2746 nc->exit_int_info_err = vmcb->control.event_inj_err;
2747 }
2748
33740e40
JR
2749 nested_vmcb->control.tlb_ctl = 0;
2750 nested_vmcb->control.event_inj = 0;
2751 nested_vmcb->control.event_inj_err = 0;
cf74a78b
AG
2752
2753 /* We always set V_INTR_MASKING and remember the old value in hflags */
2754 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
2755 nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
2756
cf74a78b 2757 /* Restore the original control entries */
0460a979 2758 copy_vmcb_control_area(vmcb, hsave);
cf74a78b 2759
219b65dc
AG
2760 kvm_clear_exception_queue(&svm->vcpu);
2761 kvm_clear_interrupt_queue(&svm->vcpu);
cf74a78b 2762
4b16184c
JR
2763 svm->nested.nested_cr3 = 0;
2764
cf74a78b
AG
2765 /* Restore selected save entries */
2766 svm->vmcb->save.es = hsave->save.es;
2767 svm->vmcb->save.cs = hsave->save.cs;
2768 svm->vmcb->save.ss = hsave->save.ss;
2769 svm->vmcb->save.ds = hsave->save.ds;
2770 svm->vmcb->save.gdtr = hsave->save.gdtr;
2771 svm->vmcb->save.idtr = hsave->save.idtr;
f6e78475 2772 kvm_set_rflags(&svm->vcpu, hsave->save.rflags);
cf74a78b
AG
2773 svm_set_efer(&svm->vcpu, hsave->save.efer);
2774 svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
2775 svm_set_cr4(&svm->vcpu, hsave->save.cr4);
2776 if (npt_enabled) {
2777 svm->vmcb->save.cr3 = hsave->save.cr3;
2778 svm->vcpu.arch.cr3 = hsave->save.cr3;
2779 } else {
2390218b 2780 (void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
cf74a78b
AG
2781 }
2782 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);
2783 kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp);
2784 kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip);
2785 svm->vmcb->save.dr7 = 0;
2786 svm->vmcb->save.cpl = 0;
2787 svm->vmcb->control.exit_int_info = 0;
2788
8d28fec4
RJ
2789 mark_all_dirty(svm->vmcb);
2790
7597f129 2791 nested_svm_unmap(page);
cf74a78b 2792
4b16184c 2793 nested_svm_uninit_mmu_context(&svm->vcpu);
cf74a78b
AG
2794 kvm_mmu_reset_context(&svm->vcpu);
2795 kvm_mmu_load(&svm->vcpu);
2796
2797 return 0;
2798}
3d6368ef 2799
9738b2c9 2800static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
3d6368ef 2801{
323c3d80
JR
2802 /*
2803 * This function merges the msr permission bitmaps of kvm and the
c5ec2e56 2804 * nested vmcb. It is optimized in that it only merges the parts where
323c3d80
JR
2805 * the kvm msr permission bitmap may contain zero bits
2806 */
3d6368ef 2807 int i;
9738b2c9 2808
323c3d80
JR
2809 if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
2810 return true;
9738b2c9 2811
323c3d80
JR
2812 for (i = 0; i < MSRPM_OFFSETS; i++) {
2813 u32 value, p;
2814 u64 offset;
9738b2c9 2815
323c3d80
JR
2816 if (msrpm_offsets[i] == 0xffffffff)
2817 break;
3d6368ef 2818
0d6b3537
JR
2819 p = msrpm_offsets[i];
2820 offset = svm->nested.vmcb_msrpm + (p * 4);
323c3d80 2821
54bf36aa 2822 if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4))
323c3d80
JR
2823 return false;
2824
2825 svm->nested.msrpm[p] = svm->msrpm[p] | value;
2826 }
3d6368ef 2827
323c3d80 2828 svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm);
9738b2c9
JR
2829
2830 return true;
3d6368ef
AG
2831}
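/*
 * Illustrative sketch, not part of the kernel source: the merge above
 * only walks the few MSRPM words in which KVM's own bitmap can contain
 * zero (pass-through) bits; for each such word the effective nested
 * bitmap is the bitwise OR of KVM's word and L1's word, so an MSR is
 * passed straight through to L2 only when both KVM and the L1 hypervisor
 * allow it. A standalone model of that merge; names and the sentinel are
 * made up for illustration.
 */
#include <stddef.h>
#include <stdint.h>

#define DEMO_OFFSET_END 0xffffffffu	/* sentinel terminating the offset list */

static void msrpm_demo_merge(uint32_t *merged, const uint32_t *host,
			     const uint32_t *l1, const uint32_t *offsets)
{
	size_t i;

	for (i = 0; offsets[i] != DEMO_OFFSET_END; i++) {
		uint32_t p = offsets[i];

		/* A set bit means "intercept", so OR keeps the stricter policy. */
		merged[p] = host[p] | l1[p];
	}
}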
2832
52c65a30
JR
2833static bool nested_vmcb_checks(struct vmcb *vmcb)
2834{
2835 if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0)
2836 return false;
2837
dbe77584
JR
2838 if (vmcb->control.asid == 0)
2839 return false;
2840
4b16184c
JR
2841 if (vmcb->control.nested_ctl && !npt_enabled)
2842 return false;
2843
52c65a30
JR
2844 return true;
2845}
2846
9738b2c9 2847static bool nested_svm_vmrun(struct vcpu_svm *svm)
3d6368ef 2848{
9738b2c9 2849 struct vmcb *nested_vmcb;
e6aa9abd 2850 struct vmcb *hsave = svm->nested.hsave;
defbba56 2851 struct vmcb *vmcb = svm->vmcb;
7597f129 2852 struct page *page;
06fc7772 2853 u64 vmcb_gpa;
3d6368ef 2854
06fc7772 2855 vmcb_gpa = svm->vmcb->save.rax;
3d6368ef 2856
7597f129 2857 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
9738b2c9
JR
2858 if (!nested_vmcb)
2859 return false;
2860
52c65a30
JR
2861 if (!nested_vmcb_checks(nested_vmcb)) {
2862 nested_vmcb->control.exit_code = SVM_EXIT_ERR;
2863 nested_vmcb->control.exit_code_hi = 0;
2864 nested_vmcb->control.exit_info_1 = 0;
2865 nested_vmcb->control.exit_info_2 = 0;
2866
2867 nested_svm_unmap(page);
2868
2869 return false;
2870 }
2871
b75f4eb3 2872 trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
0ac406de
JR
2873 nested_vmcb->save.rip,
2874 nested_vmcb->control.int_ctl,
2875 nested_vmcb->control.event_inj,
2876 nested_vmcb->control.nested_ctl);
2877
4ee546b4
RJ
2878 trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff,
2879 nested_vmcb->control.intercept_cr >> 16,
2e554e8d
JR
2880 nested_vmcb->control.intercept_exceptions,
2881 nested_vmcb->control.intercept);
2882
3d6368ef 2883 /* Clear internal status */
219b65dc
AG
2884 kvm_clear_exception_queue(&svm->vcpu);
2885 kvm_clear_interrupt_queue(&svm->vcpu);
3d6368ef 2886
e0231715
JR
2887 /*
2888 * Save the old vmcb, so we don't need to pick what we save, but can
2889 * restore everything when a VMEXIT occurs
2890 */
defbba56
JR
2891 hsave->save.es = vmcb->save.es;
2892 hsave->save.cs = vmcb->save.cs;
2893 hsave->save.ss = vmcb->save.ss;
2894 hsave->save.ds = vmcb->save.ds;
2895 hsave->save.gdtr = vmcb->save.gdtr;
2896 hsave->save.idtr = vmcb->save.idtr;
f6801dff 2897 hsave->save.efer = svm->vcpu.arch.efer;
4d4ec087 2898 hsave->save.cr0 = kvm_read_cr0(&svm->vcpu);
defbba56 2899 hsave->save.cr4 = svm->vcpu.arch.cr4;
f6e78475 2900 hsave->save.rflags = kvm_get_rflags(&svm->vcpu);
b75f4eb3 2901 hsave->save.rip = kvm_rip_read(&svm->vcpu);
defbba56
JR
2902 hsave->save.rsp = vmcb->save.rsp;
2903 hsave->save.rax = vmcb->save.rax;
2904 if (npt_enabled)
2905 hsave->save.cr3 = vmcb->save.cr3;
2906 else
9f8fe504 2907 hsave->save.cr3 = kvm_read_cr3(&svm->vcpu);
defbba56 2908
0460a979 2909 copy_vmcb_control_area(hsave, vmcb);
3d6368ef 2910
f6e78475 2911 if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
3d6368ef
AG
2912 svm->vcpu.arch.hflags |= HF_HIF_MASK;
2913 else
2914 svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
2915
4b16184c
JR
2916 if (nested_vmcb->control.nested_ctl) {
2917 kvm_mmu_unload(&svm->vcpu);
2918 svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
2919 nested_svm_init_mmu_context(&svm->vcpu);
2920 }
2921
3d6368ef
AG
2922 /* Load the nested guest state */
2923 svm->vmcb->save.es = nested_vmcb->save.es;
2924 svm->vmcb->save.cs = nested_vmcb->save.cs;
2925 svm->vmcb->save.ss = nested_vmcb->save.ss;
2926 svm->vmcb->save.ds = nested_vmcb->save.ds;
2927 svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
2928 svm->vmcb->save.idtr = nested_vmcb->save.idtr;
f6e78475 2929 kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags);
3d6368ef
AG
2930 svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
2931 svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
2932 svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
2933 if (npt_enabled) {
2934 svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
2935 svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
0e5cbe36 2936 } else
2390218b 2937 (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
0e5cbe36
JR
2938
2939 /* Guest paging mode is active - reset mmu */
2940 kvm_mmu_reset_context(&svm->vcpu);
2941
defbba56 2942 svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
3d6368ef
AG
2943 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
2944 kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
2945 kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
e0231715 2946
3d6368ef
AG
2947 /* In case we don't even reach vcpu_run, the fields are not updated */
2948 svm->vmcb->save.rax = nested_vmcb->save.rax;
2949 svm->vmcb->save.rsp = nested_vmcb->save.rsp;
2950 svm->vmcb->save.rip = nested_vmcb->save.rip;
2951 svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
2952 svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
2953 svm->vmcb->save.cpl = nested_vmcb->save.cpl;
2954
f7138538 2955 svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
ce2ac085 2956 svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL;
3d6368ef 2957
aad42c64 2958 /* cache intercepts */
4ee546b4 2959 svm->nested.intercept_cr = nested_vmcb->control.intercept_cr;
3aed041a 2960 svm->nested.intercept_dr = nested_vmcb->control.intercept_dr;
aad42c64
JR
2961 svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
2962 svm->nested.intercept = nested_vmcb->control.intercept;
2963
f40f6a45 2964 svm_flush_tlb(&svm->vcpu);
3d6368ef 2965 svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
3d6368ef
AG
2966 if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
2967 svm->vcpu.arch.hflags |= HF_VINTR_MASK;
2968 else
2969 svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
2970
88ab24ad
JR
2971 if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
2972 /* We only want the cr8 intercept bits of the guest */
4ee546b4
RJ
2973 clr_cr_intercept(svm, INTERCEPT_CR8_READ);
2974 clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
88ab24ad
JR
2975 }
2976
0d945bd9 2977 /* We don't want to see VMMCALLs from a nested guest */
8a05a1b8 2978 clr_intercept(svm, INTERCEPT_VMMCALL);
0d945bd9 2979
88ab24ad 2980 svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl;
3d6368ef
AG
2981 svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
2982 svm->vmcb->control.int_state = nested_vmcb->control.int_state;
2983 svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
3d6368ef
AG
2984 svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
2985 svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
2986
7597f129 2987 nested_svm_unmap(page);
9738b2c9 2988
2030753d
JR
2989 /* Enter Guest-Mode */
2990 enter_guest_mode(&svm->vcpu);
2991
384c6368
JR
2992 /*
2993 * Merge guest and host intercepts - must be called with vcpu in
 2994 * guest-mode to take effect here
2995 */
2996 recalc_intercepts(svm);
2997
06fc7772 2998 svm->nested.vmcb = vmcb_gpa;
9738b2c9 2999
2af9194d 3000 enable_gif(svm);
3d6368ef 3001
8d28fec4
RJ
3002 mark_all_dirty(svm->vmcb);
3003
9738b2c9 3004 return true;
3d6368ef
AG
3005}
3006
9966bf68 3007static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
5542675b
AG
3008{
3009 to_vmcb->save.fs = from_vmcb->save.fs;
3010 to_vmcb->save.gs = from_vmcb->save.gs;
3011 to_vmcb->save.tr = from_vmcb->save.tr;
3012 to_vmcb->save.ldtr = from_vmcb->save.ldtr;
3013 to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
3014 to_vmcb->save.star = from_vmcb->save.star;
3015 to_vmcb->save.lstar = from_vmcb->save.lstar;
3016 to_vmcb->save.cstar = from_vmcb->save.cstar;
3017 to_vmcb->save.sfmask = from_vmcb->save.sfmask;
3018 to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
3019 to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
3020 to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
5542675b
AG
3021}
3022
851ba692 3023static int vmload_interception(struct vcpu_svm *svm)
5542675b 3024{
9966bf68 3025 struct vmcb *nested_vmcb;
7597f129 3026 struct page *page;
9966bf68 3027
5542675b
AG
3028 if (nested_svm_check_permissions(svm))
3029 return 1;
3030
7597f129 3031 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
9966bf68
JR
3032 if (!nested_vmcb)
3033 return 1;
3034
e3e9ed3d
JR
3035 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
3036 skip_emulated_instruction(&svm->vcpu);
3037
9966bf68 3038 nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
7597f129 3039 nested_svm_unmap(page);
5542675b
AG
3040
3041 return 1;
3042}
3043
851ba692 3044static int vmsave_interception(struct vcpu_svm *svm)
5542675b 3045{
9966bf68 3046 struct vmcb *nested_vmcb;
7597f129 3047 struct page *page;
9966bf68 3048
5542675b
AG
3049 if (nested_svm_check_permissions(svm))
3050 return 1;
3051
7597f129 3052 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
9966bf68
JR
3053 if (!nested_vmcb)
3054 return 1;
3055
e3e9ed3d
JR
3056 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
3057 skip_emulated_instruction(&svm->vcpu);
3058
9966bf68 3059 nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
7597f129 3060 nested_svm_unmap(page);
5542675b
AG
3061
3062 return 1;
3063}
3064
851ba692 3065static int vmrun_interception(struct vcpu_svm *svm)
3d6368ef 3066{
3d6368ef
AG
3067 if (nested_svm_check_permissions(svm))
3068 return 1;
3069
b75f4eb3
RJ
3070 /* Save rip after vmrun instruction */
3071 kvm_rip_write(&svm->vcpu, kvm_rip_read(&svm->vcpu) + 3);
3d6368ef 3072
9738b2c9 3073 if (!nested_svm_vmrun(svm))
3d6368ef
AG
3074 return 1;
3075
9738b2c9 3076 if (!nested_svm_vmrun_msrpm(svm))
1f8da478
JR
3077 goto failed;
3078
3079 return 1;
3080
3081failed:
3082
3083 svm->vmcb->control.exit_code = SVM_EXIT_ERR;
3084 svm->vmcb->control.exit_code_hi = 0;
3085 svm->vmcb->control.exit_info_1 = 0;
3086 svm->vmcb->control.exit_info_2 = 0;
3087
3088 nested_svm_vmexit(svm);
3d6368ef
AG
3089
3090 return 1;
3091}
3092
851ba692 3093static int stgi_interception(struct vcpu_svm *svm)
1371d904
AG
3094{
3095 if (nested_svm_check_permissions(svm))
3096 return 1;
3097
3098 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
3099 skip_emulated_instruction(&svm->vcpu);
3842d135 3100 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
1371d904 3101
2af9194d 3102 enable_gif(svm);
1371d904
AG
3103
3104 return 1;
3105}
3106
851ba692 3107static int clgi_interception(struct vcpu_svm *svm)
1371d904
AG
3108{
3109 if (nested_svm_check_permissions(svm))
3110 return 1;
3111
3112 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
3113 skip_emulated_instruction(&svm->vcpu);
3114
2af9194d 3115 disable_gif(svm);
1371d904
AG
3116
3117 /* After a CLGI no interrupts should come */
340d3bc3
SS
3118 if (!kvm_vcpu_apicv_active(&svm->vcpu)) {
3119 svm_clear_vintr(svm);
3120 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
3121 mark_dirty(svm->vmcb, VMCB_INTR);
3122 }
decdbf6a 3123
1371d904
AG
3124 return 1;
3125}
3126
851ba692 3127static int invlpga_interception(struct vcpu_svm *svm)
ff092385
AG
3128{
3129 struct kvm_vcpu *vcpu = &svm->vcpu;
ff092385 3130
668f198f
DK
3131 trace_kvm_invlpga(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RCX),
3132 kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
ec1ff790 3133
ff092385 3134 /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
668f198f 3135 kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
ff092385
AG
3136
3137 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
3138 skip_emulated_instruction(&svm->vcpu);
3139 return 1;
3140}
3141
532a46b9
JR
3142static int skinit_interception(struct vcpu_svm *svm)
3143{
668f198f 3144 trace_kvm_skinit(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
532a46b9
JR
3145
3146 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
3147 return 1;
3148}
3149
dab429a7
DK
3150static int wbinvd_interception(struct vcpu_svm *svm)
3151{
6affcbed 3152 return kvm_emulate_wbinvd(&svm->vcpu);
dab429a7
DK
3153}
3154
81dd35d4
JR
3155static int xsetbv_interception(struct vcpu_svm *svm)
3156{
3157 u64 new_bv = kvm_read_edx_eax(&svm->vcpu);
3158 u32 index = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
3159
3160 if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) {
3161 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
3162 skip_emulated_instruction(&svm->vcpu);
3163 }
3164
3165 return 1;
3166}
3167
851ba692 3168static int task_switch_interception(struct vcpu_svm *svm)
6aa8b732 3169{
37817f29 3170 u16 tss_selector;
64a7ec06
GN
3171 int reason;
3172 int int_type = svm->vmcb->control.exit_int_info &
3173 SVM_EXITINTINFO_TYPE_MASK;
8317c298 3174 int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK;
fe8e7f83
GN
3175 uint32_t type =
3176 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK;
3177 uint32_t idt_v =
3178 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID;
e269fb21
JK
3179 bool has_error_code = false;
3180 u32 error_code = 0;
37817f29
IE
3181
3182 tss_selector = (u16)svm->vmcb->control.exit_info_1;
64a7ec06 3183
37817f29
IE
3184 if (svm->vmcb->control.exit_info_2 &
3185 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
64a7ec06
GN
3186 reason = TASK_SWITCH_IRET;
3187 else if (svm->vmcb->control.exit_info_2 &
3188 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
3189 reason = TASK_SWITCH_JMP;
fe8e7f83 3190 else if (idt_v)
64a7ec06
GN
3191 reason = TASK_SWITCH_GATE;
3192 else
3193 reason = TASK_SWITCH_CALL;
3194
fe8e7f83
GN
3195 if (reason == TASK_SWITCH_GATE) {
3196 switch (type) {
3197 case SVM_EXITINTINFO_TYPE_NMI:
3198 svm->vcpu.arch.nmi_injected = false;
3199 break;
3200 case SVM_EXITINTINFO_TYPE_EXEPT:
e269fb21
JK
3201 if (svm->vmcb->control.exit_info_2 &
3202 (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) {
3203 has_error_code = true;
3204 error_code =
3205 (u32)svm->vmcb->control.exit_info_2;
3206 }
fe8e7f83
GN
3207 kvm_clear_exception_queue(&svm->vcpu);
3208 break;
3209 case SVM_EXITINTINFO_TYPE_INTR:
3210 kvm_clear_interrupt_queue(&svm->vcpu);
3211 break;
3212 default:
3213 break;
3214 }
3215 }
64a7ec06 3216
8317c298
GN
3217 if (reason != TASK_SWITCH_GATE ||
3218 int_type == SVM_EXITINTINFO_TYPE_SOFT ||
3219 (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
f629cf84
GN
3220 (int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
3221 skip_emulated_instruction(&svm->vcpu);
64a7ec06 3222
7f3d35fd
KW
3223 if (int_type != SVM_EXITINTINFO_TYPE_SOFT)
3224 int_vec = -1;
3225
3226 if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
acb54517
GN
3227 has_error_code, error_code) == EMULATE_FAIL) {
3228 svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
3229 svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
3230 svm->vcpu.run->internal.ndata = 0;
3231 return 0;
3232 }
3233 return 1;
6aa8b732
AK
3234}
3235
851ba692 3236static int cpuid_interception(struct vcpu_svm *svm)
6aa8b732 3237{
5fdbf976 3238 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
6a908b62 3239 return kvm_emulate_cpuid(&svm->vcpu);
6aa8b732
AK
3240}
3241
851ba692 3242static int iret_interception(struct vcpu_svm *svm)
95ba8273
GN
3243{
3244 ++svm->vcpu.stat.nmi_window_exits;
8a05a1b8 3245 clr_intercept(svm, INTERCEPT_IRET);
44c11430 3246 svm->vcpu.arch.hflags |= HF_IRET_MASK;
bd3d1ec3 3247 svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
f303b4ce 3248 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
95ba8273
GN
3249 return 1;
3250}
3251
851ba692 3252static int invlpg_interception(struct vcpu_svm *svm)
a7052897 3253{
df4f3108
AP
3254 if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
3255 return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
3256
3257 kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
3258 skip_emulated_instruction(&svm->vcpu);
3259 return 1;
a7052897
MT
3260}
3261
851ba692 3262static int emulate_on_interception(struct vcpu_svm *svm)
6aa8b732 3263{
51d8b661 3264 return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
6aa8b732
AK
3265}
3266
332b56e4
AK
3267static int rdpmc_interception(struct vcpu_svm *svm)
3268{
3269 int err;
3270
3271 if (!static_cpu_has(X86_FEATURE_NRIPS))
3272 return emulate_on_interception(svm);
3273
3274 err = kvm_rdpmc(&svm->vcpu);
6affcbed 3275 return kvm_complete_insn_gp(&svm->vcpu, err);
332b56e4
AK
3276}
3277
52eb5a6d
XL
3278static bool check_selective_cr0_intercepted(struct vcpu_svm *svm,
3279 unsigned long val)
628afd2a
JR
3280{
3281 unsigned long cr0 = svm->vcpu.arch.cr0;
3282 bool ret = false;
3283 u64 intercept;
3284
3285 intercept = svm->nested.intercept;
3286
3287 if (!is_guest_mode(&svm->vcpu) ||
3288 (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))))
3289 return false;
3290
3291 cr0 &= ~SVM_CR0_SELECTIVE_MASK;
3292 val &= ~SVM_CR0_SELECTIVE_MASK;
3293
3294 if (cr0 ^ val) {
3295 svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
3296 ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE);
3297 }
3298
3299 return ret;
3300}
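/*
 * Illustrative sketch, not part of the kernel source:
 * check_selective_cr0_intercepted() above emulates SVM's selective CR0
 * write intercept for the L1 hypervisor. Assuming the selective mask
 * covers CR0.TS and CR0.MP, a write is forwarded to L1 only when it
 * changes some bit outside that mask while L1 asked for the selective
 * intercept. A standalone model; names and macros are made up for
 * illustration.
 */
#include <stdbool.h>

#define DEMO_CR0_MP		(1ul << 1)
#define DEMO_CR0_TS		(1ul << 3)
#define DEMO_CR0_SELECTIVE_MASK	(DEMO_CR0_TS | DEMO_CR0_MP)

static bool demo_selective_cr0_hit(unsigned long old_cr0, unsigned long new_cr0,
				   bool l1_wants_selective)
{
	if (!l1_wants_selective)
		return false;

	old_cr0 &= ~DEMO_CR0_SELECTIVE_MASK;
	new_cr0 &= ~DEMO_CR0_SELECTIVE_MASK;

	/* Only changes outside TS/MP are reported as CR0_SEL_WRITE to L1. */
	return old_cr0 != new_cr0;
}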
3301
7ff76d58
AP
3302#define CR_VALID (1ULL << 63)
3303
3304static int cr_interception(struct vcpu_svm *svm)
3305{
3306 int reg, cr;
3307 unsigned long val;
3308 int err;
3309
3310 if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
3311 return emulate_on_interception(svm);
3312
3313 if (unlikely((svm->vmcb->control.exit_info_1 & CR_VALID) == 0))
3314 return emulate_on_interception(svm);
3315
3316 reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
5e57518d
DK
3317 if (svm->vmcb->control.exit_code == SVM_EXIT_CR0_SEL_WRITE)
3318 cr = SVM_EXIT_WRITE_CR0 - SVM_EXIT_READ_CR0;
3319 else
3320 cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
7ff76d58
AP
3321
3322 err = 0;
3323 if (cr >= 16) { /* mov to cr */
3324 cr -= 16;
3325 val = kvm_register_read(&svm->vcpu, reg);
3326 switch (cr) {
3327 case 0:
628afd2a
JR
3328 if (!check_selective_cr0_intercepted(svm, val))
3329 err = kvm_set_cr0(&svm->vcpu, val);
977b2d03
JR
3330 else
3331 return 1;
3332
7ff76d58
AP
3333 break;
3334 case 3:
3335 err = kvm_set_cr3(&svm->vcpu, val);
3336 break;
3337 case 4:
3338 err = kvm_set_cr4(&svm->vcpu, val);
3339 break;
3340 case 8:
3341 err = kvm_set_cr8(&svm->vcpu, val);
3342 break;
3343 default:
3344 WARN(1, "unhandled write to CR%d", cr);
3345 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
3346 return 1;
3347 }
3348 } else { /* mov from cr */
3349 switch (cr) {
3350 case 0:
3351 val = kvm_read_cr0(&svm->vcpu);
3352 break;
3353 case 2:
3354 val = svm->vcpu.arch.cr2;
3355 break;
3356 case 3:
9f8fe504 3357 val = kvm_read_cr3(&svm->vcpu);
7ff76d58
AP
3358 break;
3359 case 4:
3360 val = kvm_read_cr4(&svm->vcpu);
3361 break;
3362 case 8:
3363 val = kvm_get_cr8(&svm->vcpu);
3364 break;
3365 default:
3366 WARN(1, "unhandled read from CR%d", cr);
3367 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
3368 return 1;
3369 }
3370 kvm_register_write(&svm->vcpu, reg, val);
3371 }
6affcbed 3372 return kvm_complete_insn_gp(&svm->vcpu, err);
7ff76d58
AP
3373}
3374
cae3797a
AP
3375static int dr_interception(struct vcpu_svm *svm)
3376{
3377 int reg, dr;
3378 unsigned long val;
cae3797a 3379
facb0139
PB
3380 if (svm->vcpu.guest_debug == 0) {
3381 /*
3382 * No more DR vmexits; force a reload of the debug registers
3383 * and reenter on this instruction. The next vmexit will
3384 * retrieve the full state of the debug registers.
3385 */
3386 clr_dr_intercepts(svm);
3387 svm->vcpu.arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
3388 return 1;
3389 }
3390
cae3797a
AP
3391 if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS))
3392 return emulate_on_interception(svm);
3393
3394 reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
3395 dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0;
3396
3397 if (dr >= 16) { /* mov to DRn */
16f8a6f9
NA
3398 if (!kvm_require_dr(&svm->vcpu, dr - 16))
3399 return 1;
cae3797a
AP
3400 val = kvm_register_read(&svm->vcpu, reg);
3401 kvm_set_dr(&svm->vcpu, dr - 16, val);
3402 } else {
16f8a6f9
NA
3403 if (!kvm_require_dr(&svm->vcpu, dr))
3404 return 1;
3405 kvm_get_dr(&svm->vcpu, dr, &val);
3406 kvm_register_write(&svm->vcpu, reg, val);
cae3797a
AP
3407 }
3408
2c46d2ae
JR
3409 skip_emulated_instruction(&svm->vcpu);
3410
cae3797a
AP
3411 return 1;
3412}
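/*
 * Illustrative sketch, not part of the kernel source: together with
 * svm_sync_dirty_debug_regs() further up, the guest_debug == 0 path in
 * dr_interception() implements lazy debug-register switching. On the
 * first DR access the DR intercepts are dropped and a "won't exit" flag
 * is set so the host reloads DR0-DR3 around every guest entry; when KVM
 * later wants the values back it reads them from hardware and re-arms
 * the intercepts. A standalone model of that two-state toggle; names are
 * made up for illustration.
 */
#include <stdbool.h>

struct dr_state_demo {
	bool intercept_dr;	/* true: every DR access exits to the host */
	bool wont_exit;		/* true: DRs live in hardware, reload on entry */
};

static void dr_demo_first_guest_access(struct dr_state_demo *s)
{
	s->intercept_dr = false;	/* stop taking DR vmexits */
	s->wont_exit = true;		/* host must context-switch DR0-DR3 */
}

static void dr_demo_sync_back(struct dr_state_demo *s)
{
	/* read DR0-DR3 and DR6 back from hardware here, then re-arm */
	s->wont_exit = false;
	s->intercept_dr = true;
}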
3413
851ba692 3414static int cr8_write_interception(struct vcpu_svm *svm)
1d075434 3415{
851ba692 3416 struct kvm_run *kvm_run = svm->vcpu.run;
eea1cff9 3417 int r;
851ba692 3418
0a5fff19
GN
3419 u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
3420 /* instruction emulation calls kvm_set_cr8() */
7ff76d58 3421 r = cr_interception(svm);
35754c98 3422 if (lapic_in_kernel(&svm->vcpu))
7ff76d58 3423 return r;
0a5fff19 3424 if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
7ff76d58 3425 return r;
1d075434
JR
3426 kvm_run->exit_reason = KVM_EXIT_SET_TPR;
3427 return 0;
3428}
3429
609e36d3 3430static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
6aa8b732 3431{
a2fa3e9f
GH
3432 struct vcpu_svm *svm = to_svm(vcpu);
3433
609e36d3 3434 switch (msr_info->index) {
af24a4e4 3435 case MSR_IA32_TSC: {
609e36d3 3436 msr_info->data = svm->vmcb->control.tsc_offset +
35181e86 3437 kvm_scale_tsc(vcpu, rdtsc());
fbc0db76 3438
6aa8b732
AK
3439 break;
3440 }
8c06585d 3441 case MSR_STAR:
609e36d3 3442 msr_info->data = svm->vmcb->save.star;
6aa8b732 3443 break;
0e859cac 3444#ifdef CONFIG_X86_64
6aa8b732 3445 case MSR_LSTAR:
609e36d3 3446 msr_info->data = svm->vmcb->save.lstar;
6aa8b732
AK
3447 break;
3448 case MSR_CSTAR:
609e36d3 3449 msr_info->data = svm->vmcb->save.cstar;
6aa8b732
AK
3450 break;
3451 case MSR_KERNEL_GS_BASE:
609e36d3 3452 msr_info->data = svm->vmcb->save.kernel_gs_base;
6aa8b732
AK
3453 break;
3454 case MSR_SYSCALL_MASK:
609e36d3 3455 msr_info->data = svm->vmcb->save.sfmask;
6aa8b732
AK
3456 break;
3457#endif
3458 case MSR_IA32_SYSENTER_CS:
609e36d3 3459 msr_info->data = svm->vmcb->save.sysenter_cs;
6aa8b732
AK
3460 break;
3461 case MSR_IA32_SYSENTER_EIP:
609e36d3 3462 msr_info->data = svm->sysenter_eip;
6aa8b732
AK
3463 break;
3464 case MSR_IA32_SYSENTER_ESP:
609e36d3 3465 msr_info->data = svm->sysenter_esp;
6aa8b732 3466 break;
46896c73
PB
3467 case MSR_TSC_AUX:
3468 if (!boot_cpu_has(X86_FEATURE_RDTSCP))
3469 return 1;
3470 msr_info->data = svm->tsc_aux;
3471 break;
e0231715
JR
3472 /*
3473 * Nobody will change the following 5 values in the VMCB so we can
3474 * safely return them on rdmsr. They will always be 0 until LBRV is
3475 * implemented.
3476 */
a2938c80 3477 case MSR_IA32_DEBUGCTLMSR:
609e36d3 3478 msr_info->data = svm->vmcb->save.dbgctl;
a2938c80
JR
3479 break;
3480 case MSR_IA32_LASTBRANCHFROMIP:
609e36d3 3481 msr_info->data = svm->vmcb->save.br_from;
a2938c80
JR
3482 break;
3483 case MSR_IA32_LASTBRANCHTOIP:
609e36d3 3484 msr_info->data = svm->vmcb->save.br_to;
a2938c80
JR
3485 break;
3486 case MSR_IA32_LASTINTFROMIP:
609e36d3 3487 msr_info->data = svm->vmcb->save.last_excp_from;
a2938c80
JR
3488 break;
3489 case MSR_IA32_LASTINTTOIP:
609e36d3 3490 msr_info->data = svm->vmcb->save.last_excp_to;
a2938c80 3491 break;
b286d5d8 3492 case MSR_VM_HSAVE_PA:
609e36d3 3493 msr_info->data = svm->nested.hsave_msr;
b286d5d8 3494 break;
eb6f302e 3495 case MSR_VM_CR:
609e36d3 3496 msr_info->data = svm->nested.vm_cr_msr;
eb6f302e 3497 break;
c8a73f18 3498 case MSR_IA32_UCODE_REV:
609e36d3 3499 msr_info->data = 0x01000065;
c8a73f18 3500 break;
ae8b7875
BP
3501 case MSR_F15H_IC_CFG: {
3502
3503 int family, model;
3504
3505 family = guest_cpuid_family(vcpu);
3506 model = guest_cpuid_model(vcpu);
3507
3508 if (family < 0 || model < 0)
3509 return kvm_get_msr_common(vcpu, msr_info);
3510
3511 msr_info->data = 0;
3512
3513 if (family == 0x15 &&
3514 (model >= 0x2 && model < 0x20))
3515 msr_info->data = 0x1E;
3516 }
3517 break;
6aa8b732 3518 default:
609e36d3 3519 return kvm_get_msr_common(vcpu, msr_info);
6aa8b732
AK
3520 }
3521 return 0;
3522}
3523
851ba692 3524static int rdmsr_interception(struct vcpu_svm *svm)
6aa8b732 3525{
668f198f 3526 u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
609e36d3 3527 struct msr_data msr_info;
6aa8b732 3528
609e36d3
PB
3529 msr_info.index = ecx;
3530 msr_info.host_initiated = false;
3531 if (svm_get_msr(&svm->vcpu, &msr_info)) {
59200273 3532 trace_kvm_msr_read_ex(ecx);
c1a5d4f9 3533 kvm_inject_gp(&svm->vcpu, 0);
59200273 3534 } else {
609e36d3 3535 trace_kvm_msr_read(ecx, msr_info.data);
af9ca2d7 3536
609e36d3
PB
3537 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX,
3538 msr_info.data & 0xffffffff);
3539 kvm_register_write(&svm->vcpu, VCPU_REGS_RDX,
3540 msr_info.data >> 32);
5fdbf976 3541 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
e756fc62 3542 skip_emulated_instruction(&svm->vcpu);
6aa8b732
AK
3543 }
3544 return 1;
3545}
3546
4a810181
JR
3547static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
3548{
3549 struct vcpu_svm *svm = to_svm(vcpu);
3550 int svm_dis, chg_mask;
3551
3552 if (data & ~SVM_VM_CR_VALID_MASK)
3553 return 1;
3554
3555 chg_mask = SVM_VM_CR_VALID_MASK;
3556
3557 if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK)
3558 chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK);
3559
3560 svm->nested.vm_cr_msr &= ~chg_mask;
3561 svm->nested.vm_cr_msr |= (data & chg_mask);
3562
3563 svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK;
3564
3565 /* check for svm_disable while efer.svme is set */
3566 if (svm_dis && (vcpu->arch.efer & EFER_SVME))
3567 return 1;
3568
3569 return 0;
3570}
3571
8fe8ab46 3572static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
6aa8b732 3573{
a2fa3e9f
GH
3574 struct vcpu_svm *svm = to_svm(vcpu);
3575
8fe8ab46
WA
3576 u32 ecx = msr->index;
3577 u64 data = msr->data;
6aa8b732 3578 switch (ecx) {
f4e1b3c8 3579 case MSR_IA32_TSC:
8fe8ab46 3580 kvm_write_tsc(vcpu, msr);
6aa8b732 3581 break;
8c06585d 3582 case MSR_STAR:
a2fa3e9f 3583 svm->vmcb->save.star = data;
6aa8b732 3584 break;
49b14f24 3585#ifdef CONFIG_X86_64
6aa8b732 3586 case MSR_LSTAR:
a2fa3e9f 3587 svm->vmcb->save.lstar = data;
6aa8b732
AK
3588 break;
3589 case MSR_CSTAR:
a2fa3e9f 3590 svm->vmcb->save.cstar = data;
6aa8b732
AK
3591 break;
3592 case MSR_KERNEL_GS_BASE:
a2fa3e9f 3593 svm->vmcb->save.kernel_gs_base = data;
6aa8b732
AK
3594 break;
3595 case MSR_SYSCALL_MASK:
a2fa3e9f 3596 svm->vmcb->save.sfmask = data;
6aa8b732
AK
3597 break;
3598#endif
3599 case MSR_IA32_SYSENTER_CS:
a2fa3e9f 3600 svm->vmcb->save.sysenter_cs = data;
6aa8b732
AK
3601 break;
3602 case MSR_IA32_SYSENTER_EIP:
017cb99e 3603 svm->sysenter_eip = data;
a2fa3e9f 3604 svm->vmcb->save.sysenter_eip = data;
6aa8b732
AK
3605 break;
3606 case MSR_IA32_SYSENTER_ESP:
017cb99e 3607 svm->sysenter_esp = data;
a2fa3e9f 3608 svm->vmcb->save.sysenter_esp = data;
6aa8b732 3609 break;
46896c73
PB
3610 case MSR_TSC_AUX:
3611 if (!boot_cpu_has(X86_FEATURE_RDTSCP))
3612 return 1;
3613
3614 /*
3615 * This is rare, so we update the MSR here instead of using
3616 * direct_access_msrs. Doing that would require a rdmsr in
3617 * svm_vcpu_put.
3618 */
3619 svm->tsc_aux = data;
3620 wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
3621 break;
a2938c80 3622 case MSR_IA32_DEBUGCTLMSR:
2a6b20b8 3623 if (!boot_cpu_has(X86_FEATURE_LBRV)) {
a737f256
CD
3624 vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
3625 __func__, data);
24e09cbf
JR
3626 break;
3627 }
3628 if (data & DEBUGCTL_RESERVED_BITS)
3629 return 1;
3630
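		/* Bit 0 of DEBUGCTL is the LBR enable; mirror it into the LBR virtualization state below. */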
3631 svm->vmcb->save.dbgctl = data;
b53ba3f9 3632 mark_dirty(svm->vmcb, VMCB_LBR);
24e09cbf
JR
3633 if (data & (1ULL<<0))
3634 svm_enable_lbrv(svm);
3635 else
3636 svm_disable_lbrv(svm);
a2938c80 3637 break;
b286d5d8 3638 case MSR_VM_HSAVE_PA:
e6aa9abd 3639 svm->nested.hsave_msr = data;
62b9abaa 3640 break;
3c5d0a44 3641 case MSR_VM_CR:
4a810181 3642 return svm_set_vm_cr(vcpu, data);
3c5d0a44 3643 case MSR_VM_IGNNE:
a737f256 3644 vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
3c5d0a44 3645 break;
44a95dae
SS
3646 case MSR_IA32_APICBASE:
3647 if (kvm_vcpu_apicv_active(vcpu))
3648 avic_update_vapic_bar(to_svm(vcpu), data);
 3648		/* Fall through */
6aa8b732 3650 default:
8fe8ab46 3651 return kvm_set_msr_common(vcpu, msr);
6aa8b732
AK
3652 }
3653 return 0;
3654}
3655
851ba692 3656static int wrmsr_interception(struct vcpu_svm *svm)
6aa8b732 3657{
8fe8ab46 3658 struct msr_data msr;
668f198f
DK
3659 u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
3660 u64 data = kvm_read_edx_eax(&svm->vcpu);
af9ca2d7 3661
8fe8ab46
WA
3662 msr.data = data;
3663 msr.index = ecx;
3664 msr.host_initiated = false;
af9ca2d7 3665
5fdbf976 3666 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
854e8bb1 3667 if (kvm_set_msr(&svm->vcpu, &msr)) {
59200273 3668 trace_kvm_msr_write_ex(ecx, data);
c1a5d4f9 3669 kvm_inject_gp(&svm->vcpu, 0);
59200273
AK
3670 } else {
3671 trace_kvm_msr_write(ecx, data);
e756fc62 3672 skip_emulated_instruction(&svm->vcpu);
59200273 3673 }
6aa8b732
AK
3674 return 1;
3675}
3676
851ba692 3677static int msr_interception(struct vcpu_svm *svm)
6aa8b732 3678{
e756fc62 3679 if (svm->vmcb->control.exit_info_1)
851ba692 3680 return wrmsr_interception(svm);
6aa8b732 3681 else
851ba692 3682 return rdmsr_interception(svm);
6aa8b732
AK
3683}
3684
851ba692 3685static int interrupt_window_interception(struct vcpu_svm *svm)
c1150d8c 3686{
3842d135 3687 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
f0b85051 3688 svm_clear_vintr(svm);
85f455f7 3689 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
decdbf6a 3690 mark_dirty(svm->vmcb, VMCB_INTR);
675acb75 3691 ++svm->vcpu.stat.irq_window_exits;
c1150d8c
DL
3692 return 1;
3693}
3694
565d0998
ML
3695static int pause_interception(struct vcpu_svm *svm)
3696{
3697 kvm_vcpu_on_spin(&(svm->vcpu));
3698 return 1;
3699}
3700
87c00572
GS
3701static int nop_interception(struct vcpu_svm *svm)
3702{
3703 skip_emulated_instruction(&(svm->vcpu));
3704 return 1;
3705}
3706
3707static int monitor_interception(struct vcpu_svm *svm)
3708{
3709 printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
3710 return nop_interception(svm);
3711}
3712
3713static int mwait_interception(struct vcpu_svm *svm)
3714{
3715 printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
3716 return nop_interception(svm);
3717}
3718
18f40c53
SS
3719enum avic_ipi_failure_cause {
3720 AVIC_IPI_FAILURE_INVALID_INT_TYPE,
3721 AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
3722 AVIC_IPI_FAILURE_INVALID_TARGET,
3723 AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
3724};
3725
3726static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
3727{
3728 u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
3729 u32 icrl = svm->vmcb->control.exit_info_1;
3730 u32 id = svm->vmcb->control.exit_info_2 >> 32;
5446a979 3731 u32 index = svm->vmcb->control.exit_info_2 & 0xFF;
18f40c53
SS
3732 struct kvm_lapic *apic = svm->vcpu.arch.apic;
3733
3734 trace_kvm_avic_incomplete_ipi(svm->vcpu.vcpu_id, icrh, icrl, id, index);
3735
3736 switch (id) {
3737 case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
3738 /*
3739 * AVIC hardware handles the generation of
3740 * IPIs when the specified Message Type is Fixed
3741 * (also known as fixed delivery mode) and
3742 * the Trigger Mode is edge-triggered. The hardware
3743 * also supports self and broadcast delivery modes
 3744		 * specified via the Destination Shorthand (DSH)
3745 * field of the ICRL. Logical and physical APIC ID
3746 * formats are supported. All other IPI types cause
 3747		 * a #VMEXIT, which needs to be emulated.
3748 */
3749 kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
3750 kvm_lapic_reg_write(apic, APIC_ICR, icrl);
3751 break;
3752 case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: {
3753 int i;
3754 struct kvm_vcpu *vcpu;
3755 struct kvm *kvm = svm->vcpu.kvm;
3756 struct kvm_lapic *apic = svm->vcpu.arch.apic;
3757
3758 /*
3759 * At this point, we expect that the AVIC HW has already
3760 * set the appropriate IRR bits on the valid target
3761 * vcpus. So, we just need to kick the appropriate vcpu.
3762 */
3763 kvm_for_each_vcpu(i, vcpu, kvm) {
3764 bool m = kvm_apic_match_dest(vcpu, apic,
3765 icrl & KVM_APIC_SHORT_MASK,
3766 GET_APIC_DEST_FIELD(icrh),
3767 icrl & KVM_APIC_DEST_MASK);
3768
3769 if (m && !avic_vcpu_is_running(vcpu))
3770 kvm_vcpu_wake_up(vcpu);
3771 }
3772 break;
3773 }
3774 case AVIC_IPI_FAILURE_INVALID_TARGET:
3775 break;
3776 case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
3777 WARN_ONCE(1, "Invalid backing page\n");
3778 break;
3779 default:
3780 pr_err("Unknown IPI interception\n");
3781 }
3782
3783 return 1;
3784}
3785
3786static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
3787{
3788 struct kvm_arch *vm_data = &vcpu->kvm->arch;
3789 int index;
3790 u32 *logical_apic_id_table;
3791 int dlid = GET_APIC_LOGICAL_ID(ldr);
3792
3793 if (!dlid)
3794 return NULL;
3795
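	/*
	 * The logical APIC ID is a one-hot bitmap: in flat mode the set bit's
	 * position is the table index; in cluster mode bits 7:4 select the
	 * cluster and bits 3:0 pick one of four APICs within that cluster.
	 */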
3796 if (flat) { /* flat */
3797 index = ffs(dlid) - 1;
3798 if (index > 7)
3799 return NULL;
3800 } else { /* cluster */
3801 int cluster = (dlid & 0xf0) >> 4;
3802 int apic = ffs(dlid & 0x0f) - 1;
3803
3804 if ((apic < 0) || (apic > 7) ||
3805 (cluster >= 0xf))
3806 return NULL;
3807 index = (cluster << 2) + apic;
3808 }
3809
3810 logical_apic_id_table = (u32 *) page_address(vm_data->avic_logical_id_table_page);
3811
3812 return &logical_apic_id_table[index];
3813}
3814
3815static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr,
3816 bool valid)
3817{
3818 bool flat;
3819 u32 *entry, new_entry;
3820
3821 flat = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR) == APIC_DFR_FLAT;
3822 entry = avic_get_logical_id_entry(vcpu, ldr, flat);
3823 if (!entry)
3824 return -EINVAL;
3825
3826 new_entry = READ_ONCE(*entry);
3827 new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
3828 new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK);
3829 if (valid)
3830 new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
3831 else
3832 new_entry &= ~AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
3833 WRITE_ONCE(*entry, new_entry);
3834
3835 return 0;
3836}
3837
3838static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
3839{
3840 int ret;
3841 struct vcpu_svm *svm = to_svm(vcpu);
3842 u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR);
3843
3844 if (!ldr)
3845 return 1;
3846
3847 ret = avic_ldr_write(vcpu, vcpu->vcpu_id, ldr, true);
3848 if (ret && svm->ldr_reg) {
3849 avic_ldr_write(vcpu, 0, svm->ldr_reg, false);
3850 svm->ldr_reg = 0;
3851 } else {
3852 svm->ldr_reg = ldr;
3853 }
3854 return ret;
3855}
3856
3857static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
3858{
3859 u64 *old, *new;
3860 struct vcpu_svm *svm = to_svm(vcpu);
3861 u32 apic_id_reg = kvm_lapic_get_reg(vcpu->arch.apic, APIC_ID);
3862 u32 id = (apic_id_reg >> 24) & 0xff;
3863
3864 if (vcpu->vcpu_id == id)
3865 return 0;
3866
3867 old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id);
3868 new = avic_get_physical_id_entry(vcpu, id);
3869 if (!new || !old)
3870 return 1;
3871
3872 /* We need to move physical_id_entry to new offset */
3873 *new = *old;
3874 *old = 0ULL;
3875 to_svm(vcpu)->avic_physical_id_cache = new;
3876
3877 /*
3878 * Also update the guest physical APIC ID in the logical
 3879	 * APIC ID table entry if the LDR has already been set up.
3880 */
3881 if (svm->ldr_reg)
3882 avic_handle_ldr_update(vcpu);
3883
3884 return 0;
3885}
3886
3887static int avic_handle_dfr_update(struct kvm_vcpu *vcpu)
3888{
3889 struct vcpu_svm *svm = to_svm(vcpu);
3890 struct kvm_arch *vm_data = &vcpu->kvm->arch;
3891 u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR);
3892 u32 mod = (dfr >> 28) & 0xf;
3893
3894 /*
 3895	 * We assume that all local APICs are using the same addressing
 3896	 * mode (DFR). If this changes, we need to flush the AVIC logical
 3897	 * APIC ID table.
3898 */
3899 if (vm_data->ldr_mode == mod)
3900 return 0;
3901
3902 clear_page(page_address(vm_data->avic_logical_id_table_page));
3903 vm_data->ldr_mode = mod;
3904
3905 if (svm->ldr_reg)
3906 avic_handle_ldr_update(vcpu);
3907 return 0;
3908}
3909
3910static int avic_unaccel_trap_write(struct vcpu_svm *svm)
3911{
3912 struct kvm_lapic *apic = svm->vcpu.arch.apic;
3913 u32 offset = svm->vmcb->control.exit_info_1 &
3914 AVIC_UNACCEL_ACCESS_OFFSET_MASK;
3915
3916 switch (offset) {
3917 case APIC_ID:
3918 if (avic_handle_apic_id_update(&svm->vcpu))
3919 return 0;
3920 break;
3921 case APIC_LDR:
3922 if (avic_handle_ldr_update(&svm->vcpu))
3923 return 0;
3924 break;
3925 case APIC_DFR:
3926 avic_handle_dfr_update(&svm->vcpu);
3927 break;
3928 default:
3929 break;
3930 }
3931
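	/*
	 * For trap-type accesses the hardware has already written the value
	 * to the APIC backing page; replay it through the lapic emulation so
	 * the usual side effects are applied.
	 */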
3932 kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));
3933
3934 return 1;
3935}
3936
3937static bool is_avic_unaccelerated_access_trap(u32 offset)
3938{
3939 bool ret = false;
3940
3941 switch (offset) {
3942 case APIC_ID:
3943 case APIC_EOI:
3944 case APIC_RRR:
3945 case APIC_LDR:
3946 case APIC_DFR:
3947 case APIC_SPIV:
3948 case APIC_ESR:
3949 case APIC_ICR:
3950 case APIC_LVTT:
3951 case APIC_LVTTHMR:
3952 case APIC_LVTPC:
3953 case APIC_LVT0:
3954 case APIC_LVT1:
3955 case APIC_LVTERR:
3956 case APIC_TMICT:
3957 case APIC_TDCR:
3958 ret = true;
3959 break;
3960 default:
3961 break;
3962 }
3963 return ret;
3964}
3965
3966static int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
3967{
3968 int ret = 0;
3969 u32 offset = svm->vmcb->control.exit_info_1 &
3970 AVIC_UNACCEL_ACCESS_OFFSET_MASK;
3971 u32 vector = svm->vmcb->control.exit_info_2 &
3972 AVIC_UNACCEL_ACCESS_VECTOR_MASK;
3973 bool write = (svm->vmcb->control.exit_info_1 >> 32) &
3974 AVIC_UNACCEL_ACCESS_WRITE_MASK;
3975 bool trap = is_avic_unaccelerated_access_trap(offset);
3976
3977 trace_kvm_avic_unaccelerated_access(svm->vcpu.vcpu_id, offset,
3978 trap, write, vector);
3979 if (trap) {
3980 /* Handling Trap */
3981 WARN_ONCE(!write, "svm: Handling trap read.\n");
3982 ret = avic_unaccel_trap_write(svm);
3983 } else {
3984 /* Handling Fault */
3985 ret = (emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE);
3986 }
3987
3988 return ret;
3989}
3990
09941fbb 3991static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
7ff76d58
AP
3992 [SVM_EXIT_READ_CR0] = cr_interception,
3993 [SVM_EXIT_READ_CR3] = cr_interception,
3994 [SVM_EXIT_READ_CR4] = cr_interception,
3995 [SVM_EXIT_READ_CR8] = cr_interception,
5e57518d 3996 [SVM_EXIT_CR0_SEL_WRITE] = cr_interception,
628afd2a 3997 [SVM_EXIT_WRITE_CR0] = cr_interception,
7ff76d58
AP
3998 [SVM_EXIT_WRITE_CR3] = cr_interception,
3999 [SVM_EXIT_WRITE_CR4] = cr_interception,
e0231715 4000 [SVM_EXIT_WRITE_CR8] = cr8_write_interception,
cae3797a
AP
4001 [SVM_EXIT_READ_DR0] = dr_interception,
4002 [SVM_EXIT_READ_DR1] = dr_interception,
4003 [SVM_EXIT_READ_DR2] = dr_interception,
4004 [SVM_EXIT_READ_DR3] = dr_interception,
4005 [SVM_EXIT_READ_DR4] = dr_interception,
4006 [SVM_EXIT_READ_DR5] = dr_interception,
4007 [SVM_EXIT_READ_DR6] = dr_interception,
4008 [SVM_EXIT_READ_DR7] = dr_interception,
4009 [SVM_EXIT_WRITE_DR0] = dr_interception,
4010 [SVM_EXIT_WRITE_DR1] = dr_interception,
4011 [SVM_EXIT_WRITE_DR2] = dr_interception,
4012 [SVM_EXIT_WRITE_DR3] = dr_interception,
4013 [SVM_EXIT_WRITE_DR4] = dr_interception,
4014 [SVM_EXIT_WRITE_DR5] = dr_interception,
4015 [SVM_EXIT_WRITE_DR6] = dr_interception,
4016 [SVM_EXIT_WRITE_DR7] = dr_interception,
d0bfb940
JK
4017 [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception,
4018 [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception,
7aa81cc0 4019 [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception,
e0231715
JR
4020 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
4021 [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception,
4022 [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception,
54a20552 4023 [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception,
e0231715 4024 [SVM_EXIT_INTR] = intr_interception,
c47f098d 4025 [SVM_EXIT_NMI] = nmi_interception,
6aa8b732
AK
4026 [SVM_EXIT_SMI] = nop_on_interception,
4027 [SVM_EXIT_INIT] = nop_on_interception,
c1150d8c 4028 [SVM_EXIT_VINTR] = interrupt_window_interception,
332b56e4 4029 [SVM_EXIT_RDPMC] = rdpmc_interception,
6aa8b732 4030 [SVM_EXIT_CPUID] = cpuid_interception,
95ba8273 4031 [SVM_EXIT_IRET] = iret_interception,
cf5a94d1 4032 [SVM_EXIT_INVD] = emulate_on_interception,
565d0998 4033 [SVM_EXIT_PAUSE] = pause_interception,
6aa8b732 4034 [SVM_EXIT_HLT] = halt_interception,
a7052897 4035 [SVM_EXIT_INVLPG] = invlpg_interception,
ff092385 4036 [SVM_EXIT_INVLPGA] = invlpga_interception,
e0231715 4037 [SVM_EXIT_IOIO] = io_interception,
6aa8b732
AK
4038 [SVM_EXIT_MSR] = msr_interception,
4039 [SVM_EXIT_TASK_SWITCH] = task_switch_interception,
46fe4ddd 4040 [SVM_EXIT_SHUTDOWN] = shutdown_interception,
3d6368ef 4041 [SVM_EXIT_VMRUN] = vmrun_interception,
02e235bc 4042 [SVM_EXIT_VMMCALL] = vmmcall_interception,
5542675b
AG
4043 [SVM_EXIT_VMLOAD] = vmload_interception,
4044 [SVM_EXIT_VMSAVE] = vmsave_interception,
1371d904
AG
4045 [SVM_EXIT_STGI] = stgi_interception,
4046 [SVM_EXIT_CLGI] = clgi_interception,
532a46b9 4047 [SVM_EXIT_SKINIT] = skinit_interception,
dab429a7 4048 [SVM_EXIT_WBINVD] = wbinvd_interception,
87c00572
GS
4049 [SVM_EXIT_MONITOR] = monitor_interception,
4050 [SVM_EXIT_MWAIT] = mwait_interception,
81dd35d4 4051 [SVM_EXIT_XSETBV] = xsetbv_interception,
709ddebf 4052 [SVM_EXIT_NPF] = pf_interception,
64d60670 4053 [SVM_EXIT_RSM] = emulate_on_interception,
18f40c53
SS
4054 [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception,
4055 [SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception,
6aa8b732
AK
4056};
4057
ae8cc059 4058static void dump_vmcb(struct kvm_vcpu *vcpu)
3f10c846
JR
4059{
4060 struct vcpu_svm *svm = to_svm(vcpu);
4061 struct vmcb_control_area *control = &svm->vmcb->control;
4062 struct vmcb_save_area *save = &svm->vmcb->save;
4063
4064 pr_err("VMCB Control Area:\n");
ae8cc059
JP
4065 pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff);
4066 pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16);
4067 pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff);
4068 pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16);
4069 pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions);
4070 pr_err("%-20s%016llx\n", "intercepts:", control->intercept);
4071 pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count);
4072 pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa);
4073 pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa);
4074 pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset);
4075 pr_err("%-20s%d\n", "asid:", control->asid);
4076 pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl);
4077 pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl);
4078 pr_err("%-20s%08x\n", "int_vector:", control->int_vector);
4079 pr_err("%-20s%08x\n", "int_state:", control->int_state);
4080 pr_err("%-20s%08x\n", "exit_code:", control->exit_code);
4081 pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1);
4082 pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2);
4083 pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info);
4084 pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err);
4085 pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
4086 pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
44a95dae 4087 pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar);
ae8cc059
JP
4088 pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
4089 pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
4090 pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl);
4091 pr_err("%-20s%016llx\n", "next_rip:", control->next_rip);
44a95dae
SS
4092 pr_err("%-20s%016llx\n", "avic_backing_page:", control->avic_backing_page);
4093 pr_err("%-20s%016llx\n", "avic_logical_id:", control->avic_logical_id);
4094 pr_err("%-20s%016llx\n", "avic_physical_id:", control->avic_physical_id);
3f10c846 4095 pr_err("VMCB State Save Area:\n");
ae8cc059
JP
4096 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4097 "es:",
4098 save->es.selector, save->es.attrib,
4099 save->es.limit, save->es.base);
4100 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4101 "cs:",
4102 save->cs.selector, save->cs.attrib,
4103 save->cs.limit, save->cs.base);
4104 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4105 "ss:",
4106 save->ss.selector, save->ss.attrib,
4107 save->ss.limit, save->ss.base);
4108 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4109 "ds:",
4110 save->ds.selector, save->ds.attrib,
4111 save->ds.limit, save->ds.base);
4112 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4113 "fs:",
4114 save->fs.selector, save->fs.attrib,
4115 save->fs.limit, save->fs.base);
4116 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4117 "gs:",
4118 save->gs.selector, save->gs.attrib,
4119 save->gs.limit, save->gs.base);
4120 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4121 "gdtr:",
4122 save->gdtr.selector, save->gdtr.attrib,
4123 save->gdtr.limit, save->gdtr.base);
4124 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4125 "ldtr:",
4126 save->ldtr.selector, save->ldtr.attrib,
4127 save->ldtr.limit, save->ldtr.base);
4128 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4129 "idtr:",
4130 save->idtr.selector, save->idtr.attrib,
4131 save->idtr.limit, save->idtr.base);
4132 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4133 "tr:",
4134 save->tr.selector, save->tr.attrib,
4135 save->tr.limit, save->tr.base);
3f10c846
JR
4136 pr_err("cpl: %d efer: %016llx\n",
4137 save->cpl, save->efer);
ae8cc059
JP
4138 pr_err("%-15s %016llx %-13s %016llx\n",
4139 "cr0:", save->cr0, "cr2:", save->cr2);
4140 pr_err("%-15s %016llx %-13s %016llx\n",
4141 "cr3:", save->cr3, "cr4:", save->cr4);
4142 pr_err("%-15s %016llx %-13s %016llx\n",
4143 "dr6:", save->dr6, "dr7:", save->dr7);
4144 pr_err("%-15s %016llx %-13s %016llx\n",
4145 "rip:", save->rip, "rflags:", save->rflags);
4146 pr_err("%-15s %016llx %-13s %016llx\n",
4147 "rsp:", save->rsp, "rax:", save->rax);
4148 pr_err("%-15s %016llx %-13s %016llx\n",
4149 "star:", save->star, "lstar:", save->lstar);
4150 pr_err("%-15s %016llx %-13s %016llx\n",
4151 "cstar:", save->cstar, "sfmask:", save->sfmask);
4152 pr_err("%-15s %016llx %-13s %016llx\n",
4153 "kernel_gs_base:", save->kernel_gs_base,
4154 "sysenter_cs:", save->sysenter_cs);
4155 pr_err("%-15s %016llx %-13s %016llx\n",
4156 "sysenter_esp:", save->sysenter_esp,
4157 "sysenter_eip:", save->sysenter_eip);
4158 pr_err("%-15s %016llx %-13s %016llx\n",
4159 "gpat:", save->g_pat, "dbgctl:", save->dbgctl);
4160 pr_err("%-15s %016llx %-13s %016llx\n",
4161 "br_from:", save->br_from, "br_to:", save->br_to);
4162 pr_err("%-15s %016llx %-13s %016llx\n",
4163 "excp_from:", save->last_excp_from,
4164 "excp_to:", save->last_excp_to);
3f10c846
JR
4165}
4166
586f9607
AK
4167static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
4168{
4169 struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
4170
4171 *info1 = control->exit_info_1;
4172 *info2 = control->exit_info_2;
4173}
4174
851ba692 4175static int handle_exit(struct kvm_vcpu *vcpu)
6aa8b732 4176{
04d2cc77 4177 struct vcpu_svm *svm = to_svm(vcpu);
851ba692 4178 struct kvm_run *kvm_run = vcpu->run;
a2fa3e9f 4179 u32 exit_code = svm->vmcb->control.exit_code;
6aa8b732 4180
8b89fe1f
PB
4181 trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
4182
0f89b207
TL
4183 vcpu->arch.gpa_available = (exit_code == SVM_EXIT_NPF);
4184
4ee546b4 4185 if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
2be4fc7a
JR
4186 vcpu->arch.cr0 = svm->vmcb->save.cr0;
4187 if (npt_enabled)
4188 vcpu->arch.cr3 = svm->vmcb->save.cr3;
af9ca2d7 4189
cd3ff653
JR
4190 if (unlikely(svm->nested.exit_required)) {
4191 nested_svm_vmexit(svm);
4192 svm->nested.exit_required = false;
4193
4194 return 1;
4195 }
4196
2030753d 4197 if (is_guest_mode(vcpu)) {
410e4d57
JR
4198 int vmexit;
4199
d8cabddf
JR
4200 trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
4201 svm->vmcb->control.exit_info_1,
4202 svm->vmcb->control.exit_info_2,
4203 svm->vmcb->control.exit_int_info,
e097e5ff
SH
4204 svm->vmcb->control.exit_int_info_err,
4205 KVM_ISA_SVM);
d8cabddf 4206
410e4d57
JR
4207 vmexit = nested_svm_exit_special(svm);
4208
4209 if (vmexit == NESTED_EXIT_CONTINUE)
4210 vmexit = nested_svm_exit_handled(svm);
4211
4212 if (vmexit == NESTED_EXIT_DONE)
cf74a78b 4213 return 1;
cf74a78b
AG
4214 }
4215
a5c3832d
JR
4216 svm_complete_interrupts(svm);
4217
04d2cc77
AK
4218 if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
4219 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
4220 kvm_run->fail_entry.hardware_entry_failure_reason
4221 = svm->vmcb->control.exit_code;
3f10c846
JR
4222 pr_err("KVM: FAILED VMRUN WITH VMCB:\n");
4223 dump_vmcb(vcpu);
04d2cc77
AK
4224 return 0;
4225 }
4226
a2fa3e9f 4227 if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
709ddebf 4228 exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
55c5e464
JR
4229 exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH &&
4230 exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI)
6614c7d0 4231 printk(KERN_ERR "%s: unexpected exit_int_info 0x%x "
6aa8b732 4232 "exit_code 0x%x\n",
b8688d51 4233 __func__, svm->vmcb->control.exit_int_info,
6aa8b732
AK
4234 exit_code);
4235
9d8f549d 4236 if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
56919c5c 4237 || !svm_exit_handlers[exit_code]) {
faac2458 4238 WARN_ONCE(1, "svm: unexpected exit reason 0x%x\n", exit_code);
2bc19dc3
MT
4239 kvm_queue_exception(vcpu, UD_VECTOR);
4240 return 1;
6aa8b732
AK
4241 }
4242
851ba692 4243 return svm_exit_handlers[exit_code](svm);
6aa8b732
AK
4244}
4245
4246static void reload_tss(struct kvm_vcpu *vcpu)
4247{
4248 int cpu = raw_smp_processor_id();
4249
0fe1e009
TH
4250 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
4251 sd->tss_desc->type = 9; /* available 32/64-bit TSS */
6aa8b732
AK
4252 load_TR_desc();
4253}
4254
e756fc62 4255static void pre_svm_run(struct vcpu_svm *svm)
6aa8b732
AK
4256{
4257 int cpu = raw_smp_processor_id();
4258
0fe1e009 4259 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
6aa8b732 4260
4b656b12 4261 /* FIXME: handle wraparound of asid_generation */
0fe1e009
TH
4262 if (svm->asid_generation != sd->asid_generation)
4263 new_asid(svm, sd);
6aa8b732
AK
4264}
4265
95ba8273
GN
4266static void svm_inject_nmi(struct kvm_vcpu *vcpu)
4267{
4268 struct vcpu_svm *svm = to_svm(vcpu);
4269
4270 svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
4271 vcpu->arch.hflags |= HF_NMI_MASK;
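	/* Intercept IRET to learn when the guest unmasks NMIs again. */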
8a05a1b8 4272 set_intercept(svm, INTERCEPT_IRET);
95ba8273
GN
4273 ++vcpu->stat.nmi_injections;
4274}
6aa8b732 4275
85f455f7 4276static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
6aa8b732
AK
4277{
4278 struct vmcb_control_area *control;
4279
340d3bc3 4280 /* The following fields are ignored when AVIC is enabled */
e756fc62 4281 control = &svm->vmcb->control;
85f455f7 4282 control->int_vector = irq;
6aa8b732
AK
4283 control->int_ctl &= ~V_INTR_PRIO_MASK;
4284 control->int_ctl |= V_IRQ_MASK |
4285 ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
decdbf6a 4286 mark_dirty(svm->vmcb, VMCB_INTR);
6aa8b732
AK
4287}
4288
66fd3f7f 4289static void svm_set_irq(struct kvm_vcpu *vcpu)
2a8067f1
ED
4290{
4291 struct vcpu_svm *svm = to_svm(vcpu);
4292
2af9194d 4293 BUG_ON(!(gif_set(svm)));
cf74a78b 4294
9fb2d2b4
GN
4295 trace_kvm_inj_virq(vcpu->arch.interrupt.nr);
4296 ++vcpu->stat.irq_injections;
4297
219b65dc
AG
4298 svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr |
4299 SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
2a8067f1
ED
4300}
4301
3bbf3565
SS
4302static inline bool svm_nested_virtualize_tpr(struct kvm_vcpu *vcpu)
4303{
4304 return is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK);
4305}
4306
95ba8273 4307static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
aaacfc9a
JR
4308{
4309 struct vcpu_svm *svm = to_svm(vcpu);
aaacfc9a 4310
3bbf3565
SS
4311 if (svm_nested_virtualize_tpr(vcpu) ||
4312 kvm_vcpu_apicv_active(vcpu))
88ab24ad
JR
4313 return;
4314
596f3142
RK
4315 clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
4316
95ba8273 4317 if (irr == -1)
aaacfc9a
JR
4318 return;
4319
95ba8273 4320 if (tpr >= irr)
4ee546b4 4321 set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
95ba8273 4322}
aaacfc9a 4323
8d14695f
YZ
4324static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
4325{
4326 return;
4327}
4328
d62caabb
AS
4329static bool svm_get_enable_apicv(void)
4330{
44a95dae
SS
4331 return avic;
4332}
4333
4334static void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
4335{
d62caabb
AS
4336}
4337
67c9dddc 4338static void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
44a95dae 4339{
d62caabb
AS
4340}
4341
44a95dae 4342/* Note: Currently only used by Hyper-V. */
d62caabb 4343static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
c7c9c56c 4344{
44a95dae
SS
4345 struct vcpu_svm *svm = to_svm(vcpu);
4346 struct vmcb *vmcb = svm->vmcb;
4347
4348 if (!avic)
4349 return;
4350
4351 vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
4352 mark_dirty(vmcb, VMCB_INTR);
c7c9c56c
YZ
4353}
4354
6308630b 4355static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
c7c9c56c
YZ
4356{
4357 return;
4358}
4359
340d3bc3
SS
4360static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
4361{
4362 kvm_lapic_set_irr(vec, vcpu->arch.apic);
4363 smp_mb__after_atomic();
4364
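	/*
	 * If the target vcpu is running, ring the AVIC doorbell on its
	 * physical CPU so hardware delivers the interrupt without a VM exit;
	 * otherwise wake the vcpu so it notices the new IRR bit.
	 */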
4365 if (avic_vcpu_is_running(vcpu))
4366 wrmsrl(SVM_AVIC_DOORBELL,
7d669f50 4367 kvm_cpu_get_apicid(vcpu->cpu));
340d3bc3
SS
4368 else
4369 kvm_vcpu_wake_up(vcpu);
4370}
4371
411b44ba
SS
4372static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
4373{
4374 unsigned long flags;
4375 struct amd_svm_iommu_ir *cur;
4376
4377 spin_lock_irqsave(&svm->ir_list_lock, flags);
4378 list_for_each_entry(cur, &svm->ir_list, node) {
4379 if (cur->data != pi->ir_data)
4380 continue;
4381 list_del(&cur->node);
4382 kfree(cur);
4383 break;
4384 }
4385 spin_unlock_irqrestore(&svm->ir_list_lock, flags);
4386}
4387
4388static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
4389{
4390 int ret = 0;
4391 unsigned long flags;
4392 struct amd_svm_iommu_ir *ir;
4393
4394 /**
 4395	 * In some cases, the existing IRTE is updated and re-set,
 4396	 * so we need to check here if it has already been added
 4397	 * to the ir_list.
4398 */
4399 if (pi->ir_data && (pi->prev_ga_tag != 0)) {
4400 struct kvm *kvm = svm->vcpu.kvm;
4401 u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag);
4402 struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
4403 struct vcpu_svm *prev_svm;
4404
4405 if (!prev_vcpu) {
4406 ret = -EINVAL;
4407 goto out;
4408 }
4409
4410 prev_svm = to_svm(prev_vcpu);
4411 svm_ir_list_del(prev_svm, pi);
4412 }
4413
4414 /**
 4415	 * Allocate a new amd_iommu_pi_data entry, which will be
 4416	 * added to the per-vcpu ir_list.
4417 */
4418 ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL);
4419 if (!ir) {
4420 ret = -ENOMEM;
4421 goto out;
4422 }
4423 ir->data = pi->ir_data;
4424
4425 spin_lock_irqsave(&svm->ir_list_lock, flags);
4426 list_add(&ir->node, &svm->ir_list);
4427 spin_unlock_irqrestore(&svm->ir_list_lock, flags);
4428out:
4429 return ret;
4430}
4431
4432/**
4433 * Note:
4434 * The HW cannot support posting multicast/broadcast
4435 * interrupts to a vCPU. So, we still use legacy interrupt
 4436 * remapping for these kinds of interrupts.
4437 *
4438 * For lowest-priority interrupts, we only support
 4439 * those with a single CPU as the destination, e.g. the user
4440 * configures the interrupts via /proc/irq or uses
4441 * irqbalance to make the interrupts single-CPU.
4442 */
4443static int
4444get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
4445 struct vcpu_data *vcpu_info, struct vcpu_svm **svm)
4446{
4447 struct kvm_lapic_irq irq;
4448 struct kvm_vcpu *vcpu = NULL;
4449
4450 kvm_set_msi_irq(kvm, e, &irq);
4451
4452 if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
4453 pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
4454 __func__, irq.vector);
4455 return -1;
4456 }
4457
4458 pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
4459 irq.vector);
4460 *svm = to_svm(vcpu);
4461 vcpu_info->pi_desc_addr = page_to_phys((*svm)->avic_backing_page);
4462 vcpu_info->vector = irq.vector;
4463
4464 return 0;
4465}
4466
4467/*
4468 * svm_update_pi_irte - set IRTE for Posted-Interrupts
4469 *
4470 * @kvm: kvm
4471 * @host_irq: host irq of the interrupt
4472 * @guest_irq: gsi of the interrupt
4473 * @set: set or unset PI
4474 * returns 0 on success, < 0 on failure
4475 */
4476static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
4477 uint32_t guest_irq, bool set)
4478{
4479 struct kvm_kernel_irq_routing_entry *e;
4480 struct kvm_irq_routing_table *irq_rt;
4481 int idx, ret = -EINVAL;
4482
4483 if (!kvm_arch_has_assigned_device(kvm) ||
4484 !irq_remapping_cap(IRQ_POSTING_CAP))
4485 return 0;
4486
4487 pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
4488 __func__, host_irq, guest_irq, set);
4489
4490 idx = srcu_read_lock(&kvm->irq_srcu);
4491 irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
4492 WARN_ON(guest_irq >= irq_rt->nr_rt_entries);
4493
4494 hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
4495 struct vcpu_data vcpu_info;
4496 struct vcpu_svm *svm = NULL;
4497
4498 if (e->type != KVM_IRQ_ROUTING_MSI)
4499 continue;
4500
4501 /**
 4502		 * Here, we set up the IRTE in legacy mode in the following cases:
 4503		 * 1. When the interrupt cannot be targeted to a specific vcpu.
 4504		 * 2. When unsetting the posted interrupt.
 4505		 * 3. When APIC virtualization is disabled for the vcpu.
4506 */
4507 if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
4508 kvm_vcpu_apicv_active(&svm->vcpu)) {
4509 struct amd_iommu_pi_data pi;
4510
4511 /* Try to enable guest_mode in IRTE */
4512 pi.base = page_to_phys(svm->avic_backing_page) & AVIC_HPA_MASK;
4513 pi.ga_tag = AVIC_GATAG(kvm->arch.avic_vm_id,
4514 svm->vcpu.vcpu_id);
4515 pi.is_guest_mode = true;
4516 pi.vcpu_data = &vcpu_info;
4517 ret = irq_set_vcpu_affinity(host_irq, &pi);
4518
4519 /**
 4520			 * Here, we have successfully set up vcpu affinity in
 4521			 * IOMMU guest mode. Now, we need to store the posted
 4522			 * interrupt information in a per-vcpu ir_list so that
 4523			 * we can reference it directly when we update the vcpu
 4524			 * scheduling information in the IOMMU IRTE.
4525 */
4526 if (!ret && pi.is_guest_mode)
4527 svm_ir_list_add(svm, &pi);
4528 } else {
4529 /* Use legacy mode in IRTE */
4530 struct amd_iommu_pi_data pi;
4531
4532 /**
4533 * Here, pi is used to:
4534 * - Tell IOMMU to use legacy mode for this interrupt.
4535 * - Retrieve ga_tag of prior interrupt remapping data.
4536 */
4537 pi.is_guest_mode = false;
4538 ret = irq_set_vcpu_affinity(host_irq, &pi);
4539
4540 /**
4541 * Check if the posted interrupt was previously
 4542			 * set up in guest_mode by checking if the ga_tag
4543 * was cached. If so, we need to clean up the per-vcpu
4544 * ir_list.
4545 */
4546 if (!ret && pi.prev_ga_tag) {
4547 int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
4548 struct kvm_vcpu *vcpu;
4549
4550 vcpu = kvm_get_vcpu_by_id(kvm, id);
4551 if (vcpu)
4552 svm_ir_list_del(to_svm(vcpu), &pi);
4553 }
4554 }
4555
4556 if (!ret && svm) {
4557 trace_kvm_pi_irte_update(svm->vcpu.vcpu_id,
4558 host_irq, e->gsi,
4559 vcpu_info.vector,
4560 vcpu_info.pi_desc_addr, set);
4561 }
4562
4563 if (ret < 0) {
4564 pr_err("%s: failed to update PI IRTE\n", __func__);
4565 goto out;
4566 }
4567 }
4568
4569 ret = 0;
4570out:
4571 srcu_read_unlock(&kvm->irq_srcu, idx);
4572 return ret;
4573}
4574
95ba8273
GN
4575static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
4576{
4577 struct vcpu_svm *svm = to_svm(vcpu);
4578 struct vmcb *vmcb = svm->vmcb;
924584cc
JR
4579 int ret;
4580 ret = !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
4581 !(svm->vcpu.arch.hflags & HF_NMI_MASK);
4582 ret = ret && gif_set(svm) && nested_svm_nmi(svm);
4583
4584 return ret;
aaacfc9a
JR
4585}
4586
3cfc3092
JK
4587static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
4588{
4589 struct vcpu_svm *svm = to_svm(vcpu);
4590
4591 return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
4592}
4593
4594static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
4595{
4596 struct vcpu_svm *svm = to_svm(vcpu);
4597
4598 if (masked) {
4599 svm->vcpu.arch.hflags |= HF_NMI_MASK;
8a05a1b8 4600 set_intercept(svm, INTERCEPT_IRET);
3cfc3092
JK
4601 } else {
4602 svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
8a05a1b8 4603 clr_intercept(svm, INTERCEPT_IRET);
3cfc3092
JK
4604 }
4605}
4606
78646121
GN
4607static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
4608{
4609 struct vcpu_svm *svm = to_svm(vcpu);
4610 struct vmcb *vmcb = svm->vmcb;
7fcdb510
JR
4611 int ret;
4612
4613 if (!gif_set(svm) ||
4614 (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
4615 return 0;
4616
f6e78475 4617 ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF);
7fcdb510 4618
2030753d 4619 if (is_guest_mode(vcpu))
7fcdb510
JR
4620 return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);
4621
4622 return ret;
78646121
GN
4623}
4624
c9a7953f 4625static void enable_irq_window(struct kvm_vcpu *vcpu)
6aa8b732 4626{
219b65dc 4627 struct vcpu_svm *svm = to_svm(vcpu);
219b65dc 4628
340d3bc3
SS
4629 if (kvm_vcpu_apicv_active(vcpu))
4630 return;
4631
e0231715
JR
4632 /*
4633 * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes
4634 * 1, because that's a separate STGI/VMRUN intercept. The next time we
4635 * get that intercept, this function will be called again though and
4636 * we'll get the vintr intercept.
4637 */
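	/*
	 * Request a VINTR intercept and queue a dummy virtual interrupt so
	 * we get a VM exit as soon as the guest can accept interrupts.
	 */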
8fe54654 4638 if (gif_set(svm) && nested_svm_intr(svm)) {
219b65dc
AG
4639 svm_set_vintr(svm);
4640 svm_inject_irq(svm, 0x0);
4641 }
85f455f7
ED
4642}
4643
c9a7953f 4644static void enable_nmi_window(struct kvm_vcpu *vcpu)
c1150d8c 4645{
04d2cc77 4646 struct vcpu_svm *svm = to_svm(vcpu);
c1150d8c 4647
44c11430
GN
4648 if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
4649 == HF_NMI_MASK)
c9a7953f 4650 return; /* IRET will cause a vm exit */
44c11430 4651
e0231715
JR
4652 /*
 4653	 * Something prevents NMI from being injected. Single step over the possible
4654 * problem (IRET or exception injection or interrupt shadow)
4655 */
6be7d306 4656 svm->nmi_singlestep = true;
44c11430 4657 svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
c1150d8c
DL
4658}
4659
cbc94022
IE
4660static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
4661{
4662 return 0;
4663}
4664
d9e368d6
AK
4665static void svm_flush_tlb(struct kvm_vcpu *vcpu)
4666{
38e5e92f
JR
4667 struct vcpu_svm *svm = to_svm(vcpu);
4668
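	/*
	 * With flush-by-ASID, ask the next VMRUN to flush only this guest's
	 * ASID; otherwise force a new ASID to be assigned, which implicitly
	 * flushes the guest's TLB entries.
	 */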
4669 if (static_cpu_has(X86_FEATURE_FLUSHBYASID))
4670 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
4671 else
4672 svm->asid_generation--;
d9e368d6
AK
4673}
4674
04d2cc77
AK
4675static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
4676{
4677}
4678
d7bf8221
JR
4679static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
4680{
4681 struct vcpu_svm *svm = to_svm(vcpu);
4682
3bbf3565 4683 if (svm_nested_virtualize_tpr(vcpu))
88ab24ad
JR
4684 return;
4685
4ee546b4 4686 if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) {
d7bf8221 4687 int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
615d5193 4688 kvm_set_cr8(vcpu, cr8);
d7bf8221
JR
4689 }
4690}
4691
649d6864
JR
4692static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
4693{
4694 struct vcpu_svm *svm = to_svm(vcpu);
4695 u64 cr8;
4696
3bbf3565
SS
4697 if (svm_nested_virtualize_tpr(vcpu) ||
4698 kvm_vcpu_apicv_active(vcpu))
88ab24ad
JR
4699 return;
4700
649d6864
JR
4701 cr8 = kvm_get_cr8(vcpu);
4702 svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
4703 svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
4704}
4705
9222be18
GN
4706static void svm_complete_interrupts(struct vcpu_svm *svm)
4707{
4708 u8 vector;
4709 int type;
4710 u32 exitintinfo = svm->vmcb->control.exit_int_info;
66b7138f
JK
4711 unsigned int3_injected = svm->int3_injected;
4712
4713 svm->int3_injected = 0;
9222be18 4714
bd3d1ec3
AK
4715 /*
4716 * If we've made progress since setting HF_IRET_MASK, we've
4717 * executed an IRET and can allow NMI injection.
4718 */
4719 if ((svm->vcpu.arch.hflags & HF_IRET_MASK)
4720 && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) {
44c11430 4721 svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
3842d135
AK
4722 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
4723 }
44c11430 4724
9222be18
GN
4725 svm->vcpu.arch.nmi_injected = false;
4726 kvm_clear_exception_queue(&svm->vcpu);
4727 kvm_clear_interrupt_queue(&svm->vcpu);
4728
4729 if (!(exitintinfo & SVM_EXITINTINFO_VALID))
4730 return;
4731
3842d135
AK
4732 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
4733
9222be18
GN
4734 vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
4735 type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
4736
4737 switch (type) {
4738 case SVM_EXITINTINFO_TYPE_NMI:
4739 svm->vcpu.arch.nmi_injected = true;
4740 break;
4741 case SVM_EXITINTINFO_TYPE_EXEPT:
66b7138f
JK
4742 /*
4743 * In case of software exceptions, do not reinject the vector,
4744 * but re-execute the instruction instead. Rewind RIP first
4745 * if we emulated INT3 before.
4746 */
4747 if (kvm_exception_is_soft(vector)) {
4748 if (vector == BP_VECTOR && int3_injected &&
4749 kvm_is_linear_rip(&svm->vcpu, svm->int3_rip))
4750 kvm_rip_write(&svm->vcpu,
4751 kvm_rip_read(&svm->vcpu) -
4752 int3_injected);
9222be18 4753 break;
66b7138f 4754 }
9222be18
GN
4755 if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
4756 u32 err = svm->vmcb->control.exit_int_info_err;
ce7ddec4 4757 kvm_requeue_exception_e(&svm->vcpu, vector, err);
9222be18
GN
4758
4759 } else
ce7ddec4 4760 kvm_requeue_exception(&svm->vcpu, vector);
9222be18
GN
4761 break;
4762 case SVM_EXITINTINFO_TYPE_INTR:
66fd3f7f 4763 kvm_queue_interrupt(&svm->vcpu, vector, false);
9222be18
GN
4764 break;
4765 default:
4766 break;
4767 }
4768}
4769
b463a6f7
AK
4770static void svm_cancel_injection(struct kvm_vcpu *vcpu)
4771{
4772 struct vcpu_svm *svm = to_svm(vcpu);
4773 struct vmcb_control_area *control = &svm->vmcb->control;
4774
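	/*
	 * Present the pending injection as if it were an interrupted event
	 * so svm_complete_interrupts() re-queues it for the next entry.
	 */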
4775 control->exit_int_info = control->event_inj;
4776 control->exit_int_info_err = control->event_inj_err;
4777 control->event_inj = 0;
4778 svm_complete_interrupts(svm);
4779}
4780
851ba692 4781static void svm_vcpu_run(struct kvm_vcpu *vcpu)
6aa8b732 4782{
a2fa3e9f 4783 struct vcpu_svm *svm = to_svm(vcpu);
d9e368d6 4784
2041a06a
JR
4785 svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
4786 svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
4787 svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
4788
cd3ff653
JR
4789 /*
4790 * A vmexit emulation is required before the vcpu can be executed
4791 * again.
4792 */
4793 if (unlikely(svm->nested.exit_required))
4794 return;
4795
e756fc62 4796 pre_svm_run(svm);
6aa8b732 4797
649d6864
JR
4798 sync_lapic_to_cr8(vcpu);
4799
cda0ffdd 4800 svm->vmcb->save.cr2 = vcpu->arch.cr2;
6aa8b732 4801
04d2cc77
AK
4802 clgi();
4803
4804 local_irq_enable();
36241b8c 4805
6aa8b732 4806 asm volatile (
7454766f
AK
4807 "push %%" _ASM_BP "; \n\t"
4808 "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
4809 "mov %c[rcx](%[svm]), %%" _ASM_CX " \n\t"
4810 "mov %c[rdx](%[svm]), %%" _ASM_DX " \n\t"
4811 "mov %c[rsi](%[svm]), %%" _ASM_SI " \n\t"
4812 "mov %c[rdi](%[svm]), %%" _ASM_DI " \n\t"
4813 "mov %c[rbp](%[svm]), %%" _ASM_BP " \n\t"
05b3e0c2 4814#ifdef CONFIG_X86_64
fb3f0f51
RR
4815 "mov %c[r8](%[svm]), %%r8 \n\t"
4816 "mov %c[r9](%[svm]), %%r9 \n\t"
4817 "mov %c[r10](%[svm]), %%r10 \n\t"
4818 "mov %c[r11](%[svm]), %%r11 \n\t"
4819 "mov %c[r12](%[svm]), %%r12 \n\t"
4820 "mov %c[r13](%[svm]), %%r13 \n\t"
4821 "mov %c[r14](%[svm]), %%r14 \n\t"
4822 "mov %c[r15](%[svm]), %%r15 \n\t"
6aa8b732
AK
4823#endif
4824
6aa8b732 4825 /* Enter guest mode */
7454766f
AK
4826 "push %%" _ASM_AX " \n\t"
4827 "mov %c[vmcb](%[svm]), %%" _ASM_AX " \n\t"
4ecac3fd
AK
4828 __ex(SVM_VMLOAD) "\n\t"
4829 __ex(SVM_VMRUN) "\n\t"
4830 __ex(SVM_VMSAVE) "\n\t"
7454766f 4831 "pop %%" _ASM_AX " \n\t"
6aa8b732
AK
4832
4833 /* Save guest registers, load host registers */
7454766f
AK
4834 "mov %%" _ASM_BX ", %c[rbx](%[svm]) \n\t"
4835 "mov %%" _ASM_CX ", %c[rcx](%[svm]) \n\t"
4836 "mov %%" _ASM_DX ", %c[rdx](%[svm]) \n\t"
4837 "mov %%" _ASM_SI ", %c[rsi](%[svm]) \n\t"
4838 "mov %%" _ASM_DI ", %c[rdi](%[svm]) \n\t"
4839 "mov %%" _ASM_BP ", %c[rbp](%[svm]) \n\t"
05b3e0c2 4840#ifdef CONFIG_X86_64
fb3f0f51
RR
4841 "mov %%r8, %c[r8](%[svm]) \n\t"
4842 "mov %%r9, %c[r9](%[svm]) \n\t"
4843 "mov %%r10, %c[r10](%[svm]) \n\t"
4844 "mov %%r11, %c[r11](%[svm]) \n\t"
4845 "mov %%r12, %c[r12](%[svm]) \n\t"
4846 "mov %%r13, %c[r13](%[svm]) \n\t"
4847 "mov %%r14, %c[r14](%[svm]) \n\t"
4848 "mov %%r15, %c[r15](%[svm]) \n\t"
6aa8b732 4849#endif
7454766f 4850 "pop %%" _ASM_BP
6aa8b732 4851 :
fb3f0f51 4852 : [svm]"a"(svm),
6aa8b732 4853 [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
ad312c7c
ZX
4854 [rbx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBX])),
4855 [rcx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RCX])),
4856 [rdx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDX])),
4857 [rsi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RSI])),
4858 [rdi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDI])),
4859 [rbp]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBP]))
05b3e0c2 4860#ifdef CONFIG_X86_64
ad312c7c
ZX
4861 , [r8]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R8])),
4862 [r9]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R9])),
4863 [r10]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R10])),
4864 [r11]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R11])),
4865 [r12]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R12])),
4866 [r13]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R13])),
4867 [r14]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R14])),
4868 [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15]))
6aa8b732 4869#endif
54a08c04
LV
4870 : "cc", "memory"
4871#ifdef CONFIG_X86_64
7454766f 4872 , "rbx", "rcx", "rdx", "rsi", "rdi"
54a08c04 4873 , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
7454766f
AK
4874#else
4875 , "ebx", "ecx", "edx", "esi", "edi"
54a08c04
LV
4876#endif
4877 );
6aa8b732 4878
82ca2d10
AK
4879#ifdef CONFIG_X86_64
4880 wrmsrl(MSR_GS_BASE, svm->host.gs_base);
4881#else
dacccfdd 4882 loadsegment(fs, svm->host.fs);
831ca609
AK
4883#ifndef CONFIG_X86_32_LAZY_GS
4884 loadsegment(gs, svm->host.gs);
4885#endif
9581d442 4886#endif
6aa8b732
AK
4887
4888 reload_tss(vcpu);
4889
56ba47dd
AK
4890 local_irq_disable();
4891
13c34e07
AK
4892 vcpu->arch.cr2 = svm->vmcb->save.cr2;
4893 vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
4894 vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
4895 vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
4896
3781c01c
JR
4897 if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
4898 kvm_before_handle_nmi(&svm->vcpu);
4899
4900 stgi();
4901
4902 /* Any pending NMI will happen here */
4903
4904 if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
4905 kvm_after_handle_nmi(&svm->vcpu);
4906
d7bf8221
JR
4907 sync_cr8_to_lapic(vcpu);
4908
a2fa3e9f 4909 svm->next_rip = 0;
9222be18 4910
38e5e92f
JR
4911 svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
4912
631bc487
GN
4913 /* if exit due to PF check for async PF */
4914 if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
4915 svm->apf_reason = kvm_read_and_reset_pf_reason();
4916
6de4f3ad
AK
4917 if (npt_enabled) {
4918 vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
4919 vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR);
4920 }
fe5913e4
JR
4921
4922 /*
4923 * We need to handle MC intercepts here before the vcpu has a chance to
4924 * change the physical cpu
4925 */
4926 if (unlikely(svm->vmcb->control.exit_code ==
4927 SVM_EXIT_EXCP_BASE + MC_VECTOR))
4928 svm_handle_mce(svm);
8d28fec4
RJ
4929
4930 mark_all_clean(svm->vmcb);
6aa8b732
AK
4931}
4932
6aa8b732
AK
4933static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
4934{
a2fa3e9f
GH
4935 struct vcpu_svm *svm = to_svm(vcpu);
4936
4937 svm->vmcb->save.cr3 = root;
dcca1a65 4938 mark_dirty(svm->vmcb, VMCB_CR);
f40f6a45 4939 svm_flush_tlb(vcpu);
6aa8b732
AK
4940}
4941
1c97f0a0
JR
4942static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
4943{
4944 struct vcpu_svm *svm = to_svm(vcpu);
4945
4946 svm->vmcb->control.nested_cr3 = root;
b2747166 4947 mark_dirty(svm->vmcb, VMCB_NPT);
1c97f0a0
JR
4948
4949 /* Also sync guest cr3 here in case we live migrate */
9f8fe504 4950 svm->vmcb->save.cr3 = kvm_read_cr3(vcpu);
dcca1a65 4951 mark_dirty(svm->vmcb, VMCB_CR);
1c97f0a0 4952
f40f6a45 4953 svm_flush_tlb(vcpu);
1c97f0a0
JR
4954}
4955
6aa8b732
AK
4956static int is_disabled(void)
4957{
6031a61c
JR
4958 u64 vm_cr;
4959
4960 rdmsrl(MSR_VM_CR, vm_cr);
4961 if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE))
4962 return 1;
4963
6aa8b732
AK
4964 return 0;
4965}
4966
102d8325
IM
4967static void
4968svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
4969{
4970 /*
4971 * Patch in the VMMCALL instruction:
4972 */
4973 hypercall[0] = 0x0f;
4974 hypercall[1] = 0x01;
4975 hypercall[2] = 0xd9;
102d8325
IM
4976}
4977
002c7f7c
YS
4978static void svm_check_processor_compat(void *rtn)
4979{
4980 *(int *)rtn = 0;
4981}
4982
774ead3a
AK
4983static bool svm_cpu_has_accelerated_tpr(void)
4984{
4985 return false;
4986}
4987
6d396b55
PB
4988static bool svm_has_high_real_mode_segbase(void)
4989{
4990 return true;
4991}
4992
fc07e76a
PB
4993static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
4994{
4995 return 0;
4996}
4997
0e851880
SY
4998static void svm_cpuid_update(struct kvm_vcpu *vcpu)
4999{
6092d3d3 5000 struct vcpu_svm *svm = to_svm(vcpu);
46781eae 5001 struct kvm_cpuid_entry2 *entry;
6092d3d3
JR
5002
5003 /* Update nrips enabled cache */
5004 svm->nrips_enabled = !!guest_cpuid_has_nrips(&svm->vcpu);
46781eae
SS
5005
5006 if (!kvm_vcpu_apicv_active(vcpu))
5007 return;
5008
5009 entry = kvm_find_cpuid_entry(vcpu, 1, 0);
5010 if (entry)
5011 entry->ecx &= ~bit(X86_FEATURE_X2APIC);
0e851880
SY
5012}
5013
d4330ef2
JR
5014static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
5015{
c2c63a49 5016 switch (func) {
46781eae
SS
5017 case 0x1:
5018 if (avic)
5019 entry->ecx &= ~bit(X86_FEATURE_X2APIC);
5020 break;
4c62a2dc
JR
5021 case 0x80000001:
5022 if (nested)
5023 entry->ecx |= (1 << 2); /* Set SVM bit */
5024 break;
c2c63a49
JR
5025 case 0x8000000A:
5026 entry->eax = 1; /* SVM revision 1 */
 5027		entry->ebx = 8; /* Let's support 8 ASIDs in case we add proper
5028 ASID emulation to nested SVM */
5029 entry->ecx = 0; /* Reserved */
7a190667
JR
 5030		entry->edx = 0; /* By default do not support any
5031 additional features */
5032
5033 /* Support next_rip if host supports it */
2a6b20b8 5034 if (boot_cpu_has(X86_FEATURE_NRIPS))
7a190667 5035 entry->edx |= SVM_FEATURE_NRIP;
c2c63a49 5036
3d4aeaad
JR
5037 /* Support NPT for the guest if enabled */
5038 if (npt_enabled)
5039 entry->edx |= SVM_FEATURE_NPT;
5040
c2c63a49
JR
5041 break;
5042 }
d4330ef2
JR
5043}
5044
17cc3935 5045static int svm_get_lpage_level(void)
344f414f 5046{
17cc3935 5047 return PT_PDPE_LEVEL;
344f414f
JR
5048}
5049
4e47c7a6
SY
5050static bool svm_rdtscp_supported(void)
5051{
46896c73 5052 return boot_cpu_has(X86_FEATURE_RDTSCP);
4e47c7a6
SY
5053}
5054
ad756a16
MJ
5055static bool svm_invpcid_supported(void)
5056{
5057 return false;
5058}
5059
93c4adc7
PB
5060static bool svm_mpx_supported(void)
5061{
5062 return false;
5063}
5064
55412b2e
WL
5065static bool svm_xsaves_supported(void)
5066{
5067 return false;
5068}
5069
f5f48ee1
SY
5070static bool svm_has_wbinvd_exit(void)
5071{
5072 return true;
5073}
5074
02daab21
AK
5075static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
5076{
5077 struct vcpu_svm *svm = to_svm(vcpu);
5078
18c918c5 5079 set_exception_intercept(svm, NM_VECTOR);
66a562f7 5080 update_cr0_intercept(svm);
02daab21
AK
5081}
5082
8061252e 5083#define PRE_EX(exit) { .exit_code = (exit), \
40e19b51 5084 .stage = X86_ICPT_PRE_EXCEPT, }
cfec82cb 5085#define POST_EX(exit) { .exit_code = (exit), \
40e19b51 5086 .stage = X86_ICPT_POST_EXCEPT, }
d7eb8203 5087#define POST_MEM(exit) { .exit_code = (exit), \
40e19b51 5088 .stage = X86_ICPT_POST_MEMACCESS, }
cfec82cb 5089
09941fbb 5090static const struct __x86_intercept {
cfec82cb
JR
5091 u32 exit_code;
5092 enum x86_intercept_stage stage;
cfec82cb
JR
5093} x86_intercept_map[] = {
5094 [x86_intercept_cr_read] = POST_EX(SVM_EXIT_READ_CR0),
5095 [x86_intercept_cr_write] = POST_EX(SVM_EXIT_WRITE_CR0),
5096 [x86_intercept_clts] = POST_EX(SVM_EXIT_WRITE_CR0),
5097 [x86_intercept_lmsw] = POST_EX(SVM_EXIT_WRITE_CR0),
5098 [x86_intercept_smsw] = POST_EX(SVM_EXIT_READ_CR0),
3b88e41a
JR
5099 [x86_intercept_dr_read] = POST_EX(SVM_EXIT_READ_DR0),
5100 [x86_intercept_dr_write] = POST_EX(SVM_EXIT_WRITE_DR0),
dee6bb70
JR
5101 [x86_intercept_sldt] = POST_EX(SVM_EXIT_LDTR_READ),
5102 [x86_intercept_str] = POST_EX(SVM_EXIT_TR_READ),
5103 [x86_intercept_lldt] = POST_EX(SVM_EXIT_LDTR_WRITE),
5104 [x86_intercept_ltr] = POST_EX(SVM_EXIT_TR_WRITE),
5105 [x86_intercept_sgdt] = POST_EX(SVM_EXIT_GDTR_READ),
5106 [x86_intercept_sidt] = POST_EX(SVM_EXIT_IDTR_READ),
5107 [x86_intercept_lgdt] = POST_EX(SVM_EXIT_GDTR_WRITE),
5108 [x86_intercept_lidt] = POST_EX(SVM_EXIT_IDTR_WRITE),
01de8b09
JR
5109 [x86_intercept_vmrun] = POST_EX(SVM_EXIT_VMRUN),
5110 [x86_intercept_vmmcall] = POST_EX(SVM_EXIT_VMMCALL),
5111 [x86_intercept_vmload] = POST_EX(SVM_EXIT_VMLOAD),
5112 [x86_intercept_vmsave] = POST_EX(SVM_EXIT_VMSAVE),
5113 [x86_intercept_stgi] = POST_EX(SVM_EXIT_STGI),
5114 [x86_intercept_clgi] = POST_EX(SVM_EXIT_CLGI),
5115 [x86_intercept_skinit] = POST_EX(SVM_EXIT_SKINIT),
5116 [x86_intercept_invlpga] = POST_EX(SVM_EXIT_INVLPGA),
d7eb8203
JR
5117 [x86_intercept_rdtscp] = POST_EX(SVM_EXIT_RDTSCP),
5118 [x86_intercept_monitor] = POST_MEM(SVM_EXIT_MONITOR),
5119 [x86_intercept_mwait] = POST_EX(SVM_EXIT_MWAIT),
8061252e
JR
5120 [x86_intercept_invlpg] = POST_EX(SVM_EXIT_INVLPG),
5121 [x86_intercept_invd] = POST_EX(SVM_EXIT_INVD),
5122 [x86_intercept_wbinvd] = POST_EX(SVM_EXIT_WBINVD),
5123 [x86_intercept_wrmsr] = POST_EX(SVM_EXIT_MSR),
5124 [x86_intercept_rdtsc] = POST_EX(SVM_EXIT_RDTSC),
5125 [x86_intercept_rdmsr] = POST_EX(SVM_EXIT_MSR),
5126 [x86_intercept_rdpmc] = POST_EX(SVM_EXIT_RDPMC),
5127 [x86_intercept_cpuid] = PRE_EX(SVM_EXIT_CPUID),
5128 [x86_intercept_rsm] = PRE_EX(SVM_EXIT_RSM),
bf608f88
JR
5129 [x86_intercept_pause] = PRE_EX(SVM_EXIT_PAUSE),
5130 [x86_intercept_pushf] = PRE_EX(SVM_EXIT_PUSHF),
5131 [x86_intercept_popf] = PRE_EX(SVM_EXIT_POPF),
5132 [x86_intercept_intn] = PRE_EX(SVM_EXIT_SWINT),
5133 [x86_intercept_iret] = PRE_EX(SVM_EXIT_IRET),
5134 [x86_intercept_icebp] = PRE_EX(SVM_EXIT_ICEBP),
5135 [x86_intercept_hlt] = POST_EX(SVM_EXIT_HLT),
f6511935
JR
5136 [x86_intercept_in] = POST_EX(SVM_EXIT_IOIO),
5137 [x86_intercept_ins] = POST_EX(SVM_EXIT_IOIO),
5138 [x86_intercept_out] = POST_EX(SVM_EXIT_IOIO),
5139 [x86_intercept_outs] = POST_EX(SVM_EXIT_IOIO),
cfec82cb
JR
5140};
5141
8061252e 5142#undef PRE_EX
cfec82cb 5143#undef POST_EX
d7eb8203 5144#undef POST_MEM
cfec82cb 5145
8a76d7f2
JR
5146static int svm_check_intercept(struct kvm_vcpu *vcpu,
5147 struct x86_instruction_info *info,
5148 enum x86_intercept_stage stage)
5149{
cfec82cb
JR
5150 struct vcpu_svm *svm = to_svm(vcpu);
5151 int vmexit, ret = X86EMUL_CONTINUE;
5152 struct __x86_intercept icpt_info;
5153 struct vmcb *vmcb = svm->vmcb;
5154
5155 if (info->intercept >= ARRAY_SIZE(x86_intercept_map))
5156 goto out;
5157
5158 icpt_info = x86_intercept_map[info->intercept];
5159
40e19b51 5160 if (stage != icpt_info.stage)
cfec82cb
JR
5161 goto out;
5162
5163 switch (icpt_info.exit_code) {
5164 case SVM_EXIT_READ_CR0:
5165 if (info->intercept == x86_intercept_cr_read)
5166 icpt_info.exit_code += info->modrm_reg;
5167 break;
5168 case SVM_EXIT_WRITE_CR0: {
5169 unsigned long cr0, val;
5170 u64 intercept;
5171
5172 if (info->intercept == x86_intercept_cr_write)
5173 icpt_info.exit_code += info->modrm_reg;
5174
62baf44c
JK
5175 if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0 ||
5176 info->intercept == x86_intercept_clts)
cfec82cb
JR
5177 break;
5178
5179 intercept = svm->nested.intercept;
5180
5181 if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))
5182 break;
5183
5184 cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK;
5185 val = info->src_val & ~SVM_CR0_SELECTIVE_MASK;
5186
5187 if (info->intercept == x86_intercept_lmsw) {
5188 cr0 &= 0xfUL;
5189 val &= 0xfUL;
5190 /* lmsw can't clear PE - catch this here */
5191 if (cr0 & X86_CR0_PE)
5192 val |= X86_CR0_PE;
5193 }
5194
5195 if (cr0 ^ val)
5196 icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE;
5197
5198 break;
5199 }
3b88e41a
JR
5200 case SVM_EXIT_READ_DR0:
5201 case SVM_EXIT_WRITE_DR0:
5202 icpt_info.exit_code += info->modrm_reg;
5203 break;
8061252e
JR
5204 case SVM_EXIT_MSR:
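/* exit_info_1 distinguishes WRMSR (1) from RDMSR (0), as hardware reports it. */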
5205 if (info->intercept == x86_intercept_wrmsr)
5206 vmcb->control.exit_info_1 = 1;
5207 else
5208 vmcb->control.exit_info_1 = 0;
5209 break;
bf608f88
JR
5210 case SVM_EXIT_PAUSE:
 5211 /*
 5212 * PAUSE is REP NOP, so the decoder reports it to us as a NOP;
 5213 * check for the REP prefix here to tell the two apart.
 5214 */
 5215 if (info->rep_prefix != REPE_PREFIX)
 5216 goto out;
 break;
f6511935
JR
5217 case SVM_EXIT_IOIO: {
5218 u64 exit_info;
5219 u32 bytes;
5220
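/*
 * Build exit_info_1 the way the IOIO intercept would: port number in
 * bits 31:16, TYPE set for IN, STR for the string forms, REP when a
 * rep prefix is present, and the access size in the SZ field.
 */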
f6511935
JR
5221 if (info->intercept == x86_intercept_in ||
5222 info->intercept == x86_intercept_ins) {
6cbc5f5a
JK
5223 exit_info = ((info->src_val & 0xffff) << 16) |
5224 SVM_IOIO_TYPE_MASK;
f6511935 5225 bytes = info->dst_bytes;
6493f157 5226 } else {
6cbc5f5a 5227 exit_info = (info->dst_val & 0xffff) << 16;
6493f157 5228 bytes = info->src_bytes;
f6511935
JR
5229 }
5230
5231 if (info->intercept == x86_intercept_outs ||
5232 info->intercept == x86_intercept_ins)
5233 exit_info |= SVM_IOIO_STR_MASK;
5234
5235 if (info->rep_prefix)
5236 exit_info |= SVM_IOIO_REP_MASK;
5237
5238 bytes = min(bytes, 4u);
5239
5240 exit_info |= bytes << SVM_IOIO_SIZE_SHIFT;
5241
5242 exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1);
5243
5244 vmcb->control.exit_info_1 = exit_info;
5245 vmcb->control.exit_info_2 = info->next_rip;
5246
5247 break;
5248 }
cfec82cb
JR
5249 default:
5250 break;
5251 }
5252
f104765b
BD
5253 /* TODO: Advertise NRIPS to guest hypervisor unconditionally */
5254 if (static_cpu_has(X86_FEATURE_NRIPS))
5255 vmcb->control.next_rip = info->next_rip;
cfec82cb
JR
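/*
 * Pretend the corresponding #VMEXIT happened: stash the exit code in the
 * VMCB and let nested_svm_exit_handled() decide whether L1's intercept
 * bitmap claims it.
 */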
5256 vmcb->control.exit_code = icpt_info.exit_code;
5257 vmexit = nested_svm_exit_handled(svm);
5258
5259 ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED
5260 : X86EMUL_CONTINUE;
5261
5262out:
5263 return ret;
8a76d7f2
JR
5264}
5265
a547c6db
YZ
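/*
 * Interrupts are still disabled when KVM calls this after a #VMEXIT;
 * briefly re-enable them so a pending host interrupt can be serviced
 * before the next guest entry.
 */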
5266static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
5267{
5268 local_irq_enable();
f2485b3e
PB
5269 /*
5270 * We must have an instruction with interrupts enabled, so
5271 * the timer interrupt isn't delayed by the interrupt shadow.
5272 */
5273 asm("nop");
5274 local_irq_disable();
a547c6db
YZ
5275}
5276
ae97a3b8
RK
5277static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
5278{
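	/* Nothing to do on SVM when a vCPU is scheduled back onto a CPU. */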
5279}
5280
be8ca170
SS
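/*
 * After APIC register state has been restored (e.g. by userspace via
 * KVM_SET_LAPIC), refresh the AVIC tables that are derived from it:
 * physical APIC ID, DFR and LDR.
 */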
5281static inline void avic_post_state_restore(struct kvm_vcpu *vcpu)
5282{
5283 if (avic_handle_apic_id_update(vcpu) != 0)
5284 return;
5285 if (avic_handle_dfr_update(vcpu) != 0)
5286 return;
5287 avic_handle_ldr_update(vcpu);
5288}
5289
404f6aac 5290static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
6aa8b732
AK
5291 .cpu_has_kvm_support = has_svm,
5292 .disabled_by_bios = is_disabled,
5293 .hardware_setup = svm_hardware_setup,
5294 .hardware_unsetup = svm_hardware_unsetup,
002c7f7c 5295 .check_processor_compatibility = svm_check_processor_compat,
6aa8b732
AK
5296 .hardware_enable = svm_hardware_enable,
5297 .hardware_disable = svm_hardware_disable,
774ead3a 5298 .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,
6d396b55 5299 .cpu_has_high_real_mode_segbase = svm_has_high_real_mode_segbase,
6aa8b732
AK
5300
5301 .vcpu_create = svm_create_vcpu,
5302 .vcpu_free = svm_free_vcpu,
04d2cc77 5303 .vcpu_reset = svm_vcpu_reset,
6aa8b732 5304
44a95dae
SS
5305 .vm_init = avic_vm_init,
5306 .vm_destroy = avic_vm_destroy,
5307
04d2cc77 5308 .prepare_guest_switch = svm_prepare_guest_switch,
6aa8b732
AK
5309 .vcpu_load = svm_vcpu_load,
5310 .vcpu_put = svm_vcpu_put,
8221c137
SS
5311 .vcpu_blocking = svm_vcpu_blocking,
5312 .vcpu_unblocking = svm_vcpu_unblocking,
6aa8b732 5313
a96036b8 5314 .update_bp_intercept = update_bp_intercept,
6aa8b732
AK
5315 .get_msr = svm_get_msr,
5316 .set_msr = svm_set_msr,
5317 .get_segment_base = svm_get_segment_base,
5318 .get_segment = svm_get_segment,
5319 .set_segment = svm_set_segment,
2e4d2653 5320 .get_cpl = svm_get_cpl,
1747fb71 5321 .get_cs_db_l_bits = kvm_get_cs_db_l_bits,
e8467fda 5322 .decache_cr0_guest_bits = svm_decache_cr0_guest_bits,
aff48baa 5323 .decache_cr3 = svm_decache_cr3,
25c4c276 5324 .decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
6aa8b732 5325 .set_cr0 = svm_set_cr0,
6aa8b732
AK
5326 .set_cr3 = svm_set_cr3,
5327 .set_cr4 = svm_set_cr4,
5328 .set_efer = svm_set_efer,
5329 .get_idt = svm_get_idt,
5330 .set_idt = svm_set_idt,
5331 .get_gdt = svm_get_gdt,
5332 .set_gdt = svm_set_gdt,
73aaf249
JK
5333 .get_dr6 = svm_get_dr6,
5334 .set_dr6 = svm_set_dr6,
020df079 5335 .set_dr7 = svm_set_dr7,
facb0139 5336 .sync_dirty_debug_regs = svm_sync_dirty_debug_regs,
6de4f3ad 5337 .cache_reg = svm_cache_reg,
6aa8b732
AK
5338 .get_rflags = svm_get_rflags,
5339 .set_rflags = svm_set_rflags,
be94f6b7
HH
5340
5341 .get_pkru = svm_get_pkru,
5342
0fdd74f7 5343 .fpu_activate = svm_fpu_activate,
02daab21 5344 .fpu_deactivate = svm_fpu_deactivate,
6aa8b732 5345
6aa8b732 5346 .tlb_flush = svm_flush_tlb,
6aa8b732 5347
6aa8b732 5348 .run = svm_vcpu_run,
04d2cc77 5349 .handle_exit = handle_exit,
6aa8b732 5350 .skip_emulated_instruction = skip_emulated_instruction,
2809f5d2
GC
5351 .set_interrupt_shadow = svm_set_interrupt_shadow,
5352 .get_interrupt_shadow = svm_get_interrupt_shadow,
102d8325 5353 .patch_hypercall = svm_patch_hypercall,
2a8067f1 5354 .set_irq = svm_set_irq,
95ba8273 5355 .set_nmi = svm_inject_nmi,
298101da 5356 .queue_exception = svm_queue_exception,
b463a6f7 5357 .cancel_injection = svm_cancel_injection,
78646121 5358 .interrupt_allowed = svm_interrupt_allowed,
95ba8273 5359 .nmi_allowed = svm_nmi_allowed,
3cfc3092
JK
5360 .get_nmi_mask = svm_get_nmi_mask,
5361 .set_nmi_mask = svm_set_nmi_mask,
95ba8273
GN
5362 .enable_nmi_window = enable_nmi_window,
5363 .enable_irq_window = enable_irq_window,
5364 .update_cr8_intercept = update_cr8_intercept,
8d14695f 5365 .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
d62caabb
AS
5366 .get_enable_apicv = svm_get_enable_apicv,
5367 .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl,
c7c9c56c 5368 .load_eoi_exitmap = svm_load_eoi_exitmap,
44a95dae
SS
5369 .hwapic_irr_update = svm_hwapic_irr_update,
5370 .hwapic_isr_update = svm_hwapic_isr_update,
be8ca170 5371 .apicv_post_state_restore = avic_post_state_restore,
cbc94022
IE
5372
5373 .set_tss_addr = svm_set_tss_addr,
67253af5 5374 .get_tdp_level = get_npt_level,
4b12f0de 5375 .get_mt_mask = svm_get_mt_mask,
229456fc 5376
586f9607 5377 .get_exit_info = svm_get_exit_info,
586f9607 5378
17cc3935 5379 .get_lpage_level = svm_get_lpage_level,
0e851880
SY
5380
5381 .cpuid_update = svm_cpuid_update,
4e47c7a6
SY
5382
5383 .rdtscp_supported = svm_rdtscp_supported,
ad756a16 5384 .invpcid_supported = svm_invpcid_supported,
93c4adc7 5385 .mpx_supported = svm_mpx_supported,
55412b2e 5386 .xsaves_supported = svm_xsaves_supported,
d4330ef2
JR
5387
5388 .set_supported_cpuid = svm_set_supported_cpuid,
f5f48ee1
SY
5389
5390 .has_wbinvd_exit = svm_has_wbinvd_exit,
99e3e30a
ZA
5391
5392 .write_tsc_offset = svm_write_tsc_offset,
1c97f0a0
JR
5393
5394 .set_tdp_cr3 = set_tdp_cr3,
8a76d7f2
JR
5395
5396 .check_intercept = svm_check_intercept,
a547c6db 5397 .handle_external_intr = svm_handle_external_intr,
ae97a3b8
RK
5398
5399 .sched_in = svm_sched_in,
25462f7f
WH
5400
5401 .pmu_ops = &amd_pmu_ops,
340d3bc3 5402 .deliver_posted_interrupt = svm_deliver_avic_intr,
411b44ba 5403 .update_pi_irte = svm_update_pi_irte,
6aa8b732
AK
5404};
5405
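/*
 * Module entry point: register svm_x86_ops with the generic KVM module.
 * kvm_init() is told the size and alignment of struct vcpu_svm so the
 * common code can allocate vCPUs for us.
 */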
5406static int __init svm_init(void)
5407{
cb498ea2 5408 return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm),
0ee75bea 5409 __alignof__(struct vcpu_svm), THIS_MODULE);
6aa8b732
AK
5410}
5411
5412static void __exit svm_exit(void)
5413{
cb498ea2 5414 kvm_exit();
6aa8b732
AK
5415}
5416
5417module_init(svm_init)
5418module_exit(svm_exit)