]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - arch/x86/kvm/svm.c
KVM: x86: Collect information for setting TSC scaling ratio
[mirror_ubuntu-artful-kernel.git] / arch / x86 / kvm / svm.c
CommitLineData
6aa8b732
AK
1/*
2 * Kernel-based Virtual Machine driver for Linux
3 *
4 * AMD SVM support
5 *
6 * Copyright (C) 2006 Qumranet, Inc.
9611c187 7 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
6aa8b732
AK
8 *
9 * Authors:
10 * Yaniv Kamay <yaniv@qumranet.com>
11 * Avi Kivity <avi@qumranet.com>
12 *
13 * This work is licensed under the terms of the GNU GPL, version 2. See
14 * the COPYING file in the top-level directory.
15 *
16 */
edf88417
AK
17#include <linux/kvm_host.h>
18
85f455f7 19#include "irq.h"
1d737c8a 20#include "mmu.h"
5fdbf976 21#include "kvm_cache_regs.h"
fe4c7b19 22#include "x86.h"
66f7b72e 23#include "cpuid.h"
25462f7f 24#include "pmu.h"
e495606d 25
6aa8b732 26#include <linux/module.h>
ae759544 27#include <linux/mod_devicetable.h>
9d8f549d 28#include <linux/kernel.h>
6aa8b732
AK
29#include <linux/vmalloc.h>
30#include <linux/highmem.h>
e8edc6e0 31#include <linux/sched.h>
af658dca 32#include <linux/trace_events.h>
5a0e3ad6 33#include <linux/slab.h>
6aa8b732 34
1018faa6 35#include <asm/perf_event.h>
67ec6607 36#include <asm/tlbflush.h>
e495606d 37#include <asm/desc.h>
facb0139 38#include <asm/debugreg.h>
631bc487 39#include <asm/kvm_para.h>
6aa8b732 40
63d1142f 41#include <asm/virtext.h>
229456fc 42#include "trace.h"
63d1142f 43
4ecac3fd
AK
44#define __ex(x) __kvm_handle_fault_on_reboot(x)
45
6aa8b732
AK
46MODULE_AUTHOR("Qumranet");
47MODULE_LICENSE("GPL");
48
ae759544
JT
49static const struct x86_cpu_id svm_cpu_id[] = {
50 X86_FEATURE_MATCH(X86_FEATURE_SVM),
51 {}
52};
53MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
54
6aa8b732
AK
55#define IOPM_ALLOC_ORDER 2
56#define MSRPM_ALLOC_ORDER 1
57
6aa8b732
AK
58#define SEG_TYPE_LDT 2
59#define SEG_TYPE_BUSY_TSS16 3
60
6bc31bdc
AP
61#define SVM_FEATURE_NPT (1 << 0)
62#define SVM_FEATURE_LBRV (1 << 1)
63#define SVM_FEATURE_SVML (1 << 2)
64#define SVM_FEATURE_NRIP (1 << 3)
ddce97aa
AP
65#define SVM_FEATURE_TSC_RATE (1 << 4)
66#define SVM_FEATURE_VMCB_CLEAN (1 << 5)
67#define SVM_FEATURE_FLUSH_ASID (1 << 6)
68#define SVM_FEATURE_DECODE_ASSIST (1 << 7)
6bc31bdc 69#define SVM_FEATURE_PAUSE_FILTER (1 << 10)
80b7706e 70
410e4d57
JR
71#define NESTED_EXIT_HOST 0 /* Exit handled on host level */
72#define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */
73#define NESTED_EXIT_CONTINUE 2 /* Further checks needed */
74
24e09cbf
JR
75#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
76
fbc0db76 77#define TSC_RATIO_RSVD 0xffffff0000000000ULL
92a1f12d
JR
78#define TSC_RATIO_MIN 0x0000000000000001ULL
79#define TSC_RATIO_MAX 0x000000ffffffffffULL
fbc0db76 80
67ec6607
JR
81static bool erratum_383_found __read_mostly;
82
6c8166a7
AK
83static const u32 host_save_user_msrs[] = {
84#ifdef CONFIG_X86_64
85 MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
86 MSR_FS_BASE,
87#endif
88 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
89};
90
91#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
92
93struct kvm_vcpu;
94
e6aa9abd
JR
95struct nested_state {
96 struct vmcb *hsave;
97 u64 hsave_msr;
4a810181 98 u64 vm_cr_msr;
e6aa9abd
JR
99 u64 vmcb;
100
101 /* These are the merged vectors */
102 u32 *msrpm;
103
104 /* gpa pointers to the real vectors */
105 u64 vmcb_msrpm;
ce2ac085 106 u64 vmcb_iopm;
aad42c64 107
cd3ff653
JR
108 /* A VMEXIT is required but not yet emulated */
109 bool exit_required;
110
aad42c64 111 /* cache for intercepts of the guest */
4ee546b4 112 u32 intercept_cr;
3aed041a 113 u32 intercept_dr;
aad42c64
JR
114 u32 intercept_exceptions;
115 u64 intercept;
116
5bd2edc3
JR
117 /* Nested Paging related state */
118 u64 nested_cr3;
e6aa9abd
JR
119};
120
323c3d80
JR
121#define MSRPM_OFFSETS 16
122static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
123
2b036c6b
BO
124/*
125 * Set osvw_len to higher value when updated Revision Guides
126 * are published and we know what the new status bits are
127 */
128static uint64_t osvw_len = 4, osvw_status;
129
6c8166a7
AK
130struct vcpu_svm {
131 struct kvm_vcpu vcpu;
132 struct vmcb *vmcb;
133 unsigned long vmcb_pa;
134 struct svm_cpu_data *svm_data;
135 uint64_t asid_generation;
136 uint64_t sysenter_esp;
137 uint64_t sysenter_eip;
138
139 u64 next_rip;
140
141 u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
afe9e66f 142 struct {
dacccfdd
AK
143 u16 fs;
144 u16 gs;
145 u16 ldt;
afe9e66f
AK
146 u64 gs_base;
147 } host;
6c8166a7
AK
148
149 u32 *msrpm;
6c8166a7 150
bd3d1ec3
AK
151 ulong nmi_iret_rip;
152
e6aa9abd 153 struct nested_state nested;
6be7d306
JK
154
155 bool nmi_singlestep;
66b7138f
JK
156
157 unsigned int3_injected;
158 unsigned long int3_rip;
631bc487 159 u32 apf_reason;
fbc0db76
JR
160
161 u64 tsc_ratio;
6092d3d3
JR
162
163 /* cached guest cpuid flags for faster access */
164 bool nrips_enabled : 1;
6c8166a7
AK
165};
166
fbc0db76
JR
167static DEFINE_PER_CPU(u64, current_tsc_ratio);
168#define TSC_RATIO_DEFAULT 0x0100000000ULL
169
455716fa
JR
170#define MSR_INVALID 0xffffffffU
171
09941fbb 172static const struct svm_direct_access_msrs {
ac72a9b7
JR
173 u32 index; /* Index of the MSR */
174 bool always; /* True if intercept is always on */
175} direct_access_msrs[] = {
8c06585d 176 { .index = MSR_STAR, .always = true },
ac72a9b7
JR
177 { .index = MSR_IA32_SYSENTER_CS, .always = true },
178#ifdef CONFIG_X86_64
179 { .index = MSR_GS_BASE, .always = true },
180 { .index = MSR_FS_BASE, .always = true },
181 { .index = MSR_KERNEL_GS_BASE, .always = true },
182 { .index = MSR_LSTAR, .always = true },
183 { .index = MSR_CSTAR, .always = true },
184 { .index = MSR_SYSCALL_MASK, .always = true },
185#endif
186 { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false },
187 { .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
188 { .index = MSR_IA32_LASTINTFROMIP, .always = false },
189 { .index = MSR_IA32_LASTINTTOIP, .always = false },
190 { .index = MSR_INVALID, .always = false },
6c8166a7
AK
191};
192
709ddebf
JR
193/* enable NPT for AMD64 and X86 with PAE */
194#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
195static bool npt_enabled = true;
196#else
e0231715 197static bool npt_enabled;
709ddebf 198#endif
6c7dac72 199
e2358851
DB
200/* allow nested paging (virtualized MMU) for all guests */
201static int npt = true;
6c7dac72 202module_param(npt, int, S_IRUGO);
e3da3acd 203
e2358851
DB
204/* allow nested virtualization in KVM/SVM */
205static int nested = true;
236de055
AG
206module_param(nested, int, S_IRUGO);
207
79a8059d 208static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
44874f84 209static void svm_flush_tlb(struct kvm_vcpu *vcpu);
a5c3832d 210static void svm_complete_interrupts(struct vcpu_svm *svm);
04d2cc77 211
410e4d57 212static int nested_svm_exit_handled(struct vcpu_svm *svm);
b8e88bc8 213static int nested_svm_intercept(struct vcpu_svm *svm);
cf74a78b 214static int nested_svm_vmexit(struct vcpu_svm *svm);
cf74a78b
AG
215static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
216 bool has_error_code, u32 error_code);
92a1f12d 217static u64 __scale_tsc(u64 ratio, u64 tsc);
cf74a78b 218
/*
 * Bit positions inside vmcb->control.clean.  Each bit covers the group of
 * VMCB fields named in its comment; VMCB_DIRTY_MAX is the number of bits.
 */
enum {
	VMCB_INTERCEPTS,	/* Intercept vectors, TSC offset, pause filter count */
	VMCB_PERM_MAP,		/* IOPM Base and MSRPM Base */
	VMCB_ASID,		/* ASID */
	VMCB_INTR,		/* int_ctl, int_vector */
	VMCB_NPT,		/* npt_en, nCR3, gPAT */
	VMCB_CR,		/* CR0, CR3, CR4, EFER */
	VMCB_DR,		/* DR6, DR7 */
	VMCB_DT,		/* GDT, IDT */
	VMCB_SEG,		/* CS, DS, SS, ES, CPL */
	VMCB_CR2,		/* CR2 only */
	VMCB_LBR,		/* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */
	VMCB_DIRTY_MAX,
};
234
0574dec0
JR
235/* TPR and CR2 are always written before VMRUN */
236#define VMCB_ALWAYS_DIRTY_MASK ((1U << VMCB_INTR) | (1U << VMCB_CR2))
8d28fec4
RJ
237
238static inline void mark_all_dirty(struct vmcb *vmcb)
239{
240 vmcb->control.clean = 0;
241}
242
243static inline void mark_all_clean(struct vmcb *vmcb)
244{
245 vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1)
246 & ~VMCB_ALWAYS_DIRTY_MASK;
247}
248
249static inline void mark_dirty(struct vmcb *vmcb, int bit)
250{
251 vmcb->control.clean &= ~(1 << bit);
252}
253
a2fa3e9f
GH
254static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
255{
fb3f0f51 256 return container_of(vcpu, struct vcpu_svm, vcpu);
a2fa3e9f
GH
257}
258
384c6368
JR
259static void recalc_intercepts(struct vcpu_svm *svm)
260{
261 struct vmcb_control_area *c, *h;
262 struct nested_state *g;
263
116a0a23
JR
264 mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
265
384c6368
JR
266 if (!is_guest_mode(&svm->vcpu))
267 return;
268
269 c = &svm->vmcb->control;
270 h = &svm->nested.hsave->control;
271 g = &svm->nested;
272
4ee546b4 273 c->intercept_cr = h->intercept_cr | g->intercept_cr;
3aed041a 274 c->intercept_dr = h->intercept_dr | g->intercept_dr;
384c6368
JR
275 c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
276 c->intercept = h->intercept | g->intercept;
277}
278
4ee546b4
RJ
279static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm)
280{
281 if (is_guest_mode(&svm->vcpu))
282 return svm->nested.hsave;
283 else
284 return svm->vmcb;
285}
286
287static inline void set_cr_intercept(struct vcpu_svm *svm, int bit)
288{
289 struct vmcb *vmcb = get_host_vmcb(svm);
290
291 vmcb->control.intercept_cr |= (1U << bit);
292
293 recalc_intercepts(svm);
294}
295
296static inline void clr_cr_intercept(struct vcpu_svm *svm, int bit)
297{
298 struct vmcb *vmcb = get_host_vmcb(svm);
299
300 vmcb->control.intercept_cr &= ~(1U << bit);
301
302 recalc_intercepts(svm);
303}
304
305static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit)
306{
307 struct vmcb *vmcb = get_host_vmcb(svm);
308
309 return vmcb->control.intercept_cr & (1U << bit);
310}
311
5315c716 312static inline void set_dr_intercepts(struct vcpu_svm *svm)
3aed041a
JR
313{
314 struct vmcb *vmcb = get_host_vmcb(svm);
315
5315c716
PB
316 vmcb->control.intercept_dr = (1 << INTERCEPT_DR0_READ)
317 | (1 << INTERCEPT_DR1_READ)
318 | (1 << INTERCEPT_DR2_READ)
319 | (1 << INTERCEPT_DR3_READ)
320 | (1 << INTERCEPT_DR4_READ)
321 | (1 << INTERCEPT_DR5_READ)
322 | (1 << INTERCEPT_DR6_READ)
323 | (1 << INTERCEPT_DR7_READ)
324 | (1 << INTERCEPT_DR0_WRITE)
325 | (1 << INTERCEPT_DR1_WRITE)
326 | (1 << INTERCEPT_DR2_WRITE)
327 | (1 << INTERCEPT_DR3_WRITE)
328 | (1 << INTERCEPT_DR4_WRITE)
329 | (1 << INTERCEPT_DR5_WRITE)
330 | (1 << INTERCEPT_DR6_WRITE)
331 | (1 << INTERCEPT_DR7_WRITE);
3aed041a
JR
332
333 recalc_intercepts(svm);
334}
335
5315c716 336static inline void clr_dr_intercepts(struct vcpu_svm *svm)
3aed041a
JR
337{
338 struct vmcb *vmcb = get_host_vmcb(svm);
339
5315c716 340 vmcb->control.intercept_dr = 0;
3aed041a
JR
341
342 recalc_intercepts(svm);
343}
344
18c918c5
JR
345static inline void set_exception_intercept(struct vcpu_svm *svm, int bit)
346{
347 struct vmcb *vmcb = get_host_vmcb(svm);
348
349 vmcb->control.intercept_exceptions |= (1U << bit);
350
351 recalc_intercepts(svm);
352}
353
354static inline void clr_exception_intercept(struct vcpu_svm *svm, int bit)
355{
356 struct vmcb *vmcb = get_host_vmcb(svm);
357
358 vmcb->control.intercept_exceptions &= ~(1U << bit);
359
360 recalc_intercepts(svm);
361}
362
8a05a1b8
JR
363static inline void set_intercept(struct vcpu_svm *svm, int bit)
364{
365 struct vmcb *vmcb = get_host_vmcb(svm);
366
367 vmcb->control.intercept |= (1ULL << bit);
368
369 recalc_intercepts(svm);
370}
371
372static inline void clr_intercept(struct vcpu_svm *svm, int bit)
373{
374 struct vmcb *vmcb = get_host_vmcb(svm);
375
376 vmcb->control.intercept &= ~(1ULL << bit);
377
378 recalc_intercepts(svm);
379}
380
2af9194d
JR
381static inline void enable_gif(struct vcpu_svm *svm)
382{
383 svm->vcpu.arch.hflags |= HF_GIF_MASK;
384}
385
386static inline void disable_gif(struct vcpu_svm *svm)
387{
388 svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
389}
390
391static inline bool gif_set(struct vcpu_svm *svm)
392{
393 return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
394}
395
4866d5e3 396static unsigned long iopm_base;
6aa8b732
AK
397
398struct kvm_ldttss_desc {
399 u16 limit0;
400 u16 base0;
e0231715
JR
401 unsigned base1:8, type:5, dpl:2, p:1;
402 unsigned limit1:4, zero0:3, g:1, base2:8;
6aa8b732
AK
403 u32 base3;
404 u32 zero1;
405} __attribute__((packed));
406
407struct svm_cpu_data {
408 int cpu;
409
5008fdf5
AK
410 u64 asid_generation;
411 u32 max_asid;
412 u32 next_asid;
6aa8b732
AK
413 struct kvm_ldttss_desc *tss_desc;
414
415 struct page *save_area;
416};
417
418static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
419
/* CPU number / result pair; no user is visible in this part of the file. */
struct svm_init_data {
	int cpu;
	int r;
};
424
09941fbb 425static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
6aa8b732 426
9d8f549d 427#define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
6aa8b732
AK
428#define MSRS_RANGE_SIZE 2048
429#define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2)
430
455716fa
JR
431static u32 svm_msrpm_offset(u32 msr)
432{
433 u32 offset;
434 int i;
435
436 for (i = 0; i < NUM_MSR_MAPS; i++) {
437 if (msr < msrpm_ranges[i] ||
438 msr >= msrpm_ranges[i] + MSRS_IN_RANGE)
439 continue;
440
441 offset = (msr - msrpm_ranges[i]) / 4; /* 4 msrs per u8 */
442 offset += (i * MSRS_RANGE_SIZE); /* add range offset */
443
444 /* Now we have the u8 offset - but need the u32 offset */
445 return offset / 4;
446 }
447
448 /* MSR not in any range */
449 return MSR_INVALID;
450}
451
6aa8b732
AK
452#define MAX_INST_SIZE 15
453
6aa8b732
AK
454static inline void clgi(void)
455{
4ecac3fd 456 asm volatile (__ex(SVM_CLGI));
6aa8b732
AK
457}
458
459static inline void stgi(void)
460{
4ecac3fd 461 asm volatile (__ex(SVM_STGI));
6aa8b732
AK
462}
463
464static inline void invlpga(unsigned long addr, u32 asid)
465{
e0231715 466 asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid));
6aa8b732
AK
467}
468
4b16184c
JR
469static int get_npt_level(void)
470{
471#ifdef CONFIG_X86_64
472 return PT64_ROOT_LEVEL;
473#else
474 return PT32E_ROOT_LEVEL;
475#endif
476}
477
6aa8b732
AK
478static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
479{
6dc696d4 480 vcpu->arch.efer = efer;
709ddebf 481 if (!npt_enabled && !(efer & EFER_LMA))
2b5203ee 482 efer &= ~EFER_LME;
6aa8b732 483
9962d032 484 to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
dcca1a65 485 mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
6aa8b732
AK
486}
487
6aa8b732
AK
488static int is_external_interrupt(u32 info)
489{
490 info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
491 return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
492}
493
37ccdcbe 494static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu)
2809f5d2
GC
495{
496 struct vcpu_svm *svm = to_svm(vcpu);
497 u32 ret = 0;
498
499 if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
37ccdcbe
PB
500 ret = KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS;
501 return ret;
2809f5d2
GC
502}
503
504static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
505{
506 struct vcpu_svm *svm = to_svm(vcpu);
507
508 if (mask == 0)
509 svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
510 else
511 svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
512
513}
514
6aa8b732
AK
515static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
516{
a2fa3e9f
GH
517 struct vcpu_svm *svm = to_svm(vcpu);
518
f104765b 519 if (svm->vmcb->control.next_rip != 0) {
d2922422 520 WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
6bc31bdc 521 svm->next_rip = svm->vmcb->control.next_rip;
f104765b 522 }
6bc31bdc 523
a2fa3e9f 524 if (!svm->next_rip) {
51d8b661 525 if (emulate_instruction(vcpu, EMULTYPE_SKIP) !=
f629cf84
GN
526 EMULATE_DONE)
527 printk(KERN_DEBUG "%s: NOP\n", __func__);
6aa8b732
AK
528 return;
529 }
5fdbf976
MT
530 if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
531 printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n",
532 __func__, kvm_rip_read(vcpu), svm->next_rip);
6aa8b732 533
5fdbf976 534 kvm_rip_write(vcpu, svm->next_rip);
2809f5d2 535 svm_set_interrupt_shadow(vcpu, 0);
6aa8b732
AK
536}
537
116a4752 538static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
ce7ddec4
JR
539 bool has_error_code, u32 error_code,
540 bool reinject)
116a4752
JK
541{
542 struct vcpu_svm *svm = to_svm(vcpu);
543
e0231715
JR
544 /*
545 * If we are within a nested VM we'd better #VMEXIT and let the guest
546 * handle the exception
547 */
ce7ddec4
JR
548 if (!reinject &&
549 nested_svm_check_exception(svm, nr, has_error_code, error_code))
116a4752
JK
550 return;
551
2a6b20b8 552 if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) {
66b7138f
JK
553 unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
554
555 /*
556 * For guest debugging where we have to reinject #BP if some
557 * INT3 is guest-owned:
558 * Emulate nRIP by moving RIP forward. Will fail if injection
559 * raises a fault that is not intercepted. Still better than
560 * failing in all cases.
561 */
562 skip_emulated_instruction(&svm->vcpu);
563 rip = kvm_rip_read(&svm->vcpu);
564 svm->int3_rip = rip + svm->vmcb->save.cs.base;
565 svm->int3_injected = rip - old_rip;
566 }
567
116a4752
JK
568 svm->vmcb->control.event_inj = nr
569 | SVM_EVTINJ_VALID
570 | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
571 | SVM_EVTINJ_TYPE_EXEPT;
572 svm->vmcb->control.event_inj_err = error_code;
573}
574
67ec6607
JR
575static void svm_init_erratum_383(void)
576{
577 u32 low, high;
578 int err;
579 u64 val;
580
e6ee94d5 581 if (!static_cpu_has_bug(X86_BUG_AMD_TLB_MMATCH))
67ec6607
JR
582 return;
583
584 /* Use _safe variants to not break nested virtualization */
585 val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err);
586 if (err)
587 return;
588
589 val |= (1ULL << 47);
590
591 low = lower_32_bits(val);
592 high = upper_32_bits(val);
593
594 native_write_msr_safe(MSR_AMD64_DC_CFG, low, high);
595
596 erratum_383_found = true;
597}
598
2b036c6b
BO
599static void svm_init_osvw(struct kvm_vcpu *vcpu)
600{
601 /*
602 * Guests should see errata 400 and 415 as fixed (assuming that
603 * HLT and IO instructions are intercepted).
604 */
605 vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3;
606 vcpu->arch.osvw.status = osvw_status & ~(6ULL);
607
608 /*
609 * By increasing VCPU's osvw.length to 3 we are telling the guest that
610 * all osvw.status bits inside that length, including bit 0 (which is
611 * reserved for erratum 298), are valid. However, if host processor's
612 * osvw_len is 0 then osvw_status[0] carries no information. We need to
613 * be conservative here and therefore we tell the guest that erratum 298
614 * is present (because we really don't know).
615 */
616 if (osvw_len == 0 && boot_cpu_data.x86 == 0x10)
617 vcpu->arch.osvw.status |= 1;
618}
619
6aa8b732
AK
620static int has_svm(void)
621{
63d1142f 622 const char *msg;
6aa8b732 623
63d1142f 624 if (!cpu_has_svm(&msg)) {
ff81ff10 625 printk(KERN_INFO "has_svm: %s\n", msg);
6aa8b732
AK
626 return 0;
627 }
628
6aa8b732
AK
629 return 1;
630}
631
13a34e06 632static void svm_hardware_disable(void)
6aa8b732 633{
fbc0db76
JR
634 /* Make sure we clean up behind us */
635 if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
636 wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
637
2c8dceeb 638 cpu_svm_disable();
1018faa6
JR
639
640 amd_pmu_disable_virt();
6aa8b732
AK
641}
642
13a34e06 643static int svm_hardware_enable(void)
6aa8b732
AK
644{
645
0fe1e009 646 struct svm_cpu_data *sd;
6aa8b732 647 uint64_t efer;
89a27f4d 648 struct desc_ptr gdt_descr;
6aa8b732
AK
649 struct desc_struct *gdt;
650 int me = raw_smp_processor_id();
651
10474ae8
AG
652 rdmsrl(MSR_EFER, efer);
653 if (efer & EFER_SVME)
654 return -EBUSY;
655
6aa8b732 656 if (!has_svm()) {
1f5b77f5 657 pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me);
10474ae8 658 return -EINVAL;
6aa8b732 659 }
0fe1e009 660 sd = per_cpu(svm_data, me);
0fe1e009 661 if (!sd) {
1f5b77f5 662 pr_err("%s: svm_data is NULL on %d\n", __func__, me);
10474ae8 663 return -EINVAL;
6aa8b732
AK
664 }
665
0fe1e009
TH
666 sd->asid_generation = 1;
667 sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
668 sd->next_asid = sd->max_asid + 1;
6aa8b732 669
d6ab1ed4 670 native_store_gdt(&gdt_descr);
89a27f4d 671 gdt = (struct desc_struct *)gdt_descr.address;
0fe1e009 672 sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
6aa8b732 673
9962d032 674 wrmsrl(MSR_EFER, efer | EFER_SVME);
6aa8b732 675
d0316554 676 wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
10474ae8 677
fbc0db76
JR
678 if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
679 wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
89cbc767 680 __this_cpu_write(current_tsc_ratio, TSC_RATIO_DEFAULT);
fbc0db76
JR
681 }
682
2b036c6b
BO
683
684 /*
685 * Get OSVW bits.
686 *
687 * Note that it is possible to have a system with mixed processor
688 * revisions and therefore different OSVW bits. If bits are not the same
689 * on different processors then choose the worst case (i.e. if erratum
690 * is present on one processor and not on another then assume that the
691 * erratum is present everywhere).
692 */
693 if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) {
694 uint64_t len, status = 0;
695 int err;
696
697 len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err);
698 if (!err)
699 status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS,
700 &err);
701
702 if (err)
703 osvw_status = osvw_len = 0;
704 else {
705 if (len < osvw_len)
706 osvw_len = len;
707 osvw_status |= status;
708 osvw_status &= (1ULL << osvw_len) - 1;
709 }
710 } else
711 osvw_status = osvw_len = 0;
712
67ec6607
JR
713 svm_init_erratum_383();
714
1018faa6
JR
715 amd_pmu_enable_virt();
716
10474ae8 717 return 0;
6aa8b732
AK
718}
719
0da1db75
JR
720static void svm_cpu_uninit(int cpu)
721{
0fe1e009 722 struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id());
0da1db75 723
0fe1e009 724 if (!sd)
0da1db75
JR
725 return;
726
727 per_cpu(svm_data, raw_smp_processor_id()) = NULL;
0fe1e009
TH
728 __free_page(sd->save_area);
729 kfree(sd);
0da1db75
JR
730}
731
6aa8b732
AK
732static int svm_cpu_init(int cpu)
733{
0fe1e009 734 struct svm_cpu_data *sd;
6aa8b732
AK
735 int r;
736
0fe1e009
TH
737 sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
738 if (!sd)
6aa8b732 739 return -ENOMEM;
0fe1e009
TH
740 sd->cpu = cpu;
741 sd->save_area = alloc_page(GFP_KERNEL);
6aa8b732 742 r = -ENOMEM;
0fe1e009 743 if (!sd->save_area)
6aa8b732
AK
744 goto err_1;
745
0fe1e009 746 per_cpu(svm_data, cpu) = sd;
6aa8b732
AK
747
748 return 0;
749
750err_1:
0fe1e009 751 kfree(sd);
6aa8b732
AK
752 return r;
753
754}
755
ac72a9b7
JR
756static bool valid_msr_intercept(u32 index)
757{
758 int i;
759
760 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++)
761 if (direct_access_msrs[i].index == index)
762 return true;
763
764 return false;
765}
766
bfc733a7
RR
767static void set_msr_interception(u32 *msrpm, unsigned msr,
768 int read, int write)
6aa8b732 769{
455716fa
JR
770 u8 bit_read, bit_write;
771 unsigned long tmp;
772 u32 offset;
6aa8b732 773
ac72a9b7
JR
774 /*
775 * If this warning triggers extend the direct_access_msrs list at the
776 * beginning of the file
777 */
778 WARN_ON(!valid_msr_intercept(msr));
779
455716fa
JR
780 offset = svm_msrpm_offset(msr);
781 bit_read = 2 * (msr & 0x0f);
782 bit_write = 2 * (msr & 0x0f) + 1;
783 tmp = msrpm[offset];
784
785 BUG_ON(offset == MSR_INVALID);
786
787 read ? clear_bit(bit_read, &tmp) : set_bit(bit_read, &tmp);
788 write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);
789
790 msrpm[offset] = tmp;
6aa8b732
AK
791}
792
f65c229c 793static void svm_vcpu_init_msrpm(u32 *msrpm)
6aa8b732
AK
794{
795 int i;
796
f65c229c
JR
797 memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
798
ac72a9b7
JR
799 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
800 if (!direct_access_msrs[i].always)
801 continue;
802
803 set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
804 }
f65c229c
JR
805}
806
323c3d80
JR
807static void add_msr_offset(u32 offset)
808{
809 int i;
810
811 for (i = 0; i < MSRPM_OFFSETS; ++i) {
812
813 /* Offset already in list? */
814 if (msrpm_offsets[i] == offset)
bfc733a7 815 return;
323c3d80
JR
816
817 /* Slot used by another offset? */
818 if (msrpm_offsets[i] != MSR_INVALID)
819 continue;
820
821 /* Add offset to list */
822 msrpm_offsets[i] = offset;
823
824 return;
6aa8b732 825 }
323c3d80
JR
826
827 /*
828 * If this BUG triggers the msrpm_offsets table has an overflow. Just
829 * increase MSRPM_OFFSETS in this case.
830 */
bfc733a7 831 BUG();
6aa8b732
AK
832}
833
323c3d80 834static void init_msrpm_offsets(void)
f65c229c 835{
323c3d80 836 int i;
f65c229c 837
323c3d80
JR
838 memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets));
839
840 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
841 u32 offset;
842
843 offset = svm_msrpm_offset(direct_access_msrs[i].index);
844 BUG_ON(offset == MSR_INVALID);
845
846 add_msr_offset(offset);
847 }
f65c229c
JR
848}
849
24e09cbf
JR
850static void svm_enable_lbrv(struct vcpu_svm *svm)
851{
852 u32 *msrpm = svm->msrpm;
853
854 svm->vmcb->control.lbr_ctl = 1;
855 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
856 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
857 set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
858 set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
859}
860
861static void svm_disable_lbrv(struct vcpu_svm *svm)
862{
863 u32 *msrpm = svm->msrpm;
864
865 svm->vmcb->control.lbr_ctl = 0;
866 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
867 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
868 set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
869 set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
870}
871
6aa8b732
AK
872static __init int svm_hardware_setup(void)
873{
874 int cpu;
875 struct page *iopm_pages;
f65c229c 876 void *iopm_va;
6aa8b732
AK
877 int r;
878
6aa8b732
AK
879 iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);
880
881 if (!iopm_pages)
882 return -ENOMEM;
c8681339
AL
883
884 iopm_va = page_address(iopm_pages);
885 memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
6aa8b732
AK
886 iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
887
323c3d80
JR
888 init_msrpm_offsets();
889
50a37eb4
JR
890 if (boot_cpu_has(X86_FEATURE_NX))
891 kvm_enable_efer_bits(EFER_NX);
892
1b2fd70c
AG
893 if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
894 kvm_enable_efer_bits(EFER_FFXSR);
895
92a1f12d
JR
896 if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
897 u64 max;
898
899 kvm_has_tsc_control = true;
900
901 /*
902 * Make sure the user can only configure tsc_khz values that
903 * fit into a signed integer.
904 * A min value is not calculated needed because it will always
905 * be 1 on all machines and a value of 0 is used to disable
906 * tsc-scaling for the vcpu.
907 */
908 max = min(0x7fffffffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX));
909
910 kvm_max_guest_tsc_khz = max;
bc9b961b
HZ
911
912 kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX;
913 kvm_tsc_scaling_ratio_frac_bits = 32;
92a1f12d
JR
914 }
915
236de055
AG
916 if (nested) {
917 printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
eec4b140 918 kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
236de055
AG
919 }
920
3230bb47 921 for_each_possible_cpu(cpu) {
6aa8b732
AK
922 r = svm_cpu_init(cpu);
923 if (r)
f65c229c 924 goto err;
6aa8b732 925 }
33bd6a0b 926
2a6b20b8 927 if (!boot_cpu_has(X86_FEATURE_NPT))
e3da3acd
JR
928 npt_enabled = false;
929
6c7dac72
JR
930 if (npt_enabled && !npt) {
931 printk(KERN_INFO "kvm: Nested Paging disabled\n");
932 npt_enabled = false;
933 }
934
18552672 935 if (npt_enabled) {
e3da3acd 936 printk(KERN_INFO "kvm: Nested Paging enabled\n");
18552672 937 kvm_enable_tdp();
5f4cb662
JR
938 } else
939 kvm_disable_tdp();
e3da3acd 940
6aa8b732
AK
941 return 0;
942
f65c229c 943err:
6aa8b732
AK
944 __free_pages(iopm_pages, IOPM_ALLOC_ORDER);
945 iopm_base = 0;
946 return r;
947}
948
949static __exit void svm_hardware_unsetup(void)
950{
0da1db75
JR
951 int cpu;
952
3230bb47 953 for_each_possible_cpu(cpu)
0da1db75
JR
954 svm_cpu_uninit(cpu);
955
6aa8b732 956 __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
f65c229c 957 iopm_base = 0;
6aa8b732
AK
958}
959
960static void init_seg(struct vmcb_seg *seg)
961{
962 seg->selector = 0;
963 seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK |
e0231715 964 SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */
6aa8b732
AK
965 seg->limit = 0xffff;
966 seg->base = 0;
967}
968
969static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
970{
971 seg->selector = 0;
972 seg->attrib = SVM_SELECTOR_P_MASK | type;
973 seg->limit = 0xffff;
974 seg->base = 0;
975}
976
fbc0db76
JR
977static u64 __scale_tsc(u64 ratio, u64 tsc)
978{
979 u64 mult, frac, _tsc;
980
981 mult = ratio >> 32;
982 frac = ratio & ((1ULL << 32) - 1);
983
984 _tsc = tsc;
985 _tsc *= mult;
986 _tsc += (tsc >> 32) * frac;
987 _tsc += ((tsc & ((1ULL << 32) - 1)) * frac) >> 32;
988
989 return _tsc;
990}
991
992static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
993{
994 struct vcpu_svm *svm = to_svm(vcpu);
995 u64 _tsc = tsc;
996
997 if (svm->tsc_ratio != TSC_RATIO_DEFAULT)
998 _tsc = __scale_tsc(svm->tsc_ratio, tsc);
999
1000 return _tsc;
1001}
1002
cc578287 1003static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
4051b188
JR
1004{
1005 struct vcpu_svm *svm = to_svm(vcpu);
1006 u64 ratio;
1007 u64 khz;
1008
cc578287
ZA
1009 /* Guest TSC same frequency as host TSC? */
1010 if (!scale) {
1011 svm->tsc_ratio = TSC_RATIO_DEFAULT;
4051b188 1012 return;
cc578287 1013 }
4051b188 1014
cc578287
ZA
1015 /* TSC scaling supported? */
1016 if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
1017 if (user_tsc_khz > tsc_khz) {
1018 vcpu->arch.tsc_catchup = 1;
1019 vcpu->arch.tsc_always_catchup = 1;
1020 } else
1021 WARN(1, "user requested TSC rate below hardware speed\n");
4051b188
JR
1022 return;
1023 }
1024
1025 khz = user_tsc_khz;
1026
1027 /* TSC scaling required - calculate ratio */
1028 ratio = khz << 32;
1029 do_div(ratio, tsc_khz);
1030
1031 if (ratio == 0 || ratio & TSC_RATIO_RSVD) {
1032 WARN_ONCE(1, "Invalid TSC ratio - virtual-tsc-khz=%u\n",
1033 user_tsc_khz);
1034 return;
1035 }
4051b188
JR
1036 svm->tsc_ratio = ratio;
1037}
1038
ba904635
WA
1039static u64 svm_read_tsc_offset(struct kvm_vcpu *vcpu)
1040{
1041 struct vcpu_svm *svm = to_svm(vcpu);
1042
1043 return svm->vmcb->control.tsc_offset;
1044}
1045
f4e1b3c8
ZA
1046static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
1047{
1048 struct vcpu_svm *svm = to_svm(vcpu);
1049 u64 g_tsc_offset = 0;
1050
2030753d 1051 if (is_guest_mode(vcpu)) {
f4e1b3c8
ZA
1052 g_tsc_offset = svm->vmcb->control.tsc_offset -
1053 svm->nested.hsave->control.tsc_offset;
1054 svm->nested.hsave->control.tsc_offset = offset;
489223ed
YY
1055 } else
1056 trace_kvm_write_tsc_offset(vcpu->vcpu_id,
1057 svm->vmcb->control.tsc_offset,
1058 offset);
f4e1b3c8
ZA
1059
1060 svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
116a0a23
JR
1061
1062 mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
f4e1b3c8
ZA
1063}
1064
f1e2b260 1065static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host)
e48672fa
ZA
1066{
1067 struct vcpu_svm *svm = to_svm(vcpu);
1068
d913b904
CA
1069 if (host) {
1070 if (svm->tsc_ratio != TSC_RATIO_DEFAULT)
1071 WARN_ON(adjustment < 0);
1072 adjustment = svm_scale_tsc(vcpu, (u64)adjustment);
1073 }
f1e2b260 1074
e48672fa 1075 svm->vmcb->control.tsc_offset += adjustment;
2030753d 1076 if (is_guest_mode(vcpu))
e48672fa 1077 svm->nested.hsave->control.tsc_offset += adjustment;
489223ed
YY
1078 else
1079 trace_kvm_write_tsc_offset(vcpu->vcpu_id,
1080 svm->vmcb->control.tsc_offset - adjustment,
1081 svm->vmcb->control.tsc_offset);
1082
116a0a23 1083 mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
e48672fa
ZA
1084}
1085
857e4099
JR
1086static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
1087{
1088 u64 tsc;
1089
4ea1636b 1090 tsc = svm_scale_tsc(vcpu, rdtsc());
857e4099
JR
1091
1092 return target_tsc - tsc;
1093}
1094
5690891b 1095static void init_vmcb(struct vcpu_svm *svm)
6aa8b732 1096{
e6101a96
JR
1097 struct vmcb_control_area *control = &svm->vmcb->control;
1098 struct vmcb_save_area *save = &svm->vmcb->save;
6aa8b732 1099
bff78274 1100 svm->vcpu.fpu_active = 1;
4ee546b4 1101 svm->vcpu.arch.hflags = 0;
bff78274 1102
4ee546b4
RJ
1103 set_cr_intercept(svm, INTERCEPT_CR0_READ);
1104 set_cr_intercept(svm, INTERCEPT_CR3_READ);
1105 set_cr_intercept(svm, INTERCEPT_CR4_READ);
1106 set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
1107 set_cr_intercept(svm, INTERCEPT_CR3_WRITE);
1108 set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
1109 set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
6aa8b732 1110
5315c716 1111 set_dr_intercepts(svm);
6aa8b732 1112
18c918c5
JR
1113 set_exception_intercept(svm, PF_VECTOR);
1114 set_exception_intercept(svm, UD_VECTOR);
1115 set_exception_intercept(svm, MC_VECTOR);
6aa8b732 1116
8a05a1b8
JR
1117 set_intercept(svm, INTERCEPT_INTR);
1118 set_intercept(svm, INTERCEPT_NMI);
1119 set_intercept(svm, INTERCEPT_SMI);
1120 set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
332b56e4 1121 set_intercept(svm, INTERCEPT_RDPMC);
8a05a1b8
JR
1122 set_intercept(svm, INTERCEPT_CPUID);
1123 set_intercept(svm, INTERCEPT_INVD);
1124 set_intercept(svm, INTERCEPT_HLT);
1125 set_intercept(svm, INTERCEPT_INVLPG);
1126 set_intercept(svm, INTERCEPT_INVLPGA);
1127 set_intercept(svm, INTERCEPT_IOIO_PROT);
1128 set_intercept(svm, INTERCEPT_MSR_PROT);
1129 set_intercept(svm, INTERCEPT_TASK_SWITCH);
1130 set_intercept(svm, INTERCEPT_SHUTDOWN);
1131 set_intercept(svm, INTERCEPT_VMRUN);
1132 set_intercept(svm, INTERCEPT_VMMCALL);
1133 set_intercept(svm, INTERCEPT_VMLOAD);
1134 set_intercept(svm, INTERCEPT_VMSAVE);
1135 set_intercept(svm, INTERCEPT_STGI);
1136 set_intercept(svm, INTERCEPT_CLGI);
1137 set_intercept(svm, INTERCEPT_SKINIT);
1138 set_intercept(svm, INTERCEPT_WBINVD);
1139 set_intercept(svm, INTERCEPT_MONITOR);
1140 set_intercept(svm, INTERCEPT_MWAIT);
81dd35d4 1141 set_intercept(svm, INTERCEPT_XSETBV);
6aa8b732
AK
1142
1143 control->iopm_base_pa = iopm_base;
f65c229c 1144 control->msrpm_base_pa = __pa(svm->msrpm);
6aa8b732
AK
1145 control->int_ctl = V_INTR_MASKING_MASK;
1146
1147 init_seg(&save->es);
1148 init_seg(&save->ss);
1149 init_seg(&save->ds);
1150 init_seg(&save->fs);
1151 init_seg(&save->gs);
1152
1153 save->cs.selector = 0xf000;
04b66839 1154 save->cs.base = 0xffff0000;
6aa8b732
AK
1155 /* Executable/Readable Code Segment */
1156 save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK |
1157 SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
1158 save->cs.limit = 0xffff;
6aa8b732
AK
1159
1160 save->gdtr.limit = 0xffff;
1161 save->idtr.limit = 0xffff;
1162
1163 init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
1164 init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
1165
5690891b 1166 svm_set_efer(&svm->vcpu, 0);
d77c26fc 1167 save->dr6 = 0xffff0ff0;
f6e78475 1168 kvm_set_rflags(&svm->vcpu, 2);
6aa8b732 1169 save->rip = 0x0000fff0;
5fdbf976 1170 svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
6aa8b732 1171
e0231715 1172 /*
18fa000a 1173 * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
d28bc9dd 1174 * It also updates the guest-visible cr0 value.
6aa8b732 1175 */
79a8059d 1176 svm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
ebae871a 1177 kvm_mmu_reset_context(&svm->vcpu);
18fa000a 1178
66aee91a 1179 save->cr4 = X86_CR4_PAE;
6aa8b732 1180 /* rdx = ?? */
709ddebf
JR
1181
1182 if (npt_enabled) {
1183 /* Setup VMCB for Nested Paging */
1184 control->nested_ctl = 1;
8a05a1b8 1185 clr_intercept(svm, INTERCEPT_INVLPG);
18c918c5 1186 clr_exception_intercept(svm, PF_VECTOR);
4ee546b4
RJ
1187 clr_cr_intercept(svm, INTERCEPT_CR3_READ);
1188 clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
74545705 1189 save->g_pat = svm->vcpu.arch.pat;
709ddebf
JR
1190 save->cr3 = 0;
1191 save->cr4 = 0;
1192 }
f40f6a45 1193 svm->asid_generation = 0;
1371d904 1194
e6aa9abd 1195 svm->nested.vmcb = 0;
2af9194d
JR
1196 svm->vcpu.arch.hflags = 0;
1197
2a6b20b8 1198 if (boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
565d0998 1199 control->pause_filter_count = 3000;
8a05a1b8 1200 set_intercept(svm, INTERCEPT_PAUSE);
565d0998
ML
1201 }
1202
8d28fec4
RJ
1203 mark_all_dirty(svm->vmcb);
1204
2af9194d 1205 enable_gif(svm);
6aa8b732
AK
1206}
1207
d28bc9dd 1208static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
04d2cc77
AK
1209{
1210 struct vcpu_svm *svm = to_svm(vcpu);
66f7b72e
JS
1211 u32 dummy;
1212 u32 eax = 1;
04d2cc77 1213
d28bc9dd
NA
1214 if (!init_event) {
1215 svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
1216 MSR_IA32_APICBASE_ENABLE;
1217 if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
1218 svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
1219 }
5690891b 1220 init_vmcb(svm);
70433389 1221
66f7b72e
JS
1222 kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
1223 kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
04d2cc77
AK
1224}
1225
fb3f0f51 1226static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
6aa8b732 1227{
a2fa3e9f 1228 struct vcpu_svm *svm;
6aa8b732 1229 struct page *page;
f65c229c 1230 struct page *msrpm_pages;
b286d5d8 1231 struct page *hsave_page;
3d6368ef 1232 struct page *nested_msrpm_pages;
fb3f0f51 1233 int err;
6aa8b732 1234
c16f862d 1235 svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
fb3f0f51
RR
1236 if (!svm) {
1237 err = -ENOMEM;
1238 goto out;
1239 }
1240
fbc0db76
JR
1241 svm->tsc_ratio = TSC_RATIO_DEFAULT;
1242
fb3f0f51
RR
1243 err = kvm_vcpu_init(&svm->vcpu, kvm, id);
1244 if (err)
1245 goto free_svm;
1246
b7af4043 1247 err = -ENOMEM;
6aa8b732 1248 page = alloc_page(GFP_KERNEL);
b7af4043 1249 if (!page)
fb3f0f51 1250 goto uninit;
6aa8b732 1251
f65c229c
JR
1252 msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
1253 if (!msrpm_pages)
b7af4043 1254 goto free_page1;
3d6368ef
AG
1255
1256 nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
1257 if (!nested_msrpm_pages)
b7af4043 1258 goto free_page2;
f65c229c 1259
b286d5d8
AG
1260 hsave_page = alloc_page(GFP_KERNEL);
1261 if (!hsave_page)
b7af4043
TY
1262 goto free_page3;
1263
e6aa9abd 1264 svm->nested.hsave = page_address(hsave_page);
b286d5d8 1265
b7af4043
TY
1266 svm->msrpm = page_address(msrpm_pages);
1267 svm_vcpu_init_msrpm(svm->msrpm);
1268
e6aa9abd 1269 svm->nested.msrpm = page_address(nested_msrpm_pages);
323c3d80 1270 svm_vcpu_init_msrpm(svm->nested.msrpm);
3d6368ef 1271
a2fa3e9f
GH
1272 svm->vmcb = page_address(page);
1273 clear_page(svm->vmcb);
1274 svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
1275 svm->asid_generation = 0;
5690891b 1276 init_vmcb(svm);
6aa8b732 1277
2b036c6b
BO
1278 svm_init_osvw(&svm->vcpu);
1279
fb3f0f51 1280 return &svm->vcpu;
36241b8c 1281
b7af4043
TY
1282free_page3:
1283 __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
1284free_page2:
1285 __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
1286free_page1:
1287 __free_page(page);
fb3f0f51
RR
1288uninit:
1289 kvm_vcpu_uninit(&svm->vcpu);
1290free_svm:
a4770347 1291 kmem_cache_free(kvm_vcpu_cache, svm);
fb3f0f51
RR
1292out:
1293 return ERR_PTR(err);
6aa8b732
AK
1294}
1295
1296static void svm_free_vcpu(struct kvm_vcpu *vcpu)
1297{
a2fa3e9f
GH
1298 struct vcpu_svm *svm = to_svm(vcpu);
1299
fb3f0f51 1300 __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
f65c229c 1301 __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
e6aa9abd
JR
1302 __free_page(virt_to_page(svm->nested.hsave));
1303 __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
fb3f0f51 1304 kvm_vcpu_uninit(vcpu);
a4770347 1305 kmem_cache_free(kvm_vcpu_cache, svm);
6aa8b732
AK
1306}
1307
15ad7146 1308static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
6aa8b732 1309{
a2fa3e9f 1310 struct vcpu_svm *svm = to_svm(vcpu);
15ad7146 1311 int i;
0cc5064d 1312
0cc5064d 1313 if (unlikely(cpu != vcpu->cpu)) {
4b656b12 1314 svm->asid_generation = 0;
8d28fec4 1315 mark_all_dirty(svm->vmcb);
0cc5064d 1316 }
94dfbdb3 1317
82ca2d10
AK
1318#ifdef CONFIG_X86_64
1319 rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base);
1320#endif
dacccfdd
AK
1321 savesegment(fs, svm->host.fs);
1322 savesegment(gs, svm->host.gs);
1323 svm->host.ldt = kvm_read_ldt();
1324
94dfbdb3 1325 for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
a2fa3e9f 1326 rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
fbc0db76
JR
1327
1328 if (static_cpu_has(X86_FEATURE_TSCRATEMSR) &&
89cbc767
CL
1329 svm->tsc_ratio != __this_cpu_read(current_tsc_ratio)) {
1330 __this_cpu_write(current_tsc_ratio, svm->tsc_ratio);
fbc0db76
JR
1331 wrmsrl(MSR_AMD64_TSC_RATIO, svm->tsc_ratio);
1332 }
6aa8b732
AK
1333}
1334
1335static void svm_vcpu_put(struct kvm_vcpu *vcpu)
1336{
a2fa3e9f 1337 struct vcpu_svm *svm = to_svm(vcpu);
94dfbdb3
AL
1338 int i;
1339
e1beb1d3 1340 ++vcpu->stat.host_state_reload;
dacccfdd
AK
1341 kvm_load_ldt(svm->host.ldt);
1342#ifdef CONFIG_X86_64
1343 loadsegment(fs, svm->host.fs);
dacccfdd 1344 wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
893a5ab6 1345 load_gs_index(svm->host.gs);
dacccfdd 1346#else
831ca609 1347#ifdef CONFIG_X86_32_LAZY_GS
dacccfdd 1348 loadsegment(gs, svm->host.gs);
831ca609 1349#endif
dacccfdd 1350#endif
94dfbdb3 1351 for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
a2fa3e9f 1352 wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
6aa8b732
AK
1353}
1354
6aa8b732
AK
1355static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
1356{
a2fa3e9f 1357 return to_svm(vcpu)->vmcb->save.rflags;
6aa8b732
AK
1358}
1359
1360static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
1361{
ae9fedc7
PB
1362 /*
1363 * Any change of EFLAGS.VM is accompained by a reload of SS
1364 * (caused by either a task switch or an inter-privilege IRET),
1365 * so we do not need to update the CPL here.
1366 */
a2fa3e9f 1367 to_svm(vcpu)->vmcb->save.rflags = rflags;
6aa8b732
AK
1368}
1369
6de4f3ad
AK
1370static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
1371{
1372 switch (reg) {
1373 case VCPU_EXREG_PDPTR:
1374 BUG_ON(!npt_enabled);
9f8fe504 1375 load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
6de4f3ad
AK
1376 break;
1377 default:
1378 BUG();
1379 }
1380}
1381
f0b85051
AG
1382static void svm_set_vintr(struct vcpu_svm *svm)
1383{
8a05a1b8 1384 set_intercept(svm, INTERCEPT_VINTR);
f0b85051
AG
1385}
1386
1387static void svm_clear_vintr(struct vcpu_svm *svm)
1388{
8a05a1b8 1389 clr_intercept(svm, INTERCEPT_VINTR);
f0b85051
AG
1390}
1391
6aa8b732
AK
1392static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
1393{
a2fa3e9f 1394 struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
6aa8b732
AK
1395
1396 switch (seg) {
1397 case VCPU_SREG_CS: return &save->cs;
1398 case VCPU_SREG_DS: return &save->ds;
1399 case VCPU_SREG_ES: return &save->es;
1400 case VCPU_SREG_FS: return &save->fs;
1401 case VCPU_SREG_GS: return &save->gs;
1402 case VCPU_SREG_SS: return &save->ss;
1403 case VCPU_SREG_TR: return &save->tr;
1404 case VCPU_SREG_LDTR: return &save->ldtr;
1405 }
1406 BUG();
8b6d44c7 1407 return NULL;
6aa8b732
AK
1408}
1409
1410static u64 svm_get_segment_base(struct kvm_vcpu *vcpu, int seg)
1411{
1412 struct vmcb_seg *s = svm_seg(vcpu, seg);
1413
1414 return s->base;
1415}
1416
1417static void svm_get_segment(struct kvm_vcpu *vcpu,
1418 struct kvm_segment *var, int seg)
1419{
1420 struct vmcb_seg *s = svm_seg(vcpu, seg);
1421
1422 var->base = s->base;
1423 var->limit = s->limit;
1424 var->selector = s->selector;
1425 var->type = s->attrib & SVM_SELECTOR_TYPE_MASK;
1426 var->s = (s->attrib >> SVM_SELECTOR_S_SHIFT) & 1;
1427 var->dpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
1428 var->present = (s->attrib >> SVM_SELECTOR_P_SHIFT) & 1;
1429 var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1;
1430 var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
1431 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
80112c89
JM
1432
1433 /*
1434 * AMD CPUs circa 2014 track the G bit for all segments except CS.
1435 * However, the SVM spec states that the G bit is not observed by the
1436 * CPU, and some VMware virtual CPUs drop the G bit for all segments.
1437 * So let's synthesize a legal G bit for all segments, this helps
1438 * running KVM nested. It also helps cross-vendor migration, because
1439 * Intel's vmentry has a check on the 'G' bit.
1440 */
1441 var->g = s->limit > 0xfffff;
25022acc 1442
e0231715
JR
1443 /*
1444 * AMD's VMCB does not have an explicit unusable field, so emulate it
19bca6ab
AP
1445 * for cross vendor migration purposes by "not present"
1446 */
1447 var->unusable = !var->present || (var->type == 0);
1448
1fbdc7a5 1449 switch (seg) {
1fbdc7a5
AP
1450 case VCPU_SREG_TR:
1451 /*
1452 * Work around a bug where the busy flag in the tr selector
1453 * isn't exposed
1454 */
c0d09828 1455 var->type |= 0x2;
1fbdc7a5
AP
1456 break;
1457 case VCPU_SREG_DS:
1458 case VCPU_SREG_ES:
1459 case VCPU_SREG_FS:
1460 case VCPU_SREG_GS:
1461 /*
1462 * The accessed bit must always be set in the segment
1463 * descriptor cache, although it can be cleared in the
1464 * descriptor, the cached bit always remains at 1. Since
1465 * Intel has a check on this, set it here to support
1466 * cross-vendor migration.
1467 */
1468 if (!var->unusable)
1469 var->type |= 0x1;
1470 break;
b586eb02 1471 case VCPU_SREG_SS:
e0231715
JR
1472 /*
1473 * On AMD CPUs sometimes the DB bit in the segment
b586eb02
AP
1474 * descriptor is left as 1, although the whole segment has
1475 * been made unusable. Clear it here to pass an Intel VMX
1476 * entry check when cross vendor migrating.
1477 */
1478 if (var->unusable)
1479 var->db = 0;
33b458d2 1480 var->dpl = to_svm(vcpu)->vmcb->save.cpl;
b586eb02 1481 break;
1fbdc7a5 1482 }
6aa8b732
AK
1483}
1484
2e4d2653
IE
1485static int svm_get_cpl(struct kvm_vcpu *vcpu)
1486{
1487 struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
1488
1489 return save->cpl;
1490}
1491
89a27f4d 1492static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
6aa8b732 1493{
a2fa3e9f
GH
1494 struct vcpu_svm *svm = to_svm(vcpu);
1495
89a27f4d
GN
1496 dt->size = svm->vmcb->save.idtr.limit;
1497 dt->address = svm->vmcb->save.idtr.base;
6aa8b732
AK
1498}
1499
89a27f4d 1500static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
6aa8b732 1501{
a2fa3e9f
GH
1502 struct vcpu_svm *svm = to_svm(vcpu);
1503
89a27f4d
GN
1504 svm->vmcb->save.idtr.limit = dt->size;
1505 svm->vmcb->save.idtr.base = dt->address ;
17a703cb 1506 mark_dirty(svm->vmcb, VMCB_DT);
6aa8b732
AK
1507}
1508
89a27f4d 1509static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
6aa8b732 1510{
a2fa3e9f
GH
1511 struct vcpu_svm *svm = to_svm(vcpu);
1512
89a27f4d
GN
1513 dt->size = svm->vmcb->save.gdtr.limit;
1514 dt->address = svm->vmcb->save.gdtr.base;
6aa8b732
AK
1515}
1516
89a27f4d 1517static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
6aa8b732 1518{
a2fa3e9f
GH
1519 struct vcpu_svm *svm = to_svm(vcpu);
1520
89a27f4d
GN
1521 svm->vmcb->save.gdtr.limit = dt->size;
1522 svm->vmcb->save.gdtr.base = dt->address ;
17a703cb 1523 mark_dirty(svm->vmcb, VMCB_DT);
6aa8b732
AK
1524}
1525
e8467fda
AK
/* Intentionally empty: nothing is done for cr0 guest bits here. */
static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
{
}
1529
aff48baa
AK
/* Intentionally empty: nothing is done for cr3 here. */
static void svm_decache_cr3(struct kvm_vcpu *vcpu)
{
}
1533
/* Intentionally empty: nothing is done for cr4 guest bits here. */
static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
{
}
1537
d225157b
AK
1538static void update_cr0_intercept(struct vcpu_svm *svm)
1539{
1540 ulong gcr0 = svm->vcpu.arch.cr0;
1541 u64 *hcr0 = &svm->vmcb->save.cr0;
1542
1543 if (!svm->vcpu.fpu_active)
1544 *hcr0 |= SVM_CR0_SELECTIVE_MASK;
1545 else
1546 *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
1547 | (gcr0 & SVM_CR0_SELECTIVE_MASK);
1548
dcca1a65 1549 mark_dirty(svm->vmcb, VMCB_CR);
d225157b
AK
1550
1551 if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
4ee546b4
RJ
1552 clr_cr_intercept(svm, INTERCEPT_CR0_READ);
1553 clr_cr_intercept(svm, INTERCEPT_CR0_WRITE);
d225157b 1554 } else {
4ee546b4
RJ
1555 set_cr_intercept(svm, INTERCEPT_CR0_READ);
1556 set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
d225157b
AK
1557 }
1558}
1559
6aa8b732
AK
1560static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1561{
a2fa3e9f
GH
1562 struct vcpu_svm *svm = to_svm(vcpu);
1563
05b3e0c2 1564#ifdef CONFIG_X86_64
f6801dff 1565 if (vcpu->arch.efer & EFER_LME) {
707d92fa 1566 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
f6801dff 1567 vcpu->arch.efer |= EFER_LMA;
2b5203ee 1568 svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
6aa8b732
AK
1569 }
1570
d77c26fc 1571 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
f6801dff 1572 vcpu->arch.efer &= ~EFER_LMA;
2b5203ee 1573 svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
6aa8b732
AK
1574 }
1575 }
1576#endif
ad312c7c 1577 vcpu->arch.cr0 = cr0;
888f9f3e
AK
1578
1579 if (!npt_enabled)
1580 cr0 |= X86_CR0_PG | X86_CR0_WP;
02daab21
AK
1581
1582 if (!vcpu->fpu_active)
334df50a 1583 cr0 |= X86_CR0_TS;
bcf166a9
PB
1584 /*
1585 * re-enable caching here because the QEMU bios
1586 * does not do it - this results in some delay at
1587 * reboot
1588 */
1589 if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
1590 cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
a2fa3e9f 1591 svm->vmcb->save.cr0 = cr0;
dcca1a65 1592 mark_dirty(svm->vmcb, VMCB_CR);
d225157b 1593 update_cr0_intercept(svm);
6aa8b732
AK
1594}
1595
5e1746d6 1596static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
6aa8b732 1597{
1e02ce4c 1598 unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE;
e5eab0ce
JR
1599 unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;
1600
5e1746d6
NHE
1601 if (cr4 & X86_CR4_VMXE)
1602 return 1;
1603
e5eab0ce 1604 if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
f40f6a45 1605 svm_flush_tlb(vcpu);
6394b649 1606
ec077263
JR
1607 vcpu->arch.cr4 = cr4;
1608 if (!npt_enabled)
1609 cr4 |= X86_CR4_PAE;
6394b649 1610 cr4 |= host_cr4_mce;
ec077263 1611 to_svm(vcpu)->vmcb->save.cr4 = cr4;
dcca1a65 1612 mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
5e1746d6 1613 return 0;
6aa8b732
AK
1614}
1615
1616static void svm_set_segment(struct kvm_vcpu *vcpu,
1617 struct kvm_segment *var, int seg)
1618{
a2fa3e9f 1619 struct vcpu_svm *svm = to_svm(vcpu);
6aa8b732
AK
1620 struct vmcb_seg *s = svm_seg(vcpu, seg);
1621
1622 s->base = var->base;
1623 s->limit = var->limit;
1624 s->selector = var->selector;
1625 if (var->unusable)
1626 s->attrib = 0;
1627 else {
1628 s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
1629 s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
1630 s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
1631 s->attrib |= (var->present & 1) << SVM_SELECTOR_P_SHIFT;
1632 s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
1633 s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
1634 s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
1635 s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
1636 }
ae9fedc7
PB
1637
1638 /*
1639 * This is always accurate, except if SYSRET returned to a segment
1640 * with SS.DPL != 3. Intel does not have this quirk, and always
1641 * forces SS.DPL to 3 on sysret, so we ignore that case; fixing it
1642 * would entail passing the CPL to userspace and back.
1643 */
1644 if (seg == VCPU_SREG_SS)
1645 svm->vmcb->save.cpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
6aa8b732 1646
060d0c9a 1647 mark_dirty(svm->vmcb, VMCB_SEG);
6aa8b732
AK
1648}
1649
c8639010 1650static void update_db_bp_intercept(struct kvm_vcpu *vcpu)
6aa8b732 1651{
d0bfb940
JK
1652 struct vcpu_svm *svm = to_svm(vcpu);
1653
18c918c5
JR
1654 clr_exception_intercept(svm, DB_VECTOR);
1655 clr_exception_intercept(svm, BP_VECTOR);
44c11430 1656
6be7d306 1657 if (svm->nmi_singlestep)
18c918c5 1658 set_exception_intercept(svm, DB_VECTOR);
44c11430 1659
d0bfb940
JK
1660 if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
1661 if (vcpu->guest_debug &
1662 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
18c918c5 1663 set_exception_intercept(svm, DB_VECTOR);
d0bfb940 1664 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
18c918c5 1665 set_exception_intercept(svm, BP_VECTOR);
d0bfb940
JK
1666 } else
1667 vcpu->guest_debug = 0;
44c11430
GN
1668}
1669
0fe1e009 1670static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
6aa8b732 1671{
0fe1e009
TH
1672 if (sd->next_asid > sd->max_asid) {
1673 ++sd->asid_generation;
1674 sd->next_asid = 1;
a2fa3e9f 1675 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
6aa8b732
AK
1676 }
1677
0fe1e009
TH
1678 svm->asid_generation = sd->asid_generation;
1679 svm->vmcb->control.asid = sd->next_asid++;
d48086d1
JR
1680
1681 mark_dirty(svm->vmcb, VMCB_ASID);
6aa8b732
AK
1682}
1683
73aaf249
JK
1684static u64 svm_get_dr6(struct kvm_vcpu *vcpu)
1685{
1686 return to_svm(vcpu)->vmcb->save.dr6;
1687}
1688
1689static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value)
1690{
1691 struct vcpu_svm *svm = to_svm(vcpu);
1692
1693 svm->vmcb->save.dr6 = value;
1694 mark_dirty(svm->vmcb, VMCB_DR);
1695}
1696
facb0139
PB
1697static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
1698{
1699 struct vcpu_svm *svm = to_svm(vcpu);
1700
1701 get_debugreg(vcpu->arch.db[0], 0);
1702 get_debugreg(vcpu->arch.db[1], 1);
1703 get_debugreg(vcpu->arch.db[2], 2);
1704 get_debugreg(vcpu->arch.db[3], 3);
1705 vcpu->arch.dr6 = svm_get_dr6(vcpu);
1706 vcpu->arch.dr7 = svm->vmcb->save.dr7;
1707
1708 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
1709 set_dr_intercepts(svm);
1710}
1711
020df079 1712static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
6aa8b732 1713{
42dbaa5a 1714 struct vcpu_svm *svm = to_svm(vcpu);
42dbaa5a 1715
020df079 1716 svm->vmcb->save.dr7 = value;
72214b96 1717 mark_dirty(svm->vmcb, VMCB_DR);
6aa8b732
AK
1718}
1719
851ba692 1720static int pf_interception(struct vcpu_svm *svm)
6aa8b732 1721{
631bc487 1722 u64 fault_address = svm->vmcb->control.exit_info_2;
6aa8b732 1723 u32 error_code;
631bc487 1724 int r = 1;
6aa8b732 1725
631bc487
GN
1726 switch (svm->apf_reason) {
1727 default:
1728 error_code = svm->vmcb->control.exit_info_1;
af9ca2d7 1729
631bc487
GN
1730 trace_kvm_page_fault(fault_address, error_code);
1731 if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu))
1732 kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
dc25e89e
AP
1733 r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
1734 svm->vmcb->control.insn_bytes,
1735 svm->vmcb->control.insn_len);
631bc487
GN
1736 break;
1737 case KVM_PV_REASON_PAGE_NOT_PRESENT:
1738 svm->apf_reason = 0;
1739 local_irq_disable();
1740 kvm_async_pf_task_wait(fault_address);
1741 local_irq_enable();
1742 break;
1743 case KVM_PV_REASON_PAGE_READY:
1744 svm->apf_reason = 0;
1745 local_irq_disable();
1746 kvm_async_pf_task_wake(fault_address);
1747 local_irq_enable();
1748 break;
1749 }
1750 return r;
6aa8b732
AK
1751}
1752
851ba692 1753static int db_interception(struct vcpu_svm *svm)
d0bfb940 1754{
851ba692
AK
1755 struct kvm_run *kvm_run = svm->vcpu.run;
1756
d0bfb940 1757 if (!(svm->vcpu.guest_debug &
44c11430 1758 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
6be7d306 1759 !svm->nmi_singlestep) {
d0bfb940
JK
1760 kvm_queue_exception(&svm->vcpu, DB_VECTOR);
1761 return 1;
1762 }
44c11430 1763
6be7d306
JK
1764 if (svm->nmi_singlestep) {
1765 svm->nmi_singlestep = false;
44c11430
GN
1766 if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
1767 svm->vmcb->save.rflags &=
1768 ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
c8639010 1769 update_db_bp_intercept(&svm->vcpu);
44c11430
GN
1770 }
1771
1772 if (svm->vcpu.guest_debug &
e0231715 1773 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) {
44c11430
GN
1774 kvm_run->exit_reason = KVM_EXIT_DEBUG;
1775 kvm_run->debug.arch.pc =
1776 svm->vmcb->save.cs.base + svm->vmcb->save.rip;
1777 kvm_run->debug.arch.exception = DB_VECTOR;
1778 return 0;
1779 }
1780
1781 return 1;
d0bfb940
JK
1782}
1783
851ba692 1784static int bp_interception(struct vcpu_svm *svm)
d0bfb940 1785{
851ba692
AK
1786 struct kvm_run *kvm_run = svm->vcpu.run;
1787
d0bfb940
JK
1788 kvm_run->exit_reason = KVM_EXIT_DEBUG;
1789 kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
1790 kvm_run->debug.arch.exception = BP_VECTOR;
1791 return 0;
1792}
1793
851ba692 1794static int ud_interception(struct vcpu_svm *svm)
7aa81cc0
AL
1795{
1796 int er;
1797
51d8b661 1798 er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
7aa81cc0 1799 if (er != EMULATE_DONE)
7ee5d940 1800 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
7aa81cc0
AL
1801 return 1;
1802}
1803
6b52d186 1804static void svm_fpu_activate(struct kvm_vcpu *vcpu)
7807fa6c 1805{
6b52d186 1806 struct vcpu_svm *svm = to_svm(vcpu);
66a562f7 1807
18c918c5 1808 clr_exception_intercept(svm, NM_VECTOR);
66a562f7 1809
e756fc62 1810 svm->vcpu.fpu_active = 1;
d225157b 1811 update_cr0_intercept(svm);
6b52d186 1812}
a2fa3e9f 1813
6b52d186
AK
1814static int nm_interception(struct vcpu_svm *svm)
1815{
1816 svm_fpu_activate(&svm->vcpu);
a2fa3e9f 1817 return 1;
7807fa6c
AL
1818}
1819
67ec6607
JR
1820static bool is_erratum_383(void)
1821{
1822 int err, i;
1823 u64 value;
1824
1825 if (!erratum_383_found)
1826 return false;
1827
1828 value = native_read_msr_safe(MSR_IA32_MC0_STATUS, &err);
1829 if (err)
1830 return false;
1831
1832 /* Bit 62 may or may not be set for this mce */
1833 value &= ~(1ULL << 62);
1834
1835 if (value != 0xb600000000010015ULL)
1836 return false;
1837
1838 /* Clear MCi_STATUS registers */
1839 for (i = 0; i < 6; ++i)
1840 native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0);
1841
1842 value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err);
1843 if (!err) {
1844 u32 low, high;
1845
1846 value &= ~(1ULL << 2);
1847 low = lower_32_bits(value);
1848 high = upper_32_bits(value);
1849
1850 native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high);
1851 }
1852
1853 /* Flush tlb to evict multi-match entries */
1854 __flush_tlb_all();
1855
1856 return true;
1857}
1858
fe5913e4 1859static void svm_handle_mce(struct vcpu_svm *svm)
53371b50 1860{
67ec6607
JR
1861 if (is_erratum_383()) {
1862 /*
1863 * Erratum 383 triggered. Guest state is corrupt so kill the
1864 * guest.
1865 */
1866 pr_err("KVM: Guest triggered AMD Erratum 383\n");
1867
a8eeb04a 1868 kvm_make_request(KVM_REQ_TRIPLE_FAULT, &svm->vcpu);
67ec6607
JR
1869
1870 return;
1871 }
1872
53371b50
JR
1873 /*
1874 * On an #MC intercept the MCE handler is not called automatically in
1875 * the host. So do it by hand here.
1876 */
1877 asm volatile (
1878 "int $0x12\n");
1879 /* not sure if we ever come back to this point */
1880
fe5913e4
JR
1881 return;
1882}
1883
/* #MC intercept handler: does nothing here and always returns 1. */
static int mc_interception(struct vcpu_svm *svm)
{
	return 1;
}
1888
851ba692 1889static int shutdown_interception(struct vcpu_svm *svm)
46fe4ddd 1890{
851ba692
AK
1891 struct kvm_run *kvm_run = svm->vcpu.run;
1892
46fe4ddd
JR
1893 /*
1894 * VMCB is undefined after a SHUTDOWN intercept
1895 * so reinitialize it.
1896 */
a2fa3e9f 1897 clear_page(svm->vmcb);
5690891b 1898 init_vmcb(svm);
46fe4ddd
JR
1899
1900 kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
1901 return 0;
1902}
1903
851ba692 1904static int io_interception(struct vcpu_svm *svm)
6aa8b732 1905{
cf8f70bf 1906 struct kvm_vcpu *vcpu = &svm->vcpu;
d77c26fc 1907 u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
34c33d16 1908 int size, in, string;
039576c0 1909 unsigned port;
6aa8b732 1910
e756fc62 1911 ++svm->vcpu.stat.io_exits;
e70669ab 1912 string = (io_info & SVM_IOIO_STR_MASK) != 0;
039576c0 1913 in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
cf8f70bf 1914 if (string || in)
51d8b661 1915 return emulate_instruction(vcpu, 0) == EMULATE_DONE;
cf8f70bf 1916
039576c0
AK
1917 port = io_info >> 16;
1918 size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
cf8f70bf 1919 svm->next_rip = svm->vmcb->control.exit_info_2;
e93f36bc 1920 skip_emulated_instruction(&svm->vcpu);
cf8f70bf
GN
1921
1922 return kvm_fast_pio_out(vcpu, size, port);
6aa8b732
AK
1923}
1924
/* NMI intercept handler: does nothing here and always returns 1. */
static int nmi_interception(struct vcpu_svm *svm)
{
	return 1;
}
1929
851ba692 1930static int intr_interception(struct vcpu_svm *svm)
a0698055
JR
1931{
1932 ++svm->vcpu.stat.irq_exits;
1933 return 1;
1934}
1935
/* Intercept handler that does nothing and always returns 1. */
static int nop_on_interception(struct vcpu_svm *svm)
{
	return 1;
}
1940
851ba692 1941static int halt_interception(struct vcpu_svm *svm)
6aa8b732 1942{
5fdbf976 1943 svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
e756fc62 1944 return kvm_emulate_halt(&svm->vcpu);
6aa8b732
AK
1945}
1946
851ba692 1947static int vmmcall_interception(struct vcpu_svm *svm)
02e235bc 1948{
5fdbf976 1949 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
7aa81cc0
AL
1950 kvm_emulate_hypercall(&svm->vcpu);
1951 return 1;
02e235bc
AK
1952}
1953
5bd2edc3
JR
1954static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
1955{
1956 struct vcpu_svm *svm = to_svm(vcpu);
1957
1958 return svm->nested.nested_cr3;
1959}
1960
e4e517b4
AK
1961static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
1962{
1963 struct vcpu_svm *svm = to_svm(vcpu);
1964 u64 cr3 = svm->nested.nested_cr3;
1965 u64 pdpte;
1966 int ret;
1967
54bf36aa
PB
1968 ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(cr3), &pdpte,
1969 offset_in_page(cr3) + index * 8, 8);
e4e517b4
AK
1970 if (ret)
1971 return 0;
1972 return pdpte;
1973}
1974
5bd2edc3
JR
1975static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
1976 unsigned long root)
1977{
1978 struct vcpu_svm *svm = to_svm(vcpu);
1979
1980 svm->vmcb->control.nested_cr3 = root;
b2747166 1981 mark_dirty(svm->vmcb, VMCB_NPT);
f40f6a45 1982 svm_flush_tlb(vcpu);
5bd2edc3
JR
1983}
1984
6389ee94
AK
1985static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
1986 struct x86_exception *fault)
5bd2edc3
JR
1987{
1988 struct vcpu_svm *svm = to_svm(vcpu);
1989
5e352519
PB
1990 if (svm->vmcb->control.exit_code != SVM_EXIT_NPF) {
1991 /*
1992 * TODO: track the cause of the nested page fault, and
1993 * correctly fill in the high bits of exit_info_1.
1994 */
1995 svm->vmcb->control.exit_code = SVM_EXIT_NPF;
1996 svm->vmcb->control.exit_code_hi = 0;
1997 svm->vmcb->control.exit_info_1 = (1ULL << 32);
1998 svm->vmcb->control.exit_info_2 = fault->address;
1999 }
2000
2001 svm->vmcb->control.exit_info_1 &= ~0xffffffffULL;
2002 svm->vmcb->control.exit_info_1 |= fault->error_code;
2003
2004 /*
2005 * The present bit is always zero for page structure faults on real
2006 * hardware.
2007 */
2008 if (svm->vmcb->control.exit_info_1 & (2ULL << 32))
2009 svm->vmcb->control.exit_info_1 &= ~1;
5bd2edc3
JR
2010
2011 nested_svm_vmexit(svm);
2012}
2013
8a3c1a33 2014static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
4b16184c 2015{
ad896af0
PB
2016 WARN_ON(mmu_is_nested(vcpu));
2017 kvm_init_shadow_mmu(vcpu);
4b16184c
JR
2018 vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3;
2019 vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3;
e4e517b4 2020 vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr;
4b16184c
JR
2021 vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
2022 vcpu->arch.mmu.shadow_root_level = get_npt_level();
c258b62b 2023 reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu);
4b16184c 2024 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
4b16184c
JR
2025}
2026
2027static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
2028{
2029 vcpu->arch.walk_mmu = &vcpu->arch.mmu;
2030}
2031
c0725420
AG
2032static int nested_svm_check_permissions(struct vcpu_svm *svm)
2033{
f6801dff 2034 if (!(svm->vcpu.arch.efer & EFER_SVME)
c0725420
AG
2035 || !is_paging(&svm->vcpu)) {
2036 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2037 return 1;
2038 }
2039
2040 if (svm->vmcb->save.cpl) {
2041 kvm_inject_gp(&svm->vcpu, 0);
2042 return 1;
2043 }
2044
2045 return 0;
2046}
2047
cf74a78b
AG
2048static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
2049 bool has_error_code, u32 error_code)
2050{
b8e88bc8
JR
2051 int vmexit;
2052
2030753d 2053 if (!is_guest_mode(&svm->vcpu))
0295ad7d 2054 return 0;
cf74a78b 2055
0295ad7d
JR
2056 svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
2057 svm->vmcb->control.exit_code_hi = 0;
2058 svm->vmcb->control.exit_info_1 = error_code;
2059 svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
2060
b8e88bc8
JR
2061 vmexit = nested_svm_intercept(svm);
2062 if (vmexit == NESTED_EXIT_DONE)
2063 svm->nested.exit_required = true;
2064
2065 return vmexit;
cf74a78b
AG
2066}
2067
8fe54654
JR
2068/* This function returns true if it is save to enable the irq window */
2069static inline bool nested_svm_intr(struct vcpu_svm *svm)
cf74a78b 2070{
2030753d 2071 if (!is_guest_mode(&svm->vcpu))
8fe54654 2072 return true;
cf74a78b 2073
26666957 2074 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
8fe54654 2075 return true;
cf74a78b 2076
26666957 2077 if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
8fe54654 2078 return false;
cf74a78b 2079
a0a07cd2
GN
2080 /*
2081 * if vmexit was already requested (by intercepted exception
2082 * for instance) do not overwrite it with "external interrupt"
2083 * vmexit.
2084 */
2085 if (svm->nested.exit_required)
2086 return false;
2087
197717d5
JR
2088 svm->vmcb->control.exit_code = SVM_EXIT_INTR;
2089 svm->vmcb->control.exit_info_1 = 0;
2090 svm->vmcb->control.exit_info_2 = 0;
26666957 2091
cd3ff653
JR
2092 if (svm->nested.intercept & 1ULL) {
2093 /*
2094 * The #vmexit can't be emulated here directly because this
c5ec2e56 2095 * code path runs with irqs and preemption disabled. A
cd3ff653
JR
2096 * #vmexit emulation might sleep. Only signal request for
2097 * the #vmexit here.
2098 */
2099 svm->nested.exit_required = true;
236649de 2100 trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
8fe54654 2101 return false;
cf74a78b
AG
2102 }
2103
8fe54654 2104 return true;
cf74a78b
AG
2105}
2106
887f500c
JR
2107/* This function returns true if it is save to enable the nmi window */
2108static inline bool nested_svm_nmi(struct vcpu_svm *svm)
2109{
2030753d 2110 if (!is_guest_mode(&svm->vcpu))
887f500c
JR
2111 return true;
2112
2113 if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI)))
2114 return true;
2115
2116 svm->vmcb->control.exit_code = SVM_EXIT_NMI;
2117 svm->nested.exit_required = true;
2118
2119 return false;
cf74a78b
AG
2120}
2121
7597f129 2122static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
34f80cfa
JR
2123{
2124 struct page *page;
2125
6c3bd3d7
JR
2126 might_sleep();
2127
54bf36aa 2128 page = kvm_vcpu_gfn_to_page(&svm->vcpu, gpa >> PAGE_SHIFT);
34f80cfa
JR
2129 if (is_error_page(page))
2130 goto error;
2131
7597f129
JR
2132 *_page = page;
2133
2134 return kmap(page);
34f80cfa
JR
2135
2136error:
34f80cfa
JR
2137 kvm_inject_gp(&svm->vcpu, 0);
2138
2139 return NULL;
2140}
2141
/* Counterpart of nested_svm_map(): kunmap @page and release it as dirty. */
static void nested_svm_unmap(struct page *page)
{
	kunmap(page);
	kvm_release_page_dirty(page);
}
34f80cfa 2147
ce2ac085
JR
2148static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
2149{
9bf41833
JK
2150 unsigned port, size, iopm_len;
2151 u16 val, mask;
2152 u8 start_bit;
ce2ac085 2153 u64 gpa;
34f80cfa 2154
ce2ac085
JR
2155 if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
2156 return NESTED_EXIT_HOST;
34f80cfa 2157
ce2ac085 2158 port = svm->vmcb->control.exit_info_1 >> 16;
9bf41833
JK
2159 size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >>
2160 SVM_IOIO_SIZE_SHIFT;
ce2ac085 2161 gpa = svm->nested.vmcb_iopm + (port / 8);
9bf41833
JK
2162 start_bit = port % 8;
2163 iopm_len = (start_bit + size > 8) ? 2 : 1;
2164 mask = (0xf >> (4 - size)) << start_bit;
2165 val = 0;
ce2ac085 2166
54bf36aa 2167 if (kvm_vcpu_read_guest(&svm->vcpu, gpa, &val, iopm_len))
9bf41833 2168 return NESTED_EXIT_DONE;
ce2ac085 2169
9bf41833 2170 return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
34f80cfa
JR
2171}
2172
d2477826 2173static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
4c2161ae 2174{
0d6b3537
JR
2175 u32 offset, msr, value;
2176 int write, mask;
4c2161ae 2177
3d62d9aa 2178 if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
d2477826 2179 return NESTED_EXIT_HOST;
3d62d9aa 2180
0d6b3537
JR
2181 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
2182 offset = svm_msrpm_offset(msr);
2183 write = svm->vmcb->control.exit_info_1 & 1;
2184 mask = 1 << ((2 * (msr & 0xf)) + write);
3d62d9aa 2185
0d6b3537
JR
2186 if (offset == MSR_INVALID)
2187 return NESTED_EXIT_DONE;
4c2161ae 2188
0d6b3537
JR
2189 /* Offset is in 32 bit units but need in 8 bit units */
2190 offset *= 4;
4c2161ae 2191
54bf36aa 2192 if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.vmcb_msrpm + offset, &value, 4))
0d6b3537 2193 return NESTED_EXIT_DONE;
3d62d9aa 2194
0d6b3537 2195 return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
4c2161ae
JR
2196}
2197
410e4d57 2198static int nested_svm_exit_special(struct vcpu_svm *svm)
cf74a78b 2199{
cf74a78b 2200 u32 exit_code = svm->vmcb->control.exit_code;
4c2161ae 2201
410e4d57
JR
2202 switch (exit_code) {
2203 case SVM_EXIT_INTR:
2204 case SVM_EXIT_NMI:
ff47a49b 2205 case SVM_EXIT_EXCP_BASE + MC_VECTOR:
410e4d57 2206 return NESTED_EXIT_HOST;
410e4d57 2207 case SVM_EXIT_NPF:
e0231715 2208 /* For now we are always handling NPFs when using them */
410e4d57
JR
2209 if (npt_enabled)
2210 return NESTED_EXIT_HOST;
2211 break;
410e4d57 2212 case SVM_EXIT_EXCP_BASE + PF_VECTOR:
631bc487
GN
2213 /* When we're shadowing, trap PFs, but not async PF */
2214 if (!npt_enabled && svm->apf_reason == 0)
410e4d57
JR
2215 return NESTED_EXIT_HOST;
2216 break;
66a562f7
JR
2217 case SVM_EXIT_EXCP_BASE + NM_VECTOR:
2218 nm_interception(svm);
2219 break;
410e4d57
JR
2220 default:
2221 break;
cf74a78b
AG
2222 }
2223
410e4d57
JR
2224 return NESTED_EXIT_CONTINUE;
2225}
2226
2227/*
2228 * If this function returns true, this #vmexit was already handled
2229 */
b8e88bc8 2230static int nested_svm_intercept(struct vcpu_svm *svm)
410e4d57
JR
2231{
2232 u32 exit_code = svm->vmcb->control.exit_code;
2233 int vmexit = NESTED_EXIT_HOST;
2234
cf74a78b 2235 switch (exit_code) {
9c4e40b9 2236 case SVM_EXIT_MSR:
3d62d9aa 2237 vmexit = nested_svm_exit_handled_msr(svm);
9c4e40b9 2238 break;
ce2ac085
JR
2239 case SVM_EXIT_IOIO:
2240 vmexit = nested_svm_intercept_ioio(svm);
2241 break;
4ee546b4
RJ
2242 case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
2243 u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0);
2244 if (svm->nested.intercept_cr & bit)
410e4d57 2245 vmexit = NESTED_EXIT_DONE;
cf74a78b
AG
2246 break;
2247 }
3aed041a
JR
2248 case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
2249 u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0);
2250 if (svm->nested.intercept_dr & bit)
410e4d57 2251 vmexit = NESTED_EXIT_DONE;
cf74a78b
AG
2252 break;
2253 }
2254 case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
2255 u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
aad42c64 2256 if (svm->nested.intercept_exceptions & excp_bits)
410e4d57 2257 vmexit = NESTED_EXIT_DONE;
631bc487
GN
2258 /* async page fault always cause vmexit */
2259 else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
2260 svm->apf_reason != 0)
2261 vmexit = NESTED_EXIT_DONE;
cf74a78b
AG
2262 break;
2263 }
228070b1
JR
2264 case SVM_EXIT_ERR: {
2265 vmexit = NESTED_EXIT_DONE;
2266 break;
2267 }
cf74a78b
AG
2268 default: {
2269 u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
aad42c64 2270 if (svm->nested.intercept & exit_bits)
410e4d57 2271 vmexit = NESTED_EXIT_DONE;
cf74a78b
AG
2272 }
2273 }
2274
b8e88bc8
JR
2275 return vmexit;
2276}
2277
2278static int nested_svm_exit_handled(struct vcpu_svm *svm)
2279{
2280 int vmexit;
2281
2282 vmexit = nested_svm_intercept(svm);
2283
2284 if (vmexit == NESTED_EXIT_DONE)
9c4e40b9 2285 nested_svm_vmexit(svm);
9c4e40b9
JR
2286
2287 return vmexit;
cf74a78b
AG
2288}
2289
0460a979
JR
2290static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb)
2291{
2292 struct vmcb_control_area *dst = &dst_vmcb->control;
2293 struct vmcb_control_area *from = &from_vmcb->control;
2294
4ee546b4 2295 dst->intercept_cr = from->intercept_cr;
3aed041a 2296 dst->intercept_dr = from->intercept_dr;
0460a979
JR
2297 dst->intercept_exceptions = from->intercept_exceptions;
2298 dst->intercept = from->intercept;
2299 dst->iopm_base_pa = from->iopm_base_pa;
2300 dst->msrpm_base_pa = from->msrpm_base_pa;
2301 dst->tsc_offset = from->tsc_offset;
2302 dst->asid = from->asid;
2303 dst->tlb_ctl = from->tlb_ctl;
2304 dst->int_ctl = from->int_ctl;
2305 dst->int_vector = from->int_vector;
2306 dst->int_state = from->int_state;
2307 dst->exit_code = from->exit_code;
2308 dst->exit_code_hi = from->exit_code_hi;
2309 dst->exit_info_1 = from->exit_info_1;
2310 dst->exit_info_2 = from->exit_info_2;
2311 dst->exit_int_info = from->exit_int_info;
2312 dst->exit_int_info_err = from->exit_int_info_err;
2313 dst->nested_ctl = from->nested_ctl;
2314 dst->event_inj = from->event_inj;
2315 dst->event_inj_err = from->event_inj_err;
2316 dst->nested_cr3 = from->nested_cr3;
2317 dst->lbr_ctl = from->lbr_ctl;
2318}
2319
34f80cfa 2320static int nested_svm_vmexit(struct vcpu_svm *svm)
cf74a78b 2321{
34f80cfa 2322 struct vmcb *nested_vmcb;
e6aa9abd 2323 struct vmcb *hsave = svm->nested.hsave;
33740e40 2324 struct vmcb *vmcb = svm->vmcb;
7597f129 2325 struct page *page;
cf74a78b 2326
17897f36
JR
2327 trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
2328 vmcb->control.exit_info_1,
2329 vmcb->control.exit_info_2,
2330 vmcb->control.exit_int_info,
e097e5ff
SH
2331 vmcb->control.exit_int_info_err,
2332 KVM_ISA_SVM);
17897f36 2333
7597f129 2334 nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page);
34f80cfa
JR
2335 if (!nested_vmcb)
2336 return 1;
2337
2030753d
JR
2338 /* Exit Guest-Mode */
2339 leave_guest_mode(&svm->vcpu);
06fc7772
JR
2340 svm->nested.vmcb = 0;
2341
cf74a78b 2342 /* Give the current vmcb to the guest */
33740e40
JR
2343 disable_gif(svm);
2344
2345 nested_vmcb->save.es = vmcb->save.es;
2346 nested_vmcb->save.cs = vmcb->save.cs;
2347 nested_vmcb->save.ss = vmcb->save.ss;
2348 nested_vmcb->save.ds = vmcb->save.ds;
2349 nested_vmcb->save.gdtr = vmcb->save.gdtr;
2350 nested_vmcb->save.idtr = vmcb->save.idtr;
3f6a9d16 2351 nested_vmcb->save.efer = svm->vcpu.arch.efer;
cdbbdc12 2352 nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu);
9f8fe504 2353 nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu);
33740e40 2354 nested_vmcb->save.cr2 = vmcb->save.cr2;
cdbbdc12 2355 nested_vmcb->save.cr4 = svm->vcpu.arch.cr4;
f6e78475 2356 nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu);
33740e40
JR
2357 nested_vmcb->save.rip = vmcb->save.rip;
2358 nested_vmcb->save.rsp = vmcb->save.rsp;
2359 nested_vmcb->save.rax = vmcb->save.rax;
2360 nested_vmcb->save.dr7 = vmcb->save.dr7;
2361 nested_vmcb->save.dr6 = vmcb->save.dr6;
2362 nested_vmcb->save.cpl = vmcb->save.cpl;
2363
2364 nested_vmcb->control.int_ctl = vmcb->control.int_ctl;
2365 nested_vmcb->control.int_vector = vmcb->control.int_vector;
2366 nested_vmcb->control.int_state = vmcb->control.int_state;
2367 nested_vmcb->control.exit_code = vmcb->control.exit_code;
2368 nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi;
2369 nested_vmcb->control.exit_info_1 = vmcb->control.exit_info_1;
2370 nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
2371 nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info;
2372 nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
6092d3d3
JR
2373
2374 if (svm->nrips_enabled)
2375 nested_vmcb->control.next_rip = vmcb->control.next_rip;
8d23c466
AG
2376
2377 /*
2378 * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
2379 * to make sure that we do not lose injected events. So check event_inj
2380 * here and copy it to exit_int_info if it is valid.
2381 * Exit_int_info and event_inj can't be both valid because the case
2382 * below only happens on a VMRUN instruction intercept which has
2383 * no valid exit_int_info set.
2384 */
2385 if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
2386 struct vmcb_control_area *nc = &nested_vmcb->control;
2387
2388 nc->exit_int_info = vmcb->control.event_inj;
2389 nc->exit_int_info_err = vmcb->control.event_inj_err;
2390 }
2391
33740e40
JR
2392 nested_vmcb->control.tlb_ctl = 0;
2393 nested_vmcb->control.event_inj = 0;
2394 nested_vmcb->control.event_inj_err = 0;
cf74a78b
AG
2395
2396 /* We always set V_INTR_MASKING and remember the old value in hflags */
2397 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
2398 nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
2399
cf74a78b 2400 /* Restore the original control entries */
0460a979 2401 copy_vmcb_control_area(vmcb, hsave);
cf74a78b 2402
219b65dc
AG
2403 kvm_clear_exception_queue(&svm->vcpu);
2404 kvm_clear_interrupt_queue(&svm->vcpu);
cf74a78b 2405
4b16184c
JR
2406 svm->nested.nested_cr3 = 0;
2407
cf74a78b
AG
2408 /* Restore selected save entries */
2409 svm->vmcb->save.es = hsave->save.es;
2410 svm->vmcb->save.cs = hsave->save.cs;
2411 svm->vmcb->save.ss = hsave->save.ss;
2412 svm->vmcb->save.ds = hsave->save.ds;
2413 svm->vmcb->save.gdtr = hsave->save.gdtr;
2414 svm->vmcb->save.idtr = hsave->save.idtr;
f6e78475 2415 kvm_set_rflags(&svm->vcpu, hsave->save.rflags);
cf74a78b
AG
2416 svm_set_efer(&svm->vcpu, hsave->save.efer);
2417 svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
2418 svm_set_cr4(&svm->vcpu, hsave->save.cr4);
2419 if (npt_enabled) {
2420 svm->vmcb->save.cr3 = hsave->save.cr3;
2421 svm->vcpu.arch.cr3 = hsave->save.cr3;
2422 } else {
2390218b 2423 (void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
cf74a78b
AG
2424 }
2425 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);
2426 kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp);
2427 kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip);
2428 svm->vmcb->save.dr7 = 0;
2429 svm->vmcb->save.cpl = 0;
2430 svm->vmcb->control.exit_int_info = 0;
2431
8d28fec4
RJ
2432 mark_all_dirty(svm->vmcb);
2433
7597f129 2434 nested_svm_unmap(page);
cf74a78b 2435
4b16184c 2436 nested_svm_uninit_mmu_context(&svm->vcpu);
cf74a78b
AG
2437 kvm_mmu_reset_context(&svm->vcpu);
2438 kvm_mmu_load(&svm->vcpu);
2439
2440 return 0;
2441}
3d6368ef 2442
9738b2c9 2443static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
3d6368ef 2444{
323c3d80
JR
2445 /*
2446 * This function merges the msr permission bitmaps of kvm and the
c5ec2e56 2447 * nested vmcb. It is optimized in that it only merges the parts where
323c3d80
JR
2448 * the kvm msr permission bitmap may contain zero bits
2449 */
3d6368ef 2450 int i;
9738b2c9 2451
323c3d80
JR
2452 if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
2453 return true;
9738b2c9 2454
323c3d80
JR
2455 for (i = 0; i < MSRPM_OFFSETS; i++) {
2456 u32 value, p;
2457 u64 offset;
9738b2c9 2458
323c3d80
JR
2459 if (msrpm_offsets[i] == 0xffffffff)
2460 break;
3d6368ef 2461
0d6b3537
JR
2462 p = msrpm_offsets[i];
2463 offset = svm->nested.vmcb_msrpm + (p * 4);
323c3d80 2464
54bf36aa 2465 if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4))
323c3d80
JR
2466 return false;
2467
2468 svm->nested.msrpm[p] = svm->msrpm[p] | value;
2469 }
3d6368ef 2470
323c3d80 2471 svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm);
9738b2c9
JR
2472
2473 return true;
3d6368ef
AG
2474}
2475
52c65a30
JR
2476static bool nested_vmcb_checks(struct vmcb *vmcb)
2477{
2478 if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0)
2479 return false;
2480
dbe77584
JR
2481 if (vmcb->control.asid == 0)
2482 return false;
2483
4b16184c
JR
2484 if (vmcb->control.nested_ctl && !npt_enabled)
2485 return false;
2486
52c65a30
JR
2487 return true;
2488}
2489
9738b2c9 2490static bool nested_svm_vmrun(struct vcpu_svm *svm)
3d6368ef 2491{
9738b2c9 2492 struct vmcb *nested_vmcb;
e6aa9abd 2493 struct vmcb *hsave = svm->nested.hsave;
defbba56 2494 struct vmcb *vmcb = svm->vmcb;
7597f129 2495 struct page *page;
06fc7772 2496 u64 vmcb_gpa;
3d6368ef 2497
06fc7772 2498 vmcb_gpa = svm->vmcb->save.rax;
3d6368ef 2499
7597f129 2500 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
9738b2c9
JR
2501 if (!nested_vmcb)
2502 return false;
2503
52c65a30
JR
2504 if (!nested_vmcb_checks(nested_vmcb)) {
2505 nested_vmcb->control.exit_code = SVM_EXIT_ERR;
2506 nested_vmcb->control.exit_code_hi = 0;
2507 nested_vmcb->control.exit_info_1 = 0;
2508 nested_vmcb->control.exit_info_2 = 0;
2509
2510 nested_svm_unmap(page);
2511
2512 return false;
2513 }
2514
b75f4eb3 2515 trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
0ac406de
JR
2516 nested_vmcb->save.rip,
2517 nested_vmcb->control.int_ctl,
2518 nested_vmcb->control.event_inj,
2519 nested_vmcb->control.nested_ctl);
2520
4ee546b4
RJ
2521 trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff,
2522 nested_vmcb->control.intercept_cr >> 16,
2e554e8d
JR
2523 nested_vmcb->control.intercept_exceptions,
2524 nested_vmcb->control.intercept);
2525
3d6368ef 2526 /* Clear internal status */
219b65dc
AG
2527 kvm_clear_exception_queue(&svm->vcpu);
2528 kvm_clear_interrupt_queue(&svm->vcpu);
3d6368ef 2529
e0231715
JR
2530 /*
2531 * Save the old vmcb, so we don't need to pick what we save, but can
2532 * restore everything when a VMEXIT occurs
2533 */
defbba56
JR
2534 hsave->save.es = vmcb->save.es;
2535 hsave->save.cs = vmcb->save.cs;
2536 hsave->save.ss = vmcb->save.ss;
2537 hsave->save.ds = vmcb->save.ds;
2538 hsave->save.gdtr = vmcb->save.gdtr;
2539 hsave->save.idtr = vmcb->save.idtr;
f6801dff 2540 hsave->save.efer = svm->vcpu.arch.efer;
4d4ec087 2541 hsave->save.cr0 = kvm_read_cr0(&svm->vcpu);
defbba56 2542 hsave->save.cr4 = svm->vcpu.arch.cr4;
f6e78475 2543 hsave->save.rflags = kvm_get_rflags(&svm->vcpu);
b75f4eb3 2544 hsave->save.rip = kvm_rip_read(&svm->vcpu);
defbba56
JR
2545 hsave->save.rsp = vmcb->save.rsp;
2546 hsave->save.rax = vmcb->save.rax;
2547 if (npt_enabled)
2548 hsave->save.cr3 = vmcb->save.cr3;
2549 else
9f8fe504 2550 hsave->save.cr3 = kvm_read_cr3(&svm->vcpu);
defbba56 2551
0460a979 2552 copy_vmcb_control_area(hsave, vmcb);
3d6368ef 2553
f6e78475 2554 if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
3d6368ef
AG
2555 svm->vcpu.arch.hflags |= HF_HIF_MASK;
2556 else
2557 svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
2558
4b16184c
JR
2559 if (nested_vmcb->control.nested_ctl) {
2560 kvm_mmu_unload(&svm->vcpu);
2561 svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
2562 nested_svm_init_mmu_context(&svm->vcpu);
2563 }
2564
3d6368ef
AG
2565 /* Load the nested guest state */
2566 svm->vmcb->save.es = nested_vmcb->save.es;
2567 svm->vmcb->save.cs = nested_vmcb->save.cs;
2568 svm->vmcb->save.ss = nested_vmcb->save.ss;
2569 svm->vmcb->save.ds = nested_vmcb->save.ds;
2570 svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
2571 svm->vmcb->save.idtr = nested_vmcb->save.idtr;
f6e78475 2572 kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags);
3d6368ef
AG
2573 svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
2574 svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
2575 svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
2576 if (npt_enabled) {
2577 svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
2578 svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
0e5cbe36 2579 } else
2390218b 2580 (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
0e5cbe36
JR
2581
2582 /* Guest paging mode is active - reset mmu */
2583 kvm_mmu_reset_context(&svm->vcpu);
2584
defbba56 2585 svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
3d6368ef
AG
2586 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
2587 kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
2588 kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
e0231715 2589
3d6368ef
AG
2590 /* In case we don't even reach vcpu_run, the fields are not updated */
2591 svm->vmcb->save.rax = nested_vmcb->save.rax;
2592 svm->vmcb->save.rsp = nested_vmcb->save.rsp;
2593 svm->vmcb->save.rip = nested_vmcb->save.rip;
2594 svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
2595 svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
2596 svm->vmcb->save.cpl = nested_vmcb->save.cpl;
2597
f7138538 2598 svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
ce2ac085 2599 svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL;
3d6368ef 2600
aad42c64 2601 /* cache intercepts */
4ee546b4 2602 svm->nested.intercept_cr = nested_vmcb->control.intercept_cr;
3aed041a 2603 svm->nested.intercept_dr = nested_vmcb->control.intercept_dr;
aad42c64
JR
2604 svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
2605 svm->nested.intercept = nested_vmcb->control.intercept;
2606
f40f6a45 2607 svm_flush_tlb(&svm->vcpu);
3d6368ef 2608 svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
3d6368ef
AG
2609 if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
2610 svm->vcpu.arch.hflags |= HF_VINTR_MASK;
2611 else
2612 svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
2613
88ab24ad
JR
2614 if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
2615 /* We only want the cr8 intercept bits of the guest */
4ee546b4
RJ
2616 clr_cr_intercept(svm, INTERCEPT_CR8_READ);
2617 clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
88ab24ad
JR
2618 }
2619
0d945bd9 2620 /* We don't want to see VMMCALLs from a nested guest */
8a05a1b8 2621 clr_intercept(svm, INTERCEPT_VMMCALL);
0d945bd9 2622
88ab24ad 2623 svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl;
3d6368ef
AG
2624 svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
2625 svm->vmcb->control.int_state = nested_vmcb->control.int_state;
2626 svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
3d6368ef
AG
2627 svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
2628 svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
2629
7597f129 2630 nested_svm_unmap(page);
9738b2c9 2631
2030753d
JR
2632 /* Enter Guest-Mode */
2633 enter_guest_mode(&svm->vcpu);
2634
384c6368
JR
2635 /*
2636 * Merge guest and host intercepts - must be called with vcpu in
2637 * guest-mode to take affect here
2638 */
2639 recalc_intercepts(svm);
2640
06fc7772 2641 svm->nested.vmcb = vmcb_gpa;
9738b2c9 2642
2af9194d 2643 enable_gif(svm);
3d6368ef 2644
8d28fec4
RJ
2645 mark_all_dirty(svm->vmcb);
2646
9738b2c9 2647 return true;
3d6368ef
AG
2648}
2649
9966bf68 2650static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
5542675b
AG
2651{
2652 to_vmcb->save.fs = from_vmcb->save.fs;
2653 to_vmcb->save.gs = from_vmcb->save.gs;
2654 to_vmcb->save.tr = from_vmcb->save.tr;
2655 to_vmcb->save.ldtr = from_vmcb->save.ldtr;
2656 to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
2657 to_vmcb->save.star = from_vmcb->save.star;
2658 to_vmcb->save.lstar = from_vmcb->save.lstar;
2659 to_vmcb->save.cstar = from_vmcb->save.cstar;
2660 to_vmcb->save.sfmask = from_vmcb->save.sfmask;
2661 to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
2662 to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
2663 to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
5542675b
AG
2664}
2665
851ba692 2666static int vmload_interception(struct vcpu_svm *svm)
5542675b 2667{
9966bf68 2668 struct vmcb *nested_vmcb;
7597f129 2669 struct page *page;
9966bf68 2670
5542675b
AG
2671 if (nested_svm_check_permissions(svm))
2672 return 1;
2673
7597f129 2674 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
9966bf68
JR
2675 if (!nested_vmcb)
2676 return 1;
2677
e3e9ed3d
JR
2678 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2679 skip_emulated_instruction(&svm->vcpu);
2680
9966bf68 2681 nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
7597f129 2682 nested_svm_unmap(page);
5542675b
AG
2683
2684 return 1;
2685}
2686
851ba692 2687static int vmsave_interception(struct vcpu_svm *svm)
5542675b 2688{
9966bf68 2689 struct vmcb *nested_vmcb;
7597f129 2690 struct page *page;
9966bf68 2691
5542675b
AG
2692 if (nested_svm_check_permissions(svm))
2693 return 1;
2694
7597f129 2695 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
9966bf68
JR
2696 if (!nested_vmcb)
2697 return 1;
2698
e3e9ed3d
JR
2699 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2700 skip_emulated_instruction(&svm->vcpu);
2701
9966bf68 2702 nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
7597f129 2703 nested_svm_unmap(page);
5542675b
AG
2704
2705 return 1;
2706}
2707
851ba692 2708static int vmrun_interception(struct vcpu_svm *svm)
3d6368ef 2709{
3d6368ef
AG
2710 if (nested_svm_check_permissions(svm))
2711 return 1;
2712
b75f4eb3
RJ
2713 /* Save rip after vmrun instruction */
2714 kvm_rip_write(&svm->vcpu, kvm_rip_read(&svm->vcpu) + 3);
3d6368ef 2715
9738b2c9 2716 if (!nested_svm_vmrun(svm))
3d6368ef
AG
2717 return 1;
2718
9738b2c9 2719 if (!nested_svm_vmrun_msrpm(svm))
1f8da478
JR
2720 goto failed;
2721
2722 return 1;
2723
2724failed:
2725
2726 svm->vmcb->control.exit_code = SVM_EXIT_ERR;
2727 svm->vmcb->control.exit_code_hi = 0;
2728 svm->vmcb->control.exit_info_1 = 0;
2729 svm->vmcb->control.exit_info_2 = 0;
2730
2731 nested_svm_vmexit(svm);
3d6368ef
AG
2732
2733 return 1;
2734}
2735
851ba692 2736static int stgi_interception(struct vcpu_svm *svm)
1371d904
AG
2737{
2738 if (nested_svm_check_permissions(svm))
2739 return 1;
2740
2741 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2742 skip_emulated_instruction(&svm->vcpu);
3842d135 2743 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
1371d904 2744
2af9194d 2745 enable_gif(svm);
1371d904
AG
2746
2747 return 1;
2748}
2749
851ba692 2750static int clgi_interception(struct vcpu_svm *svm)
1371d904
AG
2751{
2752 if (nested_svm_check_permissions(svm))
2753 return 1;
2754
2755 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2756 skip_emulated_instruction(&svm->vcpu);
2757
2af9194d 2758 disable_gif(svm);
1371d904
AG
2759
2760 /* After a CLGI no interrupts should come */
2761 svm_clear_vintr(svm);
2762 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
2763
decdbf6a
JR
2764 mark_dirty(svm->vmcb, VMCB_INTR);
2765
1371d904
AG
2766 return 1;
2767}
2768
851ba692 2769static int invlpga_interception(struct vcpu_svm *svm)
ff092385
AG
2770{
2771 struct kvm_vcpu *vcpu = &svm->vcpu;
ff092385 2772
668f198f
DK
2773 trace_kvm_invlpga(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RCX),
2774 kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
ec1ff790 2775
ff092385 2776 /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
668f198f 2777 kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
ff092385
AG
2778
2779 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2780 skip_emulated_instruction(&svm->vcpu);
2781 return 1;
2782}
2783
532a46b9
JR
2784static int skinit_interception(struct vcpu_svm *svm)
2785{
668f198f 2786 trace_kvm_skinit(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
532a46b9
JR
2787
2788 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2789 return 1;
2790}
2791
dab429a7
DK
2792static int wbinvd_interception(struct vcpu_svm *svm)
2793{
2794 kvm_emulate_wbinvd(&svm->vcpu);
2795 return 1;
2796}
2797
81dd35d4
JR
2798static int xsetbv_interception(struct vcpu_svm *svm)
2799{
2800 u64 new_bv = kvm_read_edx_eax(&svm->vcpu);
2801 u32 index = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
2802
2803 if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) {
2804 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2805 skip_emulated_instruction(&svm->vcpu);
2806 }
2807
2808 return 1;
2809}
2810
851ba692 2811static int task_switch_interception(struct vcpu_svm *svm)
6aa8b732 2812{
37817f29 2813 u16 tss_selector;
64a7ec06
GN
2814 int reason;
2815 int int_type = svm->vmcb->control.exit_int_info &
2816 SVM_EXITINTINFO_TYPE_MASK;
8317c298 2817 int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK;
fe8e7f83
GN
2818 uint32_t type =
2819 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK;
2820 uint32_t idt_v =
2821 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID;
e269fb21
JK
2822 bool has_error_code = false;
2823 u32 error_code = 0;
37817f29
IE
2824
2825 tss_selector = (u16)svm->vmcb->control.exit_info_1;
64a7ec06 2826
37817f29
IE
2827 if (svm->vmcb->control.exit_info_2 &
2828 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
64a7ec06
GN
2829 reason = TASK_SWITCH_IRET;
2830 else if (svm->vmcb->control.exit_info_2 &
2831 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
2832 reason = TASK_SWITCH_JMP;
fe8e7f83 2833 else if (idt_v)
64a7ec06
GN
2834 reason = TASK_SWITCH_GATE;
2835 else
2836 reason = TASK_SWITCH_CALL;
2837
fe8e7f83
GN
2838 if (reason == TASK_SWITCH_GATE) {
2839 switch (type) {
2840 case SVM_EXITINTINFO_TYPE_NMI:
2841 svm->vcpu.arch.nmi_injected = false;
2842 break;
2843 case SVM_EXITINTINFO_TYPE_EXEPT:
e269fb21
JK
2844 if (svm->vmcb->control.exit_info_2 &
2845 (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) {
2846 has_error_code = true;
2847 error_code =
2848 (u32)svm->vmcb->control.exit_info_2;
2849 }
fe8e7f83
GN
2850 kvm_clear_exception_queue(&svm->vcpu);
2851 break;
2852 case SVM_EXITINTINFO_TYPE_INTR:
2853 kvm_clear_interrupt_queue(&svm->vcpu);
2854 break;
2855 default:
2856 break;
2857 }
2858 }
64a7ec06 2859
8317c298
GN
2860 if (reason != TASK_SWITCH_GATE ||
2861 int_type == SVM_EXITINTINFO_TYPE_SOFT ||
2862 (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
f629cf84
GN
2863 (int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
2864 skip_emulated_instruction(&svm->vcpu);
64a7ec06 2865
7f3d35fd
KW
2866 if (int_type != SVM_EXITINTINFO_TYPE_SOFT)
2867 int_vec = -1;
2868
2869 if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
acb54517
GN
2870 has_error_code, error_code) == EMULATE_FAIL) {
2871 svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
2872 svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
2873 svm->vcpu.run->internal.ndata = 0;
2874 return 0;
2875 }
2876 return 1;
6aa8b732
AK
2877}
2878
851ba692 2879static int cpuid_interception(struct vcpu_svm *svm)
6aa8b732 2880{
5fdbf976 2881 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
e756fc62 2882 kvm_emulate_cpuid(&svm->vcpu);
06465c5a 2883 return 1;
6aa8b732
AK
2884}
2885
851ba692 2886static int iret_interception(struct vcpu_svm *svm)
95ba8273
GN
2887{
2888 ++svm->vcpu.stat.nmi_window_exits;
8a05a1b8 2889 clr_intercept(svm, INTERCEPT_IRET);
44c11430 2890 svm->vcpu.arch.hflags |= HF_IRET_MASK;
bd3d1ec3 2891 svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
f303b4ce 2892 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
95ba8273
GN
2893 return 1;
2894}
2895
851ba692 2896static int invlpg_interception(struct vcpu_svm *svm)
a7052897 2897{
df4f3108
AP
2898 if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
2899 return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
2900
2901 kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
2902 skip_emulated_instruction(&svm->vcpu);
2903 return 1;
a7052897
MT
2904}
2905
851ba692 2906static int emulate_on_interception(struct vcpu_svm *svm)
6aa8b732 2907{
51d8b661 2908 return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
6aa8b732
AK
2909}
2910
332b56e4
AK
2911static int rdpmc_interception(struct vcpu_svm *svm)
2912{
2913 int err;
2914
2915 if (!static_cpu_has(X86_FEATURE_NRIPS))
2916 return emulate_on_interception(svm);
2917
2918 err = kvm_rdpmc(&svm->vcpu);
2919 kvm_complete_insn_gp(&svm->vcpu, err);
2920
2921 return 1;
2922}
2923
52eb5a6d
XL
2924static bool check_selective_cr0_intercepted(struct vcpu_svm *svm,
2925 unsigned long val)
628afd2a
JR
2926{
2927 unsigned long cr0 = svm->vcpu.arch.cr0;
2928 bool ret = false;
2929 u64 intercept;
2930
2931 intercept = svm->nested.intercept;
2932
2933 if (!is_guest_mode(&svm->vcpu) ||
2934 (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))))
2935 return false;
2936
2937 cr0 &= ~SVM_CR0_SELECTIVE_MASK;
2938 val &= ~SVM_CR0_SELECTIVE_MASK;
2939
2940 if (cr0 ^ val) {
2941 svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
2942 ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE);
2943 }
2944
2945 return ret;
2946}
2947
7ff76d58
AP
2948#define CR_VALID (1ULL << 63)
2949
2950static int cr_interception(struct vcpu_svm *svm)
2951{
2952 int reg, cr;
2953 unsigned long val;
2954 int err;
2955
2956 if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
2957 return emulate_on_interception(svm);
2958
2959 if (unlikely((svm->vmcb->control.exit_info_1 & CR_VALID) == 0))
2960 return emulate_on_interception(svm);
2961
2962 reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
5e57518d
DK
2963 if (svm->vmcb->control.exit_code == SVM_EXIT_CR0_SEL_WRITE)
2964 cr = SVM_EXIT_WRITE_CR0 - SVM_EXIT_READ_CR0;
2965 else
2966 cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
7ff76d58
AP
2967
2968 err = 0;
2969 if (cr >= 16) { /* mov to cr */
2970 cr -= 16;
2971 val = kvm_register_read(&svm->vcpu, reg);
2972 switch (cr) {
2973 case 0:
628afd2a
JR
2974 if (!check_selective_cr0_intercepted(svm, val))
2975 err = kvm_set_cr0(&svm->vcpu, val);
977b2d03
JR
2976 else
2977 return 1;
2978
7ff76d58
AP
2979 break;
2980 case 3:
2981 err = kvm_set_cr3(&svm->vcpu, val);
2982 break;
2983 case 4:
2984 err = kvm_set_cr4(&svm->vcpu, val);
2985 break;
2986 case 8:
2987 err = kvm_set_cr8(&svm->vcpu, val);
2988 break;
2989 default:
2990 WARN(1, "unhandled write to CR%d", cr);
2991 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2992 return 1;
2993 }
2994 } else { /* mov from cr */
2995 switch (cr) {
2996 case 0:
2997 val = kvm_read_cr0(&svm->vcpu);
2998 break;
2999 case 2:
3000 val = svm->vcpu.arch.cr2;
3001 break;
3002 case 3:
9f8fe504 3003 val = kvm_read_cr3(&svm->vcpu);
7ff76d58
AP
3004 break;
3005 case 4:
3006 val = kvm_read_cr4(&svm->vcpu);
3007 break;
3008 case 8:
3009 val = kvm_get_cr8(&svm->vcpu);
3010 break;
3011 default:
3012 WARN(1, "unhandled read from CR%d", cr);
3013 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
3014 return 1;
3015 }
3016 kvm_register_write(&svm->vcpu, reg, val);
3017 }
3018 kvm_complete_insn_gp(&svm->vcpu, err);
3019
3020 return 1;
3021}
3022
cae3797a
AP
3023static int dr_interception(struct vcpu_svm *svm)
3024{
3025 int reg, dr;
3026 unsigned long val;
cae3797a 3027
facb0139
PB
3028 if (svm->vcpu.guest_debug == 0) {
3029 /*
3030 * No more DR vmexits; force a reload of the debug registers
3031 * and reenter on this instruction. The next vmexit will
3032 * retrieve the full state of the debug registers.
3033 */
3034 clr_dr_intercepts(svm);
3035 svm->vcpu.arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
3036 return 1;
3037 }
3038
cae3797a
AP
3039 if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS))
3040 return emulate_on_interception(svm);
3041
3042 reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
3043 dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0;
3044
3045 if (dr >= 16) { /* mov to DRn */
16f8a6f9
NA
3046 if (!kvm_require_dr(&svm->vcpu, dr - 16))
3047 return 1;
cae3797a
AP
3048 val = kvm_register_read(&svm->vcpu, reg);
3049 kvm_set_dr(&svm->vcpu, dr - 16, val);
3050 } else {
16f8a6f9
NA
3051 if (!kvm_require_dr(&svm->vcpu, dr))
3052 return 1;
3053 kvm_get_dr(&svm->vcpu, dr, &val);
3054 kvm_register_write(&svm->vcpu, reg, val);
cae3797a
AP
3055 }
3056
2c46d2ae
JR
3057 skip_emulated_instruction(&svm->vcpu);
3058
cae3797a
AP
3059 return 1;
3060}
3061
851ba692 3062static int cr8_write_interception(struct vcpu_svm *svm)
1d075434 3063{
851ba692 3064 struct kvm_run *kvm_run = svm->vcpu.run;
eea1cff9 3065 int r;
851ba692 3066
0a5fff19
GN
3067 u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
3068 /* instruction emulation calls kvm_set_cr8() */
7ff76d58 3069 r = cr_interception(svm);
35754c98 3070 if (lapic_in_kernel(&svm->vcpu))
7ff76d58 3071 return r;
0a5fff19 3072 if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
7ff76d58 3073 return r;
1d075434
JR
3074 kvm_run->exit_reason = KVM_EXIT_SET_TPR;
3075 return 0;
3076}
3077
48d89b92 3078static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
d5c1785d
NHE
3079{
3080 struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu));
3081 return vmcb->control.tsc_offset +
886b470c 3082 svm_scale_tsc(vcpu, host_tsc);
d5c1785d
NHE
3083}
3084
609e36d3 3085static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
6aa8b732 3086{
a2fa3e9f
GH
3087 struct vcpu_svm *svm = to_svm(vcpu);
3088
609e36d3 3089 switch (msr_info->index) {
af24a4e4 3090 case MSR_IA32_TSC: {
609e36d3 3091 msr_info->data = svm->vmcb->control.tsc_offset +
4ea1636b 3092 svm_scale_tsc(vcpu, rdtsc());
fbc0db76 3093
6aa8b732
AK
3094 break;
3095 }
8c06585d 3096 case MSR_STAR:
609e36d3 3097 msr_info->data = svm->vmcb->save.star;
6aa8b732 3098 break;
0e859cac 3099#ifdef CONFIG_X86_64
6aa8b732 3100 case MSR_LSTAR:
609e36d3 3101 msr_info->data = svm->vmcb->save.lstar;
6aa8b732
AK
3102 break;
3103 case MSR_CSTAR:
609e36d3 3104 msr_info->data = svm->vmcb->save.cstar;
6aa8b732
AK
3105 break;
3106 case MSR_KERNEL_GS_BASE:
609e36d3 3107 msr_info->data = svm->vmcb->save.kernel_gs_base;
6aa8b732
AK
3108 break;
3109 case MSR_SYSCALL_MASK:
609e36d3 3110 msr_info->data = svm->vmcb->save.sfmask;
6aa8b732
AK
3111 break;
3112#endif
3113 case MSR_IA32_SYSENTER_CS:
609e36d3 3114 msr_info->data = svm->vmcb->save.sysenter_cs;
6aa8b732
AK
3115 break;
3116 case MSR_IA32_SYSENTER_EIP:
609e36d3 3117 msr_info->data = svm->sysenter_eip;
6aa8b732
AK
3118 break;
3119 case MSR_IA32_SYSENTER_ESP:
609e36d3 3120 msr_info->data = svm->sysenter_esp;
6aa8b732 3121 break;
e0231715
JR
3122 /*
3123 * Nobody will change the following 5 values in the VMCB so we can
3124 * safely return them on rdmsr. They will always be 0 until LBRV is
3125 * implemented.
3126 */
a2938c80 3127 case MSR_IA32_DEBUGCTLMSR:
609e36d3 3128 msr_info->data = svm->vmcb->save.dbgctl;
a2938c80
JR
3129 break;
3130 case MSR_IA32_LASTBRANCHFROMIP:
609e36d3 3131 msr_info->data = svm->vmcb->save.br_from;
a2938c80
JR
3132 break;
3133 case MSR_IA32_LASTBRANCHTOIP:
609e36d3 3134 msr_info->data = svm->vmcb->save.br_to;
a2938c80
JR
3135 break;
3136 case MSR_IA32_LASTINTFROMIP:
609e36d3 3137 msr_info->data = svm->vmcb->save.last_excp_from;
a2938c80
JR
3138 break;
3139 case MSR_IA32_LASTINTTOIP:
609e36d3 3140 msr_info->data = svm->vmcb->save.last_excp_to;
a2938c80 3141 break;
b286d5d8 3142 case MSR_VM_HSAVE_PA:
609e36d3 3143 msr_info->data = svm->nested.hsave_msr;
b286d5d8 3144 break;
eb6f302e 3145 case MSR_VM_CR:
609e36d3 3146 msr_info->data = svm->nested.vm_cr_msr;
eb6f302e 3147 break;
c8a73f18 3148 case MSR_IA32_UCODE_REV:
609e36d3 3149 msr_info->data = 0x01000065;
c8a73f18 3150 break;
6aa8b732 3151 default:
609e36d3 3152 return kvm_get_msr_common(vcpu, msr_info);
6aa8b732
AK
3153 }
3154 return 0;
3155}
3156
851ba692 3157static int rdmsr_interception(struct vcpu_svm *svm)
6aa8b732 3158{
668f198f 3159 u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
609e36d3 3160 struct msr_data msr_info;
6aa8b732 3161
609e36d3
PB
3162 msr_info.index = ecx;
3163 msr_info.host_initiated = false;
3164 if (svm_get_msr(&svm->vcpu, &msr_info)) {
59200273 3165 trace_kvm_msr_read_ex(ecx);
c1a5d4f9 3166 kvm_inject_gp(&svm->vcpu, 0);
59200273 3167 } else {
609e36d3 3168 trace_kvm_msr_read(ecx, msr_info.data);
af9ca2d7 3169
609e36d3
PB
3170 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX,
3171 msr_info.data & 0xffffffff);
3172 kvm_register_write(&svm->vcpu, VCPU_REGS_RDX,
3173 msr_info.data >> 32);
5fdbf976 3174 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
e756fc62 3175 skip_emulated_instruction(&svm->vcpu);
6aa8b732
AK
3176 }
3177 return 1;
3178}
3179
4a810181
JR
3180static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
3181{
3182 struct vcpu_svm *svm = to_svm(vcpu);
3183 int svm_dis, chg_mask;
3184
3185 if (data & ~SVM_VM_CR_VALID_MASK)
3186 return 1;
3187
3188 chg_mask = SVM_VM_CR_VALID_MASK;
3189
3190 if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK)
3191 chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK);
3192
3193 svm->nested.vm_cr_msr &= ~chg_mask;
3194 svm->nested.vm_cr_msr |= (data & chg_mask);
3195
3196 svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK;
3197
3198 /* check for svm_disable while efer.svme is set */
3199 if (svm_dis && (vcpu->arch.efer & EFER_SVME))
3200 return 1;
3201
3202 return 0;
3203}
3204
8fe8ab46 3205static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
6aa8b732 3206{
a2fa3e9f
GH
3207 struct vcpu_svm *svm = to_svm(vcpu);
3208
8fe8ab46
WA
3209 u32 ecx = msr->index;
3210 u64 data = msr->data;
6aa8b732 3211 switch (ecx) {
f4e1b3c8 3212 case MSR_IA32_TSC:
8fe8ab46 3213 kvm_write_tsc(vcpu, msr);
6aa8b732 3214 break;
8c06585d 3215 case MSR_STAR:
a2fa3e9f 3216 svm->vmcb->save.star = data;
6aa8b732 3217 break;
49b14f24 3218#ifdef CONFIG_X86_64
6aa8b732 3219 case MSR_LSTAR:
a2fa3e9f 3220 svm->vmcb->save.lstar = data;
6aa8b732
AK
3221 break;
3222 case MSR_CSTAR:
a2fa3e9f 3223 svm->vmcb->save.cstar = data;
6aa8b732
AK
3224 break;
3225 case MSR_KERNEL_GS_BASE:
a2fa3e9f 3226 svm->vmcb->save.kernel_gs_base = data;
6aa8b732
AK
3227 break;
3228 case MSR_SYSCALL_MASK:
a2fa3e9f 3229 svm->vmcb->save.sfmask = data;
6aa8b732
AK
3230 break;
3231#endif
3232 case MSR_IA32_SYSENTER_CS:
a2fa3e9f 3233 svm->vmcb->save.sysenter_cs = data;
6aa8b732
AK
3234 break;
3235 case MSR_IA32_SYSENTER_EIP:
017cb99e 3236 svm->sysenter_eip = data;
a2fa3e9f 3237 svm->vmcb->save.sysenter_eip = data;
6aa8b732
AK
3238 break;
3239 case MSR_IA32_SYSENTER_ESP:
017cb99e 3240 svm->sysenter_esp = data;
a2fa3e9f 3241 svm->vmcb->save.sysenter_esp = data;
6aa8b732 3242 break;
a2938c80 3243 case MSR_IA32_DEBUGCTLMSR:
2a6b20b8 3244 if (!boot_cpu_has(X86_FEATURE_LBRV)) {
a737f256
CD
3245 vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
3246 __func__, data);
24e09cbf
JR
3247 break;
3248 }
3249 if (data & DEBUGCTL_RESERVED_BITS)
3250 return 1;
3251
3252 svm->vmcb->save.dbgctl = data;
b53ba3f9 3253 mark_dirty(svm->vmcb, VMCB_LBR);
24e09cbf
JR
3254 if (data & (1ULL<<0))
3255 svm_enable_lbrv(svm);
3256 else
3257 svm_disable_lbrv(svm);
a2938c80 3258 break;
b286d5d8 3259 case MSR_VM_HSAVE_PA:
e6aa9abd 3260 svm->nested.hsave_msr = data;
62b9abaa 3261 break;
3c5d0a44 3262 case MSR_VM_CR:
4a810181 3263 return svm_set_vm_cr(vcpu, data);
3c5d0a44 3264 case MSR_VM_IGNNE:
a737f256 3265 vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
3c5d0a44 3266 break;
6aa8b732 3267 default:
8fe8ab46 3268 return kvm_set_msr_common(vcpu, msr);
6aa8b732
AK
3269 }
3270 return 0;
3271}
3272
851ba692 3273static int wrmsr_interception(struct vcpu_svm *svm)
6aa8b732 3274{
8fe8ab46 3275 struct msr_data msr;
668f198f
DK
3276 u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
3277 u64 data = kvm_read_edx_eax(&svm->vcpu);
af9ca2d7 3278
8fe8ab46
WA
3279 msr.data = data;
3280 msr.index = ecx;
3281 msr.host_initiated = false;
af9ca2d7 3282
5fdbf976 3283 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
854e8bb1 3284 if (kvm_set_msr(&svm->vcpu, &msr)) {
59200273 3285 trace_kvm_msr_write_ex(ecx, data);
c1a5d4f9 3286 kvm_inject_gp(&svm->vcpu, 0);
59200273
AK
3287 } else {
3288 trace_kvm_msr_write(ecx, data);
e756fc62 3289 skip_emulated_instruction(&svm->vcpu);
59200273 3290 }
6aa8b732
AK
3291 return 1;
3292}
3293
851ba692 3294static int msr_interception(struct vcpu_svm *svm)
6aa8b732 3295{
e756fc62 3296 if (svm->vmcb->control.exit_info_1)
851ba692 3297 return wrmsr_interception(svm);
6aa8b732 3298 else
851ba692 3299 return rdmsr_interception(svm);
6aa8b732
AK
3300}
3301
851ba692 3302static int interrupt_window_interception(struct vcpu_svm *svm)
c1150d8c 3303{
3842d135 3304 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
f0b85051 3305 svm_clear_vintr(svm);
85f455f7 3306 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
decdbf6a 3307 mark_dirty(svm->vmcb, VMCB_INTR);
675acb75 3308 ++svm->vcpu.stat.irq_window_exits;
c1150d8c
DL
3309 return 1;
3310}
3311
565d0998
ML
3312static int pause_interception(struct vcpu_svm *svm)
3313{
3314 kvm_vcpu_on_spin(&(svm->vcpu));
3315 return 1;
3316}
3317
87c00572
GS
3318static int nop_interception(struct vcpu_svm *svm)
3319{
3320 skip_emulated_instruction(&(svm->vcpu));
3321 return 1;
3322}
3323
3324static int monitor_interception(struct vcpu_svm *svm)
3325{
3326 printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
3327 return nop_interception(svm);
3328}
3329
3330static int mwait_interception(struct vcpu_svm *svm)
3331{
3332 printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
3333 return nop_interception(svm);
3334}
3335
09941fbb 3336static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
7ff76d58
AP
3337 [SVM_EXIT_READ_CR0] = cr_interception,
3338 [SVM_EXIT_READ_CR3] = cr_interception,
3339 [SVM_EXIT_READ_CR4] = cr_interception,
3340 [SVM_EXIT_READ_CR8] = cr_interception,
5e57518d 3341 [SVM_EXIT_CR0_SEL_WRITE] = cr_interception,
628afd2a 3342 [SVM_EXIT_WRITE_CR0] = cr_interception,
7ff76d58
AP
3343 [SVM_EXIT_WRITE_CR3] = cr_interception,
3344 [SVM_EXIT_WRITE_CR4] = cr_interception,
e0231715 3345 [SVM_EXIT_WRITE_CR8] = cr8_write_interception,
cae3797a
AP
3346 [SVM_EXIT_READ_DR0] = dr_interception,
3347 [SVM_EXIT_READ_DR1] = dr_interception,
3348 [SVM_EXIT_READ_DR2] = dr_interception,
3349 [SVM_EXIT_READ_DR3] = dr_interception,
3350 [SVM_EXIT_READ_DR4] = dr_interception,
3351 [SVM_EXIT_READ_DR5] = dr_interception,
3352 [SVM_EXIT_READ_DR6] = dr_interception,
3353 [SVM_EXIT_READ_DR7] = dr_interception,
3354 [SVM_EXIT_WRITE_DR0] = dr_interception,
3355 [SVM_EXIT_WRITE_DR1] = dr_interception,
3356 [SVM_EXIT_WRITE_DR2] = dr_interception,
3357 [SVM_EXIT_WRITE_DR3] = dr_interception,
3358 [SVM_EXIT_WRITE_DR4] = dr_interception,
3359 [SVM_EXIT_WRITE_DR5] = dr_interception,
3360 [SVM_EXIT_WRITE_DR6] = dr_interception,
3361 [SVM_EXIT_WRITE_DR7] = dr_interception,
d0bfb940
JK
3362 [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception,
3363 [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception,
7aa81cc0 3364 [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception,
e0231715
JR
3365 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
3366 [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception,
3367 [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception,
3368 [SVM_EXIT_INTR] = intr_interception,
c47f098d 3369 [SVM_EXIT_NMI] = nmi_interception,
6aa8b732
AK
3370 [SVM_EXIT_SMI] = nop_on_interception,
3371 [SVM_EXIT_INIT] = nop_on_interception,
c1150d8c 3372 [SVM_EXIT_VINTR] = interrupt_window_interception,
332b56e4 3373 [SVM_EXIT_RDPMC] = rdpmc_interception,
6aa8b732 3374 [SVM_EXIT_CPUID] = cpuid_interception,
95ba8273 3375 [SVM_EXIT_IRET] = iret_interception,
cf5a94d1 3376 [SVM_EXIT_INVD] = emulate_on_interception,
565d0998 3377 [SVM_EXIT_PAUSE] = pause_interception,
6aa8b732 3378 [SVM_EXIT_HLT] = halt_interception,
a7052897 3379 [SVM_EXIT_INVLPG] = invlpg_interception,
ff092385 3380 [SVM_EXIT_INVLPGA] = invlpga_interception,
e0231715 3381 [SVM_EXIT_IOIO] = io_interception,
6aa8b732
AK
3382 [SVM_EXIT_MSR] = msr_interception,
3383 [SVM_EXIT_TASK_SWITCH] = task_switch_interception,
46fe4ddd 3384 [SVM_EXIT_SHUTDOWN] = shutdown_interception,
3d6368ef 3385 [SVM_EXIT_VMRUN] = vmrun_interception,
02e235bc 3386 [SVM_EXIT_VMMCALL] = vmmcall_interception,
5542675b
AG
3387 [SVM_EXIT_VMLOAD] = vmload_interception,
3388 [SVM_EXIT_VMSAVE] = vmsave_interception,
1371d904
AG
3389 [SVM_EXIT_STGI] = stgi_interception,
3390 [SVM_EXIT_CLGI] = clgi_interception,
532a46b9 3391 [SVM_EXIT_SKINIT] = skinit_interception,
dab429a7 3392 [SVM_EXIT_WBINVD] = wbinvd_interception,
87c00572
GS
3393 [SVM_EXIT_MONITOR] = monitor_interception,
3394 [SVM_EXIT_MWAIT] = mwait_interception,
81dd35d4 3395 [SVM_EXIT_XSETBV] = xsetbv_interception,
709ddebf 3396 [SVM_EXIT_NPF] = pf_interception,
64d60670 3397 [SVM_EXIT_RSM] = emulate_on_interception,
6aa8b732
AK
3398};
3399
ae8cc059 3400static void dump_vmcb(struct kvm_vcpu *vcpu)
3f10c846
JR
3401{
3402 struct vcpu_svm *svm = to_svm(vcpu);
3403 struct vmcb_control_area *control = &svm->vmcb->control;
3404 struct vmcb_save_area *save = &svm->vmcb->save;
3405
3406 pr_err("VMCB Control Area:\n");
ae8cc059
JP
3407 pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff);
3408 pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16);
3409 pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff);
3410 pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16);
3411 pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions);
3412 pr_err("%-20s%016llx\n", "intercepts:", control->intercept);
3413 pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count);
3414 pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa);
3415 pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa);
3416 pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset);
3417 pr_err("%-20s%d\n", "asid:", control->asid);
3418 pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl);
3419 pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl);
3420 pr_err("%-20s%08x\n", "int_vector:", control->int_vector);
3421 pr_err("%-20s%08x\n", "int_state:", control->int_state);
3422 pr_err("%-20s%08x\n", "exit_code:", control->exit_code);
3423 pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1);
3424 pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2);
3425 pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info);
3426 pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err);
3427 pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
3428 pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
3429 pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
3430 pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
3431 pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl);
3432 pr_err("%-20s%016llx\n", "next_rip:", control->next_rip);
3f10c846 3433 pr_err("VMCB State Save Area:\n");
ae8cc059
JP
3434 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3435 "es:",
3436 save->es.selector, save->es.attrib,
3437 save->es.limit, save->es.base);
3438 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3439 "cs:",
3440 save->cs.selector, save->cs.attrib,
3441 save->cs.limit, save->cs.base);
3442 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3443 "ss:",
3444 save->ss.selector, save->ss.attrib,
3445 save->ss.limit, save->ss.base);
3446 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3447 "ds:",
3448 save->ds.selector, save->ds.attrib,
3449 save->ds.limit, save->ds.base);
3450 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3451 "fs:",
3452 save->fs.selector, save->fs.attrib,
3453 save->fs.limit, save->fs.base);
3454 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3455 "gs:",
3456 save->gs.selector, save->gs.attrib,
3457 save->gs.limit, save->gs.base);
3458 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3459 "gdtr:",
3460 save->gdtr.selector, save->gdtr.attrib,
3461 save->gdtr.limit, save->gdtr.base);
3462 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3463 "ldtr:",
3464 save->ldtr.selector, save->ldtr.attrib,
3465 save->ldtr.limit, save->ldtr.base);
3466 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3467 "idtr:",
3468 save->idtr.selector, save->idtr.attrib,
3469 save->idtr.limit, save->idtr.base);
3470 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3471 "tr:",
3472 save->tr.selector, save->tr.attrib,
3473 save->tr.limit, save->tr.base);
3f10c846
JR
3474 pr_err("cpl: %d efer: %016llx\n",
3475 save->cpl, save->efer);
ae8cc059
JP
3476 pr_err("%-15s %016llx %-13s %016llx\n",
3477 "cr0:", save->cr0, "cr2:", save->cr2);
3478 pr_err("%-15s %016llx %-13s %016llx\n",
3479 "cr3:", save->cr3, "cr4:", save->cr4);
3480 pr_err("%-15s %016llx %-13s %016llx\n",
3481 "dr6:", save->dr6, "dr7:", save->dr7);
3482 pr_err("%-15s %016llx %-13s %016llx\n",
3483 "rip:", save->rip, "rflags:", save->rflags);
3484 pr_err("%-15s %016llx %-13s %016llx\n",
3485 "rsp:", save->rsp, "rax:", save->rax);
3486 pr_err("%-15s %016llx %-13s %016llx\n",
3487 "star:", save->star, "lstar:", save->lstar);
3488 pr_err("%-15s %016llx %-13s %016llx\n",
3489 "cstar:", save->cstar, "sfmask:", save->sfmask);
3490 pr_err("%-15s %016llx %-13s %016llx\n",
3491 "kernel_gs_base:", save->kernel_gs_base,
3492 "sysenter_cs:", save->sysenter_cs);
3493 pr_err("%-15s %016llx %-13s %016llx\n",
3494 "sysenter_esp:", save->sysenter_esp,
3495 "sysenter_eip:", save->sysenter_eip);
3496 pr_err("%-15s %016llx %-13s %016llx\n",
3497 "gpat:", save->g_pat, "dbgctl:", save->dbgctl);
3498 pr_err("%-15s %016llx %-13s %016llx\n",
3499 "br_from:", save->br_from, "br_to:", save->br_to);
3500 pr_err("%-15s %016llx %-13s %016llx\n",
3501 "excp_from:", save->last_excp_from,
3502 "excp_to:", save->last_excp_to);
3f10c846
JR
3503}
3504
586f9607
AK
3505static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
3506{
3507 struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
3508
3509 *info1 = control->exit_info_1;
3510 *info2 = control->exit_info_2;
3511}
3512
851ba692 3513static int handle_exit(struct kvm_vcpu *vcpu)
6aa8b732 3514{
04d2cc77 3515 struct vcpu_svm *svm = to_svm(vcpu);
851ba692 3516 struct kvm_run *kvm_run = vcpu->run;
a2fa3e9f 3517 u32 exit_code = svm->vmcb->control.exit_code;
6aa8b732 3518
4ee546b4 3519 if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
2be4fc7a
JR
3520 vcpu->arch.cr0 = svm->vmcb->save.cr0;
3521 if (npt_enabled)
3522 vcpu->arch.cr3 = svm->vmcb->save.cr3;
af9ca2d7 3523
cd3ff653
JR
3524 if (unlikely(svm->nested.exit_required)) {
3525 nested_svm_vmexit(svm);
3526 svm->nested.exit_required = false;
3527
3528 return 1;
3529 }
3530
2030753d 3531 if (is_guest_mode(vcpu)) {
410e4d57
JR
3532 int vmexit;
3533
d8cabddf
JR
3534 trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
3535 svm->vmcb->control.exit_info_1,
3536 svm->vmcb->control.exit_info_2,
3537 svm->vmcb->control.exit_int_info,
e097e5ff
SH
3538 svm->vmcb->control.exit_int_info_err,
3539 KVM_ISA_SVM);
d8cabddf 3540
410e4d57
JR
3541 vmexit = nested_svm_exit_special(svm);
3542
3543 if (vmexit == NESTED_EXIT_CONTINUE)
3544 vmexit = nested_svm_exit_handled(svm);
3545
3546 if (vmexit == NESTED_EXIT_DONE)
cf74a78b 3547 return 1;
cf74a78b
AG
3548 }
3549
a5c3832d
JR
3550 svm_complete_interrupts(svm);
3551
04d2cc77
AK
3552 if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
3553 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
3554 kvm_run->fail_entry.hardware_entry_failure_reason
3555 = svm->vmcb->control.exit_code;
3f10c846
JR
3556 pr_err("KVM: FAILED VMRUN WITH VMCB:\n");
3557 dump_vmcb(vcpu);
04d2cc77
AK
3558 return 0;
3559 }
3560
a2fa3e9f 3561 if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
709ddebf 3562 exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
55c5e464
JR
3563 exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH &&
3564 exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI)
6614c7d0 3565 printk(KERN_ERR "%s: unexpected exit_int_info 0x%x "
6aa8b732 3566 "exit_code 0x%x\n",
b8688d51 3567 __func__, svm->vmcb->control.exit_int_info,
6aa8b732
AK
3568 exit_code);
3569
9d8f549d 3570 if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
56919c5c 3571 || !svm_exit_handlers[exit_code]) {
faac2458 3572 WARN_ONCE(1, "svm: unexpected exit reason 0x%x\n", exit_code);
2bc19dc3
MT
3573 kvm_queue_exception(vcpu, UD_VECTOR);
3574 return 1;
6aa8b732
AK
3575 }
3576
851ba692 3577 return svm_exit_handlers[exit_code](svm);
6aa8b732
AK
3578}
3579
3580static void reload_tss(struct kvm_vcpu *vcpu)
3581{
3582 int cpu = raw_smp_processor_id();
3583
0fe1e009
TH
3584 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
3585 sd->tss_desc->type = 9; /* available 32/64-bit TSS */
6aa8b732
AK
3586 load_TR_desc();
3587}
3588
e756fc62 3589static void pre_svm_run(struct vcpu_svm *svm)
6aa8b732
AK
3590{
3591 int cpu = raw_smp_processor_id();
3592
0fe1e009 3593 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
6aa8b732 3594
4b656b12 3595 /* FIXME: handle wraparound of asid_generation */
0fe1e009
TH
3596 if (svm->asid_generation != sd->asid_generation)
3597 new_asid(svm, sd);
6aa8b732
AK
3598}
3599
95ba8273
GN
3600static void svm_inject_nmi(struct kvm_vcpu *vcpu)
3601{
3602 struct vcpu_svm *svm = to_svm(vcpu);
3603
3604 svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
3605 vcpu->arch.hflags |= HF_NMI_MASK;
8a05a1b8 3606 set_intercept(svm, INTERCEPT_IRET);
95ba8273
GN
3607 ++vcpu->stat.nmi_injections;
3608}
6aa8b732 3609
85f455f7 3610static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
6aa8b732
AK
3611{
3612 struct vmcb_control_area *control;
3613
e756fc62 3614 control = &svm->vmcb->control;
85f455f7 3615 control->int_vector = irq;
6aa8b732
AK
3616 control->int_ctl &= ~V_INTR_PRIO_MASK;
3617 control->int_ctl |= V_IRQ_MASK |
3618 ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
decdbf6a 3619 mark_dirty(svm->vmcb, VMCB_INTR);
6aa8b732
AK
3620}
3621
66fd3f7f 3622static void svm_set_irq(struct kvm_vcpu *vcpu)
2a8067f1
ED
3623{
3624 struct vcpu_svm *svm = to_svm(vcpu);
3625
2af9194d 3626 BUG_ON(!(gif_set(svm)));
cf74a78b 3627
9fb2d2b4
GN
3628 trace_kvm_inj_virq(vcpu->arch.interrupt.nr);
3629 ++vcpu->stat.irq_injections;
3630
219b65dc
AG
3631 svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr |
3632 SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
2a8067f1
ED
3633}
3634
95ba8273 3635static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
aaacfc9a
JR
3636{
3637 struct vcpu_svm *svm = to_svm(vcpu);
aaacfc9a 3638
2030753d 3639 if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
88ab24ad
JR
3640 return;
3641
596f3142
RK
3642 clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
3643
95ba8273 3644 if (irr == -1)
aaacfc9a
JR
3645 return;
3646
95ba8273 3647 if (tpr >= irr)
4ee546b4 3648 set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
95ba8273 3649}
aaacfc9a 3650
8d14695f
YZ
3651static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
3652{
3653 return;
3654}
3655
/* Always reports 0 for every vcpu. */
static int svm_cpu_uses_apicv(struct kvm_vcpu *vcpu)
{
	const int uses_apicv = 0;

	return uses_apicv;
}
3660
/* Intentionally empty: this implementation does nothing here. */
static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu)
{
}
3665
a20ed54d
YZ
/* Intentionally empty: this implementation does nothing here. */
static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu)
{
}
3670
95ba8273
GN
3671static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
3672{
3673 struct vcpu_svm *svm = to_svm(vcpu);
3674 struct vmcb *vmcb = svm->vmcb;
924584cc
JR
3675 int ret;
3676 ret = !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
3677 !(svm->vcpu.arch.hflags & HF_NMI_MASK);
3678 ret = ret && gif_set(svm) && nested_svm_nmi(svm);
3679
3680 return ret;
aaacfc9a
JR
3681}
3682
3cfc3092
JK
3683static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
3684{
3685 struct vcpu_svm *svm = to_svm(vcpu);
3686
3687 return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
3688}
3689
3690static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
3691{
3692 struct vcpu_svm *svm = to_svm(vcpu);
3693
3694 if (masked) {
3695 svm->vcpu.arch.hflags |= HF_NMI_MASK;
8a05a1b8 3696 set_intercept(svm, INTERCEPT_IRET);
3cfc3092
JK
3697 } else {
3698 svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
8a05a1b8 3699 clr_intercept(svm, INTERCEPT_IRET);
3cfc3092
JK
3700 }
3701}
3702
78646121
GN
3703static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
3704{
3705 struct vcpu_svm *svm = to_svm(vcpu);
3706 struct vmcb *vmcb = svm->vmcb;
7fcdb510
JR
3707 int ret;
3708
3709 if (!gif_set(svm) ||
3710 (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
3711 return 0;
3712
f6e78475 3713 ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF);
7fcdb510 3714
2030753d 3715 if (is_guest_mode(vcpu))
7fcdb510
JR
3716 return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);
3717
3718 return ret;
78646121
GN
3719}
3720
/*
 * Request an exit as soon as the guest can take an interrupt, by
 * enabling the VINTR intercept and injecting a dummy virtual interrupt.
 */
static void enable_irq_window(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	/*
	 * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes
	 * 1, because that's a separate STGI/VMRUN intercept.  The next time
	 * we get that intercept, this function will be called again though
	 * and we'll get the vintr intercept.
	 */
	if (!gif_set(svm) || !nested_svm_intr(svm))
		return;

	svm_set_vintr(svm);
	svm_inject_irq(svm, 0x0);
}
3736
c9a7953f 3737static void enable_nmi_window(struct kvm_vcpu *vcpu)
c1150d8c 3738{
04d2cc77 3739 struct vcpu_svm *svm = to_svm(vcpu);
c1150d8c 3740
44c11430
GN
3741 if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
3742 == HF_NMI_MASK)
c9a7953f 3743 return; /* IRET will cause a vm exit */
44c11430 3744
e0231715
JR
3745 /*
3746 * Something prevents NMI from been injected. Single step over possible
3747 * problem (IRET or exception injection or interrupt shadow)
3748 */
6be7d306 3749 svm->nmi_singlestep = true;
44c11430 3750 svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
c8639010 3751 update_db_bp_intercept(vcpu);
c1150d8c
DL
3752}
3753
cbc94022
IE
/* Accepts any address and reports success; both arguments are unused. */
static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
{
	const int ret = 0;

	return ret;
}
3758
d9e368d6
AK
3759static void svm_flush_tlb(struct kvm_vcpu *vcpu)
3760{
38e5e92f
JR
3761 struct vcpu_svm *svm = to_svm(vcpu);
3762
3763 if (static_cpu_has(X86_FEATURE_FLUSHBYASID))
3764 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
3765 else
3766 svm->asid_generation--;
d9e368d6
AK
3767}
3768
04d2cc77
AK
/* Intentionally empty: this implementation does nothing here. */
static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
{
}
3772
d7bf8221
JR
3773static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
3774{
3775 struct vcpu_svm *svm = to_svm(vcpu);
3776
2030753d 3777 if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
88ab24ad
JR
3778 return;
3779
4ee546b4 3780 if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) {
d7bf8221 3781 int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
615d5193 3782 kvm_set_cr8(vcpu, cr8);
d7bf8221
JR
3783 }
3784}
3785
649d6864
JR
3786static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
3787{
3788 struct vcpu_svm *svm = to_svm(vcpu);
3789 u64 cr8;
3790
2030753d 3791 if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
88ab24ad
JR
3792 return;
3793
649d6864
JR
3794 cr8 = kvm_get_cr8(vcpu);
3795 svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
3796 svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
3797}
3798
9222be18
GN
3799static void svm_complete_interrupts(struct vcpu_svm *svm)
3800{
3801 u8 vector;
3802 int type;
3803 u32 exitintinfo = svm->vmcb->control.exit_int_info;
66b7138f
JK
3804 unsigned int3_injected = svm->int3_injected;
3805
3806 svm->int3_injected = 0;
9222be18 3807
bd3d1ec3
AK
3808 /*
3809 * If we've made progress since setting HF_IRET_MASK, we've
3810 * executed an IRET and can allow NMI injection.
3811 */
3812 if ((svm->vcpu.arch.hflags & HF_IRET_MASK)
3813 && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) {
44c11430 3814 svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
3842d135
AK
3815 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
3816 }
44c11430 3817
9222be18
GN
3818 svm->vcpu.arch.nmi_injected = false;
3819 kvm_clear_exception_queue(&svm->vcpu);
3820 kvm_clear_interrupt_queue(&svm->vcpu);
3821
3822 if (!(exitintinfo & SVM_EXITINTINFO_VALID))
3823 return;
3824
3842d135
AK
3825 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
3826
9222be18
GN
3827 vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
3828 type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
3829
3830 switch (type) {
3831 case SVM_EXITINTINFO_TYPE_NMI:
3832 svm->vcpu.arch.nmi_injected = true;
3833 break;
3834 case SVM_EXITINTINFO_TYPE_EXEPT:
66b7138f
JK
3835 /*
3836 * In case of software exceptions, do not reinject the vector,
3837 * but re-execute the instruction instead. Rewind RIP first
3838 * if we emulated INT3 before.
3839 */
3840 if (kvm_exception_is_soft(vector)) {
3841 if (vector == BP_VECTOR && int3_injected &&
3842 kvm_is_linear_rip(&svm->vcpu, svm->int3_rip))
3843 kvm_rip_write(&svm->vcpu,
3844 kvm_rip_read(&svm->vcpu) -
3845 int3_injected);
9222be18 3846 break;
66b7138f 3847 }
9222be18
GN
3848 if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
3849 u32 err = svm->vmcb->control.exit_int_info_err;
ce7ddec4 3850 kvm_requeue_exception_e(&svm->vcpu, vector, err);
9222be18
GN
3851
3852 } else
ce7ddec4 3853 kvm_requeue_exception(&svm->vcpu, vector);
9222be18
GN
3854 break;
3855 case SVM_EXITINTINFO_TYPE_INTR:
66fd3f7f 3856 kvm_queue_interrupt(&svm->vcpu, vector, false);
9222be18
GN
3857 break;
3858 default:
3859 break;
3860 }
3861}
3862
b463a6f7
AK
3863static void svm_cancel_injection(struct kvm_vcpu *vcpu)
3864{
3865 struct vcpu_svm *svm = to_svm(vcpu);
3866 struct vmcb_control_area *control = &svm->vmcb->control;
3867
3868 control->exit_int_info = control->event_inj;
3869 control->exit_int_info_err = control->event_inj_err;
3870 control->event_inj = 0;
3871 svm_complete_interrupts(svm);
3872}
3873
851ba692 3874static void svm_vcpu_run(struct kvm_vcpu *vcpu)
6aa8b732 3875{
a2fa3e9f 3876 struct vcpu_svm *svm = to_svm(vcpu);
d9e368d6 3877
2041a06a
JR
3878 svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
3879 svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
3880 svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
3881
cd3ff653
JR
3882 /*
3883 * A vmexit emulation is required before the vcpu can be executed
3884 * again.
3885 */
3886 if (unlikely(svm->nested.exit_required))
3887 return;
3888
e756fc62 3889 pre_svm_run(svm);
6aa8b732 3890
649d6864
JR
3891 sync_lapic_to_cr8(vcpu);
3892
cda0ffdd 3893 svm->vmcb->save.cr2 = vcpu->arch.cr2;
6aa8b732 3894
04d2cc77
AK
3895 clgi();
3896
3897 local_irq_enable();
36241b8c 3898
6aa8b732 3899 asm volatile (
7454766f
AK
3900 "push %%" _ASM_BP "; \n\t"
3901 "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
3902 "mov %c[rcx](%[svm]), %%" _ASM_CX " \n\t"
3903 "mov %c[rdx](%[svm]), %%" _ASM_DX " \n\t"
3904 "mov %c[rsi](%[svm]), %%" _ASM_SI " \n\t"
3905 "mov %c[rdi](%[svm]), %%" _ASM_DI " \n\t"
3906 "mov %c[rbp](%[svm]), %%" _ASM_BP " \n\t"
05b3e0c2 3907#ifdef CONFIG_X86_64
fb3f0f51
RR
3908 "mov %c[r8](%[svm]), %%r8 \n\t"
3909 "mov %c[r9](%[svm]), %%r9 \n\t"
3910 "mov %c[r10](%[svm]), %%r10 \n\t"
3911 "mov %c[r11](%[svm]), %%r11 \n\t"
3912 "mov %c[r12](%[svm]), %%r12 \n\t"
3913 "mov %c[r13](%[svm]), %%r13 \n\t"
3914 "mov %c[r14](%[svm]), %%r14 \n\t"
3915 "mov %c[r15](%[svm]), %%r15 \n\t"
6aa8b732
AK
3916#endif
3917
6aa8b732 3918 /* Enter guest mode */
7454766f
AK
3919 "push %%" _ASM_AX " \n\t"
3920 "mov %c[vmcb](%[svm]), %%" _ASM_AX " \n\t"
4ecac3fd
AK
3921 __ex(SVM_VMLOAD) "\n\t"
3922 __ex(SVM_VMRUN) "\n\t"
3923 __ex(SVM_VMSAVE) "\n\t"
7454766f 3924 "pop %%" _ASM_AX " \n\t"
6aa8b732
AK
3925
3926 /* Save guest registers, load host registers */
7454766f
AK
3927 "mov %%" _ASM_BX ", %c[rbx](%[svm]) \n\t"
3928 "mov %%" _ASM_CX ", %c[rcx](%[svm]) \n\t"
3929 "mov %%" _ASM_DX ", %c[rdx](%[svm]) \n\t"
3930 "mov %%" _ASM_SI ", %c[rsi](%[svm]) \n\t"
3931 "mov %%" _ASM_DI ", %c[rdi](%[svm]) \n\t"
3932 "mov %%" _ASM_BP ", %c[rbp](%[svm]) \n\t"
05b3e0c2 3933#ifdef CONFIG_X86_64
fb3f0f51
RR
3934 "mov %%r8, %c[r8](%[svm]) \n\t"
3935 "mov %%r9, %c[r9](%[svm]) \n\t"
3936 "mov %%r10, %c[r10](%[svm]) \n\t"
3937 "mov %%r11, %c[r11](%[svm]) \n\t"
3938 "mov %%r12, %c[r12](%[svm]) \n\t"
3939 "mov %%r13, %c[r13](%[svm]) \n\t"
3940 "mov %%r14, %c[r14](%[svm]) \n\t"
3941 "mov %%r15, %c[r15](%[svm]) \n\t"
6aa8b732 3942#endif
7454766f 3943 "pop %%" _ASM_BP
6aa8b732 3944 :
fb3f0f51 3945 : [svm]"a"(svm),
6aa8b732 3946 [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
ad312c7c
ZX
3947 [rbx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBX])),
3948 [rcx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RCX])),
3949 [rdx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDX])),
3950 [rsi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RSI])),
3951 [rdi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDI])),
3952 [rbp]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBP]))
05b3e0c2 3953#ifdef CONFIG_X86_64
ad312c7c
ZX
3954 , [r8]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R8])),
3955 [r9]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R9])),
3956 [r10]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R10])),
3957 [r11]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R11])),
3958 [r12]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R12])),
3959 [r13]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R13])),
3960 [r14]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R14])),
3961 [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15]))
6aa8b732 3962#endif
54a08c04
LV
3963 : "cc", "memory"
3964#ifdef CONFIG_X86_64
7454766f 3965 , "rbx", "rcx", "rdx", "rsi", "rdi"
54a08c04 3966 , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
7454766f
AK
3967#else
3968 , "ebx", "ecx", "edx", "esi", "edi"
54a08c04
LV
3969#endif
3970 );
6aa8b732 3971
82ca2d10
AK
3972#ifdef CONFIG_X86_64
3973 wrmsrl(MSR_GS_BASE, svm->host.gs_base);
3974#else
dacccfdd 3975 loadsegment(fs, svm->host.fs);
831ca609
AK
3976#ifndef CONFIG_X86_32_LAZY_GS
3977 loadsegment(gs, svm->host.gs);
3978#endif
9581d442 3979#endif
6aa8b732
AK
3980
3981 reload_tss(vcpu);
3982
56ba47dd
AK
3983 local_irq_disable();
3984
13c34e07
AK
3985 vcpu->arch.cr2 = svm->vmcb->save.cr2;
3986 vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
3987 vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
3988 vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
3989
1e2b1dd7
JK
3990 trace_kvm_exit(svm->vmcb->control.exit_code, vcpu, KVM_ISA_SVM);
3991
3781c01c
JR
3992 if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
3993 kvm_before_handle_nmi(&svm->vcpu);
3994
3995 stgi();
3996
3997 /* Any pending NMI will happen here */
3998
3999 if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
4000 kvm_after_handle_nmi(&svm->vcpu);
4001
d7bf8221
JR
4002 sync_cr8_to_lapic(vcpu);
4003
a2fa3e9f 4004 svm->next_rip = 0;
9222be18 4005
38e5e92f
JR
4006 svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
4007
631bc487
GN
4008 /* if exit due to PF check for async PF */
4009 if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
4010 svm->apf_reason = kvm_read_and_reset_pf_reason();
4011
6de4f3ad
AK
4012 if (npt_enabled) {
4013 vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
4014 vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR);
4015 }
fe5913e4
JR
4016
4017 /*
4018 * We need to handle MC intercepts here before the vcpu has a chance to
4019 * change the physical cpu
4020 */
4021 if (unlikely(svm->vmcb->control.exit_code ==
4022 SVM_EXIT_EXCP_BASE + MC_VECTOR))
4023 svm_handle_mce(svm);
8d28fec4
RJ
4024
4025 mark_all_clean(svm->vmcb);
6aa8b732
AK
4026}
4027
6aa8b732
AK
4028static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
4029{
a2fa3e9f
GH
4030 struct vcpu_svm *svm = to_svm(vcpu);
4031
4032 svm->vmcb->save.cr3 = root;
dcca1a65 4033 mark_dirty(svm->vmcb, VMCB_CR);
f40f6a45 4034 svm_flush_tlb(vcpu);
6aa8b732
AK
4035}
4036
1c97f0a0
JR
4037static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
4038{
4039 struct vcpu_svm *svm = to_svm(vcpu);
4040
4041 svm->vmcb->control.nested_cr3 = root;
b2747166 4042 mark_dirty(svm->vmcb, VMCB_NPT);
1c97f0a0
JR
4043
4044 /* Also sync guest cr3 here in case we live migrate */
9f8fe504 4045 svm->vmcb->save.cr3 = kvm_read_cr3(vcpu);
dcca1a65 4046 mark_dirty(svm->vmcb, VMCB_CR);
1c97f0a0 4047
f40f6a45 4048 svm_flush_tlb(vcpu);
1c97f0a0
JR
4049}
4050
6aa8b732
AK
4051static int is_disabled(void)
4052{
6031a61c
JR
4053 u64 vm_cr;
4054
4055 rdmsrl(MSR_VM_CR, vm_cr);
4056 if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE))
4057 return 1;
4058
6aa8b732
AK
4059 return 0;
4060}
4061
102d8325
IM
/*
 * Write the three opcode bytes of the VMMCALL instruction (0f 01 d9) to
 * @hypercall.  @vcpu is not used.
 */
static void
svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
{
	static const unsigned char vmmcall_insn[] = { 0x0f, 0x01, 0xd9 };
	unsigned int i;

	for (i = 0; i < sizeof(vmmcall_insn); i++)
		hypercall[i] = vmmcall_insn[i];
}
4072
002c7f7c
YS
/*
 * Processor compatibility check: unconditionally stores 0 into the int
 * that @rtn points to.
 */
static void svm_check_processor_compat(void *rtn)
{
	int *result = rtn;

	*result = 0;
}
4077
774ead3a
AK
/* kvm_x86_ops.cpu_has_accelerated_tpr hook: always reports false. */
static bool svm_cpu_has_accelerated_tpr(void)
{
	return false;
}
4082
6d396b55
PB
/* kvm_x86_ops.cpu_has_high_real_mode_segbase hook: always reports true. */
static bool svm_has_high_real_mode_segbase(void)
{
	return true;
}
4087
fc07e76a
PB
4088static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
4089{
4090 return 0;
4091}
4092
0e851880
SY
4093static void svm_cpuid_update(struct kvm_vcpu *vcpu)
4094{
6092d3d3
JR
4095 struct vcpu_svm *svm = to_svm(vcpu);
4096
4097 /* Update nrips enabled cache */
4098 svm->nrips_enabled = !!guest_cpuid_has_nrips(&svm->vcpu);
0e851880
SY
4099}
4100
d4330ef2
JR
4101static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
4102{
c2c63a49 4103 switch (func) {
4c62a2dc
JR
4104 case 0x80000001:
4105 if (nested)
4106 entry->ecx |= (1 << 2); /* Set SVM bit */
4107 break;
c2c63a49
JR
4108 case 0x8000000A:
4109 entry->eax = 1; /* SVM revision 1 */
4110 entry->ebx = 8; /* Lets support 8 ASIDs in case we add proper
4111 ASID emulation to nested SVM */
4112 entry->ecx = 0; /* Reserved */
7a190667
JR
4113 entry->edx = 0; /* Per default do not support any
4114 additional features */
4115
4116 /* Support next_rip if host supports it */
2a6b20b8 4117 if (boot_cpu_has(X86_FEATURE_NRIPS))
7a190667 4118 entry->edx |= SVM_FEATURE_NRIP;
c2c63a49 4119
3d4aeaad
JR
4120 /* Support NPT for the guest if enabled */
4121 if (npt_enabled)
4122 entry->edx |= SVM_FEATURE_NPT;
4123
c2c63a49
JR
4124 break;
4125 }
d4330ef2
JR
4126}
4127
17cc3935 4128static int svm_get_lpage_level(void)
344f414f 4129{
17cc3935 4130 return PT_PDPE_LEVEL;
344f414f
JR
4131}
4132
4e47c7a6
SY
/* kvm_x86_ops.rdtscp_supported hook: always reports false. */
static bool svm_rdtscp_supported(void)
{
	return false;
}
4137
ad756a16
MJ
/* kvm_x86_ops.invpcid_supported hook: always reports false. */
static bool svm_invpcid_supported(void)
{
	return false;
}
4142
93c4adc7
PB
/* kvm_x86_ops.mpx_supported hook: always reports false. */
static bool svm_mpx_supported(void)
{
	return false;
}
4147
55412b2e
WL
/* kvm_x86_ops.xsaves_supported hook: always reports false. */
static bool svm_xsaves_supported(void)
{
	return false;
}
4152
f5f48ee1
SY
/* kvm_x86_ops.has_wbinvd_exit hook: always reports true. */
static bool svm_has_wbinvd_exit(void)
{
	return true;
}
4157
02daab21
AK
4158static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
4159{
4160 struct vcpu_svm *svm = to_svm(vcpu);
4161
18c918c5 4162 set_exception_intercept(svm, NM_VECTOR);
66a562f7 4163 update_cr0_intercept(svm);
02daab21
AK
4164}
4165
8061252e 4166#define PRE_EX(exit) { .exit_code = (exit), \
40e19b51 4167 .stage = X86_ICPT_PRE_EXCEPT, }
cfec82cb 4168#define POST_EX(exit) { .exit_code = (exit), \
40e19b51 4169 .stage = X86_ICPT_POST_EXCEPT, }
d7eb8203 4170#define POST_MEM(exit) { .exit_code = (exit), \
40e19b51 4171 .stage = X86_ICPT_POST_MEMACCESS, }
cfec82cb 4172
09941fbb 4173static const struct __x86_intercept {
cfec82cb
JR
4174 u32 exit_code;
4175 enum x86_intercept_stage stage;
cfec82cb
JR
4176} x86_intercept_map[] = {
4177 [x86_intercept_cr_read] = POST_EX(SVM_EXIT_READ_CR0),
4178 [x86_intercept_cr_write] = POST_EX(SVM_EXIT_WRITE_CR0),
4179 [x86_intercept_clts] = POST_EX(SVM_EXIT_WRITE_CR0),
4180 [x86_intercept_lmsw] = POST_EX(SVM_EXIT_WRITE_CR0),
4181 [x86_intercept_smsw] = POST_EX(SVM_EXIT_READ_CR0),
3b88e41a
JR
4182 [x86_intercept_dr_read] = POST_EX(SVM_EXIT_READ_DR0),
4183 [x86_intercept_dr_write] = POST_EX(SVM_EXIT_WRITE_DR0),
dee6bb70
JR
4184 [x86_intercept_sldt] = POST_EX(SVM_EXIT_LDTR_READ),
4185 [x86_intercept_str] = POST_EX(SVM_EXIT_TR_READ),
4186 [x86_intercept_lldt] = POST_EX(SVM_EXIT_LDTR_WRITE),
4187 [x86_intercept_ltr] = POST_EX(SVM_EXIT_TR_WRITE),
4188 [x86_intercept_sgdt] = POST_EX(SVM_EXIT_GDTR_READ),
4189 [x86_intercept_sidt] = POST_EX(SVM_EXIT_IDTR_READ),
4190 [x86_intercept_lgdt] = POST_EX(SVM_EXIT_GDTR_WRITE),
4191 [x86_intercept_lidt] = POST_EX(SVM_EXIT_IDTR_WRITE),
01de8b09
JR
4192 [x86_intercept_vmrun] = POST_EX(SVM_EXIT_VMRUN),
4193 [x86_intercept_vmmcall] = POST_EX(SVM_EXIT_VMMCALL),
4194 [x86_intercept_vmload] = POST_EX(SVM_EXIT_VMLOAD),
4195 [x86_intercept_vmsave] = POST_EX(SVM_EXIT_VMSAVE),
4196 [x86_intercept_stgi] = POST_EX(SVM_EXIT_STGI),
4197 [x86_intercept_clgi] = POST_EX(SVM_EXIT_CLGI),
4198 [x86_intercept_skinit] = POST_EX(SVM_EXIT_SKINIT),
4199 [x86_intercept_invlpga] = POST_EX(SVM_EXIT_INVLPGA),
d7eb8203
JR
4200 [x86_intercept_rdtscp] = POST_EX(SVM_EXIT_RDTSCP),
4201 [x86_intercept_monitor] = POST_MEM(SVM_EXIT_MONITOR),
4202 [x86_intercept_mwait] = POST_EX(SVM_EXIT_MWAIT),
8061252e
JR
4203 [x86_intercept_invlpg] = POST_EX(SVM_EXIT_INVLPG),
4204 [x86_intercept_invd] = POST_EX(SVM_EXIT_INVD),
4205 [x86_intercept_wbinvd] = POST_EX(SVM_EXIT_WBINVD),
4206 [x86_intercept_wrmsr] = POST_EX(SVM_EXIT_MSR),
4207 [x86_intercept_rdtsc] = POST_EX(SVM_EXIT_RDTSC),
4208 [x86_intercept_rdmsr] = POST_EX(SVM_EXIT_MSR),
4209 [x86_intercept_rdpmc] = POST_EX(SVM_EXIT_RDPMC),
4210 [x86_intercept_cpuid] = PRE_EX(SVM_EXIT_CPUID),
4211 [x86_intercept_rsm] = PRE_EX(SVM_EXIT_RSM),
bf608f88
JR
4212 [x86_intercept_pause] = PRE_EX(SVM_EXIT_PAUSE),
4213 [x86_intercept_pushf] = PRE_EX(SVM_EXIT_PUSHF),
4214 [x86_intercept_popf] = PRE_EX(SVM_EXIT_POPF),
4215 [x86_intercept_intn] = PRE_EX(SVM_EXIT_SWINT),
4216 [x86_intercept_iret] = PRE_EX(SVM_EXIT_IRET),
4217 [x86_intercept_icebp] = PRE_EX(SVM_EXIT_ICEBP),
4218 [x86_intercept_hlt] = POST_EX(SVM_EXIT_HLT),
f6511935
JR
4219 [x86_intercept_in] = POST_EX(SVM_EXIT_IOIO),
4220 [x86_intercept_ins] = POST_EX(SVM_EXIT_IOIO),
4221 [x86_intercept_out] = POST_EX(SVM_EXIT_IOIO),
4222 [x86_intercept_outs] = POST_EX(SVM_EXIT_IOIO),
cfec82cb
JR
4223};
4224
8061252e 4225#undef PRE_EX
cfec82cb 4226#undef POST_EX
d7eb8203 4227#undef POST_MEM
cfec82cb 4228
8a76d7f2
JR
4229static int svm_check_intercept(struct kvm_vcpu *vcpu,
4230 struct x86_instruction_info *info,
4231 enum x86_intercept_stage stage)
4232{
cfec82cb
JR
4233 struct vcpu_svm *svm = to_svm(vcpu);
4234 int vmexit, ret = X86EMUL_CONTINUE;
4235 struct __x86_intercept icpt_info;
4236 struct vmcb *vmcb = svm->vmcb;
4237
4238 if (info->intercept >= ARRAY_SIZE(x86_intercept_map))
4239 goto out;
4240
4241 icpt_info = x86_intercept_map[info->intercept];
4242
40e19b51 4243 if (stage != icpt_info.stage)
cfec82cb
JR
4244 goto out;
4245
4246 switch (icpt_info.exit_code) {
4247 case SVM_EXIT_READ_CR0:
4248 if (info->intercept == x86_intercept_cr_read)
4249 icpt_info.exit_code += info->modrm_reg;
4250 break;
4251 case SVM_EXIT_WRITE_CR0: {
4252 unsigned long cr0, val;
4253 u64 intercept;
4254
4255 if (info->intercept == x86_intercept_cr_write)
4256 icpt_info.exit_code += info->modrm_reg;
4257
62baf44c
JK
4258 if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0 ||
4259 info->intercept == x86_intercept_clts)
cfec82cb
JR
4260 break;
4261
4262 intercept = svm->nested.intercept;
4263
4264 if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))
4265 break;
4266
4267 cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK;
4268 val = info->src_val & ~SVM_CR0_SELECTIVE_MASK;
4269
4270 if (info->intercept == x86_intercept_lmsw) {
4271 cr0 &= 0xfUL;
4272 val &= 0xfUL;
4273 /* lmsw can't clear PE - catch this here */
4274 if (cr0 & X86_CR0_PE)
4275 val |= X86_CR0_PE;
4276 }
4277
4278 if (cr0 ^ val)
4279 icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE;
4280
4281 break;
4282 }
3b88e41a
JR
4283 case SVM_EXIT_READ_DR0:
4284 case SVM_EXIT_WRITE_DR0:
4285 icpt_info.exit_code += info->modrm_reg;
4286 break;
8061252e
JR
4287 case SVM_EXIT_MSR:
4288 if (info->intercept == x86_intercept_wrmsr)
4289 vmcb->control.exit_info_1 = 1;
4290 else
4291 vmcb->control.exit_info_1 = 0;
4292 break;
bf608f88
JR
4293 case SVM_EXIT_PAUSE:
4294 /*
4295 * We get this for NOP only, but pause
4296 * is rep not, check this here
4297 */
4298 if (info->rep_prefix != REPE_PREFIX)
4299 goto out;
f6511935
JR
4300 case SVM_EXIT_IOIO: {
4301 u64 exit_info;
4302 u32 bytes;
4303
f6511935
JR
4304 if (info->intercept == x86_intercept_in ||
4305 info->intercept == x86_intercept_ins) {
6cbc5f5a
JK
4306 exit_info = ((info->src_val & 0xffff) << 16) |
4307 SVM_IOIO_TYPE_MASK;
f6511935 4308 bytes = info->dst_bytes;
6493f157 4309 } else {
6cbc5f5a 4310 exit_info = (info->dst_val & 0xffff) << 16;
6493f157 4311 bytes = info->src_bytes;
f6511935
JR
4312 }
4313
4314 if (info->intercept == x86_intercept_outs ||
4315 info->intercept == x86_intercept_ins)
4316 exit_info |= SVM_IOIO_STR_MASK;
4317
4318 if (info->rep_prefix)
4319 exit_info |= SVM_IOIO_REP_MASK;
4320
4321 bytes = min(bytes, 4u);
4322
4323 exit_info |= bytes << SVM_IOIO_SIZE_SHIFT;
4324
4325 exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1);
4326
4327 vmcb->control.exit_info_1 = exit_info;
4328 vmcb->control.exit_info_2 = info->next_rip;
4329
4330 break;
4331 }
cfec82cb
JR
4332 default:
4333 break;
4334 }
4335
f104765b
BD
4336 /* TODO: Advertise NRIPS to guest hypervisor unconditionally */
4337 if (static_cpu_has(X86_FEATURE_NRIPS))
4338 vmcb->control.next_rip = info->next_rip;
cfec82cb
JR
4339 vmcb->control.exit_code = icpt_info.exit_code;
4340 vmexit = nested_svm_exit_handled(svm);
4341
4342 ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED
4343 : X86EMUL_CONTINUE;
4344
4345out:
4346 return ret;
8a76d7f2
JR
4347}
4348
a547c6db
YZ
/*
 * kvm_x86_ops.handle_external_intr hook: only re-enables local interrupts;
 * @vcpu is not used.
 */
static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
{
	local_irq_enable();
}
4353
ae97a3b8
RK
/* kvm_x86_ops.sched_in hook: intentionally does nothing. */
static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
}
4357
cbdd1bea 4358static struct kvm_x86_ops svm_x86_ops = {
6aa8b732
AK
4359 .cpu_has_kvm_support = has_svm,
4360 .disabled_by_bios = is_disabled,
4361 .hardware_setup = svm_hardware_setup,
4362 .hardware_unsetup = svm_hardware_unsetup,
002c7f7c 4363 .check_processor_compatibility = svm_check_processor_compat,
6aa8b732
AK
4364 .hardware_enable = svm_hardware_enable,
4365 .hardware_disable = svm_hardware_disable,
774ead3a 4366 .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,
6d396b55 4367 .cpu_has_high_real_mode_segbase = svm_has_high_real_mode_segbase,
6aa8b732
AK
4368
4369 .vcpu_create = svm_create_vcpu,
4370 .vcpu_free = svm_free_vcpu,
04d2cc77 4371 .vcpu_reset = svm_vcpu_reset,
6aa8b732 4372
04d2cc77 4373 .prepare_guest_switch = svm_prepare_guest_switch,
6aa8b732
AK
4374 .vcpu_load = svm_vcpu_load,
4375 .vcpu_put = svm_vcpu_put,
4376
c8639010 4377 .update_db_bp_intercept = update_db_bp_intercept,
6aa8b732
AK
4378 .get_msr = svm_get_msr,
4379 .set_msr = svm_set_msr,
4380 .get_segment_base = svm_get_segment_base,
4381 .get_segment = svm_get_segment,
4382 .set_segment = svm_set_segment,
2e4d2653 4383 .get_cpl = svm_get_cpl,
1747fb71 4384 .get_cs_db_l_bits = kvm_get_cs_db_l_bits,
e8467fda 4385 .decache_cr0_guest_bits = svm_decache_cr0_guest_bits,
aff48baa 4386 .decache_cr3 = svm_decache_cr3,
25c4c276 4387 .decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
6aa8b732 4388 .set_cr0 = svm_set_cr0,
6aa8b732
AK
4389 .set_cr3 = svm_set_cr3,
4390 .set_cr4 = svm_set_cr4,
4391 .set_efer = svm_set_efer,
4392 .get_idt = svm_get_idt,
4393 .set_idt = svm_set_idt,
4394 .get_gdt = svm_get_gdt,
4395 .set_gdt = svm_set_gdt,
73aaf249
JK
4396 .get_dr6 = svm_get_dr6,
4397 .set_dr6 = svm_set_dr6,
020df079 4398 .set_dr7 = svm_set_dr7,
facb0139 4399 .sync_dirty_debug_regs = svm_sync_dirty_debug_regs,
6de4f3ad 4400 .cache_reg = svm_cache_reg,
6aa8b732
AK
4401 .get_rflags = svm_get_rflags,
4402 .set_rflags = svm_set_rflags,
0fdd74f7 4403 .fpu_activate = svm_fpu_activate,
02daab21 4404 .fpu_deactivate = svm_fpu_deactivate,
6aa8b732 4405
6aa8b732 4406 .tlb_flush = svm_flush_tlb,
6aa8b732 4407
6aa8b732 4408 .run = svm_vcpu_run,
04d2cc77 4409 .handle_exit = handle_exit,
6aa8b732 4410 .skip_emulated_instruction = skip_emulated_instruction,
2809f5d2
GC
4411 .set_interrupt_shadow = svm_set_interrupt_shadow,
4412 .get_interrupt_shadow = svm_get_interrupt_shadow,
102d8325 4413 .patch_hypercall = svm_patch_hypercall,
2a8067f1 4414 .set_irq = svm_set_irq,
95ba8273 4415 .set_nmi = svm_inject_nmi,
298101da 4416 .queue_exception = svm_queue_exception,
b463a6f7 4417 .cancel_injection = svm_cancel_injection,
78646121 4418 .interrupt_allowed = svm_interrupt_allowed,
95ba8273 4419 .nmi_allowed = svm_nmi_allowed,
3cfc3092
JK
4420 .get_nmi_mask = svm_get_nmi_mask,
4421 .set_nmi_mask = svm_set_nmi_mask,
95ba8273
GN
4422 .enable_nmi_window = enable_nmi_window,
4423 .enable_irq_window = enable_irq_window,
4424 .update_cr8_intercept = update_cr8_intercept,
8d14695f 4425 .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
d50ab6c1 4426 .cpu_uses_apicv = svm_cpu_uses_apicv,
c7c9c56c 4427 .load_eoi_exitmap = svm_load_eoi_exitmap,
a20ed54d 4428 .sync_pir_to_irr = svm_sync_pir_to_irr,
cbc94022
IE
4429
4430 .set_tss_addr = svm_set_tss_addr,
67253af5 4431 .get_tdp_level = get_npt_level,
4b12f0de 4432 .get_mt_mask = svm_get_mt_mask,
229456fc 4433
586f9607 4434 .get_exit_info = svm_get_exit_info,
586f9607 4435
17cc3935 4436 .get_lpage_level = svm_get_lpage_level,
0e851880
SY
4437
4438 .cpuid_update = svm_cpuid_update,
4e47c7a6
SY
4439
4440 .rdtscp_supported = svm_rdtscp_supported,
ad756a16 4441 .invpcid_supported = svm_invpcid_supported,
93c4adc7 4442 .mpx_supported = svm_mpx_supported,
55412b2e 4443 .xsaves_supported = svm_xsaves_supported,
d4330ef2
JR
4444
4445 .set_supported_cpuid = svm_set_supported_cpuid,
f5f48ee1
SY
4446
4447 .has_wbinvd_exit = svm_has_wbinvd_exit,
99e3e30a 4448
4051b188 4449 .set_tsc_khz = svm_set_tsc_khz,
ba904635 4450 .read_tsc_offset = svm_read_tsc_offset,
99e3e30a 4451 .write_tsc_offset = svm_write_tsc_offset,
e48672fa 4452 .adjust_tsc_offset = svm_adjust_tsc_offset,
857e4099 4453 .compute_tsc_offset = svm_compute_tsc_offset,
d5c1785d 4454 .read_l1_tsc = svm_read_l1_tsc,
1c97f0a0
JR
4455
4456 .set_tdp_cr3 = set_tdp_cr3,
8a76d7f2
JR
4457
4458 .check_intercept = svm_check_intercept,
a547c6db 4459 .handle_external_intr = svm_handle_external_intr,
ae97a3b8
RK
4460
4461 .sched_in = svm_sched_in,
25462f7f
WH
4462
4463 .pmu_ops = &amd_pmu_ops,
6aa8b732
AK
4464};
4465
4466static int __init svm_init(void)
4467{
cb498ea2 4468 return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm),
0ee75bea 4469 __alignof__(struct vcpu_svm), THIS_MODULE);
6aa8b732
AK
4470}
4471
4472static void __exit svm_exit(void)
4473{
cb498ea2 4474 kvm_exit();
6aa8b732
AK
4475}
4476
4477module_init(svm_init)
4478module_exit(svm_exit)