// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * AMD SVM support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 */

#define pr_fmt(fmt) "SVM: " fmt

#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
#include <linux/kernel.h>

#include <asm/msr-index.h>
#include <asm/debugreg.h>

#include "kvm_emulate.h"
#include "trace.h"
#include "mmu.h"
#include "x86.h"
#include "cpuid.h"
#include "lapic.h"
#include "svm.h"

#define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK

static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
				       struct x86_exception *fault)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (svm->vmcb->control.exit_code != SVM_EXIT_NPF) {
		/*
		 * TODO: track the cause of the nested page fault, and
		 * correctly fill in the high bits of exit_info_1.
		 */
		svm->vmcb->control.exit_code = SVM_EXIT_NPF;
		svm->vmcb->control.exit_code_hi = 0;
		svm->vmcb->control.exit_info_1 = (1ULL << 32);
		svm->vmcb->control.exit_info_2 = fault->address;
	}

	svm->vmcb->control.exit_info_1 &= ~0xffffffffULL;
	svm->vmcb->control.exit_info_1 |= fault->error_code;

	nested_svm_vmexit(svm);
}

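/*
 * Deliver a #PF that was raised while running L2: reflect it to L1 as a
 * nested #PF vmexit if L1 intercepts #PF (and no nested VMRUN is pending),
 * otherwise inject it directly into L2.
 */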
static void svm_inject_page_fault_nested(struct kvm_vcpu *vcpu, struct x86_exception *fault)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	WARN_ON(!is_guest_mode(vcpu));

	if (vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_EXCEPTION_OFFSET + PF_VECTOR) &&
	    !svm->nested.nested_run_pending) {
		svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + PF_VECTOR;
		svm->vmcb->control.exit_code_hi = 0;
		svm->vmcb->control.exit_info_1 = fault->error_code;
		svm->vmcb->control.exit_info_2 = fault->address;
		nested_svm_vmexit(svm);
	} else {
		kvm_inject_page_fault(vcpu, fault);
	}
}

static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	u64 cr3 = svm->nested.ctl.nested_cr3;
	u64 pdpte;
	int ret;

	ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(cr3), &pdpte,
				       offset_in_page(cr3) + index * 8, 8);
	if (ret)
		return 0;
	return pdpte;
}

static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	return svm->nested.ctl.nested_cr3;
}

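/*
 * Point vcpu->arch.mmu at the guest_mmu and set it up as a shadow NPT MMU
 * that uses L1's nested page tables (nCR3) for the final translation.
 */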
static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	WARN_ON(mmu_is_nested(vcpu));

	vcpu->arch.mmu = &vcpu->arch.guest_mmu;
	kvm_init_shadow_npt_mmu(vcpu, X86_CR0_PG, svm->vmcb01.ptr->save.cr4,
				svm->vmcb01.ptr->save.efer,
				svm->nested.ctl.nested_cr3);
	vcpu->arch.mmu->get_guest_pgd     = nested_svm_get_tdp_cr3;
	vcpu->arch.mmu->get_pdptr         = nested_svm_get_tdp_pdptr;
	vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
	reset_shadow_zero_bits_mask(vcpu, vcpu->arch.mmu);
	vcpu->arch.walk_mmu               = &vcpu->arch.nested_mmu;
}

static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
{
	vcpu->arch.mmu = &vcpu->arch.root_mmu;
	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
}

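/*
 * Recompute the intercepts of the active VMCB: start from L1's (vmcb01)
 * intercepts, drop the bits L0 does not need while running L2, and OR in
 * everything requested by the nested guest's control area.
 */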
void recalc_intercepts(struct vcpu_svm *svm)
{
	struct vmcb_control_area *c, *h, *g;
	unsigned int i;

	vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);

	if (!is_guest_mode(&svm->vcpu))
		return;

	c = &svm->vmcb->control;
	h = &svm->vmcb01.ptr->control;
	g = &svm->nested.ctl;

	for (i = 0; i < MAX_INTERCEPT; i++)
		c->intercepts[i] = h->intercepts[i];

	if (g->int_ctl & V_INTR_MASKING_MASK) {
		/* We only want the cr8 intercept bits of L1 */
		vmcb_clr_intercept(c, INTERCEPT_CR8_READ);
		vmcb_clr_intercept(c, INTERCEPT_CR8_WRITE);

		/*
		 * Once running L2 with HF_VINTR_MASK, EFLAGS.IF does not
		 * affect any interrupt we may want to inject; therefore,
		 * interrupt window vmexits are irrelevant to L0.
		 */
		vmcb_clr_intercept(c, INTERCEPT_VINTR);
	}

	/* We don't want to see VMMCALLs from a nested guest */
	vmcb_clr_intercept(c, INTERCEPT_VMMCALL);

	for (i = 0; i < MAX_INTERCEPT; i++)
		c->intercepts[i] |= g->intercepts[i];
}

static void copy_vmcb_control_area(struct vmcb_control_area *dst,
				   struct vmcb_control_area *from)
{
	unsigned int i;

	for (i = 0; i < MAX_INTERCEPT; i++)
		dst->intercepts[i] = from->intercepts[i];

	dst->iopm_base_pa         = from->iopm_base_pa;
	dst->msrpm_base_pa        = from->msrpm_base_pa;
	dst->tsc_offset           = from->tsc_offset;
	/* asid not copied, it is handled manually for svm->vmcb. */
	dst->tlb_ctl              = from->tlb_ctl;
	dst->int_ctl              = from->int_ctl;
	dst->int_vector           = from->int_vector;
	dst->int_state            = from->int_state;
	dst->exit_code            = from->exit_code;
	dst->exit_code_hi         = from->exit_code_hi;
	dst->exit_info_1          = from->exit_info_1;
	dst->exit_info_2          = from->exit_info_2;
	dst->exit_int_info        = from->exit_int_info;
	dst->exit_int_info_err    = from->exit_int_info_err;
	dst->nested_ctl           = from->nested_ctl;
	dst->event_inj            = from->event_inj;
	dst->event_inj_err        = from->event_inj_err;
	dst->nested_cr3           = from->nested_cr3;
	dst->virt_ext             = from->virt_ext;
	dst->pause_filter_count   = from->pause_filter_count;
	dst->pause_filter_thresh  = from->pause_filter_thresh;
}

static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
{
	/*
	 * This function merges the msr permission bitmaps of kvm and the
	 * nested vmcb. It is optimized in that it only merges the parts where
	 * the kvm msr permission bitmap may contain zero bits
	 */
	int i;

	if (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
		return true;

	for (i = 0; i < MSRPM_OFFSETS; i++) {
		u32 value, p;
		u64 offset;

		if (msrpm_offsets[i] == 0xffffffff)
			break;

		p      = msrpm_offsets[i];
		offset = svm->nested.ctl.msrpm_base_pa + (p * 4);

		if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4))
			return false;

		svm->nested.msrpm[p] = svm->msrpm[p] | value;
	}

	svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm));

	return true;
}

static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (WARN_ON(!is_guest_mode(vcpu)))
		return true;

	if (!nested_svm_vmrun_msrpm(svm)) {
		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
		vcpu->run->internal.suberror =
			KVM_INTERNAL_ERROR_EMULATION;
		vcpu->run->internal.ndata = 0;
		return false;
	}

	return true;
}

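/* Consistency checks on the vmcb12 control area before emulating VMRUN. */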
static bool nested_vmcb_check_controls(struct vmcb_control_area *control)
{
	if (CC(!vmcb_is_intercept(control, INTERCEPT_VMRUN)))
		return false;

	if (CC(control->asid == 0))
		return false;

	if (CC((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) && !npt_enabled))
		return false;

	return true;
}

static bool nested_vmcb_check_cr3_cr4(struct kvm_vcpu *vcpu,
				      struct vmcb_save_area *save)
{
	/*
	 * These checks are also performed by KVM_SET_SREGS,
	 * except that EFER.LMA is not checked by SVM against
	 * CR0.PG && EFER.LME.
	 */
	if ((save->efer & EFER_LME) && (save->cr0 & X86_CR0_PG)) {
		if (CC(!(save->cr4 & X86_CR4_PAE)) ||
		    CC(!(save->cr0 & X86_CR0_PE)) ||
		    CC(kvm_vcpu_is_illegal_gpa(vcpu, save->cr3)))
			return false;
	}

	if (CC(!kvm_is_valid_cr4(vcpu, save->cr4)))
		return false;

	return true;
}

/* Common checks that apply to both L1 and L2 state.  */
static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu,
				    struct vmcb_save_area *save)
{
	if (CC(!(save->efer & EFER_SVME)))
		return false;

	if (CC((save->cr0 & X86_CR0_CD) == 0 && (save->cr0 & X86_CR0_NW)) ||
	    CC(save->cr0 & ~0xffffffffULL))
		return false;

	if (CC(!kvm_dr6_valid(save->dr6)) || CC(!kvm_dr7_valid(save->dr7)))
		return false;

	if (!nested_vmcb_check_cr3_cr4(vcpu, save))
		return false;

	if (CC(!kvm_valid_efer(vcpu, save->efer)))
		return false;

	return true;
}

static bool nested_vmcb_checks(struct kvm_vcpu *vcpu, struct vmcb *vmcb12)
{
	if (!nested_vmcb_valid_sregs(vcpu, &vmcb12->save))
		return false;

	return nested_vmcb_check_controls(&vmcb12->control);
}

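/*
 * Cache the vmcb12 control area in svm->nested.ctl; the MSR/IO permission
 * map addresses are masked down to their page-aligned base.
 */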
static void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
					    struct vmcb_control_area *control)
{
	copy_vmcb_control_area(&svm->nested.ctl, control);

	/* Copy it here because nested_svm_check_controls will check it. */
	svm->nested.ctl.asid           = control->asid;
	svm->nested.ctl.msrpm_base_pa &= ~0x0fffULL;
	svm->nested.ctl.iopm_base_pa  &= ~0x0fffULL;
}

/*
 * Synchronize fields that are written by the processor, so that
 * they can be copied back into the vmcb12.
 */
void nested_sync_control_from_vmcb02(struct vcpu_svm *svm)
{
	u32 mask;

	svm->nested.ctl.event_inj      = svm->vmcb->control.event_inj;
	svm->nested.ctl.event_inj_err  = svm->vmcb->control.event_inj_err;

	/* Only a few fields of int_ctl are written by the processor. */
	mask = V_IRQ_MASK | V_TPR_MASK;
	if (!(svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) &&
	    svm_is_intercept(svm, INTERCEPT_VINTR)) {
		/*
		 * In order to request an interrupt window, L0 is usurping
		 * svm->vmcb->control.int_ctl and possibly setting V_IRQ
		 * even if it was clear in L1's VMCB. Restoring it would be
		 * wrong. However, in this case V_IRQ will remain true until
		 * interrupt_window_interception calls svm_clear_vintr and
		 * restores int_ctl. We can just leave it aside.
		 */
		mask &= ~V_IRQ_MASK;
	}
	svm->nested.ctl.int_ctl        &= ~mask;
	svm->nested.ctl.int_ctl        |= svm->vmcb->control.int_ctl & mask;
}

/*
 * Transfer any event that L0 or L1 wanted to inject into L2 to
 * EXIT_INT_INFO.
 */
static void nested_save_pending_event_to_vmcb12(struct vcpu_svm *svm,
						struct vmcb *vmcb12)
{
	struct kvm_vcpu *vcpu = &svm->vcpu;
	u32 exit_int_info = 0;
	unsigned int nr;

	if (vcpu->arch.exception.injected) {
		nr = vcpu->arch.exception.nr;
		exit_int_info = nr | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT;

		if (vcpu->arch.exception.has_error_code) {
			exit_int_info |= SVM_EVTINJ_VALID_ERR;
			vmcb12->control.exit_int_info_err =
				vcpu->arch.exception.error_code;
		}

	} else if (vcpu->arch.nmi_injected) {
		exit_int_info = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;

	} else if (vcpu->arch.interrupt.injected) {
		nr = vcpu->arch.interrupt.nr;
		exit_int_info = nr | SVM_EVTINJ_VALID;

		if (vcpu->arch.interrupt.soft)
			exit_int_info |= SVM_EVTINJ_TYPE_SOFT;
		else
			exit_int_info |= SVM_EVTINJ_TYPE_INTR;
	}

	vmcb12->control.exit_int_info = exit_int_info;
}

static inline bool nested_npt_enabled(struct vcpu_svm *svm)
{
	return svm->nested.ctl.nested_ctl & SVM_NESTED_CTL_NP_ENABLE;
}

/*
 * Load guest's/host's cr3 on nested vmentry or vmexit. @nested_npt is true
 * if we are emulating VM-Entry into a guest with NPT enabled.
 */
static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
			       bool nested_npt)
{
	if (CC(kvm_vcpu_is_illegal_gpa(vcpu, cr3)))
		return -EINVAL;

	if (!nested_npt && is_pae_paging(vcpu) &&
	    (cr3 != kvm_read_cr3(vcpu) || pdptrs_changed(vcpu))) {
		if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)))
			return -EINVAL;
	}

	/*
	 * TODO: optimize unconditional TLB flush/MMU sync here and in
	 * kvm_init_shadow_npt_mmu().
	 */
	if (!nested_npt)
		kvm_mmu_new_pgd(vcpu, cr3, false, false);

	vcpu->arch.cr3 = cr3;
	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);

	kvm_init_mmu(vcpu, false);

	return 0;
}

void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm)
{
	if (!svm->nested.vmcb02.ptr)
		return;

	/* FIXME: merge g_pat from vmcb01 and vmcb12. */
	svm->nested.vmcb02.ptr->save.g_pat = svm->vmcb01.ptr->save.g_pat;
}

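/* Load the L2 guest state from vmcb12 into the currently active vmcb02. */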
static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
{
	bool new_vmcb12 = false;

	nested_vmcb02_compute_g_pat(svm);

	/* Load the nested guest state */
	if (svm->nested.vmcb12_gpa != svm->nested.last_vmcb12_gpa) {
		new_vmcb12 = true;
		svm->nested.last_vmcb12_gpa = svm->nested.vmcb12_gpa;
	}

	if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_SEG))) {
		svm->vmcb->save.es = vmcb12->save.es;
		svm->vmcb->save.cs = vmcb12->save.cs;
		svm->vmcb->save.ss = vmcb12->save.ss;
		svm->vmcb->save.ds = vmcb12->save.ds;
		svm->vmcb->save.cpl = vmcb12->save.cpl;
		vmcb_mark_dirty(svm->vmcb, VMCB_SEG);
	}

	if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DT))) {
		svm->vmcb->save.gdtr = vmcb12->save.gdtr;
		svm->vmcb->save.idtr = vmcb12->save.idtr;
		vmcb_mark_dirty(svm->vmcb, VMCB_DT);
	}

	kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
	svm_set_efer(&svm->vcpu, vmcb12->save.efer);
	svm_set_cr0(&svm->vcpu, vmcb12->save.cr0);
	svm_set_cr4(&svm->vcpu, vmcb12->save.cr4);

	svm->vcpu.arch.cr2 = vmcb12->save.cr2;

	kvm_rax_write(&svm->vcpu, vmcb12->save.rax);
	kvm_rsp_write(&svm->vcpu, vmcb12->save.rsp);
	kvm_rip_write(&svm->vcpu, vmcb12->save.rip);

	/* In case we don't even reach vcpu_run, the fields are not updated */
	svm->vmcb->save.rax = vmcb12->save.rax;
	svm->vmcb->save.rsp = vmcb12->save.rsp;
	svm->vmcb->save.rip = vmcb12->save.rip;

	/* These bits will be set properly on the first execution when new_vmc12 is true */
	if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DR))) {
		svm->vmcb->save.dr7 = vmcb12->save.dr7 | DR7_FIXED_1;
		svm->vcpu.arch.dr6  = vmcb12->save.dr6 | DR6_ACTIVE_LOW;
		vmcb_mark_dirty(svm->vmcb, VMCB_DR);
	}
}

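/*
 * Build the vmcb02 control area from L1's vmcb01 controls and the cached
 * vmcb12 controls, then enter guest mode and merge the intercepts.
 */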
static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
{
	const u32 mask = V_INTR_MASKING_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK;

	/*
	 * Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2,
	 * exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes.
	 */

	/*
	 * Also covers avic_vapic_bar, avic_backing_page, avic_logical_id,
	 * avic_physical_id.
	 */
	WARN_ON(svm->vmcb01.ptr->control.int_ctl & AVIC_ENABLE_MASK);

	/* Copied from vmcb01.  msrpm_base can be overwritten later. */
	svm->vmcb->control.nested_ctl = svm->vmcb01.ptr->control.nested_ctl;
	svm->vmcb->control.iopm_base_pa = svm->vmcb01.ptr->control.iopm_base_pa;
	svm->vmcb->control.msrpm_base_pa = svm->vmcb01.ptr->control.msrpm_base_pa;

	/* Done at vmrun: asid. */

	/* Also overwritten later if necessary. */
	svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;

	/* nested_cr3. */
	if (nested_npt_enabled(svm))
		nested_svm_init_mmu_context(&svm->vcpu);

	svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset =
		svm->vcpu.arch.l1_tsc_offset + svm->nested.ctl.tsc_offset;

	svm->vmcb->control.int_ctl             =
		(svm->nested.ctl.int_ctl & ~mask) |
		(svm->vmcb01.ptr->control.int_ctl & mask);

	svm->vmcb->control.virt_ext            = svm->nested.ctl.virt_ext;
	svm->vmcb->control.int_vector          = svm->nested.ctl.int_vector;
	svm->vmcb->control.int_state           = svm->nested.ctl.int_state;
	svm->vmcb->control.event_inj           = svm->nested.ctl.event_inj;
	svm->vmcb->control.event_inj_err       = svm->nested.ctl.event_inj_err;

	svm->vmcb->control.pause_filter_count  = svm->nested.ctl.pause_filter_count;
	svm->vmcb->control.pause_filter_thresh = svm->nested.ctl.pause_filter_thresh;

	/* Enter Guest-Mode */
	enter_guest_mode(&svm->vcpu);

	/*
	 * Merge guest and host intercepts - must be called with vcpu in
	 * guest-mode to take effect.
	 */
	recalc_intercepts(svm);
}

static void nested_svm_copy_common_state(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
{
	/*
	 * Some VMCB state is shared between L1 and L2 and thus has to be
	 * moved at the time of nested vmrun and vmexit.
	 *
	 * VMLOAD/VMSAVE state would also belong in this category, but KVM
	 * always performs VMLOAD and VMSAVE from the VMCB01.
	 */
	to_vmcb->save.spec_ctrl = from_vmcb->save.spec_ctrl;
}

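/*
 * Emulate VMRUN: cache the vmcb12 controls, switch to vmcb02, prepare its
 * control and save areas, and load the L2 CR3.
 */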
int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
			 struct vmcb *vmcb12)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	int ret;

	trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb12_gpa,
			       vmcb12->save.rip,
			       vmcb12->control.int_ctl,
			       vmcb12->control.event_inj,
			       vmcb12->control.nested_ctl);

	trace_kvm_nested_intercepts(vmcb12->control.intercepts[INTERCEPT_CR] & 0xffff,
				    vmcb12->control.intercepts[INTERCEPT_CR] >> 16,
				    vmcb12->control.intercepts[INTERCEPT_EXCEPTION],
				    vmcb12->control.intercepts[INTERCEPT_WORD3],
				    vmcb12->control.intercepts[INTERCEPT_WORD4],
				    vmcb12->control.intercepts[INTERCEPT_WORD5]);

	svm->nested.vmcb12_gpa = vmcb12_gpa;

	WARN_ON(svm->vmcb == svm->nested.vmcb02.ptr);

	nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr);
	nested_load_control_from_vmcb12(svm, &vmcb12->control);

	svm_switch_vmcb(svm, &svm->nested.vmcb02);
	nested_vmcb02_prepare_control(svm);
	nested_vmcb02_prepare_save(svm, vmcb12);

	ret = nested_svm_load_cr3(&svm->vcpu, vmcb12->save.cr3,
				  nested_npt_enabled(svm));
	if (ret)
		return ret;

	if (!npt_enabled)
		vcpu->arch.mmu->inject_page_fault = svm_inject_page_fault_nested;

	svm_set_gif(svm, true);

	return 0;
}

int nested_svm_vmrun(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	int ret;
	struct vmcb *vmcb12;
	struct kvm_host_map map;
	u64 vmcb12_gpa;

	++vcpu->stat.nested_run;

	if (is_smm(vcpu)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	}

	vmcb12_gpa = svm->vmcb->save.rax;
	ret = kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map);
	if (ret == -EINVAL) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	} else if (ret) {
		return kvm_skip_emulated_instruction(vcpu);
	}

	ret = kvm_skip_emulated_instruction(vcpu);

	vmcb12 = map.hva;

	if (WARN_ON_ONCE(!svm->nested.initialized))
		return -EINVAL;

	if (!nested_vmcb_checks(vcpu, vmcb12)) {
		vmcb12->control.exit_code    = SVM_EXIT_ERR;
		vmcb12->control.exit_code_hi = 0;
		vmcb12->control.exit_info_1  = 0;
		vmcb12->control.exit_info_2  = 0;
		goto out;
	}

	/* Clear internal status */
	kvm_clear_exception_queue(vcpu);
	kvm_clear_interrupt_queue(vcpu);

	/*
	 * Since vmcb01 is not in use, we can use it to store some of the L1
	 * state.
	 */
	svm->vmcb01.ptr->save.efer   = vcpu->arch.efer;
	svm->vmcb01.ptr->save.cr0    = kvm_read_cr0(vcpu);
	svm->vmcb01.ptr->save.cr4    = vcpu->arch.cr4;
	svm->vmcb01.ptr->save.rflags = kvm_get_rflags(vcpu);
	svm->vmcb01.ptr->save.rip    = kvm_rip_read(vcpu);

	if (!npt_enabled)
		svm->vmcb01.ptr->save.cr3 = kvm_read_cr3(vcpu);

	svm->nested.nested_run_pending = 1;

	if (enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12))
		goto out_exit_err;

	if (nested_svm_vmrun_msrpm(svm))
		goto out;

out_exit_err:
	svm->nested.nested_run_pending = 0;

	svm->vmcb->control.exit_code    = SVM_EXIT_ERR;
	svm->vmcb->control.exit_code_hi = 0;
	svm->vmcb->control.exit_info_1  = 0;
	svm->vmcb->control.exit_info_2  = 0;

	nested_svm_vmexit(svm);

out:
	kvm_vcpu_unmap(vcpu, &map, true);

	return ret;
}

void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
{
	to_vmcb->save.fs = from_vmcb->save.fs;
	to_vmcb->save.gs = from_vmcb->save.gs;
	to_vmcb->save.tr = from_vmcb->save.tr;
	to_vmcb->save.ldtr = from_vmcb->save.ldtr;
	to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
	to_vmcb->save.star = from_vmcb->save.star;
	to_vmcb->save.lstar = from_vmcb->save.lstar;
	to_vmcb->save.cstar = from_vmcb->save.cstar;
	to_vmcb->save.sfmask = from_vmcb->save.sfmask;
	to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
	to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
	to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
}

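/*
 * Emulate #VMEXIT: copy the L2 state that the processor would store into
 * vmcb12, switch back to vmcb01 and restore the L1 state saved at VMRUN.
 */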
int nested_svm_vmexit(struct vcpu_svm *svm)
{
	struct kvm_vcpu *vcpu = &svm->vcpu;
	struct vmcb *vmcb12;
	struct vmcb *vmcb = svm->vmcb;
	struct kvm_host_map map;
	int rc;

	/* Triple faults in L2 should never escape. */
	WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu));

	rc = kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.vmcb12_gpa), &map);
	if (rc) {
		if (rc == -EINVAL)
			kvm_inject_gp(vcpu, 0);
		return 1;
	}

	vmcb12 = map.hva;

	/* Exit Guest-Mode */
	leave_guest_mode(vcpu);
	svm->nested.vmcb12_gpa = 0;
	WARN_ON_ONCE(svm->nested.nested_run_pending);

	kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);

	/* in case we halted in L2 */
	svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE;

	/* Give the current vmcb to the guest */

	vmcb12->save.es     = vmcb->save.es;
	vmcb12->save.cs     = vmcb->save.cs;
	vmcb12->save.ss     = vmcb->save.ss;
	vmcb12->save.ds     = vmcb->save.ds;
	vmcb12->save.gdtr   = vmcb->save.gdtr;
	vmcb12->save.idtr   = vmcb->save.idtr;
	vmcb12->save.efer   = svm->vcpu.arch.efer;
	vmcb12->save.cr0    = kvm_read_cr0(vcpu);
	vmcb12->save.cr3    = kvm_read_cr3(vcpu);
	vmcb12->save.cr2    = vmcb->save.cr2;
	vmcb12->save.cr4    = svm->vcpu.arch.cr4;
	vmcb12->save.rflags = kvm_get_rflags(vcpu);
	vmcb12->save.rip    = kvm_rip_read(vcpu);
	vmcb12->save.rsp    = kvm_rsp_read(vcpu);
	vmcb12->save.rax    = kvm_rax_read(vcpu);
	vmcb12->save.dr7    = vmcb->save.dr7;
	vmcb12->save.dr6    = svm->vcpu.arch.dr6;
	vmcb12->save.cpl    = vmcb->save.cpl;

	vmcb12->control.int_state         = vmcb->control.int_state;
	vmcb12->control.exit_code         = vmcb->control.exit_code;
	vmcb12->control.exit_code_hi      = vmcb->control.exit_code_hi;
	vmcb12->control.exit_info_1       = vmcb->control.exit_info_1;
	vmcb12->control.exit_info_2       = vmcb->control.exit_info_2;

	if (vmcb12->control.exit_code != SVM_EXIT_ERR)
		nested_save_pending_event_to_vmcb12(svm, vmcb12);

	if (svm->nrips_enabled)
		vmcb12->control.next_rip  = vmcb->control.next_rip;

	vmcb12->control.int_ctl           = svm->nested.ctl.int_ctl;
	vmcb12->control.tlb_ctl           = svm->nested.ctl.tlb_ctl;
	vmcb12->control.event_inj         = svm->nested.ctl.event_inj;
	vmcb12->control.event_inj_err     = svm->nested.ctl.event_inj_err;

	vmcb12->control.pause_filter_count =
		svm->vmcb->control.pause_filter_count;
	vmcb12->control.pause_filter_thresh =
		svm->vmcb->control.pause_filter_thresh;

	nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);

	svm_switch_vmcb(svm, &svm->vmcb01);

	/*
	 * On vmexit the GIF is set to false and
	 * no event can be injected in L1.
	 */
	svm_set_gif(svm, false);
	svm->vmcb->control.exit_int_info = 0;

	svm->vcpu.arch.tsc_offset = svm->vcpu.arch.l1_tsc_offset;
	if (svm->vmcb->control.tsc_offset != svm->vcpu.arch.tsc_offset) {
		svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset;
		vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
	}

	svm->nested.ctl.nested_cr3 = 0;

	/*
	 * Restore processor state that had been saved in vmcb01
	 */
	kvm_set_rflags(vcpu, svm->vmcb->save.rflags);
	svm_set_efer(vcpu, svm->vmcb->save.efer);
	svm_set_cr0(vcpu, svm->vmcb->save.cr0 | X86_CR0_PE);
	svm_set_cr4(vcpu, svm->vmcb->save.cr4);
	kvm_rax_write(vcpu, svm->vmcb->save.rax);
	kvm_rsp_write(vcpu, svm->vmcb->save.rsp);
	kvm_rip_write(vcpu, svm->vmcb->save.rip);

	svm->vcpu.arch.dr7 = DR7_FIXED_1;
	kvm_update_dr7(&svm->vcpu);

	trace_kvm_nested_vmexit_inject(vmcb12->control.exit_code,
				       vmcb12->control.exit_info_1,
				       vmcb12->control.exit_info_2,
				       vmcb12->control.exit_int_info,
				       vmcb12->control.exit_int_info_err,
				       KVM_ISA_SVM);

	kvm_vcpu_unmap(vcpu, &map, true);

	nested_svm_uninit_mmu_context(vcpu);

	rc = nested_svm_load_cr3(vcpu, svm->vmcb->save.cr3, false);
	if (rc)
		return 1;

	/*
	 * Drop what we picked up for L2 via svm_complete_interrupts() so it
	 * doesn't end up in L1.
	 */
	svm->vcpu.arch.nmi_injected = false;
	kvm_clear_exception_queue(vcpu);
	kvm_clear_interrupt_queue(vcpu);

	return 0;
}

static void nested_svm_triple_fault(struct kvm_vcpu *vcpu)
{
	nested_svm_simple_vmexit(to_svm(vcpu), SVM_EXIT_SHUTDOWN);
}

int svm_allocate_nested(struct vcpu_svm *svm)
{
	struct page *vmcb02_page;

	if (svm->nested.initialized)
		return 0;

	vmcb02_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!vmcb02_page)
		return -ENOMEM;
	svm->nested.vmcb02.ptr = page_address(vmcb02_page);
	svm->nested.vmcb02.pa = __sme_set(page_to_pfn(vmcb02_page) << PAGE_SHIFT);

	svm->nested.msrpm = svm_vcpu_alloc_msrpm();
	if (!svm->nested.msrpm)
		goto err_free_vmcb02;
	svm_vcpu_init_msrpm(&svm->vcpu, svm->nested.msrpm);

	svm->nested.initialized = true;
	return 0;

err_free_vmcb02:
	__free_page(vmcb02_page);
	return -ENOMEM;
}

void svm_free_nested(struct vcpu_svm *svm)
{
	if (!svm->nested.initialized)
		return;

	svm_vcpu_free_msrpm(svm->nested.msrpm);
	svm->nested.msrpm = NULL;

	__free_page(virt_to_page(svm->nested.vmcb02.ptr));
	svm->nested.vmcb02.ptr = NULL;

	svm->nested.initialized = false;
}

/*
 * Forcibly leave nested mode in order to be able to reset the VCPU later on.
 */
void svm_leave_nested(struct vcpu_svm *svm)
{
	struct kvm_vcpu *vcpu = &svm->vcpu;

	if (is_guest_mode(vcpu)) {
		svm->nested.nested_run_pending = 0;
		leave_guest_mode(vcpu);

		svm_switch_vmcb(svm, &svm->nested.vmcb02);

		nested_svm_uninit_mmu_context(vcpu);
		vmcb_mark_all_dirty(svm->vmcb);
	}

	kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
}

static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
{
	u32 offset, msr, value;
	int write, mask;

	if (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
		return NESTED_EXIT_HOST;

	msr    = svm->vcpu.arch.regs[VCPU_REGS_RCX];
	offset = svm_msrpm_offset(msr);
	write  = svm->vmcb->control.exit_info_1 & 1;
	mask   = 1 << ((2 * (msr & 0xf)) + write);

	if (offset == MSR_INVALID)
		return NESTED_EXIT_DONE;

	/* Offset is in 32 bit units but need in 8 bit units */
	offset *= 4;

	if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.ctl.msrpm_base_pa + offset, &value, 4))
		return NESTED_EXIT_DONE;

	return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
}

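/*
 * Consult the nested IOPM to decide whether an intercepted I/O port access
 * must be reflected to L1.
 */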
static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
{
	unsigned port, size, iopm_len;
	u16 val, mask;
	u8 start_bit;
	u64 gpa;

	if (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_IOIO_PROT)))
		return NESTED_EXIT_HOST;

	port = svm->vmcb->control.exit_info_1 >> 16;
	size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >>
		SVM_IOIO_SIZE_SHIFT;
	gpa  = svm->nested.ctl.iopm_base_pa + (port / 8);
	start_bit = port % 8;
	iopm_len = (start_bit + size > 8) ? 2 : 1;
	mask = (0xf >> (4 - size)) << start_bit;
	val = 0;

	if (kvm_vcpu_read_guest(&svm->vcpu, gpa, &val, iopm_len))
		return NESTED_EXIT_DONE;

	return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
}

static int nested_svm_intercept(struct vcpu_svm *svm)
{
	u32 exit_code = svm->vmcb->control.exit_code;
	int vmexit = NESTED_EXIT_HOST;

	switch (exit_code) {
	case SVM_EXIT_MSR:
		vmexit = nested_svm_exit_handled_msr(svm);
		break;
	case SVM_EXIT_IOIO:
		vmexit = nested_svm_intercept_ioio(svm);
		break;
	case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
		if (vmcb_is_intercept(&svm->nested.ctl, exit_code))
			vmexit = NESTED_EXIT_DONE;
		break;
	}
	case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
		if (vmcb_is_intercept(&svm->nested.ctl, exit_code))
			vmexit = NESTED_EXIT_DONE;
		break;
	}
	case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
		/*
		 * Host-intercepted exceptions have been checked already in
		 * nested_svm_exit_special.  There is nothing to do here,
		 * the vmexit is injected by svm_check_nested_events.
		 */
		vmexit = NESTED_EXIT_DONE;
		break;
	}
	case SVM_EXIT_ERR: {
		vmexit = NESTED_EXIT_DONE;
		break;
	}
	default: {
		if (vmcb_is_intercept(&svm->nested.ctl, exit_code))
			vmexit = NESTED_EXIT_DONE;
	}
	}

	return vmexit;
}

int nested_svm_exit_handled(struct vcpu_svm *svm)
{
	int vmexit;

	vmexit = nested_svm_intercept(svm);

	if (vmexit == NESTED_EXIT_DONE)
		nested_svm_vmexit(svm);

	return vmexit;
}

int nested_svm_check_permissions(struct kvm_vcpu *vcpu)
{
	if (!(vcpu->arch.efer & EFER_SVME) || !is_paging(vcpu)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	}

	if (to_svm(vcpu)->vmcb->save.cpl) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	}

	return 0;
}

static bool nested_exit_on_exception(struct vcpu_svm *svm)
{
	unsigned int nr = svm->vcpu.arch.exception.nr;

	return (svm->nested.ctl.intercepts[INTERCEPT_EXCEPTION] & BIT(nr));
}

static void nested_svm_inject_exception_vmexit(struct vcpu_svm *svm)
{
	unsigned int nr = svm->vcpu.arch.exception.nr;

	svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
	svm->vmcb->control.exit_code_hi = 0;

	if (svm->vcpu.arch.exception.has_error_code)
		svm->vmcb->control.exit_info_1 = svm->vcpu.arch.exception.error_code;

	/*
	 * EXITINFO2 is undefined for all exception intercepts other
	 * than #PF.
	 */
	if (nr == PF_VECTOR) {
		if (svm->vcpu.arch.exception.nested_apf)
			svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
		else if (svm->vcpu.arch.exception.has_payload)
			svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload;
		else
			svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
	} else if (nr == DB_VECTOR) {
		/* See inject_pending_event. */
		kvm_deliver_exception_payload(&svm->vcpu);
		if (svm->vcpu.arch.dr7 & DR7_GD) {
			svm->vcpu.arch.dr7 &= ~DR7_GD;
			kvm_update_dr7(&svm->vcpu);
		}
	} else
		WARN_ON(svm->vcpu.arch.exception.has_payload);

	nested_svm_vmexit(svm);
}

static inline bool nested_exit_on_init(struct vcpu_svm *svm)
{
	return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_INIT);
}

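/*
 * Check for pending INIT, exceptions, SMIs, NMIs and interrupts, and
 * reflect them to L1 as nested vmexits when L1 intercepts them.
 */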
static int svm_check_nested_events(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	bool block_nested_events =
		kvm_event_needs_reinjection(vcpu) || svm->nested.nested_run_pending;
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (lapic_in_kernel(vcpu) &&
	    test_bit(KVM_APIC_INIT, &apic->pending_events)) {
		if (block_nested_events)
			return -EBUSY;
		if (!nested_exit_on_init(svm))
			return 0;
		nested_svm_simple_vmexit(svm, SVM_EXIT_INIT);
		return 0;
	}

	if (vcpu->arch.exception.pending) {
		if (block_nested_events)
			return -EBUSY;
		if (!nested_exit_on_exception(svm))
			return 0;
		nested_svm_inject_exception_vmexit(svm);
		return 0;
	}

	if (vcpu->arch.smi_pending && !svm_smi_blocked(vcpu)) {
		if (block_nested_events)
			return -EBUSY;
		if (!nested_exit_on_smi(svm))
			return 0;
		nested_svm_simple_vmexit(svm, SVM_EXIT_SMI);
		return 0;
	}

	if (vcpu->arch.nmi_pending && !svm_nmi_blocked(vcpu)) {
		if (block_nested_events)
			return -EBUSY;
		if (!nested_exit_on_nmi(svm))
			return 0;
		nested_svm_simple_vmexit(svm, SVM_EXIT_NMI);
		return 0;
	}

	if (kvm_cpu_has_interrupt(vcpu) && !svm_interrupt_blocked(vcpu)) {
		if (block_nested_events)
			return -EBUSY;
		if (!nested_exit_on_intr(svm))
			return 0;
		trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
		nested_svm_simple_vmexit(svm, SVM_EXIT_INTR);
		return 0;
	}

	return 0;
}

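/*
 * Exits that L0 must handle itself regardless of L1's intercepts:
 * physical interrupts, NMIs, NPT faults and host-intercepted exceptions.
 */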
int nested_svm_exit_special(struct vcpu_svm *svm)
{
	u32 exit_code = svm->vmcb->control.exit_code;

	switch (exit_code) {
	case SVM_EXIT_INTR:
	case SVM_EXIT_NMI:
	case SVM_EXIT_NPF:
		return NESTED_EXIT_HOST;
	case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
		u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);

		if (svm->vmcb01.ptr->control.intercepts[INTERCEPT_EXCEPTION] &
		    excp_bits)
			return NESTED_EXIT_HOST;
		else if (exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR &&
			 svm->vcpu.arch.apf.host_apf_flags)
			/* Trap async PF even if not shadowing */
			return NESTED_EXIT_HOST;
		break;
	}
	default:
		break;
	}

	return NESTED_EXIT_CONTINUE;
}

static int svm_get_nested_state(struct kvm_vcpu *vcpu,
				struct kvm_nested_state __user *user_kvm_nested_state,
				u32 user_data_size)
{
	struct vcpu_svm *svm;
	struct kvm_nested_state kvm_state = {
		.flags = 0,
		.format = KVM_STATE_NESTED_FORMAT_SVM,
		.size = sizeof(kvm_state),
	};
	struct vmcb __user *user_vmcb = (struct vmcb __user *)
		&user_kvm_nested_state->data.svm[0];

	if (!vcpu)
		return kvm_state.size + KVM_STATE_NESTED_SVM_VMCB_SIZE;

	svm = to_svm(vcpu);

	if (user_data_size < kvm_state.size)
		goto out;

	/* First fill in the header and copy it out. */
	if (is_guest_mode(vcpu)) {
		kvm_state.hdr.svm.vmcb_pa = svm->nested.vmcb12_gpa;
		kvm_state.size += KVM_STATE_NESTED_SVM_VMCB_SIZE;
		kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;

		if (svm->nested.nested_run_pending)
			kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
	}

	if (gif_set(svm))
		kvm_state.flags |= KVM_STATE_NESTED_GIF_SET;

	if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state)))
		return -EFAULT;

	if (!is_guest_mode(vcpu))
		goto out;

	/*
	 * Copy over the full size of the VMCB rather than just the size
	 * of the structs.
	 */
	if (clear_user(user_vmcb, KVM_STATE_NESTED_SVM_VMCB_SIZE))
		return -EFAULT;
	if (copy_to_user(&user_vmcb->control, &svm->nested.ctl,
			 sizeof(user_vmcb->control)))
		return -EFAULT;
	if (copy_to_user(&user_vmcb->save, &svm->vmcb01.ptr->save,
			 sizeof(user_vmcb->save)))
		return -EFAULT;
out:
	return kvm_state.size;
}

static int svm_set_nested_state(struct kvm_vcpu *vcpu,
				struct kvm_nested_state __user *user_kvm_nested_state,
				struct kvm_nested_state *kvm_state)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vmcb __user *user_vmcb = (struct vmcb __user *)
		&user_kvm_nested_state->data.svm[0];
	struct vmcb_control_area *ctl;
	struct vmcb_save_area *save;
	unsigned long cr0;
	int ret;

	BUILD_BUG_ON(sizeof(struct vmcb_control_area) + sizeof(struct vmcb_save_area) >
		     KVM_STATE_NESTED_SVM_VMCB_SIZE);

	if (kvm_state->format != KVM_STATE_NESTED_FORMAT_SVM)
		return -EINVAL;

	if (kvm_state->flags & ~(KVM_STATE_NESTED_GUEST_MODE |
				 KVM_STATE_NESTED_RUN_PENDING |
				 KVM_STATE_NESTED_GIF_SET))
		return -EINVAL;

	/*
	 * If in guest mode, vcpu->arch.efer actually refers to the L2 guest's
	 * EFER.SVME, but EFER.SVME still has to be 1 for VMRUN to succeed.
	 */
	if (!(vcpu->arch.efer & EFER_SVME)) {
		/* GIF=1 and no guest mode are required if SVME=0. */
		if (kvm_state->flags != KVM_STATE_NESTED_GIF_SET)
			return -EINVAL;
	}

	/* SMM temporarily disables SVM, so we cannot be in guest mode. */
	if (is_smm(vcpu) && (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
		return -EINVAL;

	if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) {
		svm_leave_nested(svm);
		svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
		return 0;
	}

	if (!page_address_valid(vcpu, kvm_state->hdr.svm.vmcb_pa))
		return -EINVAL;
	if (kvm_state->size < sizeof(*kvm_state) + KVM_STATE_NESTED_SVM_VMCB_SIZE)
		return -EINVAL;

	ret  = -ENOMEM;
	ctl  = kzalloc(sizeof(*ctl),  GFP_KERNEL);
	save = kzalloc(sizeof(*save), GFP_KERNEL);
	if (!ctl || !save)
		goto out_free;

	ret = -EFAULT;
	if (copy_from_user(ctl, &user_vmcb->control, sizeof(*ctl)))
		goto out_free;
	if (copy_from_user(save, &user_vmcb->save, sizeof(*save)))
		goto out_free;

	ret = -EINVAL;
	if (!nested_vmcb_check_controls(ctl))
		goto out_free;

	/*
	 * Processor state contains L2 state. Check that it is
	 * valid for guest mode (see nested_vmcb_checks).
	 */
	cr0 = kvm_read_cr0(vcpu);
	if (((cr0 & X86_CR0_CD) == 0) && (cr0 & X86_CR0_NW))
		goto out_free;

	/*
	 * Validate host state saved from before VMRUN (see
	 * nested_svm_check_permissions).
	 */
	if (!(save->cr0 & X86_CR0_PG) ||
	    !(save->cr0 & X86_CR0_PE) ||
	    (save->rflags & X86_EFLAGS_VM) ||
	    !nested_vmcb_valid_sregs(vcpu, save))
		goto out_free;

	/*
	 * All checks done, we can enter guest mode. Userspace provides
	 * vmcb12.control, which will be combined with L1 and stored into
	 * vmcb02, and the L1 save state which we store in vmcb01.
	 * L2 registers if needed are moved from the current VMCB to VMCB02.
	 */

	svm->nested.nested_run_pending =
		!!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);

	svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
	if (svm->current_vmcb == &svm->vmcb01)
		svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;

	svm->vmcb01.ptr->save.es = save->es;
	svm->vmcb01.ptr->save.cs = save->cs;
	svm->vmcb01.ptr->save.ss = save->ss;
	svm->vmcb01.ptr->save.ds = save->ds;
	svm->vmcb01.ptr->save.gdtr = save->gdtr;
	svm->vmcb01.ptr->save.idtr = save->idtr;
	svm->vmcb01.ptr->save.rflags = save->rflags | X86_EFLAGS_FIXED;
	svm->vmcb01.ptr->save.efer = save->efer;
	svm->vmcb01.ptr->save.cr0 = save->cr0;
	svm->vmcb01.ptr->save.cr3 = save->cr3;
	svm->vmcb01.ptr->save.cr4 = save->cr4;
	svm->vmcb01.ptr->save.rax = save->rax;
	svm->vmcb01.ptr->save.rsp = save->rsp;
	svm->vmcb01.ptr->save.rip = save->rip;
	svm->vmcb01.ptr->save.cpl = 0;

	nested_load_control_from_vmcb12(svm, ctl);

	svm_switch_vmcb(svm, &svm->nested.vmcb02);

	nested_vmcb02_prepare_control(svm);

	kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
	ret = 0;
out_free:
	kfree(save);
	kfree(ctl);

	return ret;
}

struct kvm_x86_nested_ops svm_nested_ops = {
	.check_events = svm_check_nested_events,
	.triple_fault = nested_svm_triple_fault,
	.get_nested_state_pages = svm_get_nested_state_pages,
	.get_state = svm_get_nested_state,
	.set_state = svm_set_nested_state,
};