]> git.proxmox.com Git - pve-kernel.git/blob - patches/kernel/0013-kvm-vmx-Reinstate-support-for-CPUs-without-virtual-N.patch
154cb43db8362446b6896521a2bb650d257531d3
[pve-kernel.git] / patches / kernel / 0013-kvm-vmx-Reinstate-support-for-CPUs-without-virtual-N.patch
1 From 0140f5df6cd9e326f3009a16c1b66139b9bb3b45 Mon Sep 17 00:00:00 2001
2 From: Paolo Bonzini <pbonzini@redhat.com>
3 Date: Mon, 6 Nov 2017 13:31:12 +0100
4 Subject: [PATCH 013/233] kvm: vmx: Reinstate support for CPUs without virtual
5 NMI
6 MIME-Version: 1.0
7 Content-Type: text/plain; charset=UTF-8
8 Content-Transfer-Encoding: 8bit
9
10 commit 8a1b43922d0d1279e7936ba85c4c2a870403c95f upstream.
11
12 This is more or less a revert of commit 2c82878b0cb3 ("KVM: VMX: require
13 virtual NMI support", 2017-03-27); it turns out that Core 2 Duo machines
14 only had virtual NMIs in some SKUs.
15
16 The revert is not trivial because in the meanwhile there have been several
17 fixes to nested NMI injection. Therefore, the entire vNMI state is moved
18 to struct loaded_vmcs.
19
20 Another change compared to before the patch is a simplification here:
21
22 if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked &&
23 !(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis(
24 get_vmcs12(vcpu))))) {
25
26 The final condition here is always true (because nested_cpu_has_virtual_nmis
27 is always false) and is removed.
28
29 Fixes: 2c82878b0cb38fd516fd612c67852a6bbf282003
30 Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1490803
31 Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
32 Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
33 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
34 Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
35 ---
36 arch/x86/kvm/vmx.c | 150 +++++++++++++++++++++++++++++++++++++----------------
37 1 file changed, 106 insertions(+), 44 deletions(-)
38
39 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
40 index 118709e7597d..a2c95522ac99 100644
41 --- a/arch/x86/kvm/vmx.c
42 +++ b/arch/x86/kvm/vmx.c
43 @@ -202,6 +202,10 @@ struct loaded_vmcs {
44 bool nmi_known_unmasked;
45 unsigned long vmcs_host_cr3; /* May not match real cr3 */
46 unsigned long vmcs_host_cr4; /* May not match real cr4 */
47 + /* Support for vnmi-less CPUs */
48 + int soft_vnmi_blocked;
49 + ktime_t entry_time;
50 + s64 vnmi_blocked_time;
51 struct list_head loaded_vmcss_on_cpu_link;
52 };
53
54 @@ -1288,6 +1292,11 @@ static inline bool cpu_has_vmx_invpcid(void)
55 SECONDARY_EXEC_ENABLE_INVPCID;
56 }
57
58 +static inline bool cpu_has_virtual_nmis(void)
59 +{
60 + return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
61 +}
62 +
63 static inline bool cpu_has_vmx_wbinvd_exit(void)
64 {
65 return vmcs_config.cpu_based_2nd_exec_ctrl &
66 @@ -1339,11 +1348,6 @@ static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)
67 (vmcs12->secondary_vm_exec_control & bit);
68 }
69
70 -static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
71 -{
72 - return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
73 -}
74 -
75 static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
76 {
77 return vmcs12->pin_based_vm_exec_control &
78 @@ -3676,9 +3680,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
79 &_vmexit_control) < 0)
80 return -EIO;
81
82 - min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
83 - PIN_BASED_VIRTUAL_NMIS;
84 - opt = PIN_BASED_POSTED_INTR | PIN_BASED_VMX_PREEMPTION_TIMER;
85 + min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
86 + opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
87 + PIN_BASED_VMX_PREEMPTION_TIMER;
88 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
89 &_pin_based_exec_control) < 0)
90 return -EIO;
91 @@ -5538,7 +5542,8 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
92
93 static void enable_nmi_window(struct kvm_vcpu *vcpu)
94 {
95 - if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
96 + if (!cpu_has_virtual_nmis() ||
97 + vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
98 enable_irq_window(vcpu);
99 return;
100 }
101 @@ -5578,6 +5583,19 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
102 {
103 struct vcpu_vmx *vmx = to_vmx(vcpu);
104
105 + if (!cpu_has_virtual_nmis()) {
106 + /*
107 + * Tracking the NMI-blocked state in software is built upon
108 + * finding the next open IRQ window. This, in turn, depends on
109 + * well-behaving guests: They have to keep IRQs disabled at
110 + * least as long as the NMI handler runs. Otherwise we may
111 + * cause NMI nesting, maybe breaking the guest. But as this is
112 + * highly unlikely, we can live with the residual risk.
113 + */
114 + vmx->loaded_vmcs->soft_vnmi_blocked = 1;
115 + vmx->loaded_vmcs->vnmi_blocked_time = 0;
116 + }
117 +
118 ++vcpu->stat.nmi_injections;
119 vmx->loaded_vmcs->nmi_known_unmasked = false;
120
121 @@ -5596,6 +5614,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
122 struct vcpu_vmx *vmx = to_vmx(vcpu);
123 bool masked;
124
125 + if (!cpu_has_virtual_nmis())
126 + return vmx->loaded_vmcs->soft_vnmi_blocked;
127 if (vmx->loaded_vmcs->nmi_known_unmasked)
128 return false;
129 masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
130 @@ -5607,13 +5627,20 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
131 {
132 struct vcpu_vmx *vmx = to_vmx(vcpu);
133
134 - vmx->loaded_vmcs->nmi_known_unmasked = !masked;
135 - if (masked)
136 - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
137 - GUEST_INTR_STATE_NMI);
138 - else
139 - vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
140 - GUEST_INTR_STATE_NMI);
141 + if (!cpu_has_virtual_nmis()) {
142 + if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) {
143 + vmx->loaded_vmcs->soft_vnmi_blocked = masked;
144 + vmx->loaded_vmcs->vnmi_blocked_time = 0;
145 + }
146 + } else {
147 + vmx->loaded_vmcs->nmi_known_unmasked = !masked;
148 + if (masked)
149 + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
150 + GUEST_INTR_STATE_NMI);
151 + else
152 + vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
153 + GUEST_INTR_STATE_NMI);
154 + }
155 }
156
157 static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
158 @@ -5621,6 +5648,10 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
159 if (to_vmx(vcpu)->nested.nested_run_pending)
160 return 0;
161
162 + if (!cpu_has_virtual_nmis() &&
163 + to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked)
164 + return 0;
165 +
166 return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
167 (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
168 | GUEST_INTR_STATE_NMI));
169 @@ -6348,6 +6379,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
170 * AAK134, BY25.
171 */
172 if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
173 + cpu_has_virtual_nmis() &&
174 (exit_qualification & INTR_INFO_UNBLOCK_NMI))
175 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
176
177 @@ -6820,7 +6852,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
178 }
179
180 /* Create a new VMCS */
181 - item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
182 + item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
183 if (!item)
184 return NULL;
185 item->vmcs02.vmcs = alloc_vmcs();
186 @@ -7837,6 +7869,7 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
187 * "blocked by NMI" bit has to be set before next VM entry.
188 */
189 if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
190 + cpu_has_virtual_nmis() &&
191 (exit_qualification & INTR_INFO_UNBLOCK_NMI))
192 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
193 GUEST_INTR_STATE_NMI);
194 @@ -8554,6 +8587,25 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
195 return 0;
196 }
197
198 + if (unlikely(!cpu_has_virtual_nmis() &&
199 + vmx->loaded_vmcs->soft_vnmi_blocked)) {
200 + if (vmx_interrupt_allowed(vcpu)) {
201 + vmx->loaded_vmcs->soft_vnmi_blocked = 0;
202 + } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL &&
203 + vcpu->arch.nmi_pending) {
204 + /*
205 + * This CPU doesn't support us in finding the end of an
206 + * NMI-blocked window if the guest runs with IRQs
207 + * disabled. So we pull the trigger after 1 s of
208 + * futile waiting, but inform the user about this.
209 + */
210 + printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
211 + "state on VCPU %d after 1 s timeout\n",
212 + __func__, vcpu->vcpu_id);
213 + vmx->loaded_vmcs->soft_vnmi_blocked = 0;
214 + }
215 + }
216 +
217 if (exit_reason < kvm_vmx_max_exit_handlers
218 && kvm_vmx_exit_handlers[exit_reason])
219 return kvm_vmx_exit_handlers[exit_reason](vcpu);
220 @@ -8837,33 +8889,38 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
221
222 idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
223
224 - if (vmx->loaded_vmcs->nmi_known_unmasked)
225 - return;
226 - /*
227 - * Can't use vmx->exit_intr_info since we're not sure what
228 - * the exit reason is.
229 - */
230 - exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
231 - unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
232 - vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
233 - /*
234 - * SDM 3: 27.7.1.2 (September 2008)
235 - * Re-set bit "block by NMI" before VM entry if vmexit caused by
236 - * a guest IRET fault.
237 - * SDM 3: 23.2.2 (September 2008)
238 - * Bit 12 is undefined in any of the following cases:
239 - * If the VM exit sets the valid bit in the IDT-vectoring
240 - * information field.
241 - * If the VM exit is due to a double fault.
242 - */
243 - if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
244 - vector != DF_VECTOR && !idtv_info_valid)
245 - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
246 - GUEST_INTR_STATE_NMI);
247 - else
248 - vmx->loaded_vmcs->nmi_known_unmasked =
249 - !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
250 - & GUEST_INTR_STATE_NMI);
251 + if (cpu_has_virtual_nmis()) {
252 + if (vmx->loaded_vmcs->nmi_known_unmasked)
253 + return;
254 + /*
255 + * Can't use vmx->exit_intr_info since we're not sure what
256 + * the exit reason is.
257 + */
258 + exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
259 + unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
260 + vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
261 + /*
262 + * SDM 3: 27.7.1.2 (September 2008)
263 + * Re-set bit "block by NMI" before VM entry if vmexit caused by
264 + * a guest IRET fault.
265 + * SDM 3: 23.2.2 (September 2008)
266 + * Bit 12 is undefined in any of the following cases:
267 + * If the VM exit sets the valid bit in the IDT-vectoring
268 + * information field.
269 + * If the VM exit is due to a double fault.
270 + */
271 + if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
272 + vector != DF_VECTOR && !idtv_info_valid)
273 + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
274 + GUEST_INTR_STATE_NMI);
275 + else
276 + vmx->loaded_vmcs->nmi_known_unmasked =
277 + !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
278 + & GUEST_INTR_STATE_NMI);
279 + } else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked))
280 + vmx->loaded_vmcs->vnmi_blocked_time +=
281 + ktime_to_ns(ktime_sub(ktime_get(),
282 + vmx->loaded_vmcs->entry_time));
283 }
284
285 static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
286 @@ -8980,6 +9037,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
287 struct vcpu_vmx *vmx = to_vmx(vcpu);
288 unsigned long debugctlmsr, cr3, cr4;
289
290 + /* Record the guest's net vcpu time for enforced NMI injections. */
291 + if (unlikely(!cpu_has_virtual_nmis() &&
292 + vmx->loaded_vmcs->soft_vnmi_blocked))
293 + vmx->loaded_vmcs->entry_time = ktime_get();
294 +
295 /* Don't enter VMX if guest state is invalid, let the exit handler
296 start emulation until we arrive back to a valid state */
297 if (vmx->emulation_required)
298 --
299 2.14.2
300