// SPDX-License-Identifier: GPL-2.0-only

/*
 * Local APIC virtualization
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright (C) 2007 Novell
 * Copyright (C) 2007 Intel
 * Copyright 2009 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Dor Laor <dor.laor@qumranet.com>
 *   Gregory Haskins <ghaskins@novell.com>
 *   Yaozu (Eddie) Dong <eddie.dong@intel.com>
 *
 * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/smp.h>
#include <linux/hrtimer.h>
#include <linux/io.h>
#include <linux/export.h>
#include <linux/math64.h>
#include <linux/slab.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/page.h>
#include <asm/current.h>
#include <asm/apicdef.h>
#include <asm/delay.h>
#include <linux/atomic.h>
#include <linux/jump_label.h>
#include "kvm_cache_regs.h"
#include "irq.h"
#include "ioapic.h"
#include "trace.h"
#include "x86.h"
#include "cpuid.h"
#include "hyperv.h"

#ifndef CONFIG_X86_64
#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
#else
#define mod_64(x, y) ((x) % (y))
#endif

#define PRId64 "d"
#define PRIx64 "llx"
#define PRIu64 "u"
#define PRIo64 "o"

/* 14 is the version for Xeon and Pentium 8.4.8 */
#define APIC_VERSION        (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
#define LAPIC_MMIO_LENGTH   (1 << 12)
/* the following define is not in apicdef.h */
#define MAX_APIC_VECTOR     256
#define APIC_VECTORS_PER_REG    32

static bool lapic_timer_advance_dynamic __read_mostly;
#define LAPIC_TIMER_ADVANCE_ADJUST_MIN  100 /* clock cycles */
#define LAPIC_TIMER_ADVANCE_ADJUST_MAX  10000   /* clock cycles */
#define LAPIC_TIMER_ADVANCE_NS_INIT 1000
#define LAPIC_TIMER_ADVANCE_NS_MAX  5000
/* step-by-step approximation to mitigate fluctuation */
#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8

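/*
 * Editor's note on the layout assumed by the helpers below: the IRR, ISR
 * and TMR banks each consist of eight 32-bit registers spaced 16 bytes
 * apart in the APIC page, together covering all 256 vectors.  VEC_POS()
 * and REG_POS() (defined in lapic.h) split a vector into a bit index
 * within one register and the byte offset of that register: e.g. vector
 * 0x31 is bit 17 (0x31 & 31) of the register at offset 0x10
 * ((0x31 >> 5) << 4).
 */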
static inline int apic_test_vector(int vec, void *bitmap)
{
        return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
{
        struct kvm_lapic *apic = vcpu->arch.apic;

        return apic_test_vector(vector, apic->regs + APIC_ISR) ||
                apic_test_vector(vector, apic->regs + APIC_IRR);
}

static inline int __apic_test_and_set_vector(int vec, void *bitmap)
{
        return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
{
        return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

__read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_hw_disabled, HZ);
__read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_sw_disabled, HZ);

static inline int apic_enabled(struct kvm_lapic *apic)
{
        return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic);
}

#define LVT_MASK        \
        (APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)

#define LINT_MASK       \
        (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
         APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)

static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
{
        return apic->vcpu->vcpu_id;
}

static bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
{
        return pi_inject_timer && kvm_vcpu_apicv_active(vcpu);
}

bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu)
{
        return kvm_x86_ops.set_hv_timer
               && !(kvm_mwait_in_guest(vcpu->kvm) ||
                    kvm_can_post_timer_interrupt(vcpu));
}
EXPORT_SYMBOL_GPL(kvm_can_use_hv_timer);

static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
{
        return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE;
}

static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
                u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
        switch (map->mode) {
        case KVM_APIC_MODE_X2APIC: {
                u32 offset = (dest_id >> 16) * 16;
                u32 max_apic_id = map->max_apic_id;

                if (offset <= max_apic_id) {
                        u8 cluster_size = min(max_apic_id - offset + 1, 16U);

                        offset = array_index_nospec(offset, map->max_apic_id + 1);
                        *cluster = &map->phys_map[offset];
                        *mask = dest_id & (0xffff >> (16 - cluster_size));
                } else {
                        *mask = 0;
                }

                return true;
                }
        case KVM_APIC_MODE_XAPIC_FLAT:
                *cluster = map->xapic_flat_map;
                *mask = dest_id & 0xff;
                return true;
        case KVM_APIC_MODE_XAPIC_CLUSTER:
                *cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf];
                *mask = dest_id & 0xf;
                return true;
        default:
                /* Not optimized. */
                return false;
        }
}

static void kvm_apic_map_free(struct rcu_head *rcu)
{
        struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu);

        kvfree(map);
}

/*
 * CLEAN -> DIRTY and UPDATE_IN_PROGRESS -> DIRTY changes happen without a lock.
 *
 * DIRTY -> UPDATE_IN_PROGRESS and UPDATE_IN_PROGRESS -> CLEAN happen with
 * the apic_map_lock held.
 */
enum {
        CLEAN,
        UPDATE_IN_PROGRESS,
        DIRTY
};

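/*
 * Editor's note: the map rebuild is lazy.  Whoever invalidates the
 * optimized map only marks it DIRTY (via atomic_set_release() after
 * updating the registers the map is derived from), and the next call to
 * kvm_recalculate_apic_map() walks DIRTY -> UPDATE_IN_PROGRESS -> CLEAN
 * under apic_map_lock.  The acquire/release pairing ensures that a reader
 * observing CLEAN also observes the map contents the last update published.
 */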
void kvm_recalculate_apic_map(struct kvm *kvm)
{
        struct kvm_apic_map *new, *old = NULL;
        struct kvm_vcpu *vcpu;
        int i;
        u32 max_id = 255; /* enough space for any xAPIC ID */

        /* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map.  */
        if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN)
                return;

        mutex_lock(&kvm->arch.apic_map_lock);
        /*
         * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map
         * (if clean) or the APIC registers (if dirty).
         */
        if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty,
                                   DIRTY, UPDATE_IN_PROGRESS) == CLEAN) {
                /* Someone else has updated the map. */
                mutex_unlock(&kvm->arch.apic_map_lock);
                return;
        }

        kvm_for_each_vcpu(i, vcpu, kvm)
                if (kvm_apic_present(vcpu))
                        max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));

        new = kvzalloc(sizeof(struct kvm_apic_map) +
                           sizeof(struct kvm_lapic *) * ((u64)max_id + 1),
                           GFP_KERNEL_ACCOUNT);

        if (!new)
                goto out;

        new->max_apic_id = max_id;

        kvm_for_each_vcpu(i, vcpu, kvm) {
                struct kvm_lapic *apic = vcpu->arch.apic;
                struct kvm_lapic **cluster;
                u16 mask;
                u32 ldr;
                u8 xapic_id;
                u32 x2apic_id;

                if (!kvm_apic_present(vcpu))
                        continue;

                xapic_id = kvm_xapic_id(apic);
                x2apic_id = kvm_x2apic_id(apic);

                /* Hotplug hack: see kvm_apic_match_physical_addr(), ... */
                if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) &&
                                x2apic_id <= new->max_apic_id)
                        new->phys_map[x2apic_id] = apic;
                /*
                 * ... xAPIC ID of VCPUs with APIC ID > 0xff will wrap-around,
                 * prevent them from masking VCPUs with APIC ID <= 0xff.
                 */
                if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
                        new->phys_map[xapic_id] = apic;

                if (!kvm_apic_sw_enabled(apic))
                        continue;

                ldr = kvm_lapic_get_reg(apic, APIC_LDR);

                if (apic_x2apic_mode(apic)) {
                        new->mode |= KVM_APIC_MODE_X2APIC;
                } else if (ldr) {
                        ldr = GET_APIC_LOGICAL_ID(ldr);
                        if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
                                new->mode |= KVM_APIC_MODE_XAPIC_FLAT;
                        else
                                new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
                }

                if (!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask))
                        continue;

                if (mask)
                        cluster[ffs(mask) - 1] = apic;
        }
out:
        old = rcu_dereference_protected(kvm->arch.apic_map,
                        lockdep_is_held(&kvm->arch.apic_map_lock));
        rcu_assign_pointer(kvm->arch.apic_map, new);
        /*
         * Write kvm->arch.apic_map before clearing apic->apic_map_dirty.
         * If another update has come in, leave it DIRTY.
         */
        atomic_cmpxchg_release(&kvm->arch.apic_map_dirty,
                               UPDATE_IN_PROGRESS, CLEAN);
        mutex_unlock(&kvm->arch.apic_map_lock);

        if (old)
                call_rcu(&old->rcu, kvm_apic_map_free);

        kvm_make_scan_ioapic_request(kvm);
}

static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
{
        bool enabled = val & APIC_SPIV_APIC_ENABLED;

        kvm_lapic_set_reg(apic, APIC_SPIV, val);

        if (enabled != apic->sw_enabled) {
                apic->sw_enabled = enabled;
                if (enabled)
                        static_branch_slow_dec_deferred(&apic_sw_disabled);
                else
                        static_branch_inc(&apic_sw_disabled.key);

                atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
        }

        /* Check if there are APF page ready requests pending */
        if (enabled)
                kvm_make_request(KVM_REQ_APF_READY, apic->vcpu);
}

static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
{
        kvm_lapic_set_reg(apic, APIC_ID, id << 24);
        atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
}

static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
{
        kvm_lapic_set_reg(apic, APIC_LDR, id);
        atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
}

static inline void kvm_apic_set_dfr(struct kvm_lapic *apic, u32 val)
{
        kvm_lapic_set_reg(apic, APIC_DFR, val);
        atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
}

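/*
 * Editor's note: in x2APIC mode the logical ID is derived from the APIC
 * ID, as computed below: the cluster number (ID >> 4) goes in the high 16
 * bits and a one-hot bit for the position within the cluster (ID & 0xf)
 * in the low 16.  E.g. x2APIC ID 0x23 yields LDR 0x00020008: cluster 2,
 * member 3.
 */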
static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
{
        return ((id >> 4) << 16) | (1 << (id & 0xf));
}

static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
{
        u32 ldr = kvm_apic_calc_x2apic_ldr(id);

        WARN_ON_ONCE(id != apic->vcpu->vcpu_id);

        kvm_lapic_set_reg(apic, APIC_ID, id);
        kvm_lapic_set_reg(apic, APIC_LDR, ldr);
        atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
}

static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
{
        return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
}

static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
{
        return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
}

static inline int apic_lvtt_period(struct kvm_lapic *apic)
{
        return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
}

static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
{
        return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
}

static inline int apic_lvt_nmi_mode(u32 lvt_val)
{
        return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
}

void kvm_apic_set_version(struct kvm_vcpu *vcpu)
{
        struct kvm_lapic *apic = vcpu->arch.apic;
        u32 v = APIC_VERSION;

        if (!lapic_in_kernel(vcpu))
                return;

        /*
         * KVM emulates the 82093AA datasheet (with the in-kernel IOAPIC
         * implementation), which doesn't have an EOI register.  Some buggy
         * OSes (e.g. Windows with the Hyper-V role) disable EOI broadcast
         * in the lapic without checking the IOAPIC version first, so
         * level-triggered interrupts would never get EOIed in the IOAPIC.
         */
        if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) &&
            !ioapic_in_kernel(vcpu->kvm))
                v |= APIC_LVR_DIRECTED_EOI;
        kvm_lapic_set_reg(apic, APIC_LVR, v);
}

static const unsigned int apic_lvt_mask[KVM_APIC_LVT_NUM] = {
        LVT_MASK,       /* part LVTT mask, timer mode mask added at runtime */
        LVT_MASK | APIC_MODE_MASK,      /* LVTTHMR */
        LVT_MASK | APIC_MODE_MASK,      /* LVTPC */
        LINT_MASK, LINT_MASK,   /* LVT0-1 */
        LVT_MASK                /* LVTERR */
};

static int find_highest_vector(void *bitmap)
{
        int vec;
        u32 *reg;

        for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
             vec >= 0; vec -= APIC_VECTORS_PER_REG) {
                reg = bitmap + REG_POS(vec);
                if (*reg)
                        return __fls(*reg) + vec;
        }

        return -1;
}

static u8 count_vectors(void *bitmap)
{
        int vec;
        u32 *reg;
        u8 count = 0;

        for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
                reg = bitmap + REG_POS(vec);
                count += hweight32(*reg);
        }

        return count;
}

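/*
 * Editor's note: the helper below transfers pending vectors from the
 * posted-interrupt request bitmap (PIR) into the vAPIC page's IRR, 32
 * vectors at a time, and reports the highest pending vector through
 * *max_irr.  The result is true iff the highest newly transferred bit is
 * also the highest pending one, i.e. a vector that just arrived via the
 * PIR is now the top candidate for injection.
 */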
bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr)
{
        u32 i, vec;
        u32 pir_val, irr_val, prev_irr_val;
        int max_updated_irr;

        max_updated_irr = -1;
        *max_irr = -1;

        for (i = vec = 0; i <= 7; i++, vec += 32) {
                pir_val = READ_ONCE(pir[i]);
                irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10));
                if (pir_val) {
                        prev_irr_val = irr_val;
                        irr_val |= xchg(&pir[i], 0);
                        *((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val;
                        if (prev_irr_val != irr_val) {
                                max_updated_irr =
                                        __fls(irr_val ^ prev_irr_val) + vec;
                        }
                }
                if (irr_val)
                        *max_irr = __fls(irr_val) + vec;
        }

        return ((max_updated_irr != -1) &&
                (max_updated_irr == *max_irr));
}
EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);

bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr)
{
        struct kvm_lapic *apic = vcpu->arch.apic;

        return __kvm_apic_update_irr(pir, apic->regs, max_irr);
}
EXPORT_SYMBOL_GPL(kvm_apic_update_irr);

static inline int apic_search_irr(struct kvm_lapic *apic)
{
        return find_highest_vector(apic->regs + APIC_IRR);
}

static inline int apic_find_highest_irr(struct kvm_lapic *apic)
{
        int result;

        /*
         * Note that irr_pending is just a hint.  It will always be true
         * with virtual interrupt delivery enabled.
         */
        if (!apic->irr_pending)
                return -1;

        result = apic_search_irr(apic);
        ASSERT(result == -1 || result >= 16);

        return result;
}

static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
{
        struct kvm_vcpu *vcpu;

        vcpu = apic->vcpu;

        if (unlikely(vcpu->arch.apicv_active)) {
                /* need to update RVI */
                kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
                static_call(kvm_x86_hwapic_irr_update)(vcpu,
                                apic_find_highest_irr(apic));
        } else {
                apic->irr_pending = false;
                kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
                if (apic_search_irr(apic) != -1)
                        apic->irr_pending = true;
        }
}

void kvm_apic_clear_irr(struct kvm_vcpu *vcpu, int vec)
{
        apic_clear_irr(vec, vcpu->arch.apic);
}
EXPORT_SYMBOL_GPL(kvm_apic_clear_irr);

static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
{
        struct kvm_vcpu *vcpu;

        if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
                return;

        vcpu = apic->vcpu;

        /*
         * With APIC virtualization enabled, all caching is disabled
         * because the processor can modify ISR under the hood.  Instead
         * just set SVI.
         */
        if (unlikely(vcpu->arch.apicv_active))
                static_call(kvm_x86_hwapic_isr_update)(vcpu, vec);
        else {
                ++apic->isr_count;
                BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
                /*
                 * ISR (in service register) bit is set when injecting an interrupt.
                 * The highest vector is injected.  Thus the latest bit set matches
                 * the highest bit in ISR.
                 */
                apic->highest_isr_cache = vec;
        }
}

static inline int apic_find_highest_isr(struct kvm_lapic *apic)
{
        int result;

        /*
         * Note that isr_count is always 1, and highest_isr_cache
         * is always -1, with APIC virtualization enabled.
         */
        if (!apic->isr_count)
                return -1;
        if (likely(apic->highest_isr_cache != -1))
                return apic->highest_isr_cache;

        result = find_highest_vector(apic->regs + APIC_ISR);
        ASSERT(result == -1 || result >= 16);

        return result;
}

static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
{
        struct kvm_vcpu *vcpu;

        if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
                return;

        vcpu = apic->vcpu;

        /*
         * We do get here for APIC virtualization enabled if the guest
         * uses the Hyper-V APIC enlightenment.  In this case we may need
         * to trigger a new interrupt delivery by writing the SVI field;
         * on the other hand isr_count and highest_isr_cache are unused
         * and must be left alone.
         */
        if (unlikely(vcpu->arch.apicv_active))
                static_call(kvm_x86_hwapic_isr_update)(vcpu,
                                apic_find_highest_isr(apic));
        else {
                --apic->isr_count;
                BUG_ON(apic->isr_count < 0);
                apic->highest_isr_cache = -1;
        }
}

int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
{
        /* This may race with setting of irr in __apic_accept_irq() and
         * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq
         * will cause vmexit immediately and the value will be recalculated
         * on the next vmentry.
         */
        return apic_find_highest_irr(vcpu->arch.apic);
}
EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);

static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
                             int vector, int level, int trig_mode,
                             struct dest_map *dest_map);

int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
                     struct dest_map *dest_map)
{
        struct kvm_lapic *apic = vcpu->arch.apic;

        return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
                        irq->level, irq->trig_mode, dest_map);
}

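/*
 * Editor's note: helper for the KVM_HC_SEND_IPI hypercall.  Bit i of
 * *ipi_bitmap stands for the vCPU whose APIC ID is 'min + i'; the return
 * value is the number of IPIs that were actually injected.
 */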
static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map,
                         struct kvm_lapic_irq *irq, u32 min)
{
        int i, count = 0;
        struct kvm_vcpu *vcpu;

        if (min > map->max_apic_id)
                return 0;

        for_each_set_bit(i, ipi_bitmap,
                min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
                if (map->phys_map[min + i]) {
                        vcpu = map->phys_map[min + i]->vcpu;
                        count += kvm_apic_set_irq(vcpu, irq, NULL);
                }
        }

        return count;
}

int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
                    unsigned long ipi_bitmap_high, u32 min,
                    unsigned long icr, int op_64_bit)
{
        struct kvm_apic_map *map;
        struct kvm_lapic_irq irq = {0};
        int cluster_size = op_64_bit ? 64 : 32;
        int count;

        if (icr & (APIC_DEST_MASK | APIC_SHORT_MASK))
                return -KVM_EINVAL;

        irq.vector = icr & APIC_VECTOR_MASK;
        irq.delivery_mode = icr & APIC_MODE_MASK;
        irq.level = (icr & APIC_INT_ASSERT) != 0;
        irq.trig_mode = icr & APIC_INT_LEVELTRIG;

        rcu_read_lock();
        map = rcu_dereference(kvm->arch.apic_map);

        count = -EOPNOTSUPP;
        if (likely(map)) {
                count = __pv_send_ipi(&ipi_bitmap_low, map, &irq, min);
                min += cluster_size;
                count += __pv_send_ipi(&ipi_bitmap_high, map, &irq, min);
        }

        rcu_read_unlock();
        return count;
}

static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
{
        return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
                                      sizeof(val));
}

static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
{
        return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
                                     sizeof(*val));
}

static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
{
        return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
}

static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
{
        u8 val;

        if (pv_eoi_get_user(vcpu, &val) < 0) {
                printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n",
                           (unsigned long long)vcpu->arch.pv_eoi.msr_val);
                return false;
        }
        return val & KVM_PV_EOI_ENABLED;
}

static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
{
        if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
                printk(KERN_WARNING "Can't set EOI MSR value: 0x%llx\n",
                           (unsigned long long)vcpu->arch.pv_eoi.msr_val);
                return;
        }
        __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
}

static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
{
        if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
                printk(KERN_WARNING "Can't clear EOI MSR value: 0x%llx\n",
                           (unsigned long long)vcpu->arch.pv_eoi.msr_val);
                return;
        }
        __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
}

static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
{
        int highest_irr;

        if (kvm_x86_ops.sync_pir_to_irr)
                highest_irr = static_call(kvm_x86_sync_pir_to_irr)(apic->vcpu);
        else
                highest_irr = apic_find_highest_irr(apic);
        if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr)
                return -1;
        return highest_irr;
}

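/*
 * Editor's note: the processor priority computed below is the higher of
 * the task priority and the priority class of the highest in-service
 * vector.  E.g. with TPR = 0x30 and vector 0x51 in service,
 * ISRV & 0xf0 = 0x50 > 0x30, so PPR becomes 0x50 and only pending vectors
 * above 0x5f can be delivered.
 */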
static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr)
{
        u32 tpr, isrv, ppr, old_ppr;
        int isr;

        old_ppr = kvm_lapic_get_reg(apic, APIC_PROCPRI);
        tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI);
        isr = apic_find_highest_isr(apic);
        isrv = (isr != -1) ? isr : 0;

        if ((tpr & 0xf0) >= (isrv & 0xf0))
                ppr = tpr & 0xff;
        else
                ppr = isrv & 0xf0;

        *new_ppr = ppr;
        if (old_ppr != ppr)
                kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);

        return ppr < old_ppr;
}

static void apic_update_ppr(struct kvm_lapic *apic)
{
        u32 ppr;

        if (__apic_update_ppr(apic, &ppr) &&
            apic_has_interrupt_for_ppr(apic, ppr) != -1)
                kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
}

void kvm_apic_update_ppr(struct kvm_vcpu *vcpu)
{
        apic_update_ppr(vcpu->arch.apic);
}
EXPORT_SYMBOL_GPL(kvm_apic_update_ppr);

static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
{
        kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr);
        apic_update_ppr(apic);
}

static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
{
        return mda == (apic_x2apic_mode(apic) ?
                        X2APIC_BROADCAST : APIC_BROADCAST);
}

static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
{
        if (kvm_apic_broadcast(apic, mda))
                return true;

        if (apic_x2apic_mode(apic))
                return mda == kvm_x2apic_id(apic);

        /*
         * Hotplug hack: Make LAPIC in xAPIC mode also accept interrupts as if
         * it were in x2APIC mode.  Hotplugged VCPUs start in xAPIC mode and
         * this allows unique addressing of VCPUs with APIC ID over 0xff.
         * The 0xff condition is needed because the xAPIC ID is writeable.
         */
        if (kvm_x2apic_id(apic) > 0xff && mda == kvm_x2apic_id(apic))
                return true;

        return mda == kvm_xapic_id(apic);
}

static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
{
        u32 logical_id;

        if (kvm_apic_broadcast(apic, mda))
                return true;

        logical_id = kvm_lapic_get_reg(apic, APIC_LDR);

        if (apic_x2apic_mode(apic))
                return ((logical_id >> 16) == (mda >> 16))
                       && (logical_id & mda & 0xffff) != 0;

        logical_id = GET_APIC_LOGICAL_ID(logical_id);

        switch (kvm_lapic_get_reg(apic, APIC_DFR)) {
        case APIC_DFR_FLAT:
                return (logical_id & mda) != 0;
        case APIC_DFR_CLUSTER:
                return ((logical_id >> 4) == (mda >> 4))
                       && (logical_id & mda & 0xf) != 0;
        default:
                return false;
        }
}

/* The KVM local APIC implementation has two quirks:
 *
 *  - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs
 *    in xAPIC mode if the "destination & 0xff" matches its xAPIC ID.
 *    KVM doesn't do that aliasing.
 *
 *  - in-kernel IOAPIC messages have to be delivered directly to
 *    x2APIC, because the kernel does not support interrupt remapping.
 *    In order to support broadcast without interrupt remapping, x2APIC
 *    rewrites the destination of non-IPI messages from APIC_BROADCAST
 *    to X2APIC_BROADCAST.
 *
 * The broadcast quirk can be disabled with KVM_CAP_X2APIC_API.  This is
 * important when userspace wants to use x2APIC-format MSIs, because
 * APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7".
 */
static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id,
                struct kvm_lapic *source, struct kvm_lapic *target)
{
        bool ipi = source != NULL;

        if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled &&
            !ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target))
                return X2APIC_BROADCAST;

        return dest_id;
}

bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
                         int shorthand, unsigned int dest, int dest_mode)
{
        struct kvm_lapic *target = vcpu->arch.apic;
        u32 mda = kvm_apic_mda(vcpu, dest, source, target);

        ASSERT(target);
        switch (shorthand) {
        case APIC_DEST_NOSHORT:
                if (dest_mode == APIC_DEST_PHYSICAL)
                        return kvm_apic_match_physical_addr(target, mda);
                else
                        return kvm_apic_match_logical_addr(target, mda);
        case APIC_DEST_SELF:
                return target == source;
        case APIC_DEST_ALLINC:
                return true;
        case APIC_DEST_ALLBUT:
                return target != source;
        default:
                return false;
        }
}
EXPORT_SYMBOL_GPL(kvm_apic_match_dest);

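/*
 * Editor's note: with vector hashing, the destination of a
 * lowest-priority interrupt is the (vector % dest_vcpus)-th set bit of
 * the candidate bitmap, counting from zero; e.g. vector 0x26 with four
 * candidates picks the third set bit (0x26 % 4 == 2).
 */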
int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
                        const unsigned long *bitmap, u32 bitmap_size)
{
        u32 mod;
        int i, idx = -1;

        mod = vector % dest_vcpus;

        for (i = 0; i <= mod; i++) {
                idx = find_next_bit(bitmap, bitmap_size, idx + 1);
                BUG_ON(idx == bitmap_size);
        }

        return idx;
}

static void kvm_apic_disabled_lapic_found(struct kvm *kvm)
{
        if (!kvm->arch.disabled_lapic_found) {
                kvm->arch.disabled_lapic_found = true;
                printk(KERN_INFO
                       "Disabled LAPIC found during irq injection\n");
        }
}

static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src,
                struct kvm_lapic_irq *irq, struct kvm_apic_map *map)
{
        if (kvm->arch.x2apic_broadcast_quirk_disabled) {
                if ((irq->dest_id == APIC_BROADCAST &&
                                map->mode != KVM_APIC_MODE_X2APIC))
                        return true;
                if (irq->dest_id == X2APIC_BROADCAST)
                        return true;
        } else {
                bool x2apic_ipi = src && *src && apic_x2apic_mode(*src);

                if (irq->dest_id == (x2apic_ipi ?
                                     X2APIC_BROADCAST : APIC_BROADCAST))
                        return true;
        }

        return false;
}

/* Return true if the interrupt can be handled by using *bitmap as index mask
 * for valid destinations in *dst array.
 * Return false if kvm_apic_map_get_dest_lapic did nothing useful.
 * Note: we may have zero kvm_lapic destinations when we return true, which
 * means that the interrupt should be dropped.  In this case, *bitmap would be
 * zero and *dst undefined.
 */
static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
                struct kvm_lapic **src, struct kvm_lapic_irq *irq,
                struct kvm_apic_map *map, struct kvm_lapic ***dst,
                unsigned long *bitmap)
{
        int i, lowest;

        if (irq->shorthand == APIC_DEST_SELF && src) {
                *dst = src;
                *bitmap = 1;
                return true;
        } else if (irq->shorthand)
                return false;

        if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map))
                return false;

        if (irq->dest_mode == APIC_DEST_PHYSICAL) {
                if (irq->dest_id > map->max_apic_id) {
                        *bitmap = 0;
                } else {
                        u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1);

                        *dst = &map->phys_map[dest_id];
                        *bitmap = 1;
                }
                return true;
        }

        *bitmap = 0;
        if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst,
                                (u16 *)bitmap))
                return false;

        if (!kvm_lowest_prio_delivery(irq))
                return true;

        if (!kvm_vector_hashing_enabled()) {
                lowest = -1;
                for_each_set_bit(i, bitmap, 16) {
                        if (!(*dst)[i])
                                continue;
                        if (lowest < 0)
                                lowest = i;
                        else if (kvm_apic_compare_prio((*dst)[i]->vcpu,
                                                (*dst)[lowest]->vcpu) < 0)
                                lowest = i;
                }
        } else {
                if (!*bitmap)
                        return true;

                lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap),
                                bitmap, 16);

                if (!(*dst)[lowest]) {
                        kvm_apic_disabled_lapic_found(kvm);
                        *bitmap = 0;
                        return true;
                }
        }

        *bitmap = (lowest >= 0) ? 1 << lowest : 0;

        return true;
}

bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
                struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
{
        struct kvm_apic_map *map;
        unsigned long bitmap;
        struct kvm_lapic **dst = NULL;
        int i;
        bool ret;

        *r = -1;

        if (irq->shorthand == APIC_DEST_SELF) {
                *r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
                return true;
        }

        rcu_read_lock();
        map = rcu_dereference(kvm->arch.apic_map);

        ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap);
        if (ret) {
                *r = 0;
                for_each_set_bit(i, &bitmap, 16) {
                        if (!dst[i])
                                continue;
                        *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
                }
        }

        rcu_read_unlock();
        return ret;
}

/*
 * This routine tries to handle interrupts in posted mode, here is how
 * it deals with different cases:
 * - For single-destination interrupts, handle it in posted mode
 * - Else if vector hashing is enabled and it is a lowest-priority
 *   interrupt, handle it in posted mode and use the following mechanism
 *   to find the destination vCPU.
 *      1. For lowest-priority interrupts, store all the possible
 *         destination vCPUs in an array.
 *      2. Use "guest vector % max number of destination vCPUs" to find
 *         the right destination vCPU in the array for the lowest-priority
 *         interrupt.
 * - Otherwise, use remapped mode to inject the interrupt.
 */
bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
                        struct kvm_vcpu **dest_vcpu)
{
        struct kvm_apic_map *map;
        unsigned long bitmap;
        struct kvm_lapic **dst = NULL;
        bool ret = false;

        if (irq->shorthand)
                return false;

        rcu_read_lock();
        map = rcu_dereference(kvm->arch.apic_map);

        if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
                        hweight16(bitmap) == 1) {
                unsigned long i = find_first_bit(&bitmap, 16);

                if (dst[i]) {
                        *dest_vcpu = dst[i]->vcpu;
                        ret = true;
                }
        }

        rcu_read_unlock();
        return ret;
}

/*
 * Add a pending IRQ into lapic.
 * Return 1 if successfully added and 0 if discarded.
 */
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
                             int vector, int level, int trig_mode,
                             struct dest_map *dest_map)
{
        int result = 0;
        struct kvm_vcpu *vcpu = apic->vcpu;

        trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
                                  trig_mode, vector);
        switch (delivery_mode) {
        case APIC_DM_LOWEST:
                vcpu->arch.apic_arb_prio++;
                fallthrough;
        case APIC_DM_FIXED:
                if (unlikely(trig_mode && !level))
                        break;

                /* FIXME add logic for vcpu on reset */
                if (unlikely(!apic_enabled(apic)))
                        break;

                result = 1;

                if (dest_map) {
                        __set_bit(vcpu->vcpu_id, dest_map->map);
                        dest_map->vectors[vcpu->vcpu_id] = vector;
                }

                if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
                        if (trig_mode)
                                kvm_lapic_set_vector(vector,
                                                     apic->regs + APIC_TMR);
                        else
                                kvm_lapic_clear_vector(vector,
                                                       apic->regs + APIC_TMR);
                }

                if (static_call(kvm_x86_deliver_posted_interrupt)(vcpu, vector)) {
                        kvm_lapic_set_irr(vector, apic);
                        kvm_make_request(KVM_REQ_EVENT, vcpu);
                        kvm_vcpu_kick(vcpu);
                }
                break;

        case APIC_DM_REMRD:
                result = 1;
                vcpu->arch.pv.pv_unhalted = 1;
                kvm_make_request(KVM_REQ_EVENT, vcpu);
                kvm_vcpu_kick(vcpu);
                break;

        case APIC_DM_SMI:
                result = 1;
                kvm_make_request(KVM_REQ_SMI, vcpu);
                kvm_vcpu_kick(vcpu);
                break;

        case APIC_DM_NMI:
                result = 1;
                kvm_inject_nmi(vcpu);
                kvm_vcpu_kick(vcpu);
                break;

        case APIC_DM_INIT:
                if (!trig_mode || level) {
                        result = 1;
                        /* assumes that there are only KVM_APIC_INIT/SIPI */
                        apic->pending_events = (1UL << KVM_APIC_INIT);
                        kvm_make_request(KVM_REQ_EVENT, vcpu);
                        kvm_vcpu_kick(vcpu);
                }
                break;

        case APIC_DM_STARTUP:
                result = 1;
                apic->sipi_vector = vector;
                /* make sure sipi_vector is visible for the receiver */
                smp_wmb();
                set_bit(KVM_APIC_SIPI, &apic->pending_events);
                kvm_make_request(KVM_REQ_EVENT, vcpu);
                kvm_vcpu_kick(vcpu);
                break;

        case APIC_DM_EXTINT:
                /*
                 * Should only be called by kvm_apic_local_deliver() with LVT0,
                 * before NMI watchdog was enabled.  Already handled by
                 * kvm_apic_accept_pic_intr().
                 */
                break;

        default:
                printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
                       delivery_mode);
                break;
        }
        return result;
}

/*
 * This routine identifies the destination vcpus meant to receive an
 * IOAPIC interrupt.  It either uses kvm_apic_map_get_dest_lapic() to find
 * the destination vcpus array and sets the bitmap, or it traverses each
 * available vcpu to identify them.
 */
void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
                              unsigned long *vcpu_bitmap)
{
        struct kvm_lapic **dest_vcpu = NULL;
        struct kvm_lapic *src = NULL;
        struct kvm_apic_map *map;
        struct kvm_vcpu *vcpu;
        unsigned long bitmap;
        int i, vcpu_idx;
        bool ret;

        rcu_read_lock();
        map = rcu_dereference(kvm->arch.apic_map);

        ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dest_vcpu,
                                          &bitmap);
        if (ret) {
                for_each_set_bit(i, &bitmap, 16) {
                        if (!dest_vcpu[i])
                                continue;
                        vcpu_idx = dest_vcpu[i]->vcpu->vcpu_idx;
                        __set_bit(vcpu_idx, vcpu_bitmap);
                }
        } else {
                kvm_for_each_vcpu(i, vcpu, kvm) {
                        if (!kvm_apic_present(vcpu))
                                continue;
                        if (!kvm_apic_match_dest(vcpu, NULL,
                                                 irq->shorthand,
                                                 irq->dest_id,
                                                 irq->dest_mode))
                                continue;
                        __set_bit(i, vcpu_bitmap);
                }
        }
        rcu_read_unlock();
}

int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
{
        return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
}

static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
{
        return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors);
}

static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
{
        int trigger_mode;

        /* Eoi the ioapic only if the ioapic doesn't own the vector. */
        if (!kvm_ioapic_handles_vector(apic, vector))
                return;

        /* Request a KVM exit to inform the userspace IOAPIC. */
        if (irqchip_split(apic->vcpu->kvm)) {
                apic->vcpu->arch.pending_ioapic_eoi = vector;
                kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
                return;
        }

        if (apic_test_vector(vector, apic->regs + APIC_TMR))
                trigger_mode = IOAPIC_LEVEL_TRIG;
        else
                trigger_mode = IOAPIC_EDGE_TRIG;

        kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
}

static int apic_set_eoi(struct kvm_lapic *apic)
{
        int vector = apic_find_highest_isr(apic);

        trace_kvm_eoi(apic, vector);

        /*
         * Not every write to the EOI register has a corresponding bit in
         * the ISR; one example is when the kernel checks the timer in
         * setup_IO_APIC().
         */
        if (vector == -1)
                return vector;

        apic_clear_isr(vector, apic);
        apic_update_ppr(apic);

        if (to_hv_vcpu(apic->vcpu) &&
            test_bit(vector, to_hv_synic(apic->vcpu)->vec_bitmap))
                kvm_hv_synic_send_eoi(apic->vcpu, vector);

        kvm_ioapic_send_eoi(apic, vector);
        kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
        return vector;
}

/*
 * this interface assumes a trap-like exit, which has already finished
 * desired side effect including vISR and vPPR update.
 */
void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
{
        struct kvm_lapic *apic = vcpu->arch.apic;

        trace_kvm_eoi(apic, vector);

        kvm_ioapic_send_eoi(apic, vector);
        kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
}
EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);

void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
{
        struct kvm_lapic_irq irq;

        irq.vector = icr_low & APIC_VECTOR_MASK;
        irq.delivery_mode = icr_low & APIC_MODE_MASK;
        irq.dest_mode = icr_low & APIC_DEST_MASK;
        irq.level = (icr_low & APIC_INT_ASSERT) != 0;
        irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
        irq.shorthand = icr_low & APIC_SHORT_MASK;
        irq.msi_redir_hint = false;
        if (apic_x2apic_mode(apic))
                irq.dest_id = icr_high;
        else
                irq.dest_id = GET_APIC_DEST_FIELD(icr_high);

        trace_kvm_apic_ipi(icr_low, irq.dest_id);

        kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
}

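/*
 * Editor's note: the current-count register counts down from TMICT at the
 * APIC bus rate divided by the configured divider, and KVM models one bus
 * cycle as 1 ns (APIC_BUS_CYCLE_NS).  So the value computed below is simply
 * remaining_ns / divide_count: e.g. 50 us left with divide-by-2 reads back
 * as 25000.
 */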
static u32 apic_get_tmcct(struct kvm_lapic *apic)
{
        ktime_t remaining, now;
        s64 ns;
        u32 tmcct;

        ASSERT(apic != NULL);

        /* if initial count is 0, current count should also be 0 */
        if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 ||
                apic->lapic_timer.period == 0)
                return 0;

        now = ktime_get();
        remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
        if (ktime_to_ns(remaining) < 0)
                remaining = 0;

        ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
        tmcct = div64_u64(ns,
                         (APIC_BUS_CYCLE_NS * apic->divide_count));

        return tmcct;
}

static void __report_tpr_access(struct kvm_lapic *apic, bool write)
{
        struct kvm_vcpu *vcpu = apic->vcpu;
        struct kvm_run *run = vcpu->run;

        kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
        run->tpr_access.rip = kvm_rip_read(vcpu);
        run->tpr_access.is_write = write;
}

static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
{
        if (apic->vcpu->arch.tpr_access_reporting)
                __report_tpr_access(apic, write);
}

static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
{
        u32 val = 0;

        if (offset >= LAPIC_MMIO_LENGTH)
                return 0;

        switch (offset) {
        case APIC_ARBPRI:
                break;

        case APIC_TMCCT:        /* Timer CCR */
                if (apic_lvtt_tscdeadline(apic))
                        return 0;

                val = apic_get_tmcct(apic);
                break;
        case APIC_PROCPRI:
                apic_update_ppr(apic);
                val = kvm_lapic_get_reg(apic, offset);
                break;
        case APIC_TASKPRI:
                report_tpr_access(apic, false);
                fallthrough;
        default:
                val = kvm_lapic_get_reg(apic, offset);
                break;
        }

        return val;
}

static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
{
        return container_of(dev, struct kvm_lapic, dev);
}

#define APIC_REG_MASK(reg)      (1ull << ((reg) >> 4))
#define APIC_REGS_MASK(first, count) \
        (APIC_REG_MASK(first) * ((1ull << (count)) - 1))

int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
                       void *data)
{
        unsigned char alignment = offset & 0xf;
        u32 result;
        /* this bitmask has a bit cleared for each reserved register */
        u64 valid_reg_mask =
                APIC_REG_MASK(APIC_ID) |
                APIC_REG_MASK(APIC_LVR) |
                APIC_REG_MASK(APIC_TASKPRI) |
                APIC_REG_MASK(APIC_PROCPRI) |
                APIC_REG_MASK(APIC_LDR) |
                APIC_REG_MASK(APIC_DFR) |
                APIC_REG_MASK(APIC_SPIV) |
                APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) |
                APIC_REGS_MASK(APIC_TMR, APIC_ISR_NR) |
                APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) |
                APIC_REG_MASK(APIC_ESR) |
                APIC_REG_MASK(APIC_ICR) |
                APIC_REG_MASK(APIC_ICR2) |
                APIC_REG_MASK(APIC_LVTT) |
                APIC_REG_MASK(APIC_LVTTHMR) |
                APIC_REG_MASK(APIC_LVTPC) |
                APIC_REG_MASK(APIC_LVT0) |
                APIC_REG_MASK(APIC_LVT1) |
                APIC_REG_MASK(APIC_LVTERR) |
                APIC_REG_MASK(APIC_TMICT) |
                APIC_REG_MASK(APIC_TMCCT) |
                APIC_REG_MASK(APIC_TDCR);

        /* ARBPRI is not valid on x2APIC */
        if (!apic_x2apic_mode(apic))
                valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI);

        if (alignment + len > 4)
                return 1;

        if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset)))
                return 1;

        result = __apic_read(apic, offset & ~0xf);

        trace_kvm_apic_read(offset, result);

        switch (len) {
        case 1:
        case 2:
        case 4:
                memcpy(data, (char *)&result + alignment, len);
                break;
        default:
                printk(KERN_ERR "Local APIC read with len = %x, "
                       "should be 1,2, or 4 instead\n", len);
                break;
        }
        return 0;
}
EXPORT_SYMBOL_GPL(kvm_lapic_reg_read);

static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
{
        return addr >= apic->base_address &&
                addr < apic->base_address + LAPIC_MMIO_LENGTH;
}

static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
                           gpa_t address, int len, void *data)
{
        struct kvm_lapic *apic = to_lapic(this);
        u32 offset = address - apic->base_address;

        if (!apic_mmio_in_range(apic, address))
                return -EOPNOTSUPP;

        if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
                if (!kvm_check_has_quirk(vcpu->kvm,
                                         KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
                        return -EOPNOTSUPP;

                memset(data, 0xff, len);
                return 0;
        }

        kvm_lapic_reg_read(apic, offset, len, data);

        return 0;
}

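/*
 * Editor's note: TDCR encodes the divider in bits 0, 1 and 3.  The decode
 * below computes divide_count = 1 << ((value + 1) & 7) from that 3-bit
 * value: e.g. TDCR = 0xb (all three bits set) gives divide-by-1, while
 * TDCR = 0 gives divide-by-2.
 */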
static void update_divide_count(struct kvm_lapic *apic)
{
        u32 tmp1, tmp2, tdcr;

        tdcr = kvm_lapic_get_reg(apic, APIC_TDCR);
        tmp1 = tdcr & 0xf;
        tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
        apic->divide_count = 0x1 << (tmp2 & 0x7);
}

static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
{
        /*
         * Do not allow the guest to program periodic timers with small
         * interval, since the hrtimers are not throttled by the host
         * scheduler.
         */
        if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
                s64 min_period = min_timer_period_us * 1000LL;

                if (apic->lapic_timer.period < min_period) {
                        pr_info_ratelimited(
                            "kvm: vcpu %i: requested %lld ns "
                            "lapic timer period limited to %lld ns\n",
                            apic->vcpu->vcpu_id,
                            apic->lapic_timer.period, min_period);
                        apic->lapic_timer.period = min_period;
                }
        }
}

static void cancel_hv_timer(struct kvm_lapic *apic);

static void cancel_apic_timer(struct kvm_lapic *apic)
{
        hrtimer_cancel(&apic->lapic_timer.timer);
        preempt_disable();
        if (apic->lapic_timer.hv_timer_in_use)
                cancel_hv_timer(apic);
        preempt_enable();
}

static void apic_update_lvtt(struct kvm_lapic *apic)
{
        u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
                        apic->lapic_timer.timer_mode_mask;

        if (apic->lapic_timer.timer_mode != timer_mode) {
                if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
                                APIC_LVT_TIMER_TSCDEADLINE)) {
                        cancel_apic_timer(apic);
                        kvm_lapic_set_reg(apic, APIC_TMICT, 0);
                        apic->lapic_timer.period = 0;
                        apic->lapic_timer.tscdeadline = 0;
                }
                apic->lapic_timer.timer_mode = timer_mode;
                limit_periodic_timer_frequency(apic);
        }
}

/*
 * On APICv, this test will cause a busy wait
 * during a higher-priority task.
 */

static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
{
        struct kvm_lapic *apic = vcpu->arch.apic;
        u32 reg = kvm_lapic_get_reg(apic, APIC_LVTT);

        if (kvm_apic_hw_enabled(apic)) {
                int vec = reg & APIC_VECTOR_MASK;
                void *bitmap = apic->regs + APIC_ISR;

                if (vcpu->arch.apicv_active)
                        bitmap = apic->regs + APIC_IRR;

                if (apic_test_vector(vec, bitmap))
                        return true;
        }
        return false;
}

static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
{
        u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns;

        /*
         * If the guest TSC is running at a different ratio than the host, then
         * convert the delay to nanoseconds to achieve an accurate delay.  Note
         * that __delay() uses delay_tsc whenever the hardware has TSC, thus
         * always for VMX enabled hardware.
         */
        if (vcpu->arch.tsc_scaling_ratio == kvm_default_tsc_scaling_ratio) {
                __delay(min(guest_cycles,
                        nsec_to_cycles(vcpu, timer_advance_ns)));
        } else {
                u64 delay_ns = guest_cycles * 1000000ULL;

                do_div(delay_ns, vcpu->arch.virtual_tsc_khz);
                ndelay(min_t(u32, delay_ns, timer_advance_ns));
        }
}

static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
                                              s64 advance_expire_delta)
{
        struct kvm_lapic *apic = vcpu->arch.apic;
        u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
        u64 ns;

        /* Do not adjust for tiny fluctuations or large random spikes. */
        if (abs(advance_expire_delta) > LAPIC_TIMER_ADVANCE_ADJUST_MAX ||
            abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_MIN)
                return;

        /* too early */
        if (advance_expire_delta < 0) {
                ns = -advance_expire_delta * 1000000ULL;
                do_div(ns, vcpu->arch.virtual_tsc_khz);
                timer_advance_ns -= ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
        } else {
                /* too late */
                ns = advance_expire_delta * 1000000ULL;
                do_div(ns, vcpu->arch.virtual_tsc_khz);
                timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
        }

        if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_NS_MAX))
                timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
        apic->lapic_timer.timer_advance_ns = timer_advance_ns;
}
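/*
 * Editor's illustration of the adjustment above, assuming a 2 GHz guest
 * TSC: firing 4000 cycles early corresponds to 4000 * 1000000 / 2000000
 * kHz = 2000 ns, of which one LAPIC_TIMER_ADVANCE_ADJUST_STEP'th (1/8) is
 * subtracted from timer_advance_ns, so repeated measurements converge on
 * the real expiry overhead instead of oscillating around it.
 */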
1598
0c5f81da 1599static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
84ea3aca
WL
1600{
1601 struct kvm_lapic *apic = vcpu->arch.apic;
1602 u64 guest_tsc, tsc_deadline;
d0659d94 1603
d0659d94
MT
1604 tsc_deadline = apic->lapic_timer.expired_tscdeadline;
1605 apic->lapic_timer.expired_tscdeadline = 0;
4ba76538 1606 guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
ec0671d5 1607 apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;
d0659d94 1608
9805cf03
WL
1609 if (lapic_timer_advance_dynamic) {
1610 adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
1611 /*
1612 * If the timer fired early, reread the TSC to account for the
1613 * overhead of the above adjustment to avoid waiting longer
1614 * than is necessary.
1615 */
1616 if (guest_tsc < tsc_deadline)
1617 guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1618 }
1619
d0659d94 1620 if (guest_tsc < tsc_deadline)
b6aa57c6 1621 __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
5d87db71 1622}
0c5f81da
WL
1623
1624void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
1625{
010fd37f
WL
1626 if (lapic_in_kernel(vcpu) &&
1627 vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
1628 vcpu->arch.apic->lapic_timer.timer_advance_ns &&
1629 lapic_timer_int_injected(vcpu))
0c5f81da
WL
1630 __kvm_wait_lapic_expire(vcpu);
1631}
b6c4bc65 1632EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
5d87db71 1633
0c5f81da
WL
1634static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
1635{
1636 struct kvm_timer *ktimer = &apic->lapic_timer;
1637
1638 kvm_apic_local_deliver(apic, APIC_LVTT);
17ac43a8 1639 if (apic_lvtt_tscdeadline(apic)) {
0c5f81da 1640 ktimer->tscdeadline = 0;
17ac43a8 1641 } else if (apic_lvtt_oneshot(apic)) {
0c5f81da
WL
1642 ktimer->tscdeadline = 0;
1643 ktimer->target_expiration = 0;
1644 }
1645}
1646
ae95f566 1647static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn)
0c5f81da
WL
1648{
1649 struct kvm_vcpu *vcpu = apic->vcpu;
0c5f81da
WL
1650 struct kvm_timer *ktimer = &apic->lapic_timer;
1651
1652 if (atomic_read(&apic->lapic_timer.pending))
1653 return;
1654
1655 if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
1656 ktimer->expired_tscdeadline = ktimer->tscdeadline;
1657
ae95f566
WL
1658 if (!from_timer_fn && vcpu->arch.apicv_active) {
1659 WARN_ON(kvm_get_running_vcpu() != vcpu);
1660 kvm_apic_inject_pending_timer_irqs(apic);
1661 return;
1662 }
1663
0c5f81da 1664 if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
beda4301
SC
1665 /*
1666 * Ensure the guest's timer has truly expired before posting an
1667 * interrupt. Open code the relevant checks to avoid querying
1668 * lapic_timer_int_injected(), which will be false since the
1669 * interrupt isn't yet injected. Waiting until after injecting
1670 * is not an option since that won't help a posted interrupt.
1671 */
1672 if (vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
1673 vcpu->arch.apic->lapic_timer.timer_advance_ns)
1674 __kvm_wait_lapic_expire(vcpu);
0c5f81da
WL
1675 kvm_apic_inject_pending_timer_irqs(apic);
1676 return;
1677 }
1678
1679 atomic_inc(&apic->lapic_timer.pending);
084071d5 1680 kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
68ca7663
WL
1681 if (from_timer_fn)
1682 kvm_vcpu_kick(vcpu);
0c5f81da
WL
1683}
1684
53f9eedf
YJ
1685static void start_sw_tscdeadline(struct kvm_lapic *apic)
1686{
39497d76
SC
1687 struct kvm_timer *ktimer = &apic->lapic_timer;
1688 u64 guest_tsc, tscdeadline = ktimer->tscdeadline;
53f9eedf
YJ
1689 u64 ns = 0;
1690 ktime_t expire;
1691 struct kvm_vcpu *vcpu = apic->vcpu;
1692 unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
1693 unsigned long flags;
1694 ktime_t now;
1695
1696 if (unlikely(!tscdeadline || !this_tsc_khz))
1697 return;
1698
1699 local_irq_save(flags);
1700
5587859f 1701 now = ktime_get();
53f9eedf 1702 guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
c09d65d9
LA
1703
1704 ns = (tscdeadline - guest_tsc) * 1000000ULL;
1705 do_div(ns, this_tsc_khz);
1706
1707 if (likely(tscdeadline > guest_tsc) &&
39497d76 1708 likely(ns > apic->lapic_timer.timer_advance_ns)) {
53f9eedf 1709 expire = ktime_add_ns(now, ns);
39497d76 1710 expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
2c0d278f 1711 hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD);
53f9eedf 1712 } else
ae95f566 1713 apic_timer_expired(apic, false);
53f9eedf
YJ
1714
1715 local_irq_restore(flags);
1716}
1717
24647e0a
PS
1718static inline u64 tmict_to_ns(struct kvm_lapic *apic, u32 tmict)
1719{
1720 return (u64)tmict * APIC_BUS_CYCLE_NS * (u64)apic->divide_count;
1721}
1722
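
A hedged sketch of the conversion above, assuming APIC_BUS_CYCLE_NS is 1 (KVM models a 1 GHz APIC bus in lapic.h):

#include <stdint.h>

#define APIC_BUS_CYCLE_NS 1   /* assumption: matches KVM's lapic.h */

/* TMICT ticks -> nanoseconds, scaled by the TDCR divide count. */
static uint64_t tmict_to_ns_sketch(uint32_t tmict, uint32_t divide_count)
{
        return (uint64_t)tmict * APIC_BUS_CYCLE_NS * (uint64_t)divide_count;
}
/* TMICT = 1,000,000 with divide-by-16 -> a 16 ms timer period */
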
c301b909
WL
1723static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
1724{
1725 ktime_t now, remaining;
1726 u64 ns_remaining_old, ns_remaining_new;
1727
24647e0a
PS
1728 apic->lapic_timer.period =
1729 tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));
c301b909
WL
1730 limit_periodic_timer_frequency(apic);
1731
1732 now = ktime_get();
1733 remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
1734 if (ktime_to_ns(remaining) < 0)
1735 remaining = 0;
1736
1737 ns_remaining_old = ktime_to_ns(remaining);
1738 ns_remaining_new = mul_u64_u32_div(ns_remaining_old,
1739 apic->divide_count, old_divisor);
1740
1741 apic->lapic_timer.tscdeadline +=
1742 nsec_to_cycles(apic->vcpu, ns_remaining_new) -
1743 nsec_to_cycles(apic->vcpu, ns_remaining_old);
1744 apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new);
1745}
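
The rescale above keeps the elapsed fraction of the period constant when the guest reprograms TDCR mid-countdown; in isolation the remaining wall time is simply scaled by new_divisor / old_divisor. A worked sketch under assumed values:

#include <stdint.h>

/* Sketch: remaining wall time scales by new_div / old_div. */
static uint64_t rescale_remaining_ns(uint64_t ns_old, uint32_t old_div,
                                     uint32_t new_div)
{
        return ns_old * new_div / old_div;
}
/* 8 ms left under divide-by-4 becomes 16 ms under divide-by-8:
 * rescale_remaining_ns(8000000, 4, 8) == 16000000 */
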
1746
24647e0a 1747static bool set_target_expiration(struct kvm_lapic *apic, u32 count_reg)
7d7f7da2
WL
1748{
1749 ktime_t now;
8003c9ae 1750 u64 tscl = rdtsc();
24647e0a 1751 s64 deadline;
7d7f7da2 1752
5587859f 1753 now = ktime_get();
24647e0a
PS
1754 apic->lapic_timer.period =
1755 tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));
7d7f7da2 1756
5d74a699
RK
1757 if (!apic->lapic_timer.period) {
1758 apic->lapic_timer.tscdeadline = 0;
8003c9ae 1759 return false;
7d7f7da2
WL
1760 }
1761
ccbfa1d3 1762 limit_periodic_timer_frequency(apic);
24647e0a
PS
1763 deadline = apic->lapic_timer.period;
1764
1765 if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
1766 if (unlikely(count_reg != APIC_TMICT)) {
1767 deadline = tmict_to_ns(apic,
1768 kvm_lapic_get_reg(apic, count_reg));
1769 if (unlikely(deadline <= 0))
1770 deadline = apic->lapic_timer.period;
1771 else if (unlikely(deadline > apic->lapic_timer.period)) {
1772 pr_info_ratelimited(
1773 "kvm: vcpu %i: requested lapic timer restore with "
1774 "starting count register %#x=%u (%lld ns) > initial count (%lld ns). "
1775 "Using initial count to start timer.\n",
1776 apic->vcpu->vcpu_id,
1777 count_reg,
1778 kvm_lapic_get_reg(apic, count_reg),
1779 deadline, apic->lapic_timer.period);
1780 kvm_lapic_set_reg(apic, count_reg, 0);
1781 deadline = apic->lapic_timer.period;
1782 }
1783 }
1784 }
7d7f7da2 1785
8003c9ae 1786 apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
24647e0a
PS
1787 nsec_to_cycles(apic->vcpu, deadline);
1788 apic->lapic_timer.target_expiration = ktime_add_ns(now, deadline);
8003c9ae
WL
1789
1790 return true;
1791}
1792
1793static void advance_periodic_target_expiration(struct kvm_lapic *apic)
1794{
d8f2f498
DV
1795 ktime_t now = ktime_get();
1796 u64 tscl = rdtsc();
1797 ktime_t delta;
1798
1799 /*
1800 * Synchronize both deadlines to the same time source, or
1801 * differences in the periods (caused by differences in the
1802 * underlying clocks or numerical approximation errors) will
1803 * cause the two to drift apart over time as the errors
1804 * accumulate.
1805 */
8003c9ae
WL
1806 apic->lapic_timer.target_expiration =
1807 ktime_add_ns(apic->lapic_timer.target_expiration,
1808 apic->lapic_timer.period);
d8f2f498
DV
1809 delta = ktime_sub(apic->lapic_timer.target_expiration, now);
1810 apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
1811 nsec_to_cycles(apic->vcpu, delta);
7d7f7da2
WL
1812}
1813
ecf08dad
AB
1814static void start_sw_period(struct kvm_lapic *apic)
1815{
1816 if (!apic->lapic_timer.period)
1817 return;
1818
1819 if (ktime_after(ktime_get(),
1820 apic->lapic_timer.target_expiration)) {
ae95f566 1821 apic_timer_expired(apic, false);
ecf08dad
AB
1822
1823 if (apic_lvtt_oneshot(apic))
1824 return;
1825
1826 advance_periodic_target_expiration(apic);
1827 }
1828
1829 hrtimer_start(&apic->lapic_timer.timer,
1830 apic->lapic_timer.target_expiration,
edec6e01 1831 HRTIMER_MODE_ABS_HARD);
ecf08dad
AB
1832}
1833
ce7a058a
YJ
1834bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
1835{
91005300
WL
1836 if (!lapic_in_kernel(vcpu))
1837 return false;
1838
ce7a058a
YJ
1839 return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
1840}
1841EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
1842
7e810a38 1843static void cancel_hv_timer(struct kvm_lapic *apic)
bd97ad0e 1844{
1d518c68 1845 WARN_ON(preemptible());
a749e247 1846 WARN_ON(!apic->lapic_timer.hv_timer_in_use);
b3646477 1847 static_call(kvm_x86_cancel_hv_timer)(apic->vcpu);
bd97ad0e
WL
1848 apic->lapic_timer.hv_timer_in_use = false;
1849}
1850
a749e247 1851static bool start_hv_timer(struct kvm_lapic *apic)
196f20ca 1852{
35ee9e48 1853 struct kvm_timer *ktimer = &apic->lapic_timer;
f9927982
SC
1854 struct kvm_vcpu *vcpu = apic->vcpu;
1855 bool expired;
196f20ca 1856
1d518c68 1857 WARN_ON(preemptible());
199a8b84 1858 if (!kvm_can_use_hv_timer(vcpu))
a749e247
PB
1859 return false;
1860
86bbc1e6
RK
1861 if (!ktimer->tscdeadline)
1862 return false;
1863
b3646477 1864 if (static_call(kvm_x86_set_hv_timer)(vcpu, ktimer->tscdeadline, &expired))
35ee9e48
PB
1865 return false;
1866
1867 ktimer->hv_timer_in_use = true;
1868 hrtimer_cancel(&ktimer->timer);
196f20ca 1869
35ee9e48 1870 /*
f1ba5cfb
SC
1871 * To simplify handling the periodic timer, leave the hv timer running
1872 * even if the deadline timer has expired, i.e. rely on the resulting
1873 * VM-Exit to recompute the periodic timer's target expiration.
35ee9e48 1874 */
f1ba5cfb
SC
1875 if (!apic_lvtt_period(apic)) {
1876 /*
1877 * Cancel the hv timer if the sw timer fired while the hv timer
1878 * was being programmed, or if the hv timer itself expired.
1879 */
1880 if (atomic_read(&ktimer->pending)) {
1881 cancel_hv_timer(apic);
f9927982 1882 } else if (expired) {
ae95f566 1883 apic_timer_expired(apic, false);
f1ba5cfb
SC
1884 cancel_hv_timer(apic);
1885 }
c8533544 1886 }
a749e247 1887
f9927982 1888 trace_kvm_hv_timer_state(vcpu->vcpu_id, ktimer->hv_timer_in_use);
f1ba5cfb 1889
35ee9e48
PB
1890 return true;
1891}
1892
a749e247 1893static void start_sw_timer(struct kvm_lapic *apic)
35ee9e48 1894{
a749e247 1895 struct kvm_timer *ktimer = &apic->lapic_timer;
1d518c68
WL
1896
1897 WARN_ON(preemptible());
a749e247
PB
1898 if (apic->lapic_timer.hv_timer_in_use)
1899 cancel_hv_timer(apic);
1900 if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
1901 return;
1902
1903 if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
1904 start_sw_period(apic);
1905 else if (apic_lvtt_tscdeadline(apic))
1906 start_sw_tscdeadline(apic);
1907 trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, false);
1908}
35ee9e48 1909
a749e247
PB
1910static void restart_apic_timer(struct kvm_lapic *apic)
1911{
1d518c68 1912 preempt_disable();
4ca88b3f
SC
1913
1914 if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending))
1915 goto out;
1916
a749e247
PB
1917 if (!start_hv_timer(apic))
1918 start_sw_timer(apic);
4ca88b3f 1919out:
1d518c68 1920 preempt_enable();
196f20ca
WL
1921}
1922
8003c9ae
WL
1923void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
1924{
1925 struct kvm_lapic *apic = vcpu->arch.apic;
1926
1d518c68
WL
1927 preempt_disable();
1928 /* If the preempt notifier has already run, it also called apic_timer_expired */
1929 if (!apic->lapic_timer.hv_timer_in_use)
1930 goto out;
da4ad88c 1931 WARN_ON(rcuwait_active(&vcpu->wait));
ae95f566 1932 apic_timer_expired(apic, false);
d981dd15 1933 cancel_hv_timer(apic);
8003c9ae
WL
1934
1935 if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
1936 advance_periodic_target_expiration(apic);
a749e247 1937 restart_apic_timer(apic);
8003c9ae 1938 }
1d518c68
WL
1939out:
1940 preempt_enable();
8003c9ae
WL
1941}
1942EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
1943
ce7a058a
YJ
1944void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
1945{
a749e247 1946 restart_apic_timer(vcpu->arch.apic);
ce7a058a
YJ
1947}
1948EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
1949
1950void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
1951{
1952 struct kvm_lapic *apic = vcpu->arch.apic;
1953
1d518c68 1954 preempt_disable();
ce7a058a 1955 /* Possibly the TSC deadline timer is not enabled yet */
a749e247
PB
1956 if (apic->lapic_timer.hv_timer_in_use)
1957 start_sw_timer(apic);
1d518c68 1958 preempt_enable();
a749e247
PB
1959}
1960EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
ce7a058a 1961
a749e247
PB
1962void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu)
1963{
1964 struct kvm_lapic *apic = vcpu->arch.apic;
ce7a058a 1965
a749e247
PB
1966 WARN_ON(!apic->lapic_timer.hv_timer_in_use);
1967 restart_apic_timer(apic);
ce7a058a 1968}
ce7a058a 1969
24647e0a 1970static void __start_apic_timer(struct kvm_lapic *apic, u32 count_reg)
97222cc8 1971{
d3c7b77d 1972 atomic_set(&apic->lapic_timer.pending, 0);
0b975a3c 1973
a749e247 1974 if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
24647e0a 1975 && !set_target_expiration(apic, count_reg))
a749e247
PB
1976 return;
1977
1978 restart_apic_timer(apic);
97222cc8
ED
1979}
1980
24647e0a
PS
1981static void start_apic_timer(struct kvm_lapic *apic)
1982{
1983 __start_apic_timer(apic, APIC_TMICT);
1984}
1985
cc6e462c
JK
1986static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
1987{
59fd1323 1988 bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val);
cc6e462c 1989
59fd1323
RK
1990 if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) {
1991 apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode;
1992 if (lvt0_in_nmi_mode) {
42720138 1993 atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
59fd1323
RK
1994 } else
1995 atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
1996 }
cc6e462c
JK
1997}
1998
1e6e2755 1999int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
97222cc8 2000{
0105d1a5 2001 int ret = 0;
97222cc8 2002
0105d1a5 2003 trace_kvm_apic_write(reg, val);
97222cc8 2004
0105d1a5 2005 switch (reg) {
97222cc8 2006 case APIC_ID: /* Local APIC ID */
0105d1a5 2007 if (!apic_x2apic_mode(apic))
a92e2543 2008 kvm_apic_set_xapic_id(apic, val >> 24);
0105d1a5
GN
2009 else
2010 ret = 1;
97222cc8
ED
2011 break;
2012
2013 case APIC_TASKPRI:
b209749f 2014 report_tpr_access(apic, true);
97222cc8
ED
2015 apic_set_tpr(apic, val & 0xff);
2016 break;
2017
2018 case APIC_EOI:
2019 apic_set_eoi(apic);
2020 break;
2021
2022 case APIC_LDR:
0105d1a5 2023 if (!apic_x2apic_mode(apic))
1e08ec4a 2024 kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
0105d1a5
GN
2025 else
2026 ret = 1;
97222cc8
ED
2027 break;
2028
2029 case APIC_DFR:
ae6f2496
WL
2030 if (!apic_x2apic_mode(apic))
2031 kvm_apic_set_dfr(apic, val | 0x0FFFFFFF);
2032 else
0105d1a5 2033 ret = 1;
97222cc8
ED
2034 break;
2035
fc61b800
GN
2036 case APIC_SPIV: {
2037 u32 mask = 0x3ff;
dfb95954 2038 if (kvm_lapic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
fc61b800 2039 mask |= APIC_SPIV_DIRECTED_EOI;
f8c1ea10 2040 apic_set_spiv(apic, val & mask);
97222cc8
ED
2041 if (!(val & APIC_SPIV_APIC_ENABLED)) {
2042 int i;
2043 u32 lvt_val;
2044
1e6e2755 2045 for (i = 0; i < KVM_APIC_LVT_NUM; i++) {
dfb95954 2046 lvt_val = kvm_lapic_get_reg(apic,
97222cc8 2047 APIC_LVTT + 0x10 * i);
1e6e2755 2048 kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i,
97222cc8
ED
2049 lvt_val | APIC_LVT_MASKED);
2050 }
b6ac0695 2051 apic_update_lvtt(apic);
d3c7b77d 2052 atomic_set(&apic->lapic_timer.pending, 0);
97222cc8
ED
2053
2054 }
2055 break;
fc61b800 2056 }
97222cc8
ED
2057 case APIC_ICR:
2058 /* No delay here, so we always clear the pending bit */
2b0911d1 2059 val &= ~(1 << 12);
d5361678 2060 kvm_apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2));
2b0911d1 2061 kvm_lapic_set_reg(apic, APIC_ICR, val);
97222cc8
ED
2062 break;
2063
2064 case APIC_ICR2:
0105d1a5
GN
2065 if (!apic_x2apic_mode(apic))
2066 val &= 0xff000000;
1e6e2755 2067 kvm_lapic_set_reg(apic, APIC_ICR2, val);
97222cc8
ED
2068 break;
2069
23930f95 2070 case APIC_LVT0:
cc6e462c 2071 apic_manage_nmi_watchdog(apic, val);
df561f66 2072 fallthrough;
97222cc8
ED
2073 case APIC_LVTTHMR:
2074 case APIC_LVTPC:
97222cc8 2075 case APIC_LVT1:
4bf79cb0 2076 case APIC_LVTERR: {
97222cc8 2077 /* TODO: Check vector */
4bf79cb0
MP
2078 size_t size;
2079 u32 index;
2080
c48f1496 2081 if (!kvm_apic_sw_enabled(apic))
97222cc8 2082 val |= APIC_LVT_MASKED;
4bf79cb0
MP
2083 size = ARRAY_SIZE(apic_lvt_mask);
2084 index = array_index_nospec(
2085 (reg - APIC_LVTT) >> 4, size);
2086 val &= apic_lvt_mask[index];
1e6e2755 2087 kvm_lapic_set_reg(apic, reg, val);
97222cc8 2088 break;
4bf79cb0 2089 }
97222cc8 2090
b6ac0695 2091 case APIC_LVTT:
c48f1496 2092 if (!kvm_apic_sw_enabled(apic))
a3e06bbe
LJ
2093 val |= APIC_LVT_MASKED;
2094 val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
1e6e2755 2095 kvm_lapic_set_reg(apic, APIC_LVTT, val);
b6ac0695 2096 apic_update_lvtt(apic);
a3e06bbe
LJ
2097 break;
2098
97222cc8 2099 case APIC_TMICT:
a3e06bbe
LJ
2100 if (apic_lvtt_tscdeadline(apic))
2101 break;
2102
e898da78 2103 cancel_apic_timer(apic);
1e6e2755 2104 kvm_lapic_set_reg(apic, APIC_TMICT, val);
97222cc8 2105 start_apic_timer(apic);
0105d1a5 2106 break;
97222cc8 2107
c301b909
WL
2108 case APIC_TDCR: {
2109 uint32_t old_divisor = apic->divide_count;
2110
a445fc45 2111 kvm_lapic_set_reg(apic, APIC_TDCR, val & 0xb);
97222cc8 2112 update_divide_count(apic);
c301b909
WL
2113 if (apic->divide_count != old_divisor &&
2114 apic->lapic_timer.period) {
2115 hrtimer_cancel(&apic->lapic_timer.timer);
2116 update_target_expiration(apic, old_divisor);
2117 restart_apic_timer(apic);
2118 }
97222cc8 2119 break;
c301b909 2120 }
0105d1a5 2121 case APIC_ESR:
0d88800d 2122 if (apic_x2apic_mode(apic) && val != 0)
0105d1a5 2123 ret = 1;
0105d1a5
GN
2124 break;
2125
2126 case APIC_SELF_IPI:
2127 if (apic_x2apic_mode(apic)) {
9c2475f3
HL
2128 kvm_lapic_reg_write(apic, APIC_ICR,
2129 APIC_DEST_SELF | (val & APIC_VECTOR_MASK));
0105d1a5
GN
2130 } else
2131 ret = 1;
2132 break;
97222cc8 2133 default:
0105d1a5 2134 ret = 1;
97222cc8
ED
2135 break;
2136 }
0d88800d 2137
4abaffce
WL
2138 kvm_recalculate_apic_map(apic->vcpu->kvm);
2139
0105d1a5
GN
2140 return ret;
2141}
1e6e2755 2142EXPORT_SYMBOL_GPL(kvm_lapic_reg_write);
0105d1a5 2143
e32edf4f 2144static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
0105d1a5
GN
2145 gpa_t address, int len, const void *data)
2146{
2147 struct kvm_lapic *apic = to_lapic(this);
2148 unsigned int offset = address - apic->base_address;
2149 u32 val;
2150
2151 if (!apic_mmio_in_range(apic, address))
2152 return -EOPNOTSUPP;
2153
d1766202
VK
2154 if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
2155 if (!kvm_check_has_quirk(vcpu->kvm,
2156 KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
2157 return -EOPNOTSUPP;
2158
2159 return 0;
2160 }
2161
0105d1a5
GN
2162 /*
2163 * APIC registers must be aligned on a 128-bit boundary.
2164 * 32/64/128-bit registers must be accessed through 32-bit
2165 * accesses; refer to SDM 8.4.1.
2166 */
0d88800d 2167 if (len != 4 || (offset & 0xf))
756975bb 2168 return 0;
0105d1a5
GN
2169
2170 val = *(u32*)data;
2171
0d88800d 2172 kvm_lapic_reg_write(apic, offset & 0xff0, val);
0105d1a5 2173
bda9020e 2174 return 0;
97222cc8
ED
2175}
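
A small predicate capturing which MMIO accesses the function above actually forwards to kvm_lapic_reg_write(): only 4-byte accesses at 16-byte-aligned offsets, per the SDM rule cited in the comment. Illustrative sketch, not kernel API:

#include <stdbool.h>

/* Mirrors the "len != 4 || (offset & 0xf)" filter above. */
static bool lapic_mmio_access_emulated(unsigned int offset, int len)
{
        return len == 4 && (offset & 0xf) == 0;
}
/* e.g. offset 0x80 (TPR), len 4 -> true; offset 0x82, len 2 -> false */
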
2176
58fbbf26
KT
2177void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
2178{
1e6e2755 2179 kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
58fbbf26
KT
2180}
2181EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
2182
83d4c286
YZ
2183/* emulate APIC access in a trap manner */
2184void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
2185{
2186 u32 val = 0;
2187
2188 /* hardware has done the conditional check and instruction decode */
2189 offset &= 0xff0;
2190
1e6e2755 2191 kvm_lapic_reg_read(vcpu->arch.apic, offset, 4, &val);
83d4c286
YZ
2192
2193 /* TODO: optimize to just emulate the side effect without one more write */
1e6e2755 2194 kvm_lapic_reg_write(vcpu->arch.apic, offset, val);
83d4c286
YZ
2195}
2196EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
2197
d589444e 2198void kvm_free_lapic(struct kvm_vcpu *vcpu)
97222cc8 2199{
f8c1ea10
GN
2200 struct kvm_lapic *apic = vcpu->arch.apic;
2201
ad312c7c 2202 if (!vcpu->arch.apic)
97222cc8
ED
2203 return;
2204
f8c1ea10 2205 hrtimer_cancel(&apic->lapic_timer.timer);
97222cc8 2206
c5cc421b 2207 if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
6e4e3b4d 2208 static_branch_slow_dec_deferred(&apic_hw_disabled);
c5cc421b 2209
e462755c 2210 if (!apic->sw_enabled)
6e4e3b4d 2211 static_branch_slow_dec_deferred(&apic_sw_disabled);
97222cc8 2212
f8c1ea10
GN
2213 if (apic->regs)
2214 free_page((unsigned long)apic->regs);
2215
2216 kfree(apic);
97222cc8
ED
2217}
2218
2219/*
2220 *----------------------------------------------------------------------
2221 * LAPIC interface
2222 *----------------------------------------------------------------------
2223 */
a3e06bbe
LJ
2224u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
2225{
2226 struct kvm_lapic *apic = vcpu->arch.apic;
a3e06bbe 2227
a970e9b2 2228 if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic))
a3e06bbe
LJ
2229 return 0;
2230
2231 return apic->lapic_timer.tscdeadline;
2232}
2233
2234void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
2235{
2236 struct kvm_lapic *apic = vcpu->arch.apic;
a3e06bbe 2237
27503833 2238 if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic))
a3e06bbe
LJ
2239 return;
2240
2241 hrtimer_cancel(&apic->lapic_timer.timer);
2242 apic->lapic_timer.tscdeadline = data;
2243 start_apic_timer(apic);
2244}
2245
97222cc8
ED
2246void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
2247{
ad312c7c 2248 struct kvm_lapic *apic = vcpu->arch.apic;
97222cc8 2249
b93463aa 2250 apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
dfb95954 2251 | (kvm_lapic_get_reg(apic, APIC_TASKPRI) & 4));
97222cc8
ED
2252}
2253
2254u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
2255{
97222cc8
ED
2256 u64 tpr;
2257
dfb95954 2258 tpr = (u64) kvm_lapic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
97222cc8
ED
2259
2260 return (tpr & 0xf0) >> 4;
2261}
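
CR8 and the TPR are two views of the same priority value: CR8 holds the high nibble of the 8-bit TPR, which is what the shift/mask pairs above implement (kvm_lapic_set_tpr additionally preserves TPR bit 2). A round-trip sketch:

#include <stdint.h>

/* CR8 -> TPR (set_tpr path) and TPR -> CR8 (get_cr8 path). */
static uint32_t cr8_to_tpr(uint64_t cr8) { return (cr8 & 0x0f) << 4; }
static uint64_t tpr_to_cr8(uint32_t tpr) { return (tpr & 0xf0) >> 4; }
/* cr8_to_tpr(0x5) == 0x50, and tpr_to_cr8(0x50) == 0x5 again */
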
2262
2263void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
2264{
8d14695f 2265 u64 old_value = vcpu->arch.apic_base;
ad312c7c 2266 struct kvm_lapic *apic = vcpu->arch.apic;
97222cc8 2267
c7dd15b3 2268 if (!apic)
97222cc8 2269 value |= MSR_IA32_APICBASE_BSP;
c5af89b6 2270
e66d2ae7
JK
2271 vcpu->arch.apic_base = value;
2272
c7dd15b3 2273 if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
aedbaf4f 2274 kvm_update_cpuid_runtime(vcpu);
c7dd15b3
JM
2275
2276 if (!apic)
2277 return;
2278
c5cc421b 2279 /* update jump label if enable bit changes */
0dce7cd6 2280 if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
49bd29ba
RK
2281 if (value & MSR_IA32_APICBASE_ENABLE) {
2282 kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
6e4e3b4d 2283 static_branch_slow_dec_deferred(&apic_hw_disabled);
2f15d027
VK
2284 /* Check if there are APF page ready requests pending */
2285 kvm_make_request(KVM_REQ_APF_READY, vcpu);
187ca84b 2286 } else {
6e4e3b4d 2287 static_branch_inc(&apic_hw_disabled.key);
44d52717 2288 atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
187ca84b 2289 }
c5cc421b
GN
2290 }
2291
8d860bbe
JM
2292 if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE))
2293 kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
2294
2295 if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE))
b3646477 2296 static_call(kvm_x86_set_virtual_apic_mode)(vcpu);
8d14695f 2297
ad312c7c 2298 apic->base_address = apic->vcpu->arch.apic_base &
97222cc8
ED
2299 MSR_IA32_APICBASE_BASE;
2300
db324fe6
NA
2301 if ((value & MSR_IA32_APICBASE_ENABLE) &&
2302 apic->base_address != APIC_DEFAULT_PHYS_BASE)
2303 pr_warn_once("APIC base relocation is unsupported by KVM");
97222cc8
ED
2304}
2305
b26a695a
SS
2306void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
2307{
2308 struct kvm_lapic *apic = vcpu->arch.apic;
2309
2310 if (vcpu->arch.apicv_active) {
2311 /* irr_pending is always true when apicv is activated. */
2312 apic->irr_pending = true;
2313 apic->isr_count = 1;
2314 } else {
2315 apic->irr_pending = (apic_search_irr(apic) != -1);
2316 apic->isr_count = count_vectors(apic->regs + APIC_ISR);
2317 }
2318}
2319EXPORT_SYMBOL_GPL(kvm_apic_update_apicv);
2320
d28bc9dd 2321void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
97222cc8 2322{
b7e31be3 2323 struct kvm_lapic *apic = vcpu->arch.apic;
97222cc8
ED
2324 int i;
2325
b7e31be3
RK
2326 if (!apic)
2327 return;
97222cc8 2328
97222cc8 2329 /* Stop the timer in case it's a reset to an active apic */
d3c7b77d 2330 hrtimer_cancel(&apic->lapic_timer.timer);
97222cc8 2331
4d8e772b
RK
2332 if (!init_event) {
2333 kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE |
2334 MSR_IA32_APICBASE_ENABLE);
a92e2543 2335 kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
4d8e772b 2336 }
fc61b800 2337 kvm_apic_set_version(apic->vcpu);
97222cc8 2338
1e6e2755
SS
2339 for (i = 0; i < KVM_APIC_LVT_NUM; i++)
2340 kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
b6ac0695 2341 apic_update_lvtt(apic);
52b54190
JS
2342 if (kvm_vcpu_is_reset_bsp(vcpu) &&
2343 kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
1e6e2755 2344 kvm_lapic_set_reg(apic, APIC_LVT0,
90de4a18 2345 SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
dfb95954 2346 apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
97222cc8 2347
ae6f2496 2348 kvm_apic_set_dfr(apic, 0xffffffffU);
f8c1ea10 2349 apic_set_spiv(apic, 0xff);
1e6e2755 2350 kvm_lapic_set_reg(apic, APIC_TASKPRI, 0);
c028dd6b
RK
2351 if (!apic_x2apic_mode(apic))
2352 kvm_apic_set_ldr(apic, 0);
1e6e2755
SS
2353 kvm_lapic_set_reg(apic, APIC_ESR, 0);
2354 kvm_lapic_set_reg(apic, APIC_ICR, 0);
2355 kvm_lapic_set_reg(apic, APIC_ICR2, 0);
2356 kvm_lapic_set_reg(apic, APIC_TDCR, 0);
2357 kvm_lapic_set_reg(apic, APIC_TMICT, 0);
97222cc8 2358 for (i = 0; i < 8; i++) {
1e6e2755
SS
2359 kvm_lapic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
2360 kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
2361 kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
97222cc8 2362 }
b26a695a 2363 kvm_apic_update_apicv(vcpu);
8680b94b 2364 apic->highest_isr_cache = -1;
b33ac88b 2365 update_divide_count(apic);
d3c7b77d 2366 atomic_set(&apic->lapic_timer.pending, 0);
c5af89b6 2367 if (kvm_vcpu_is_bsp(vcpu))
5dbc8f3f
GN
2368 kvm_lapic_set_base(vcpu,
2369 vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP);
ae7a2a3f 2370 vcpu->arch.pv_eoi.msr_val = 0;
97222cc8 2371 apic_update_ppr(apic);
4191db26 2372 if (vcpu->arch.apicv_active) {
b3646477
JB
2373 static_call(kvm_x86_apicv_post_state_restore)(vcpu);
2374 static_call(kvm_x86_hwapic_irr_update)(vcpu, -1);
2375 static_call(kvm_x86_hwapic_isr_update)(vcpu, -1);
4191db26 2376 }
97222cc8 2377
e1035715 2378 vcpu->arch.apic_arb_prio = 0;
41383771 2379 vcpu->arch.apic_attention = 0;
4abaffce
WL
2380
2381 kvm_recalculate_apic_map(vcpu->kvm);
97222cc8
ED
2382}
2383
97222cc8
ED
2384/*
2385 *----------------------------------------------------------------------
2386 * timer interface
2387 *----------------------------------------------------------------------
2388 */
1b9778da 2389
2a6eac96 2390static bool lapic_is_periodic(struct kvm_lapic *apic)
97222cc8 2391{
d3c7b77d 2392 return apic_lvtt_period(apic);
97222cc8
ED
2393}
2394
3d80840d
MT
2395int apic_has_pending_timer(struct kvm_vcpu *vcpu)
2396{
54e9818f 2397 struct kvm_lapic *apic = vcpu->arch.apic;
3d80840d 2398
1e3161b4 2399 if (apic_enabled(apic) && apic_lvt_enabled(apic, APIC_LVTT))
54e9818f 2400 return atomic_read(&apic->lapic_timer.pending);
3d80840d
MT
2401
2402 return 0;
2403}
2404
89342082 2405int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
1b9778da 2406{
dfb95954 2407 u32 reg = kvm_lapic_get_reg(apic, lvt_type);
23930f95 2408 int vector, mode, trig_mode;
23930f95 2409
c48f1496 2410 if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
23930f95
JK
2411 vector = reg & APIC_VECTOR_MASK;
2412 mode = reg & APIC_MODE_MASK;
2413 trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
b4f2225c
YZ
2414 return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
2415 NULL);
23930f95
JK
2416 }
2417 return 0;
2418}
1b9778da 2419
8fdb2351 2420void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
23930f95 2421{
8fdb2351
JK
2422 struct kvm_lapic *apic = vcpu->arch.apic;
2423
2424 if (apic)
2425 kvm_apic_local_deliver(apic, APIC_LVT0);
1b9778da
ED
2426}
2427
d76685c4
GH
2428static const struct kvm_io_device_ops apic_mmio_ops = {
2429 .read = apic_mmio_read,
2430 .write = apic_mmio_write,
d76685c4
GH
2431};
2432
e9d90d47
AK
2433static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
2434{
2435 struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
2a6eac96 2436 struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
e9d90d47 2437
ae95f566 2438 apic_timer_expired(apic, true);
e9d90d47 2439
2a6eac96 2440 if (lapic_is_periodic(apic)) {
8003c9ae 2441 advance_periodic_target_expiration(apic);
e9d90d47
AK
2442 hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
2443 return HRTIMER_RESTART;
2444 } else
2445 return HRTIMER_NORESTART;
2446}
2447
c3941d9e 2448int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
97222cc8
ED
2449{
2450 struct kvm_lapic *apic;
2451
2452 ASSERT(vcpu != NULL);
97222cc8 2453
254272ce 2454 apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
97222cc8
ED
2455 if (!apic)
2456 goto nomem;
2457
ad312c7c 2458 vcpu->arch.apic = apic;
97222cc8 2459
254272ce 2460 apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
afc20184 2461 if (!apic->regs) {
97222cc8
ED
2462 printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
2463 vcpu->vcpu_id);
d589444e 2464 goto nomem_free_apic;
97222cc8 2465 }
97222cc8
ED
2466 apic->vcpu = vcpu;
2467
d3c7b77d 2468 hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
2c0d278f 2469 HRTIMER_MODE_ABS_HARD);
e9d90d47 2470 apic->lapic_timer.timer.function = apic_timer_fn;
c3941d9e 2471 if (timer_advance_ns == -1) {
a0f0037e 2472 apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
d0f5a86a 2473 lapic_timer_advance_dynamic = true;
c3941d9e
SC
2474 } else {
2475 apic->lapic_timer.timer_advance_ns = timer_advance_ns;
d0f5a86a 2476 lapic_timer_advance_dynamic = false;
c3941d9e
SC
2477 }
2478
c5cc421b
GN
2479 /*
2480 * APIC is created enabled. This will prevent kvm_lapic_set_base from
ee171d2f 2481 * thinking that APIC state has changed.
c5cc421b
GN
2482 */
2483 vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
6e4e3b4d 2484 static_branch_inc(&apic_sw_disabled.key); /* sw disabled at reset */
d76685c4 2485 kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
97222cc8
ED
2486
2487 return 0;
d589444e
RR
2488nomem_free_apic:
2489 kfree(apic);
a251fb90 2490 vcpu->arch.apic = NULL;
97222cc8 2491nomem:
97222cc8
ED
2492 return -ENOMEM;
2493}
97222cc8
ED
2494
2495int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
2496{
ad312c7c 2497 struct kvm_lapic *apic = vcpu->arch.apic;
b3c045d3 2498 u32 ppr;
97222cc8 2499
72c3bcdc 2500 if (!kvm_apic_present(vcpu))
97222cc8
ED
2501 return -1;
2502
b3c045d3
PB
2503 __apic_update_ppr(apic, &ppr);
2504 return apic_has_interrupt_for_ppr(apic, ppr);
97222cc8 2505}
25bb2cf9 2506EXPORT_SYMBOL_GPL(kvm_apic_has_interrupt);
97222cc8 2507
40487c68
QH
2508int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
2509{
dfb95954 2510 u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0);
40487c68 2511
c48f1496 2512 if (!kvm_apic_hw_enabled(vcpu->arch.apic))
3ce4dc17 2513 return 1;
e7dca5c0
CL
2514 if ((lvt0 & APIC_LVT_MASKED) == 0 &&
2515 GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
3ce4dc17
ML
2516 return 1;
2517 return 0;
40487c68
QH
2518}
2519
1b9778da
ED
2520void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
2521{
ad312c7c 2522 struct kvm_lapic *apic = vcpu->arch.apic;
1b9778da 2523
54e9818f 2524 if (atomic_read(&apic->lapic_timer.pending) > 0) {
0c5f81da 2525 kvm_apic_inject_pending_timer_irqs(apic);
f1ed0450 2526 atomic_set(&apic->lapic_timer.pending, 0);
1b9778da
ED
2527 }
2528}
2529
97222cc8
ED
2530int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
2531{
2532 int vector = kvm_apic_has_interrupt(vcpu);
ad312c7c 2533 struct kvm_lapic *apic = vcpu->arch.apic;
4d82d12b 2534 u32 ppr;
97222cc8
ED
2535
2536 if (vector == -1)
2537 return -1;
2538
56cc2406
WL
2539 /*
2540 * We get here even with APIC virtualization enabled, if doing
2541 * nested virtualization and L1 runs with the "acknowledge interrupt
2542 * on exit" mode. Then we cannot inject the interrupt via RVI,
2543 * because the process would deliver it through the IDT.
2544 */
2545
97222cc8 2546 apic_clear_irr(vector, apic);
f2bc14b6 2547 if (to_hv_vcpu(vcpu) && test_bit(vector, to_hv_synic(vcpu)->auto_eoi_bitmap)) {
4d82d12b
PB
2548 /*
2549 * For auto-EOI interrupts, there might be another pending
2550 * interrupt above PPR, so check whether to raise another
2551 * KVM_REQ_EVENT.
2552 */
5c919412 2553 apic_update_ppr(apic);
4d82d12b
PB
2554 } else {
2555 /*
2556 * For normal interrupts, PPR has been raised and there cannot
2557 * be a higher-priority pending interrupt---except if there was
2558 * a concurrent interrupt injection, but that would have
2559 * triggered KVM_REQ_EVENT already.
2560 */
2561 apic_set_isr(vector, apic);
2562 __apic_update_ppr(apic, &ppr);
5c919412
AS
2563 }
2564
97222cc8
ED
2565 return vector;
2566}
96ad2cc6 2567
a92e2543
RK
2568static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
2569 struct kvm_lapic_state *s, bool set)
2570{
2571 if (apic_x2apic_mode(vcpu->arch.apic)) {
2572 u32 *id = (u32 *)(s->regs + APIC_ID);
12806ba9 2573 u32 *ldr = (u32 *)(s->regs + APIC_LDR);
a92e2543 2574
37131313
RK
2575 if (vcpu->kvm->arch.x2apic_format) {
2576 if (*id != vcpu->vcpu_id)
2577 return -EINVAL;
2578 } else {
2579 if (set)
2580 *id >>= 24;
2581 else
2582 *id <<= 24;
2583 }
12806ba9
DDAG
2584
2585 /* In x2APIC mode, the LDR is fixed and based on the id */
2586 if (set)
2587 *ldr = kvm_apic_calc_x2apic_ldr(*id);
a92e2543
RK
2588 }
2589
2590 return 0;
2591}
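
The fixed x2APIC LDR mentioned in the comment above is derived purely from the APIC ID: the cluster (id >> 4) goes in bits 31:16 and the logical bit is 1 << (id & 0xf). A sketch of kvm_apic_calc_x2apic_ldr() under that SDM-specified formula:

#include <stdint.h>

/* x2APIC logical destination: 16-bit cluster ID, 16-bit member bitmap. */
static uint32_t calc_x2apic_ldr(uint32_t id)
{
        return ((id >> 4) << 16) | (1u << (id & 0xf));
}
/* id 0x12 -> cluster 1, member bit 2 -> LDR 0x00010004 */
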
2592
2593int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
2594{
2595 memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s));
24647e0a
PS
2596
2597 /*
2598 * Get calculated timer current count for remaining timer period (if
2599 * any) and store it in the returned register set.
2600 */
2601 __kvm_lapic_set_reg(s->regs, APIC_TMCCT,
2602 __apic_read(vcpu->arch.apic, APIC_TMCCT));
2603
a92e2543
RK
2604 return kvm_apic_state_fixup(vcpu, s, false);
2605}
2606
2607int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
96ad2cc6 2608{
ad312c7c 2609 struct kvm_lapic *apic = vcpu->arch.apic;
a92e2543
RK
2610 int r;
2611
5dbc8f3f 2612 kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
64eb0620
GN
2613 /* set SPIV separately to get count of SW disabled APICs right */
2614 apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
a92e2543
RK
2615
2616 r = kvm_apic_state_fixup(vcpu, s, true);
4abaffce
WL
2617 if (r) {
2618 kvm_recalculate_apic_map(vcpu->kvm);
a92e2543 2619 return r;
4abaffce 2620 }
0e96f31e 2621 memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
a92e2543 2622
44d52717 2623 atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
4abaffce 2624 kvm_recalculate_apic_map(vcpu->kvm);
fc61b800
GN
2625 kvm_apic_set_version(vcpu);
2626
96ad2cc6 2627 apic_update_ppr(apic);
d3c7b77d 2628 hrtimer_cancel(&apic->lapic_timer.timer);
35737d2d 2629 apic->lapic_timer.expired_tscdeadline = 0;
b6ac0695 2630 apic_update_lvtt(apic);
dfb95954 2631 apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
96ad2cc6 2632 update_divide_count(apic);
24647e0a 2633 __start_apic_timer(apic, APIC_TMCCT);
b26a695a 2634 kvm_apic_update_apicv(vcpu);
8680b94b 2635 apic->highest_isr_cache = -1;
d62caabb 2636 if (vcpu->arch.apicv_active) {
b3646477
JB
2637 static_call(kvm_x86_apicv_post_state_restore)(vcpu);
2638 static_call(kvm_x86_hwapic_irr_update)(vcpu,
4114c27d 2639 apic_find_highest_irr(apic));
b3646477 2640 static_call(kvm_x86_hwapic_isr_update)(vcpu,
b4eef9b3 2641 apic_find_highest_isr(apic));
d62caabb 2642 }
3842d135 2643 kvm_make_request(KVM_REQ_EVENT, vcpu);
49df6397
SR
2644 if (ioapic_in_kernel(vcpu->kvm))
2645 kvm_rtc_eoi_tracking_restore_one(vcpu);
0669a510
RK
2646
2647 vcpu->arch.apic_arb_prio = 0;
a92e2543
RK
2648
2649 return 0;
96ad2cc6 2650}
a3d7f85f 2651
2f52d58c 2652void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
a3d7f85f 2653{
a3d7f85f
ED
2654 struct hrtimer *timer;
2655
0c5f81da
WL
2656 if (!lapic_in_kernel(vcpu) ||
2657 kvm_can_post_timer_interrupt(vcpu))
a3d7f85f
ED
2658 return;
2659
54e9818f 2660 timer = &vcpu->arch.apic->lapic_timer.timer;
a3d7f85f 2661 if (hrtimer_cancel(timer))
2c0d278f 2662 hrtimer_start_expires(timer, HRTIMER_MODE_ABS_HARD);
a3d7f85f 2663}
b93463aa 2664
ae7a2a3f
MT
2665/*
2666 * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
2667 *
2668 * Detect whether the guest triggered PV EOI since the
2669 * last entry. If yes, set EOI on the guest's behalf.
2670 * Clear PV EOI in guest memory in any case.
2671 */
2672static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
2673 struct kvm_lapic *apic)
2674{
2675 bool pending;
2676 int vector;
2677 /*
2678 * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
2679 * and KVM_PV_EOI_ENABLED in guest memory as follows:
2680 *
2681 * KVM_APIC_PV_EOI_PENDING is unset:
2682 * -> host disabled PV EOI.
2683 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
2684 * -> host enabled PV EOI, guest did not execute EOI yet.
2685 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
2686 * -> host enabled PV EOI, guest executed EOI.
2687 */
2688 BUG_ON(!pv_eoi_enabled(vcpu));
2689 pending = pv_eoi_get_pending(vcpu);
2690 /*
2691 * Clear pending bit in any case: it will be set again on vmentry.
2692 * While this might not be ideal from a performance point of view,
2693 * it makes sure PV EOI is only enabled when we know it's safe.
2694 */
2695 pv_eoi_clr_pending(vcpu);
2696 if (pending)
2697 return;
2698 vector = apic_set_eoi(apic);
2699 trace_kvm_pv_eoi(apic, vector);
2700}
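
The three-state table in the comment above, written out as a sketch (names are illustrative, not kernel API):

enum pv_eoi_state {
        PV_EOI_HOST_DISABLED,  /* KVM_APIC_PV_EOI_PENDING unset */
        PV_EOI_GUEST_PENDING,  /* both bits set: guest has not EOI'd yet */
        PV_EOI_GUEST_DONE,     /* pending set, enabled clear: guest EOI'd */
};

static enum pv_eoi_state pv_eoi_state(int host_pending, int guest_enabled)
{
        if (!host_pending)
                return PV_EOI_HOST_DISABLED;
        return guest_enabled ? PV_EOI_GUEST_PENDING : PV_EOI_GUEST_DONE;
}
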
2701
b93463aa
AK
2702void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
2703{
2704 u32 data;
b93463aa 2705
ae7a2a3f
MT
2706 if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
2707 apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);
2708
41383771 2709 if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
b93463aa
AK
2710 return;
2711
4e335d9e
PB
2712 if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
2713 sizeof(u32)))
603242a8 2714 return;
b93463aa
AK
2715
2716 apic_set_tpr(vcpu->arch.apic, data & 0xff);
2717}
2718
ae7a2a3f
MT
2719/*
2720 * apic_sync_pv_eoi_to_guest - called before vmentry
2721 *
2722 * Detect whether it's safe to enable PV EOI and
2723 * if yes do so.
2724 */
2725static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
2726 struct kvm_lapic *apic)
2727{
2728 if (!pv_eoi_enabled(vcpu) ||
2729 /* IRR set or many bits in ISR: could be nested. */
2730 apic->irr_pending ||
2731 /* Cache not set: could be safe but we don't bother. */
2732 apic->highest_isr_cache == -1 ||
2733 /* Need EOI to update ioapic. */
3bb345f3 2734 kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) {
ae7a2a3f
MT
2735 /*
2736 * PV EOI was disabled by apic_sync_pv_eoi_from_guest
2737 * so we need not do anything here.
2738 */
2739 return;
2740 }
2741
2742 pv_eoi_set_pending(apic->vcpu);
2743}
2744
b93463aa
AK
2745void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
2746{
2747 u32 data, tpr;
2748 int max_irr, max_isr;
ae7a2a3f 2749 struct kvm_lapic *apic = vcpu->arch.apic;
b93463aa 2750
ae7a2a3f
MT
2751 apic_sync_pv_eoi_to_guest(vcpu, apic);
2752
41383771 2753 if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
b93463aa
AK
2754 return;
2755
dfb95954 2756 tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff;
b93463aa
AK
2757 max_irr = apic_find_highest_irr(apic);
2758 if (max_irr < 0)
2759 max_irr = 0;
2760 max_isr = apic_find_highest_isr(apic);
2761 if (max_isr < 0)
2762 max_isr = 0;
2763 data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
2764
4e335d9e
PB
2765 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
2766 sizeof(u32));
b93463aa
AK
2767}
2768
fda4e2e8 2769int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
b93463aa 2770{
fda4e2e8 2771 if (vapic_addr) {
4e335d9e 2772 if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
fda4e2e8
AH
2773 &vcpu->arch.apic->vapic_cache,
2774 vapic_addr, sizeof(u32)))
2775 return -EINVAL;
41383771 2776 __set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
fda4e2e8 2777 } else {
41383771 2778 __clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
fda4e2e8
AH
2779 }
2780
2781 vcpu->arch.apic->vapic_addr = vapic_addr;
2782 return 0;
b93463aa 2783}
0105d1a5
GN
2784
2785int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
2786{
2787 struct kvm_lapic *apic = vcpu->arch.apic;
2788 u32 reg = (msr - APIC_BASE_MSR) << 4;
2789
35754c98 2790 if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
0105d1a5
GN
2791 return 1;
2792
c69d3d9b
NA
2793 if (reg == APIC_ICR2)
2794 return 1;
2795
0105d1a5 2796 /* if this is ICR write vector before command */
decdc283 2797 if (reg == APIC_ICR)
1e6e2755
SS
2798 kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
2799 return kvm_lapic_reg_write(apic, reg, (u32)data);
0105d1a5
GN
2800}
2801
2802int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
2803{
2804 struct kvm_lapic *apic = vcpu->arch.apic;
2805 u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;
2806
35754c98 2807 if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
0105d1a5
GN
2808 return 1;
2809
0d88800d 2810 if (reg == APIC_DFR || reg == APIC_ICR2)
c69d3d9b 2811 return 1;
c69d3d9b 2812
1e6e2755 2813 if (kvm_lapic_reg_read(apic, reg, 4, &low))
0105d1a5 2814 return 1;
decdc283 2815 if (reg == APIC_ICR)
1e6e2755 2816 kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
0105d1a5
GN
2817
2818 *data = (((u64)high) << 32) | low;
2819
2820 return 0;
2821}
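
Both MSR handlers above rely on the same linear mapping between the x2APIC MSR space and the legacy MMIO register offsets; a sketch, assuming APIC_BASE_MSR is 0x800 as in apicdef.h:

/* MSR 0x800 + n maps to MMIO offset n * 16. */
static unsigned int x2apic_msr_to_reg(unsigned int msr)
{
        return (msr - 0x800) << 4;  /* APIC_BASE_MSR == 0x800 */
}
/* 0x808 -> 0x80 (TPR), 0x830 -> 0x300 (ICR) */
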
10388a07
GN
2822
2823int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
2824{
2825 struct kvm_lapic *apic = vcpu->arch.apic;
2826
bce87cce 2827 if (!lapic_in_kernel(vcpu))
10388a07
GN
2828 return 1;
2829
2830 /* if this is ICR write vector before command */
2831 if (reg == APIC_ICR)
1e6e2755
SS
2832 kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
2833 return kvm_lapic_reg_write(apic, reg, (u32)data);
10388a07
GN
2834}
2835
2836int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
2837{
2838 struct kvm_lapic *apic = vcpu->arch.apic;
2839 u32 low, high = 0;
2840
bce87cce 2841 if (!lapic_in_kernel(vcpu))
10388a07
GN
2842 return 1;
2843
1e6e2755 2844 if (kvm_lapic_reg_read(apic, reg, 4, &low))
10388a07
GN
2845 return 1;
2846 if (reg == APIC_ICR)
1e6e2755 2847 kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
10388a07
GN
2848
2849 *data = (((u64)high) << 32) | low;
2850
2851 return 0;
2852}
ae7a2a3f 2853
72bbf935 2854int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
ae7a2a3f
MT
2855{
2856 u64 addr = data & ~KVM_MSR_ENABLED;
a7c42bb6
VK
2857 struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
2858 unsigned long new_len;
2859
ae7a2a3f
MT
2860 if (!IS_ALIGNED(addr, 4))
2861 return 1;
2862
2863 vcpu->arch.pv_eoi.msr_val = data;
2864 if (!pv_eoi_enabled(vcpu))
2865 return 0;
a7c42bb6
VK
2866
2867 if (addr == ghc->gpa && len <= ghc->len)
2868 new_len = ghc->len;
2869 else
2870 new_len = len;
2871
2872 return kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
ae7a2a3f 2873}
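
A guest-side sketch of the MSR value the function above parses: bit 0 is KVM_MSR_ENABLED (assumed to be 1, as in the UAPI headers) and the remaining bits carry the GPA of the PV EOI word, which must be 4-byte aligned or the write is rejected by the IS_ALIGNED() check:

#include <stdint.h>

#define KVM_MSR_ENABLED 1ULL  /* assumption: bit 0, per kvm_para.h */

/* gpa must be 4-byte aligned, or KVM fails the MSR write above. */
static uint64_t make_pv_eoi_msr(uint64_t gpa)
{
        return gpa | KVM_MSR_ENABLED;
}
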
c5cc421b 2874
66450a21
JK
2875void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
2876{
2877 struct kvm_lapic *apic = vcpu->arch.apic;
2b4a273b 2878 u8 sipi_vector;
1c96dcce 2879 int r;
299018f4 2880 unsigned long pe;
66450a21 2881
1c96dcce
PB
2882 if (!lapic_in_kernel(vcpu))
2883 return;
2884
2885 /*
2886 * Read pending events before calling the check_events
2887 * callback.
2888 */
2889 pe = smp_load_acquire(&apic->pending_events);
2890 if (!pe)
66450a21
JK
2891 return;
2892
1c96dcce 2893 if (is_guest_mode(vcpu)) {
cb6a32c2 2894 r = kvm_check_nested_events(vcpu);
1c96dcce
PB
2895 if (r < 0)
2896 return;
2897 /*
2898 * If an event has happened and caused a vmexit,
2899 * we know INITs are latched and therefore
2900 * we will not incorrectly deliver an APIC
2901 * event instead of a vmexit.
2902 */
2903 }
2904
cd7764fe 2905 /*
4b9852f4 2906 * INITs are latched while the CPU is in specific states
1c96dcce 2907 * (SMM, VMX root mode, SVM with GIF=0).
4b9852f4
LA
2908 * Because a CPU cannot be in these states immediately
2909 * after it has processed an INIT signal (and thus in
2910 * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs
2911 * and leave the INIT pending.
cd7764fe 2912 */
27cbe7d6 2913 if (kvm_vcpu_latch_init(vcpu)) {
cd7764fe 2914 WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
1c96dcce 2915 if (test_bit(KVM_APIC_SIPI, &pe))
cd7764fe
PB
2916 clear_bit(KVM_APIC_SIPI, &apic->pending_events);
2917 return;
2918 }
299018f4
GN
2919
2920 if (test_bit(KVM_APIC_INIT, &pe)) {
1c96dcce 2921 clear_bit(KVM_APIC_INIT, &apic->pending_events);
d28bc9dd 2922 kvm_vcpu_reset(vcpu, true);
66450a21
JK
2923 if (kvm_vcpu_is_bsp(apic->vcpu))
2924 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
2925 else
2926 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
2927 }
f57ad63a 2928 if (test_bit(KVM_APIC_SIPI, &pe)) {
1c96dcce 2929 clear_bit(KVM_APIC_SIPI, &apic->pending_events);
f57ad63a
ML
2930 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
2931 /* evaluate pending_events before reading the vector */
2932 smp_rmb();
2933 sipi_vector = apic->sipi_vector;
647daca2 2934 kvm_x86_ops.vcpu_deliver_sipi_vector(vcpu, sipi_vector);
f57ad63a
ML
2935 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
2936 }
66450a21
JK
2937 }
2938}
2939
cef84c30
DM
2940void kvm_lapic_exit(void)
2941{
2942 static_key_deferred_flush(&apic_hw_disabled);
2943 static_key_deferred_flush(&apic_sw_disabled);
2944}